最近经常需要模拟网页提交、取回网页源码,再从中取得相应的元素,因此常常要解析Html中的各种元素。网络是个好东西,搜索一番,就找到了好几个Delphi版本的HtmlParser类库,试着使用了几个,发现解析起来都不完整,或多或少会出现一些问题!于是想到:如果界面上有一个浏览器,我们可以通过WebBrowser的Document接口对网页元素进行操作,很是方便!但是模拟网页提交时,界面上不一定要出现WebBrowser,肯定有办法不通过WebBrowser就直接解析HTML——也就是不要WebBrowser这个外壳,只要它里面的Document文档接口对象,就能实现对Html的解析了。查找了一番MSDN,然后Google一下,果然可行,构建方法如下:
//创建IHTMLDocument2接口
CoCreateInstance(CLASS_HTMLDocument, nil, CLSCTX_INPROC_SERVER, IID_IHTMLDocument2, FHtmlDoc);
接口创建好了之后就能够对文档元素进行解析了,很是爽快!
结合了我自己的特有操作,我对Combobox,Table,Frame等一些网页元素做了相应的封装,实现了一个HTMLParser,大致代码如下:
这里只给出声明部分,完整代码请在文末下载。
代码
(*
****************************************************
*)
(* 得闲工作室 *)
(* 网页元素操作类库 *)
(* *)
(* DxHtmlElement Unit *)
(* Copyright(c) 2008-2010 不得闲 *)
(* email:appleak46@yahoo.com.cn QQ:75492895 *)
(* **************************************************** *)
unit DxHtmlElement;
interface
uses Windows,sysUtils,Clipbrd,MSHTML,ActiveX,OleCtrls,Graphics,TypInfo;
// Element-type predicates.
// Each function takes a single IHTMLElement and returns a Boolean. Only the
// declarations are visible in this interface section; presumably each one
// reports whether the element is of the kind named in its identifier
// (confirm against the implementation section).
// NOTE(review): the first two declarations name their parameter eleElement,
// the remaining ones use element.
function IsSelectElement(eleElement: IHTMLElement): Boolean;
function IsPwdElement(eleElement: IHTMLElement): Boolean;
function IsTextElement(element: IHTMLElement): boolean;
function IsTableElement(element: IHTMLElement): Boolean;
function IsElementCollection(element: IHTMLElement): Boolean;
function IsChkElement(element: IHTMLElement): boolean;
function IsRadioBtnElement(element: IHTMLElement): boolean;
function IsMemoElement(element: IHTMLElement): boolean;
function IsFormElement(element: IHTMLElement): boolean;
function IsIMGElement(element: IHTMLElement): boolean;
function IsInIMGElement(element: IHTMLElement): boolean;
function IsLabelElement(element: IHTMLElement): boolean;
function IsLinkElement(element: IHTMLElement): boolean;
function IsListElement(element: IHTMLElement): boolean;
function IsControlElement(element: IHTMLElement): boolean;
function IsObjectElement(element: IHTMLElement): boolean;
function IsFrameElement(element: IHTMLElement): boolean;
function IsInPutBtnElement(element: IHTMLElement): boolean;
function IsInHiddenElement(element: IHTMLElement): boolean;
function IsSubmitElement(element: IHTMLElement): boolean;
// Image helpers (declarations only; the bodies are not part of this listing).
// GetPicIndex returns an Integer and GetPicElement an IHTMLImgElement for a
// document plus name/src/alt strings; presumably they locate an image element
// by those attributes (confirm in the implementation).
// The three GetRegCodePic overloads each return a TPicture, selected by
// name/src/alt, by index, or by an image element.
// NOTE(review): who owns (frees) the returned TPicture is not visible here.
function GetPicIndex(doc: IHTMLDocument2; Src: string ; Alt: string ): Integer;
function GetPicElement(doc: IHTMLDocument2;imgName: string ;src: string ;Alt: string ): IHTMLImgElement;
function GetRegCodePic(doc: IHTMLDocument2;ImgName: string ; Src: string ; Alt: string ): TPicture; overload ;
function GetRegCodePic(doc: IHTMLDocument2;Index: integer): TPicture; overload ;
function GetRegCodePic(doc: IHTMLDocument2;element: IHTMLIMGElement): TPicture; overload ;
type
// Procedural type: a stdcall function taking (LRESULT, IID, WPARAM, out object)
// and returning an HRESULT.
// NOTE(review): the shape matches oleacc's ObjectFromLresult, presumably so it
// can be bound at run time; confirm where it is used.
TObjectFromLResult = function (LRESULT: lResult; const IID: TIID; WPARAM: wParam; out pObject): HRESULT; stdcall ;
// Kinds of HTML element distinguished by this unit; ELE_UNKNOW is the first
// value (ordinal 0).
// NOTE(review): there is no value corresponding to IsTableElement,
// IsElementCollection or IsSubmitElement.
TEleMentType = (ELE_UNKNOW,ELE_TEXT,ELE_PWD,ELE_SELECT,ELE_CHECKBOX,ELE_RADIOBTN,ELE_MEMO,ELE_FORM,ELE_IMAGE,
ELE_LABEL,ELE_LINK,ELE_LIST,ELE_CONTROL,ELE_OBJECT,ELE_FRAME,ELE_INPUTBTN,ELE_INIMAGE,ELE_INHIDDEN);
// Element classification and table access helpers (declarations only).
// GetElementType returns a TEleMentType value for an element and
// GetElementTypeName returns a string for it.
// The GetWebBrowserHtmlTable* functions take table/row/column indexes, return
// a Boolean and deliver text through the var parameter ResValue; presumably
// the Boolean reports success (confirm in the implementation).
// NOTE(review): whether the indexes are 0- or 1-based is not visible here.
// NOTE(review): GeHtmlTableHtml looks like a misspelling of GetHtmlTableHtml;
// it is part of the public interface, so renaming it would break callers.
function GetElementType(element: IHTMLELEMENT): TEleMentType;
function GetElementTypeName(element: IHTMLELEMENT): string ;
function GetHtmlTableCell(aTable: IHTMLTable;aRow,aCol: Integer): IHTMLElement;
function GetHtmlTable(aDoc: IHTMLDocument2; aIndex: Integer): IHTMLTable;
function GetWebBrowserHtmlTableCellText(Doc: IHTMLDocument2;
const TableIndex, RowIndex, ColIndex: Integer; var ResValue: string ): Boolean;
function GetHtmlTableRowHtml(aTable: IHTMLTable; aRow: Integer): IHTMLElement;
function GetWebBrowserHtmlTableCellHtml(Doc: IHTMLDocument2;
const TableIndex,RowIndex,ColIndex: Integer; var ResValue: string ): Boolean;
function GeHtmlTableHtml(aTable: IHTMLTable; aRow: Integer): IHTMLElement;
function GetWebBrowserHtmlTableHtml(Doc: IHTMLDocument2;
const TableIndex,RowIndex: Integer; var ResValue: string ): Boolean;
type
// Forward declarations: TDxWebFrame refers to both collection classes before
// they are fully declared.
TDxWebFrameCollection = class ;
TDxWebElementCollection = class ;
// Load state values; used as the type of TDxWebFrame.ReadyState.
TLoadState = (Doc_Loading,Doc_Completed,Doc_Invalidate);
// Wrapper around one IHTMLWindow2 (a frame window).
// Exposes the frame's document, name, source, nested frames, element
// collection and load state through read-only properties; only Frame itself
// is writable (via SetFrame).
// Declaration only: getter behaviour is defined in the implementation section.
TDxWebFrame = class
private
// Wrapped frame window; read directly by the Frame property.
FFrame: IHTMLWINDOW2;
// NOTE(review): presumably helper objects owned by this frame and released in
// Destroy; confirm in the implementation.
FElementCollections: TDxWebElementCollection;
FWebFrameCollections: TDxWebFrameCollection;
function GetSrc: string ;
function GetElementCount: integer;
function GetWebFrameCollections: TDxWebFrameCollection;
function GetElementCollections: TDxWebElementCollection;
function GetDocument: IHTMLDOCUMENT2;
function GetReadState: TLoadState;
function GetIsLoaded: boolean;
procedure SetFrame( const Value: IHTMLWINDOW2);
function GetName: string ;
public
Constructor Create(IFrame: IHTMLWINDOW2);
Destructor Destroy; override ;
property Frame: IHTMLWINDOW2 read FFrame write SetFrame;
property Src: string read GetSrc;
property Document: IHTMLDOCUMENT2 read GetDocument;
property Name: string read GetName;
property Frames: TDxWebFrameCollection read GetWebFrameCollections;
property ElementCount: integer read GetElementCount;
property ElementCollections: TDxWebElementCollection read GetElementCollections;
property ReadyState: TLoadState read GetReadState;
property IsLoaded: boolean read GetIsLoaded;
end ;
// Wrapper around an IHTMLFramesCollection2.
// Frames can be fetched by index or by name, either as the raw IHTMLWindow2
// interface or as a TDxWebFrame object.
// NOTE(review): the single private Frame field suggests FrameByIndex and
// FrameByName hand out one shared TDxWebFrame instance that is re-pointed on
// each access; confirm before holding on to a returned object.
TDxWebFrameCollection = Class
private
FFrameCollection: IHTMLFramesCollection2;
Frame: TDxWebFrame;
function GetCount: integer;
function GetFrameInterfaceByIndex(index: integer): IHTMLWINDOW2;
function GetFrameInterfaceByName(Name: string ): IHTMLWINDOW2;
function GetFrameByIndex(index: integer): TDxWebFrame;
function GetFrameByName(Name: string ): TDxWebFrame;
procedure SetFrameCollection( const Value: IHTMLFramesCollection2);
public
Constructor Create(ACollection: IHTMLFramesCollection2);
Destructor Destroy; override ;
property FrameCollection: IHTMLFramesCollection2 read FFrameCollection write SetFrameCollection;
property Count: integer read GetCount;
property FrameInterfaceByIndex[index: integer]: IHTMLWINDOW2 read GetFrameInterfaceByIndex;
property FrameInterfaceByName[Name: string ]: IHTMLWINDOW2 read GetFrameInterfaceByName;
property FrameByIndex[index: integer]: TDxWebFrame read GetFrameByIndex;
property FrameByName[Name: string ]: TDxWebFrame read GetFrameByName;
end ;
// Wrapper around an IHTMLElementCollection.
// Elements are addressable by name, by index, or by name plus index
// (Element[itemName, index]); ChildElementCollection returns another
// TDxWebElementCollection for a string key.
// NOTE(review): the private FChildCollection field suggests the child
// collection object is cached and owned by this instance; confirm.
// NOTE(review): what ElementByName returns when nothing matches is not
// visible in this declaration.
TDxWebElementCollection = class
private
FCollection: IHTMLElementCollection;
FChildCollection: TDxWebElementCollection;
function GetCollection(index: String): TDxWebElementCollection;
function GetCount: integer;
function GetElement(itemName: string ; index: integer): IHTMLElement;
function GetElementByName(itemName: string ): IHTMLELEMENT;
function GetElementByIndex(index: integer): IHTMLELEMENT;
procedure SetCollection( const Value: IHTMLElementCollection);
public
Constructor Create(ACollection: IHTMLElementCollection);
Destructor Destroy; override ;
property Collection: IHTMLElementCollection read FCollection write SetCollection;
property ChildElementCollection[index: String]: TDxWebElementCollection read GetCollection;
property ElementCount: integer read GetCount;
property Element[itemName: string ;index: integer]: IHTMLElement read GetElement;
property ElementByName[itemName: string ]: IHTMLELEMENT read GetElementByName;
property ElementByIndex[index: integer]: IHTMLELEMENT read GetElementByIndex;
end ;
// Descendant of TDxWebElementCollection that declares no additional members
// (the name suggests it is intended for link elements).
TLinkCollection = class (TDxWebElementCollection)
end ;
// Forward declaration: TDxTableCollection refers to TDxWebTable before it is
// fully declared.
TDxWebTable = class ;
// Access to the tables of an IHTMLDocument2.
// Tables can be obtained by name or by index, either as IHTMLTable or as a
// TDxWebTable wrapper. Document is read straight from FDocument and written
// through SetDocument.
// NOTE(review): the single FWebTable field suggests TableByName/TableByIndex
// share one wrapper instance that is re-pointed on each access; confirm.
// NOTE(review): SetDocument takes its interface parameter by value, unlike
// the const setters used by the other classes in this unit.
TDxTableCollection = class
private
FTableCollection: IHTMLElementCollection;
FDocument: IHTMLDOCUMENT2;
FWebTable: TDxWebTable;
function GetTableInterfaceByName(AName: string ): IHTMLTABLE;
procedure SetDocument(Value: IHTMLDOCUMENT2);
function GetTableInterfaceByIndex(index: integer): IHTMLTABLE;
function GetCount: integer;
function GetTableByIndex(index: integer): TDxWebTable;
function GetTableByName(AName: string ): TDxWebTable;
public
Constructor Create(Doc: IHTMLDOCUMENT2);
destructor Destroy; override ;
property TableInterfaceByName[AName: string ]: IHTMLTABLE read GetTableInterfaceByName;
property TableInterfaceByIndex[index: integer]: IHTMLTABLE read GetTableInterfaceByIndex;
property TableByName[AName: string ]: TDxWebTable read GetTableByName;
property TableByIndex[index: integer]: TDxWebTable read GetTableByIndex;
property Document: IHTMLDOCUMENT2 read FDocument write SetDocument;
property Count: integer read GetCount;
end ;
// Wrapper around one IHTMLTable.
// Cell and CellElement are indexed as [ACol, ARow] (column first).
// RowColCount[RowIndex] returns an integer per row; presumably that row's
// cell count (confirm in the implementation).
// The only declared field is an interface reference and no destructor is
// declared.
// NOTE(review): whether the indexes are 0-based is not visible here.
TDxWebTable = class
private
FTableInterface: IHTMLTABLE;
function GetRowCount: integer;
procedure SetTableInterface( const Value: IHTMLTABLE);
function GetCell(ACol, ARow: integer): string ;
function GetRowColCount(RowIndex: integer): integer;
function GetInnerHtml: string ;
function GetInnerText: string ;
function GetCellElement(ACol, ARow: Integer): IHTMLTableCell;
public
Constructor Create(ATable: IHTMLTABLE);
property TableInterface: IHTMLTABLE read FTableInterface write SetTableInterface;
property RowCount: integer read GetRowCount;
property Cell[ACol: integer;ARow: integer]: string read GetCell;
property CellElement[ACol: Integer;ARow: Integer]: IHTMLTableCell read GetCellElement;
property RowColCount[RowIndex: integer]: integer read GetRowColCount;
property InnerHtml: string read GetInnerHtml;
property InnerText: string read GetInnerText;
end ;
// Wrapper around an IHTMLSelectElement (an HTML select / combo box).
// CombInterface reads the wrapped interface directly from FHtmlSelect and
// replaces it through SetCombInterface.
// Count, ItemIndex, Name and value presumably forward to the wrapped select
// element; ItemByIndex / ItemByName return strings and ItemAttribute returns
// an OleVariant for a named attribute of the item at an index.
// Add, Insert and Remove presumably modify the option list.
// Declaration only: exact semantics are defined in the implementation section.
TDxWebCombobox = class
private
FHtmlSelect: IHTMLSelectElement;
function GetCount: Integer;
procedure SetItemIndex( const Value: Integer);
function GetItemIndex: Integer;
function GetName: string ;
procedure SetName( const Value: string );
function GetValue: string ;
procedure SetValue( const Value: string );
procedure SetCombInterface( const Value: IHTMLSelectElement);
function GetItemByName(EleName: string ): string ;
function GetItemByIndex(index: integer): string ;
function GetItemAttribute(index: Integer; AttribName: string ): OleVariant;
public
constructor Create(AWebCombo: IHTMLSelectElement);
procedure Add(Ele: IHTMLElement);
procedure Insert(Ele: IHTMLElement;Index: Integer);
procedure Remove(index: Integer);
property CombInterface: IHTMLSelectElement read FHtmlSelect write SetCombInterface;
property Count: Integer read GetCount;
property ItemIndex: Integer read GetItemIndex write SetItemIndex;
property ItemByIndex[index: integer]: string read GetItemByIndex;
property ItemByName[EleName: string ]: string read GetItemByName;
property ItemAttribute[index: Integer;AttribName: string ]: OleVariant read GetItemAttribute;
property Name: string read GetName write SetName;
property value: string read GetValue write SetValue;
end ;
implementation
end .
(* 得闲工作室 *)
(* 网页元素操作类库 *)
(* *)
(* DxHtmlElement Unit *)
(* Copyright(c) 2008-2010 不得闲 *)
(* email:appleak46@yahoo.com.cn QQ:75492895 *)
(* **************************************************** *)
unit DxHtmlElement;
interface
uses Windows,sysUtils,Clipbrd,MSHTML,ActiveX,OleCtrls,Graphics,TypInfo;
// Element-type predicates.
// Each function takes a single IHTMLElement and returns a Boolean. Only the
// declarations are visible in this interface section; presumably each one
// reports whether the element is of the kind named in its identifier
// (confirm against the implementation section).
// NOTE(review): the first two declarations name their parameter eleElement,
// the remaining ones use element.
function IsSelectElement(eleElement: IHTMLElement): Boolean;
function IsPwdElement(eleElement: IHTMLElement): Boolean;
function IsTextElement(element: IHTMLElement): boolean;
function IsTableElement(element: IHTMLElement): Boolean;
function IsElementCollection(element: IHTMLElement): Boolean;
function IsChkElement(element: IHTMLElement): boolean;
function IsRadioBtnElement(element: IHTMLElement): boolean;
function IsMemoElement(element: IHTMLElement): boolean;
function IsFormElement(element: IHTMLElement): boolean;
function IsIMGElement(element: IHTMLElement): boolean;
function IsInIMGElement(element: IHTMLElement): boolean;
function IsLabelElement(element: IHTMLElement): boolean;
function IsLinkElement(element: IHTMLElement): boolean;
function IsListElement(element: IHTMLElement): boolean;
function IsControlElement(element: IHTMLElement): boolean;
function IsObjectElement(element: IHTMLElement): boolean;
function IsFrameElement(element: IHTMLElement): boolean;
function IsInPutBtnElement(element: IHTMLElement): boolean;
function IsInHiddenElement(element: IHTMLElement): boolean;
function IsSubmitElement(element: IHTMLElement): boolean;
// Image helpers (declarations only; the bodies are not part of this listing).
// GetPicIndex returns an Integer and GetPicElement an IHTMLImgElement for a
// document plus name/src/alt strings; presumably they locate an image element
// by those attributes (confirm in the implementation).
// The three GetRegCodePic overloads each return a TPicture, selected by
// name/src/alt, by index, or by an image element.
// NOTE(review): who owns (frees) the returned TPicture is not visible here.
function GetPicIndex(doc: IHTMLDocument2; Src: string ; Alt: string ): Integer;
function GetPicElement(doc: IHTMLDocument2;imgName: string ;src: string ;Alt: string ): IHTMLImgElement;
function GetRegCodePic(doc: IHTMLDocument2;ImgName: string ; Src: string ; Alt: string ): TPicture; overload ;
function GetRegCodePic(doc: IHTMLDocument2;Index: integer): TPicture; overload ;
function GetRegCodePic(doc: IHTMLDocument2;element: IHTMLIMGElement): TPicture; overload ;
type
// Procedural type: a stdcall function taking (LRESULT, IID, WPARAM, out object)
// and returning an HRESULT.
// NOTE(review): the shape matches oleacc's ObjectFromLresult, presumably so it
// can be bound at run time; confirm where it is used.
TObjectFromLResult = function (LRESULT: lResult; const IID: TIID; WPARAM: wParam; out pObject): HRESULT; stdcall ;
// Kinds of HTML element distinguished by this unit; ELE_UNKNOW is the first
// value (ordinal 0).
// NOTE(review): there is no value corresponding to IsTableElement,
// IsElementCollection or IsSubmitElement.
TEleMentType = (ELE_UNKNOW,ELE_TEXT,ELE_PWD,ELE_SELECT,ELE_CHECKBOX,ELE_RADIOBTN,ELE_MEMO,ELE_FORM,ELE_IMAGE,
ELE_LABEL,ELE_LINK,ELE_LIST,ELE_CONTROL,ELE_OBJECT,ELE_FRAME,ELE_INPUTBTN,ELE_INIMAGE,ELE_INHIDDEN);
// Element classification and table access helpers (declarations only).
// GetElementType returns a TEleMentType value for an element and
// GetElementTypeName returns a string for it.
// The GetWebBrowserHtmlTable* functions take table/row/column indexes, return
// a Boolean and deliver text through the var parameter ResValue; presumably
// the Boolean reports success (confirm in the implementation).
// NOTE(review): whether the indexes are 0- or 1-based is not visible here.
// NOTE(review): GeHtmlTableHtml looks like a misspelling of GetHtmlTableHtml;
// it is part of the public interface, so renaming it would break callers.
function GetElementType(element: IHTMLELEMENT): TEleMentType;
function GetElementTypeName(element: IHTMLELEMENT): string ;
function GetHtmlTableCell(aTable: IHTMLTable;aRow,aCol: Integer): IHTMLElement;
function GetHtmlTable(aDoc: IHTMLDocument2; aIndex: Integer): IHTMLTable;
function GetWebBrowserHtmlTableCellText(Doc: IHTMLDocument2;
const TableIndex, RowIndex, ColIndex: Integer; var ResValue: string ): Boolean;
function GetHtmlTableRowHtml(aTable: IHTMLTable; aRow: Integer): IHTMLElement;
function GetWebBrowserHtmlTableCellHtml(Doc: IHTMLDocument2;
const TableIndex,RowIndex,ColIndex: Integer; var ResValue: string ): Boolean;
function GeHtmlTableHtml(aTable: IHTMLTable; aRow: Integer): IHTMLElement;
function GetWebBrowserHtmlTableHtml(Doc: IHTMLDocument2;
const TableIndex,RowIndex: Integer; var ResValue: string ): Boolean;
type
// Forward declarations: TDxWebFrame refers to both collection classes before
// they are fully declared.
TDxWebFrameCollection = class ;
TDxWebElementCollection = class ;
// Load state values; used as the type of TDxWebFrame.ReadyState.
TLoadState = (Doc_Loading,Doc_Completed,Doc_Invalidate);
// Wrapper around one IHTMLWindow2 (a frame window).
// Exposes the frame's document, name, source, nested frames, element
// collection and load state through read-only properties; only Frame itself
// is writable (via SetFrame).
// Declaration only: getter behaviour is defined in the implementation section.
TDxWebFrame = class
private
// Wrapped frame window; read directly by the Frame property.
FFrame: IHTMLWINDOW2;
// NOTE(review): presumably helper objects owned by this frame and released in
// Destroy; confirm in the implementation.
FElementCollections: TDxWebElementCollection;
FWebFrameCollections: TDxWebFrameCollection;
function GetSrc: string ;
function GetElementCount: integer;
function GetWebFrameCollections: TDxWebFrameCollection;
function GetElementCollections: TDxWebElementCollection;
function GetDocument: IHTMLDOCUMENT2;
function GetReadState: TLoadState;
function GetIsLoaded: boolean;
procedure SetFrame( const Value: IHTMLWINDOW2);
function GetName: string ;
public
Constructor Create(IFrame: IHTMLWINDOW2);
Destructor Destroy; override ;
property Frame: IHTMLWINDOW2 read FFrame write SetFrame;
property Src: string read GetSrc;
property Document: IHTMLDOCUMENT2 read GetDocument;
property Name: string read GetName;
property Frames: TDxWebFrameCollection read GetWebFrameCollections;
property ElementCount: integer read GetElementCount;
property ElementCollections: TDxWebElementCollection read GetElementCollections;
property ReadyState: TLoadState read GetReadState;
property IsLoaded: boolean read GetIsLoaded;
end ;
// Wrapper around an IHTMLFramesCollection2.
// Frames can be fetched by index or by name, either as the raw IHTMLWindow2
// interface or as a TDxWebFrame object.
// NOTE(review): the single private Frame field suggests FrameByIndex and
// FrameByName hand out one shared TDxWebFrame instance that is re-pointed on
// each access; confirm before holding on to a returned object.
TDxWebFrameCollection = Class
private
FFrameCollection: IHTMLFramesCollection2;
Frame: TDxWebFrame;
function GetCount: integer;
function GetFrameInterfaceByIndex(index: integer): IHTMLWINDOW2;
function GetFrameInterfaceByName(Name: string ): IHTMLWINDOW2;
function GetFrameByIndex(index: integer): TDxWebFrame;
function GetFrameByName(Name: string ): TDxWebFrame;
procedure SetFrameCollection( const Value: IHTMLFramesCollection2);
public
Constructor Create(ACollection: IHTMLFramesCollection2);
Destructor Destroy; override ;
property FrameCollection: IHTMLFramesCollection2 read FFrameCollection write SetFrameCollection;
property Count: integer read GetCount;
property FrameInterfaceByIndex[index: integer]: IHTMLWINDOW2 read GetFrameInterfaceByIndex;
property FrameInterfaceByName[Name: string ]: IHTMLWINDOW2 read GetFrameInterfaceByName;
property FrameByIndex[index: integer]: TDxWebFrame read GetFrameByIndex;
property FrameByName[Name: string ]: TDxWebFrame read GetFrameByName;
end ;
// Wrapper around an IHTMLElementCollection.
// Elements are addressable by name, by index, or by name plus index
// (Element[itemName, index]); ChildElementCollection returns another
// TDxWebElementCollection for a string key.
// NOTE(review): the private FChildCollection field suggests the child
// collection object is cached and owned by this instance; confirm.
// NOTE(review): what ElementByName returns when nothing matches is not
// visible in this declaration.
TDxWebElementCollection = class
private
FCollection: IHTMLElementCollection;
FChildCollection: TDxWebElementCollection;
function GetCollection(index: String): TDxWebElementCollection;
function GetCount: integer;
function GetElement(itemName: string ; index: integer): IHTMLElement;
function GetElementByName(itemName: string ): IHTMLELEMENT;
function GetElementByIndex(index: integer): IHTMLELEMENT;
procedure SetCollection( const Value: IHTMLElementCollection);
public
Constructor Create(ACollection: IHTMLElementCollection);
Destructor Destroy; override ;
property Collection: IHTMLElementCollection read FCollection write SetCollection;
property ChildElementCollection[index: String]: TDxWebElementCollection read GetCollection;
property ElementCount: integer read GetCount;
property Element[itemName: string ;index: integer]: IHTMLElement read GetElement;
property ElementByName[itemName: string ]: IHTMLELEMENT read GetElementByName;
property ElementByIndex[index: integer]: IHTMLELEMENT read GetElementByIndex;
end ;
// Descendant of TDxWebElementCollection that declares no additional members
// (the name suggests it is intended for link elements).
TLinkCollection = class (TDxWebElementCollection)
end ;
// Forward declaration: TDxTableCollection refers to TDxWebTable before it is
// fully declared.
TDxWebTable = class ;
// Access to the tables of an IHTMLDocument2.
// Tables can be obtained by name or by index, either as IHTMLTable or as a
// TDxWebTable wrapper. Document is read straight from FDocument and written
// through SetDocument.
// NOTE(review): the single FWebTable field suggests TableByName/TableByIndex
// share one wrapper instance that is re-pointed on each access; confirm.
// NOTE(review): SetDocument takes its interface parameter by value, unlike
// the const setters used by the other classes in this unit.
TDxTableCollection = class
private
FTableCollection: IHTMLElementCollection;
FDocument: IHTMLDOCUMENT2;
FWebTable: TDxWebTable;
function GetTableInterfaceByName(AName: string ): IHTMLTABLE;
procedure SetDocument(Value: IHTMLDOCUMENT2);
function GetTableInterfaceByIndex(index: integer): IHTMLTABLE;
function GetCount: integer;
function GetTableByIndex(index: integer): TDxWebTable;
function GetTableByName(AName: string ): TDxWebTable;
public
Constructor Create(Doc: IHTMLDOCUMENT2);
destructor Destroy; override ;
property TableInterfaceByName[AName: string ]: IHTMLTABLE read GetTableInterfaceByName;
property TableInterfaceByIndex[index: integer]: IHTMLTABLE read GetTableInterfaceByIndex;
property TableByName[AName: string ]: TDxWebTable read GetTableByName;
property TableByIndex[index: integer]: TDxWebTable read GetTableByIndex;
property Document: IHTMLDOCUMENT2 read FDocument write SetDocument;
property Count: integer read GetCount;
end ;
// Wrapper around one IHTMLTable.
// Cell and CellElement are indexed as [ACol, ARow] (column first).
// RowColCount[RowIndex] returns an integer per row; presumably that row's
// cell count (confirm in the implementation).
// The only declared field is an interface reference and no destructor is
// declared.
// NOTE(review): whether the indexes are 0-based is not visible here.
TDxWebTable = class
private
FTableInterface: IHTMLTABLE;
function GetRowCount: integer;
procedure SetTableInterface( const Value: IHTMLTABLE);
function GetCell(ACol, ARow: integer): string ;
function GetRowColCount(RowIndex: integer): integer;
function GetInnerHtml: string ;
function GetInnerText: string ;
function GetCellElement(ACol, ARow: Integer): IHTMLTableCell;
public
Constructor Create(ATable: IHTMLTABLE);
property TableInterface: IHTMLTABLE read FTableInterface write SetTableInterface;
property RowCount: integer read GetRowCount;
property Cell[ACol: integer;ARow: integer]: string read GetCell;
property CellElement[ACol: Integer;ARow: Integer]: IHTMLTableCell read GetCellElement;
property RowColCount[RowIndex: integer]: integer read GetRowColCount;
property InnerHtml: string read GetInnerHtml;
property InnerText: string read GetInnerText;
end ;
// Wrapper around an IHTMLSelectElement (an HTML select / combo box).
// CombInterface reads the wrapped interface directly from FHtmlSelect and
// replaces it through SetCombInterface.
// Count, ItemIndex, Name and value presumably forward to the wrapped select
// element; ItemByIndex / ItemByName return strings and ItemAttribute returns
// an OleVariant for a named attribute of the item at an index.
// Add, Insert and Remove presumably modify the option list.
// Declaration only: exact semantics are defined in the implementation section.
TDxWebCombobox = class
private
FHtmlSelect: IHTMLSelectElement;
function GetCount: Integer;
procedure SetItemIndex( const Value: Integer);
function GetItemIndex: Integer;
function GetName: string ;
procedure SetName( const Value: string );
function GetValue: string ;
procedure SetValue( const Value: string );
procedure SetCombInterface( const Value: IHTMLSelectElement);
function GetItemByName(EleName: string ): string ;
function GetItemByIndex(index: integer): string ;
function GetItemAttribute(index: Integer; AttribName: string ): OleVariant;
public
constructor Create(AWebCombo: IHTMLSelectElement);
procedure Add(Ele: IHTMLElement);
procedure Insert(Ele: IHTMLElement;Index: Integer);
procedure Remove(index: Integer);
property CombInterface: IHTMLSelectElement read FHtmlSelect write SetCombInterface;
property Count: Integer read GetCount;
property ItemIndex: Integer read GetItemIndex write SetItemIndex;
property ItemByIndex[index: integer]: string read GetItemByIndex;
property ItemByName[EleName: string ]: string read GetItemByName;
property ItemAttribute[index: Integer;AttribName: string ]: OleVariant read GetItemAttribute;
property Name: string read GetName write SetName;
property value: string read GetValue write SetValue;
end ;
implementation
end .
HTMLParser解析类的代码实现单元
代码
(*
****************************************************
*)
(* 得闲工作室 *)
(* HTML解析单元库 *)
(* *)
(* DxHtmlParser Unit *)
(* Copyright(c) 2008-2010 不得闲 *)
(* email:appleak46@yahoo.com.cn QQ:75492895 *)
(* **************************************************** *)
unit DxHtmlParser;
interface
uses Windows,MSHTML,ActiveX,DxHtmlElement,Forms;
type
// HTML parser built on a stand-alone MSHTML document (no TWebBrowser control).
// Assigning the HTML property writes the markup into the internal document's
// body and points WebElements at the document's "all" collection.
// WebTables is created once, in the constructor, from the same document.
// WebCombobox[Name] returns a wrapper for the element with that name; the
// wrapper is the single parser-owned FWebComb instance, re-pointed on every
// access, so callers must not free it or rely on it across calls.
TDxHtmlParser = class
private
// The MSHTML document that does the actual parsing.
FHtmlDoc: IHTMLDocument2;
// Last markup assigned through the HTML property.
FHTML: string ;
FWebTables: TDxTableCollection;
FWebElements: TDxWebElementCollection;
FWebComb: TDxWebCombobox;
procedure SetHTML( const Value: string );
function GetWebCombobox(AName: string ): TDxWebCombobox;
public
constructor Create;
destructor Destroy; override ;
property HTML: string read FHTML write SetHTML;
property WebTables: TDxTableCollection read FWebTables;
property WebElements: TDxWebElementCollection read FWebElements;
property WebCombobox[Name: string ]: TDxWebCombobox read GetWebCombobox;
end ;
implementation
{ TDxHtmlParser }
// Creates the parser: initialises COM on the calling thread, instantiates a
// stand-alone MSHTML document and creates the wrapper objects around it.
// The literals passed to Set_designMode and compared with readyState must be
// exactly 'On' and 'complete'; with padding spaces the mode is rejected and
// the wait loop below can never terminate.
constructor TDxHtmlParser.Create;
begin
  inherited Create;
  // COM has to be initialised before CoCreateInstance; Destroy issues the
  // matching CoUninitialize.
  CoInitialize(nil);
  // Create the IHTMLDocument2 interface
  CoCreateInstance(CLASS_HTMLDocument, nil, CLSCTX_INPROC_SERVER, IID_IHTMLDocument2, FHtmlDoc);
  Assert(FHtmlDoc <> nil, ' 构建HTMLDocument接口失败 ');
  // Design mode: scripts in the loaded markup are not executed.
  FHtmlDoc.Set_designMode('On');
  // Wait until the document reports it is ready, keeping the message queue
  // pumping in the meantime.
  while not (FHtmlDoc.readyState = 'complete') do
  begin
    Sleep(1);
    Application.ProcessMessages;
  end;
  FWebTables := TDxTableCollection.Create(FHtmlDoc);
  // No markup has been loaded yet, so both wrappers start without a target.
  FWebElements := TDxWebElementCollection.Create(nil);
  FWebComb := TDxWebCombobox.Create(nil);
end;
// Frees the wrapper objects, releases the MSHTML document and then shuts COM
// down for this thread (balancing the CoInitialize call in Create).
destructor TDxHtmlParser.Destroy;
begin
  FWebTables.Free;
  FWebElements.Free;
  FWebComb.Free;
  // Release the document explicitly while COM is still initialised. Interface
  // fields are otherwise released by the automatic field cleanup that runs
  // after this destructor body, i.e. after CoUninitialize.
  FHtmlDoc := nil;
  CoUninitialize;
  inherited;
end;
// Returns the combo-box wrapper for the element named AName.
// Returns nil while WebElements has no collection (it is created with nil and
// only receives one in SetHTML).
// The result is always the parser-owned FWebComb instance, re-pointed at the
// requested element; callers must not free it.
// The "as" cast raises an exception if the element that was found does not
// implement IHTMLSelectElement.
function TDxHtmlParser.GetWebCombobox(AName: string): TDxWebCombobox;
begin
  Result := nil;
  if FWebElements.Collection <> nil then
  begin
    FWebComb.CombInterface := FWebElements.ElementByName[AName] as IHTMLSelectElement;
    Result := FWebComb;
  end;
end;
// Loads new markup into the internal document.
// Does nothing when Value equals the markup that is already loaded.
// Otherwise the markup is written to the document body and WebElements is
// pointed at the document's "all" collection.
// NOTE(review): FWebTables is not touched here; whether it picks up the new
// content depends on TDxTableCollection, which is not visible in this unit.
procedure TDxHtmlParser.SetHTML(const Value: string);
begin
  if FHTML = Value then
    Exit;
  FHtmlDoc.body.innerHTML := Value;
  FWebElements.Collection := FHtmlDoc.all;
  // Remember the markup only after the document accepted it, so that a failed
  // load can be retried with the same string instead of being skipped.
  FHTML := Value;
end;
end .
(* 得闲工作室 *)
(* HTML解析单元库 *)
(* *)
(* DxHtmlParser Unit *)
(* Copyright(c) 2008-2010 不得闲 *)
(* email:appleak46@yahoo.com.cn QQ:75492895 *)
(* **************************************************** *)
unit DxHtmlParser;
interface
uses Windows,MSHTML,ActiveX,DxHtmlElement,Forms;
type
// HTML parser built on a stand-alone MSHTML document (no TWebBrowser control).
// Assigning the HTML property writes the markup into the internal document's
// body and points WebElements at the document's "all" collection.
// WebTables is created once, in the constructor, from the same document.
// WebCombobox[Name] returns a wrapper for the element with that name; the
// wrapper is the single parser-owned FWebComb instance, re-pointed on every
// access, so callers must not free it or rely on it across calls.
TDxHtmlParser = class
private
// The MSHTML document that does the actual parsing.
FHtmlDoc: IHTMLDocument2;
// Last markup assigned through the HTML property.
FHTML: string ;
FWebTables: TDxTableCollection;
FWebElements: TDxWebElementCollection;
FWebComb: TDxWebCombobox;
procedure SetHTML( const Value: string );
function GetWebCombobox(AName: string ): TDxWebCombobox;
public
constructor Create;
destructor Destroy; override ;
property HTML: string read FHTML write SetHTML;
property WebTables: TDxTableCollection read FWebTables;
property WebElements: TDxWebElementCollection read FWebElements;
property WebCombobox[Name: string ]: TDxWebCombobox read GetWebCombobox;
end ;
implementation
{ TDxHtmlParser }
// Creates the parser: initialises COM on the calling thread, instantiates a
// stand-alone MSHTML document and creates the wrapper objects around it.
// The literals passed to Set_designMode and compared with readyState must be
// exactly 'On' and 'complete'; with padding spaces the mode is rejected and
// the wait loop below can never terminate.
constructor TDxHtmlParser.Create;
begin
  inherited Create;
  // COM has to be initialised before CoCreateInstance; Destroy issues the
  // matching CoUninitialize.
  CoInitialize(nil);
  // Create the IHTMLDocument2 interface
  CoCreateInstance(CLASS_HTMLDocument, nil, CLSCTX_INPROC_SERVER, IID_IHTMLDocument2, FHtmlDoc);
  Assert(FHtmlDoc <> nil, ' 构建HTMLDocument接口失败 ');
  // Design mode: scripts in the loaded markup are not executed.
  FHtmlDoc.Set_designMode('On');
  // Wait until the document reports it is ready, keeping the message queue
  // pumping in the meantime.
  while not (FHtmlDoc.readyState = 'complete') do
  begin
    Sleep(1);
    Application.ProcessMessages;
  end;
  FWebTables := TDxTableCollection.Create(FHtmlDoc);
  // No markup has been loaded yet, so both wrappers start without a target.
  FWebElements := TDxWebElementCollection.Create(nil);
  FWebComb := TDxWebCombobox.Create(nil);
end;
// Frees the wrapper objects, releases the MSHTML document and then shuts COM
// down for this thread (balancing the CoInitialize call in Create).
destructor TDxHtmlParser.Destroy;
begin
  FWebTables.Free;
  FWebElements.Free;
  FWebComb.Free;
  // Release the document explicitly while COM is still initialised. Interface
  // fields are otherwise released by the automatic field cleanup that runs
  // after this destructor body, i.e. after CoUninitialize.
  FHtmlDoc := nil;
  CoUninitialize;
  inherited;
end;
// Returns the combo-box wrapper for the element named AName.
// Returns nil while WebElements has no collection (it is created with nil and
// only receives one in SetHTML).
// The result is always the parser-owned FWebComb instance, re-pointed at the
// requested element; callers must not free it.
// The "as" cast raises an exception if the element that was found does not
// implement IHTMLSelectElement.
function TDxHtmlParser.GetWebCombobox(AName: string): TDxWebCombobox;
begin
  Result := nil;
  if FWebElements.Collection <> nil then
  begin
    FWebComb.CombInterface := FWebElements.ElementByName[AName] as IHTMLSelectElement;
    Result := FWebComb;
  end;
end;
// Loads new markup into the internal document.
// Does nothing when Value equals the markup that is already loaded.
// Otherwise the markup is written to the document body and WebElements is
// pointed at the document's "all" collection.
// NOTE(review): FWebTables is not touched here; whether it picks up the new
// content depends on TDxTableCollection, which is not visible in this unit.
procedure TDxHtmlParser.SetHTML(const Value: string);
begin
  if FHTML = Value then
    Exit;
  FHtmlDoc.body.innerHTML := Value;
  FWebElements.Collection := FHtmlDoc.all;
  // Remember the markup only after the document accepted it, so that a failed
  // load can be retried with the same string instead of being skipped.
  FHTML := Value;
end;
end .