UI_Less.pas:
?1 unit UI_Less; ?2 ??3 interface ?4 ??5 uses ?6 ??Windows, Classes, Messages, Forms, MsHtml, Urlmon, ActiveX; ?7 ??8 const ?9 ??WM_USER_STARTWALKING = WM_USER + 1; 10 ??DISPID_AMBIENT_DLCONTROL = (-5512); 11 ??READYSTATE_COMPLETE = $00000004; 12 ?13 ??DLCTL_DLIMAGES = $00000010; 14 ??DLCTL_VIDEOS = $00000020; 15 ??DLCTL_BGSOUNDS = $00000040; 16 ??DLCTL_NO_SCRIPTS = $00000080; 17 ??DLCTL_NO_JAVA = $00000100; 18 ??DLCTL_NO_RUNACTIVEXCTLS = $00000200; 19 ??DLCTL_NO_DLACTIVEXCTLS = $00000400; 20 ??DLCTL_DOWNLOADONLY = $00000800; 21 ??DLCTL_NO_FRAMEDOWNLOAD = $00001000; 22 ??DLCTL_RESYNCHRONIZE = $00002000; 23 ??DLCTL_PRAGMA_NO_CACHE = $00004000; 24 ??DLCTL_NO_BEHAVIORS = $00008000; 25 ??DLCTL_NO_METACHARSET = $00010000; 26 ??DLCTL_URL_ENCODING_DISABLE_UTF8 = $00020000; 27 ??DLCTL_URL_ENCODING_ENABLE_UTF8 = $00040000; 28 ??DLCTL_FORCEOFFLINE = $10000000; 29 ??DLCTL_NO_CLIENTPULL = $20000000; 30 ??DLCTL_SILENT = $40000000; 31 ??DLCTL_OFFLINEIFNOTCONNECTED = $80000000; 32 ??DLCTL_OFFLINE = DLCTL_OFFLINEIFNOTCONNECTED; 33 ?34 type 35 ??TUILess = class(TComponent, IUnknown, IDispatch, IPropertyNotifySink, 36 ????IOleClientSite) 37 ??private 38 ????FDocTitle: string; 39 ????FBodyText: TStrings; 40 ????FBodyHtml: TStrings; 41 ??protected 42 ????/// IDISPATCH 43 ????function Invoke(DispID: Integer; const IID: TGUID; LocaleID: Integer; 44 ??????Flags: Word; var Params; VarResult, ExcepInfo, ArgErr: Pointer): HResult; 45 ??????stdcall; 46 ????/// IPROPERTYNOTIFYSINK 47 ????function OnChanged(DispID: TDispID): HResult; stdcall; 48 ????function OnRequestEdit(DispID: TDispID): HResult; stdcall; 49 ????/// IOLECLIENTSITE 50 ????function SaveObject: HResult; stdcall; 51 ????function GetMoniker(dwAssign: Longint; dwWhichMoniker: Longint; 52 ??????out mk: IMoniker): HResult; stdcall; 53 ????function GetContainer(out container: IOleContainer): HResult; stdcall; 54 ????function ShowObject: HResult; stdcall; 55 ????function OnShowWindow(fShow: BOOL): HResult; stdcall; 56 ????function RequestNewObjectLayout: HResult; stdcall; 57 ????/// 58 ????function LoadUrlFromMoniker: HResult; 59 ????function LoadUrlFromFile: HResult; 60 ????// * We only use LoadUrlFromMoniker, but we could use LoadUrlFromFile instead. 61 ?62 ??public 63 ????constructor Create(AOwner: TComponent); override; 64 ????destructor Destroy; override; 65 ????property DocTitle: string read FDocTitle; 66 ????property BodyText: TStrings read FBodyText write FBodyText; 67 ????property BodyHtml: TStrings read FBodyHtml write FBodyHtml; 68 ????function Get(URL: PWidechar; var IsSucceed: Boolean; IsStop: Boolean) 69 ??????: IHTMLELEMENTCollection; 70 ????procedure GetAnchorList(IC: IHTMLELEMENTCollection; Anchorlist: TStrings); 71 ????procedure GetImageList(IC: IHTMLELEMENTCollection; ImageList: TStrings); 72 ??end; 73 ?74 implementation 75 ?76 var 77 ??Doc: IhtmlDocument2; 78 ??_URL: PWidechar; 79 ?80 constructor TUILess.Create(AOwner: TComponent); 81 begin 82 ??inherited Create(AOwner); 83 ??FBodyText := TStringList.Create; 84 ??FBodyHtml := TStringList.Create; 85 end; 86 ?87 destructor TUILess.Destroy; 88 begin 89 ??if Assigned(FBodyText) then 90 ????FBodyText.Free; 91 ??if Assigned(FBodyHtml) then 92 ????FBodyHtml.Free; 93 ??inherited Destroy; 94 end; 95 ?96 /// CORE ---->>>>>>>>> 97 function TUILess.Get(URL: PWidechar; var IsSucceed: Boolean; IsStop: Boolean) 98 ??: IHTMLELEMENTCollection; 99 var100 ??Cookie: Integer;101 ??CP: IConnectionPoint;102 ??OleObject: IOleObject;103 ??OleControl: IOleControl;104 ??CPC: IConnectionPointContainer;105 ??All: IHTMLElement;106 ??Msg: TMsg;107 ??hr: HResult;108 begin109 ??_URL := URL;110 ??IsSucceed := false;111 ??try112 ????CoCreateInstance(CLASS_HTMLDocument, nil, CLSCTX_INPROC_SERVER,113 ??????IID_IHTMLDocument2, Doc);114 ????OleObject := Doc as IOleObject;115 ????OleObject.SetClientSite(self);116 ????OleControl := Doc as IOleControl;117 ????OleControl.OnAmbientPropertyChange(DISPID_AMBIENT_DLCONTROL);118 ????CPC := Doc as IConnectionPointContainer;119 ????CPC.FindConnectionPoint(IPropertyNotifySink, CP);120 ????CP.Advise(self, Cookie);121 ????hr := LoadUrlFromMoniker; // alternative: Hr:= LoadUrlFromFile;122 ????if ((SUCCEEDED(hr)) or (hr = E_PENDING)) then123 ??????while (GetMessage(Msg, 0, 0, 0)) do124 ??????begin125 ????????if ((Msg.message = WM_USER_STARTWALKING) and (Msg.hwnd = 0)) then126 ????????begin127 ??????????PostQuitMessage(0);128 ??????????result := Doc.Get_all;129 ??????????All := Doc.Get_body;130 ??????????FDocTitle := string(Doc.nameProp);131 ??????????FBodyText.Text := string(All.outerText);132 ??????????FBodyHtml.Text := string(All.outerHTML);133 ??????????IsSucceed := true;134 ????????end135 ????????else136 ??????????DispatchMessage(Msg);137 ????????if IsStop then138 ??????????Exit;139 ??????end;140 ??except141 ????Exit;142 ??end;143 end;144 145 function TUILess.Invoke(DispID: Integer; const IID: TGUID; LocaleID: Integer;146 ??Flags: Word; var Params; VarResult, ExcepInfo, ArgErr: Pointer): HResult;147 var148 ??I: Integer;149 begin150 ??if DISPID_AMBIENT_DLCONTROL = DispID then151 ??begin152 ????I := DLCTL_DOWNLOADONLY + DLCTL_NO_SCRIPTS + DLCTL_NO_JAVA +153 ??????DLCTL_NO_DLACTIVEXCTLS + DLCTL_NO_RUNACTIVEXCTLS;154 ????PVariant(VarResult)^ := I;155 ????result := S_OK;156 ??end157 ??else158 ????result := DISP_E_MEMBERNOTFOUND;159 end;160 161 function TUILess.OnChanged(DispID: TDispID): HResult;162 var163 ??dp: TDispParams;164 ??vResult: OleVariant;165 begin166 ??if (DISPID_READYSTATE = DispID) then167 ????if SUCCEEDED((Doc as IhtmlDocument2).Invoke(DISPID_READYSTATE, GUID_null,168 ????????LOCALE_SYSTEM_DEFAULT, DISPATCH_PROPERTYGET, dp, @vResult, nil, nil))169 ??????then170 ??????if Integer(vResult) = READYSTATE_COMPLETE then171 ????????PostThreadMessage(GetCurrentThreadId(), WM_USER_STARTWALKING, 0, 0);172 end;173 174 function TUILess.LoadUrlFromMoniker: HResult;175 var176 ??Moniker: IMoniker;177 ??BindCtx: IBindCTX;178 ??PM: IPersistMoniker;179 begin180 ??createURLMoniker(nil, _URL, Moniker);181 ??CreateBindCtx(0, BindCtx);182 ??PM := Doc as IPersistMoniker;183 ??result := PM.Load(LongBool(0), Moniker, BindCtx, STGM_READ)184 end;185 186 function TUILess.LoadUrlFromFile: HResult;187 var188 ??PF: IPersistfile;189 begin190 ??PF := Doc as IPersistfile;191 ??result := PF.Load(_URL, 0);192 end;193 194 // 获取图像链接195 procedure TUILess.GetImageList(IC: IHTMLELEMENTCollection; ImageList: TStrings);196 var197 ??Image: IHTMLImgElement;198 ??Disp: IDispatch;199 ??x: Integer;200 begin201 ??if IC <> nil then202 ??begin203 ????for x := 0 to IC.Length - 1 do204 ????begin205 ??????application.ProcessMessages;206 ??????Disp := IC.item(x, 0);207 ??????if SUCCEEDED(Disp.QueryInterface(IHTMLImgElement, Image)) then208 ????????ImageList.add(string(Image.src));209 ????end;210 ??end;211 end;212 213 // 获取链接214 procedure TUILess.GetAnchorList(IC: IHTMLELEMENTCollection;215 ??Anchorlist: TStrings);216 var217 ??anchor: IHTMLAnchorElement;218 ??Disp: IDispatch;219 ??x: Integer;220 begin221 ??if IC <> nil then222 ??begin223 ????for x := 0 to IC.Length - 1 do224 ????begin225 ??????application.ProcessMessages;226 ??????Disp := IC.item(x, 0);227 ??????if (SUCCEEDED(Disp.QueryInterface(IHTMLAnchorElement, anchor)) and228 ??????????(anchor.href <> ‘‘)) then229 ????????Anchorlist.add(string(anchor.href));230 ????end;231 ??end;232 end;233 234 /// Don‘t Care ------>>>>>>>>>>>235 function TUILess.OnRequestEdit(DispID: TDispID): HResult;236 begin237 ??result := E_NOTIMPL;238 end;239 240 function TUILess.SaveObject: HResult;241 begin242 ??result := E_NOTIMPL;243 end;244 245 function TUILess.GetMoniker(dwAssign: Longint; dwWhichMoniker: Longint;246 ??out mk: IMoniker): HResult;247 begin248 ??result := E_NOTIMPL;249 end;250 251 function TUILess.GetContainer(out container: IOleContainer): HResult;252 begin253 ??result := E_NOTIMPL;254 end;255 256 function TUILess.ShowObject: HResult;257 begin258 ??result := E_NOTIMPL;259 end;260 261 function TUILess.OnShowWindow(fShow: BOOL): HResult;262 begin263 ??result := E_NOTIMPL;264 end;265 266 function TUILess.RequestNewObjectLayout: HResult;267 begin268 ??result := E_NOTIMPL;269 end;270 271 end.
Unit3.pas:
?1 unit Unit3; ?2 ??3 interface ?4 ??5 uses ?6 ??Windows, Messages, SysUtils, Variants, Classes, Graphics, Controls, Forms, ?7 ??Dialogs, StdCtrls; ?8 ??9 type 10 ??TForm3 = class(TForm) 11 ????Button1: TButton; 12 ????Edit1: TEdit; 13 ????Memo1: TMemo; 14 ????Button2: TButton; 15 ????Button3: TButton; 16 ????Button4: TButton; 17 ????procedure Button1Click(Sender: TObject); 18 ????procedure Button2Click(Sender: TObject); 19 ????procedure Button3Click(Sender: TObject); 20 ????procedure Button4Click(Sender: TObject); 21 ??private 22 ????{ Private declarations } 23 ????procedure into(i: Word); 24 ??public 25 ????{ Public declarations } 26 ??end; 27 ?28 var 29 ??Form3: TForm3; 30 ?31 implementation 32 uses UI_Less; 33 ?34 {$R *.dfm} 35 ?36 function DoStrToWideChar(s: string): PWideChar; 37 var 38 ?// ??s:sting; 39 ??pwc: PWidechar; 40 ??len: integer; 41 begin 42 ??// ?s:= ‘abcdefg ‘; 43 ??len := length(s) + 1; 44 ??pwc := AllocMem(len * sizeof(widechar)); 45 ??stringtowidechar(s, pwc, len); 46 ???// showmessage(widechartostring(pwc)); 47 ?48 ??result := pwc; 49 ???// ?FreeMem(pwc); 50 end; 51 ?52 ?53 ?54 procedure TForm3.into(i: Word); 55 var 56 ??sh: TUILess; 57 ??su: boolean; // 是否获取成功 58 ??// isstop: boolean; //设全局变量可以中断连接 ,避免出错 59 ??surl: PWideChar; 60 begin 61 ??surl := DoStrToWideChar(Trim(Edit1.Text)); 62 ??sh := TUILess.Create(nil); 63 ??try 64 ????Memo1.Clear; 65 ????case i of 66 ??????1: 67 ????????sh.GetAnchorList(sh.get(surl, su, False), Memo1.Lines); 68 ??????2: 69 ????????sh.GetImageList(sh.get(surl, su, False), Memo1.Lines); 70 ??????3: 71 ????????begin 72 ??????????sh.get(surl, su, False); 73 ??????????Memo1.Lines := sh.BodyText; 74 ????????end; 75 ??????4: 76 ????????begin 77 ??????????sh.get(surl, su, False); 78 ??????????Memo1.Lines := sh.BodyHtml; 79 ????????end; 80 ????end; 81 ??finally 82 ????//sh.Free; 83 ??end; 84 end; 85 ?86 procedure TForm3.Button1Click(Sender: TObject); 87 begin 88 ??into(1); 89 end; 90 ?91 procedure TForm3.Button2Click(Sender: TObject); 92 begin 93 ??into(2); 94 end; 95 ?96 procedure TForm3.Button3Click(Sender: TObject); 97 begin 98 ??into(3); 99 end;100 101 procedure TForm3.Button4Click(Sender: TObject);102 begin103 ??into(4);104 end;105 106 end.
html网页采集
原文地址:https://www.cnblogs.com/FKdelphi/p/10357222.html