以delphi XE8 自带indy(10.5.8.0)组件为例,分享实战中遇到的问题及解决方法。
TIdHttpEx 用法实例01[多线程获取网页](包含完整源码)
实例02(如何Post参数,如何保存与提取Cookie)待写
TIdHttpEx 已实现了对GZIP的解压,对UTF-8编码解码等
本文包含以下几个单元
uIdhttp.pas (TIdHttpEx)
uIdCookieMgr.pas (TIdCookieMgr)
uOperateIndy.pas 操作 TIdhttpEx 全靠它了
uIdhttp.Pas
1 unit uIdHttpEx; 2 3 interface 4 5 uses 6 Classes, Idhttp, uIdCookieMgr, IdSSLOpenSSL; 7 {uIdCookieMgr 是我改进的} 8 9 type 10 11 TIdhttpEx = class(TIdhttp) 12 private 13 FIdCookieMgr: TIdCookieMgr; 14 FIdSSL: TIdSSLIOHandlerSocketOpenSSL; 15 public 16 constructor Create(AOwner: TComponent); 17 property CookieMgr: TIdCookieMgr read FIdCookieMgr; 18 procedure GenRandomUserAgent; //随便生成一个请求头,可以忽略或自己改进 19 property IdSSL: TIdSSLIOHandlerSocketOpenSSL read FIdSSL; 20 21 end; 22 23 implementation 24 25 { TIdhttpEx } 26 27 const 28 29 sUserAgent = 30 'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; Trident/4.0; Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1) ; .NET CLR 2.0.50727)'; 31 // sAccept = 'image/gif, image/jpeg, image/pjpeg, image/pjpeg, application/x-shockwave-flash, application/msword, application/vnd.ms-excel, application/vnd.ms-powerpoint, */*'; 32 sUserAgent2 = 33 'Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 5.1; Trident/4.0; InfoPath.3; .NET CLR 2.0.50727; .NET CLR 3.0.4506.2152; .NET CLR 3.5.30729)'; 34 sAccept = 'application/x-shockwave-flash, image/gif, image/jpeg, image/pjpeg, application/msword, application/vnd.ms-excel, application/vnd.ms-powerpoint, application/x-ms-application, application/x-ms-xbap, application/vnd.ms-xpsdocument, application/xaml+xml, */*'; 35 36 sUserAgent3 = 37 'Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.65 Safari/537.36'; 38 sAccept2 = 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8'; 39 40 MaxUserAgentCount = 3; 41 42 var 43 UserAgent: array [0 .. MaxUserAgentCount - 1] of string; 44 45 constructor TIdhttpEx.Create(AOwner: TComponent); 46 begin 47 inherited; 48 49 HTTPOptions := []; // 禁止POST参数编码,自己手动编 HttpEncodeX 50 51 // HTTPOptions := [hoNoParseMetaHTTPEquiv]; // 禁止POST参数编码,自己手动编 HttpEncodeX 52 // hoNoParseMetaHTTPEquiv 禁止解析html 此可能造成假死! 53 54 FIdCookieMgr := TIdCookieMgr.Create(self); 55 CookieManager := FIdCookieMgr; 56 57 // ssl 需要 libeay32.dll ssleay32.dll 阿里旺旺目录下可以搜索到 58 59 FIdSSL := TIdSSLIOHandlerSocketOpenSSL.Create(self); 60 IOHandler := FIdSSL; 61 62 HandleRedirects := true; 63 AllowCookies := true; 64 ProtocolVersion := pv1_1; 65 66 Request.RawHeaders.FoldLength := 25000; // 参数头长度,重要 67 68 ReadTimeout := 15000; 69 ConnectTimeout := 15000; 70 71 RedirectMaximum := 5; 72 Request.UserAgent := sUserAgent3; 73 Request.Accept := sAccept; 74 Request.AcceptEncoding := 'gzip'; 75 76 end; 77 78 procedure TIdhttpEx.GenRandomUserAgent; 79 begin 80 Randomize; 81 self.Request.UserAgent := UserAgent[Random(MaxUserAgentCount)]; 82 end; 83 84 initialization 85 86 UserAgent[0] := 87 'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; Trident/4.0; Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1) ; .NET CLR 2.0.50727)'; 88 UserAgent[1] := 89 'Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 5.1; Trident/4.0; InfoPath.3; .NET CLR 2.0.50727; .NET CLR 3.0.4506.2152; .NET CLR 3.5.30729)'; 90 UserAgent[2] := 91 'Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.65 Safari/537.36'; 92 93 // 这三句请忽略,有些网站认求头,我随便写的。请大家根本实际情况改进 94 finalization 95 96 end.
uIdCookieMgr.Pas
1 unit uIdCookieMgr; 2 3 interface 4 5 uses 6 IdCookieManager, Classes; 7 8 type 9 TIdCookieMgr = class(TIdCookieManager) 10 private 11 12 procedure SetCurCookies(const Value: string); 13 14 function GetCurCookies: string; 15 function GetCookieList: TStringList; 16 17 public 18 19 procedure SaveCookies(const AFileName: string); 20 procedure LoadCookies(const AFileName: string); 21 22 function GetCookieValue(const ACookieName: string): string; 23 property CurCookies: string read GetCurCookies write SetCurCookies; 24 25 end; 26 27 implementation 28 29 uses 30 IdCookie, SysUtils, IdURI, uStrUtils, IdGlobalProtocols, DateUtils; 31 { uStrUtils 一套操作字串的函数单元 } 32 33 function TIdCookieMgr.GetCookieList: TStringList; 34 var 35 C: Tcollectionitem; 36 begin 37 result := TStringList.Create; 38 for C in CookieCollection do 39 result.add((C as TIdCookie).CookieText); 40 end; 41 42 function TIdCookieMgr.GetCookieValue(const ACookieName: string): string; 43 var 44 n: integer; 45 begin 46 result := ''; 47 if IsNotEmptyStr(ACookieName) then 48 begin 49 n := CookieCollection.GetCookieIndex(ACookieName); 50 if n >= 0 then 51 result := CookieCollection.Cookies[n].Value; 52 end; 53 end; 54 55 function TIdCookieMgr.GetCurCookies: string; 56 var 57 strs: TStringList; 58 begin 59 strs := GetCookieList; 60 try 61 result := strs.Text; 62 finally 63 strs.Free; 64 end; 65 end; 66 67 procedure TIdCookieMgr.LoadCookies(const AFileName: string); 68 var 69 StrLst: TStringList; 70 C: TIdCookie; 71 uri: TIdURI; 72 s, t: string; 73 begin 74 StrLst := TStringList.Create; 75 uri := TIdURI.Create; 76 try 77 if FileExists(AFileName) then 78 begin 79 StrLst.LoadFromFile(AFileName); 80 for s in StrLst do 81 begin 82 C := CookieCollection.add; 83 CookieCollection.AddCookie(C, uri); 84 C.ParseServerCookie(s, uri); 85 C.Domain := GetStrBetween(s, 'Domain=', ';'); 86 C.Path := GetStrBetween(s, 'Path=', ';'); 87 t := GetStrBetween(s, 'Expires=', 'GMT') + 'GMT'; // GetStrBetween 在 uStrUtils 单元中 88 C.Expires := CookieStrToLocalDateTime(t); 89 end; 90 end; 91 finally 92 uri.Free; 93 StrLst.Free; 94 end; 95 end; 96 97 procedure TIdCookieMgr.SaveCookies(const AFileName: string); 98 var 99 StrLst: TStringList; 100 begin 101 StrLst := GetCookieList; 102 try 103 StrLst.SaveToFile(AFileName); 104 finally 105 StrLst.Free; 106 end; 107 end; 108 109 procedure TIdCookieMgr.SetCurCookies(const Value: string); 110 var 111 StrLst: TStringList; 112 C: TIdCookie; 113 uri: TIdURI; 114 s, t: string; 115 begin 116 StrLst := TStringList.Create; 117 uri := TIdURI.Create; 118 try 119 StrLst.Text := Value; 120 CookieCollection.Clear; 121 for s in StrLst do 122 begin 123 C := CookieCollection.add; 124 CookieCollection.AddCookie(C, uri); 125 C.ParseServerCookie(s, uri); 126 C.Domain := GetStrBetween(s, 'Domain=', ';'); 127 C.Path := GetStrBetween(s, 'Path=', ';'); 128 t := GetStrBetween(s, 'Expires=', 'GMT') + 'GMT'; 129 C.Expires := CookieStrToLocalDateTime(t); 130 end; 131 finally 132 uri.Free; 133 StrLst.Free; 134 end; 135 end; 136 137 end.
uOperateIndy.pas 非常有用操作 TIdhttpEx 全靠它了
1 unit uOperateIndy; 2 3 interface 4 5 uses 6 Classes, Idhttp, IdMultipartFormData; 7 8 function IdhttpGet(AIdhttp: TIdHTTP; AUrl: string; var AHtml: string): Boolean; 9 function IdhttpPost(AIdhttp: TIdHTTP; AStrList: TStringList; AUrl: string; var AHtml: String) 10 : Boolean; overload; 11 function IdhttpPost(AIdhttp: TIdHTTP; AIdMul: TIdMultiPartFormDataStream; AUrl: string; 12 var AHtml: string): Boolean; overload; 13 14 function GetHtmlFromUrl(AUrl: string; var AHtml: string): Boolean; 15 16 implementation 17 18 uses 19 uIdhttpEx, SysUtils, ZLibEx, StrUtils, uStrUtils, uHtmlElement, uParseHtml; 20 { 带u的单元,都是我写的,ZLibEx 是解压库 } 21 22 //解压GZIP 那个参数31是试出来的 23 procedure DecompressGZIP(inStream, outStream: TStream); inline; 24 begin 25 ZDecompressStream2(inStream, outStream, 31); 26 end; 27 28 function HtmlIsUTF8(AHtml: string): Boolean; 29 var 30 BMetaList: TSingleHtmlElementList; 31 BMeta: TSingleHtmlElement; 32 BKeyElement: PKeyElement; 33 BCheckOver: Boolean; 34 sKeyName: string; 35 sKeyValue: string; 36 begin 37 Result := false; 38 BMetaList := TSingleHtmlElementList.Create; 39 try 40 41 GetMetaList(AHtml, BMetaList); 42 43 BCheckOver := false; 44 45 for BMeta in BMetaList do 46 begin 47 48 for BKeyElement in BMeta.KeyElementList do 49 begin 50 51 sKeyName := UpperCase(BKeyElement.Name); 52 sKeyValue := UpperCase(BKeyElement.Value); 53 54 if PosEx('UTF-8', sKeyValue) > 0 then 55 begin 56 Result := true; 57 BCheckOver := true; 58 break; 59 end; 60 61 end; 62 63 if BCheckOver then 64 break; 65 end; 66 67 finally 68 BMetaList.Free; 69 end; 70 end; 71 72 function GetHtmlAfterOperateIdhttp(AIdhttp: TIdHTTP; AStream: TStream): string; 73 var 74 BSize: Int64; 75 BOutStream: TMemoryStream; 76 TempStream: TMemoryStream; 77 rS: RawByteString; 78 s: string; 79 sUtf8: string; 80 BIsUtf8: Boolean; 81 sCharSet: string; 82 83 begin 84 BSize := AStream.Size; 85 86 BOutStream := TMemoryStream.Create; 87 try 88 if BSize > 0 then 89 begin 90 91 if PosEx('GZIP', UpperCase(AIdhttp.Response.ContentEncoding)) > 0 then 92 begin 93 AStream.Position := 0; 94 DecompressGZIP(AStream, BOutStream); 95 TempStream := BOutStream; 96 end 97 else 98 TempStream := TMemoryStream(AStream); 99 100 BSize := TempStream.Size; 101 SetLength(rS, BSize); 102 TempStream.Position := 0; 103 TempStream.ReadBuffer(rS[1], BSize); 104 105 s := string(rS); 106 sUtf8 := UTF8ToString(rS); 107 108 sCharSet := AIdhttp.Response.CharSet; 109 BIsUtf8 := PosEx('UTF-8', UpperCase(sCharSet)) > 0; 110 if not BIsUtf8 then 111 BIsUtf8 := HtmlIsUTF8(s); 112 113 if BIsUtf8 then 114 Result := sUtf8 115 else 116 begin 117 118 if (PosEx('的', sUtf8) > 0) or (PosEx('地', sUtf8) > 0) or (PosEx('为', sUtf8) > 0) or 119 (PosEx('于', sUtf8) > 0) or (PosEx('我们', sUtf8) > 0) or (PosEx('电', sUtf8) > 0) or 120 (PosEx('邮', sUtf8) > 0) then 121 122 begin 123 Result := sUtf8; 124 end 125 else 126 Result := s; 127 128 end; 129 130 end 131 finally 132 BOutStream.Free; 133 end; 134 135 end; 136 137 function IdhttpGet(AIdhttp: TIdHTTP; AUrl: string; var AHtml: string): Boolean; 138 var 139 BStrStream: TMemoryStream; 140 begin 141 AHtml := ''; 142 BStrStream := TMemoryStream.Create; 143 try 144 try 145 AIdhttp.Get(AUrl, BStrStream); 146 AHtml := GetHtmlAfterOperateIdhttp(AIdhttp, BStrStream); 147 Result := true; 148 except 149 on e: Exception do 150 begin 151 Result := false; 152 AHtml := e.Message; 153 end; 154 end; 155 finally 156 BStrStream.Free; 157 end; 158 end; 159 160 function IdhttpPost(AIdhttp: TIdHTTP; AStrList: TStringList; AUrl: string; var AHtml: String) 161 : Boolean; overload; 162 var 163 BStrStream: TMemoryStream; 164 begin 165 Result := true; 166 AHtml := ''; 167 BStrStream := TMemoryStream.Create; 168 try 169 try 170 AIdhttp.Post(AUrl, AStrList, BStrStream); 171 AHtml := GetHtmlAfterOperateIdhttp(AIdhttp, BStrStream); 172 except 173 on e: Exception do 174 begin 175 AHtml := e.Message; 176 Result := false; 177 end; 178 end; 179 finally 180 BStrStream.Free; 181 end; 182 end; 183 184 function IdhttpPost(AIdhttp: TIdHTTP; AIdMul: TIdMultiPartFormDataStream; AUrl: string; 185 var AHtml: string): Boolean; overload; 186 var 187 BStrStream: TMemoryStream; 188 begin 189 Result := true; 190 AHtml := ''; 191 BStrStream := TMemoryStream.Create; 192 try 193 try 194 AIdhttp.Post(AUrl, AIdMul, BStrStream); 195 AHtml := GetHtmlAfterOperateIdhttp(AIdhttp, BStrStream); 196 except 197 on e: Exception do 198 begin 199 AHtml := e.Message; 200 Result := false; 201 end; 202 end; 203 finally 204 BStrStream.Free; 205 end; 206 end; 207 208 function GetHtmlFromUrl(AUrl: string; var AHtml: string): Boolean; 209 var 210 Idhttp: TIdhttpEx; 211 begin 212 Idhttp := TIdhttpEx.Create(nil); 213 try 214 Result := IdhttpGet(Idhttp, AUrl, AHtml); 215 finally 216 Idhttp.Free; 217 end; 218 end; 219 220 end.