以delphi XE8 自带indy(10.5.8.0)组件为例,分享实战中遇到的问题及解决方法。
TIdHttpEx 用法实例01[多线程获取网页](包含完整源码)
实例02(如何Post参数,如何保存与提取Cookie)待写
TIdHttpEx 已实现了对GZIP的解压,对UTF-8编码解码等
本文包含以下几个单元
uIdhttp.pas (TIdHttpEx)
uIdCookieMgr.pas (TIdCookieMgr)
uOperateIndy.pas 操作 TIdhttpEx 全靠它了
uIdhttp.Pas
复制代码
1 unit uIdHttpEx;
2
3 interface
4
5 uses
6 Classes, Idhttp, uIdCookieMgr, IdSSLOpenSSL;
7 {uIdCookieMgr 是我改进的}
8
9 type
10
11 TIdhttpEx = class(TIdhttp)
12 private
13 FIdCookieMgr: TIdCookieMgr;
14 FIdSSL: TIdSSLIOHandlerSocketOpenSSL;
15 public
16 constructor Create(AOwner: TComponent);
17 property CookieMgr: TIdCookieMgr read FIdCookieMgr;
18 procedure GenRandomUserAgent; //随便生成一个请求头,可以忽略或自己改进
19 property IdSSL: TIdSSLIOHandlerSocketOpenSSL read FIdSSL;
20
21 end;
22
23 implementation
24
25 { TIdhttpEx }
26
27 const
28
29 sUserAgent =
30 'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; Trident/4.0; Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1) ; .NET CLR 2.0.50727)';
31 // sAccept = 'image/gif, image/jpeg, image/pjpeg, image/pjpeg, application/x-shockwave-flash, application/msword, application/vnd.ms-excel, application/vnd.ms-powerpoint, */*';
32 sUserAgent2 =
33 'Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 5.1; Trident/4.0; InfoPath.3; .NET CLR 2.0.50727; .NET CLR 3.0.4506.2152; .NET CLR 3.5.30729)';
34 sAccept = 'application/x-shockwave-flash, image/gif, image/jpeg, image/pjpeg, application/msword, application/vnd.ms-excel, application/vnd.ms-powerpoint, application/x-ms-application, application/x-ms-xbap, application/vnd.ms-xpsdocument, application/xaml+xml, */*';
35
36 sUserAgent3 =
37 'Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.65 Safari/537.36';
38 sAccept2 = 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8';
39
40 MaxUserAgentCount = 3;
41
42 var
43 UserAgent: array [0 .. MaxUserAgentCount - 1] of string;
44
45 constructor TIdhttpEx.Create(AOwner: TComponent);
46 begin
47 inherited;
48
49 HTTPOptions := []; // 禁止POST参数编码,自己手动编 HttpEncodeX
50
51 // HTTPOptions := [hoNoParseMetaHTTPEquiv]; // 禁止POST参数编码,自己手动编 HttpEncodeX
52 // hoNoParseMetaHTTPEquiv 禁止解析html 此可能造成假死!
53
54 FIdCookieMgr := TIdCookieMgr.Create(self);
55 CookieManager := FIdCookieMgr;
56
57 // ssl 需要 libeay32.dll ssleay32.dll 阿里旺旺目录下可以搜索到
58
59 FIdSSL := TIdSSLIOHandlerSocketOpenSSL.Create(self);
60 IOHandler := FIdSSL;
61
62 HandleRedirects := true;
63 AllowCookies := true;
64 ProtocolVersion := pv1_1;
65
66 Request.RawHeaders.FoldLength := 25000; // 参数头长度,重要
67
68 ReadTimeout := 15000;
69 ConnectTimeout := 15000;
70
71 RedirectMaximum := 5;
72 Request.UserAgent := sUserAgent3;
73 Request.Accept := sAccept;
74 Request.AcceptEncoding := 'gzip';
75
76 end;
77
78 procedure TIdhttpEx.GenRandomUserAgent;
79 begin
80 Randomize;
81 self.Request.UserAgent := UserAgent[Random(MaxUserAgentCount)];
82 end;
83
84 initialization
85
86 UserAgent[0] :=
87 'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; Trident/4.0; Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1) ; .NET CLR 2.0.50727)';
88 UserAgent[1] :=
89 'Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 5.1; Trident/4.0; InfoPath.3; .NET CLR 2.0.50727; .NET CLR 3.0.4506.2152; .NET CLR 3.5.30729)';
90 UserAgent[2] :=
91 'Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.65 Safari/537.36';
92
93 // 这三句请忽略,有些网站认求头,我随便写的。请大家根本实际情况改进
94 finalization
95
96 end.
复制代码
uIdCookieMgr.Pas
复制代码
1 unit uIdCookieMgr;
2
3 interface
4
5 uses
6 IdCookieManager, Classes;
7
8 type
9 TIdCookieMgr = class(TIdCookieManager)
10 private
11
12 procedure SetCurCookies(const Value: string);
13
14 function GetCurCookies: string;
15 function GetCookieList: TStringList;
16
17 public
18
19 procedure SaveCookies(const AFileName: string);
20 procedure LoadCookies(const AFileName: string);
21
22 function GetCookieValue(const ACookieName: string): string;
23 property CurCookies: string read GetCurCookies write SetCurCookies;
24
25 end;
26
27 implementation
28
29 uses
30 IdCookie, SysUtils, IdURI, uStrUtils, IdGlobalProtocols, DateUtils;
31 { uStrUtils 一套操作字串的函数单元 }
32
33 function TIdCookieMgr.GetCookieList: TStringList;
34 var
35 C: Tcollectionitem;
36 begin
37 result := TStringList.Create;
38 for C in CookieCollection do
39 result.add((C as TIdCookie).CookieText);
40 end;
41
42 function TIdCookieMgr.GetCookieValue(const ACookieName: string): string;
43 var
44 n: integer;
45 begin
46 result := '';
47 if IsNotEmptyStr(ACookieName) then
48 begin
49 n := CookieCollection.GetCookieIndex(ACookieName);
50 if n >= 0 then
51 result := CookieCollection.Cookies[n].Value;
52 end;
53 end;
54
55 function TIdCookieMgr.GetCurCookies: string;
56 var
57 strs: TStringList;
58 begin
59 strs := GetCookieList;
60 try
61 result := strs.Text;
62 finally
63 strs.Free;
64 end;
65 end;
66
67 procedure TIdCookieMgr.LoadCookies(const AFileName: string);
68 var
69 StrLst: TStringList;
70 C: TIdCookie;
71 uri: TIdURI;
72 s, t: string;
73 begin
74 StrLst := TStringList.Create;
75 uri := TIdURI.Create;
76 try
77 if FileExists(AFileName) then
78 begin
79 StrLst.LoadFromFile(AFileName);
80 for s in StrLst do
81 begin
82 C := CookieCollection.add;
83 CookieCollection.AddCookie(C, uri);
84 C.ParseServerCookie(s, uri);
85 C.Domain := GetStrBetween(s, 'Domain=', ';');
86 C.Path := GetStrBetween(s, 'Path=', ';');
87 t := GetStrBetween(s, 'Expires=', 'GMT') + 'GMT'; // GetStrBetween 在 uStrUtils 单元中
88 C.Expires := CookieStrToLocalDateTime(t);
89 end;
90 end;
91 finally
92 uri.Free;
93 StrLst.Free;
94 end;
95 end;
96
97 procedure TIdCookieMgr.SaveCookies(const AFileName: string);
98 var
99 StrLst: TStringList;
100 begin
101 StrLst := GetCookieList;
102 try
103 StrLst.SaveToFile(AFileName);
104 finally
105 StrLst.Free;
106 end;
107 end;
108
109 procedure TIdCookieMgr.SetCurCookies(const Value: string);
110 var
111 StrLst: TStringList;
112 C: TIdCookie;
113 uri: TIdURI;
114 s, t: string;
115 begin
116 StrLst := TStringList.Create;
117 uri := TIdURI.Create;
118 try
119 StrLst.Text := Value;
120 CookieCollection.Clear;
121 for s in StrLst do
122 begin
123 C := CookieCollection.add;
124 CookieCollection.AddCookie(C, uri);
125 C.ParseServerCookie(s, uri);
126 C.Domain := GetStrBetween(s, 'Domain=', ';');
127 C.Path := GetStrBetween(s, 'Path=', ';');
128 t := GetStrBetween(s, 'Expires=', 'GMT') + 'GMT';
129 C.Expires := CookieStrToLocalDateTime(t);
130 end;
131 finally
132 uri.Free;
133 StrLst.Free;
134 end;
135 end;
136
137 end.
复制代码
uOperateIndy.pas 非常有用操作 TIdhttpEx 全靠它了
复制代码
1 unit uOperateIndy;
2
3 interface
4
5 uses
6 Classes, Idhttp, IdMultipartFormData;
7
8 function IdhttpGet(AIdhttp: TIdHTTP; AUrl: string; var AHtml: string): Boolean;
9 function IdhttpPost(AIdhttp: TIdHTTP; AStrList: TStringList; AUrl: string; var AHtml: String)
10 : Boolean; overload;
11 function IdhttpPost(AIdhttp: TIdHTTP; AIdMul: TIdMultiPartFormDataStream; AUrl: string;
12 var AHtml: string): Boolean; overload;
13
14 function GetHtmlFromUrl(AUrl: string; var AHtml: string): Boolean;
15
16 implementation
17
18 uses
19 uIdhttpEx, SysUtils, ZLibEx, StrUtils, uStrUtils, uHtmlElement, uParseHtml;
20 { 带u的单元,都是我写的,ZLibEx 是解压库 }
21
22 //解压GZIP 那个参数31是试出来的
23 procedure DecompressGZIP(inStream, outStream: TStream); inline;
24 begin
25 ZDecompressStream2(inStream, outStream, 31);
26 end;
27
28 function HtmlIsUTF8(AHtml: string): Boolean;
29 var
30 BMetaList: TSingleHtmlElementList;
31 BMeta: TSingleHtmlElement;
32 BKeyElement: PKeyElement;
33 BCheckOver: Boolean;
34 sKeyName: string;
35 sKeyValue: string;
36 begin
37 Result := false;
38 BMetaList := TSingleHtmlElementList.Create;
39 try
40
41 GetMetaList(AHtml, BMetaList);
42
43 BCheckOver := false;
44
45 for BMeta in BMetaList do
46 begin
47
48 for BKeyElement in BMeta.KeyElementList do
49 begin
50
51 sKeyName := UpperCase(BKeyElement.Name);
52 sKeyValue := UpperCase(BKeyElement.Value);
53
54 if PosEx('UTF-8', sKeyValue) > 0 then
55 begin
56 Result := true;
57 BCheckOver := true;
58 break;
59 end;
60
61 end;
62
63 if BCheckOver then
64 break;
65 end;
66
67 finally
68 BMetaList.Free;
69 end;
70 end;
71
72 function GetHtmlAfterOperateIdhttp(AIdhttp: TIdHTTP; AStream: TStream): string;
73 var
74 BSize: Int64;
75 BOutStream: TMemoryStream;
76 TempStream: TMemoryStream;
77 rS: RawByteString;
78 s: string;
79 sUtf8: string;
80 BIsUtf8: Boolean;
81 sCharSet: string;
82
83 begin
84 BSize := AStream.Size;
85
86 BOutStream := TMemoryStream.Create;
87 try
88 if BSize > 0 then
89 begin
90
91 if PosEx('GZIP', UpperCase(AIdhttp.Response.ContentEncoding)) > 0 then
92 begin
93 AStream.Position := 0;
94 DecompressGZIP(AStream, BOutStream);
95 TempStream := BOutStream;
96 end
97 else
98 TempStream := TMemoryStream(AStream);
99
100 BSize := TempStream.Size;
101 SetLength(rS, BSize);
102 TempStream.Position := 0;
103 TempStream.ReadBuffer(rS[1], BSize);
104
105 s := string(rS);
106 sUtf8 := UTF8ToString(rS);
107
108 sCharSet := AIdhttp.Response.CharSet;
109 BIsUtf8 := PosEx('UTF-8', UpperCase(sCharSet)) > 0;
110 if not BIsUtf8 then
111 BIsUtf8 := HtmlIsUTF8(s);
112
113 if BIsUtf8 then
114 Result := sUtf8
115 else
116 begin
117
118 if (PosEx('的', sUtf8) > 0) or (PosEx('地', sUtf8) > 0) or (PosEx('为', sUtf8) > 0) or
119 (PosEx('于', sUtf8) > 0) or (PosEx('我们', sUtf8) > 0) or (PosEx('电', sUtf8) > 0) or
120 (PosEx('邮', sUtf8) > 0) then
121
122 begin
123 Result := sUtf8;
124 end
125 else
126 Result := s;
127
128 end;
129
130 end
131 finally
132 BOutStream.Free;
133 end;
134
135 end;
136
137 function IdhttpGet(AIdhttp: TIdHTTP; AUrl: string; var AHtml: string): Boolean;
138 var
139 BStrStream: TMemoryStream;
140 begin
141 AHtml := '';
142 BStrStream := TMemoryStream.Create;
143 try
144 try
145 AIdhttp.Get(AUrl, BStrStream);
146 AHtml := GetHtmlAfterOperateIdhttp(AIdhttp, BStrStream);
147 Result := true;
148 except
149 on e: Exception do
150 begin
151 Result := false;
152 AHtml := e.Message;
153 end;
154 end;
155 finally
156 BStrStream.Free;
157 end;
158 end;
159
160 function IdhttpPost(AIdhttp: TIdHTTP; AStrList: TStringList; AUrl: string; var AHtml: String)
161 : Boolean; overload;
162 var
163 BStrStream: TMemoryStream;
164 begin
165 Result := true;
166 AHtml := '';
167 BStrStream := TMemoryStream.Create;
168 try
169 try
170 AIdhttp.Post(AUrl, AStrList, BStrStream);
171 AHtml := GetHtmlAfterOperateIdhttp(AIdhttp, BStrStream);
172 except
173 on e: Exception do
174 begin
175 AHtml := e.Message;
176 Result := false;
177 end;
178 end;
179 finally
180 BStrStream.Free;
181 end;
182 end;
183
184 function IdhttpPost(AIdhttp: TIdHTTP; AIdMul: TIdMultiPartFormDataStream; AUrl: string;
185 var AHtml: string): Boolean; overload;
186 var
187 BStrStream: TMemoryStream;
188 begin
189 Result := true;
190 AHtml := '';
191 BStrStream := TMemoryStream.Create;
192 try
193 try
194 AIdhttp.Post(AUrl, AIdMul, BStrStream);
195 AHtml := GetHtmlAfterOperateIdhttp(AIdhttp, BStrStream);
196 except
197 on e: Exception do
198 begin
199 AHtml := e.Message;
200 Result := false;
201 end;
202 end;
203 finally
204 BStrStream.Free;
205 end;
206 end;
207
208 function GetHtmlFromUrl(AUrl: string; var AHtml: string): Boolean;
209 var
210 Idhttp: TIdhttpEx;
211 begin
212 Idhttp := TIdhttpEx.Create(nil);
213 try
214 Result := IdhttpGet(Idhttp, AUrl, AHtml);
215 finally
216 Idhttp.Free;
217 end;
218 end;
219
220 end.
复制代码
附:delphi 进阶基础技能说明
http://www.cnblogs.com/lackey/p/4085131.html