在做数据采集时,有些网站需要输入验证码,但各网站的验证码都不相同,不可能有一套能完美识别所有验证码的代码,所以我也没有去研究。我采取的方案是:在 WinForm 里通过 WebBrowser 控件打开网页,先手动登录系统,然后通过 webBrowser1.Document.Cookie 获取其 Cookie,再将该 Cookie 用于 HttpWebRequest,访问自己需要的页面。
这个方案一直用得没有问题,但最近在对某个网站操作时总是不能获取完整的 Cookie,百思不得其解。使用 Fiddler2 对比后发现,获取不到的那些 Cookie 末尾都带有 HttpOnly 标识。顺藤摸瓜才发现,webBrowser1.Document.Cookie 本来就取不到带 HttpOnly 标识的 Cookie。解决方法如下:
using System;
using System.ComponentModel;
using System.Net;
using System.Runtime.InteropServices;
using System.Security;
using System.Security.Permissions;
using System.Text;


/// <summary>
/// P/Invoke declaration and constants used to read cookies through wininet.dll.
/// </summary>
internal sealed class NativeMethods
{
    #region enums

    /// <summary>
    /// Win32 error codes related to the cookie lookup.
    /// </summary>
    public enum ErrorFlags
    {
        ERROR_INSUFFICIENT_BUFFER = 122,
        ERROR_INVALID_PARAMETER = 87,
        ERROR_NO_MORE_ITEMS = 259
    }

    /// <summary>
    /// Values for the <c>flags</c> argument of <see cref="InternetGetCookieEx"/>.
    /// </summary>
    public enum InternetFlags
    {
        INTERNET_COOKIE_HTTPONLY = 8192, // Requires IE 8 or higher
        INTERNET_COOKIE_THIRD_PARTY = 131072,
        INTERNET_FLAG_RESTRICTED_ZONE = 16
    }

    #endregion

    #region DLL Imports

    /// <summary>
    /// Binding for the Unicode export <c>InternetGetCookieExW</c> of wininet.dll.
    /// The last Win32 error is preserved (SetLastError = true) so callers can read it
    /// with <see cref="Marshal.GetLastWin32Error"/>.
    /// </summary>
    [SuppressUnmanagedCodeSecurity, SecurityCritical, DllImport("wininet.dll", EntryPoint = "InternetGetCookieExW", CharSet = CharSet.Unicode, SetLastError = true, ExactSpelling = true)]
    internal static extern bool InternetGetCookieEx([In] string Url, [In] string cookieName, [Out] StringBuilder cookieData, [In, Out] ref uint pchCookieData, uint flags, IntPtr reserved);

    #endregion
}


/// <summary>
/// Gets the complete cookie of the WebBrowser control, because the default
/// webBrowser1.Document.Cookie cannot return cookies marked HttpOnly.
/// </summary>
public class FullWebBrowserCookie
{
    /// <summary>
    /// Reads the cookie data stored for <paramref name="uri"/>, passing the
    /// INTERNET_COOKIE_HTTPONLY flag to the native lookup.
    /// </summary>
    /// <param name="uri">The address whose cookies are requested.</param>
    /// <param name="throwIfNoCookie">
    /// When <c>true</c>, every failed lookup throws. When <c>false</c>, a lookup that
    /// fails with ERROR_NO_MORE_ITEMS returns <c>null</c> instead of throwing.
    /// </param>
    /// <returns>
    /// The cookie data returned by the native call, or <c>null</c> when the lookup
    /// failed with ERROR_NO_MORE_ITEMS and <paramref name="throwIfNoCookie"/> is <c>false</c>.
    /// </returns>
    /// <exception cref="ArgumentNullException"><paramref name="uri"/> is <c>null</c>.</exception>
    /// <exception cref="Win32Exception">
    /// The native lookup failed and the failure is not the tolerated "no cookie" case.
    /// </exception>
    [SecurityCritical]
    public static string GetCookieInternal(Uri uri, bool throwIfNoCookie)
    {
        // Validates uri (throws on null) before any native call is made.
        string url = UriToString(uri);
        uint flag = (uint)NativeMethods.InternetFlags.INTERNET_COOKIE_HTTPONLY;
        uint pchCookieData = 0;

        // First call with a null buffer only asks for the required buffer size.
        if (NativeMethods.InternetGetCookieEx(url, null, null, ref pchCookieData, flag, IntPtr.Zero))
        {
            // One extra slot on top of the reported size.
            pchCookieData++;
            StringBuilder cookieData = new StringBuilder((int)pchCookieData);

            // Second call actually reads the cookie into the buffer.
            if (NativeMethods.InternetGetCookieEx(url, null, cookieData, ref pchCookieData, flag, IntPtr.Zero))
            {
                // The data is only handed back if the caller may access the address.
                DemandWebPermission(uri);
                return cookieData.ToString();
            }
        }

        // Either native call failed; the error code was captured by the marshaller.
        int lastErrorCode = Marshal.GetLastWin32Error();

        if (throwIfNoCookie || (lastErrorCode != (int)NativeMethods.ErrorFlags.ERROR_NO_MORE_ITEMS))
        {
            throw new Win32Exception(lastErrorCode);
        }

        return null;
    }

    /// <summary>
    /// Demands read access to the local path for file URIs, or connect access to the
    /// address for every other URI.
    /// </summary>
    /// <param name="uri">The address the caller wants cookie data for.</param>
    private static void DemandWebPermission(Uri uri)
    {
        if (uri.IsFile)
        {
            string localPath = uri.LocalPath;
            new FileIOPermission(FileIOPermissionAccess.Read, localPath).Demand();
        }
        else
        {
            // The string form is only needed for the web permission.
            string uriString = UriToString(uri);
            new WebPermission(NetworkAccess.Connect, uriString).Demand();
        }
    }

    /// <summary>
    /// Converts <paramref name="uri"/> to its safe-unescaped string form: the absolute
    /// URI for absolute addresses, the serialization string otherwise.
    /// </summary>
    /// <param name="uri">The address to convert.</param>
    /// <returns>The string form of <paramref name="uri"/>.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="uri"/> is <c>null</c>.</exception>
    private static string UriToString(Uri uri)
    {
        if (uri == null)
        {
            throw new ArgumentNullException("uri");
        }

        UriComponents components = (uri.IsAbsoluteUri ? UriComponents.AbsoluteUri : UriComponents.SerializationInfoString);

        // GetComponents already returns a string; no intermediate StringBuilder copy is needed.
        return uri.GetComponents(components, UriFormat.SafeUnescaped);
    }
}