zoukankan      html  css  js  c++  java
  • C# HttpBrowser 跨进程访问,解决内存泄露问题

      1 #undef DEBUG
      2 using Microsoft.Win32;
      3 using Newtonsoft.Json;
      4 using System;
      5 using System.Collections.Generic;
      6 using System.Collections.Specialized;
      7 using System.Diagnostics;
      8 using System.Diagnostics.Contracts;
      9 using System.Drawing;
     10 using System.IO;
     11 using System.Linq;
     12 using System.Net;
     13 using System.Runtime.InteropServices;
     14 using System.Text;
     15 using System.Threading;
     16 using System.Threading.Tasks;
     17 using System.Windows.Forms;
     18 
     19 namespace AnfleCrawler.Common
     20 {
     21     /// <summary>
     22     /// Chromium / CasperJS + PhantomJS
     23     /// http://pinvoke.net/index.aspx
     24     /// </summary>
     25     public sealed partial class HttpBrowser : IHttpClient
     26     {
     27         #region NestedTypes
     28         [Serializable]
     29         public class AjaxBlockEntity
     30         {
     31             internal const string AjaxBlock = "_AjaxBlock";
     32             public string ID { get; set; }
     33             public string Text { get; set; }
     34             public bool IsEvent { get; set; }
     35         }
     36         public class AjaxEventEntity : MarshalByRefObject
     37         {
     38             public string ListenerSelector { get; set; }
     39             public bool EntryCall { get; set; }
     40             public Action<string> FinalCallback { get; set; }
     41         }
     42 
     43         [ComVisible(true)]
     44         public sealed class STAContext : Disposable
     45         {
     46             #region Fields
     47             public volatile bool IsRedirect;
     48             //internal MessageLoopApartment _Apartment;
     49             private SynchronizedCollection<Tuple<HtmlElement, EventHandler>> _releaseSet;
     50             private AutoResetEvent _sendReceiveWaiter;
     51             private CountdownEvent _ajaxWaiter;
     52             private System.Threading.Timer _lazyTimer;
     53 
     54             internal volatile bool DoInvokeHtml;
     55             private volatile string _outerHtml;
     56             #endregion
     57 
     58             #region Properties
     59             public Uri RequestUrl { get; private set; }
     60             public HttpRequestContent RequestContent { get; private set; }
     61             internal AutoResetEvent WaitHandle { get; set; }
     62 
     63             internal AutoResetEvent SendReceiveWaiter
     64             {
     65                 get
     66                 {
     67                     if (_sendReceiveWaiter == null)
     68                     {
     69                         _sendReceiveWaiter = new AutoResetEvent(false);
     70                     }
     71                     return _sendReceiveWaiter;
     72                 }
     73             }
     74             internal AjaxBlockEntity[] AjaxBlocks { get; private set; }
     75             internal CountdownEvent AjaxWaiter
     76             {
     77                 get
     78                 {
     79                     if (_ajaxWaiter == null)
     80                     {
     81                         _ajaxWaiter = new CountdownEvent(1);
     82                     }
     83                     return _ajaxWaiter;
     84                 }
     85             }
     86             internal volatile bool IsProcessEvent;
     87             internal AjaxEventEntity AjaxEvent { get; set; }
     88 
     89             internal string OuterHtml
     90             {
     91                 get
     92                 {
     93                     DoInvokeHtml = true;
     94                     return _outerHtml;
     95                 }
     96                 set
     97                 {
     98                     _outerHtml = value;
     99                 }
    100             }
    101             #endregion
    102 
    103             #region Constructor
    104             internal STAContext(Uri url, HttpRequestContent content)
    105             {
    106                 this.RequestUrl = url;
    107                 this.RequestContent = content;
    108                 string ablock;
    109                 if (this.RequestContent != null && this.RequestContent.Form != null)
    110                 {
    111                     if (!string.IsNullOrEmpty(ablock = this.RequestContent.Form.Get(AjaxBlockEntity.AjaxBlock)))
    112                     {
    113                         this.AjaxBlocks = JsonConvert.DeserializeObject<AjaxBlockEntity[]>(ablock);
    114                         this.RequestContent.Form.Remove(AjaxBlockEntity.AjaxBlock);
    115                     }
    116                 }
    117                 DoInvokeHtml = true;
    118             }
    119 
    120             protected override void DisposeInternal(bool disposing)
    121             {
    122                 if (disposing)
    123                 {
    124                     //if (_Apartment != null)
    125                     //{
    126                     //    _Apartment.Dispose();
    127                     //    _Apartment = null;
    128                     //}
    129                     if (_lazyTimer != null)
    130                     {
    131                         _lazyTimer.Dispose();
    132                         _lazyTimer = null;
    133                     }
    134                     if (this.WaitHandle != null)
    135                     {
    136                         this.WaitHandle.Dispose();
    137                         this.WaitHandle = null;
    138                     }
    139 
    140                     DisposeObject(_sendReceiveWaiter);
    141                     DisposeObject(_ajaxWaiter);
    142                 }
    143             }
    144             #endregion
    145 
    146             #region Methods
    147             public void SetHtml(string html)
    148             {
    149                 _outerHtml = html;
    150                 DoInvokeHtml = false;
    151             }
    152 
    153             internal void RegisterLazyLoad(Action<object> func, object state)
    154             {
    155                 if (_lazyTimer != null)
    156                 {
    157                     return;
    158                 }
    159                 _lazyTimer = new System.Threading.Timer(x => STA_Run(func, x, this), state, 2000, Timeout.Infinite);
    160             }
    161             /// <summary>
    162             /// 另种思路,在每次加载完毕后delay
    163             /// </summary>
    164             internal void DelayLazyLoad()
    165             {
    166                 if (_lazyTimer == null)
    167                 {
    168                     return;
    169                 }
    170                 _lazyTimer.Change(2000, Timeout.Infinite);
    171             }
    172 
    173             /// <summary>
    174             /// STA
    175             /// </summary>
    176             /// <param name="node"></param>
    177             /// <param name="e"></param>
    178             internal void AjaxMark(HtmlElement node, EventHandler e)
    179             {
    180                 if (_releaseSet == null)
    181                 {
    182                     _releaseSet = new SynchronizedCollection<Tuple<HtmlElement, EventHandler>>();
    183                 }
    184                 var q = from t in _releaseSet
    185                         where t.Item1 == node
    186                         select t;
    187                 if (q.Any())
    188                 {
    189                     return;
    190                 }
    191                 _releaseSet.Add(Tuple.Create(node, e));
    192                 node.AttachEventHandler("onpropertychange", e);
    193             }
    194 
    195             /// <summary>
    196             /// STA
    197             /// </summary>
    198             internal void AjaxUnmarks()
    199             {
    200                 if (_releaseSet.IsNullOrEmpty())
    201                 {
    202                     return;
    203                 }
    204                 foreach (var item in _releaseSet)
    205                 {
    206                     var node = item.Item1;
    207                     node.DetachEventHandler("onpropertychange", item.Item2);
    208                 }
    209                 _releaseSet = null;
    210             }
    211 
    212             internal void _ReleaseMemory()
    213             {
    214                 return;
    215 #if !DEBUG
    216                 var proc = Process.GetCurrentProcess();
    217                 //128M
    218                 if (proc.PrivateMemorySize64 <= 134217728L)
    219                 {
    220                     return;
    221                 }
    222                 base.ReleaseMemory();
    223 #endif
    224             }
    225             #endregion
    226         }
    227         #endregion
    228 
    229         #region Static
    230         public const string Callback_Snapshot = "_xSnapshot";
    231 
    232         static HttpBrowser()
    233         {
    234             SetBrowserFeatureControl();
    235             //NativeMethods.SetErrorMode(NativeMethods.ErrorModes.SYSTEM_DEFAULT);
    236             NativeMethods.SetErrorMode(NativeMethods.ErrorModes.SEM_FAILCRITICALERRORS | NativeMethods.ErrorModes.SEM_NOGPFAULTERRORBOX | NativeMethods.ErrorModes.SEM_NOOPENFILEERRORBOX);
    237         }
    238 
    239         /// <summary>
    240         /// http://msdn.microsoft.com/en-us/library/ee330720(v=vs.85).aspx
    241         /// </summary>
    242         private static void SetBrowserFeatureControl()
    243         {
    244             // FeatureControl settings are per-process
    245             string fileName = Path.GetFileName(Process.GetCurrentProcess().MainModule.FileName);
    246             string[] skip = new string[] { "devenv.exe", "XDesProc.exe" };
    247             if (skip.Any(p => p.Equals(fileName, StringComparison.OrdinalIgnoreCase)))
    248             {
    249                 return;
    250             }
    251 
    252             SetBrowserFeatureControlKey("FEATURE_BROWSER_EMULATION", fileName, GetBrowserEmulationMode());
    253             SetBrowserFeatureControlKey("FEATURE_MANAGE_SCRIPT_CIRCULAR_REFS", fileName, 1);
    254             //SetBrowserFeatureControlKey("FEATURE_GPU_RENDERING ", fileName, 1);
    255             //SetBrowserFeatureControlKey("FEATURE_AJAX_CONNECTIONEVENTS", fileName, 1);
    256             //SetBrowserFeatureControlKey("FEATURE_ENABLE_CLIPCHILDREN_OPTIMIZATION", fileName, 1);
    257             //SetBrowserFeatureControlKey("FEATURE_DOMSTORAGE ", fileName, 1);
    258             //SetBrowserFeatureControlKey("FEATURE_IVIEWOBJECTDRAW_DMLT9_WITH_GDI  ", fileName, 0);
    259             //SetBrowserFeatureControlKey("FEATURE_NINPUT_LEGACYMODE", fileName, 0);
    260             //SetBrowserFeatureControlKey("FEATURE_DISABLE_LEGACY_COMPRESSION", fileName, 1);
    261             //SetBrowserFeatureControlKey("FEATURE_LOCALMACHINE_LOCKDOWN", fileName, 0);
    262             //SetBrowserFeatureControlKey("FEATURE_BLOCK_LMZ_OBJECT", fileName, 0);
    263             //SetBrowserFeatureControlKey("FEATURE_BLOCK_LMZ_SCRIPT", fileName, 0);
    264             //SetBrowserFeatureControlKey("FEATURE_DISABLE_NAVIGATION_SOUNDS", fileName, 1);
    265             //SetBrowserFeatureControlKey("FEATURE_SCRIPTURL_MITIGATION", fileName, 1);
    266             //SetBrowserFeatureControlKey("FEATURE_SPELLCHECKING", fileName, 0);
    267             //SetBrowserFeatureControlKey("FEATURE_STATUS_BAR_THROTTLING", fileName, 1);
    268             //SetBrowserFeatureControlKey("FEATURE_TABBED_BROWSING", fileName, 1);
    269             //SetBrowserFeatureControlKey("FEATURE_VALIDATE_NAVIGATE_URL", fileName, 1);
    270             //SetBrowserFeatureControlKey("FEATURE_WEBOC_DOCUMENT_ZOOM", fileName, 1);
    271             //SetBrowserFeatureControlKey("FEATURE_WEBOC_POPUPMANAGEMENT", fileName, 0);
    272             //SetBrowserFeatureControlKey("FEATURE_WEBOC_MOVESIZECHILD", fileName, 1);
    273             //SetBrowserFeatureControlKey("FEATURE_ADDON_MANAGEMENT", fileName, 0);
    274             //SetBrowserFeatureControlKey("FEATURE_WEBSOCKET", fileName, 1);
    275             //SetBrowserFeatureControlKey("FEATURE_WINDOW_RESTRICTIONS ", fileName, 0);
    276             //SetBrowserFeatureControlKey("FEATURE_XMLHTTP", fileName, 1);
    277         }
    278         /// <summary>
    279         /// http://msdn.microsoft.com/en-us/library/ie/ee330730(v=vs.85).aspx
    280         /// </summary>
    281         /// <returns></returns>
    282         private static uint GetBrowserEmulationMode()
    283         {
    284             int browserVersion;
    285             using (var ieKey = Registry.LocalMachine.OpenSubKey(@"SOFTWAREMicrosoftInternet Explorer",
    286                 RegistryKeyPermissionCheck.ReadSubTree, System.Security.AccessControl.RegistryRights.QueryValues))
    287             {
    288                 var version = ieKey.GetValue("svcVersion") ?? ieKey.GetValue("Version");
    289                 if (version == null)
    290                 {
    291                     throw new ApplicationException("Microsoft Internet Explorer is required!");
    292                 }
    293                 int.TryParse(version.ToString().Split('.')[0], out browserVersion);
    294             }
    295             if (browserVersion < 8)
    296             {
    297                 throw new ApplicationException("Microsoft Internet Explorer 8 is required!");
    298             }
    299             switch (browserVersion)
    300             {
    301                 case 9:
    302                     return 9000;
    303                 case 10:
    304                     return 10000;
    305                 case 11:
    306                     return 11000;
    307                 default:
    308                     return 8000;
    309             }
    310         }
    311         private static void SetBrowserFeatureControlKey(string feature, string appName, uint value)
    312         {
    313             using (var key = Registry.CurrentUser.CreateSubKey(
    314                 String.Concat(@"SoftwareMicrosoftInternet ExplorerMainFeatureControl", feature),
    315                 RegistryKeyPermissionCheck.ReadWriteSubTree))
    316             {
    317                 key.SetValue(appName, value, RegistryValueKind.DWord);
    318             }
    319         }
    320 
    321         private static void STA_Run(Action<object> func, object state, STAContext context)
    322         {
    323             var sta = new Thread(arg =>
    324             {
    325                 var set = (object[])arg;
    326                 try
    327                 {
    328                     var func2 = (Action<object>)set[0];
    329                     func2(set[1]);
    330                 }
    331                 catch (Exception ex)
    332                 {
    333                     App.LogError(ex, "STA_Run");
    334                 }
    335             }, 1024 * 512);   //1024 * 512, 默认1M
    336             sta.IsBackground = true;
    337             sta.SetApartmentState(ApartmentState.STA);
    338             try
    339             {
    340                 sta.Start(new object[2] { func, state });
    341             }
    342             catch (OutOfMemoryException ex)
    343             {
    344                 HandleException(ex);
    345             }
    346 
    347             //context._Apartment.Invoke(func, state);
    348         }
    349 
    350         public static void FillAjaxBlock(NameValueCollection form, AjaxBlockEntity[] set)
    351         {
    352             Contract.Requires(form != null);
    353 
    354             form[AjaxBlockEntity.AjaxBlock] = JsonConvert.SerializeObject(set, Formatting.None);
    355         }
    356         #endregion
    357 
    358         #region Fields
    359         private EndPoint _proxyAddr;
    360         private Lazy<IHttpClient> _lazyClient;
    361         private CookieContainer _cookieContainer;
    362         private Action<STAContext, HtmlDocument> _onLoad;
    363         #endregion
    364 
    365         #region Properties
    366         public int SendReceiveTimeout { get; set; }
    367         public ushort? RetryCount { get; set; }
    368         public TimeSpan? RetryWaitDuration { get; set; }
    369         public bool UseCookies { get; set; }
    370         public CookieContainer CookieContainer
    371         {
    372             get { return _cookieContainer; }
    373         }
    374         public string SaveFileDirectory { get; set; }
    375         /// <summary>
    376         /// 网页快照大小,Full Screenshot则设置Size.Empty
    377         /// </summary>
    378         public Size? Snapshot { get; set; }
    379         /// <summary>
    380         /// 供下载使用
    381         /// </summary>
    382         internal IHttpClient Client
    383         {
    384             get
    385             {
    386                 var client = _lazyClient.Value;
    387                 client.SendReceiveTimeout = this.SendReceiveTimeout;
    388                 client.RetryCount = this.RetryCount;
    389                 client.RetryWaitDuration = this.RetryWaitDuration;
    390                 client.UseCookies = this.UseCookies;
    391                 client.SaveFileDirectory = this.SaveFileDirectory;
    392                 return client;
    393             }
    394         }
    395         #endregion
    396 
    397         #region Constructors
    398         public HttpBrowser()
    399         {
    400             this.SendReceiveTimeout = -1;
    401             _lazyClient = new Lazy<IHttpClient>(() => new HttpClient(), false);
    402             _cookieContainer = new CookieContainer();
    403             this.UseCookies = true;
    404         }
    405         /// <summary>
    406         /// crossLoad中如有跨域交互,请继承扩展IsolateProxy
    407         /// </summary>
    408         /// <param name="crossLoad"></param>
    409         public HttpBrowser(Action<STAContext, HtmlDocument> crossLoad)
    410             : this()
    411         {
    412             _onLoad = crossLoad;
    413         }
    414         #endregion
    415 
    416         #region Methods
    417         public void SetProxy(EndPoint address, NetworkCredential credential = null)
    418         {
    419             if (credential != null)
    420             {
    421                 throw new NotSupportedException("credential");
    422             }
    423 
    424             if (IsSpawned)
    425             {
    426                 _proxyAddr = address;
    427             }
    428             else
    429             {
    430 #if DEBUG
    431                 App.LogInfo("SetProxy HttpBrowser {0}", address);
    432 #endif
    433                 if (WinInetInterop.SetConnectionProxy(address.ToString()))
    434                 {
    435                     App.LogInfo("SetProxy HttpBrowser {0} succeed", address);
    436                 }
    437             }
    438         }
    439         internal void RestoreSystemProxy()
    440         {
    441             if (IsSpawned)
    442             {
    443                 _proxyAddr = null;
    444             }
    445             else
    446             {
    447 #if DEBUG
    448                 App.LogInfo("RestoreSystemProxy HttpBrowser");
    449 #endif
    450                 if (WinInetInterop.RestoreSystemProxy())
    451                 {
    452                     App.LogInfo("RestoreSystemProxy HttpBrowser succeed");
    453                 }
    454             }
    455         }
    456 
    457         public string GetHtml(Uri requestUrl, HttpRequestContent content = null)
    458         {
    459             if (IsSpawned)
    460             {
    461                 return SpawnedStart(_proxyAddr, requestUrl, content);
    462             }
    463             using (var arg = new STAContext(requestUrl, content))
    464             {
    465                 arg.WaitHandle = new AutoResetEvent(false);
    466                 this.STA_Run(arg);
    467                 arg.WaitHandle.WaitOne();
    468                 return arg.OuterHtml;
    469             }
    470         }
    471 
    472         public string GetHtml(Uri requestUrl, AjaxEventEntity local, HttpRequestContent content = null)
    473         {
    474             Contract.Requires(requestUrl != null);
    475             if (local == null)
    476             {
    477                 return GetHtml(requestUrl, content);
    478             }
    479 
    480             using (var arg = new STAContext(requestUrl, content))
    481             {
    482                 arg.AjaxEvent = local;
    483                 arg.WaitHandle = new AutoResetEvent(false);
    484                 this.STA_Run(arg);
    485                 arg.WaitHandle.WaitOne();
    486                 return arg.OuterHtml;
    487             }
    488         }
    489 
    490         public Stream GetStream(Uri requestUrl, HttpRequestContent content = null)
    491         {
    492             return this.Client.GetStream(requestUrl, content);
    493         }
    494 
    495         public void DownloadFile(Uri fileUrl, out string fileName)
    496         {
    497             this.Client.DownloadFile(fileUrl, out fileName);
    498         }
    499         #endregion
    500 
    501         #region Hepler
    502         /// <summary>
    503         /// 注入Script
    504         /// </summary>
    505         /// <param name="document"></param>
    506         /// <param name="js"></param>
    507         public void InjectScript(HtmlDocument document, string js)
    508         {
    509             Contract.Requires(document != null);
    510 
    511             if (!CheckDocument(document.Url))
    512             {
    513                 App.LogInfo("HttpBrowser InjectScript Cancel");
    514                 return;
    515             }
    516             var head = document.GetElementsByTagName("head")[0];
    517             var script = document.CreateElement("script");
    518             script.SetAttribute("type", "text/javascript");
    519             script.SetAttribute("text", js);
    520             head.AppendChild(script);
    521         }
    522         private bool CheckDocument(Uri documentUrl)
    523         {
    524             if (documentUrl != null && documentUrl.OriginalString.StartsWith("res://ieframe.dll", StringComparison.OrdinalIgnoreCase))
    525             {
    526                 App.LogInfo("CheckDocument {0}", documentUrl);
    527                 return false;
    528             }
    529             return true;
    530         }
    531 
    532         /// <summary>
    533         /// 设置ajax参数
    534         /// </summary>
    535         /// <param name="browser"></param>
    536         private void SetAjax(WebBrowser browser, bool isEvent)
    537         {
    538             var arg = (STAContext)browser.ObjectForScripting;
    539             if (arg.AjaxBlocks.IsNullOrEmpty())
    540             {
    541                 return;
    542             }
    543             foreach (var block in arg.AjaxBlocks.Where(p => p.IsEvent == isEvent))
    544             {
    545                 var node = browser.Document.GetElementById(block.ID);
    546                 if (node == null)
    547                 {
    548                     continue;
    549                 }
    550                 arg.AjaxWaiter.AddCount();
    551                 arg.AjaxMark(node, (sender, e) =>
    552                 {
    553                     node = browser.Document.GetElementById(block.ID);
    554                     if (node == null || block.Text == null
    555                         || (!block.Text.Equals(node.InnerText, StringComparison.OrdinalIgnoreCase)))
    556                     {
    557                         // bug 如果先Signal再AddCount就会出错
    558                         arg.AjaxWaiter.Signal();
    559                     }
    560                 });
    561             }
    562             arg.AjaxWaiter.Signal();
    563         }
    564         /// <summary>
    565         /// 等待ajax执行
    566         /// </summary>
    567         /// <param name="arg"></param>
    568         private bool WaitAjax(STAContext arg)
    569         {
    570             if (arg.AjaxBlocks.IsNullOrEmpty())
    571             {
    572                 return false;
    573             }
    574             int aTimeout = this.SendReceiveTimeout;
    575             if (aTimeout <= 0)
    576             {
    577                 aTimeout = (int)TimeSpan.FromSeconds(60d).TotalMilliseconds;
    578             }
    579             if (!arg.AjaxWaiter.Wait(aTimeout))
    580             {
    581                 App.LogInfo("HttpBrowser Ajax Timeout {0}", arg.RequestUrl);
    582                 return false;
    583             }
    584             return true;
    585         }
    586 
    587         private void ProcessAjaxEvent(WebBrowser browser)
    588         {
    589             var arg = (STAContext)browser.ObjectForScripting;
    590             if (arg.AjaxEvent == null || string.IsNullOrEmpty(arg.AjaxEvent.ListenerSelector))
    591             {
    592                 return;
    593             }
    594 
    595             arg.IsProcessEvent = true;
    596             if (arg.AjaxEvent.EntryCall && arg.AjaxEvent.FinalCallback != null)
    597             {
    598                 InvokeHtml(browser);
    599                 arg.AjaxEvent.FinalCallback(arg.OuterHtml);
    600             }
    601             object val = browser.Document.InvokeScript("Soubiscbot", new object[] { 0, arg.AjaxEvent.ListenerSelector });
    602             var set = val.ToString().Split(',');
    603             foreach (string id in set)
    604             {
    605                 var node = browser.Document.GetElementById(id);
    606                 if (node == null)
    607                 {
    608                     continue;
    609                 }
    610                 arg.AjaxWaiter.Reset();
    611                 SetAjax(browser, true);
    612                 node.InvokeMember("click");
    613                 bool isSet = WaitAjax(arg);
    614                 Console.WriteLine("ProcessAjaxEvent isSet={0}", isSet);
    615                 if (arg.AjaxEvent.FinalCallback != null)
    616                 {
    617                     InvokeHtml(browser);
    618                     arg.AjaxEvent.FinalCallback(arg.OuterHtml);
    619                 }
    620             }
    621             arg.IsProcessEvent = false;
    622         }
    623 
    624         /// <summary>
    625         /// 读取页面OuterHtml
    626         /// </summary>
    627         /// <param name="browser"></param>
    628         /// <returns></returns>
    629         private void InvokeHtml(WebBrowser browser)
    630         {
    631             var scripting = (STAContext)browser.ObjectForScripting;
    632             if (scripting == null)
    633             {
    634                 throw new InvalidOperationException("InvokeHtml");
    635             }
    636             if (!scripting.DoInvokeHtml)
    637             {
    638                 return;
    639             }
    640             scripting.OuterHtml = (string)browser.Document.InvokeScript("Soubiscbot");
    641         }
    642         #endregion
    643 
    644         #region STAThread
    645         private void STA_Run(STAContext context)
    646         {
    647             context._ReleaseMemory();
    648             //context._Apartment = new MessageLoopApartment();
    649             STA_Run(state =>
    650             {
    651                 var browser = new WebBrowser()
    652                 {
    653                     ScriptErrorsSuppressed = true,
    654                     IsWebBrowserContextMenuEnabled = false,
    655                     ObjectForScripting = state
    656                 };
    657                 browser.Navigating += browser_Navigating;
    658                 browser.DocumentCompleted += browser_DocumentCompleted;
    659                 browser.NewWindow += browser_NewWindow;
    660                 if (this.Snapshot.HasValue)
    661                 {
    662                     browser.ScrollBarsEnabled = false;
    663                     browser.Size = new Size(Screen.PrimaryScreen.WorkingArea.Width, 10240);
    664                     browser.Show();
    665                 }
    666                 else
    667                 {
    668                     browser.Hide();
    669                 }
    670                 var arg = (STAContext)state;
    671                 byte[] postData = null;
    672                 string headers = null;
    673                 if (arg.RequestContent != null)
    674                 {
    675                     if (this.UseCookies)
    676                     {
    677                         if (arg.RequestContent.HasCookie)
    678                         {
    679                             _cookieContainer.Add(arg.RequestUrl, arg.RequestContent.Cookies);
    680                         }
    681                         string cookieHeader = arg.RequestContent.Headers[HttpRequestHeader.Cookie];
    682                         if (!string.IsNullOrEmpty(cookieHeader))
    683                         {
    684                             _cookieContainer.SetCookies(arg.RequestUrl, cookieHeader.Replace(';', ','));
    685                             arg.RequestContent.Headers.Remove(HttpRequestHeader.Cookie);
    686                         }
    687                         cookieHeader = _cookieContainer.GetCookieHeader(arg.RequestUrl);
    688                         if (cookieHeader.Length > 0)
    689                         {
    690                             arg.RequestContent.Headers[HttpRequestHeader.Cookie] = cookieHeader.Replace(',', ';');
    691                         }
    692                         //WinInetInterop.SaveCookies(_cookieContainer, absoluteUri);
    693                     }
    694                     else
    695                     {
    696                         arg.RequestContent.Headers[HttpRequestHeader.Cookie] = string.Empty;
    697                         //WinInetInterop.DeleteCache(WinInetInterop.CacheKind.Cookies);
    698                     }
    699                     if (arg.RequestContent.HasBody)
    700                     {
    701                         arg.RequestContent.Headers[HttpRequestHeader.ContentType] = "application/x-www-form-urlencoded";
    702                         postData = Encoding.UTF8.GetBytes(arg.RequestContent.GetFormString());
    703                     }
    704                     headers = arg.RequestContent.GetHeadersString();
    705                 }
    706                 browser.Navigate(arg.RequestUrl, "_self", postData, headers);
    707 
    708                 STA_Run(STA_Wait, browser, arg);
    709                 //会阻塞当前线程
    710                 Application.Run();
    711             }, context, context);
    712         }
    /// <summary>
    /// Blocks on the request's <c>SendReceiveWaiter</c> for up to <c>SendReceiveTimeout</c>
    /// milliseconds, stops the browser if that wait expires before the page reached the
    /// Complete state, and finally calls <c>WaitAjax</c> for the request.
    /// </summary>
    /// <param name="state">
    /// The <see cref="WebBrowser"/> handling the request; its <c>ObjectForScripting</c> is
    /// cast to the request's <c>STAContext</c>.
    /// </param>
    private void STA_Wait(object state)
    {
        var webBrowser = (WebBrowser)state;
#if DEBUG
        App.LogInfo("STA_Wait {0}", webBrowser.Url);
#endif
        var context = (STAContext)webBrowser.ObjectForScripting;
        try
        {
            int timeout = this.SendReceiveTimeout;
            // A timeout below zero skips the send/receive wait altogether.
            if (timeout > -1)
            {
                bool signaled = context.SendReceiveWaiter.WaitOne(timeout);
                if (!signaled)
                {
                    // The request timed out: abort the load through Control.Invoke.
                    Action stopIfStillLoading = () =>
                    {
                        if (webBrowser.ReadyState == WebBrowserReadyState.Complete)
                        {
                            return;
                        }
                        webBrowser.Stop();
                        App.LogInfo("HttpBrowser SendReceive Timeout {0}", context.RequestUrl);
                    };
                    webBrowser.Invoke(stopIfStillLoading);
                }
            }
            WaitAjax(context);
        }
        catch (Exception error)
        {
            App.LogError(error, "HttpBrowser STA_Wait {0}", context.RequestUrl);
            HandleException(error);
        }
    }
    743 
    /// <summary>
    /// NewWindow handler: when the document's active element carries a non-empty
    /// <c>href</c> attribute, the new window is cancelled and the link is opened in this
    /// browser instead. In every other case the event is left untouched.
    /// </summary>
    /// <param name="sender">The <see cref="WebBrowser"/> raising the event.</param>
    /// <param name="e">Event data; <c>Cancel</c> is set only when the link is redirected.</param>
    private void browser_NewWindow(object sender, System.ComponentModel.CancelEventArgs e)
    {
        var browser = (WebBrowser)sender;

        // WebBrowser.Document is null while no document is loaded; dereferencing it
        // blindly would throw a NullReferenceException inside the event handler.
        var document = browser.Document;
        if (document == null)
        {
            return;
        }

        var node = document.ActiveElement;
        if (node == null)
        {
            return;
        }

        string link = node.GetAttribute("href");
        if (string.IsNullOrEmpty(link))
        {
            // No usable link: do not cancel, exactly as before.
            return;
        }

        e.Cancel = true;
        browser.Navigate(link);
    }
    /// <summary>
    /// Navigating handler: calls <c>DelayLazyLoad</c> on the <c>STAContext</c> stored in the
    /// browser's <c>ObjectForScripting</c>.
    /// </summary>
    /// <param name="sender">The <see cref="WebBrowser"/> raising the event.</param>
    /// <param name="e">Navigation event data (not used).</param>
    private void browser_Navigating(object sender, WebBrowserNavigatingEventArgs e)
    {
        var webBrowser = (WebBrowser)sender;
#if DEBUG
        App.LogInfo("browser_Navigating {0}", webBrowser.Url);
#endif
        ((STAContext)webBrowser.ObjectForScripting).DelayLazyLoad();
    }
    /// <summary>
    /// DocumentCompleted handler. Once the document passes <c>CheckDocument</c> and the
    /// browser reports the Complete state, it: logs a URL mismatch between the browser and
    /// the event, loads cookies when <c>UseCookies</c> is set, injects the helper script
    /// (adds a jQuery script tag if <c>$</c> is undefined and defines <c>Soubiscbot</c>),
    /// signals <c>SendReceiveWaiter</c>, calls <c>SetAjax</c>, runs the <c>_onLoad</c>
    /// delegate, and then either re-enters <c>STA_Wait</c> (redirect) or registers the
    /// lazy-load continuation that invokes <c>ProcessAjaxEvent</c> and <c>Callback</c>.
    /// </summary>
    /// <param name="sender">The <see cref="WebBrowser"/> raising the event.</param>
    /// <param name="e">Event data; <c>e.Url</c> is compared against the browser's own Url.</param>
    private void browser_DocumentCompleted(object sender, WebBrowserDocumentCompletedEventArgs e)
    {
        // Script injected into every completed document. The text is runtime data and
        // must stay exactly as-is, including its internal whitespace.
        const string bootstrapScript = @"if (typeof ($) == 'undefined') {
            var script = document.createElement('script');
            script.src = 'http://libs.baidu.com/jquery/1.9.0/jquery.js';
            document.getElementsByTagName('head')[0].appendChild(script);
        }
        function Soubiscbot(kind) {
            switch (kind) {
                case 0:
                    var set = [];
                    $(arguments[1]).each(function (i, o) {
                        var me = $(o);
                        var id = me.attr('id');
                        if (!id) {
                            id = Math.random();
                            me.attr('id', id);
                        }
                        set[i] = id;
                    });
                    return set.toString();
                    break;
                case 1:
                    try {
                        return arguments[1]();
                    }
                    catch (ex) {
                        return ex.toString();
                    }
                    break;
                default:
                    return document.documentElement.outerHTML;
                    break;
            }
        }";

        var webBrowser = (WebBrowser)sender;
#if DEBUG
        App.LogInfo("browser_DocumentCompleted {0}", webBrowser.Url);
#endif
        var context = (STAContext)webBrowser.ObjectForScripting;
        try
        {
            // e.Url never turns into a res:// address, so the browser's own Url is checked.
            bool accepted = CheckDocument(webBrowser.Url);
            if (!accepted)
            {
                App.LogInfo("HttpBrowser DocumentCompleted Cancel {0}", webBrowser.Url);
                return;
            }

            bool fullyLoaded = webBrowser.ReadyState == WebBrowserReadyState.Complete;
            if (!fullyLoaded)
            {
                return;
            }

            // A redirect or an iframe load happened.
            bool urlDiffers = webBrowser.Url != e.Url;
            if (urlDiffers)
            {
                App.LogInfo("HttpBrowser Redirect {0} to {1}", context.RequestUrl, e.Url);
            }

            if (this.UseCookies)
            {
                WinInetInterop.LoadCookies(_cookieContainer, webBrowser.Document.Url);
            }

            InjectScript(webBrowser.Document, bootstrapScript);

            if (this.SendReceiveTimeout > -1)
            {
                context.SendReceiveWaiter.Set();
            }

            SetAjax(webBrowser, false);

            if (_onLoad != null)
            {
                _onLoad(context, webBrowser.Document);
            }

            if (context.IsRedirect)
            {
                STA_Run(STA_Wait, webBrowser, context);
                return;
            }

            context.RegisterLazyLoad(target =>
            {
                var lazyBrowser = (WebBrowser)target;
                if (!lazyBrowser.IsDisposed)
                {
                    lazyBrowser.Invoke((Action<WebBrowser>)ProcessAjaxEvent, lazyBrowser);
                    lazyBrowser.Invoke((Action<object>)Callback, lazyBrowser);
                }
            }, webBrowser);
        }
        catch (Exception error)
        {
            App.LogError(error, "HttpBrowser DocumentCompleted RequestUrl={0} BrowserUrl={1}", context.RequestUrl, webBrowser.Url);
            HandleException(error);
        }
    }
    860 
    /// <summary>
    /// Ends the process, using the exception's <c>HResult</c> as exit code, when the
    /// exception is an <see cref="OutOfMemoryException"/> or an
    /// <see cref="AccessViolationException"/>. Any other exception is ignored here.
    /// </summary>
    /// <param name="ex">The exception to inspect.</param>
    private static void HandleException(Exception ex)
    {
        bool isFatal = ex is OutOfMemoryException || ex is AccessViolationException;
        if (!isFatal)
        {
            return;
        }

        App.LogInfo("HttpBrowser auto exit {0}", ex.HResult);
        Environment.Exit(ex.HResult);
    }
    869         #endregion
    870 
    871         #region Callback
    /// <summary>
    /// Final step for a request: optionally saves a PNG snapshot of the page, then calls
    /// <c>InvokeHtml</c>, and always finishes by calling <c>STA_Exit</c>. The method returns
    /// immediately when the monitor of the request's <c>STAContext</c> cannot be entered.
    /// </summary>
    /// <param name="state">
    /// The <see cref="WebBrowser"/> handling the request; its <c>ObjectForScripting</c> is
    /// cast to the request's <c>STAContext</c>.
    /// </param>
    private void Callback(object state)
    {
        var webBrowser = (WebBrowser)state;
#if DEBUG
        App.LogInfo("Callback {0}", webBrowser.Url);
#endif
        var context = (STAContext)webBrowser.ObjectForScripting;

        // Give up when the context's monitor is not available right now.
        bool entered = Monitor.TryEnter(context);
        if (!entered)
        {
            return;
        }

        try
        {
#warning HACK
            // Fixed pause before taking a snapshot (flagged as a hack by the author).
            if (this.Snapshot.HasValue)
            {
                Thread.Sleep(4000);
            }

            Action finish = () =>
            {
                if (this.Snapshot.HasValue)
                {
                    Guid snapshotId = Guid.NewGuid();

                    // Tag the body with the snapshot id and register a scroll-on-load listener.
                    string script = string.Format("document.body.setAttribute('{0}', '{1}');", Callback_Snapshot, snapshotId)
                        + @"    window.addEventListener('load', function () {
        window.scrollTo(0, document.documentElement.offsetHeight);
    });
";
                    webBrowser.Document.InvokeScript("eval", new object[] { script });

                    string savePath = Path.Combine(this.SaveFileDirectory, string.Format("{0}.png", snapshotId));
                    try
                    {
                        // An empty requested size means "use the body's scroll size".
                        Size shotSize;
                        if (this.Snapshot.Value == Size.Empty)
                        {
                            shotSize = webBrowser.Document.Body.ScrollRectangle.Size;
                        }
                        else
                        {
                            shotSize = this.Snapshot.Value;
                        }
                        webBrowser.Size = shotSize;

                        using (var image = new Bitmap(webBrowser.Width, webBrowser.Height))
                        {
                            NativeMethods.DrawTo(webBrowser.ActiveXInstance, image, Color.White);
                            image.Save(savePath, System.Drawing.Imaging.ImageFormat.Png);
                            App.LogInfo("xSnapshot {0} {1}", webBrowser.Url, savePath);
                        }
                    }
                    catch (Exception snapshotError)
                    {
                        // A failed snapshot is logged and does not stop InvokeHtml below.
                        App.LogError(snapshotError, "xSnapshot {0} {1}", webBrowser.Url, savePath);
                    }
                }

                InvokeHtml(webBrowser);
            };
            webBrowser.Invoke(finish);
        }
        catch (Exception error)
        {
            App.LogError(error, "HttpBrowser Callback {0}", context.RequestUrl);
            HandleException(error);
        }
        finally
        {
            Monitor.Exit(context);
            STA_Exit(webBrowser);
        }
    }
    935 
    /// <summary>
    /// Important: leaves the STA UI thread. Restores the system proxy, signals the
    /// request's <c>WaitHandle</c> (when present), stops the browser, calls
    /// <c>AjaxUnmarks</c>, requests <see cref="Application.ExitThread()"/> through the
    /// browser and disposes the browser.
    /// </summary>
    /// <remarks>
    /// The thread-exit request and the disposal run in <c>finally</c> blocks, so a failure
    /// in <c>Stop</c> or <c>AjaxUnmarks</c> can no longer skip them and leave the message
    /// loop running with an undisposed browser. If several steps fail, only the last
    /// <see cref="SystemException"/> is logged.
    /// </remarks>
    /// <param name="browser">The browser whose request has finished.</param>
    private void STA_Exit(WebBrowser browser)
    {
#if DEBUG
        App.LogInfo("STA_Exit {0}", browser.Url);
#endif
        RestoreSystemProxy();
        var arg = (STAContext)browser.ObjectForScripting;
        if (arg.WaitHandle != null)
        {
            arg.WaitHandle.Set();
        }
        try
        {
            try
            {
                browser.Stop();
                arg.AjaxUnmarks();
            }
            finally
            {
                try
                {
                    // Must run even when the steps above failed, otherwise the message
                    // loop of this thread would never be asked to end.
                    browser.Invoke((Action)(() => Application.ExitThread()));
                }
                finally
                {
                    browser.Dispose();
                }
            }
        }
        catch (SystemException ex)
        {
            // e.g. AccessViolationException, InvalidComObjectException
            App.LogError(ex, "HttpBrowser STA_Exit {0}", arg.RequestUrl);
        }
    }
    965         #endregion
    966     }
    967 }
    HttpBrowser
    #region Spawned Process
            /// <summary>
            /// Gets or sets the spawned-process flag.
            /// NOTE(review): presumably selects whether requests are delegated to a child
            /// process through SpawnedStart; the code reading this flag is not part of this
            /// section, so confirm against the callers.
            /// </summary>
            public bool IsSpawned { get; set; }
    
            /// <summary>
            /// Runs the request in a child instance of the current executable and returns
            /// everything the child wrote to its standard output.
            /// </summary>
            /// <remarks>
            /// The request is serialized, Base64-encoded and passed as a single command-line
            /// argument prefixed with <c>x#</c>. Standard output is read asynchronously so that
            /// the exit timeout really applies: with a synchronous <c>ReadToEnd</c> placed before
            /// <c>WaitForExit</c>, a child that never finishes would block the caller forever and
            /// the kill path could never run.
            /// </remarks>
            /// <param name="proxy">Proxy end point forwarded to the child; may be null.</param>
            /// <param name="requestUrl">The URL the child should request.</param>
            /// <param name="content">Optional request content; only its Headers and Form are forwarded.</param>
            /// <returns>
            /// The text the child wrote to standard output (whatever was written before the
            /// child was killed, if the timeout expired).
            /// </returns>
            internal string SpawnedStart(EndPoint proxy, Uri requestUrl, HttpRequestContent content)
            {
    #if DEBUG
                App.LogInfo("SpawnedStart: Proxy={0}	Url={1}", proxy, requestUrl);
    #endif
                // Upper bound for the child's lifetime, in milliseconds.
                const int exitTimeout = 120 * 1000;

                bool hasValue = content != null;
                var stream = Serializer.Serialize(Tuple.Create(proxy, requestUrl,
                    hasValue ? content.Headers : null,
                    hasValue ? content.Form : null));
                RestoreSystemProxy();
                string[] args = Environment.GetCommandLineArgs();
                string arg = string.Format("x#{0}", Convert.ToBase64String(stream.ToArray()));
                using (var proc = Process.Start(new ProcessStartInfo(args[0], arg)
                {
                    RedirectStandardOutput = true,
                    UseShellExecute = false,
                }))
                {
                    // Start draining stdout right away; the child could otherwise stall on a
                    // full pipe, and we must stay free to enforce the timeout below.
                    Task<string> output = proc.StandardOutput.ReadToEndAsync();
                    if (!proc.WaitForExit(exitTimeout))
                    {
                        try
                        {
                            proc.Kill();
                        }
                        catch (InvalidOperationException)
                        {
                            // The child exited between the timeout and the kill: nothing to do.
                        }
                    }
                    // Completes once the child's end of the pipe is closed; rethrows the
                    // original exception (not an AggregateException) if reading failed.
                    return output.GetAwaiter().GetResult();
                }
            }
    
            /// <summary>
            /// Child-process entry point. When the first command-line argument starts with the
            /// <c>x#</c> marker, the Base64 payload after it is deserialized, the request is
            /// executed with a new <c>HttpBrowser</c> and the resulting HTML is written to
            /// standard output.
            /// </summary>
            /// <returns>
            /// <c>true</c> when the process was started as a spawned child and the request was
            /// handled; <c>false</c> when no <c>x#</c> argument is present.
            /// </returns>
            public static bool SpawnedMain()
            {
                string[] args = Environment.GetCommandLineArgs();
                // Ordinal comparison: the marker is a fixed token and Substring(2) below relies
                // on it being exactly two characters, which a culture-sensitive match does not
                // guarantee.
                if (args.Length <= 1 || !args[1].StartsWith("x#", StringComparison.Ordinal))
                {
                    return false;
                }

                // NOTE(review): the payload comes from the command line; if Serializer wraps a
                // binary formatter, it must only ever be fed data produced by SpawnedStart.
                Tuple<EndPoint, Uri, WebHeaderCollection, NameValueCollection> arg;
                using (var stream = new MemoryStream(Convert.FromBase64String(args[1].Substring(2))))
                {
                    arg = (Tuple<EndPoint, Uri, WebHeaderCollection, NameValueCollection>)Serializer.Deserialize(stream);
                }

                var client = (IHttpClient)new HttpBrowser();
                if (arg.Item1 != null)
                {
                    client.SetProxy(arg.Item1);
                }
                string html = client.GetHtml(arg.Item2, new HttpRequestContent()
                {
                    Headers = arg.Item3,
                    Form = arg.Item4
                });
                Console.WriteLine(html);
                return true;
            }
            #endregion
  • 相关阅读:
    思蕊防静电地板
    一个老站长的22条军规
    百度天天快照知识宝典
    搜索引擎常用搜索技巧
    网站运营工作流程
    关于线程间通信
    VS2012 EF5 连接oracle11.2
    ArcSde for Oracle服务注册
    NHibernate composite-id联合主键配置
    NHibernate 的 ID 标识选择器
  • 原文地址:https://www.cnblogs.com/Googler/p/3850443.html
Copyright © 2011-2022 走看看