zoukankan      html  css  js  c++  java
  • C#彻底解决Web Browser 跨域读取Iframes内容

    用 C# WinForms 的 WebBrowser 控件读取网页内容,分析数据,做一些采集工作。

    如果是同一个域名下面还是好办的,基本上用HtmlAgilityPack就完全可以解决问题。 

    但是现在遇到跨域问题,比如我需要打开页面上存在的广告联盟的地址,进行保存。 

    这就牵扯到跨域了。常见的错误是:拒绝访问。

    "Access is denied. (Exception from HRESULT: 0x80070005 (E_ACCESSDENIED))"。 

    因为你没有从一个网站去访问或修改另一个网站数据的权限。 

    怎么办?很苦恼吧。现在就告诉大家一个好办法。 

    直接上代码了。 

    下面是工具类,大家保存成一个单独的类文件即可。项目需要引用 mshtml(Microsoft.mshtml 程序集)。

      1 using System;
      2 using System.Runtime.InteropServices;
      3 using System.Windows.Forms;
      4 using mshtml;
      5 
      6 namespace WebBrowserTest
      7 {
      8 
      9     // This is the COM IServiceProvider interface, not System.IServiceProvider .Net interface! 
     10 
     /// <summary>
     /// Managed declaration of the COM <c>IServiceProvider</c> interface
     /// (not the .NET <c>System.IServiceProvider</c> interface).
     /// </summary>
     [ComImport]
     [ComVisible(true)]
     [Guid("6D5140C1-7436-11CE-8034-00AA006009FA")]
     [InterfaceType(ComInterfaceType.InterfaceIsIUnknown)]
     public interface IServiceProvider
     {
         /// <summary>
         /// Asks the object for the service <paramref name="guidService"/> and
         /// requests the interface <paramref name="riid"/> on it.
         /// </summary>
         /// <param name="guidService">Identifier of the requested service.</param>
         /// <param name="riid">Identifier of the interface wanted on that service.</param>
         /// <param name="ppvObject">Receives the resulting interface pointer as an object.</param>
         /// <returns>
         /// The raw HRESULT of the call; because of <c>[PreserveSig]</c> a failure
         /// is returned here instead of being raised as an exception.
         /// </returns>
         [PreserveSig]
         [return: MarshalAs(UnmanagedType.I4)]
         int QueryService(
             ref Guid guidService,
             ref Guid riid,
             [MarshalAs(UnmanagedType.Interface)] out object ppvObject);
     }
     25 
     /// <summary>
     /// Command status flags; this is the return type of
     /// <c>IWebBrowser2.QueryStatusWB</c>. Each member is a distinct single bit.
     /// Members are listed in ascending numeric order.
     /// </summary>
     public enum OLECMDF
     {
         OLECMDF_SUPPORTED = 1,
         OLECMDF_ENABLED = 2,
         OLECMDF_LATCHED = 4,
         OLECMDF_NINCHED = 8,
         OLECMDF_INVISIBLE = 16,
         OLECMDF_DEFHIDEONCTXTMENU = 32
     }
     42 
     /// <summary>
     /// Command identifiers accepted by <c>IWebBrowser2.QueryStatusWB</c> and
     /// <c>IWebBrowser2.ExecWB</c>. Only the subset declared here is available.
     /// Members are listed in ascending numeric order.
     /// </summary>
     public enum OLECMDID
     {
         OLECMDID_SAVEAS = 4,
         OLECMDID_PRINT = 6,
         OLECMDID_PRINTPREVIEW = 7,
         OLECMDID_PAGESETUP = 8,
         OLECMDID_PROPERTIES = 10
     }
     57 
     /// <summary>
     /// Execution options passed as the second argument of
     /// <c>IWebBrowser2.ExecWB</c>. The numeric values are the consecutive
     /// integers 0 through 3, written out explicitly here.
     /// </summary>
     public enum OLECMDEXECOPT
     {
         OLECMDEXECOPT_DODEFAULT = 0,
         OLECMDEXECOPT_PROMPTUSER = 1,
         OLECMDEXECOPT_DONTPROMPTUSER = 2,
         OLECMDEXECOPT_SHOWHELP = 3
     }
     70 
     /// <summary>
     /// Managed declaration of the COM <c>IWebBrowser2</c> interface.
     /// </summary>
     /// <remarks>
     /// The members below are kept in exactly the order of the original
     /// declaration. For a COM-imported interface the declaration order
     /// determines which native slot each member is bound to, so members must
     /// not be reordered, inserted or removed. DispIds are written in decimal.
     /// </remarks>
     [ComImport]
     [Guid("D30C1661-CDAF-11d0-8A3E-00C04FC9E26E")]
     [TypeLibType(TypeLibTypeFlags.FOleAutomation | TypeLibTypeFlags.FDual | TypeLibTypeFlags.FHidden)]
     public interface IWebBrowser2
     {
         // ---- DispIds 100-106 (plus -550) ----
         [DispId(100)] void GoBack();
         [DispId(101)] void GoForward();
         [DispId(102)] void GoHome();
         [DispId(103)] void GoSearch();
         [DispId(104)] void Navigate([In] string Url, [In] ref object flags, [In] ref object targetFrameName, [In] ref object postData, [In] ref object headers);
         [DispId(-550)] void Refresh();
         [DispId(105)] void Refresh2([In] ref object level);
         [DispId(106)] void Stop();

         // ---- DispIds 200-212 ----
         [DispId(200)]
         object Application
         {
             [return: MarshalAs(UnmanagedType.IDispatch)]
             get;
         }

         [DispId(201)]
         object Parent
         {
             [return: MarshalAs(UnmanagedType.IDispatch)]
             get;
         }

         [DispId(202)]
         object Container
         {
             [return: MarshalAs(UnmanagedType.IDispatch)]
             get;
         }

         /// <summary>Document object currently held by the browser, marshalled as IDispatch.</summary>
         [DispId(203)]
         object Document
         {
             [return: MarshalAs(UnmanagedType.IDispatch)]
             get;
         }

         [DispId(204)] bool TopLevelContainer { get; }
         [DispId(205)] string Type { get; }
         [DispId(206)] int Left { get; set; }
         [DispId(207)] int Top { get; set; }
         [DispId(208)] int Width { get; set; }
         [DispId(209)] int Height { get; set; }
         [DispId(210)] string LocationName { get; }
         [DispId(211)] string LocationURL { get; }
         [DispId(212)] bool Busy { get; }

         // ---- DispIds 300-303 (plus 0 and -515) ----
         [DispId(300)] void Quit();
         [DispId(301)] void ClientToWindow(out int pcx, out int pcy);
         [DispId(302)] void PutProperty([In] string property, [In] object vtValue);
         [DispId(303)] object GetProperty([In] string property);
         [DispId(0)] string Name { get; }
         [DispId(-515)] int HWND { get; }

         // ---- DispIds 400-407 ----
         [DispId(400)] string FullName { get; }
         [DispId(401)] string Path { get; }
         [DispId(402)] bool Visible { get; set; }
         [DispId(403)] bool StatusBar { get; set; }
         [DispId(404)] string StatusText { get; set; }
         [DispId(405)] int ToolBar { get; set; }
         [DispId(406)] bool MenuBar { get; set; }
         [DispId(407)] bool FullScreen { get; set; }

         // ---- DispIds 500-503 (plus -525) ----
         [DispId(500)] void Navigate2([In] ref object URL, [In] ref object flags, [In] ref object targetFrameName, [In] ref object postData, [In] ref object headers);
         [DispId(501)] OLECMDF QueryStatusWB([In] OLECMDID cmdID);
         [DispId(502)] void ExecWB([In] OLECMDID cmdID, [In] OLECMDEXECOPT cmdexecopt, ref object pvaIn, IntPtr pvaOut);
         [DispId(503)] void ShowBrowserBar([In] ref object pvaClsid, [In] ref object pvarShow, [In] ref object pvarSize);
         [DispId(-525)] WebBrowserReadyState ReadyState { get; }

         // ---- DispIds 550-556 ----
         [DispId(550)] bool Offline { get; set; }
         [DispId(551)] bool Silent { get; set; }
         [DispId(552)] bool RegisterAsBrowser { get; set; }
         [DispId(553)] bool RegisterAsDropTarget { get; set; }
         [DispId(554)] bool TheaterMode { get; set; }
         [DispId(555)] bool AddressBar { get; set; }
         [DispId(556)] bool Resizable { get; set; }
     }
    265 
    /// <summary>
    /// Utility for reading the document of a frame window hosted in the
    /// IE-based browser control, including frames whose document comes from
    /// another domain (where direct access fails with E_ACCESSDENIED).
    /// </summary>
    class CorssDomainHelper
    {
        // Service identifier and interface identifier handed to
        // IServiceProvider.QueryService. They are readonly; QueryService takes
        // its GUIDs by ref, so the method below passes local copies.
        private static readonly Guid IID_IWebBrowserApp = new Guid("0002DF05-0000-0000-C000-000000000046");

        private static readonly Guid IID_IWebBrowser2 = new Guid("D30C1661-CDAF-11D0-8A3E-00C04FC9E26E");

        /// <summary>
        /// Returns the document of <paramref name="htmlWindow"/>.
        /// </summary>
        /// <remarks>
        /// The window's <c>document</c> property is tried first. If that raises
        /// <see cref="COMException"/> or <see cref="UnauthorizedAccessException"/>,
        /// the window is queried through the COM <c>IServiceProvider</c> for its
        /// <c>IWebBrowser2</c>, and the document is read from that object instead.
        /// </remarks>
        /// <param name="htmlWindow">Frame window to read; may be null.</param>
        /// <returns>
        /// The window's document, or null when <paramref name="htmlWindow"/> is
        /// null or the document could not be obtained by either route. This
        /// method does not throw; failures of the fallback route are written
        /// to the console.
        /// </returns>
        public static IHTMLDocument3 GetDocumentFromWindow(IHTMLWindow2 htmlWindow)
        {
            if (htmlWindow == null)
            {
                return null;
            }

            // First try the usual way to get the document.
            try
            {
                IHTMLDocument2 doc = htmlWindow.document;
                return (IHTMLDocument3)doc;
            }
            catch (COMException)
            {
                // Not expected in practice, but treated like access-denied:
                // fall through to the IServiceProvider route below.
            }
            catch (UnauthorizedAccessException)
            {
                // E_ACCESSDENIED: the frame holds a document from another
                // domain. Fall through to the IServiceProvider route below.
            }
            catch (Exception)
            {
                // Any other failure is not a cross-domain refusal; give up.
                return null;
            }

            try
            {
                // Convert IHTMLWindow2 to the COM IServiceProvider.
                IServiceProvider sp = (IServiceProvider)htmlWindow;

                // QueryService takes its GUIDs by ref, so pass local copies
                // rather than the shared static fields.
                Guid serviceId = IID_IWebBrowserApp;
                Guid interfaceId = IID_IWebBrowser2;

                object brws;
                int hr = sp.QueryService(ref serviceId, ref interfaceId, out brws);

                // QueryService is [PreserveSig]: failure is reported through
                // the returned HRESULT (negative), not through an exception.
                if (hr < 0 || brws == null)
                {
                    Console.WriteLine(string.Format("QueryService for IWebBrowser2 failed, HRESULT=0x{0:X8}", hr));
                    return null;
                }

                // Get the document from IWebBrowser2.
                IWebBrowser2 browser = (IWebBrowser2)brws;
                return (IHTMLDocument3)browser.Document;
            }
            catch (Exception ex)
            {
                Console.WriteLine(ex);
            }

            return null;
        }
    }
    336 }

    调用方法:
     /// <summary>
     /// Usage example: walks every frame of a WebBrowser document, resolves each
     /// frame's document through <c>CorssDomainHelper.GetDocumentFromWindow</c>
     /// and enumerates the links it contains.
     /// </summary>
     /// <remarks>
     /// NOTE(review): the control is created here only to keep the example
     /// self-contained. A control that has not loaded a page presumably has no
     /// document yet, in which case this method returns without doing anything.
     /// In a real application run the same loop against the form's WebBrowser
     /// once its page has finished loading.
     /// </remarks>
     public void test()
     {
         using (WebBrowser browser = new WebBrowser())
         {
             // Read each wrapper once instead of re-evaluating the property
             // chain on every loop iteration, and bail out if any link is missing.
             HtmlDocument hostDocument = browser.Document;
             if (hostDocument == null || hostDocument.Window == null)
             {
                 return;
             }

             HtmlWindowCollection frames = hostDocument.Window.Frames;
             if (frames == null)
             {
                 return;
             }

             for (int i = 0; i < frames.Count; i++)
             {
                 IHTMLDocument3 baiduDoc = CorssDomainHelper.GetDocumentFromWindow(frames[i].DomWindow as IHTMLWindow2);
                 if (baiduDoc == null || baiduDoc.documentElement == null)
                 {
                     continue;
                 }

                 // Skip the frame when its document is missing or is not an HTMLDocument.
                 HTMLDocument frameDocument = baiduDoc.documentElement.document as HTMLDocument;
                 if (frameDocument == null)
                 {
                     continue;
                 }

                 IHTMLElementCollection linkss = frameDocument.links;
                 foreach (mshtml.IHTMLElement element in linkss)
                 {
                     // Add your own processing code here.
                 }
             }
         }
     }

    原文出自:http://www.cnblogs.com/Leo_wl/p/3181353.html

  • 相关阅读:
    mysql 查看数据库及表大小以及数据库扩容评估
    idea中配置mybatis 映射文件模版及 mybatis plus 自定义sql
    mybatis plus 中增删改查及Wrapper的使用
    mybatis plus 主键策略
    搭建 spring boot + mybatis plus 项目框架并进行调试
    jeecg datagrid重新指定数据源
    java 日志脱敏框架 sensitive-新版本0.0.2-深度拷贝,属性为对象和集合的
    java 实现敏感词(sensitive word)工具详解使用说明
    java 日志脱敏框架 sensitive,优雅的打印脱敏日志
    互联网公司OpenAPI链接
  • 原文地址:https://www.cnblogs.com/changjianblog/p/7456618.html
Copyright © 2011-2022 走看看