zoukankan      html  css  js  c++  java
  • AngleSharp一些示例

    最近看到了AngleSharp,感觉非常好用,比HtmlAgilityPack更顺手一些

    AngleSharp 地址:https://github.com/AngleSharp/AngleSharp

    在Nuget中要安装这两个包,一个是主包,另一个是js的扩展包

    首先看第一个例子

    /// <summary>
    /// Parses a small HTML fragment, prints the resulting document, appends a new
    /// paragraph element to the body and prints the document once more.
    /// </summary>
    static void FirstExample ()
            {
                // Markup to parse; note that the <p> element has no closing tag.
                const string markup = "<h1>Some example source</h1><p>This is a paragraph element";

                // Build an HTML parser and turn the markup into a document.
                var htmlParser = new HtmlParser ();
                var dom = htmlParser.Parse (markup);

                // Print the serialized document before any modification.
                Console.WriteLine ("输出整个文档的html:");
                Console.WriteLine (dom.DocumentElement.OuterHtml);

                // Prepare a new <p> element and give it some text.
                var paragraph = dom.CreateElement ("p");
                paragraph.TextContent = "This is another paragraph.";

                // Announce the insertion, then append the paragraph to <body>.
                Console.WriteLine ("在body中插入一个P元素");
                dom.Body.AppendChild (paragraph);

                // Print the document again, now that the paragraph has been appended.
                Console.WriteLine ("输出整个文档的html:");
                Console.WriteLine (dom.DocumentElement.OuterHtml);
            }
    

      这个例子演示了如何向本地解析的html文档中追加一个元素

    第二个例子,获取指定的元素

     /// <summary>
     /// Selects the <c>li</c> elements carrying the <c>blue</c> class in two ways
     /// (a LINQ query over all elements and a CSS selector) and prints the text
     /// of every match for each approach.
     /// </summary>
     static void UsingLinq ()
            {
                // List markup used as the local document.
                const string listMarkup = "<ul><li>First item<li>Second item<li class='blue'>Third item!<li class='blue red'>Last item!</ul>";

                var htmlParser = new HtmlParser ();
                var dom = htmlParser.Parse (listMarkup);

                // LINQ variant: filter every element of the document by tag name and class.
                // The query is deferred; it runs when it is enumerated below.
                var viaLinq =
                    from element in dom.All
                    where element.LocalName == "li" && element.ClassList.Contains ("blue")
                    select element;

                // CSS variant: the same selection expressed as a selector.
                var viaCss = dom.QuerySelectorAll ("li.blue");

                Console.WriteLine ("两种选择结果 ...");

                Console.WriteLine ();
                Console.WriteLine ("LINQ:");
                foreach ( var match in viaLinq )
                {
                    Console.WriteLine (match.Text ());
                }

                Console.WriteLine ();
                Console.WriteLine ("CSS:");
                foreach ( var match in viaCss )
                {
                    Console.WriteLine (match.Text ());
                }
            }
    

      

    单选元素

    /// <summary>
    /// Picks the single <c>em</c> element out of a parsed fragment and prints it
    /// through several different text/markup accessors for comparison.
    /// </summary>
    static void SingleElements ()
            {
                const string markup = "<b><i>This is some <em> bold <u>and</u> italic </em> text!</i></b>";

                var dom = new HtmlParser ().Parse (markup);
                var emElement = dom.QuerySelector ("em");

                Console.WriteLine ("Difference between several ways of getting text:");
                Console.WriteLine ();
                Console.WriteLine ("Only from C# / AngleSharp:");
                Console.WriteLine ();

                // Helper-method accessors (the C# way).
                // Sample output noted by the original author:
                //   ToHtml () -> <em> bold <u>and</u> italic </em>
                //   Text ()   -> bold and italic
                Console.WriteLine (emElement.ToHtml ());
                Console.WriteLine (emElement.Text ());

                Console.WriteLine ();
                Console.WriteLine ("From the DOM:");
                Console.WriteLine ();

                // DOM property accessors.
                // Sample output noted by the original author:
                //   InnerHtml   ->  bold <u>and</u> italic
                //   OuterHtml   -> <em> bold <u>and</u> italic </em>
                //   TextContent ->  bold and italic
                Console.WriteLine (emElement.InnerHtml);
                Console.WriteLine (emElement.OuterHtml);
                Console.WriteLine (emElement.TextContent);
            }
    

      

    下面这个示例是执行其中的js代码

    /// <summary>
    /// Parses a page containing an inline script with a JavaScript-enabled
    /// configuration and prints the resulting document markup.
    /// </summary>
    static void SimpleScriptingSample ()
            {
                // The page to process; its inline script sets the title and writes a span.
                const string page = @"<!doctype html>
            <html>
            <head><title>Sample</title></head>
            <body>
            <script>
            document.title = 'Simple manipulation...';
            document.write('<span class=greeting>Hello World!</span>');
            </script>
            </body>";

                // A custom configuration with JavaScript support drives the parser.
                var scriptingConfig = Configuration.Default.WithJavaScript ();
                var htmlParser = new HtmlParser (scriptingConfig);

                var dom = htmlParser.Parse (page);

                // Per the original sample notes: because the JavaScript configuration is
                // used, the script embedded in the page is executed, so the printed
                // markup reflects its changes.
                Console.WriteLine (dom.DocumentElement.OuterHtml);
            }
    

      

    /// <summary>
    /// Parses a page that carries both a style sheet and an inline script, using a
    /// configuration with JavaScript and CSS support, and prints the resulting markup.
    /// </summary>
    static void ExtendedScriptingSample ()
            {
                // The page to process: a style sheet, an empty #content div, and a script
                // that builds a span inside that div and then removes the script element.
                const string page = @"<!doctype html>
            <html>
            <head><title>Sample</title></head>
            <style>
            .bold {
            font-weight: bold;
            }
            .italic {
            font-style: italic;
            }
            span {
            font-size: 12pt;
            }
            div {
            background: #777;
            color: #f3f3f3;
            }
            </style>
            <body>
            <div id=content></div>
            <script>
            (function() {
            var doc = document;
            var content = doc.querySelector('#content');
            var span = doc.createElement('span');
            span.id = 'myspan';
            span.classList.add('bold', 'italic');
            span.textContent = 'Some sample text';
            content.appendChild(span);
            var script = doc.querySelector('script');
            script.parentNode.removeChild(script);
            })();
            </script>
            </body>";

                // Start from the default configuration, then enable JavaScript and CSS in turn.
                var withScripting = Configuration.Default.WithJavaScript ();
                var fullConfig = withScripting.WithCss ();

                // The parser is created from that custom configuration.
                var htmlParser = new HtmlParser (fullConfig);
                var dom = htmlParser.Parse (page);

                // Per the original sample notes: the output shows that the script ran and
                // that the CSS classes it names were added to the created element.
                Console.WriteLine (dom.DocumentElement.OuterHtml);
            }
    

      下面的示例为html文档注册事件监听器(js中和C#中各注册一个),然后在C#中触发该事件并输出

    /// <summary>
    /// Parses a page whose script registers listeners for the <c>load</c> and
    /// <c>hello</c> events, prints the document, attaches a C# listener for
    /// <c>hello</c> and then dispatches a <c>hello</c> event on the document.
    /// </summary>
    public static void EventScriptingExample ()
            {
                // Sample page; its script logs to the console and adds two event listeners.
                const string page = @"<!doctype html>
            <html>
            <head><title>Event sample</title></head>
            <body>
            <script>
            console.log('Before setting the handler!');
    
            document.addEventListener('load', function() {
            console.log('Document loaded!');
            });
    
            document.addEventListener('hello', function() {
            console.log('hello world from JavaScript!');
            });
    
            console.log('After setting the handler!');
            </script>
            </body>";

                // A JavaScript-enabled configuration is required for the parser.
                var scriptingConfig = Configuration.Default.WithJavaScript ();
                var htmlParser = new HtmlParser (scriptingConfig);
                var dom = htmlParser.Parse (page);

                // Print the parsed document's markup.
                Console.WriteLine (dom.DocumentElement.OuterHtml);

                // Attach a C# handler to the same "hello" event the page script listens to.
                dom.AddEventListener ("hello" , (sender , args) => Console.WriteLine ("hello world from C#!"));

                // Create an event object and initialise it as "hello"; the two flags are
                // presumably bubbles/cancelable (both false) - confirm against the AngleSharp API.
                var helloEvent = dom.CreateEvent ("event");
                helloEvent.Init ("hello" , false , false);

                // Fire the event on the document.
                dom.Dispatch (helloEvent);
            }
    

      

    这是git上的一些示例,我只是翻译了下,

    用了下,的确比CsQuery和HtmlAgilityPack要好用得多

    特别是熟悉Css选择器语法的

    另外上一张图和一张表的对比,该图表是AngleSharp自己的测试

    RUNNING TESTS (v0.9.1)
    ============================================================================
                           AngleSharp           CsQuery        HTMLAgilityPack
    ----------------------------------------------------------------------------
    amazon                     1ms                7ms                0ms
    blogspot                   1ms                2ms                5ms
    smashing                   1ms                1ms                1ms
    youtube                   11ms               15ms               13ms
    weibo                      0ms                0ms                0ms
    yahoo                      8ms               35ms               22ms
    google                     2ms                2ms                8ms
    linkedin                   3ms                2ms                3ms
    pinterest                  1ms                1ms                5ms
    news.google               28ms               34ms               41ms
    baidu                      1ms                1ms                6ms
    codeproject                4ms                4ms                4ms
    ebay                       8ms                8ms                8ms
    msn                       18ms               18ms               13ms
    nbc                        5ms                4ms                8ms
    qq                        17ms              1060ms              52ms
    florian-rappl              0ms                1ms                0ms
    stackoverflow             16ms               15ms               12ms
    html5rocks                 0ms                0ms                0ms
    live                       0ms                0ms                0ms
    taobao                    14ms               15ms                7ms
    huffingtonpost            11ms                9ms               10ms
    wordpress                  1ms                0ms                0ms
    myspace                   20ms               29ms               21ms
    flickr                     3ms                5ms               13ms
    godaddy                    6ms                5ms                7ms
    reddit                     6ms                9ms                6ms
    nytimes                   14ms               13ms               13ms
    peacekeeper.futu...        0ms                0ms                1ms
    pcmag                      9ms               11ms               16ms
    sitepoint                  1ms                2ms                3ms
    html5test                  0ms                1ms                2ms
    spiegel                   15ms               12ms               13ms
    tmall                      2ms                3ms                2ms
    sohu                      20ms               46ms               39ms
    vk                         2ms                0ms                1ms
    wordpress                  2ms                0ms                0ms
    bing                       1ms                1ms                4ms
    tumblr                     2ms                3ms                3ms
    ask                        0ms                0ms                1ms
    mail.ru                    6ms               11ms               15ms
    imdb                       6ms                4ms                6ms
    kickass.to                 0ms                0ms                0ms
    360.cn                     4ms                4ms                8ms
    163                       32ms               45ms               56ms
    neobux                     1ms                0ms                0ms
    aliexpress                10ms                9ms                9ms
    netflix                    4ms                3ms                7ms
    w3                        912ms              579ms             1064ms
    en.wikipedia              37ms               26ms               33ms
    ----------------------------------------------------------------------------
    Total                    1292ms             2080ms             1583ms
    ----------------------------------------------------------------------------
    Fastest                    20                 19                 11
    ----------------------------------------------------------------------------
    Slowest                    13                 12                 25
    ----------------------------------------------------------------------------
    
    
  • 相关阅读:
    pycharm pip 源修改以及包管理(转载)
    zabbix在执行docker命令是报错
    ubuntu 14.04zabbix的安装
    docker搭建zabbix
    docker的安装
    docker registry v2与harbor的搭建
    docker常用命令
    centos安装桌面,下面的几个包缺一不可
    MSSQL数据库高版本迁移到低版本
    .NET开源MSSQL、Redis监控产品Opserver之Exception配置
  • 原文地址:https://www.cnblogs.com/rbzz/p/10037055.html
Copyright © 2011-2022 走看看