zoukankan      html  css  js  c++  java
  • 使用Phantomjs和ChromeDriver添加Cookies的方法

    一、查看代码 :

      

    namespace ToutiaoSpider
    {
        class Program
        {
            /// <summary>
            /// Entry point. Ensures the article-monitor table exists, then loops forever:
            /// fetches the page source via <c>LoadSource</c>, writes it to a timestamped
            /// HTML file under <c>C:\TempFiles\</c>, and sleeps for five minutes.
            /// </summary>
            /// <param name="args">Command-line arguments (not used).</param>
            static void Main(string[] args)
            {
                // Verbatim literal: in a regular string "\T" is an invalid escape and a
                // trailing "\"" would escape the closing quote.
                const string outputDirectory = @"C:\TempFiles\";

                var db = Db.GetDataHelper();
                db.CreateOrUpdateTable(typeof(Models.ArticleMonitorModule));
                while (true)
                {
                    try
                    {
                        // Fetch inside the try so that a failed page load is logged and
                        // retried on the next cycle instead of terminating the process.
                        var src = LoadSource();

                        // Parsing and persisting is currently disabled; to re-enable it:
                        //   var item = GetItem(src);
                        //   if (item != null) db.Insert(item.ToArray());

                        // CreateDirectory is a no-op when the directory already exists.
                        Directory.CreateDirectory(outputDirectory);
                        var fileName = DateTime.Now.ToString("yyyy_MM_dd_HH_mm_ss") + ".html";
                        File.WriteAllText(Path.Combine(outputDirectory, fileName), src);
                    }
                    catch (Exception ex) { Console.WriteLine("Get item error:" + ex.Message); }
                    Thread.Sleep(5 * 60 * 1000);
                }
            }
            /// <summary>
            /// Extracts the text content of <paramref name="src"/>, parses it as JSON and
            /// maps each object in the "data" collection to an <c>ArticleMonitorModule</c> row.
            /// </summary>
            /// <param name="src">Page source whose inner text is expected to be a JSON document.</param>
            /// <returns>
            /// The mapped rows. The list is empty when the payload is not a JSON object,
            /// when "message" is missing or not "success", or when "data" is absent.
            /// </returns>
            static List<Models.ArticleMonitorModule> GetItem(string src)
            {
                List<Models.ArticleMonitorModule> lst = new List<Models.ArticleMonitorModule>();
                HtmlAgilityPack.HtmlDocument docs = new HtmlAgilityPack.HtmlDocument();
                docs.LoadHtml(src);
                var jsonText = docs.DocumentNode.InnerText;

                // "as" instead of a hard cast: an empty or non-object payload yields an
                // empty result rather than an InvalidCastException/NullReferenceException.
                var json = JsonConvert.DeserializeObject(jsonText) as JObject;
                if (json == null) return lst;

                // "message" may be absent, so guard the dereference.
                if (json["message"]?.Value<string>() != "success") return lst;

                var token = json.GetValue("data");
                if (token == null) return lst;

                foreach (var data in token)
                {
                    // String indexing is only valid on JSON objects; skip anything else.
                    var entry = data as JObject;
                    if (entry == null) continue;

                    // Value<int?> tolerates an explicit JSON null, where Value<int> would throw.
                    var go_detail_count = entry["go_detail_count"]?.Value<int?>();
                    var url = entry["display_url"]?.Value<string>();
                    var itemId = entry["item_id"]?.Value<string>();
                    var comments_count = entry["comments_count"]?.Value<int?>();
                    var site = "toutiao.com";
                    ArticleMonitorModule item = new ArticleMonitorModule()
                    {
                        __id = Guid.NewGuid(),
                        article_id = itemId,
                        site = site,
                        url = url,
                        comment_count = comments_count,
                        visit_count = go_detail_count,
                        // UTC+8 wall-clock time, computed independently of the machine's time zone.
                        fetch_time = DateTime.UtcNow.AddHours(8),
                        like_count = null
                    };
                    lst.Add(item);
                }
                return lst;
            }
            /// <summary>
            /// Builds the browser driver used for page loading: a PhantomJS driver on the
            /// default driver service with a 120-second command timeout.
            /// </summary>
            /// <returns>The newly created driver; the caller is responsible for disposing it.</returns>
            static IWebDriver CreateDriver()
            {
                // Chrome or Firefox can be substituted by swapping in the matching
                // *DriverService / *Driver / *Options types with the same timeout.
                TimeSpan commandTimeout = TimeSpan.FromSeconds(120);
                PhantomJSDriverService phantomService = PhantomJSDriverService.CreateDefaultService();
                return new OpenQA.Selenium.PhantomJS.PhantomJSDriver(phantomService, new PhantomJSOptions(), commandTimeout);
            }
            /// <summary>
            /// Opens a fresh browser session, navigates to the article-information URL,
            /// adds the "csrftoken" cookie for that host, refreshes so the request is
            /// re-issued, and returns the resulting page source.
            /// </summary>
            /// <returns>The page source held by the driver once the retry loop ends.</returns>
            /// <remarks>
            /// The driver is always disposed, including when an exception escapes, so no
            /// browser process is left behind by the caller's polling loop.
            /// </remarks>
            static string LoadSource()
            {
                // Loop-invariant request URL, hoisted out of the retry loop.
                const string url = "http://is.snssdk.com/2/article/information/v21/?version_code=6.1.6&app_name=news_article&vid=C5585644-2731-495E-8CF2-B42BBA4D7780&device_id=35980279488&channel=App%20Store&resolution=1125*2001&aid=13&ab_version=120431,134942,136400,126064,122834,130106,126068,128826,134127,136030,137117,136268,137571,126070,136111,116022,135623,125502,137069,125174,135489,133019,137083,126059,137452,135631,136930,122948,137474,137431,31210,133013,135290,131207,114338,133770&ab_feature=z1&openudid=f870822c71509e95ee8f58db8b1d70ce9cb14713&live_sdk_version=1.6.5&idfv=C5585644-2731-495E-8CF2-B42BBA4D7780&ac=WIFI&os_version=10.3.2&ssmix=a&device_platform=iphone&iid=11267657395&ab_client=a1,f2,f7,e1&device_type=iPhone%206S%20Plus&idfa=B1742B5B-DF14-44EF-A325-362873389ABA&aggr_type=1&article_page=0&device_id=35980279488&from_category=news_entertainment&group_id=6411002681368035586";

                // "using" guarantees the session (and its browser process) is shut down
                // even if navigation or PageSource throws.
                using (var driver = CreateDriver())
                {
                    var nav = driver.Navigate();
                    var cookies = driver.Manage().Cookies;
                    int retryCount = 6;
                    while (true)
                    {
                        try
                        {
                            // A page on the target host must be loaded before a cookie
                            // can be added for that host.
                            nav.GoToUrl(url);
                            var uri = new Uri(url);
                            // NOTE(review): hard-coded token value; consider moving it to configuration.
                            cookies.AddCookie(new Cookie("csrftoken", "b9e36219cad78dfe6a1c687d6b368b52", uri.DnsSafeHost, "/", DateTime.Now.AddYears(1)));
                            nav.Refresh();
                            Console.WriteLine("OK!" + DateTime.Now.ToString("yyyy-MM-dd HH:mm:ss"));
                            break;
                        }
                        catch (Exception ex)
                        {
                            // Per the original author's note: with PhantomJS, AddCookie always
                            // throws even though the cookie is actually stored, so a refresh is
                            // still needed here. The refresh is guarded so that a failure inside
                            // this handler cannot abort the retry loop.
                            try { nav.Refresh(); }
                            catch (Exception refreshEx) { Console.WriteLine("refresh failed:" + refreshEx.Message); }

                            Console.WriteLine("retry " + retryCount + ":" + ex.Message);
                            Thread.Sleep(1000);
                            if (retryCount-- <= 0) break;
                        }
                    }
                    return driver.PageSource;
                }
            }
        }

    二、代码解说:

      在Main方法中调用了LoadSource方法。LoadSource方法的第1步是调用CreateDriver方法创建一个Driver(例子中创建的是PhantomJS的Driver),然后通过driver.Navigate()取得Navigate对象,并调用GoToUrl转到一个URL;只有在这之后,才能向driver.Manage().Cookies中添加Cookie。添加完Cookie之后,调用nav的Refresh方法,即可重新发起携带Cookie的请求。不过这里有个小问题:在添加Cookie这一步,使用Chrome时,GoToUrl之后即可直接成功添加Cookie;而在PhantomJS中,AddCookie总是会抛出异常。查看官方文档可知,虽然这里抛出了异常,但Cookie依然是添加成功的。所以我们在catch到异常之后,又调用了一次Refresh去刷新。这时使用Fiddler检测,请求果然携带了Cookie。以上代码只是为了简单快速地测试Demo,所以写得比较乱。

    三、如图:

  • 相关阅读:
    无重叠区间
    ARC109E 1D Reversi Builder
    IOI2021集训队作业129CF Longest Rivers
    IOI2021集训队作业281CA Balanced Diet
    IOI2021集训队作业227CB Branch Assignment
    IOI2021集训队作业277BK Tours
    IOI2021集训队作业125AA Baggage
    6894. 【2020.11.25提高组模拟】小 S 埋地雷 (loj6611. 摧毁时间线)
    ARC108 题解&总结
    HTML教程
  • 原文地址:https://www.cnblogs.com/songxingzhu/p/7110723.html
Copyright © 2011-2022 走看看