zoukankan      html  css  js  c++  java
  • get Toutiao.com news list

    Copy the cookie from your browser session first; then you can run the code below.

    using System;
    using System.Collections.Generic;
    using System.Linq;
    using System.Threading.Tasks;
    using System.Windows.Forms;
    using System.Net;
    using System.Text;
    using HtmlAgilityPack;
    using Newtonsoft.Json;
    
    namespace WindowsFormsApp1
    {
        static class Program
        {
            /// <summary>
            /// Application entry point: applies the WinForms rendering settings and then
            /// fills a new list with article URLs by calling <c>test</c>.
            /// </summary>
            [STAThread]
            static void Main()
            {
                Application.EnableVisualStyles();
                Application.SetCompatibleTextRenderingDefault(false);
                // Application.Run(new Form1());

                // null: no max_behot_time value is supplied for the first request.
                var collectedUrls = new List<string>();
                test(collectedUrls, null);
            }
    
            // Value appended as the "_signature" query parameter of every feed request URL.
            // NOTE(review): presumably captured from a browser session and may expire — confirm.
            static string _signature = "_02B4Z6wo00f01F.sjWQAAIBCgeX81SJdOnBf6YnAAEhxoVC2JRfZ0QahpPnV6EmcPuHz04o3v5s-QKaNBQ2luCcHl2v4nNzEuGllKmCUQVrCn77y3PXmNdXh-3lMlrkkvt4ItWbtvRpTLbfB6f";
            // Value sent as the "cookie" request header.
            // NOTE(review): hard-coded session data in source; consider loading it from configuration.
            static string cookie = "csrftoken=620bbe6e6bec516a228938d9adc9d616; tt_webid=6888172478528259598; MONITOR_WEB_ID=53483fbc-ef71-4859-91cd-1833242346e8; s_v_web_id=verify_kgrjtabo_fuY8uuEL_ry3f_4d0q_8fHv_EJ3P2bCGH4Ky; ttcid=2d01cbca59c4456ba0a8137e857a01e511; tt_scid=LtVXXilIPbrZ2acC-MSiW7ELGN9OtxOtSL0Aof-FE-vtbYbZFhCAJlgknDzQu0uZ32f1";
            // Shared random number generator used by cutRandSection.
            static Random rnd = new Random();
    
            /// <summary>
            /// Downloads one page of the Toutiao feed, appends the article URLs it contains to
            /// <paramref name="listURL"/>, and recurses for the next page until at least
            /// 10 URLs have been collected or the feed stops yielding new pages.
            /// </summary>
            /// <param name="listURL">List that receives the absolute article URLs.</param>
            /// <param name="max_behot_time">
            /// Value of <c>next.max_behot_time</c> from the previous response; null or empty
            /// for the very first request.
            /// </param>
            static void test(List<string> listURL, string max_behot_time)
            {
                const int wantedUrlCount = 10;

                // The first request has no paging value and asks with min_behot_time=0;
                // follow-up requests pass the value returned by the previous page.
                string url = string.IsNullOrEmpty(max_behot_time)
                    ? "https://www.toutiao.com/api/pc/feed/?min_behot_time=0&category=__all__&utm_source=toutiao&widen=1&tadrequire=true&_signature=" + _signature
                    : "https://www.toutiao.com/api/pc/feed/?max_behot_time=" + max_behot_time + "&category=__all__&utm_source=toutiao&widen=1&tadrequire=true&_signature=" + _signature;

                string strJson;
                // using guarantees the client is released on every exit path, including exceptions.
                using (WebClient wc = new WebClient())
                {
                    wc.Encoding = Encoding.UTF8;
                    wc.Headers.Add("referer", "https://www.toutiao.com/");
                    wc.Headers.Add("cookie", cookie);
                    strJson = wc.DownloadString(url);
                }
                if (string.IsNullOrEmpty(strJson)) return;

                var list = JsonConvert.DeserializeObject<NewsObject>(strJson);
                // An empty page means there is nothing more to collect; stop instead of recursing forever.
                if (list == null || list.data == null || list.data.Length == 0) return;

                // source_url looks like "/group/6887092770658320903/"; prefixing the host gives
                // https://www.toutiao.com/group/6887092770658320903/
                foreach (var item in list.data)
                {
                    if (string.IsNullOrEmpty(item.source_url)) continue;
                    listURL.Add("https://www.toutiao.com" + item.source_url);
                }

                if (listURL.Count >= wantedUrlCount) return;
                if (list.next == null) return;

                string nextPageValue = "" + list.next.max_behot_time;
                // Same paging value as this call would fetch the same page again: no progress, stop.
                if (nextPageValue == max_behot_time) return;

                test(listURL, nextPageValue);

                // Follow-up idea (not wired up): download each collected URL and pass the HTML
                // through parseHtml and cutRandSection.
            }
    
    
            /// <summary>
            /// Returns the inner text of the first <c>article</c> element found in an HTML document.
            /// </summary>
            /// <param name="strHtml">Raw HTML of the page.</param>
            /// <returns>
            /// The article's inner text, or an empty string when the input is null/empty or
            /// the document contains no <c>article</c> element.
            /// </returns>
            static string parseHtml(string strHtml)
            {
                if (string.IsNullOrEmpty(strHtml)) return string.Empty;

                // Fully qualified: System.Windows.Forms also declares an HtmlDocument type.
                HtmlAgilityPack.HtmlDocument doc = new HtmlAgilityPack.HtmlDocument();
                doc.LoadHtml(strHtml);

                // SelectSingleNode yields null when the XPath matches nothing, so guard before dereferencing.
                HtmlNode articleNode = doc.DocumentNode.SelectSingleNode("//article");
                return articleNode == null ? string.Empty : articleNode.InnerText;
            }
    
    
            /// <summary>
            /// Picks one random non-empty line from a block of text.
            /// </summary>
            /// <param name="htmlText">Text to split on line breaks.</param>
            /// <returns>
            /// A randomly chosen line, or the unchanged input when it is null, empty, or
            /// contains no non-empty line.
            /// </returns>
            static string cutRandSection(string htmlText)
            {
                if (string.IsNullOrEmpty(htmlText)) return htmlText;

                // Split on both CR and LF so Windows and Unix line endings are handled alike.
                string[] sections = htmlText.Split(new[] { '\r', '\n' }, StringSplitOptions.RemoveEmptyEntries);
                if (sections.Length == 0) return htmlText;

                // Random.Next's upper bound is exclusive, so this yields an index in 0..Length-1.
                int pick = rnd.Next(0, sections.Length);
                return sections[pick];
            }
    
            /// <summary>
            /// Current time as a decimal string of whole seconds elapsed since
            /// 1970-01-01 00:00:00 UTC.
            /// </summary>
            static string currTimeStamp
            {
                get
                {
                    // DateTime tick value (100 ns units) of 1970-01-01 00:00:00.
                    const long unixEpochTicks = 621355968000000000;
                    const long ticksPerSecond = 10000000;

                    long utcTicks = DateTime.Now.ToUniversalTime().Ticks;
                    long wholeSeconds = (utcTicks - unixEpochTicks) / ticksPerSecond;
                    return "" + wholeSeconds;
                }
            }
        }
    
        #region NewsObject
        /// <summary>
        /// Deserialization target for the JSON body returned by the <c>/api/pc/feed/</c> request.
        /// </summary>
        public class NewsObject
        {
            // Not read by the visible code.
            public bool has_more { get; set; }
            // Not read by the visible code.
            public string message { get; set; }
            // Feed items; each item's source_url is turned into an absolute article URL.
            public Datum[] data { get; set; }
            // Carries the max_behot_time value used to request the following page.
            public Next next { get; set; }
        }
    
        /// <summary>
        /// Paging information of a feed response.
        /// </summary>
        public class Next
        {
            // Sent back as the max_behot_time query parameter of the next feed request.
            // NOTE(review): Datum.behot_time is declared long while this is int — confirm the value range.
            public int max_behot_time { get; set; }
        }
    
        /// <summary>
        /// One item of the feed response's <c>data</c> array. Only <c>source_url</c> is
        /// read by the visible code; the other members simply receive their JSON values.
        /// </summary>
        public class Datum
        {
            public bool single_mode { get; set; }

            // "abstract" is a C# keyword, so the property carries an underscore prefix.
            // Json.NET matches members by name, so without an explicit mapping the
            // underscore-prefixed property would never be populated.
            // NOTE(review): assumes the JSON key is "abstract" — confirm against a live response.
            [JsonProperty("abstract")]
            public string _abstract { get; set; }

            public bool middle_mode { get; set; }
            public bool more_mode { get; set; }
            public string tag { get; set; }
            public int comments_count { get; set; }
            public string tag_url { get; set; }
            public string title { get; set; }
            public string chinese_tag { get; set; }
            public string source { get; set; }
            public long group_source { get; set; }
            public bool has_gallery { get; set; }
            public string media_url { get; set; }
            public string media_avatar_url { get; set; }

            // Site-relative article path; the caller prefixes "https://www.toutiao.com".
            public string source_url { get; set; }

            public string article_genre { get; set; }
            public string item_id { get; set; }
            public bool is_feed_ad { get; set; }
            public long behot_time { get; set; }
            public string image_url { get; set; }
            public string group_id { get; set; }
            public string middle_image { get; set; }
            public Image_List[] image_list { get; set; }
        }
    
        /// <summary>
        /// Element type of <c>Datum.image_list</c>; holds a single URL string.
        /// </summary>
        public class Image_List
        {
            // Not read by the visible code.
            public string url { get; set; }
        }
    
        #endregion
    
    
    
    
    
    
    
    }
    

      

    Generate an article from search results:

    using System;
    using System.Collections.Generic;
    using System.Linq;
    using System.Threading.Tasks;
    using System.Windows.Forms;
    using System.Net;
    using System.Text;
    using HtmlAgilityPack;
    using Newtonsoft.Json;
    using System.Diagnostics;
    
    namespace WindowsFormsApp1
    {
        static class Program
        {
            /// <summary>
            /// Application entry point: applies the WinForms rendering settings, collects
            /// search-result URLs for <c>keyWords</c> and builds an article from them.
            /// </summary>
            [STAThread]
            static void Main()
            {
                Application.EnableVisualStyles();
                Application.SetCompatibleTextRenderingDefault(false);
                // Application.Run(new Form1());

                // Use a default search phrase when the configured one is empty.
                if (keyWords == "")
                {
                    keyWords = "AI智能写作";
                }

                var articleUrls = new List<string>();
                getArticleList(articleUrls, 0);
                MakeArticle(articleUrls);
            }
    
    
            // Search phrase; URL-escaped into the "keyword" query parameter by getArticleList.
            // Main replaces an empty value with a default phrase.
            static string keyWords= "蚂蚁金服上市";
            // Value appended as the "_signature" query parameter of the search request URL.
            // NOTE(review): presumably captured from a browser session and may expire — confirm.
            static string _signature = "_02B4Z6wo00f01F.sjWQAAIBCgeX81SJdOnBf6YnAAEhxoVC2JRfZ0QahpPnV6EmcPuHz04o3v5s-QKaNBQ2luCcHl2v4nNzEuGllKmCUQVrCn77y3PXmNdXh-3lMlrkkvt4ItWbtvRpTLbfB6f";
            // Value sent as the "cookie" request header by HttpGet.
            // NOTE(review): hard-coded session data in source; consider loading it from configuration.
            static string cookie = "csrftoken=620bbe6e6bec516a228938d9adc9d616; tt_webid=6888172478528259598; MONITOR_WEB_ID=53483fbc-ef71-4859-91cd-1833242346e8; s_v_web_id=verify_kgrjtabo_fuY8uuEL_ry3f_4d0q_8fHv_EJ3P2bCGH4Ky; ttcid=2d01cbca59c4456ba0a8137e857a01e511; tt_scid=LtVXXilIPbrZ2acC-MSiW7ELGN9OtxOtSL0Aof-FE-vtbYbZFhCAJlgknDzQu0uZ32f1";
            // Shared random number generator used by cutRandSection.
            static Random rnd = new Random();
            // When true, MakeArticle takes the first picture URL found on each page.
            static bool includePic = true;
            
            /// <summary>
            /// Requests one page of search results for <c>keyWords</c>, appends the URL of every
            /// result that has an <c>open_url</c> to <paramref name="listURL"/>, and recurses
            /// onto the following page until at least 20 URLs are collected or a page comes
            /// back empty.
            /// </summary>
            /// <param name="listURL">List that receives the absolute article URLs.</param>
            /// <param name="pageIndex">Zero-based page number; the request offset is pageIndex * 20.</param>
            static void getArticleList(List<string> listURL, int pageIndex)
            {
                const int pageCount = 20;
                const int wantedUrlCount = 20;

                string url = "https://www.toutiao.com/api/search/content/?aid=24&app_name=web_search&offset=" + (pageIndex * pageCount)
                    + "&format=json&keyword=" + Uri.EscapeDataString(keyWords)
                    + "&autoload=true&count=" + pageCount
                    + "&en_qc=1&cur_tab=1&from=search_tab&pd=synthesis&timestamp=" + currTimeStamp + "000"
                    + "&_signature=" + _signature;

                // HttpGet returns an empty string when the request fails.
                string strJson = HttpGet(url);
                if (string.IsNullOrEmpty(strJson)) return;

                var list = JsonConvert.DeserializeObject<NewsObject>(strJson);
                // No items on this page: nothing further to fetch, so stop paging.
                if (list == null || list.data == null || list.data.Length == 0) return;

                foreach (var d in list.data.Where(n => !string.IsNullOrEmpty(n.open_url)))
                {
                    listURL.Add("https://www.toutiao.com" + d.open_url);
                }

                if (listURL.Count >= wantedUrlCount) return;

                // pageIndex + 1, not pageIndex++: the post-increment would pass the old value
                // and request the same page again without end.
                getArticleList(listURL, pageIndex + 1);
            }
            /// <summary>
            /// Builds an HTML fragment by downloading each URL, taking one random line of the
            /// page's article text and, when <c>includePic</c> is set and the page has a
            /// picture, prefixing it with an image paragraph. Stops once the fragment is at
            /// least 800 characters long.
            /// </summary>
            /// <param name="_listURL">Article page URLs to visit in order; null is treated as empty.</param>
            /// <returns>The assembled fragment; empty when no page yielded article text.</returns>
            static string MakeArticle(List<string> _listURL)
            {
                const int wantedLength = 800;

                if (_listURL == null) return string.Empty;

                var article = new StringBuilder();
                foreach (var url in _listURL)
                {
                    // HttpGet returns an empty string when the download fails.
                    string strHtml = HttpGet(url);
                    if (string.IsNullOrEmpty(strHtml)) continue;

                    ContentData cdata = ParseContentData(strHtml);
                    if (string.IsNullOrEmpty(cdata.InnerText)) continue;

                    // Emit the image paragraph only when there really is a picture; an
                    // <img> with an empty src would render as a broken image.
                    if (includePic && cdata.InnerPicUrls != null && cdata.InnerPicUrls.Count > 0)
                    {
                        article.Append("<p><img src='").Append(cdata.InnerPicUrls[0]).Append("'/></p>");
                    }

                    article.Append(cutRandSection(cdata.InnerText));
                    if (article.Length >= wantedLength) break;
                }

                return article.ToString();
            }
    
    
            /// <summary>
            /// Parses a page and extracts the inner text of its first <c>article</c> element
            /// plus the absolute (http/https) <c>src</c> URLs of all <c>img</c> elements in the document.
            /// </summary>
            /// <param name="strHtml">Raw HTML of the page.</param>
            /// <returns>
            /// A <c>ContentData</c> that always carries the source HTML. <c>InnerText</c> stays
            /// null and no picture URLs are collected when the page has no <c>article</c>
            /// element or parsing fails.
            /// </returns>
            static ContentData ParseContentData(string strHtml)
            {
                ContentData cdata = new ContentData() { SoruceHtml = strHtml };
                try
                {
                    // Fully qualified: System.Windows.Forms also declares an HtmlDocument type.
                    HtmlAgilityPack.HtmlDocument doc = new HtmlAgilityPack.HtmlDocument();
                    doc.LoadHtml(strHtml);
                    HtmlNode rootnode = doc.DocumentNode;

                    HtmlNode articleNode = rootnode.SelectSingleNode("//article");
                    if (articleNode == null) return cdata;
                    cdata.InnerText = articleNode.InnerText;

                    var imgNodes = rootnode.SelectNodes("//img");
                    if (imgNodes == null) return cdata;

                    foreach (var img in imgNodes)
                    {
                        // GetAttributeValue returns the default for an <img> without src, so one
                        // such tag no longer aborts the collection of the remaining pictures.
                        string picUrl = img.GetAttributeValue("src", "");
                        if (!picUrl.StartsWith("http", StringComparison.OrdinalIgnoreCase)) continue;
                        cdata.InnerPicUrls.Add(picUrl);
                    }
                }
                catch (Exception ex)
                {
                    // Parsing stays best-effort (partial data is returned), but the failure is traced.
                    System.Diagnostics.Debug.WriteLine("ParseContentData failed: " + ex);
                }
                return cdata;
            }
    
            /// <summary>
            /// Two-member enumeration; not referenced by any other code visible in this file.
            /// </summary>
            enum ddd
            {
                eee,
                eeer
            }
            /// <summary>
            /// Picks one random non-empty line from a block of text.
            /// </summary>
            /// <param name="htmlText">Text to split on line breaks.</param>
            /// <returns>
            /// A randomly chosen line, or the unchanged input when it is null, empty, or
            /// contains no non-empty line.
            /// </returns>
            static string cutRandSection(string htmlText)
            {
                if (string.IsNullOrEmpty(htmlText)) return htmlText;

                // Split on both CR and LF so Windows and Unix line endings are handled alike.
                string[] sections = htmlText.Split(new[] { '\r', '\n' }, StringSplitOptions.RemoveEmptyEntries);
                if (sections.Length == 0) return htmlText;

                // Random.Next's upper bound is exclusive, so this yields an index in 0..Length-1.
                int pick = rnd.Next(0, sections.Length);
                return sections[pick];
            }
    
            /// <summary>
            /// Current time as a decimal string of whole seconds elapsed since
            /// 1970-01-01 00:00:00 UTC.
            /// </summary>
            static string currTimeStamp
            {
                get
                {
                    // DateTime tick value (100 ns units) of 1970-01-01 00:00:00.
                    const long unixEpochTicks = 621355968000000000;
                    const long ticksPerSecond = 10000000;

                    long utcTicks = DateTime.Now.ToUniversalTime().Ticks;
                    long wholeSeconds = (utcTicks - unixEpochTicks) / ticksPerSecond;
                    return "" + wholeSeconds;
                }
            }
    
    
            /// <summary>
            /// Downloads a URL as UTF-8 text, sending the Toutiao referer and the configured cookie.
            /// </summary>
            /// <param name="url">Address to download.</param>
            /// <returns>The response body, or an empty string when the request fails for any reason.</returns>
            public static string HttpGet(string url)
            {
                try
                {
                    // using releases the client even when DownloadString throws.
                    using (WebClient wc = new WebClient())
                    {
                        wc.Encoding = Encoding.UTF8;
                        wc.Headers.Add("referer", "https://www.toutiao.com/");
                        wc.Headers.Add("cookie", cookie);
                        return wc.DownloadString(url);
                    }
                }
                catch (Exception ex)
                {
                    // Callers treat "" as "skip this URL", so stay best-effort but trace the cause.
                    System.Diagnostics.Debug.WriteLine("HttpGet failed for " + url + ": " + ex.Message);
                    return "";
                }
            }
    
    
    
        }
    
        /// <summary>
        /// Result of parsing one article page: the raw HTML, the extracted article text
        /// and the picture URLs found on the page.
        /// </summary>
        public class ContentData
        {
            /// <summary>Raw HTML the data was extracted from (member name is spelled "Soruce").</summary>
            public string SoruceHtml { get; set; }

            /// <summary>Inner text of the page's article element; null until assigned.</summary>
            public string InnerText { get; set; }

            /// <summary>Picture URLs found on the page; starts out as an empty list.</summary>
            public List<string> InnerPicUrls { get; set; } = new List<string>();
        }
    
    
    
        #region NewsObject
    
    
        /// <summary>
        /// Deserialization target for the JSON body returned by the
        /// <c>/api/search/content/</c> request. Only <c>data</c> is read by the visible code.
        /// </summary>
        public class NewsObject
        {
            public int count { get; set; }
            public int return_count { get; set; }
            public string query_id { get; set; }
            public int has_more { get; set; }
            public string request_id { get; set; }
            public string search_id { get; set; }
            public long cur_ts { get; set; }
            public int offset { get; set; }
            public string message { get; set; }
            public string pd { get; set; }
            public int show_tabs { get; set; }
            public string keyword { get; set; }
            public string city { get; set; }
            public string[] tokens { get; set; }
            // Search result items; entries with a non-empty open_url become article URLs.
            public Datum[] data { get; set; }
            public int status { get; set; }
        }
    
    
    
        /// <summary>
        /// One item of the search response's <c>data</c> array.
        /// </summary>
        public class Datum
        {
            // Not read by the visible code.
            public string create_time { get; set; }
            // Site-relative path; getArticleList prefixes "https://www.toutiao.com" and skips items where it is empty.
            public string open_url { get; set; }
    
            // Not read by the visible code.
            public string source_url { get; set; }
    
        }
    
    
    
        #endregion
    
    
    
    
    
    
    }
    

      

  • 相关阅读:
    《算法》C++代码 Floyd
    《算法》C++代码 快速排序
    3-3当访问到一个文件跳转到另一个文件
    分别应用include指令和include动作标识在一个jsp页面中包含一个文件。
    历届试题 蚂蚁感冒
    HDU 2817 A sequence of numbers
    HDU-2018 母牛的故事
    算法提高 复数归一化
    算法提高 十进制数转八进制数
    算法提高 约数个数
  • 原文地址:https://www.cnblogs.com/wgscd/p/13884546.html
Copyright © 2011-2022 走看看