zoukankan      html  css  js  c++  java
  • 一个基于Android系统的新闻客户端(二)

                                 一个基于Android系统的新闻客户端(二)

          三、抓取消息标题

            完整的网络爬虫实现起来非常复杂,本文不做展开,这里只演示如何抓取单个网页的内容。

            C#中有一个类叫WebClient,这个类的成员方法可以下载指定网页的html代码,用法为:

            // Download the HTML of a page and concatenate its lines
            // (line terminators are dropped by ReadLine).
            WebClient wl = new WebClient();

            string str = String.Empty;

            // Dispose the response stream and the reader once the page has been read.
            using (Stream sm = wl.OpenRead("http://xxxxxx"))
            using (StreamReader sr = new StreamReader(sm))
            {
                string ch;

                // ReadLine is a member of StreamReader (not Stream) and returns
                // null at end of input.
                while ((ch = sr.ReadLine()) != null)
                {
                    str += ch;
                }
            }

            新建一个类库项目:Crawler。

            添加类CrawlerMain。

            代码为:

    using System;
    using System.Collections.Generic;
    using System.Configuration;
    using System.IO;
    using System.Linq;
    using System.Net;
    using System.Text;
    using System.Threading.Tasks;
    
    namespace Crawler
    {
        /// <summary>
        /// Downloads the text of the page whose address is stored in the
        /// "Ardess" connection-string entry of the application configuration
        /// and caches it for later calls.
        /// </summary>
        public class CrawlerMain
        {
            private readonly WebClient wc;

            // Cached page text; empty until the first successful download.
            private string New;

            /// <summary>
            /// Creates the crawler with its own <see cref="WebClient"/> and an empty cache.
            /// </summary>
            public CrawlerMain()
            {
                wc = new WebClient();
                New = String.Empty;
            }

            /// <summary>
            /// Runs <see cref="Main_ReadWeb"/> on a thread-pool thread.
            /// </summary>
            /// <returns>A task that yields the downloaded page text.</returns>
            private Task<string> Main_ReadWebAsync()
            {
                return Task.Run<string>(() =>
                {
                    return this.Main_ReadWeb();
                });
            }

            /// <summary>
            /// Downloads the configured page and returns its lines concatenated
            /// without line terminators.
            /// </summary>
            /// <returns>The page text with all line breaks removed.</returns>
            /// <exception cref="ConfigurationErrorsException">
            /// The "Ardess" connection string is not present in the configuration.
            /// </exception>
            public string Main_ReadWeb()
            {
                ConnectionStringSettings setting = ConfigurationManager.ConnectionStrings["Ardess"];
                if (setting == null)
                {
                    // Fail with a descriptive error instead of a NullReferenceException.
                    throw new ConfigurationErrorsException(
                        "The connection string 'Ardess' is missing from the application configuration.");
                }

                StringBuilder page = new StringBuilder();

                // Dispose the response stream and reader even if reading fails.
                using (Stream sm = wc.OpenRead(setting.ConnectionString))
                using (StreamReader sr = new StreamReader(sm))
                {
                    string line;
                    while ((line = sr.ReadLine()) != null)
                    {
                        page.Append(line);
                    }
                }

                return page.ToString();
            }

            /// <summary>
            /// Returns the cached page text, downloading it first if the cache is empty.
            /// </summary>
            /// <remarks>
            /// The download is performed synchronously so that the returned value
            /// is the page text rather than the still-empty cache.
            /// </remarks>
            /// <returns>The page text.</returns>
            public string GetNew()
            {
                if (String.IsNullOrEmpty(New))
                {
                    New = this.Main_ReadWeb();
                }
                return New;
            }

            /// <summary>
            /// Asynchronous counterpart of <see cref="GetNew"/>: returns the cached
            /// page text, downloading it on a thread-pool thread if the cache is empty.
            /// </summary>
            /// <returns>A task that yields the page text.</returns>
            public async Task<string> GetNewAsync()
            {
                if (String.IsNullOrEmpty(New))
                {
                    New = await Main_ReadWebAsync().ConfigureAwait(false);
                }
                return New;
            }

        }
    }
    View Code

            新建一个控制台程序

             代码为:     

    using Crawler;
    using System;
    using System.Collections.Generic;
    using System.IO;
    using System.Linq;
    using System.Text;
    using System.Threading.Tasks;
    
    namespace Test
    {
        /// <summary>
        /// Console driver: downloads the configured page, cuts out the text
        /// between the last "box_01" and the last "box_02" markers, and appends
        /// selected quoted attribute values to "t.txt".
        /// </summary>
        class Program
        {
            /// <summary>
            /// Entry point. Echoes every fragment of the selected region to the
            /// console and writes the extracted values to "t.txt".
            /// </summary>
            /// <param name="args">Command-line arguments (unused).</param>
            static void Main(string[] args)
            {
                string str = new CrawlerMain().Main_ReadWeb();

                // Ordinal search: the markers are identifiers, not linguistic text.
                int start = str.LastIndexOf("box_01", StringComparison.Ordinal);
                int end = str.LastIndexOf("box_02", StringComparison.Ordinal);
                if (start < 0 || end < start)
                {
                    // Substring would throw on a missing or out-of-order marker.
                    Console.WriteLine("Markers \"box_01\" and \"box_02\" were not found in the expected order.");
                    Console.Read();
                    return;
                }

                string str_1 = str.Substring(start, end - start);

                // The region is split on the letter 'a'.
                // NOTE(review): presumably this is why target="_blank" shows up
                // below as the truncated value "_bl" - confirm against the page.
                string[] strNew = str_1.Split(new char[] { 'a' });

                // True when the previous matching fragment carried the value "_bl";
                // the next fragment's value is then written to the file.
                bool j = false;
                foreach (string s in strNew)
                {
                    Console.WriteLine(s);

                    string[] ss = s.Split('=');
                    if (ss.Length <= 1)
                    {
                        continue;
                    }

                    // First double-quoted value following the first '='.
                    string[] sss = ss[1].Split('"');
                    if (sss.Length <= 1)
                    {
                        // No quoted value in this fragment; nothing to extract.
                        continue;
                    }

                    if (j)
                    {
                        File.AppendAllText("t.txt", sss[1] + Environment.NewLine);
                        j = false;
                    }
                    if (sss[1] == "_bl")
                    {
                        j = true;
                    }
                }

                // Keep the console window open until a key is pressed.
                Console.Read();
            }
        }
    }
    View Code

            好了就这样吧!

            

  • 相关阅读:
    Codeforces Round #594 (Div. 2) ABC题
    Codeforces Round #596 (Div. 2) ABCD题
    数据结构实验5——二叉树
    数据结构实验4——字符串匹配
    数据结构实验3——用栈求解算术表达式
    友链
    Codeforces Round #577 (Div. 2)
    Educational Codeforces Round 70 (Rated for Div. 2)
    Codeforces Round #578 (Div. 2)
    2020 Multi-University Training Contest 10(待补
  • 原文地址:https://www.cnblogs.com/liguifa/p/3801508.html
Copyright © 2011-2022 走看看