zoukankan      html  css  js  c++  java
  • 抓取网页文本内容

    使用的是WebRequest类,在这以http://novel.hongxiu.com/a/1036665/10425842.html为例。

    代码如下:

    using System;
    using System.Collections.Generic;
    using System.ComponentModel;
    using System.Data;
    using System.Drawing;
    using System.IO;
    using System.Net;
    using System.Text;
    using System.Text.RegularExpressions;
    using System.Windows.Forms;
    
    namespace 网页抓取
    {
        /// <summary>
        /// Main form of the page-scraping sample. Downloads the page whose URL is shown in
        /// <c>label1</c>, strips the HTML tags, cuts out the text between two fixed markers,
        /// saves that excerpt to a file and shows it in <c>richTextBox1</c>.
        /// </summary>
        public partial class Form1 : Form
        {
            /// <summary>
            /// Matches a single HTML tag. <c>(.|\n)</c> is used instead of <c>.</c> so that a tag
            /// spanning several lines is matched too. Built once and reused for every call.
            /// </summary>
            private static readonly Regex TagPattern = new Regex("<(.|\n)+?>");

            /// <summary>Text that marks the beginning of the excerpt.</summary>
            private const string StartMarker = "红|袖|言|情|小|说";

            /// <summary>Text that marks the end of the excerpt.</summary>
            private const string EndMarker = "但是什么?";

            /// <summary>
            /// Number of characters kept counting from the first character of the end marker.
            /// The marker itself is 5 characters long, so one following character is kept as well.
            /// </summary>
            private const int EndMarkerSpan = 6;

            /// <summary>
            /// File the excerpt is written to. Verbatim string: in a regular string literal
            /// <c>\x1</c> would be read as a hexadecimal character escape, not as a path separator.
            /// </summary>
            private const string OutputPath = @"E:\x1.txt";

            public Form1()
            {
                InitializeComponent();
            }

            /// <summary>
            /// Fetches the page at the URL in <c>label1.Text</c>, extracts the excerpt between the
            /// start and end markers, writes it to <see cref="OutputPath"/> and displays it.
            /// If the markers are not found, a message box is shown and nothing is written.
            /// </summary>
            public void zhuaqu()
            {
                string html;

                // Send the request and read the whole response body as UTF-8 text.
                // The using blocks release the connection and streams even when reading fails.
                WebRequest request = WebRequest.Create(label1.Text);
                using (WebResponse response = request.GetResponse())
                using (Stream body = response.GetResponseStream())
                using (StreamReader reader = new StreamReader(body, Encoding.UTF8))
                {
                    html = reader.ReadToEnd();
                }

                // Remove the HTML tags, then cut out the part we are interested in.
                string plainText = thtxt(html);
                string excerpt = ExtractExcerpt(plainText);
                if (excerpt == null)
                {
                    MessageBox.Show("The expected start/end markers were not found in the downloaded page.");
                    return;
                }

                // Save the excerpt, then show it on the form.
                using (StreamWriter writer = new StreamWriter(OutputPath))
                {
                    writer.WriteLine(excerpt);
                }

                richTextBox1.Text = excerpt;
            }

            private void button1_Click(object sender, EventArgs e)
            {
                zhuaqu();
            }

            /// <summary>
            /// Returns the part of <paramref name="text"/> that starts at <see cref="StartMarker"/>
            /// and runs <see cref="EndMarkerSpan"/> characters past the start of the first
            /// <see cref="EndMarker"/> that follows it.
            /// </summary>
            /// <param name="text">Tag-free page text; must not be null.</param>
            /// <returns>The excerpt, or <c>null</c> when either marker is missing.</returns>
            private static string ExtractExcerpt(string text)
            {
                int start = text.IndexOf(StartMarker, StringComparison.Ordinal);
                if (start < 0)
                {
                    return null;
                }

                // Search from the start marker onwards so an earlier occurrence of the end
                // marker cannot produce a negative length.
                int end = text.IndexOf(EndMarker, start, StringComparison.Ordinal);
                if (end < 0)
                {
                    return null;
                }

                // Clamp so the extra characters never run past the end of the text.
                int length = Math.Min(end - start + EndMarkerSpan, text.Length - start);
                return text.Substring(start, length);
            }

            /// <summary>
            /// Removes every HTML tag from <paramref name="Html"/> and drops any '&gt;' character
            /// that is left over afterwards.
            /// </summary>
            /// <param name="Html">HTML source; null or empty yields an empty string.</param>
            /// <returns>The text with tags removed.</returns>
            private string thtxt(string Html)
            {
                if (string.IsNullOrEmpty(Html))
                {
                    return string.Empty;
                }

                string stripped = TagPattern.Replace(Html, "");

                // NOTE(review): HTML entities (&lt;, &gt;, &nbsp; ...) are left undecoded here;
                // confirm whether they should be decoded for the plain-text output.
                return stripped.Replace(">", "");
            }
        }
    }

    运行效果

  • 相关阅读:
    Use Gravatar in ASP.NET
    Silverlight ToolkitPivotViewer
    The Future of Silverlight December 2, 2010 at 9:00
    WPF杂记…
    Windows Phone 7开发者站点
    安装 Internet Explorer 9 Beta 的先决条件
    Internet Explorer 9 Beta(多图)
    Expression Blend4 中文
    Silverlight and WPF Virtual books
    Server2008 安装 Zune
  • 原文地址:https://www.cnblogs.com/happinesshappy/p/4579410.html
Copyright © 2011-2022 走看看