zoukankan      html  css  js  c++  java
  • C# 网页数据表格抓取数据

    主要方法:

    /// <summary>
    /// Downloads the page whose address is typed into <c>txtUrl</c> and extracts the
    /// text of its HTML table rows.
    /// </summary>
    /// <returns>
    /// One string per matched <c>&lt;tr&gt;...&lt;/tr&gt;</c> row; each string is the row's
    /// matched cell values, every value followed by a single space.
    /// </returns>
    public List<string> datasearch()
    {
        string url = txtUrl.Text.Trim();

        // "UTF-8" selects UTF-8; every other choice (including "Default" or no
        // selection at all) falls back to the system default encoding.
        object selected = cboCode.SelectedItem;
        Encoding encoding = (selected != null && selected.ToString() == "UTF-8")
            ? Encoding.UTF8
            : Encoding.Default;

        // Read the whole response body into memory. The using blocks release the
        // response, stream and reader even when the download throws.
        string html;
        WebRequest request = WebRequest.Create(url);
        using (WebResponse response = request.GetResponse())
        using (Stream body = response.GetResponseStream())
        using (StreamReader reader = new StreamReader(body, encoding))
        {
            html = reader.ReadToEnd();
        }

        return ExtractTableRows(html);
    }

    /// <summary>
    /// Extracts table rows from raw HTML using regular expressions.
    /// </summary>
    /// <param name="html">The HTML text to scan.</param>
    /// <returns>One space-separated line of cell values per matched row.</returns>
    private static List<string> ExtractTableRows(string html)
    {
        // Row pattern: everything between a bare "<tr>" and the next "</tr>".
        // [\s\S] is used so that the match can span line breaks.
        Regex rowRegex = new Regex(@"(?<=<tr>)([\s\S]*?)(?=</tr>)");

        // Cell pattern: a run of non-whitespace characters between "<td ...>" and
        // "</td>", ignoring surrounding whitespace. The quantifier is lazy so that a
        // match stops at the first closing tag instead of swallowing following cells.
        // NOTE(review): cells whose text contains inner whitespace are not matched.
        Regex cellRegex = new Regex(@"(?<=<td[^>]*>[\s]*?)([\S]*?)(?=[\s]*?</td>)");

        List<string> rows = new List<string>();
        foreach (Match row in rowRegex.Matches(html))
        {
            StringBuilder line = new StringBuilder();
            foreach (Match cell in cellRegex.Matches(row.Value))
            {
                line.Append(cell.Value).Append(' ');
            }
            rows.Add(line.ToString());
        }

        return rows;
    }

    Form1.cs 文件

    using System;
    using System.Collections.Generic;
    using System.ComponentModel;
    using System.Data;
    using System.Drawing;
    using System.Linq;
    using System.Text;
    using System.Windows.Forms;
    using System.Text.RegularExpressions;
    using System.IO;
    using System.Net;

    namespace Demo
    {
    /// <summary>
    /// Demo window that downloads a web page, extracts the text of its HTML table
    /// rows, shows the result and can save or append it to a text file.
    /// </summary>
    public partial class Form1 : Form
    {
        /// <summary>
        /// Builds the form and preselects the first entry of the encoding combo box.
        /// </summary>
        public Form1()
        {
            InitializeComponent();
            cboCode.SelectedIndex = 0;
        }

        /// <summary>
        /// Downloads the page whose address is typed into <c>txtUrl</c> and extracts the
        /// text of its HTML table rows.
        /// </summary>
        /// <returns>
        /// One string per matched <c>&lt;tr&gt;...&lt;/tr&gt;</c> row; each string is the row's
        /// matched cell values, every value followed by a single space.
        /// </returns>
        public List<string> datasearch()
        {
            string url = txtUrl.Text.Trim();

            // "UTF-8" selects UTF-8; every other choice (including "Default" or no
            // selection at all) falls back to the system default encoding.
            object selected = cboCode.SelectedItem;
            Encoding encoding = (selected != null && selected.ToString() == "UTF-8")
                ? Encoding.UTF8
                : Encoding.Default;

            // Read the whole response body into memory. The using blocks release the
            // response, stream and reader even when the download throws.
            string html;
            WebRequest request = WebRequest.Create(url);
            using (WebResponse response = request.GetResponse())
            using (Stream body = response.GetResponseStream())
            using (StreamReader reader = new StreamReader(body, encoding))
            {
                html = reader.ReadToEnd();
            }

            return ExtractTableRows(html);
        }

        /// <summary>
        /// Extracts table rows from raw HTML using regular expressions.
        /// </summary>
        /// <param name="html">The HTML text to scan.</param>
        /// <returns>One space-separated line of cell values per matched row.</returns>
        private static List<string> ExtractTableRows(string html)
        {
            // Row pattern: everything between a bare "<tr>" and the next "</tr>".
            // [\s\S] is used so that the match can span line breaks.
            Regex rowRegex = new Regex(@"(?<=<tr>)([\s\S]*?)(?=</tr>)");

            // Cell pattern: a run of non-whitespace characters between "<td ...>" and
            // "</td>", ignoring surrounding whitespace. The quantifier is lazy so that a
            // match stops at the first closing tag instead of swallowing following cells.
            // NOTE(review): cells whose text contains inner whitespace are not matched.
            Regex cellRegex = new Regex(@"(?<=<td[^>]*>[\s]*?)([\S]*?)(?=[\s]*?</td>)");

            List<string> rows = new List<string>();
            foreach (Match row in rowRegex.Matches(html))
            {
                StringBuilder line = new StringBuilder();
                foreach (Match cell in cellRegex.Matches(row.Value))
                {
                    line.Append(cell.Value).Append(' ');
                }
                rows.Add(line.ToString());
            }

            return rows;
        }

        /// <summary>
        /// Runs the extraction and shows the rows in <c>txtResult</c>, one row per line.
        /// </summary>
        private void btnGet_Click(object sender, EventArgs e)
        {
            StringBuilder text = new StringBuilder();
            foreach (string row in datasearch())
            {
                // Every row, including the last one, is terminated by a line break.
                text.AppendLine(row);
            }
            txtResult.Text = text.ToString();
        }

        /// <summary>
        /// Writes the result text to the file named in <c>txtSaveUrl</c>, replacing any
        /// existing content.
        /// </summary>
        private void btnSave_Click(object sender, EventArgs e)
        {
            // Example target: C:\Users\Administrator\Desktop\Work File\1.txt
            TXTHelper.TxtSaveByStr(txtSaveUrl.Text.Trim(), txtResult.Text);
            MessageBox.Show("存储成功!");
        }

        /// <summary>
        /// Appends the result text to the file named in <c>txtSaveUrl</c>.
        /// </summary>
        private void btnAdd_Click(object sender, EventArgs e)
        {
            TXTHelper.TxtAddByStr(txtSaveUrl.Text.Trim(), txtResult.Text);
            MessageBox.Show("存储添加成功!");
        }
    }
    }

    TXTHelper.cs 文件

    using System;
    using System.Collections.Generic;
    using System.Linq;
    using System.Text;
    using System.IO;
    using System.Collections.Specialized;

    namespace Demo
    {
    /// <summary>
    /// Static helpers for reading, overwriting, appending to and inspecting text files.
    /// </summary>
    public class TXTHelper
    {
        /// <summary>
        /// Reads a text file line by line.
        /// </summary>
        /// <param name="url">Path of the file to read; the file must already exist.</param>
        /// <returns>The lines of the file, in order, without their line terminators.</returns>
        public static StringCollection Read_txt(string url)
        {
            StringCollection collection = new StringCollection();

            // The using blocks close the reader and the file even if reading throws.
            using (FileStream fs = new FileStream(url, FileMode.Open, FileAccess.Read))
            using (StreamReader reader = new StreamReader(fs))
            {
                string line;
                while ((line = reader.ReadLine()) != null)
                {
                    collection.Add(line);
                }
            }

            return collection;
        }

        /// <summary>
        /// Writes the text "1" at the very start of the file, creating the file when it
        /// does not exist. Existing content after the first character is left in place.
        /// </summary>
        /// <param name="url">Path of the file to modify.</param>
        public static void Update_txt(string url)
        {
            // OpenOrCreate does not truncate, so only the leading bytes are overwritten.
            using (FileStream fs = new FileStream(url, FileMode.OpenOrCreate, FileAccess.Write))
            using (StreamWriter sw = new StreamWriter(fs))
            {
                sw.Write("1");
            }
        }

        /// <summary>
        /// Writes the given text to a file, replacing any existing content.
        /// </summary>
        /// <param name="savePath">Path of the file to create or overwrite.</param>
        /// <param name="txtStr">Text to write.</param>
        public static void TxtSaveByStr(string savePath, string txtStr)
        {
            // Disposing the writer flushes it and closes the underlying file.
            using (FileStream fs = new FileStream(savePath, FileMode.Create))
            using (StreamWriter sw = new StreamWriter(fs))
            {
                sw.Write(txtStr);
            }
        }

        /// <summary>
        /// Appends the given text to the end of a file, creating the file when it does
        /// not exist.
        /// </summary>
        /// <param name="savePath">Path of the file to append to.</param>
        /// <param name="txtStr">Text to append.</param>
        public static void TxtAddByStr(string savePath, string txtStr)
        {
            // Disposing the writer flushes it and closes the underlying file.
            using (FileStream fs = new FileStream(savePath, FileMode.Append))
            using (StreamWriter sw = new StreamWriter(fs))
            {
                sw.Write(txtStr);
            }
        }

        /// <summary>
        /// Returns the last-write time of a file.
        /// </summary>
        /// <param name="url">Path of the file to inspect.</param>
        /// <returns>The value of <see cref="FileInfo.LastWriteTime"/> for that path.</returns>
        public static DateTime File_Info(string url)
        {
            return new FileInfo(url).LastWriteTime;
        }
    }
    }

    运行结果图:

  • 相关阅读:
    设计模式基本原则及实例
    Springboot中发送邮件util
    mysql表关联查询索引不生效问题
    个人读书清单整理
    mysql 显示每条记录行号
    Axure教程
    Tomcat配置及原理文章
    HTTPS 简单学习
    Python实现二叉树的非递归先序遍历
    和HTTP相关的web服务器内容
  • 原文地址:https://www.cnblogs.com/pengJk/p/6423923.html
Copyright © 2011-2022 走看看