zoukankan      html  css  js  c++  java
  • C# 实现抓取网页内容(一)

    一、窗体应用程序界面(原文此处为界面截图,抓取时已丢失;从下面的源码可知,窗体上至少有一个用于输入网址的文本框 textBox1 和一个"开始"按钮 btn_Start):

    二、上源码:

    using System;
    using System.Collections.Generic;
    using System.ComponentModel;
    using System.Data;
    using System.Drawing;
    using System.IO;
    using System.Linq;
    using System.Net;
    using System.Text;
    using System.Threading.Tasks;
    using System.Windows.Forms;

    namespace WebCatchTest0911
    {
        /// <summary>
        /// Main form of the sample: downloads the HTML source of the URL typed into <c>textBox1</c>.
        /// </summary>
        public partial class Form1 : Form
        {
            public Form1()
            {
                InitializeComponent();
            }

            /// <summary>
            /// Cookies gathered from earlier responses. They are replayed on every new request
            /// and merged with whatever each response sets.
            /// </summary>
            public static CookieCollection CC = new CookieCollection();

            /// <summary>
            /// Click handler of the start button: fetches the page whose address is in <c>textBox1</c>.
            /// </summary>
            private void btn_Start_Click(object sender, EventArgs e)
            {
                // NOTE(review): the downloaded source is not consumed here yet, and the download
                // runs synchronously on the calling thread.
                string pageSource = GetWebPageSource(textBox1.Text.Trim());
            }

            /// <summary>
            /// Downloads a page with an HTTP GET and returns its body decoded as GBK.
            /// </summary>
            /// <param name="Url">
            /// Address to fetch. A leading <c>about:</c> scheme is rewritten to <c>http:</c>.
            /// </param>
            /// <returns>
            /// The page source, or an empty string when <paramref name="Url"/> is null/blank,
            /// is not an HTTP(S) address, or the request fails for any reason.
            /// </returns>
            public static string GetWebPageSource(string Url)
            {
                if (string.IsNullOrWhiteSpace(Url))
                {
                    return string.Empty;
                }

                // Only rewrite the scheme. Replacing every "about" in the string would also
                // corrupt paths such as "http://example.com/about".
                const string AboutScheme = "about:";
                if (Url.StartsWith(AboutScheme, StringComparison.OrdinalIgnoreCase))
                {
                    Url = "http:" + Url.Substring(AboutScheme.Length);
                }

                try
                {
                    //http://brand.tmall.com/brandMap.htm
                    HttpWebRequest request = WebRequest.Create(Url) as HttpWebRequest;
                    if (request == null)
                    {
                        // WebRequest.Create returned a non-HTTP request (e.g. file:// or ftp://).
                        return string.Empty;
                    }

                    request.Method = "GET";
                    request.ProtocolVersion = HttpVersion.Version11;
                    request.KeepAlive = true;
                    request.AllowAutoRedirect = true;

                    // The Host header is left to the framework so it always matches the URL.
                    // No Content-Type is sent because a GET carries no body.
                    // "GBK" is a charset, not a content-coding, so it does not belong in
                    // Accept-Encoding; let the framework negotiate and undo gzip/deflate instead.
                    request.AutomaticDecompression = DecompressionMethods.GZip | DecompressionMethods.Deflate;

                    request.Headers.Add("Accept-Language", "zh-cn,zh;q=0.8,en-us;q=0.5,en;q=0.3");
                    request.Headers.Add("Cache-Control", "max-age=0");
                    request.UserAgent = "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/42.0.2311.152 Safari/537.36";
                    request.Accept = "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8";

                    request.CookieContainer = new CookieContainer();
                    CookieCollection knownCookies = CC;
                    if (knownCookies != null)
                    {
                        request.CookieContainer.Add(knownCookies);
                    }

                    // using blocks release the connection and the reader even when reading fails.
                    using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
                    using (Stream body = response.GetResponseStream())
                    using (StreamReader reader = new StreamReader(body, Encoding.GetEncoding("GBK")))
                    {
                        string html = reader.ReadToEnd();

                        // Merge instead of overwrite: a response without Set-Cookie must not
                        // wipe the cookies obtained from earlier responses.
                        if (CC == null)
                        {
                            CC = new CookieCollection();
                        }
                        CC.Add(response.Cookies);

                        return html;
                    }
                }
                catch (Exception ex)
                {
                    // Best-effort contract: callers get "" on failure, but the cause is traced
                    // rather than silently discarded.
                    System.Diagnostics.Trace.WriteLine("GetWebPageSource failed for '" + Url + "': " + ex);
                    return string.Empty;
                }
            }
        }
    }

    三、总结

    1)、HttpWebRequest 的各项请求头参数(如 User-Agent、Accept、Accept-Language、Cookie 等)可以在浏览器开发者工具(按 F12)的"网络(Network)"面板中,通过查看浏览器实际发出的请求得到;

    2)、注意释放资源:HttpWebResponse、响应流和 StreamReader 用完后都要关闭或释放(推荐使用 using 语句,发生异常时也能保证释放);

    四、下章实现提取网页内容

  • 相关阅读:
    [转]Android输入法框的梳理
    [转]Android中OptionMenu的使用
    [转] Android把view的画面转换为bitmap
    Health Level Seven International (HL7)
    [转]andriod的apk文件相关的编译反编译工具
    [转]Android优秀开源项目收集
    而立之年 独立自主
    [转]Android模拟键盘和键盘监听的一些调研
    [转] linux中如何能在DDMS中打开真机中的数据库
    [转]关于使用SurfaceFligner进行绘图的具体实现方法
  • 原文地址:https://www.cnblogs.com/czqbk/p/4801605.html
Copyright © 2011-2022 走看看