输入url把网页内容抓下来,然后再加工处理,保存到数据库中,这应该是采集器的原理。
如果要使用C#做的话,就要用WebClient类了。
大气象
参考:http://www.cnblogs.com/titi/archive/2005/11/20/280914.html
using System;
using System.Collections.Generic;
using System.ComponentModel;
using System.Data;
using System.Drawing;
using System.Text;
using System.Windows.Forms;
using System.Net;
namespace FirstWebClient
{
    /// <summary>
    /// Sample form that fetches a web page with <see cref="WebClient"/> and
    /// shows the raw response text in a text box.
    /// </summary>
    public partial class Form1 : Form
    {
        /// <summary>
        /// Creates the form and runs the designer-generated initialization.
        /// </summary>
        public Form1()
        {
            InitializeComponent();
        }

        /// <summary>
        /// Click handler: requests the root path ("/") of http://www.cnblogs.com
        /// with browser-like request headers, decodes the response as UTF-8 and
        /// assigns the text to <c>textBox1</c>.
        /// </summary>
        /// <param name="sender">Event source; not used.</param>
        /// <param name="e">Event data; not used.</param>
        private void button1_Click(object sender, EventArgs e)
        {
            // NOTE(review): OpenRead is a synchronous call, so this handler blocks
            // until the response has been read; failures surface as exceptions
            // to whoever invoked the handler.

            // WebClient, the response stream and the reader are all IDisposable;
            // the using blocks release them even when the request or read throws.
            using (WebClient client = new WebClient())
            {
                // Relative addresses passed to OpenRead are combined with this base.
                client.BaseAddress = "http://www.cnblogs.com";

                // Request headers copied from an Internet Explorer 6 style request.
                client.Headers.Add("Accept", "image/gif, image/x-xbitmap, image/jpeg, image/pjpeg, application/x-shockwave-flash, application/vnd.ms-excel, application/vnd.ms-powerpoint, application/msword, */*");
                client.Headers.Add("Accept-Language", "zh-cn");
                client.Headers.Add("UA-CPU", "x86");
                // Accept-Encoding is intentionally left out; presumably because a
                // compressed body would not be decoded by the plain read below.
                //client.Headers.Add("Accept-Encoding","gzip, deflate");
                client.Headers.Add("User-Agent", "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.2; SV1; .NET CLR 1.1.4322; .NET CLR 2.0.50727)");

                using (System.IO.Stream responseStream = client.OpenRead("/"))
                using (System.IO.StreamReader reader = new System.IO.StreamReader(responseStream, System.Text.Encoding.UTF8))
                {
                    textBox1.Text = reader.ReadToEnd();
                }
            }
        }
    }
}
using System.Collections.Generic;
using System.ComponentModel;
using System.Data;
using System.Drawing;
using System.Text;
using System.Windows.Forms;
using System.Net;
namespace FirstWebClient
{
    /// <summary>
    /// Sample form that fetches a web page with <see cref="WebClient"/> and
    /// shows the raw response text in a text box.
    /// </summary>
    public partial class Form1 : Form
    {
        /// <summary>
        /// Creates the form and runs the designer-generated initialization.
        /// </summary>
        public Form1()
        {
            InitializeComponent();
        }

        /// <summary>
        /// Click handler: requests the root path ("/") of http://www.cnblogs.com
        /// with browser-like request headers, decodes the response as UTF-8 and
        /// assigns the text to <c>textBox1</c>.
        /// </summary>
        /// <param name="sender">Event source; not used.</param>
        /// <param name="e">Event data; not used.</param>
        private void button1_Click(object sender, EventArgs e)
        {
            // NOTE(review): OpenRead is a synchronous call, so this handler blocks
            // until the response has been read; failures surface as exceptions
            // to whoever invoked the handler.

            // WebClient, the response stream and the reader are all IDisposable;
            // the using blocks release them even when the request or read throws.
            using (WebClient client = new WebClient())
            {
                // Relative addresses passed to OpenRead are combined with this base.
                client.BaseAddress = "http://www.cnblogs.com";

                // Request headers copied from an Internet Explorer 6 style request.
                client.Headers.Add("Accept", "image/gif, image/x-xbitmap, image/jpeg, image/pjpeg, application/x-shockwave-flash, application/vnd.ms-excel, application/vnd.ms-powerpoint, application/msword, */*");
                client.Headers.Add("Accept-Language", "zh-cn");
                client.Headers.Add("UA-CPU", "x86");
                // Accept-Encoding is intentionally left out; presumably because a
                // compressed body would not be decoded by the plain read below.
                //client.Headers.Add("Accept-Encoding","gzip, deflate");
                client.Headers.Add("User-Agent", "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.2; SV1; .NET CLR 1.1.4322; .NET CLR 2.0.50727)");

                using (System.IO.Stream responseStream = client.OpenRead("/"))
                using (System.IO.StreamReader reader = new System.IO.StreamReader(responseStream, System.Text.Encoding.UTF8))
                {
                    textBox1.Text = reader.ReadToEnd();
                }
            }
        }
    }
}