zoukankan      html  css  js  c++  java
  • C# .net 采集类

    using System;  
    using System.Collections.Generic;  
    using System.Text;  
    using System.Diagnostics;  
    using System.Text.RegularExpressions;  
    using System.IO;  
    using System.Net;  
     
    namespace Capture
    {
        /// <summary>
        /// Console demo that exercises each CaiJi.GetHtmlSource overload and then
        /// walks a fixed list of sites, printing every page's source.
        /// </summary>
        class Program
        {
            /// <summary>
            /// Writes the given page source to the console and blocks until a key is pressed.
            /// </summary>
            /// <param name="pageSource">Text to print.</param>
            private static void ShowAndPause(string pageSource)
            {
                Console.Write(pageSource);
                Console.ReadKey();
            }

            static void Main(string[] args)
            {
                string[] sites = new string[]
                {
                    "http://www.yongfa365.com",
                    "http://www.cbdcn.com",
                    "http://www.csdn.net",
                    "http://www.sina.com",
                    "http://www.tom.com"
                };

                // Overload taking an Encoding instance.
                ShowAndPause(CaiJi.GetHtmlSource("http://www.yongfa365.com", Encoding.Default));

                // Overload taking only the URL.
                ShowAndPause(CaiJi.GetHtmlSource("http://www.baidu.com/"));

                // Overload taking a charset name.
                ShowAndPause(CaiJi.GetHtmlSource("http://www.tom.com", "utf-8"));

                // Finally fetch every listed site in order, pausing after each one.
                for (int index = 0; index < sites.Length; index++)
                {
                    ShowAndPause(CaiJi.GetHtmlSource(sites[index]));
                }
            }
        }
    }
     
     
    /// <summary>
    /// Web page collector (scraper) helper.
    /// Every overload is best-effort: when the download fails for any reason the
    /// failure is reported through <see cref="Trace"/> and an empty string is returned.
    /// </summary>
    class CaiJi
    {
        /// <summary>
        /// Gets the source of a web page, decoding it with a charset given by name.
        /// </summary>
        /// <param name="url">Page address, e.g. "http://www.yongfa365.com/".</param>
        /// <param name="charset">
        /// Charset name, e.g. "utf-8". Null or empty selects <see cref="Encoding.Default"/>.
        /// </param>
        /// <returns>The page source, or an empty string when the download fails.</returns>
        /// <exception cref="ArgumentException">
        /// <paramref name="charset"/> is not a charset name known to <see cref="Encoding.GetEncoding(string)"/>.
        /// </exception>
        public static string GetHtmlSource(string url, string charset)
        {
            // Resolved outside the best-effort download on purpose: a bad charset name
            // is a caller error and keeps surfacing as an exception.
            Encoding nowCharset = string.IsNullOrEmpty(charset)
                ? Encoding.Default
                : Encoding.GetEncoding(charset);

            return Download(url, nowCharset);
        }

        /// <summary>
        /// Gets the source of a web page, decoding it with the given encoding.
        /// </summary>
        /// <param name="url">Page address, e.g. "http://www.yongfa365.com/".</param>
        /// <param name="charset">
        /// Page encoding, e.g. Encoding.UTF8. Null selects <see cref="Encoding.Default"/>,
        /// matching how the charset-name overload treats null.
        /// </param>
        /// <returns>The page source, or an empty string when the download fails.</returns>
        public static string GetHtmlSource(string url, Encoding charset)
        {
            return Download(url, charset ?? Encoding.Default);
        }

        /// <summary>
        /// Gets the source of a web page using <see cref="Encoding.Default"/>.
        /// The original author notes this works well for pages that carry a BOM,
        /// whatever their encoding (the reader is created with the two-argument
        /// StreamReader constructor, whose BOM detection is enabled by default).
        /// </summary>
        /// <param name="url">Page address, e.g. "http://www.yongfa365.com/".</param>
        /// <returns>The page source, or an empty string when the download fails.</returns>
        public static string GetHtmlSource(string url)
        {
            return Download(url, Encoding.Default);
        }

        /// <summary>
        /// Shared implementation: requests <paramref name="url"/> and reads the whole
        /// response body as text using <paramref name="encoding"/>.
        /// </summary>
        /// <param name="url">Page address to request.</param>
        /// <param name="encoding">Encoding handed to the StreamReader.</param>
        /// <returns>The response body, or an empty string on any failure.</returns>
        private static string Download(string url, Encoding encoding)
        {
            try
            {
                HttpWebRequest request = (HttpWebRequest)WebRequest.Create(url);

                // using blocks release the response, stream and reader even when
                // reading throws, so the underlying connection is not leaked.
                using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
                using (Stream stream = response.GetResponseStream())
                using (StreamReader reader = new StreamReader(stream, encoding))
                {
                    return reader.ReadToEnd();
                }
            }
            catch (Exception ex)
            {
                // Deliberately best-effort: callers rely on getting "" instead of an
                // exception (bad URL, non-HTTP scheme, network error, ...). The failure
                // is traced rather than silently discarded.
                Trace.TraceWarning("CaiJi.GetHtmlSource failed for '{0}': {1}", url, ex.Message);
                return "";
            }
        }
    }
  • 相关阅读:
    Abp 领域事件简单实践 <一>
    Abp session和Cookie
    Abp 聚合测试
    反向代理
    Angular 变更检测
    VS 之github
    Docker 启动SQLServer
    查出第二大的数是多少
    DDD 理解
    ActionBar的简单应用
  • 原文地址:https://www.cnblogs.com/top5/p/1610040.html
Copyright © 2011-2022 走看看