zoukankan      html  css  js  c++  java
  • HttpWebRequest 下载网页Html代码 下载文件(Remote和FTP)Get方式

    在.net中可以使用XmlHttp,WebClient,HttpWebRequest等方式下载网页html源码。

    使用XmlHttp需要引用Microsoft.Xml。使用HttpWebRequest时,如果网站使用了反爬虫技术,则需要模拟浏览器环境进行访问,才能返回相应的html源码,否则返回的内容将为空,如下所示:

    例如:某电子商务网站中有站内搜索

    查看源码或浏览器上的URL

    这样就可以使用GET直接请求。

    public class WebPageUtil
    {
    // Some sites use anti-crawler measures and only return data when the request looks like it
    // comes from a browser; the values below are attached to every request to imitate one.
    // They are never reassigned, so they are declared readonly.

    // Cookie container shared by every request issued through this class.
    private static readonly CookieContainer cookie = new CookieContainer();
    // Value assigned to the request's Content-Type header.
    private static readonly string contentType = "application/x-www-form-urlencoded;";
    // Value assigned to the request's Accept header.
    private static readonly string accept = "image/gif, image/x-xbitmap, image/jpeg, image/pjpeg, application/x-shockwave-flash, application/x-silverlight, application/vnd.ms-excel, application/vnd.ms-powerpoint, application/msword, application/x-ms-application, application/x-ms-xbap, application/vnd.ms-xpsdocument, application/xaml+xml, application/x-silverlight-2-b1, */*";
    // Value assigned to the request's User-Agent header.
    private static readonly string userAgent = "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; .NET CLR 2.0.50727; .NET CLR 3.0.04506.648; .NET CLR 3.5.21022)";

    /// <summary>
    /// Issues an HTTP GET for <paramref name="url"/> followed by the URI-escaped
    /// <paramref name="keyword"/> and reads the whole response body as text.
    /// </summary>
    /// <param name="url">Request URL prefix; the escaped keyword is appended to it verbatim.</param>
    /// <param name="keyword">Search keyword appended to <paramref name="url"/>; null is treated as an empty string.</param>
    /// <param name="encoding">Encoding used to decode the response body.</param>
    /// <param name="newUrl">Receives the URL that was actually requested (url + escaped keyword).</param>
    /// <returns>
    /// Tuple&lt;bool,string,string&gt; = success flag, page source, exception message.
    /// On failure the flag is false and the third item carries the exception message.
    /// </returns>
    public static Tuple<bool, string, string> GetHtmlSourceCode(string url, string keyword, Encoding encoding, out string newUrl)
    {
        bool methodStatus = false;
        string pageHtml = "", exceptionInfo = "";

        // Escape the keyword before appending it. A null keyword is treated as empty so the
        // caller gets the normal result tuple instead of an ArgumentNullException.
        // NOTE(review): EscapeUriString leaves reserved characters such as '&', '=' and '?'
        // untouched; if the keyword is always a single query value, Uri.EscapeDataString is
        // the safer choice - confirm against callers before switching.
        newUrl = url + System.Uri.EscapeUriString(keyword ?? string.Empty);

        try
        {
            HttpWebRequest request = (HttpWebRequest)WebRequest.Create(newUrl);
            request.UserAgent = userAgent;
            request.ContentType = contentType;
            request.CookieContainer = cookie;
            request.Accept = accept;
            request.Method = "GET";
            request.Timeout = 30 * 1000; // milliseconds

            // using blocks release the response, stream and reader on every path.
            using (WebResponse response = request.GetResponse())
            using (Stream responseStream = response.GetResponseStream())
            using (StreamReader reader = new StreamReader(responseStream, encoding))
            {
                pageHtml = reader.ReadToEnd();
                methodStatus = true;
            }
        }
        catch (Exception err)
        {
            // Every failure (WebException included) is reported through the tuple, not thrown.
            exceptionInfo = err.Message;
        }

        return Tuple.Create<bool, string, string>(methodStatus, pageHtml, exceptionInfo);
    }

    调用时,直接将url和关键词组合后以GET方式就可以获取。

    下载方法通过指定的URI从远程服务器下载数据到本地应用程序。
    1. 获得远程服务器url地址;
    2. 获得目标文件路径;
    3. 使用WebRequest对象检查文件是否存在于服务器端 (导入命名空间System.Net的引用);
    4. HTTP:创建WebClient(位于System.Net,类似于上面提到的UploadFile方法)实例,调用其DownloadData()方法,从指定URI把文件资源下载到缓冲区。实际上,对于HTTP资源,使用的是"GET"方法。

       FTP:创建FtpWebRequest实例,通过使用WebRequestMethods.Ftp.DownloadFile方法,我们可以接受来自服务器的资源流,此方法使用"RETR"命令下载FTP资源;
    5. DownloadData方法会返回下载资源的字节数组,我们只需要使用FileStream(using System.IO)把这个下载缓冲区中的字节写入本地路径;
    6. 最后关闭并释放FileStream资源。

    可参阅:WebClient 以及WebClient.DownloadData

    RemoteDownload

    /// <summary>
    /// Base class for downloaders that fetch a remote file identified by <see cref="UrlString"/>
    /// into the local directory <see cref="DestDir"/>.
    /// </summary>
    public abstract class RemoteDownload
    {
        /// <summary>URL of the remote file.</summary>
        public string UrlString { get; set; }

        /// <summary>Local destination directory.</summary>
        public string DestDir { get; set; }

        /// <summary>
        /// Stores the remote URL and the destination directory.
        /// Protected because an abstract type can only be constructed through a derived class.
        /// </summary>
        /// <param name="urlString">URL of the remote file.</param>
        /// <param name="destDir">Local destination directory.</param>
        protected RemoteDownload(string urlString, string destDir)
        {
            this.UrlString = urlString;
            this.DestDir = destDir;
        }

        /// <summary>
        /// Downloads the file from the remote server.
        /// The base implementation performs no work and simply returns true.
        /// </summary>
        /// <returns>true.</returns>
        public virtual bool DownloadFile()
        {
            return true;
        }
    }

    /// <summary>
    /// Downloads a file over HTTP into the destination directory, using the file name
    /// taken from the URL.
    /// </summary>
    public class HttpRemoteDownload : RemoteDownload
    {
        /// <summary>Creates a downloader for the given URL.</summary>
        /// <param name="urlString">URL of the remote file.</param>
        /// <param name="descFilePath">Local destination directory (forwarded to the base class as DestDir).</param>
        public HttpRemoteDownload(string urlString, string descFilePath)
            : base(urlString, descFilePath)
        {
        }

        /// <summary>
        /// Downloads the remote file into memory and writes it to DestDir under the URL's file name,
        /// replacing any existing file of that name.
        /// </summary>
        /// <returns>true when the file was downloaded and written.</returns>
        /// <exception cref="Exception">
        /// Thrown when the URL cannot be turned into a request or when the download/write fails;
        /// the original exception is available as InnerException.
        /// </exception>
        public override bool DownloadFile()
        {
            string fileName = System.IO.Path.GetFileName(this.UrlString);
            string descFilePath = System.IO.Path.Combine(this.DestDir, fileName);

            try
            {
                // NOTE: WebRequest.Create only builds the request object and does not contact
                // the server, so this rejects malformed or unsupported URLs rather than
                // detecting a missing remote file.
                WebRequest.Create(this.UrlString);
            }
            catch (Exception ex)
            {
                // Pass the caught exception itself; ex.InnerException is usually null and
                // would discard the real cause.
                throw new Exception("服务器上不存在对应文件", ex);
            }

            try
            {
                byte[] fileData;
                using (WebClient client = new WebClient())
                {
                    fileData = client.DownloadData(this.UrlString);
                }

                // FileMode.Create truncates an existing file; OpenOrCreate would leave stale
                // trailing bytes when the new content is shorter than the old file.
                using (FileStream fs = new FileStream(descFilePath, FileMode.Create))
                {
                    fs.Write(fileData, 0, fileData.Length);
                }
                return true;
            }
            catch (Exception ex)
            {
                throw new Exception("下载失败", ex);
            }
        }
    }

    FTPDownload

    /// <summary>
    /// FtpDownload 类
    /// </summary>
    public class FtpRemoteDownload : RemoteDownload
    {
    /// <summary>Creates an FTP downloader for the given URL.</summary>
    /// <param name="urlString">URL of the remote file.</param>
    /// <param name="descFilePath">Local destination directory (forwarded to the base class as its second argument).</param>
    public FtpRemoteDownload(string urlString, string descFilePath)
        : base(urlString, descFilePath)
    {
    }

    /// <summary>
    /// Downloads the remote file over FTP (binary mode) and writes it to DestDir under the
    /// URL's file name, replacing any existing file of that name.
    /// </summary>
    /// <returns>true when the file was downloaded and written.</returns>
    /// <exception cref="Exception">
    /// Thrown when the request or the file write fails; the original exception is available
    /// as InnerException.
    /// </exception>
    public override bool DownloadFile()
    {
        string fileName = System.IO.Path.GetFileName(this.UrlString);
        string descFilePath = System.IO.Path.Combine(this.DestDir, fileName);

        try
        {
            FtpWebRequest reqFTP = (FtpWebRequest)FtpWebRequest.Create(this.UrlString);
            reqFTP.Method = WebRequestMethods.Ftp.DownloadFile;
            reqFTP.UseBinary = true;

            // Obtain the response before opening the local file, so a failed request does not
            // leave an empty file behind or wipe an existing one.
            using (FtpWebResponse response = (FtpWebResponse)reqFTP.GetResponse())
            using (Stream ftpStream = response.GetResponseStream())
            // FileMode.Create truncates an existing file; OpenOrCreate would keep stale
            // trailing bytes when the new content is shorter.
            using (FileStream outputStream = new FileStream(descFilePath, FileMode.Create))
            {
                ftpStream.CopyTo(outputStream);
            }
            return true;
        }
        catch (Exception ex)
        {
            // Pass the caught exception itself; ex.InnerException is usually null and would
            // discard the real cause.
            throw new Exception("下载失败", ex);
        }
    }
  • 相关阅读:
    带有头结点的链表的基本操作
    转:gdb相关学习
    wareshark网络协议分析之ARP
    wareshark网络协议分析之DHCP
    java多线程(内附实例:窗口售票问题、人和叉子的问题)
    Android深度探索(卷1)HAL与驱动开发 虚拟环境的安装
    source insigt、pc-lint、VS联合使用
    java arrays类学习
    C#函数重载
    (转)Pycharm用鼠标滚轮控制字体大小
  • 原文地址:https://www.cnblogs.com/blackcore/p/2061118.html
Copyright © 2011-2022 走看看