zoukankan      html  css  js  c++  java
  • C#抓取网页上的html内容(JS动态生成的内容无法抓取)

    抓取内容的代码:

     1         /// </summary>
     2         /// <param name="url">路径URL</param>
     3         /// <param name="path">存储路径</param>
     4         /// <returns></returns>
     5         public static string HttpDownloadFile(string url, string path)
     6         {
     7             try
     8             {
     9                 // 设置参数
    10                 HttpWebRequest request = WebRequest.Create(url) as HttpWebRequest;
    11 
    12                 //发送请求并获取相应回应数据
    13                 HttpWebResponse response = request.GetResponse() as HttpWebResponse;
    14                 //直到request.GetResponse()程序才开始向目标网页发送Post请求
    15                 Stream responseStream = response.GetResponseStream();
    16 
    17                 //可以将生成的流文件直接生成字符串 SourceCode就是生成后的字符串
    18                 //StreamReader readStream = new StreamReader(responseStream, Encoding.UTF8);
    19                 //string SourceCode = readStream.ReadToEnd();
    20 
    21                 //创建本地文件写入流
    22                 if (File.Exists(path))
    23                 {
    24                     File.Delete(path);
    25                 }
    26                 FileStream fs = File.Create(path);
    27                 fs.Close();
    28                 
    29                 Stream stream = new FileStream(path, FileMode.Create);
    30                 byte[] bArr = new byte[1024];
    31                 int size = responseStream.Read(bArr, 0, (int)bArr.Length);
    32                 while (size > 0)
    33                 {
    34                     stream.Write(bArr, 0, size);
    35                     size = responseStream.Read(bArr, 0, (int)bArr.Length);
    36                 }
    37                 stream.Close();
    38                 responseStream.Close();
    39                 return path;
    40             }
    41             catch (Exception ex)
    42             {
    43 
    44                 throw ex;
    45             }
    46 
    47         }

    调用方式:

    1                 HttpReviceFile.HttpDownloadFile("http://localhost:811/ ", @"D:WorkTest.xml");
  • 相关阅读:
    杯具的流浪狗
    数据加密与数据压缩后加密的效率
    XMPP协议自定义消息类型扩展
    have a try
    linux修改网卡名称的方法
    WARNING: old character encoding and/or character set解决办法
    extern用法总结
    linux下的c++线程池实现
    32位linux系统操作大于2G文件方法
    eclipse中gdb调试输出stl容器的内容
  • 原文地址:https://www.cnblogs.com/870060760JR/p/6118024.html
Copyright © 2011-2022 走看看