zoukankan      html  css  js  c++  java
  • 利用 WebClient 实现下载并另存为txt 格式的文本文件

    前几天看到同事在网上复制、粘贴管理方面的文章,一遍一遍地重复,这让我想到可不可以写一个程序来完成呢?于是上网查资料,终于帮他解决了这个问题,代码如下:

    using System;
    using System.Collections.Generic;
    using System.Linq;
    using System.Web;
    using System.Web.UI;
    using System.Web.UI.WebControls;
    using System.Text.RegularExpressions;
    using System.IO;
    using System.Text;
    
    namespace WebUI
    {
        /// <summary>
        /// Code-behind for a test page that downloads a range of numbered articles,
        /// strips their HTML markup and saves each one as a GB2312-encoded text file.
        /// </summary>
        public partial class TestWebClient : System.Web.UI.Page
        {
            // First and last article number to download (inclusive).
            private const int FirstArticleId = 1507;
            private const int LastArticleId = 1507;

            // The article address is this prefix + article number + suffix.
            private const string ArticleUrlPrefix = "http://www.ccmcsz.com/management/";
            private const string ArticleUrlSuffix = ".htm";

            // Folder that receives the generated .txt files.
            private const string OutputDirectory = @"D:\Test163\";

            /// <summary>
            /// Page load handler; intentionally does nothing.
            /// </summary>
            protected void Page_Load(object sender, EventArgs e)
            {
            }

            /// <summary>
            /// Click handler: for every article number in the configured range, echoes
            /// the article URL to the response and saves the article through
            /// <see cref="SetLog"/>.
            /// </summary>
            protected void btnDownLoad_Click(object sender, EventArgs e)
            {
                for (int i = FirstArticleId; i <= LastArticleId; i++)
                {
                    string url = ArticleUrlPrefix + i + ArticleUrlSuffix;
                    Response.Write(url);
                    SetLog(url, i.ToString());
                    Response.Write("<br/>");
                }
            }

            /// <summary>
            /// Downloads <paramref name="url"/>, removes the HTML markup with
            /// <see cref="DelHTML"/> and appends the resulting text to
            /// "<paramref name="name"/>.txt" inside the output directory.
            /// </summary>
            /// <param name="url">Address of the page to download.</param>
            /// <param name="name">File name (without extension) of the text file.</param>
            /// <remarks>
            /// Failures are not propagated: the URL and the HTML-encoded exception
            /// message are written to the response instead.
            /// </remarks>
            public void SetLog(string url, string name)
            {
                try
                {
                    Encoding defaultencode = Encoding.GetEncoding("gb2312");
                    string newFilePath = Path.Combine(OutputDirectory, name + ".txt");

                    // CreateDirectory is a no-op when the directory already exists.
                    Directory.CreateDirectory(OutputDirectory);

                    string message;
                    // using blocks release the connection and the reader even when the
                    // download or the read throws.
                    using (System.Net.WebClient wc = new System.Net.WebClient())
                    using (Stream ss = wc.OpenRead(url))
                    using (StreamReader rd = new StreamReader(ss, defaultencode))
                    {
                        message = rd.ReadToEnd();
                    }

                    message = DelHTML(message);

                    // Second argument true = append to an existing file.
                    using (StreamWriter sw = new StreamWriter(newFilePath, true, defaultencode))
                    {
                        sw.Write(message);
                    }
                }
                catch (Exception ex)
                {
                    // Best effort: keep processing, but show why this URL failed.
                    this.Response.Write(url + " failed: " + HttpUtility.HtmlEncode(ex.Message) + "<br/>");
                }
            }

            /// <summary>
            /// Converts an HTML fragment to plain text: removes script blocks, tags and
            /// comment remnants, strips leftover angle brackets, and decodes a small set
            /// of character entities.
            /// </summary>
            /// <param name="Htmlstring">HTML text to clean; may be null or empty.</param>
            /// <returns>
            /// The cleaned text, or an empty string when the input is null or empty.
            /// </returns>
            public static string DelHTML(string Htmlstring)
            {
                if (string.IsNullOrEmpty(Htmlstring))
                {
                    return string.Empty;
                }

                string text = Htmlstring;

                // Remove line breaks together with the whitespace that follows them.
                text = Regex.Replace(text, @"([\r\n])[\s]+", "", RegexOptions.IgnoreCase);

                // Remove script blocks. Singleline lets '.' cross line breaks so that
                // multi-line scripts are removed as a whole instead of leaking their
                // source into the output.
                text = Regex.Replace(text, @"<script[^>]*?>.*?</script>", "", RegexOptions.IgnoreCase | RegexOptions.Singleline);

                // Remove HTML tags and what is left of comments.
                text = Regex.Replace(text, @"<(.[^>]*)>", "", RegexOptions.IgnoreCase);
                text = Regex.Replace(text, @"([\r\n])[\s]+", "", RegexOptions.IgnoreCase);
                text = Regex.Replace(text, @"-->", "", RegexOptions.IgnoreCase);
                text = Regex.Replace(text, @"<!--.*", "", RegexOptions.IgnoreCase);

                // Drop angle brackets left over from broken markup. string.Replace
                // returns a new string, so the result has to be assigned. This runs
                // before entity decoding so that brackets the page wrote as entities
                // survive as text.
                text = text.Replace("<", "");
                text = text.Replace(">", "");

                // Decode the supported character entities.
                text = Regex.Replace(text, @"&(quot|#34);", "\"", RegexOptions.IgnoreCase);
                text = Regex.Replace(text, @"&(amp|#38);", "&", RegexOptions.IgnoreCase);
                text = Regex.Replace(text, @"&(lt|#60);", "<", RegexOptions.IgnoreCase);
                text = Regex.Replace(text, @"&(gt|#62);", ">", RegexOptions.IgnoreCase);
                text = Regex.Replace(text, @"&(nbsp|#160);", " ", RegexOptions.IgnoreCase);
                text = Regex.Replace(text, @"&(iexcl|#161);", "\xa1", RegexOptions.IgnoreCase);
                text = Regex.Replace(text, @"&(cent|#162);", "\xa2", RegexOptions.IgnoreCase);
                text = Regex.Replace(text, @"&(pound|#163);", "\xa3", RegexOptions.IgnoreCase);
                text = Regex.Replace(text, @"&(copy|#169);", "\xa9", RegexOptions.IgnoreCase);

                // Any other numeric entity is dropped rather than decoded.
                text = Regex.Replace(text, @"&#(\d+);", "", RegexOptions.IgnoreCase);

                text = text.Replace("\r\n", "");
                return text;
            }

        }
    }
    
    

    等待更新...

  • 相关阅读:
    CodeForces 510C Fox And Names (拓扑排序)
    Codeforces 1153D Serval and Rooted Tree (简单树形DP)
    HDU 6437 Problem L.Videos (最大费用)【费用流】
    Luogu P3381 (模板题) 最小费用最大流
    Codeforces 741B Arpa's weak amphitheater and Mehrdad's valuable Hoses (并查集+分组背包)
    Codeforces 1144F Graph Without Long Directed Paths (DFS染色+构造)
    HDU 2204 Eddy's 爱好 (容斥原理)
    Codeforces 939E Maximize! (三分 || 尺取)
    Codeforces 938D. Buy a Ticket (最短路+建图)
    CodeForces 959E Mahmoud and Ehab and the xor-MST (MST+找规律)
  • 原文地址:https://www.cnblogs.com/Music/p/WebClientDemo.html
Copyright © 2011-2022 走看看