zoukankan      html  css  js  c++  java
  • 利用 WebClient 实现下载并另存为txt 格式的文本文件

    前几天看到同事在网上一遍一遍地复制、粘贴管理方面的文章,这让我想到:可不可以写一个程序来完成呢?于是上网查资料,终于帮他解决了,代码如下:

    using System;
    using System.Collections.Generic;
    using System.Linq;
    using System.Web;
    using System.Web.UI;
    using System.Web.UI.WebControls;
    using System.Text.RegularExpressions;
    using System.IO;
    using System.Text;
    
    namespace WebUI
    {
        public partial class TestWebClient : System.Web.UI.Page
        {
            /// <summary>
            /// Load handler for this page. The body is empty, so no work is done here.
            /// </summary>
            /// <param name="sender">Event source; not used.</param>
            /// <param name="e">Event data; not used.</param>
            protected void Page_Load(object sender, EventArgs e)
            {
    
            }
    
            /// <summary>
            /// Click handler that walks a range of numbered article pages, echoes each
            /// page URL to the response and hands it to <see cref="SetLog"/> to be saved
            /// under the page number.
            /// </summary>
            /// <param name="sender">Event source; not used.</param>
            /// <param name="e">Event data; not used.</param>
            protected void btnDownLoad_Click(object sender, EventArgs e)
            {
                // Inclusive range of page numbers to fetch (currently a single page).
                const int firstPage = 1507;
                const int lastPage = 1507;

                int page = firstPage;
                while (page <= lastPage)
                {
                    string pageUrl = "http://www.ccmcsz.com/management/" + page + ".htm";

                    Response.Write(pageUrl);
                    SetLog(pageUrl, page.ToString());
                    Response.Write("<br/>");

                    page++;
                }
            }
            /// <summary>
            /// Downloads the document at <paramref name="url"/>, strips its markup with
            /// <see cref="DelHTML"/> and appends the resulting text to
            /// <c>D:\Test163\{name}.txt</c>. The page is read and the file is written
            /// using the gb2312 encoding.
            /// </summary>
            /// <param name="url">Address of the page to download.</param>
            /// <param name="name">File name (without extension) for the saved text.</param>
            /// <remarks>
            /// Failures are not propagated: the exception is traced and the URL is
            /// written to the response so the failing page is visible to the user.
            /// </remarks>
            public void SetLog(string url, string name)
            {
                try
                {
                    string filepath = @"D:\Test163\";
                    Encoding defaultencode = Encoding.GetEncoding("gb2312");
                    string NewFilePath = Path.Combine(filepath, name + ".txt");

                    // CreateDirectory does nothing when the directory already exists,
                    // so no separate existence check is needed.
                    Directory.CreateDirectory(filepath);

                    string message;
                    // using blocks release the client, stream and reader even when
                    // the download or the read throws.
                    using (System.Net.WebClient wc = new System.Net.WebClient())
                    using (Stream ss = wc.OpenRead(url))
                    using (StreamReader rd = new StreamReader(ss, defaultencode))
                    {
                        message = rd.ReadToEnd();
                    }

                    message = DelHTML(message);

                    // 'true' opens the file in append mode; disposing the writer
                    // flushes and closes it.
                    using (StreamWriter Sw = new StreamWriter(NewFilePath, true, defaultencode))
                    {
                        Sw.Write(message);
                    }
                }
                catch (Exception ex)
                {
                    // Keep the best-effort behavior, but record why the page failed
                    // instead of discarding the exception.
                    System.Diagnostics.Trace.TraceError("SetLog failed for {0}: {1}", url, ex);
                    this.Response.Write(url + "<br/>");
                }
            }
    
            /// <summary>
            /// Reduces an HTML fragment to plain text: removes script blocks and tags,
            /// drops line breaks that are followed by whitespace, and decodes a small
            /// set of character entities.
            /// </summary>
            /// <param name="Htmlstring">The HTML text to clean; may be null or empty.</param>
            /// <returns>
            /// The cleaned text, or an empty string when <paramref name="Htmlstring"/>
            /// is null or empty.
            /// </returns>
            public static string DelHTML(string Htmlstring)
            {
                if (string.IsNullOrEmpty(Htmlstring))
                {
                    return string.Empty;
                }

                // Remove scripts.
                // First drop each line break together with the whitespace after it.
                Htmlstring = Regex.Replace(Htmlstring, @"([\r\n])[\s]+", "", RegexOptions.IgnoreCase);
                // Singleline lets '.' match '\n', so a script body that still contains
                // bare line breaks is removed as a whole instead of leaking into the text.
                Htmlstring = Regex.Replace(Htmlstring, @"<script[^>]*?>.*?</script>", "", RegexOptions.IgnoreCase | RegexOptions.Singleline);

                // Remove HTML tags, then clean up what tag removal leaves behind.
                Htmlstring = Regex.Replace(Htmlstring, @"<(.[^>]*)>", "", RegexOptions.IgnoreCase);
                Htmlstring = Regex.Replace(Htmlstring, @"([\r\n])[\s]+", "", RegexOptions.IgnoreCase);
                // Leftover comment closers, and unterminated comment openers up to the
                // end of their line.
                Htmlstring = Regex.Replace(Htmlstring, @"-->", "", RegexOptions.IgnoreCase);
                Htmlstring = Regex.Replace(Htmlstring, @"<!--.*", "", RegexOptions.IgnoreCase);

                // Decode the supported named/numeric entities.
                Htmlstring = Regex.Replace(Htmlstring, @"&(quot|#34);", "\"", RegexOptions.IgnoreCase);
                Htmlstring = Regex.Replace(Htmlstring, @"&(lt|#60);", "<", RegexOptions.IgnoreCase);
                Htmlstring = Regex.Replace(Htmlstring, @"&(gt|#62);", ">", RegexOptions.IgnoreCase);
                Htmlstring = Regex.Replace(Htmlstring, @"&(nbsp|#160);", " ", RegexOptions.IgnoreCase);
                Htmlstring = Regex.Replace(Htmlstring, @"&(iexcl|#161);", "\xa1", RegexOptions.IgnoreCase);
                Htmlstring = Regex.Replace(Htmlstring, @"&(cent|#162);", "\xa2", RegexOptions.IgnoreCase);
                Htmlstring = Regex.Replace(Htmlstring, @"&(pound|#163);", "\xa3", RegexOptions.IgnoreCase);
                Htmlstring = Regex.Replace(Htmlstring, @"&(copy|#169);", "\xa9", RegexOptions.IgnoreCase);
                // Any other numeric entity is dropped.
                Htmlstring = Regex.Replace(Htmlstring, @"&#(\d+);", "", RegexOptions.IgnoreCase);
                // '&amp;' is decoded last so that escaped entities such as "&amp;lt;"
                // become the literal text "&lt;" rather than being decoded twice.
                Htmlstring = Regex.Replace(Htmlstring, @"&(amp|#38);", "&", RegexOptions.IgnoreCase);

                // NOTE: earlier revisions called Htmlstring.Replace("<", ""), (">", "")
                // and ("\r\n", "") here without using the return value. Strings are
                // immutable, so those calls never changed the result; they are omitted
                // and the decoded '<' / '>' characters are kept in the output as before.
                return Htmlstring;
            }
    
        }
    }
    
    

    等待更新...

  • 相关阅读:
    XV Open Cup named after E.V. Pankratiev. GP of Central Europe (AMPPZ-2014)--B.Petrol
    XVI Open Cup named after E.V. Pankratiev. GP of Eurasia
    Petrozavodsk Winter Camp, Warsaw U, 2014, A The Carpet
    训练日志4
    训练日志3
    训练日志2
    多校中期总结
    训练日志
    计算几何学习12 + 组队训练
    计算几何学习11
  • 原文地址:https://www.cnblogs.com/Music/p/WebClientDemo.html
Copyright © 2011-2022 走看看