zoukankan      html  css  js  c++  java
  • 帮助类5

    #region  获取网页的HTML内容
     2          // 获取网页的HTML内容,指定Encoding
     3         public static string GetHtml(string url, Encoding encoding)
     4          {
     5              byte[] buf = new WebClient().DownloadData(url);
     6              if (encoding != null) return encoding.GetString(buf);
     7              string html = Encoding.UTF8.GetString(buf);
     8              encoding = GetEncoding(html);
     9              if (encoding == null || encoding == Encoding.UTF8) return html;
    10              return encoding.GetString(buf);
    11          }
    12          // 根据网页的HTML内容提取网页的Encoding
    13         public static Encoding GetEncoding(string html)
    14          {




    using System;
     2 using System.Collections.Generic;
     3 using System.Linq;
     4 using System.Text;
     5 using System.IO;
     6 using System.Net;
     7 using System.Web;
     8 using System.Security.Cryptography;
     9 using System.Text.RegularExpressions;
    10 using System.Web.Script.Serialization;
    11 using System.Data;
    12 using System.Collections;
    13 using System.Runtime.Serialization.Json;
    14 using System.Configuration;
    15 using System.Reflection;
    15 string pattern = @"(?i)charset=(?<charset>[-a-zA-Z_0-9]+)"; 16 string charset = Regex.Match(html, pattern).Groups["charset"].Value; 17 try { return Encoding.GetEncoding(charset); } 18 catch (ArgumentException) { return null; } 19 } 20 #endregion
  • 相关阅读:
    __autoload函数
    错误处理try catch
    PHP面向对象基础实例
    类的继承关系实例
    YII重点文件
    //计算今年月度利息和
    cookie保存分页参数
    win64(win8)的python拓展包安装经验总结
    matcom安装时无法寻找到matlab.exe的解决办法
    《人人都是产品经理》阅读笔记一
  • 原文地址:https://www.cnblogs.com/zhangxiaolei521/p/5808718.html
Copyright © 2011-2022 走看看