zoukankan      html  css  js  c++  java
  • 帮助类5

    #region  获取网页的HTML内容
     /// <summary>
     /// Downloads the resource at <paramref name="url"/> and decodes it into a string.
     /// </summary>
     /// <param name="url">Address of the page to download.</param>
     /// <param name="encoding">
     /// Encoding used to decode the downloaded bytes. When null, the bytes are first
     /// decoded as UTF-8, the resulting text is passed to GetEncoding, and the bytes are
     /// re-decoded with the encoding it returns (if any, and if it is not UTF-8).
     /// </param>
     /// <returns>The decoded page content.</returns>
     public static string GetHtml(string url, Encoding encoding)
     {
         byte[] buf;

         // WebClient is IDisposable; release it as soon as the download has finished.
         using (WebClient client = new WebClient())
         {
             buf = client.DownloadData(url);
         }

         // An explicitly supplied encoding always wins.
         if (encoding != null)
         {
             return encoding.GetString(buf);
         }

         // No encoding supplied: decode as UTF-8 first so the text can be searched
         // for a charset declaration.
         string html = Encoding.UTF8.GetString(buf);
         Encoding detected = GetEncoding(html);

         // Compare by code page rather than by reference, so the UTF-8 text is reused
         // even if GetEncoding hands back a different UTF-8 Encoding instance.
         if (detected == null || detected.CodePage == Encoding.UTF8.CodePage)
         {
             return html;
         }

         return detected.GetString(buf);
     }
    12          // Extracts the page's Encoding from its HTML content.
    // Takes the HTML text of a page and returns the Encoding named by its
    // "charset=" declaration, or null when that name cannot be resolved.
    13         public static Encoding GetEncoding(string html)
    14          {




    using System;
     2 using System.Collections.Generic;
     3 using System.Linq;
     4 using System.Text;
     5 using System.IO;
     6 using System.Net;
     7 using System.Web;
     8 using System.Security.Cryptography;
     9 using System.Text.RegularExpressions;
    10 using System.Web.Script.Serialization;
    11 using System.Data;
    12 using System.Collections;
    13 using System.Runtime.Serialization.Json;
    14 using System.Configuration;
    15 using System.Reflection;
    // NOTE(review): the statements on the next line appear to be the remainder of
    // GetEncoding's body (its header and opening brace are above the using list);
    // the using directives look like a separate snippet interleaved here by page
    // extraction - confirm against the original post before relying on this layout.
    //
    // The pattern matches "charset=" case-insensitively ((?i)) and captures the
    // following run of letters, digits, '-' and '_' as the named group "charset".
    // The captured text of the first match is passed to Encoding.GetEncoding; when
    // there is no match the captured value is an empty string. If Encoding.GetEncoding
    // throws ArgumentException (presumably for an empty or unrecognized name - verify
    // against the Encoding.GetEncoding documentation), the method returns null.
    15 string pattern = @"(?i)charset=(?<charset>[-a-zA-Z_0-9]+)"; 16 string charset = Regex.Match(html, pattern).Groups["charset"].Value; 17 try { return Encoding.GetEncoding(charset); } 18 catch (ArgumentException) { return null; } 19 } 20 #endregion
  • 相关阅读:
    OD 实验(十三)
    第一个 Windows 界面程序
    C 语言
    C 语言
    OD 实验(十二)
    PowerShell 常用命令
    OD 实验(十一)
    OD 实验(十)
    redis
    memcached缓存系统
  • 原文地址:https://www.cnblogs.com/zhangxiaolei521/p/5808718.html
Copyright © 2011-2022 走看看