zoukankan      html  css  js  c++  java
  • 使用聚合接口获取汉字数据字典

    如何获取全部汉字及汉字的详细信息?

    刚刚扒了一通汉字完整的数据字典,mark作下记录。

    所有汉字集合

    我们匹配中文字符时,经常使用Unicode范围正则表达式"[\u4e00-\u9fa5]"来校验(取反写法"[^\u4e00-\u9fa5]"则用于匹配非中文字符)。

    详细的汉字字符集列表可参考:

    具体的汉字对应的字符,可查询汉字字符集编码查询网站

    基本汉字中从4e00到9fa5,十六进制转化为十进制后,可知共有20902个汉字。

    汉字的集合,是开源的,可以直接从网上获取。

    从以上链接中,下载汉字集合

    获取汉字信息

    汉字的详细信息,包括拼音/笔画数/部首/五笔输入/笔顺编号/注解

    网上有相应的新华字典Api接口,这里选的是聚合平台-新华字典

    通过它提供的C#示例源码,我们可以获取到汉字字典的详细信息

    详细数据请点击 下载

    PS:

    • 部首和五笔可能为空,部首返回的数据为:"难检字"。
    • 笔顺编号可通过返回的简解中得到。笔顺编号,即指每个笔画对应的号码,这个非常有用!

    聚合接口-汉字

    获取汉字:

     /// <summary>
     /// Looks up a single Chinese character through the Juhe Xinhua-dictionary
     /// endpoint (<c>http://v.juhe.cn/xhzd/query</c>) and returns the parsed detail.
     /// </summary>
     /// <param name="hanzi">The character to look up. It is sent as the <c>word</c> query parameter.</param>
     /// <returns>
     /// The <c>result</c> member of the deserialized response, or <c>null</c> when the
     /// response body deserializes to nothing or contains no result.
     /// </returns>
     public static HanziDetail FindHanzi(string hanzi)
     {
         // 1. Query the dictionary by character.
         const string queryUrl = "http://v.juhe.cn/xhzd/query";

         var query = new Dictionary<string, string>
         {
             { "word", hanzi },   // character to look up (must reach the API UTF-8 URL-encoded)
             { "key", "XXXXXX" }, // placeholder: replace with the API key you applied for
             { "dtype", "" },     // response format, xml or json; left empty to get the default (json)
         };

         string responseText = SendPost(queryUrl, query, "get");

         var response = JsonConvert.DeserializeObject<HanziRequestResponse>(responseText);

         // The deserializer can hand back null (e.g. for an empty body); do not
         // dereference it blindly, report "no result" instead.
         if (response == null)
         {
             return null;
         }

         return response.Result;
     }
    24     }

    解析类:

     /// <summary>
     /// Envelope of a dictionary-query response: a status message, an error code
     /// and the character detail payload.
     /// </summary>
     [DataContract]
     public class HanziRequestResponse
     {
         /// <summary>Value of the <c>reason</c> field of the response.</summary>
         [DataMember(Name = "reason")]
         public string Reason { get; set; }

         /// <summary>
         /// Value of the <c>error_code</c> field of the response, kept as text.
         /// NOTE(review): presumably "0" means success; confirm against the API documentation.
         /// </summary>
         [DataMember(Name = "error_code")]
         public string ErrorCode { get; set; }

         /// <summary>Value of the <c>result</c> field: the detail of the queried character.</summary>
         [DataMember(Name = "result")]
         public HanziDetail Result { get; set; }
     }
    /// <summary>
    /// Detail record of one Chinese character as carried in the <c>result</c>
    /// member of a dictionary-query response.
    /// </summary>
    [DataContract]
    public class HanziDetail
    {
        /// <summary>The character itself (<c>zi</c>).</summary>
        [DataMember(Name = "zi")]
        public string Hanzi { get; set; }

        /// <summary>Radical of the character (<c>bushou</c>).</summary>
        [DataMember(Name = "bushou")]
        public string Radical { get; set; }

        /// <summary>Pinyin reading (<c>pinyin</c>).</summary>
        [DataMember(Name = "pinyin")]
        public string Pinyin { get; set; }

        /// <summary>Stroke count (<c>bihua</c>), kept as text.</summary>
        [DataMember(Name = "bihua")]
        public string Bihua { get; set; }

        /// <summary>Wubi input code (<c>wubi</c>).</summary>
        [DataMember(Name = "wubi")]
        public string WuBi { get; set; }

        /// <summary>Brief explanation lines (<c>jijie</c>).</summary>
        [DataMember(Name = "jijie")]
        public List<string> SimpleDetailContent { get; set; }
    }
    View Code

    访问后台接口通用类:

      /// <summary>
      /// Small synchronous HTTP helper: sends a form-encoded POST or a query-string
      /// GET and returns the response body as text.
      /// </summary>
      public class HttpRequestBase
      {
          /// <summary>
          /// Sends an HTTP request (GET or POST) and returns the response body.
          /// </summary>
          /// <param name="url">Request URL. For GET the encoded parameters are appended after a "?".</param>
          /// <param name="parameters">Request parameters; values are URL-encoded as UTF-8. May be null.</param>
          /// <param name="method">
          /// "post" (compared case-insensitively) sends a form-encoded POST; any other
          /// value, including null, sends a GET.
          /// </param>
          /// <returns>
          /// The response body. For POST, any exception is caught and its
          /// <see cref="Exception.Message"/> is returned in place of the body (kept for
          /// backward compatibility); for GET, exceptions propagate to the caller.
          /// </returns>
          public static string SendPost(string url, IDictionary<string, string> parameters, string method)
          {
              // Ordinal comparison: an HTTP verb is not linguistic text, and this
              // also tolerates a null method instead of throwing.
              if (string.Equals(method, "post", StringComparison.OrdinalIgnoreCase))
              {
                  try
                  {
                      HttpWebRequest req = (HttpWebRequest)WebRequest.Create(url);
                      req.Method = "POST";
                      req.KeepAlive = false;
                      req.ProtocolVersion = HttpVersion.Version10;
                      req.Timeout = 5000;
                      req.ContentType = "application/x-www-form-urlencoded;charset=utf-8";

                      byte[] postData = Encoding.UTF8.GetBytes(BuildQuery(parameters, "utf8"));

                      // Close the request stream before asking for the response.
                      using (Stream reqStream = req.GetRequestStream())
                      {
                          reqStream.Write(postData, 0, postData.Length);
                      }

                      using (HttpWebResponse rsp = (HttpWebResponse)req.GetResponse())
                      {
                          return GetResponseAsString(rsp, ResolveEncoding(rsp.CharacterSet));
                      }
                  }
                  catch (Exception ex)
                  {
                      // Legacy contract: the POST path reports failures as the returned text.
                      return ex.Message;
                  }
              }

              // GET request: parameters travel in the query string.
              HttpWebRequest request = (HttpWebRequest)WebRequest.Create(url + "?" + BuildQuery(parameters, "utf8"));
              request.Method = "GET";
              request.ReadWriteTimeout = 5000;
              request.ContentType = "text/html;charset=UTF-8";

              // Dispose the response (and, through the helper, its stream and reader)
              // so the connection is released even when reading fails.
              using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
              {
                  return GetResponseAsString(response, Encoding.UTF8);
              }
          }

          /// <summary>
          /// Assembles plain-text request parameters into a "name=value&amp;name=value" string.
          /// </summary>
          /// <param name="parameters">Key-value request parameters. Null is treated as empty.</param>
          /// <param name="encode">
          /// "gb2312" or "utf8" selects the encoding used to URL-encode each value;
          /// any other value appends the values unencoded.
          /// </param>
          /// <returns>
          /// The assembled query string. Entries whose name is null or empty are
          /// skipped; entries with an empty value are kept as "name=". Names are
          /// appended as-is, without URL-encoding.
          /// </returns>
          public static string BuildQuery(IDictionary<string, string> parameters, string encode)
          {
              StringBuilder postData = new StringBuilder();
              if (parameters == null)
              {
                  return postData.ToString();
              }

              foreach (KeyValuePair<string, string> pair in parameters)
              {
                  // Skip parameters without a name (empty values are still sent).
                  if (string.IsNullOrEmpty(pair.Key))
                  {
                      continue;
                  }

                  // Every kept entry has a non-empty name, so a non-empty buffer
                  // means at least one parameter was already written.
                  if (postData.Length > 0)
                  {
                      postData.Append("&");
                  }

                  postData.Append(pair.Key);
                  postData.Append("=");

                  switch (encode)
                  {
                      case "gb2312":
                          postData.Append(HttpUtility.UrlEncode(pair.Value, Encoding.GetEncoding("gb2312")));
                          break;
                      case "utf8":
                          postData.Append(HttpUtility.UrlEncode(pair.Value, Encoding.UTF8));
                          break;
                      default:
                          postData.Append(pair.Value);
                          break;
                  }
              }

              return postData.ToString();
          }

          /// <summary>
          /// Reads the whole response body as text and closes the response.
          /// </summary>
          /// <param name="rsp">The response to read; it is closed before this method returns.</param>
          /// <param name="encoding">Encoding used to decode the body; UTF-8 is used when null.</param>
          /// <returns>The response text.</returns>
          /// <exception cref="ArgumentNullException"><paramref name="rsp"/> is null.</exception>
          public static string GetResponseAsString(HttpWebResponse rsp, Encoding encoding)
          {
              if (rsp == null)
              {
                  throw new ArgumentNullException("rsp");
              }

              try
              {
                  // Read the HTTP response as a character stream.
                  using (Stream stream = rsp.GetResponseStream())
                  using (StreamReader reader = new StreamReader(stream, encoding ?? Encoding.UTF8))
                  {
                      return reader.ReadToEnd();
                  }
              }
              finally
              {
                  // Release the response.
                  rsp.Close();
              }
          }

          /// <summary>
          /// Maps a response charset name to an <see cref="Encoding"/>, falling back to
          /// UTF-8 when the name is missing or not recognized.
          /// </summary>
          private static Encoding ResolveEncoding(string charset)
          {
              if (string.IsNullOrEmpty(charset))
              {
                  return Encoding.UTF8;
              }

              try
              {
                  return Encoding.GetEncoding(charset);
              }
              catch (ArgumentException)
              {
                  return Encoding.UTF8;
              }
          }
      }
    View Code

    详细的源Demo,可查看Github :HanziDictionary

  • 相关阅读:
    顺序表与链表
    Python SQL相关操作
    Python 数据分析练习1
    Python 操作MySQL数据库
    Python 乘法口诀表
    Python 导出数据from Mysql
    Python subplot 绘画
    Shell 自定义函数
    Shell 双括号概述
    Shell for、while循环
  • 原文地址:https://www.cnblogs.com/kybs0/p/9484347.html
Copyright © 2011-2022 走看看