zoukankan
html css js c++ java
使用java得到网页编码格式
import java.net.MalformedURLException; import java.net.URL; import org.apache.commons.httpclient.Header; import org.apache.commons.httpclient.HeaderElement; import org.apache.commons.httpclient.HttpClient; import org.apache.commons.httpclient.NameValuePair; import org.apache.commons.httpclient.methods.GetMethod; import toptrack.tools.JQueryBase; /** * 得到网页编码格式 * @author dl */ public class JHtmlUpdateCheck { /**文本内容编码识别类*/ private static cpdetector.io.CodepageDetectorProxy detector = cpdetector.io.CodepageDetectorProxy.getInstance(); static { detector.add(new cpdetector.io.HTMLCodepageDetector(false)); detector.add(cpdetector.io.JChardetFacade.getInstance()); } /** *<br>方法说明:得到网页编码格式 *<br>输入参数:strUrl 网页链接; timeout 超时设置 *<br>返回类型:网页编码 */ public static String getEncoding(String strUrl, int timeout) { String strEncoding = null; HttpClient client = new HttpClient(); client.getHttpConnectionManager().getParams().setConnectionTimeout(timeout); GetMethod method = new GetMethod(strUrl); method.setFollowRedirects( true ); int statusCode; try { statusCode = client.executeMethod(method); if( statusCode != -1) { //从http头得到网页编码 strEncoding = getContentCharSet(method.getResponseHeader("Content-Type")); if (strEncoding != null) { method.releaseConnection(); return strEncoding; } //通过解析meta得到网页编码 String strHtml = method.getResponseBodyAsString().toLowerCase(); StringBuffer strBuffer = new StringBuffer(); int pos = JQueryBase.getTagText(strHtml, "<meta", ">", strBuffer, false, 0); while (strBuffer.length() > 0) { StringBuffer strEncodingBuffer = new StringBuffer(); JQueryBase.getTagText(strBuffer.toString(), "charset=", "“"", strEncodingBuffer, 0); if (strEncodingBuffer.length() > 0) { strEncoding = strEncodingBuffer.toString(); method.releaseConnection(); return strEncoding; } strBuffer = new StringBuffer(); pos = JQueryBase.getTagText(strHtml, "<meta", ">", strBuffer, false, pos); } //分析字节得到网页编码 strEncoding = getFileEncoding(strUrl, timeout); //设置默认网页字符编码 if (strEncoding == null) strEncoding = "GBK"; } method.releaseConnection(); } catch (Exception e) { // TODO Auto-generated catch block System.out.println(e.getClass() + "对" + strUrl + "提取网页编码信息出错"); return null; } return strEncoding; } /** *<br>方法说明:通过http头得到网页编码信息 *<br>输入参数:contentheade rhttp头 *<br>返回类型:网页编码 */ protected static String getContentCharSet(Header contentheader) { String charset = null; if (contentheader != null) { HeaderElement values[] = contentheader.getElements(); if (values.length == 1) { NameValuePair param = values[0].getParameterByName("charset"); if (param != null) { charset = param.getValue(); } } } return charset; }
欢迎转载,转载请注明出处。本文出自:
http://www.cnblogs.com/zdcaolei
0
查看全文
相关阅读:
使用字符流(Writer、Reader)完成对文件的读写操作
Java中File类重修
适配器模式学习
oracle 新建数据库 ,新建用户
8.19 extjs jar 包使用。
8.15 session 有效时间, session在数据查询中最后不用
8.15 自定义tr行 滚动 信息行的滚动
8.15jsp document 头部声明 区别
8.14 图片滚动无缝
8.13Oracle新建用户、角色,授权
原文地址:https://www.cnblogs.com/zdcaolei/p/2122939.html
最新文章
ASP.NET错误页
URL重写 UrlRewrite
ASP.NET全局文件与防盗链
Reverse Linked List II
Longest Common Prefix
Swap Nodes in Pairs
Merge k Sorted Lists
Kth Largest Element in an Array
Merge Two Sorted Lists
Add Two Numbers
热门文章
Remove Nth Node From End of List 和链表题目总结
Longest Substring Without Repeating Characters
Construct Binary Tree from Inorder and Postorder Traversal
Java数据流学习
Java字节数组流学习
Java对象流与序列化学习
Java打印流学习
Java字节缓冲流和字符缓冲流学习
字节字符转换流学习
使用字节流(InputStream、OutputStream)简单完成对文件的复制
Copyright © 2011-2022 走看看