JavaScript 中有三种编码方法:escape、encodeURI、encodeURIComponent。地址栏中的那些 %XX 就是汉字对应的字节经 encodeURI 编码(百分号编码)后的结果,每个字节对应一个 %XX。
转换方式如下:
/**
 * Extracts the Chinese characters from the request's query string and writes
 * them back to the client as GBK-encoded text.
 *
 * <p>The raw query string is percent-decoded as UTF-8, every run of characters
 * in the range U+4E00..U+9FA5 is collected, and the result is round-tripped
 * through a GBK percent-encoding as a sanity check before being written to the
 * response. Intermediate values are printed to standard output.
 *
 * <p>A request without a query string is treated as an empty query. A query
 * string whose percent-encoding is rejected by the decoder is answered with
 * HTTP 400 instead of propagating an unchecked exception.
 *
 * @param request  the incoming request whose raw query string is inspected
 * @param response the response that receives the extracted characters
 * @throws ServletException declared for the servlet contract; not thrown
 *                          directly by this method
 * @throws IOException      if writing to the response fails
 */
public void doGet(HttpServletRequest request, HttpServletResponse response)
        throws ServletException, IOException {
    String line = request.getQueryString();
    System.out.println(line);
    // getQueryString() yields null when the URL carries no query part;
    // URLDecoder.decode would throw a NullPointerException on it.
    if (line == null) {
        line = "";
    }

    // Turn the %XX escapes back into bytes and decode those bytes as UTF-8.
    String isDecodeUrl;
    try {
        isDecodeUrl = URLDecoder.decode(line, "UTF-8");
    } catch (IllegalArgumentException e) {
        // The query string comes from the client; an incomplete or non-hex
        // escape (e.g. "%zz") is a client error, not a server failure.
        response.sendError(HttpServletResponse.SC_BAD_REQUEST,
                "Malformed percent-encoding in query string");
        return;
    }
    System.out.println(isDecodeUrl);

    String reg = "[\u4e00-\u9fa5]+"; // Unicode range used to match Chinese characters
    StringBuilder chineseWord = new StringBuilder();
    Pattern p = Pattern.compile(reg);
    Matcher m = p.matcher(isDecodeUrl);
    // Concatenate every matched run of Chinese characters, dropping everything else.
    while (m.find()) {
        chineseWord.append(m.group());
    }
    System.out.println(chineseWord.toString());

    // Encode the characters to GBK bytes and percent-encode those bytes.
    String cnm = URLEncoder.encode(chineseWord.toString(), "GBK");
    System.out.println(cnm);

    // Experiment: verify that the GBK percent-encoding decodes back correctly.
    isDecodeUrl = URLDecoder.decode(cnm, "GBK");
    System.out.println(isDecodeUrl);

    response.setCharacterEncoding("GBK");
    response.setHeader("Content-type", "text/html;charset=GBK");
    PrintWriter out = response.getWriter();
    out.println(isDecodeUrl);
}