zoukankan      html  css  js  c++  java
  • UTF-8转成GBK 之多种方法

    	/**
    	 * Returns the part of {@code str} that follows the {@code n}-th question mark and
    	 * logs the outcome of several GBK/UTF-8 conversion experiments on that part.
    	 *
    	 * <p>Each search for {@code '?'} resumes one position after the previous hit, and
    	 * the very first search starts at index 1. The returned text begins two characters
    	 * after the last {@code '?'} found (at index 2 when {@code n <= 0}).
    	 *
    	 * <p>NOTE(review): because the first search starts at index 1, a {@code '?'} at
    	 * index 0 is never counted, and the {@code + 2} offset also drops the character
    	 * right after the {@code '?'} - confirm that both are intended.
    	 *
    	 * @param str text to scan
    	 * @param n   number of question marks to locate
    	 * @return {@code str} itself when fewer than {@code n} question marks are found,
    	 *         otherwise the tail described above; {@code null} when any exception is
    	 *         raised while processing (for example a {@code null} input or a tail
    	 *         offset past the end of {@code str})
    	 * @author sunshaofeng
    	 * @date 2018-9-18 15:45
    	 * @version 1.0
    	 */
    	private static String getStr(String str, int n) {
    	    try {
    	        // Walk forward to the n-th '?'; give the input back untouched if it runs out.
    	        int markPos = 0;
    	        for (int seen = 0; seen < n; seen++) {
    	            markPos = str.indexOf("?", markPos + 1);
    	            if (markPos == -1) {
    	                return str;
    	            }
    	        }
    	        String tail = str.substring(markPos + 2);

    	        // Report the guessed encoding and the text before any conversion.
    	        logger.info("encoding :" + getEncoding(tail));
    	        logger.info("ENCODE BEFORE :" + tail);

    	        // Approach one: hand-rolled UTF-8 byte construction.
    	        try {
    	            logger.info("gbk2Utf encodeToGBK ONE:" + gbk2Utf(tail));
    	        } catch (Exception e) {
    	            e.printStackTrace();
    	        }

    	        // Approach two: UTF-8 bytes built with bit arithmetic, then decoded.
    	        try {
    	            byte[] utf8Bytes = getUTF8BytesFromGBKString(tail);
    	            logger.info("gbk2Utf encodeToGBK TWO:" + new String(utf8Bytes, "UTF-8"));
    	        } catch (Exception e) {
    	            e.printStackTrace();
    	        }

    	        // Approach three: round trip through the requested charset.
    	        try {
    	            logger.info("gbk2Utf encodeToGBK THREE:" + charsetConvert(tail, "UTF-8"));
    	        } catch (Exception e) {
    	            e.printStackTrace();
    	        }

    	        // Approach four: escape to unicode notation and unescape again.
    	        try {
    	            String unescaped = unicodeToUtf8(gbkToUnicode(tail));
    	            logger.info("gbk2Utf encodeToGBK FOUR:" + unescaped);
    	            logger.info("encoding After:" + getEncoding(unescaped));
    	        } catch (Exception e) {
    	            e.printStackTrace();
    	        }

    	        // Approach five: encode as GB2312, decode as UTF-8.
    	        try {
    	            byte[] gb2312Bytes = tail.getBytes("GB2312");
    	            logger.info("gbk2Utf encodeToGBK FIVE:" + new String(gb2312Bytes, "UTF-8"));
    	        } catch (Exception e) {
    	            e.printStackTrace();
    	        }

    	        // Approach six: URL-encode using UTF-8.
    	        try {
    	            logger.info("gbk2Utf encodeToGBK 6:" + URLEncoder.encode(tail, "UTF-8"));
    	        } catch (Exception e) {
    	            e.printStackTrace();
    	        }

    	        return tail;
    	    } catch (Exception e) {
    	        e.printStackTrace();
    	        return null;
    	    }
    	}
    	/**
    	 * GBK to UTF-8, approach one: encodes the characters of {@code gbk} into UTF-8
    	 * bytes by hand and decodes those bytes back into a string.
    	 *
    	 * <p>Every code point is written in its shortest UTF-8 form (1 to 4 bytes). A fixed
    	 * three-byte pattern would be an overlong encoding for code points below U+0800,
    	 * which the UTF-8 decoder rejects, so plain ASCII would come back as replacement
    	 * characters. An unpaired surrogate is still written as three bytes and is
    	 * therefore not expected to survive decoding.
    	 *
    	 * @param gbk text to re-encode; must not be {@code null}
    	 * @return the text obtained by decoding the generated UTF-8 bytes
    	 * @throws UnsupportedEncodingException if the runtime does not know the UTF-8 charset
    	 */
    	private static String gbk2Utf(String gbk) throws UnsupportedEncodingException {
    	    // Worst case is 3 bytes per UTF-16 unit; a surrogate pair (2 units) needs 4 bytes.
    	    byte[] utf8 = new byte[3 * gbk.length()];
    	    int used = 0;
    	    for (int i = 0; i < gbk.length(); ) {
    	        int cp = gbk.codePointAt(i);
    	        i += Character.charCount(cp);
    	        if (cp < 0x80) {
    	            utf8[used++] = (byte) cp;
    	        } else if (cp < 0x800) {
    	            utf8[used++] = (byte) (0xC0 | (cp >> 6));
    	            utf8[used++] = (byte) (0x80 | (cp & 0x3F));
    	        } else if (cp < 0x10000) {
    	            utf8[used++] = (byte) (0xE0 | (cp >> 12));
    	            utf8[used++] = (byte) (0x80 | ((cp >> 6) & 0x3F));
    	            utf8[used++] = (byte) (0x80 | (cp & 0x3F));
    	        } else {
    	            utf8[used++] = (byte) (0xF0 | (cp >> 18));
    	            utf8[used++] = (byte) (0x80 | ((cp >> 12) & 0x3F));
    	            utf8[used++] = (byte) (0x80 | ((cp >> 6) & 0x3F));
    	            utf8[used++] = (byte) (0x80 | (cp & 0x3F));
    	        }
    	    }
    	    // Decode only the bytes actually written, the way a UTF-8 consumer would.
    	    return new String(utf8, 0, used, "UTF-8");
    	}
    	 /**
    	  * GBK to UTF-8, approach two: produces the UTF-8 byte sequence for the characters
    	  * of {@code gbkStr} using bit arithmetic.
    	  *
    	  * <p>Each code point is written in its shortest form: one byte below U+0080, two
    	  * bytes below U+0800, three bytes below U+10000 and four bytes above that.
    	  * Emitting three bytes for the two-byte range, or for each half of a surrogate
    	  * pair, would yield sequences that are not valid UTF-8. An unpaired surrogate is
    	  * still written as a three-byte sequence.
    	  *
    	  * @param gbkStr text to encode; must not be {@code null}
    	  * @return a newly allocated array holding exactly the encoded bytes
    	  */
    	 public static byte[] getUTF8BytesFromGBKString(String gbkStr) {
    	     int n = gbkStr.length();
    	     // Worst case is 3 bytes per UTF-16 unit; a surrogate pair (2 units) needs 4 bytes.
    	     byte[] utfBytes = new byte[3 * n];
    	     int k = 0;
    	     for (int i = 0; i < n; ) {
    	         int cp = gbkStr.codePointAt(i);
    	         i += Character.charCount(cp);
    	         if (cp < 0x80) {
    	             utfBytes[k++] = (byte) cp;
    	         } else if (cp < 0x800) {
    	             utfBytes[k++] = (byte) (0xC0 | (cp >> 6));
    	             utfBytes[k++] = (byte) (0x80 | (cp & 0x3F));
    	         } else if (cp < 0x10000) {
    	             utfBytes[k++] = (byte) (0xE0 | (cp >> 12));
    	             utfBytes[k++] = (byte) (0x80 | ((cp >> 6) & 0x3F));
    	             utfBytes[k++] = (byte) (0x80 | (cp & 0x3F));
    	         } else {
    	             utfBytes[k++] = (byte) (0xF0 | (cp >> 18));
    	             utfBytes[k++] = (byte) (0x80 | ((cp >> 12) & 0x3F));
    	             utfBytes[k++] = (byte) (0x80 | ((cp >> 6) & 0x3F));
    	             utfBytes[k++] = (byte) (0x80 | (cp & 0x3F));
    	         }
    	     }
    	     if (k == utfBytes.length) {
    	         return utfBytes;
    	     }
    	     // Trim the unused tail of the worst-case buffer.
    	     byte[] exact = new byte[k];
    	     System.arraycopy(utfBytes, 0, exact, 0, k);
    	     return exact;
    	 }
    	 /**
    	  * GBK to UTF-8, approach three: encodes {@code str} into bytes of {@code charset}
    	  * and decodes those bytes again with the same charset.
    	  *
    	  * <p>No Base64 step is performed between encoding and decoding: such a round trip
    	  * leaves the bytes unchanged, and the JDK-internal {@code sun.misc} Base64 classes
    	  * are not available on current JDKs.
    	  *
    	  * @param str     text to convert; must not be {@code null}
    	  * @param charset name of the charset used for both encoding and decoding
    	  * @return the round-tripped text, or {@code str} unchanged when {@code charset} is
    	  *         not supported by the runtime
    	  */
    	 private static String charsetConvert(String str, String charset) {
    	     try {
    	         byte[] bytes = str.getBytes(charset);
    	         return new String(bytes, charset);
    	     } catch (IOException e) {
    	         // Unsupported charset name: report it and hand the input back unchanged.
    	         e.printStackTrace();
    	         return str;
    	     }
    	 }
    	 
    	 
    	 	/**
    	 	 * Guesses a charset name for {@code str} by round-tripping it through a fixed
    	 	 * list of candidate charsets.
    	 	 *
    	 	 * <p>The candidates are tried in the order GB2312, ISO-8859-1, UTF-8, GBK. The
    	 	 * first one for which encoding {@code str} to bytes and decoding those bytes
    	 	 * reproduces {@code str} is returned. Any exception raised while probing a
    	 	 * candidate is ignored and the next candidate is tried.
    	 	 *
    	 	 * @param str text to probe
    	 	 * @return the name of the first candidate that round-trips {@code str}, or an
    	 	 *         empty string when none does
    	 	 */
    	 	public static String getEncoding(String str) {
    	 	    String[] candidates = {"GB2312", "ISO-8859-1", "UTF-8", "GBK"};
    	 	    for (String candidate : candidates) {
    	 	        try {
    	 	            byte[] encoded = str.getBytes(candidate);
    	 	            String decoded = new String(encoded, candidate);
    	 	            if (str.equals(decoded)) {
    	 	                return candidate;
    	 	            }
    	 	        } catch (Exception ignored) {
    	 	            // Probing this charset failed; move on to the next candidate.
    	 	        }
    	 	    }
    	 	    return "";
    	 	}
    		/**
    	     * gbk转unicode
    	     * @param str
    	     * @return
    	     */
    	    public static String gbkToUnicode(String str) {
    	        StringBuffer result = new StringBuffer();
    	        for (int i = 0; i < str.length(); i++) {
    	            char chr1 = (char) str.charAt(i);
    	            if ((chr1 & (0x00FF)) == chr1) {
    	                result.append(chr1);
    	                continue;
    	            }
    	            result.append("\u" + Integer.toHexString((int) chr1));
    	        }
    	        return result.toString();
    	    }
    	    /**
    	     * unicode转utf-8
    	     * @param theString
    	     * @return
    	     */
    	    public static String unicodeToUtf8(String theString) {
    	        char aChar;
    	        int len = theString.length();
    	        StringBuffer outBuffer = new StringBuffer(len);
    	        for (int x = 0; x < len;) {
    	            aChar = theString.charAt(x++);
    	            if (aChar == '\') {
    	                aChar = theString.charAt(x++);
    	                if (aChar == 'u') {
    	                    int value = 0;
    	                    for (int i = 0; i < 4; i++) {
    	                        aChar = theString.charAt(x++);
    	                        switch (aChar) {
    	                        case '0':
    	                        case '1':
    	                        case '2':
    	                        case '3':
    	                        case '4':
    	                        case '5':
    	                        case '6':
    	                        case '7':
    	                        case '8':
    	                        case '9':
    	                            value = (value << 4) + aChar - '0';
    	                            break;
    	                        case 'a':
    	                        case 'b':
    	                        case 'c':
    	                        case 'd':
    	                        case 'e':
    	                        case 'f':
    	                            value = (value << 4) + 10 + aChar - 'a';
    	                            break;
    	                        case 'A':
    	                        case 'B':
    	                        case 'C':
    	                        case 'D':
    	                        case 'E':
    	                        case 'F':
    	                            value = (value << 4) + 10 + aChar - 'A';
    	                            break;
    	                        default:
    	                            throw new IllegalArgumentException(
    	                                    "Malformed   \uxxxx   encoding.");
    	                        }
    	                    }
    	                    outBuffer.append((char) value);
    	                } else {
    	                    if (aChar == 't')
    	                        aChar = '	';
    	                    else if (aChar == 'r')
    	                        aChar = '
    ';
    	                    else if (aChar == 'n')
    	                        aChar = '
    ';
    	                    else if (aChar == 'f')
    	                        aChar = 'f';
    	                    outBuffer.append(aChar);
    	                }
    	            } else
    	                outBuffer.append(aChar);
    	        }
    	        return outBuffer.toString();
    	    }
    	    
    	 
    	 
    

      

  • 相关阅读:
    java中a++和++a的区别详解
    Oracle 对比两张表不一样 的数据
    通配符的匹配很全面, 但无法找到元素 'tx:annotation-driven' 的声明
    Java语言基础-运算符
    java中+=详解 a+=b和a=a+b的区别
    java语言基础-变量
    java语言基础-进制
    Spring整合CXF发布及调用WebService
    Oracle Job定时任务的使用详解
    MySQL的主从配置
  • 原文地址:https://www.cnblogs.com/liushisaonian/p/9894633.html
Copyright © 2011-2022 走看看