zoukankan      html  css  js  c++  java
  • 一些中文相关的操作方法

    package com.opslab.util;


    import net.sourceforge.pinyin4j.PinyinHelper;
    import net.sourceforge.pinyin4j.format.HanyuPinyinCaseType;
    import net.sourceforge.pinyin4j.format.HanyuPinyinOutputFormat;
    import net.sourceforge.pinyin4j.format.HanyuPinyinToneType;
    import net.sourceforge.pinyin4j.format.HanyuPinyinVCharType;
    import net.sourceforge.pinyin4j.format.exception.BadHanyuPinyinOutputFormatCombination;

    import java.util.regex.Matcher;
    import java.util.regex.Pattern;

    /**
    * 一些中文相关的操作方法
    */
    public final class ChinesUtil {
    private ChinesUtil(){

    }
    /**
    * 将字符串中的中文转化为拼音,其他字符不变
    *
    * @param inputString
    * @return
    */
    public final static String getPingYin(String inputString) {
    HanyuPinyinOutputFormat format = new HanyuPinyinOutputFormat();
    format.setCaseType(HanyuPinyinCaseType.LOWERCASE);
    format.setToneType(HanyuPinyinToneType.WITHOUT_TONE);
    format.setVCharType(HanyuPinyinVCharType.WITH_V);

    char[] input = inputString.trim().toCharArray();
    String output = "";

    try {
    for (int i = 0; i < input.length; i++) {
    if (java.lang.Character.toString(input[i]).matches("[\u4E00-\u9FA5]+")) {
    String[] temp = PinyinHelper.toHanyuPinyinStringArray(input[i], format);
    output += temp[0];
    } else
    output += java.lang.Character.toString(input[i]);
    }
    } catch (BadHanyuPinyinOutputFormatCombination e) {
    e.printStackTrace();
    }
    return output;
    }

    /**
    * 获取汉字串拼音首字母,英文字符不变
    *
    * @param chinese 汉字串
    * @return 汉语拼音首字母
    */
    public final static String getFirstSpell(String chinese) {
    StringBuffer pybf = new StringBuffer();
    char[] arr = chinese.toCharArray();
    HanyuPinyinOutputFormat defaultFormat = new HanyuPinyinOutputFormat();
    defaultFormat.setCaseType(HanyuPinyinCaseType.LOWERCASE);
    defaultFormat.setToneType(HanyuPinyinToneType.WITHOUT_TONE);
    for (int i = 0; i < arr.length; i++) {
    if (arr[i] > 128) {
    try {
    String[] temp = PinyinHelper.toHanyuPinyinStringArray(arr[i], defaultFormat);
    if (temp != null) {
    pybf.append(temp[0].charAt(0));
    }
    } catch (BadHanyuPinyinOutputFormatCombination e) {
    e.printStackTrace();
    }
    } else {
    pybf.append(arr[i]);
    }
    }
    return pybf.toString().replaceAll("\W", "").trim();
    }

    /**
    * 获取汉字串拼音,英文字符不变
    *
    * @param chinese 汉字串
    * @return 汉语拼音
    */
    public final static String getFullSpell(String chinese) {
    StringBuffer pybf = new StringBuffer();
    char[] arr = chinese.toCharArray();
    HanyuPinyinOutputFormat defaultFormat = new HanyuPinyinOutputFormat();
    defaultFormat.setCaseType(HanyuPinyinCaseType.LOWERCASE);
    defaultFormat.setToneType(HanyuPinyinToneType.WITHOUT_TONE);
    for (int i = 0; i < arr.length; i++) {
    if (arr[i] > 128) {
    try {
    pybf.append(PinyinHelper.toHanyuPinyinStringArray(arr[i], defaultFormat)[0]);
    } catch (BadHanyuPinyinOutputFormatCombination e) {
    e.printStackTrace();
    }
    } else {
    pybf.append(arr[i]);
    }
    }
    return pybf.toString();
    }


    // 只能判断部分CJK字符(CJK统一汉字)
    public final static boolean isChineseByREG(String str) {
    if (str == null) {
    return false;
    }
    Pattern pattern = Pattern.compile("[\u4E00-\u9FBF]+");
    return pattern.matcher(str.trim()).find();
    }

    // 只能判断部分CJK字符(CJK统一汉字)
    public final static boolean isChineseByName(String str) {
    if (str == null) {
    return false;
    }
    // 大小写不同:\p 表示包含,\P 表示不包含
    // \p{Cn} 的意思为 Unicode 中未被定义字符的编码,\P{Cn} 就表示 Unicode中已经被定义字符的编码
    String reg = "\p{InCJK Unified Ideographs}&&\P{Cn}";
    Pattern pattern = Pattern.compile(reg);
    return pattern.matcher(str.trim()).find();
    }


    // 完整的判断中文汉字和符号
    public final static boolean isChinese(String strName) {
    char[] ch = strName.toCharArray();
    for (int i = 0; i < ch.length; i++) {
    char c = ch[i];
    if (isChinese(c)) {
    return true;
    }
    }
    return false;
    }

    /**
    * 判断是否是中文
    *
    * @param c
    * @return
    */
    public final static boolean isChinese(char c) {
    Character.UnicodeBlock ub = Character.UnicodeBlock.of(c);
    if (ub == Character.UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS || ub == Character.UnicodeBlock.CJK_COMPATIBILITY_IDEOGRAPHS || ub == Character.UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A || ub == Character.UnicodeBlock.GENERAL_PUNCTUATION || ub == Character.UnicodeBlock.CJK_SYMBOLS_AND_PUNCTUATION || ub == Character.UnicodeBlock.HALFWIDTH_AND_FULLWIDTH_FORMS) {
    return true;
    }
    return false;
    }

    /**
    * 获取一个字符串中中文字符的个数
    */
    public final static int ChineseLength(String str) {
    Pattern p = Pattern.compile("[u4E00-u9FA5]+");
    Matcher m = p.matcher(str);
    int i = 0;
    while (m.find()) {
    String temp = m.group(0);
    i += temp.length();
    }
    return i;
    }

    /**
    * 判断是否是乱码
    *
    * @param strName
    * @return
    */
    public final static boolean isMessyCode(String strName) {
    Pattern p = Pattern.compile("\s*| *| *| *");
    Matcher m = p.matcher(strName);
    String after = m.replaceAll("");
    String temp = after.replaceAll("\p{P}", "");
    char[] ch = temp.trim().toCharArray();
    float chLength = 0;
    float count = 0;
    for (int i = 0; i < ch.length; i++) {
    char c = ch[i];
    if (!Character.isLetterOrDigit(c)) {
    if (!ChinesUtil.isChinese(c)) {
    count = count + 1;
    }
    chLength++;
    }
    }
    float result = count / chLength;
    if (result > 0.4) {
    return true;
    } else {
    return false;
    }
    }
    }

  • 相关阅读:
    How to convert VirtualBox vdi to KVM qcow2
    (OK)(OK) adb -s emulator-5554 shell
    (OK)(OK) using adb with a NAT'ed VM
    (OK) How to access a NAT guest from host with VirtualBox
    (OK) Creating manually one VMs from an existing VDI file in CLI (VBoxManage) in Fedora 23
    (OK)(OK) Creating VMs from an existing VDI file in CLI (VBoxManage) in Fedora 23
    (OK) Creating_VMs_from_an_existing_VDI_file.txt
    (OK) Creating VMs from an existing VDI file —— in OS X
    (OK) install_IBM_SERVER.txt
    (OK) install chrome & busybox in android-x86_64 —— uninstall chrome
  • 原文地址:https://www.cnblogs.com/chinaifae/p/10254805.html
Copyright © 2011-2022 走看看