zoukankan      html  css  js  c++  java
  • Java 获取中文拼音、首字母

    工具类:

    package com.infinitePossibilities.utils;
    
    import java.util.Arrays;
    import java.util.HashSet;
    import java.util.Set;
    import net.sourceforge.pinyin4j.PinyinHelper;
    import net.sourceforge.pinyin4j.format.HanyuPinyinCaseType;
    import net.sourceforge.pinyin4j.format.HanyuPinyinOutputFormat;
    import net.sourceforge.pinyin4j.format.HanyuPinyinToneType;
    import net.sourceforge.pinyin4j.format.HanyuPinyinVCharType;
    import net.sourceforge.pinyin4j.format.exception.BadHanyuPinyinOutputFormatCombination;
    import wfc.service.log.Log;
    
    /**
     * Static helpers that convert Chinese text to Hanyu Pinyin via pinyin4j.
     *
     * <p>All conversions are tone-less. Characters are classified as Chinese when
     * they fall in the CJK range U+4E00..U+9FA5, matching the check the original
     * regular expression performed.
     */
    public class pinYinUtils {

        /**
         * Upper bound on how many leading characters {@link #getPinyin(String)}
         * expands. Polyphonic characters multiply the number of combinations, so
         * the cap keeps the Cartesian product from exhausting memory. The value is
         * only read by this class; it is never modified as a side effect of a call.
         */
        public static int wordLength = 20;

        /**
         * Replaces every Chinese character in the input with its first pinyin
         * reading (no tone); all other characters are copied through unchanged.
         *
         * @param inputString text to convert; surrounding whitespace is trimmed,
         *                    {@code null} yields an empty string
         * @param type        {@code 1} for lower-case pinyin, any other value for
         *                    upper-case
         * @return the converted text
         */
        public static String getPingYin(String inputString, int type) {
            if (inputString == null) {
                return "";
            }
            HanyuPinyinOutputFormat format = newFormat(type);
            format.setVCharType(HanyuPinyinVCharType.WITH_V);

            StringBuilder output = new StringBuilder();
            for (char c : inputString.trim().toCharArray()) {
                String[] readings = isChinese(c) ? lookup(c, format) : null;
                if (readings != null && readings.length > 0) {
                    output.append(readings[0]);
                } else {
                    // Not Chinese, or no reading available: keep the character itself.
                    output.append(c);
                }
            }
            return output.toString();
        }

        /**
         * Builds a string from the first letter of each Chinese character's first
         * pinyin reading. ASCII characters are kept, after which every character
         * that is not a regex word character ({@code \W}) is stripped from the
         * result. Non-ASCII characters without a pinyin reading are dropped.
         *
         * @param chinese text to abbreviate; {@code null} yields an empty string
         * @param type    {@code 1} for lower-case initials, any other value for
         *                upper-case
         * @return the pinyin initials
         */
        public static String getFirstSpell(String chinese, int type) {
            if (chinese == null) {
                return "";
            }
            HanyuPinyinOutputFormat format = newFormat(type);
            StringBuilder initials = new StringBuilder();
            for (char c : chinese.toCharArray()) {
                if (c > 128) {
                    String[] readings = lookup(c, format);
                    if (readings != null && readings.length > 0 && !readings[0].isEmpty()) {
                        initials.append(readings[0].charAt(0));
                    }
                } else {
                    initials.append(c);
                }
            }
            return initials.toString().replaceAll("\\W", "").trim();
        }

        /**
         * Returns every lower-case pinyin spelling of the input, taking all
         * readings of polyphonic characters into account.
         *
         * <p>Only the first {@link #wordLength} characters are considered. Chinese
         * characters contribute their readings, ASCII letters contribute
         * themselves, and every other character contributes nothing.
         *
         * @param src text to convert
         * @return the set of spellings; empty when {@code src} is {@code null} or
         *         blank, or when {@link #wordLength} is not positive
         */
        public static Set<String> getPinyin(String src) {
            if (src == null || src.trim().isEmpty()) {
                Log.info("参数校验不合法!");
                return new HashSet<String>();
            }
            // Use a local limit: writing the shortened length back into the static
            // field would permanently truncate every later, longer input.
            int limit = Math.min(src.length(), wordLength);
            if (limit <= 0) {
                return new HashSet<String>();
            }

            HanyuPinyinOutputFormat format = newFormat(1);
            format.setVCharType(HanyuPinyinVCharType.WITH_V);

            String[][] candidates = new String[limit][];
            for (int i = 0; i < limit; i++) {
                char c = src.charAt(i);
                if (isChinese(c)) {
                    candidates[i] = distinct(lookup(c, format));
                } else if ((c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z')) {
                    candidates[i] = new String[] { String.valueOf(c) };
                } else {
                    candidates[i] = new String[] { "" };
                }
            }
            return new HashSet<String>(Arrays.asList(Exchange(candidates)));
        }

        /** Small manual demonstration of the three public conversions. */
        public static void main(String[] args) {

            System.out.println(getPingYin("将字符串中的中文转化为拼音,其他字符不变",1));
            System.out.println(getFirstSpell("获取汉字串拼音首字母,英文字符不变",1));
            System.out.println(getPinyin("获取拼音集合11测试cc"));
        }

        /**
         * Computes the Cartesian product of the rows, concatenating one element
         * from each row in row order.
         *
         * @param strJaggedArray one row of alternatives per position
         * @return all concatenations; an empty array when the input is
         *         {@code null} or has no rows; the first row itself when there is
         *         exactly one row
         */
        public static String[] Exchange(String[][] strJaggedArray) {
            if (strJaggedArray == null || strJaggedArray.length == 0) {
                return new String[0];
            }
            return DoExchange(strJaggedArray)[0];
        }

        /**
         * Folds the rows left to right into a single row holding every
         * concatenation. Elements of earlier rows vary slowest, which is the same
         * ordering the former recursive implementation produced.
         *
         * @param strJaggedArray rows to combine; must contain at least one row
         * @return a one-row array holding the combined result
         */
        private static String[][] DoExchange(String[][] strJaggedArray) {
            if (strJaggedArray.length < 2) {
                return strJaggedArray;
            }
            String[] combined = strJaggedArray[0];
            for (int row = 1; row < strJaggedArray.length; row++) {
                String[] next = strJaggedArray[row];
                String[] product = new String[combined.length * next.length];
                int index = 0;
                for (String prefix : combined) {
                    for (String suffix : next) {
                        product[index++] = prefix + suffix;
                    }
                }
                combined = product;
            }
            return new String[][] { combined };
        }

        /** Creates a tone-less output format with the requested letter case. */
        private static HanyuPinyinOutputFormat newFormat(int type) {
            HanyuPinyinOutputFormat format = new HanyuPinyinOutputFormat();
            if (1 == type) {
                format.setCaseType(HanyuPinyinCaseType.LOWERCASE);
            } else {
                format.setCaseType(HanyuPinyinCaseType.UPPERCASE);
            }
            format.setToneType(HanyuPinyinToneType.WITHOUT_TONE);
            return format;
        }

        /** Tells whether the character lies in the CJK range U+4E00..U+9FA5. */
        private static boolean isChinese(char c) {
            return c >= '\u4E00' && c <= '\u9FA5';
        }

        /**
         * Looks up the pinyin readings of one character.
         *
         * @return the readings, or {@code null} when none are available or the
         *         format is rejected (not expected with the tone-less formats
         *         built by this class)
         */
        private static String[] lookup(char c, HanyuPinyinOutputFormat format) {
            try {
                return PinyinHelper.toHanyuPinyinStringArray(c, format);
            } catch (BadHanyuPinyinOutputFormatCombination e) {
                Log.info("Invalid pinyin output format combination: " + e);
                return null;
            }
        }

        /**
         * Removes duplicate readings so identical tone-less spellings do not
         * inflate the Cartesian product. A missing or empty list becomes a single
         * empty string so the character is simply skipped.
         */
        private static String[] distinct(String[] readings) {
            if (readings == null || readings.length == 0) {
                return new String[] { "" };
            }
            return new HashSet<String>(Arrays.asList(readings)).toArray(new String[0]);
        }

    }

    pom依赖:

     <!-- pinyin4j library; presumably the artifact that supplies the net.sourceforge.pinyin4j classes imported by the utility class (verify the coordinates against your repository) -->
     <dependency>
        <groupId>com.belerweb</groupId>
        <artifactId>pinyin4j</artifactId>
        <version>2.5.0</version>
      </dependency>
  • 相关阅读:
    Golang Failpoint 的设计与实现
    没涉及到最值求解;观点:矩阵乘法无法表达出结果。 现实生活中事件、现象的数学表达
    多元微分学 枚举破解15位路由器密码 存储空间限制 拆分减长,求最值 数据去重
    ARP Poisoning Attack and Mitigation Techniques ARP欺骗 中间人攻击 Man-In-The-Middle (MITM) attack 嗅探 防范 Can one MAC address have two different IP addresses within the network?
    The C10K problem
    HTTP Streaming Architecture HLS 直播点播 HTTP流架构
    现代IM系统中消息推送和存储架构的实现
    现代IM系统中的消息系统架构
    长连接锁服务优化实践 C10K问题 nodejs的内部构造 limits.conf文件修改 sysctl.conf文件修改
    doubleclick cookie、动态脚本、用户画像、用户行为分析和海量数据存取 推荐词 京东 电商 信息上传 黑洞 https://blackhole.m.jd.com/getinfo
  • 原文地址:https://www.cnblogs.com/lifan12589/p/15748685.html
Copyright © 2011-2022 走看看