1 package com.example.test; 2 3 import android.text.TextUtils; 4 import android.util.Log; 5 6 import java.text.Collator; 7 import java.util.ArrayList; 8 import java.util.Locale; 9 10 /** 11 * An object to convert Chinese character to its corresponding pinyin string. 12 * For characters with multiple possible pinyin string, only one is selected 13 * according to collator. Polyphone is not supported in this implementation. 14 * This class is implemented to achieve the best runtime performance and minimum 15 * runtime resources with tolerable sacrifice of accuracy. This implementation 16 * highly depends on zh_CN ICU collation data and must be always synchronized 17 * with ICU. 18 * 19 * Currently this file is aligned to zh.txt in ICU 4.6 来自android4.2源码 20 */ 21 public class HanziToPinyin { 22 private static final String TAG = "HanziToPinyin"; 23 24 // Turn on this flag when we want to check internal data structure. 25 private static final boolean DEBUG = false; 26 27 /** 28 * Unihans array. 29 * 30 * Each unihans is the first one within same pinyin when collator is zh_CN. 31 */ 32 public static final char[] UNIHANS = { 'u963f', 'u54ce', 'u5b89', 33 'u80ae', 'u51f9', 'u516b', 'u6300', 'u6273', 'u90a6', 34 'u52f9', 'u9642', 'u5954', 'u4f3b', 'u5c44', 'u8fb9', 35 'u706c', 'u618b', 'u6c43', 'u51ab', 'u7676', 'u5cec', 36 'u5693', 'u5072', 'u53c2', 'u4ed3', 'u64a1', 'u518a', 37 'u5d7e', 'u66fd', 'u66fe', 'u5c64', 'u53c9', 'u8286', 38 'u8fbf', 'u4f25', 'u6284', 'u8f66', 'u62bb', 'u6c88', 39 'u6c89', 'u9637', 'u5403', 'u5145', 'u62bd', 'u51fa', 40 'u6b3b', 'u63e3', 'u5ddb', 'u5205', 'u5439', 'u65fe', 41 'u9034', 'u5472', 'u5306', 'u51d1', 'u7c97', 'u6c46', 42 'u5d14', 'u90a8', 'u6413', 'u5491', 'u5446', 'u4e39', 43 'u5f53', 'u5200', 'u561a', 'u6265', 'u706f', 'u6c10', 44 'u55f2', 'u7538', 'u5201', 'u7239', 'u4e01', 'u4e1f', 45 'u4e1c', 'u543a', 'u53be', 'u8011', 'u8968', 'u5428', 46 'u591a', 'u59b8', 'u8bf6', 'u5940', 'u97a5', 'u513f', 47 'u53d1', 'u5e06', 'u531a', 'u98de', 'u5206', 'u4e30', 48 'u8985', 'u4ecf', 'u7d11', 'u4f15', 'u65ee', 'u4f85', 49 'u7518', 'u5188', 'u768b', 'u6208', 'u7ed9', 'u6839', 50 'u522f', 'u5de5', 'u52fe', 'u4f30', 'u74dc', 'u4e56', 51 'u5173', 'u5149', 'u5f52', 'u4e28', 'u5459', 'u54c8', 52 'u548d', 'u4f44', 'u592f', 'u8320', 'u8bc3', 'u9ed2', 53 'u62eb', 'u4ea8', 'u5677', 'u53ff', 'u9f41', 'u4e6f', 54 'u82b1', 'u6000', 'u72bf', 'u5ddf', 'u7070', 'u660f', 55 'u5419', 'u4e0c', 'u52a0', 'u620b', 'u6c5f', 'u827d', 56 'u9636', 'u5dfe', 'u5755', 'u5182', 'u4e29', 'u51e5', 57 'u59e2', 'u5658', 'u519b', 'u5494', 'u5f00', 'u520a', 58 'u5ffc', 'u5c3b', 'u533c', 'u808e', 'u52a5', 'u7a7a', 59 'u62a0', 'u625d', 'u5938', 'u84af', 'u5bbd', 'u5321', 60 'u4e8f', 'u5764', 'u6269', 'u5783', 'u6765', 'u5170', 61 'u5577', 'u635e', 'u808b', 'u52d2', 'u5d1a', 'u5215', 62 'u4fe9', 'u5941', 'u826f', 'u64a9', 'u5217', 'u62ce', 63 'u5222', 'u6e9c', 'u56d6', 'u9f99', 'u779c', 'u565c', 64 'u5a08', 'u7567', 'u62a1', 'u7f57', 'u5463', 'u5988', 65 'u57cb', 'u5ada', 'u7264', 'u732b', 'u4e48', 'u5445', 66 'u95e8', 'u753f', 'u54aa', 'u5b80', 'u55b5', 'u4e5c', 67 'u6c11', 'u540d', 'u8c2c', 'u6478', 'u54de', 'u6bea', 68 'u55ef', 'u62cf', 'u8149', 'u56e1', 'u56d4', 'u5b6c', 69 'u7592', 'u5a1e', 'u6041', 'u80fd', 'u59ae', 'u62c8', 70 'u5b22', 'u9e1f', 'u634f', 'u56dc', 'u5b81', 'u599e', 71 'u519c', 'u7fba', 'u5974', 'u597b', 'u759f', 'u9ec1', 72 'u90cd', 'u5594', 'u8bb4', 'u5991', 'u62cd', 'u7705', 73 'u4e53', 'u629b', 'u5478', 'u55b7', 'u5309', 'u4e15', 74 'u56e8', 'u527d', 'u6c15', 'u59d8', 'u4e52', 'u948b', 75 'u5256', 'u4ec6', 'u4e03', 'u6390', 'u5343', 'u545b', 76 'u6084', 'u767f', 'u4eb2', 'u72c5', 'u828e', 'u4e18', 77 'u533a', 'u5cd1', 'u7f3a', 'u590b', 'u5465', 'u7a63', 78 'u5a06', 'u60f9', 'u4eba', 'u6254', 'u65e5', 'u8338', 79 'u53b9', 'u909a', 'u633c', 'u5827', 'u5a51', 'u77a4', 80 'u637c', 'u4ee8', 'u6be2', 'u4e09', 'u6852', 'u63bb', 81 'u95aa', 'u68ee', 'u50e7', 'u6740', 'u7b5b', 'u5c71', 82 'u4f24', 'u5f30', 'u5962', 'u7533', 'u8398', 'u6552', 83 'u5347', 'u5c38', 'u53ce', 'u4e66', 'u5237', 'u8870', 84 'u95e9', 'u53cc', 'u8c01', 'u542e', 'u8bf4', 'u53b6', 85 'u5fea', 'u635c', 'u82cf', 'u72fb', 'u590a', 'u5b59', 86 'u5506', 'u4ed6', 'u56fc', 'u574d', 'u6c64', 'u5932', 87 'u5fd1', 'u71a5', 'u5254', 'u5929', 'u65eb', 'u5e16', 88 'u5385', 'u56f2', 'u5077', 'u51f8', 'u6e4d', 'u63a8', 89 'u541e', 'u4e47', 'u7a75', 'u6b6a', 'u5f2f', 'u5c23', 90 'u5371', 'u6637', 'u7fc1', 'u631d', 'u4e4c', 'u5915', 91 'u8672', 'u4eda', 'u4e61', 'u7071', 'u4e9b', 'u5fc3', 92 'u661f', 'u51f6', 'u4f11', 'u5401', 'u5405', 'u524a', 93 'u5743', 'u4e2b', 'u6079', 'u592e', 'u5e7a', 'u503b', 94 'u4e00', 'u56d9', 'u5e94', 'u54df', 'u4f63', 'u4f18', 95 'u625c', 'u56e6', 'u66f0', 'u6655', 'u7b60', 'u7b7c', 96 'u5e00', 'u707d', 'u5142', 'u5328', 'u50ae', 'u5219', 97 'u8d3c', 'u600e', 'u5897', 'u624e', 'u635a', 'u6cbe', 98 'u5f20', 'u957f', 'u9577', 'u4f4b', 'u8707', 'u8d1e', 99 'u4e89', 'u4e4b', 'u5cd9', 'u5ea2', 'u4e2d', 'u5dde', 100 'u6731', 'u6293', 'u62fd', 'u4e13', 'u5986', 'u96b9', 101 'u5b92', 'u5353', 'u4e72', 'u5b97', 'u90b9', 'u79df', 102 'u94bb', 'u539c', 'u5c0a', 'u6628', 'u5159', 'u9fc3', 103 'u9fc4', }; 104 105 /** 106 * Pinyin array. 107 * 108 * Each pinyin is corresponding to unihans of same offset in the unihans 109 * array. 110 */ 111 public static final byte[][] PINYINS = { { 65, 0, 0, 0, 0, 0 }, 112 { 65, 73, 0, 0, 0, 0 }, { 65, 78, 0, 0, 0, 0 }, 113 { 65, 78, 71, 0, 0, 0 }, { 65, 79, 0, 0, 0, 0 }, 114 { 66, 65, 0, 0, 0, 0 }, { 66, 65, 73, 0, 0, 0 }, 115 { 66, 65, 78, 0, 0, 0 }, { 66, 65, 78, 71, 0, 0 }, 116 { 66, 65, 79, 0, 0, 0 }, { 66, 69, 73, 0, 0, 0 }, 117 { 66, 69, 78, 0, 0, 0 }, { 66, 69, 78, 71, 0, 0 }, 118 { 66, 73, 0, 0, 0, 0 }, { 66, 73, 65, 78, 0, 0 }, 119 { 66, 73, 65, 79, 0, 0 }, { 66, 73, 69, 0, 0, 0 }, 120 { 66, 73, 78, 0, 0, 0 }, { 66, 73, 78, 71, 0, 0 }, 121 { 66, 79, 0, 0, 0, 0 }, { 66, 85, 0, 0, 0, 0 }, 122 { 67, 65, 0, 0, 0, 0 }, { 67, 65, 73, 0, 0, 0 }, 123 { 67, 65, 78, 0, 0, 0 }, { 67, 65, 78, 71, 0, 0 }, 124 { 67, 65, 79, 0, 0, 0 }, { 67, 69, 0, 0, 0, 0 }, 125 { 67, 69, 78, 0, 0, 0 }, { 67, 69, 78, 71, 0, 0 }, 126 { 90, 69, 78, 71, 0, 0 }, { 67, 69, 78, 71, 0, 0 }, 127 { 67, 72, 65, 0, 0, 0 }, { 67, 72, 65, 73, 0, 0 }, 128 { 67, 72, 65, 78, 0, 0 }, { 67, 72, 65, 78, 71, 0 }, 129 { 67, 72, 65, 79, 0, 0 }, { 67, 72, 69, 0, 0, 0 }, 130 { 67, 72, 69, 78, 0, 0 }, { 83, 72, 69, 78, 0, 0 }, 131 { 67, 72, 69, 78, 0, 0 }, { 67, 72, 69, 78, 71, 0 }, 132 { 67, 72, 73, 0, 0, 0 }, { 67, 72, 79, 78, 71, 0 }, 133 { 67, 72, 79, 85, 0, 0 }, { 67, 72, 85, 0, 0, 0 }, 134 { 67, 72, 85, 65, 0, 0 }, { 67, 72, 85, 65, 73, 0 }, 135 { 67, 72, 85, 65, 78, 0 }, { 67, 72, 85, 65, 78, 71 }, 136 { 67, 72, 85, 73, 0, 0 }, { 67, 72, 85, 78, 0, 0 }, 137 { 67, 72, 85, 79, 0, 0 }, { 67, 73, 0, 0, 0, 0 }, 138 { 67, 79, 78, 71, 0, 0 }, { 67, 79, 85, 0, 0, 0 }, 139 { 67, 85, 0, 0, 0, 0 }, { 67, 85, 65, 78, 0, 0 }, 140 { 67, 85, 73, 0, 0, 0 }, { 67, 85, 78, 0, 0, 0 }, 141 { 67, 85, 79, 0, 0, 0 }, { 68, 65, 0, 0, 0, 0 }, 142 { 68, 65, 73, 0, 0, 0 }, { 68, 65, 78, 0, 0, 0 }, 143 { 68, 65, 78, 71, 0, 0 }, { 68, 65, 79, 0, 0, 0 }, 144 { 68, 69, 0, 0, 0, 0 }, { 68, 69, 78, 0, 0, 0 }, 145 { 68, 69, 78, 71, 0, 0 }, { 68, 73, 0, 0, 0, 0 }, 146 { 68, 73, 65, 0, 0, 0 }, { 68, 73, 65, 78, 0, 0 }, 147 { 68, 73, 65, 79, 0, 0 }, { 68, 73, 69, 0, 0, 0 }, 148 { 68, 73, 78, 71, 0, 0 }, { 68, 73, 85, 0, 0, 0 }, 149 { 68, 79, 78, 71, 0, 0 }, { 68, 79, 85, 0, 0, 0 }, 150 { 68, 85, 0, 0, 0, 0 }, { 68, 85, 65, 78, 0, 0 }, 151 { 68, 85, 73, 0, 0, 0 }, { 68, 85, 78, 0, 0, 0 }, 152 { 68, 85, 79, 0, 0, 0 }, { 69, 0, 0, 0, 0, 0 }, 153 { 69, 73, 0, 0, 0, 0 }, { 69, 78, 0, 0, 0, 0 }, 154 { 69, 78, 71, 0, 0, 0 }, { 69, 82, 0, 0, 0, 0 }, 155 { 70, 65, 0, 0, 0, 0 }, { 70, 65, 78, 0, 0, 0 }, 156 { 70, 65, 78, 71, 0, 0 }, { 70, 69, 73, 0, 0, 0 }, 157 { 70, 69, 78, 0, 0, 0 }, { 70, 69, 78, 71, 0, 0 }, 158 { 70, 73, 65, 79, 0, 0 }, { 70, 79, 0, 0, 0, 0 }, 159 { 70, 79, 85, 0, 0, 0 }, { 70, 85, 0, 0, 0, 0 }, 160 { 71, 65, 0, 0, 0, 0 }, { 71, 65, 73, 0, 0, 0 }, 161 { 71, 65, 78, 0, 0, 0 }, { 71, 65, 78, 71, 0, 0 }, 162 { 71, 65, 79, 0, 0, 0 }, { 71, 69, 0, 0, 0, 0 }, 163 { 71, 69, 73, 0, 0, 0 }, { 71, 69, 78, 0, 0, 0 }, 164 { 71, 69, 78, 71, 0, 0 }, { 71, 79, 78, 71, 0, 0 }, 165 { 71, 79, 85, 0, 0, 0 }, { 71, 85, 0, 0, 0, 0 }, 166 { 71, 85, 65, 0, 0, 0 }, { 71, 85, 65, 73, 0, 0 }, 167 { 71, 85, 65, 78, 0, 0 }, { 71, 85, 65, 78, 71, 0 }, 168 { 71, 85, 73, 0, 0, 0 }, { 71, 85, 78, 0, 0, 0 }, 169 { 71, 85, 79, 0, 0, 0 }, { 72, 65, 0, 0, 0, 0 }, 170 { 72, 65, 73, 0, 0, 0 }, { 72, 65, 78, 0, 0, 0 }, 171 { 72, 65, 78, 71, 0, 0 }, { 72, 65, 79, 0, 0, 0 }, 172 { 72, 69, 0, 0, 0, 0 }, { 72, 69, 73, 0, 0, 0 }, 173 { 72, 69, 78, 0, 0, 0 }, { 72, 69, 78, 71, 0, 0 }, 174 { 72, 77, 0, 0, 0, 0 }, { 72, 79, 78, 71, 0, 0 }, 175 { 72, 79, 85, 0, 0, 0 }, { 72, 85, 0, 0, 0, 0 }, 176 { 72, 85, 65, 0, 0, 0 }, { 72, 85, 65, 73, 0, 0 }, 177 { 72, 85, 65, 78, 0, 0 }, { 72, 85, 65, 78, 71, 0 }, 178 { 72, 85, 73, 0, 0, 0 }, { 72, 85, 78, 0, 0, 0 }, 179 { 72, 85, 79, 0, 0, 0 }, { 74, 73, 0, 0, 0, 0 }, 180 { 74, 73, 65, 0, 0, 0 }, { 74, 73, 65, 78, 0, 0 }, 181 { 74, 73, 65, 78, 71, 0 }, { 74, 73, 65, 79, 0, 0 }, 182 { 74, 73, 69, 0, 0, 0 }, { 74, 73, 78, 0, 0, 0 }, 183 { 74, 73, 78, 71, 0, 0 }, { 74, 73, 79, 78, 71, 0 }, 184 { 74, 73, 85, 0, 0, 0 }, { 74, 85, 0, 0, 0, 0 }, 185 { 74, 85, 65, 78, 0, 0 }, { 74, 85, 69, 0, 0, 0 }, 186 { 74, 85, 78, 0, 0, 0 }, { 75, 65, 0, 0, 0, 0 }, 187 { 75, 65, 73, 0, 0, 0 }, { 75, 65, 78, 0, 0, 0 }, 188 { 75, 65, 78, 71, 0, 0 }, { 75, 65, 79, 0, 0, 0 }, 189 { 75, 69, 0, 0, 0, 0 }, { 75, 69, 78, 0, 0, 0 }, 190 { 75, 69, 78, 71, 0, 0 }, { 75, 79, 78, 71, 0, 0 }, 191 { 75, 79, 85, 0, 0, 0 }, { 75, 85, 0, 0, 0, 0 }, 192 { 75, 85, 65, 0, 0, 0 }, { 75, 85, 65, 73, 0, 0 }, 193 { 75, 85, 65, 78, 0, 0 }, { 75, 85, 65, 78, 71, 0 }, 194 { 75, 85, 73, 0, 0, 0 }, { 75, 85, 78, 0, 0, 0 }, 195 { 75, 85, 79, 0, 0, 0 }, { 76, 65, 0, 0, 0, 0 }, 196 { 76, 65, 73, 0, 0, 0 }, { 76, 65, 78, 0, 0, 0 }, 197 { 76, 65, 78, 71, 0, 0 }, { 76, 65, 79, 0, 0, 0 }, 198 { 76, 69, 0, 0, 0, 0 }, { 76, 69, 73, 0, 0, 0 }, 199 { 76, 69, 78, 71, 0, 0 }, { 76, 73, 0, 0, 0, 0 }, 200 { 76, 73, 65, 0, 0, 0 }, { 76, 73, 65, 78, 0, 0 }, 201 { 76, 73, 65, 78, 71, 0 }, { 76, 73, 65, 79, 0, 0 }, 202 { 76, 73, 69, 0, 0, 0 }, { 76, 73, 78, 0, 0, 0 }, 203 { 76, 73, 78, 71, 0, 0 }, { 76, 73, 85, 0, 0, 0 }, 204 { 76, 79, 0, 0, 0, 0 }, { 76, 79, 78, 71, 0, 0 }, 205 { 76, 79, 85, 0, 0, 0 }, { 76, 85, 0, 0, 0, 0 }, 206 { 76, 85, 65, 78, 0, 0 }, { 76, 85, 69, 0, 0, 0 }, 207 { 76, 85, 78, 0, 0, 0 }, { 76, 85, 79, 0, 0, 0 }, 208 { 77, 0, 0, 0, 0, 0 }, { 77, 65, 0, 0, 0, 0 }, 209 { 77, 65, 73, 0, 0, 0 }, { 77, 65, 78, 0, 0, 0 }, 210 { 77, 65, 78, 71, 0, 0 }, { 77, 65, 79, 0, 0, 0 }, 211 { 77, 69, 0, 0, 0, 0 }, { 77, 69, 73, 0, 0, 0 }, 212 { 77, 69, 78, 0, 0, 0 }, { 77, 69, 78, 71, 0, 0 }, 213 { 77, 73, 0, 0, 0, 0 }, { 77, 73, 65, 78, 0, 0 }, 214 { 77, 73, 65, 79, 0, 0 }, { 77, 73, 69, 0, 0, 0 }, 215 { 77, 73, 78, 0, 0, 0 }, { 77, 73, 78, 71, 0, 0 }, 216 { 77, 73, 85, 0, 0, 0 }, { 77, 79, 0, 0, 0, 0 }, 217 { 77, 79, 85, 0, 0, 0 }, { 77, 85, 0, 0, 0, 0 }, 218 { 78, 0, 0, 0, 0, 0 }, { 78, 65, 0, 0, 0, 0 }, 219 { 78, 65, 73, 0, 0, 0 }, { 78, 65, 78, 0, 0, 0 }, 220 { 78, 65, 78, 71, 0, 0 }, { 78, 65, 79, 0, 0, 0 }, 221 { 78, 69, 0, 0, 0, 0 }, { 78, 69, 73, 0, 0, 0 }, 222 { 78, 69, 78, 0, 0, 0 }, { 78, 69, 78, 71, 0, 0 }, 223 { 78, 73, 0, 0, 0, 0 }, { 78, 73, 65, 78, 0, 0 }, 224 { 78, 73, 65, 78, 71, 0 }, { 78, 73, 65, 79, 0, 0 }, 225 { 78, 73, 69, 0, 0, 0 }, { 78, 73, 78, 0, 0, 0 }, 226 { 78, 73, 78, 71, 0, 0 }, { 78, 73, 85, 0, 0, 0 }, 227 { 78, 79, 78, 71, 0, 0 }, { 78, 79, 85, 0, 0, 0 }, 228 { 78, 85, 0, 0, 0, 0 }, { 78, 85, 65, 78, 0, 0 }, 229 { 78, 85, 69, 0, 0, 0 }, { 78, 85, 78, 0, 0, 0 }, 230 { 78, 85, 79, 0, 0, 0 }, { 79, 0, 0, 0, 0, 0 }, 231 { 79, 85, 0, 0, 0, 0 }, { 80, 65, 0, 0, 0, 0 }, 232 { 80, 65, 73, 0, 0, 0 }, { 80, 65, 78, 0, 0, 0 }, 233 { 80, 65, 78, 71, 0, 0 }, { 80, 65, 79, 0, 0, 0 }, 234 { 80, 69, 73, 0, 0, 0 }, { 80, 69, 78, 0, 0, 0 }, 235 { 80, 69, 78, 71, 0, 0 }, { 80, 73, 0, 0, 0, 0 }, 236 { 80, 73, 65, 78, 0, 0 }, { 80, 73, 65, 79, 0, 0 }, 237 { 80, 73, 69, 0, 0, 0 }, { 80, 73, 78, 0, 0, 0 }, 238 { 80, 73, 78, 71, 0, 0 }, { 80, 79, 0, 0, 0, 0 }, 239 { 80, 79, 85, 0, 0, 0 }, { 80, 85, 0, 0, 0, 0 }, 240 { 81, 73, 0, 0, 0, 0 }, { 81, 73, 65, 0, 0, 0 }, 241 { 81, 73, 65, 78, 0, 0 }, { 81, 73, 65, 78, 71, 0 }, 242 { 81, 73, 65, 79, 0, 0 }, { 81, 73, 69, 0, 0, 0 }, 243 { 81, 73, 78, 0, 0, 0 }, { 81, 73, 78, 71, 0, 0 }, 244 { 81, 73, 79, 78, 71, 0 }, { 81, 73, 85, 0, 0, 0 }, 245 { 81, 85, 0, 0, 0, 0 }, { 81, 85, 65, 78, 0, 0 }, 246 { 81, 85, 69, 0, 0, 0 }, { 81, 85, 78, 0, 0, 0 }, 247 { 82, 65, 78, 0, 0, 0 }, { 82, 65, 78, 71, 0, 0 }, 248 { 82, 65, 79, 0, 0, 0 }, { 82, 69, 0, 0, 0, 0 }, 249 { 82, 69, 78, 0, 0, 0 }, { 82, 69, 78, 71, 0, 0 }, 250 { 82, 73, 0, 0, 0, 0 }, { 82, 79, 78, 71, 0, 0 }, 251 { 82, 79, 85, 0, 0, 0 }, { 82, 85, 0, 0, 0, 0 }, 252 { 82, 85, 65, 0, 0, 0 }, { 82, 85, 65, 78, 0, 0 }, 253 { 82, 85, 73, 0, 0, 0 }, { 82, 85, 78, 0, 0, 0 }, 254 { 82, 85, 79, 0, 0, 0 }, { 83, 65, 0, 0, 0, 0 }, 255 { 83, 65, 73, 0, 0, 0 }, { 83, 65, 78, 0, 0, 0 }, 256 { 83, 65, 78, 71, 0, 0 }, { 83, 65, 79, 0, 0, 0 }, 257 { 83, 69, 0, 0, 0, 0 }, { 83, 69, 78, 0, 0, 0 }, 258 { 83, 69, 78, 71, 0, 0 }, { 83, 72, 65, 0, 0, 0 }, 259 { 83, 72, 65, 73, 0, 0 }, { 83, 72, 65, 78, 0, 0 }, 260 { 83, 72, 65, 78, 71, 0 }, { 83, 72, 65, 79, 0, 0 }, 261 { 83, 72, 69, 0, 0, 0 }, { 83, 72, 69, 78, 0, 0 }, 262 { 88, 73, 78, 0, 0, 0 }, { 83, 72, 69, 78, 0, 0 }, 263 { 83, 72, 69, 78, 71, 0 }, { 83, 72, 73, 0, 0, 0 }, 264 { 83, 72, 79, 85, 0, 0 }, { 83, 72, 85, 0, 0, 0 }, 265 { 83, 72, 85, 65, 0, 0 }, { 83, 72, 85, 65, 73, 0 }, 266 { 83, 72, 85, 65, 78, 0 }, { 83, 72, 85, 65, 78, 71 }, 267 { 83, 72, 85, 73, 0, 0 }, { 83, 72, 85, 78, 0, 0 }, 268 { 83, 72, 85, 79, 0, 0 }, { 83, 73, 0, 0, 0, 0 }, 269 { 83, 79, 78, 71, 0, 0 }, { 83, 79, 85, 0, 0, 0 }, 270 { 83, 85, 0, 0, 0, 0 }, { 83, 85, 65, 78, 0, 0 }, 271 { 83, 85, 73, 0, 0, 0 }, { 83, 85, 78, 0, 0, 0 }, 272 { 83, 85, 79, 0, 0, 0 }, { 84, 65, 0, 0, 0, 0 }, 273 { 84, 65, 73, 0, 0, 0 }, { 84, 65, 78, 0, 0, 0 }, 274 { 84, 65, 78, 71, 0, 0 }, { 84, 65, 79, 0, 0, 0 }, 275 { 84, 69, 0, 0, 0, 0 }, { 84, 69, 78, 71, 0, 0 }, 276 { 84, 73, 0, 0, 0, 0 }, { 84, 73, 65, 78, 0, 0 }, 277 { 84, 73, 65, 79, 0, 0 }, { 84, 73, 69, 0, 0, 0 }, 278 { 84, 73, 78, 71, 0, 0 }, { 84, 79, 78, 71, 0, 0 }, 279 { 84, 79, 85, 0, 0, 0 }, { 84, 85, 0, 0, 0, 0 }, 280 { 84, 85, 65, 78, 0, 0 }, { 84, 85, 73, 0, 0, 0 }, 281 { 84, 85, 78, 0, 0, 0 }, { 84, 85, 79, 0, 0, 0 }, 282 { 87, 65, 0, 0, 0, 0 }, { 87, 65, 73, 0, 0, 0 }, 283 { 87, 65, 78, 0, 0, 0 }, { 87, 65, 78, 71, 0, 0 }, 284 { 87, 69, 73, 0, 0, 0 }, { 87, 69, 78, 0, 0, 0 }, 285 { 87, 69, 78, 71, 0, 0 }, { 87, 79, 0, 0, 0, 0 }, 286 { 87, 85, 0, 0, 0, 0 }, { 88, 73, 0, 0, 0, 0 }, 287 { 88, 73, 65, 0, 0, 0 }, { 88, 73, 65, 78, 0, 0 }, 288 { 88, 73, 65, 78, 71, 0 }, { 88, 73, 65, 79, 0, 0 }, 289 { 88, 73, 69, 0, 0, 0 }, { 88, 73, 78, 0, 0, 0 }, 290 { 88, 73, 78, 71, 0, 0 }, { 88, 73, 79, 78, 71, 0 }, 291 { 88, 73, 85, 0, 0, 0 }, { 88, 85, 0, 0, 0, 0 }, 292 { 88, 85, 65, 78, 0, 0 }, { 88, 85, 69, 0, 0, 0 }, 293 { 88, 85, 78, 0, 0, 0 }, { 89, 65, 0, 0, 0, 0 }, 294 { 89, 65, 78, 0, 0, 0 }, { 89, 65, 78, 71, 0, 0 }, 295 { 89, 65, 79, 0, 0, 0 }, { 89, 69, 0, 0, 0, 0 }, 296 { 89, 73, 0, 0, 0, 0 }, { 89, 73, 78, 0, 0, 0 }, 297 { 89, 73, 78, 71, 0, 0 }, { 89, 79, 0, 0, 0, 0 }, 298 { 89, 79, 78, 71, 0, 0 }, { 89, 79, 85, 0, 0, 0 }, 299 { 89, 85, 0, 0, 0, 0 }, { 89, 85, 65, 78, 0, 0 }, 300 { 89, 85, 69, 0, 0, 0 }, { 89, 85, 78, 0, 0, 0 }, 301 { 74, 85, 78, 0, 0, 0 }, { 89, 85, 78, 0, 0, 0 }, 302 { 90, 65, 0, 0, 0, 0 }, { 90, 65, 73, 0, 0, 0 }, 303 { 90, 65, 78, 0, 0, 0 }, { 90, 65, 78, 71, 0, 0 }, 304 { 90, 65, 79, 0, 0, 0 }, { 90, 69, 0, 0, 0, 0 }, 305 { 90, 69, 73, 0, 0, 0 }, { 90, 69, 78, 0, 0, 0 }, 306 { 90, 69, 78, 71, 0, 0 }, { 90, 72, 65, 0, 0, 0 }, 307 { 90, 72, 65, 73, 0, 0 }, { 90, 72, 65, 78, 0, 0 }, 308 { 90, 72, 65, 78, 71, 0 }, { 67, 72, 65, 78, 71, 0 }, 309 { 90, 72, 65, 78, 71, 0 }, { 90, 72, 65, 79, 0, 0 }, 310 { 90, 72, 69, 0, 0, 0 }, { 90, 72, 69, 78, 0, 0 }, 311 { 90, 72, 69, 78, 71, 0 }, { 90, 72, 73, 0, 0, 0 }, 312 { 83, 72, 73, 0, 0, 0 }, { 90, 72, 73, 0, 0, 0 }, 313 { 90, 72, 79, 78, 71, 0 }, { 90, 72, 79, 85, 0, 0 }, 314 { 90, 72, 85, 0, 0, 0 }, { 90, 72, 85, 65, 0, 0 }, 315 { 90, 72, 85, 65, 73, 0 }, { 90, 72, 85, 65, 78, 0 }, 316 { 90, 72, 85, 65, 78, 71 }, { 90, 72, 85, 73, 0, 0 }, 317 { 90, 72, 85, 78, 0, 0 }, { 90, 72, 85, 79, 0, 0 }, 318 { 90, 73, 0, 0, 0, 0 }, { 90, 79, 78, 71, 0, 0 }, 319 { 90, 79, 85, 0, 0, 0 }, { 90, 85, 0, 0, 0, 0 }, 320 { 90, 85, 65, 78, 0, 0 }, { 90, 85, 73, 0, 0, 0 }, 321 { 90, 85, 78, 0, 0, 0 }, { 90, 85, 79, 0, 0, 0 }, 322 { 0, 0, 0, 0, 0, 0 }, { 83, 72, 65, 78, 0, 0 }, 323 { 0, 0, 0, 0, 0, 0 }, }; 324 325 /** 326 * First and last Chinese character with known Pinyin according to zh 327 * collation 328 */ 329 private static final String FIRST_PINYIN_UNIHAN = "u963F"; 330 private static final String LAST_PINYIN_UNIHAN = "u9FFF"; 331 332 private static final Collator COLLATOR = Collator.getInstance(Locale.CHINA); 333 334 private static HanziToPinyin sInstance; 335 private final boolean mHasChinaCollator; 336 337 public static class Token { 338 /** 339 * Separator between target string for each source char 340 */ 341 public static final String SEPARATOR = " "; 342 343 public static final int LATIN = 1; 344 public static final int PINYIN = 2; 345 public static final int UNKNOWN = 3; 346 347 public Token() { 348 } 349 350 public Token(int type, String source, String target) { 351 this.type = type; 352 this.source = source; 353 this.target = target; 354 } 355 356 /** 357 * Type of this token, ASCII, PINYIN or UNKNOWN. 358 */ 359 public int type; 360 /** 361 * Original string before translation. 362 */ 363 public String source; 364 /** 365 * Translated string of source. For Han, target is corresponding Pinyin. 366 * Otherwise target is original string in source. 367 */ 368 public String target; 369 } 370 371 protected HanziToPinyin(boolean hasChinaCollator) { 372 mHasChinaCollator = hasChinaCollator; 373 } 374 375 public static HanziToPinyin getInstance() { 376 synchronized (HanziToPinyin.class) { 377 if (sInstance != null) { 378 return sInstance; 379 } 380 // Check if zh_CN collation data is available 381 final Locale locale[] = Collator.getAvailableLocales(); 382 for (int i = 0; i < locale.length; i++) { 383 if (locale[i].equals(Locale.CHINA)) { 384 // Do self validation just once. 385 if (DEBUG) { 386 Log.d(TAG, "Self validation. Result: " 387 + doSelfValidation()); 388 } 389 sInstance = new HanziToPinyin(true); 390 return sInstance; 391 } 392 } 393 Log.w(TAG, 394 "There is no Chinese collator, HanziToPinyin is disabled"); 395 sInstance = new HanziToPinyin(false); 396 return sInstance; 397 } 398 } 399 400 /** 401 * Validate if our internal table has some wrong value. 402 * 403 * @return true when the table looks correct. 404 */ 405 private static boolean doSelfValidation() { 406 char lastChar = UNIHANS[0]; 407 String lastString = Character.toString(lastChar); 408 for (char c : UNIHANS) { 409 if (lastChar == c) { 410 continue; 411 } 412 final String curString = Character.toString(c); 413 int cmp = COLLATOR.compare(lastString, curString); 414 if (cmp >= 0) { 415 Log.e(TAG, "Internal error in Unihan table. " 416 + "The last string "" + lastString 417 + "" is greater than current string "" + curString 418 + ""."); 419 return false; 420 } 421 lastString = curString; 422 } 423 return true; 424 } 425 426 private Token getToken(char character) { 427 Token token = new Token(); 428 final String letter = Character.toString(character); 429 token.source = letter; 430 int offset = -1; 431 int cmp; 432 if (character < 256) { 433 token.type = Token.LATIN; 434 token.target = letter; 435 return token; 436 } else { 437 cmp = COLLATOR.compare(letter, FIRST_PINYIN_UNIHAN); 438 if (cmp < 0) { 439 token.type = Token.UNKNOWN; 440 token.target = letter; 441 return token; 442 } else if (cmp == 0) { 443 token.type = Token.PINYIN; 444 offset = 0; 445 } else { 446 cmp = COLLATOR.compare(letter, LAST_PINYIN_UNIHAN); 447 if (cmp > 0) { 448 token.type = Token.UNKNOWN; 449 token.target = letter; 450 return token; 451 } else if (cmp == 0) { 452 token.type = Token.PINYIN; 453 offset = UNIHANS.length - 1; 454 } 455 } 456 } 457 458 token.type = Token.PINYIN; 459 if (offset < 0) { 460 int begin = 0; 461 int end = UNIHANS.length - 1; 462 while (begin <= end) { 463 offset = (begin + end) / 2; 464 final String unihan = Character.toString(UNIHANS[offset]); 465 cmp = COLLATOR.compare(letter, unihan); 466 if (cmp == 0) { 467 break; 468 } else if (cmp > 0) { 469 begin = offset + 1; 470 } else { 471 end = offset - 1; 472 } 473 } 474 } 475 if (cmp < 0) { 476 offset--; 477 } 478 StringBuilder pinyin = new StringBuilder(); 479 for (int j = 0; j < PINYINS[offset].length && PINYINS[offset][j] != 0; j++) { 480 pinyin.append((char) PINYINS[offset][j]); 481 } 482 token.target = pinyin.toString(); 483 if (TextUtils.isEmpty(token.target)) { 484 token.type = Token.UNKNOWN; 485 token.target = token.source; 486 } 487 return token; 488 } 489 490 /** 491 * Convert the input to a array of tokens. The sequence of ASCII or Unknown 492 * characters without space will be put into a Token, One Hanzi character 493 * which has pinyin will be treated as a Token. If these is no China 494 * collator, the empty token array is returned. 495 */ 496 public ArrayList<Token> get(final String input) { 497 ArrayList<Token> tokens = new ArrayList<Token>(); 498 if (!mHasChinaCollator || TextUtils.isEmpty(input)) { 499 // return empty tokens. 500 return tokens; 501 } 502 final int inputLength = input.length(); 503 final StringBuilder sb = new StringBuilder(); 504 int tokenType = Token.LATIN; 505 // Go through the input, create a new token when 506 // a. Token type changed 507 // b. Get the Pinyin of current charater. 508 // c. current character is space. 509 for (int i = 0; i < inputLength; i++) { 510 final char character = input.charAt(i); 511 if (character == ' ') { 512 if (sb.length() > 0) { 513 addToken(sb, tokens, tokenType); 514 } 515 } else if (character < 256) { 516 if (tokenType != Token.LATIN && sb.length() > 0) { 517 addToken(sb, tokens, tokenType); 518 } 519 tokenType = Token.LATIN; 520 sb.append(character); 521 } else { 522 Token t = getToken(character); 523 if (t.type == Token.PINYIN) { 524 if (sb.length() > 0) { 525 addToken(sb, tokens, tokenType); 526 } 527 tokens.add(t); 528 tokenType = Token.PINYIN; 529 } else { 530 if (tokenType != t.type && sb.length() > 0) { 531 addToken(sb, tokens, tokenType); 532 } 533 tokenType = t.type; 534 sb.append(character); 535 } 536 } 537 } 538 if (sb.length() > 0) { 539 addToken(sb, tokens, tokenType); 540 } 541 return tokens; 542 } 543 544 private void addToken(final StringBuilder sb, 545 final ArrayList<Token> tokens, final int tokenType) { 546 String str = sb.toString(); 547 tokens.add(new Token(tokenType, str, str)); 548 sb.setLength(0); 549 } 550 }
方法调用:
package com.example.test; import java.util.ArrayList; import com.example.test.HanziToPinyin.Token; import android.os.Bundle; import android.app.Activity; import android.widget.TextView; public class MainActivity extends Activity { @Override protected void onCreate(Bundle savedInstanceState) { super.onCreate(savedInstanceState); setContentView(R.layout.activity_main); TextView txt = (TextView) findViewById(R.id.txtContent); txt.append(getPinYin("令狐冲") + " -- " + getPinYin("张无忌") + " -- " + getPinYin("任盈盈")); } /** * 汉字返回拼音,字母原样返回,都转换为小写 * * @param input * @return */ public static String getPinYin(String str) { ArrayList<Token> tokens = HanziToPinyin.getInstance().get(str); StringBuilder sb = new StringBuilder(); if (tokens != null && tokens.size() > 0) { for (Token token : tokens) { if (Token.PINYIN == token.type) { sb.append(token.target); } else { sb.append(token.source); } } } return sb.toString().toLowerCase(); } }