|
@@ -0,0 +1,157 @@
|
|
|
|
+package com.shkpr.service.alambizplugin.commtools.norule;
|
|
|
|
+
|
|
|
|
+import net.sourceforge.pinyin4j.PinyinHelper;
|
|
|
|
+import net.sourceforge.pinyin4j.format.HanyuPinyinCaseType;
|
|
|
|
+import net.sourceforge.pinyin4j.format.HanyuPinyinOutputFormat;
|
|
|
|
+import net.sourceforge.pinyin4j.format.HanyuPinyinToneType;
|
|
|
|
+import net.sourceforge.pinyin4j.format.exception.BadHanyuPinyinOutputFormatCombination;
|
|
|
|
+
|
|
|
|
+import java.lang.ref.SoftReference;
|
|
|
|
+import java.util.ArrayList;
|
|
|
|
+import java.util.HashMap;
|
|
|
|
+import java.util.List;
|
|
|
|
+import java.util.Map;
|
|
|
|
+import java.util.concurrent.ConcurrentHashMap;
|
|
|
|
+import java.util.regex.Pattern;
|
|
|
|
+
|
|
|
|
+public class StrToFirstCharTool {
|
|
|
|
+
|
|
|
|
+ private static final Pattern FILTER_PATTERN = Pattern.compile("[\\u4e00-\\u9fffA-Za-z0-9]");//过滤非中文字符、非字母、非数字
|
|
|
|
+ private static final HanyuPinyinOutputFormat PINYIN_FORMAT = new HanyuPinyinOutputFormat();
|
|
|
|
+ private static final Map<Character, String> POLYPHONE_MAP = new HashMap<>();//多音字映射
|
|
|
|
+ private static final Map<Character, SoftReference<String>> PINYIN_CACHE = new ConcurrentHashMap<>();
|
|
|
|
+
|
|
|
|
+ static {
|
|
|
|
+ PINYIN_FORMAT.setCaseType(HanyuPinyinCaseType.LOWERCASE);
|
|
|
|
+ PINYIN_FORMAT.setToneType(HanyuPinyinToneType.WITHOUT_TONE);
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ public static String processMixedString(String input) {
|
|
|
|
+ if (input == null || input.trim().isEmpty()) {
|
|
|
|
+ return "";
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ String normalized = normalizeFullWidthChars(input);
|
|
|
|
+ String filtered = filterSpecialChars(normalized);
|
|
|
|
+ return capitalizeFirstLetters(filtered);
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ private static String normalizeFullWidthChars(String str) {
|
|
|
|
+ char[] chars = str.toCharArray();
|
|
|
|
+ for (int i = 0; i < chars.length; i++) {
|
|
|
|
+ if (chars[i] >= '\uff00' && chars[i] <= '\uff5e') {
|
|
|
|
+ chars[i] = (char) (chars[i] - 0xfee0);
|
|
|
|
+ } else if (chars[i] == '\u3000') {
|
|
|
|
+ chars[i] = ' ';
|
|
|
|
+ }
|
|
|
|
+ }
|
|
|
|
+ return new String(chars);
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ private static String filterSpecialChars(String str) {
|
|
|
|
+ StringBuilder sb = new StringBuilder();
|
|
|
|
+ for (char c : str.toCharArray()) {
|
|
|
|
+ if (FILTER_PATTERN.matcher(String.valueOf(c)).matches()) {
|
|
|
|
+ sb.append(c);
|
|
|
|
+ }
|
|
|
|
+ }
|
|
|
|
+ return sb.toString();
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ private static String capitalizeFirstLetters(String str) {
|
|
|
|
+ if (str.isEmpty()) return "";
|
|
|
|
+
|
|
|
|
+ List<String> words = new ArrayList<>();
|
|
|
|
+ StringBuilder currentWord = new StringBuilder();
|
|
|
|
+ CharType currentType = getCharType(str.charAt(0));
|
|
|
|
+ currentWord.append(str.charAt(0));
|
|
|
|
+
|
|
|
|
+ for (int i = 1; i < str.length(); i++) {
|
|
|
|
+ char c = str.charAt(i);
|
|
|
|
+ CharType type = getCharType(c);
|
|
|
|
+
|
|
|
|
+ if (type == currentType && (currentType == CharType.LETTER || currentType == CharType.DIGIT)) {
|
|
|
|
+ currentWord.append(c);
|
|
|
|
+ } else if (type == CharType.CHINESE) {
|
|
|
|
+ if (currentWord.length() > 0) {
|
|
|
|
+ words.add(currentWord.toString());
|
|
|
|
+ currentWord = new StringBuilder();
|
|
|
|
+ }
|
|
|
|
+ words.add(String.valueOf(c));
|
|
|
|
+ currentType = type;
|
|
|
|
+ } else {
|
|
|
|
+ if (currentWord.length() > 0) {
|
|
|
|
+ words.add(currentWord.toString());
|
|
|
|
+ currentWord = new StringBuilder();
|
|
|
|
+ }
|
|
|
|
+ currentWord.append(c);
|
|
|
|
+ currentType = type;
|
|
|
|
+ }
|
|
|
|
+ }
|
|
|
|
+ if (currentWord.length() > 0) {
|
|
|
|
+ words.add(currentWord.toString());
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ StringBuilder result = new StringBuilder();
|
|
|
|
+ for (String word : words) {
|
|
|
|
+ CharType type = getCharType(word.charAt(0));
|
|
|
|
+ if (type == CharType.CHINESE) {
|
|
|
|
+ StringBuilder sb = new StringBuilder();
|
|
|
|
+ for (char c : word.toCharArray()) {
|
|
|
|
+ sb.append(getCachedFirstLetter(c).toUpperCase());
|
|
|
|
+ }
|
|
|
|
+ result.append(sb);
|
|
|
|
+ } else if (type == CharType.LETTER) {
|
|
|
|
+ if (!word.isEmpty()) {
|
|
|
|
+ result.append(word.toUpperCase());
|
|
|
|
+ //result.append(Character.toUpperCase(word.charAt(0)));
|
|
|
|
+ //if (word.length() > 1) {
|
|
|
|
+ // result.append(word.substring(1));
|
|
|
|
+ //}
|
|
|
|
+ }
|
|
|
|
+ } else {
|
|
|
|
+ result.append(word);
|
|
|
|
+ }
|
|
|
|
+ }
|
|
|
|
+ return result.toString();
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ private static CharType getCharType(char c) {
|
|
|
|
+ if (isChineseChar(c)) {
|
|
|
|
+ return CharType.CHINESE;
|
|
|
|
+ } else if (Character.isLetter(c)) {
|
|
|
|
+ return CharType.LETTER;
|
|
|
|
+ } else if (Character.isDigit(c)) {
|
|
|
|
+ return CharType.DIGIT;
|
|
|
|
+ }
|
|
|
|
+ return CharType.OTHER;
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ private static boolean isChineseChar(char c) {
|
|
|
|
+ Character.UnicodeBlock block = Character.UnicodeBlock.of(c);
|
|
|
|
+ return block == Character.UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS
|
|
|
|
+ || block == Character.UnicodeBlock.CJK_COMPATIBILITY_IDEOGRAPHS;
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ private static String getCachedFirstLetter(char c) {
|
|
|
|
+ SoftReference<String> ref = PINYIN_CACHE.get(c);
|
|
|
|
+ String value = (ref != null) ? ref.get() : null;
|
|
|
|
+
|
|
|
|
+ if (value == null) {
|
|
|
|
+ if (POLYPHONE_MAP.containsKey(c)) {
|
|
|
|
+ value = POLYPHONE_MAP.get(c);
|
|
|
|
+ } else {
|
|
|
|
+ try {
|
|
|
|
+ String[] pinyin = PinyinHelper.toHanyuPinyinStringArray(c, PINYIN_FORMAT);
|
|
|
|
+ value = (pinyin != null && pinyin.length > 0) ? pinyin[0].substring(0, 1) : String.valueOf(c);
|
|
|
|
+ } catch (BadHanyuPinyinOutputFormatCombination e) {
|
|
|
|
+ value = String.valueOf(c);
|
|
|
|
+ }
|
|
|
|
+ }
|
|
|
|
+ PINYIN_CACHE.put(c, new SoftReference<>(value));
|
|
|
|
+ }
|
|
|
|
+ return value;
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ private enum CharType {CHINESE, LETTER, DIGIT, OTHER}
|
|
|
|
+}
|