package Java; import java.util.regex.Pattern; public final class Unicode { private Unicode() {} /** The pattern of a grapheme cluster. * * @see * Grapheme cluster boundaries in Unicode text segmentation */ public static final Pattern graphemeClusterPattern = Pattern.compile( "\\X" ); /* An alternative means of cluster discovery is `java.txt.BreakIterator`. Long outdated in this regard, [https://bugs.openjdk.org/browse/JDK-8174266] it was updated for JDK 20. [https://bugs.openjdk.org/browse/JDK-8291660, https://stackoverflow.com/a/76109241/2402790] */ /** The general category value for codepoint `ch`. * * @see * General category values */ public static String generalCategory( final int ch ) { return switch( Character.getType( ch )) { case Character.UPPERCASE_LETTER -> "Lu"; case Character.LOWERCASE_LETTER -> "Ll"; case Character.TITLECASE_LETTER -> "Lt"; case Character.MODIFIER_LETTER -> "Lm"; case Character.OTHER_LETTER -> "Lo"; case Character.NON_SPACING_MARK -> "Mn"; case Character.COMBINING_SPACING_MARK -> "Mc"; case Character.ENCLOSING_MARK -> "Me"; case Character.DECIMAL_DIGIT_NUMBER -> "Nd"; case Character.LETTER_NUMBER -> "Nl"; case Character.OTHER_NUMBER -> "No"; case Character.CONNECTOR_PUNCTUATION -> "Pc"; case Character.DASH_PUNCTUATION -> "Pd"; case Character.START_PUNCTUATION -> "Ps"; case Character.END_PUNCTUATION -> "Pe"; case Character.INITIAL_QUOTE_PUNCTUATION -> "Pi"; case Character.FINAL_QUOTE_PUNCTUATION -> "Pf"; case Character.OTHER_PUNCTUATION -> "Po"; case Character.MATH_SYMBOL -> "Sm"; case Character.CURRENCY_SYMBOL -> "Sc"; case Character.MODIFIER_SYMBOL -> "Sk"; case Character.OTHER_SYMBOL -> "So"; case Character.SPACE_SEPARATOR -> "Zs"; case Character.LINE_SEPARATOR -> "Zl"; case Character.PARAGRAPH_SEPARATOR -> "Zp"; case Character.CONTROL -> "Cc"; case Character.FORMAT -> "Cf"; case Character.SURROGATE -> "Cs"; case Character.PRIVATE_USE -> "Co"; case Character.UNASSIGNED -> "Cn"; default -> throw new IllegalStateException(); }; }} // 
// Copyright © 2021-2024 Michael Allan. Licence MIT.