1313 public class GlyphList
1414 {
1515 /// <summary>
16- /// <c>.notdef</c>.
16+ /// The <c>.notdef</c> glyph name.
1717 /// </summary>
1818 public const string NotDefined = ".notdef" ;
1919
@@ -37,7 +37,7 @@ public class GlyphList
3737 public static GlyphList AdditionalGlyphList => LazyAdditionalGlyphList . Value ;
3838
3939 private static readonly Lazy < GlyphList > LazyZapfDingbatsGlyphList = new Lazy < GlyphList > ( ( ) => GlyphListFactory . Get ( "zapfdingbats" ) ) ;
40-
40+
4141 /// <summary>
4242 /// Zapf Dingbats.
4343 /// </summary>
@@ -84,6 +84,7 @@ public string UnicodeCodePointToName(int unicodeValue)
8484
8585 /// <summary>
8686 /// Get the unicode value for the glyph name.
87+ /// See <see href="https://github.com/adobe-type-tools/agl-specification"/>.
8788 /// </summary>
8889 public string NameToUnicode ( string name )
8990 {
@@ -103,25 +104,47 @@ public string NameToUnicode(string name)
103104 }
104105
105106 string unicode ;
106- // Remove suffixes
107+ // 1. Drop all the characters from the glyph name starting with the first occurrence of a period (U+002E FULL STOP), if any.
107108 if ( name . IndexOf ( '.' ) > 0 )
108109 {
109110 unicode = NameToUnicode ( name . Substring ( 0 , name . IndexOf ( '.' ) ) ) ;
110111 }
111- else if ( name . StartsWith ( "uni" ) && name . Length == 7 )
112+ // 2. Split the remaining string into a sequence of components, using underscore (U+005F LOW LINE) as the delimiter.
113+ else if ( name . IndexOf ( '_' ) > 0 )
114+ {
115+ /*
116+ * MOZILLA-3136-0.pdf
117+ * 68-1990-01_A.pdf
118+ * TIKA-2054-0.pdf
119+ */
120+ var sb = new StringBuilder ( ) ;
121+ foreach ( var s in name . Split ( '_' ) )
122+ {
123+ sb . Append ( NameToUnicode ( s ) ) ;
124+ }
125+
126+ unicode = sb . ToString ( ) ;
127+ }
128+ // Otherwise, if the component is of the form ‘uni’ (U+0075, U+006E, and U+0069) followed by a sequence of uppercase hexadecimal
129+ // digits (0–9 and A–F, meaning U+0030 through U+0039 and U+0041 through U+0046), if the length of that sequence is a multiple
130+ // of four, and if each group of four digits represents a value in the ranges 0000 through D7FF or E000 through FFFF, then
131+ // interpret each as a Unicode scalar value and map the component to the string made of those scalar values. Note that the range
132+ // and digit-length restrictions mean that the ‘uni’ glyph name prefix can be used only with UVs in the Basic Multilingual Plane (BMP).
133+ else if ( name . StartsWith ( "uni" ) && ( name . Length - 3 ) % 4 == 0 )
112134 {
113135 // test for Unicode name in the format uniXXXX where X is hex
114136 int nameLength = name . Length ;
115137
116138 var uniStr = new StringBuilder ( ) ;
117139
118- var foundUnicode = true ;
119140 for ( int chPos = 3 ; chPos + 4 <= nameLength ; chPos += 4 )
120141 {
121- if ( ! int . TryParse ( name . AsSpanOrSubstring ( chPos , 4 ) , NumberStyles . HexNumber , CultureInfo . InvariantCulture , out var codePoint ) )
142+ if ( ! int . TryParse ( name . AsSpanOrSubstring ( chPos , 4 ) ,
143+ NumberStyles . HexNumber ,
144+ CultureInfo . InvariantCulture ,
145+ out var codePoint ) )
122146 {
123- foundUnicode = false ;
124- break ;
147+ return null ;
125148 }
126149
127150 if ( codePoint > 0xD7FF && codePoint < 0xE000 )
@@ -132,33 +155,30 @@ public string NameToUnicode(string name)
132155 uniStr . Append ( ( char ) codePoint ) ;
133156 }
134157
135- if ( ! foundUnicode )
136- {
137- return null ;
138- }
139-
140158 unicode = uniStr . ToString ( ) ;
141159 }
142- else if ( name . StartsWith ( "u" , StringComparison . Ordinal ) && name . Length == 5 )
160+ // Otherwise, if the component is of the form ‘u’ (U+0075) followed by a sequence of four to six uppercase hexadecimal digits (0–9
161+ // and A–F, meaning U+0030 through U+0039 and U+0041 through U+0046), and those digits represent a value in the ranges 0000 through
162+ // D7FF or E000 through 10FFFF, then interpret it as a Unicode scalar value and map the component to the string made of this scalar value.
163+ else if ( name . StartsWith ( "u" , StringComparison . Ordinal ) && name . Length >= 5 && name . Length <= 7 )
143164 {
144- // test for an alternate Unicode name representation uXXXX
145165 var codePoint = int . Parse ( name . AsSpanOrSubstring ( 1 ) , NumberStyles . HexNumber , CultureInfo . InvariantCulture ) ;
146166
147167 if ( codePoint > 0xD7FF && codePoint < 0xE000 )
148168 {
149- throw new InvalidFontFormatException (
150- $ "Unicode character name with disallowed code area: { name } ") ;
169+ throw new InvalidFontFormatException ( $ "Unicode character name with disallowed code area: { name } ") ;
151170 }
152171
153172 unicode = char . ConvertFromUtf32 ( codePoint ) ;
154173 }
174+ // Ad-hoc special cases
155175 else if ( name . StartsWith ( "c" , StringComparison . OrdinalIgnoreCase ) && name . Length >= 3 && name . Length <= 4 )
156176 {
157177 // name representation cXXX
158178 var codePoint = int . Parse ( name . AsSpanOrSubstring ( 1 ) , NumberStyles . Integer , CultureInfo . InvariantCulture ) ;
159- System . Diagnostics . Debug . Assert ( codePoint > 0 ) ;
160179 unicode = char . ConvertFromUtf32 ( codePoint ) ;
161180 }
181+ // Otherwise, map the component to an empty string.
162182 else
163183 {
164184 return null ;
0 commit comments