99import  os 
1010import  subprocess 
1111
# Exclusive upper bound of the code point values this script iterates over
# (used as the stop value when filling in trailing code points).
NUM_CODEPOINTS = 0x10FFFF + 1
13+ 
1314
1415def  to_ranges (iter ):
1516    current  =  None 
@@ -23,19 +24,25 @@ def to_ranges(iter):
2324    if  current  is  not None :
2425        yield  tuple (current )
2526
27+ 
def get_escaped(codepoints):
    """Yield the values of the code points whose class marks them as escaped.

    A code point is selected when its ``class_`` is one of Cc, Cf, Cs, Co,
    Cn, Zl, Zp or Zs.  A missing class (``None`` or empty) is treated as
    "Cn".  The space character (U+0020) is never yielded.

    Args:
        codepoints: iterable of objects exposing ``value`` (the code point
            number) and ``class_`` (a class string or ``None``).

    Yields:
        The ``value`` of each selected code point, in input order.
    """
    # Built once per call instead of re-splitting the string for every item.
    escaped_classes = frozenset("Cc Cf Cs Co Cn Zl Zp Zs".split())
    space = ord(" ")
    for c in codepoints:
        if (c.class_ or "Cn") in escaped_classes and c.value != space:
            yield c.value
3034
35+ 
def get_file(f):
    """Return an open handle to the local copy of the file named by URL ``f``.

    The local copy is looked up in the current working directory under the
    last path component of ``f``.  When it is missing, ``curl -O`` is run on
    ``f`` and the open is attempted again.

    Raises:
        subprocess.CalledProcessError: if curl exits with a non-zero status.
    """
    local_name = os.path.basename(f)
    try:
        handle = open(local_name)
    except FileNotFoundError:
        # No local copy yet: fetch it, then retry the open.
        subprocess.run(["curl", "-O", f], check=True)
        handle = open(local_name)
    return handle
3742
# Record for a single code point: ``value`` is its number and ``class_`` is
# its class string (``None`` for code points filled in without one).
Codepoint = namedtuple("Codepoint", ["value", "class_"])
45+ 
3946
4047def  get_codepoints (f ):
4148    r  =  csv .reader (f , delimiter = ";" )
@@ -66,13 +73,14 @@ def get_codepoints(f):
6673    for  c  in  range (prev_codepoint  +  1 , NUM_CODEPOINTS ):
6774        yield  Codepoint (c , None )
6875
76+ 
6977def  compress_singletons (singletons ):
70-     uppers  =  [] # (upper, # items in lowers) 
78+     uppers  =  []   # (upper, # items in lowers) 
7179    lowers  =  []
7280
7381    for  i  in  singletons :
7482        upper  =  i  >>  8 
75-         lower  =  i  &  0xff 
83+         lower  =  i  &  0xFF 
7684        if  len (uppers ) ==  0  or  uppers [- 1 ][0 ] !=  upper :
7785            uppers .append ((upper , 1 ))
7886        else :
@@ -82,10 +90,11 @@ def compress_singletons(singletons):
8290
8391    return  uppers , lowers 
8492
93+ 
8594def  compress_normal (normal ):
8695    # lengths 0x00..0x7f are encoded as 00, 01, ..., 7e, 7f 
8796    # lengths 0x80..0x7fff are encoded as 80 80, 80 81, ..., ff fe, ff ff 
88-     compressed  =  [] # [truelen, (truelenaux), falselen, (falselenaux)] 
97+     compressed  =  []   # [truelen, (truelenaux), falselen, (falselenaux)] 
8998
9099    prev_start  =  0 
91100    for  start , count  in  normal :
@@ -95,21 +104,22 @@ def compress_normal(normal):
95104
96105        assert  truelen  <  0x8000  and  falselen  <  0x8000 
97106        entry  =  []
98-         if  truelen  >  0x7f :
107+         if  truelen  >  0x7F :
99108            entry .append (0x80  |  (truelen  >>  8 ))
100-             entry .append (truelen  &  0xff )
109+             entry .append (truelen  &  0xFF )
101110        else :
102-             entry .append (truelen  &  0x7f )
103-         if  falselen  >  0x7f :
111+             entry .append (truelen  &  0x7F )
112+         if  falselen  >  0x7F :
104113            entry .append (0x80  |  (falselen  >>  8 ))
105-             entry .append (falselen  &  0xff )
114+             entry .append (falselen  &  0xFF )
106115        else :
107-             entry .append (falselen  &  0x7f )
116+             entry .append (falselen  &  0x7F )
108117
109118        compressed .append (entry )
110119
111120    return  compressed 
112121
122+ 
113123def  print_singletons (uppers , lowers , uppersname , lowersname ):
114124    print ("#[rustfmt::skip]" )
115125    print ("const {}: &[(u8, u8)] = &[" .format (uppersname ))
@@ -119,22 +129,26 @@ def print_singletons(uppers, lowers, uppersname, lowersname):
119129    print ("#[rustfmt::skip]" )
120130    print ("const {}: &[u8] = &[" .format (lowersname ))
121131    for  i  in  range (0 , len (lowers ), 8 ):
122-         print ("    {}" .format (" " .join ("{:#04x}," .format (x ) for  x  in  lowers [i :i + 8 ])))
132+         print (
133+             "    {}" .format (" " .join ("{:#04x}," .format (x ) for  x  in  lowers [i  : i  +  8 ]))
134+         )
123135    print ("];" )
124136
137+ 
def print_normal(normal, normalname):
    """Print ``normal`` as a Rust ``&[u8]`` constant named ``normalname``.

    Each entry of ``normal`` becomes one output line holding its items as
    comma-terminated, zero-padded hex literals separated by spaces.
    """
    print("#[rustfmt::skip]")
    print("const {}: &[u8] = &[".format(normalname))
    for entry in normal:
        cells = [format(item, "#04x") + "," for item in entry]
        print("    " + " ".join(cells))
    print("];")
131144
145+ 
132146def  main ():
133147    file  =  get_file ("https://www.unicode.org/Public/UNIDATA/UnicodeData.txt" )
134148
135149    codepoints  =  get_codepoints (file )
136150
137-     CUTOFF = 0x10000 
151+     CUTOFF   =   0x10000 
138152    singletons0  =  []
139153    singletons1  =  []
140154    normal0  =  []
@@ -234,10 +248,11 @@ def main():
234248}\  
235249
236250    print ()
237-     print_singletons (singletons0u , singletons0l , 'SINGLETONS0U' , 'SINGLETONS0L' )
238-     print_singletons (singletons1u , singletons1l , 'SINGLETONS1U' , 'SINGLETONS1L' )
239-     print_normal (normal0 , 'NORMAL0' )
240-     print_normal (normal1 , 'NORMAL1' )
251+     print_singletons (singletons0u , singletons0l , "SINGLETONS0U" , "SINGLETONS0L" )
252+     print_singletons (singletons1u , singletons1l , "SINGLETONS1U" , "SINGLETONS1L" )
253+     print_normal (normal0 , "NORMAL0" )
254+     print_normal (normal1 , "NORMAL1" )
255+ 
241256
# Run the generator only when this file is executed as a script.
if __name__ == "__main__":
    main()
0 commit comments