@@ -81,6 +81,28 @@ def load_properties(f, interestingprops = None):
8181
8282    return  props 
8383
def load_confusables(f):
    """Parse a confusables data file into ``(source, prototype)`` pairs.

    ``fetch(f)`` is called first (defined elsewhere in this file; presumably
    it makes sure the data file is available locally -- confirm against its
    definition).  The file named ``os.path.basename(f)`` is then read as
    UTF-8 and every line matching the entry pattern is converted.

    Each accepted line has two columns of upper-case hexadecimal code
    points, each code point followed by a space and each column terminated
    by ``;`` and a tab.  Lines that do not match (comments, blanks, ...)
    are skipped.

    Returns:
        A list of ``(int, [int, ...])`` tuples in file order: the single
        code point from the first column and the code points from the
        second column.

    Raises:
        Exception: if the first column holds more than one code point.
    """
    fetch(f)
    entry_re = re.compile(r"^((?:[0-9A-F]+ )+);\t((?:[0-9A-F]+ )+);\t\w*")
    confusables = []

    lines = fileinput.input(os.path.basename(f),
                            openhook=fileinput.hook_encoded("utf-8"))
    try:
        for line in lines:
            m = entry_re.match(line)
            if not m:
                continue
            sources = m.group(1).split()
            # The regex guarantees at least one code point, so anything
            # other than exactly one means "too many".
            if len(sources) != 1:
                raise Exception('More than one code point in first column')
            prototype = [int(cp, 16) for cp in m.group(2).split()]
            confusables.append((int(sources[0], 16), prototype))
    finally:
        # fileinput.input() installs module-global state; close it even on
        # the error path so the file handle is released and a later
        # fileinput.input() call does not fail with "input() already active".
        fileinput.close()

    return confusables
105+ 
84106def  format_table_content (f , content , indent ):
85107    line  =  " " * indent 
86108    first  =  True 
@@ -99,6 +121,18 @@ def format_table_content(f, content, indent):
def escape_char(c):
    r"""Render the integer code point *c* as a Rust ``char`` literal.

    The result is a single-quoted ``\u{...}`` escape with the code point in
    lower-case hexadecimal, e.g. ``0x41`` becomes ``'\u{41}'``.
    """
    # The backslash must be immediately followed by `u`; any whitespace in
    # between would not form a valid Rust unicode escape.
    return "'\\u{%x}'" % c
101123
def escape_char_list(l):
    """Format an iterable of code points as a bracketed, comma-separated list.

    Every element is rendered with ``escape_char`` and the pieces are joined
    with ``", "`` inside ``[`` and ``]``.  An empty input yields ``"[]"``.
    """
    return "[" + ", ".join(escape_char(c) for c in l) + "]"
135+ 
102136def  emit_table (f , name , t_data , t_type  =  "&'static [(char, char)]" , is_pub = True ,
103137        pfun = lambda  x : "(%s,%s)"  %  (escape_char (x [0 ]), escape_char (x [1 ])), is_const = True ):
104138    pub_string  =  "const" 
@@ -173,10 +207,51 @@ def emit_identifier_module(f):
173207            pfun = lambda  x : "(%s,%s, IdentifierType::%s)"  %  (escape_char (x [0 ]), escape_char (x [1 ]), x [2 ]))
174208    f .write ("}\n \n " )
175209
def emit_confusable_detection_module(f):
    """Write the Rust ``confusable_detection`` module to the file object *f*.

    Output, in order: the module opener, a ``char_confusable_prototype``
    lookup function (which searches ``CONFUSABLES`` through
    ``super::util::bsearch_value_table``), a comment line, the
    ``CONFUSABLES`` table itself via ``emit_table``, and the closing brace.

    The table comes from ``load_confusables("confusables.txt")`` and is
    sorted by source code point before being emitted.

    Raises:
        Exception: if two table entries share the same source code point.
    """
    f.write("pub mod confusable_detection {")
    f.write("""

    #[inline]
    pub fn char_confusable_prototype(c: char) -> Option<&'static [char]> {
        // FIXME: do we want to special case ASCII here?
        match c as usize {
            _ => super::util::bsearch_value_table(c, CONFUSABLES)
        }
    }

""")

    f.write("    // Confusable table:\n")
    entries = sorted(load_confusables("confusables.txt"),
                     key=lambda entry: entry[0])

    # After sorting, equal keys are adjacent, so comparing neighbours is
    # enough to detect any duplicate.
    keys = [entry[0] for entry in entries]
    for previous, current in zip(keys, keys[1:]):
        if previous == current:
            raise Exception("duplicate keys in confusables table: %s" % current)

    def format_entry(x):
        # One table row: (source char, reference to its prototype list).
        return "(%s, &%s)" % (escape_char(x[0]), escape_char_list(x[1]))

    emit_table(f, "CONFUSABLES", entries, "&'static [(char, &'static [char])]", is_pub=False,
            pfun=format_entry)
    f.write("}\n\n")
237+ 
238+ 
176239def  emit_util_mod (f ):
177240    f .write (""" 
178241pub mod util { 
179242    use core::result::Result::{Ok, Err}; 
243+      
244+     #[inline] 
245+     pub fn bsearch_value_table<T: Copy>(c: char, r: &'static [(char, T)]) -> Option<T> { 
246+         match r.binary_search_by_key(&c, |&(k, _)| k) { 
247+             Ok(idx) => { 
248+                 let (_, v) = r[idx]; 
249+                 Some(v) 
250+             } 
251+             Err(_) => None 
252+         } 
253+     } 
254+      
180255    #[inline] 
181256    pub fn bsearch_range_table(c: char, r: &'static [(char,char)]) -> bool { 
182257        use core::cmp::Ordering::{Equal, Less, Greater}; 
@@ -224,3 +299,5 @@ def emit_util_mod(f):
224299        emit_util_mod (rf )
225300        ### identifier module 
226301        emit_identifier_module (rf )
302+         ### confusable_detection module 
303+         emit_confusable_detection_module (rf )
0 commit comments