11use std:: collections:: HashMap ;
22use std:: fmt:: { self , Debug , Write as _} ;
3- use std:: sync:: OnceLock ;
3+ use std:: sync:: LazyLock ;
44
5- use anyhow:: { Context , anyhow} ;
5+ use anyhow:: { Context , anyhow, ensure} ;
6+ use itertools:: Itertools ;
67use regex:: Regex ;
78
89use crate :: llvm_junk:: unescape_llvm_string_contents;
910use crate :: parser:: Parser ;
1011
12+ #[ cfg( test) ]
13+ mod tests;
14+
1115pub ( crate ) fn dump_covfun_mappings (
1216 llvm_ir : & str ,
1317 function_names : & HashMap < u64 , String > ,
@@ -16,9 +20,12 @@ pub(crate) fn dump_covfun_mappings(
1620 // each entry with its (demangled) name.
1721 let mut covfun_entries = llvm_ir
1822 . lines ( )
19- . filter_map ( covfun_line_data)
20- . map ( |line_data| ( function_names. get ( & line_data. name_hash ) . map ( String :: as_str) , line_data) )
21- . collect :: < Vec < _ > > ( ) ;
23+ . filter ( |line| is_covfun_line ( line) )
24+ . map ( parse_covfun_line)
25+ . map_ok ( |line_data| {
26+ ( function_names. get ( & line_data. name_hash ) . map ( String :: as_str) , line_data)
27+ } )
28+ . collect :: < Result < Vec < _ > , _ > > ( ) ?;
2229 covfun_entries. sort_by ( |a, b| {
2330 // Sort entries primarily by name, to help make the order consistent
2431 // across platforms and relatively insensitive to changes.
@@ -108,36 +115,50 @@ pub(crate) fn dump_covfun_mappings(
108115 Ok ( ( ) )
109116}
110117
/// Data extracted from a single `@__covrec_*` line of LLVM IR, as produced by
/// `parse_covfun_line`.
#[derive(Debug, PartialEq, Eq)]
struct CovfunLineData {
    /// True if the record's variable name carries the trailing `u` suffix
    /// (i.e. it matched `@__covrec_[HASH]u`).
    is_used: bool,
    /// First `i64` field of the record, reinterpreted as unsigned. Used to
    /// look up the function's name.
    name_hash: u64,
    /// Fourth integer field of the record (an `i64`), reinterpreted as
    /// unsigned.
    filenames_hash: u64,
    /// Unescaped bytes of the record's `c"..."` string literal.
    payload: Vec<u8>,
}
116125
117- /// Checks a line of LLVM IR assembly to see if it contains an `__llvm_covfun`
118- /// entry, and if so extracts relevant data in a `CovfunLineData`.
119- fn covfun_line_data ( line : & str ) -> Option < CovfunLineData > {
120- let re = {
121- // We cheat a little bit and match variable names `@__covrec_[HASH]u`
122- // rather than the section name, because the section name is harder to
123- // extract and differs across Linux/Windows/macOS. We also extract the
124- // symbol name hash from the variable name rather than the data, since
125- // it's easier and both should match.
126- static RE : OnceLock < Regex > = OnceLock :: new ( ) ;
127- RE . get_or_init ( || {
128- Regex :: new (
129- r#"^@__covrec_(?<name_hash>[0-9A-Z]+)(?<is_used>u)? = .*\[[0-9]+ x i8\] c"(?<payload>[^"]*)".*$"# ,
130- )
131- . unwrap ( )
132- } )
133- } ;
/// Returns true if `line` looks like the start of a covfun record, i.e. it
/// begins with the `@__covrec_` variable-name prefix.
///
/// The check is a plain prefix test: leading whitespace is not skipped, and
/// the rest of the line is not validated.
fn is_covfun_line(line: &str) -> bool {
    line.starts_with("@__covrec_")
}
134129
135- let captures = re. captures ( line) ?;
136- let name_hash = u64:: from_str_radix ( & captures[ "name_hash" ] , 16 ) . unwrap ( ) ;
130+ /// Given a line of LLVM IR assembly that should contain an `__llvm_covfun`
131+ /// entry, parses it to extract relevant data in a `CovfunLineData`.
132+ fn parse_covfun_line ( line : & str ) -> anyhow:: Result < CovfunLineData > {
133+ ensure ! ( is_covfun_line( line) ) ;
134+
135+ // We cheat a little bit and match variable names `@__covrec_[HASH]u`
136+ // rather than the section name, because the section name is harder to
137+ // extract and differs across Linux/Windows/macOS.
138+ const RE_STRING : & str = r#"(?x)^
139+ @__covrec_[0-9A-Z]+(?<is_used>u)?
140+ \ = \ # (trailing space)
141+ .*
142+ <\{
143+ \ i64 \ (?<name_hash> -? [0-9]+),
144+ \ i32 \ -? [0-9]+, # (length of payload; currently unused)
145+ \ i64 \ -? [0-9]+, # (source hash; currently unused)
146+ \ i64 \ (?<filenames_hash> -? [0-9]+),
147+ \ \[ [0-9]+ \ x \ i8 \] \ c"(?<payload>[^"]*)"
148+ \ # (trailing space)
149+ }>
150+ .*$
151+ "# ;
152+ static RE : LazyLock < Regex > = LazyLock :: new ( || Regex :: new ( RE_STRING ) . unwrap ( ) ) ;
153+
154+ let captures =
155+ RE . captures ( line) . with_context ( || format ! ( "couldn't parse covfun line: {line:?}" ) ) ?;
137156 let is_used = captures. name ( "is_used" ) . is_some ( ) ;
157+ let name_hash = i64:: from_str_radix ( & captures[ "name_hash" ] , 10 ) . unwrap ( ) as u64 ;
158+ let filenames_hash = i64:: from_str_radix ( & captures[ "filenames_hash" ] , 10 ) . unwrap ( ) as u64 ;
138159 let payload = unescape_llvm_string_contents ( & captures[ "payload" ] ) ;
139160
140- Some ( CovfunLineData { name_hash, is_used , payload } )
161+ Ok ( CovfunLineData { is_used , name_hash, filenames_hash , payload } )
141162}
142163
143164// Extra parser methods only needed when parsing `covfun` payloads.
0 commit comments