11use std:: collections:: HashMap ;
22use std:: fmt:: { self , Debug , Write as _} ;
3- use std:: sync:: OnceLock ;
3+ use std:: sync:: LazyLock ;
44
5- use anyhow:: { Context , anyhow} ;
5+ use anyhow:: { Context , anyhow, bail, ensure} ;
6+ use itertools:: Itertools ;
67use regex:: Regex ;
78
8- use crate :: parser:: { Parser , unescape_llvm_string_contents} ;
9+ use crate :: covmap:: FilenameTables ;
10+ use crate :: llvm_utils:: unescape_llvm_string_contents;
11+ use crate :: parser:: Parser ;
12+
13+ #[ cfg( test) ]
14+ mod tests;
915
1016pub ( crate ) fn dump_covfun_mappings (
1117 llvm_ir : & str ,
18+ filename_tables : & FilenameTables ,
1219 function_names : & HashMap < u64 , String > ,
1320) -> anyhow:: Result < ( ) > {
1421 // Extract function coverage entries from the LLVM IR assembly, and associate
1522 // each entry with its (demangled) name.
1623 let mut covfun_entries = llvm_ir
1724 . lines ( )
18- . filter_map ( covfun_line_data)
19- . map ( |line_data| ( function_names. get ( & line_data. name_hash ) . map ( String :: as_str) , line_data) )
20- . collect :: < Vec < _ > > ( ) ;
25+ . filter ( |line| is_covfun_line ( line) )
26+ . map ( parse_covfun_line)
27+ . map_ok ( |line_data| {
28+ ( function_names. get ( & line_data. name_hash ) . map ( String :: as_str) , line_data)
29+ } )
30+ . collect :: < Result < Vec < _ > , _ > > ( ) ?;
2131 covfun_entries. sort_by ( |a, b| {
2232 // Sort entries primarily by name, to help make the order consistent
2333 // across platforms and relatively insensitive to changes.
@@ -41,8 +51,12 @@ pub(crate) fn dump_covfun_mappings(
4151 println ! ( "Number of files: {num_files}" ) ;
4252
4353 for i in 0 ..num_files {
44- let global_file_id = parser. read_uleb128_u32 ( ) ?;
45- println ! ( "- file {i} => global file {global_file_id}" ) ;
54+ let global_file_id = parser. read_uleb128_usize ( ) ?;
55+ let & CovfunLineData { filenames_hash, .. } = line_data;
56+ let Some ( filename) = filename_tables. lookup ( filenames_hash, global_file_id) else {
57+ bail ! ( "couldn't resolve global file: {filenames_hash}, {global_file_id}" ) ;
58+ } ;
59+ println ! ( "- file {i} => {filename}" ) ;
4660 }
4761
4862 let num_expressions = parser. read_uleb128_u32 ( ) ?;
@@ -107,36 +121,50 @@ pub(crate) fn dump_covfun_mappings(
107121 Ok ( ( ) )
108122}
109123
/// Data extracted from a single `@__covrec_…` line of LLVM IR assembly by
/// `parse_covfun_line`.
#[derive(Debug, PartialEq, Eq)]
struct CovfunLineData {
    /// True when the record's variable name carries the trailing `u`
    /// (i.e. it looks like `@__covrec_[HASH]u`).
    is_used: bool,
    /// First `i64` field of the record initializer, reinterpreted as `u64`.
    /// Used as the key when looking up the function's name.
    name_hash: u64,
    /// Last `i64` field of the record initializer (just before the payload
    /// array), reinterpreted as `u64`. Used together with a global file ID to
    /// look up filenames.
    filenames_hash: u64,
    /// Contents of the record's `c"…"` byte string after LLVM string
    /// unescaping.
    payload: Vec<u8>,
}
115131
116- /// Checks a line of LLVM IR assembly to see if it contains an `__llvm_covfun`
117- /// entry, and if so extracts relevant data in a `CovfunLineData`.
118- fn covfun_line_data ( line : & str ) -> Option < CovfunLineData > {
119- let re = {
120- // We cheat a little bit and match variable names `@__covrec_[HASH]u`
121- // rather than the section name, because the section name is harder to
122- // extract and differs across Linux/Windows/macOS. We also extract the
123- // symbol name hash from the variable name rather than the data, since
124- // it's easier and both should match.
125- static RE : OnceLock < Regex > = OnceLock :: new ( ) ;
126- RE . get_or_init ( || {
127- Regex :: new (
128- r#"^@__covrec_(?<name_hash>[0-9A-Z]+)(?<is_used>u)? = .*\[[0-9]+ x i8\] c"(?<payload>[^"]*)".*$"# ,
129- )
130- . unwrap ( )
131- } )
132- } ;
/// Returns true if `line` starts with `@__covrec_`, i.e. it looks like the
/// definition of a function coverage record variable.
///
/// This is only a cheap prefix check; it does not validate the rest of the
/// line.
fn is_covfun_line(line: &str) -> bool {
    line.starts_with("@__covrec_")
}
133135
134- let captures = re. captures ( line) ?;
135- let name_hash = u64:: from_str_radix ( & captures[ "name_hash" ] , 16 ) . unwrap ( ) ;
136+ /// Given a line of LLVM IR assembly that should contain an `__llvm_covfun`
137+ /// entry, parses it to extract relevant data in a `CovfunLineData`.
138+ fn parse_covfun_line ( line : & str ) -> anyhow:: Result < CovfunLineData > {
139+ ensure ! ( is_covfun_line( line) ) ;
140+
141+ // We cheat a little bit and match variable names `@__covrec_[HASH]u`
142+ // rather than the section name, because the section name is harder to
143+ // extract and differs across Linux/Windows/macOS.
144+ const RE_STRING : & str = r#"(?x)^
145+ @__covrec_[0-9A-Z]+(?<is_used>u)?
146+ \ = \ # (trailing space)
147+ .*
148+ <\{
149+ \ i64 \ (?<name_hash> -? [0-9]+),
150+ \ i32 \ -? [0-9]+, # (length of payload; currently unused)
151+ \ i64 \ -? [0-9]+, # (source hash; currently unused)
152+ \ i64 \ (?<filenames_hash> -? [0-9]+),
153+ \ \[ [0-9]+ \ x \ i8 \] \ c"(?<payload>[^"]*)"
154+ \ # (trailing space)
155+ }>
156+ .*$
157+ "# ;
158+ static RE : LazyLock < Regex > = LazyLock :: new ( || Regex :: new ( RE_STRING ) . unwrap ( ) ) ;
159+
160+ let captures =
161+ RE . captures ( line) . with_context ( || format ! ( "couldn't parse covfun line: {line:?}" ) ) ?;
136162 let is_used = captures. name ( "is_used" ) . is_some ( ) ;
163+ let name_hash = i64:: from_str_radix ( & captures[ "name_hash" ] , 10 ) . unwrap ( ) as u64 ;
164+ let filenames_hash = i64:: from_str_radix ( & captures[ "filenames_hash" ] , 10 ) . unwrap ( ) as u64 ;
137165 let payload = unescape_llvm_string_contents ( & captures[ "payload" ] ) ;
138166
139- Some ( CovfunLineData { name_hash, is_used , payload } )
167+ Ok ( CovfunLineData { is_used , name_hash, filenames_hash , payload } )
140168}
141169
142170// Extra parser methods only needed when parsing `covfun` payloads.
0 commit comments