Skip to content

Commit 50178ba

Browse files
authored
fix: Upgrade rayon and jwalk to get rid of intermediate vec (#163)
1 parent 273ec52 commit 50178ba

File tree

4 files changed

+13
-21
lines changed

4 files changed

+13
-21
lines changed

Cargo.lock

Lines changed: 4 additions & 5 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -17,7 +17,7 @@ pulldown-cmark = "0.8.0"
1717
blake3 = "1.0.0"
1818

1919
html5gum = "0.5.3"
20-
jwalk = "0.6.0"
20+
jwalk = "0.7.0"
2121
patricia_tree = "0.3.1"
2222
bumpalo = { version = "3.11.1", features = ["collections"] }
2323
percent-encoding = "2.1.0"

src/html/mod.rs

Lines changed: 1 addition & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -326,10 +326,7 @@ fn test_document_href() {
326326
Path::new("public/platforms/python/troubleshooting.html"),
327327
);
328328

329-
assert_eq!(
330-
doc.href(),
331-
Href("platforms/python/troubleshooting.html")
332-
);
329+
assert_eq!(doc.href(), Href("platforms/python/troubleshooting.html"));
333330
}
334331

335332
#[test]

src/main.rs

Lines changed: 7 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -352,8 +352,8 @@ struct HtmlResult<C> {
352352

353353
fn walk_files(
354354
base_path: &Path,
355-
) -> Result<impl ParallelIterator<Item = jwalk::DirEntry<((), bool)>>, Error> {
356-
let entries = WalkDirGeneric::<((), bool)>::new(base_path)
355+
) -> impl ParallelIterator<Item = Result<jwalk::DirEntry<((), bool)>, jwalk::Error>> {
356+
WalkDirGeneric::<((), bool)>::new(base_path)
357357
.sort(true) // helps branch predictor (?)
358358
.skip_hidden(false)
359359
.process_read_dir(|_, _, _, children| {
@@ -364,6 +364,7 @@ fn walk_files(
364364
}
365365
})
366366
.into_iter()
367+
.par_bridge()
367368
.filter_map(|entry_result| {
368369
if let Ok(entry) = entry_result {
369370
if let Some(err) = entry.read_children_error {
@@ -379,24 +380,18 @@ fn walk_files(
379380
Some(entry_result)
380381
}
381382
})
382-
// XXX: cannot use par_bridge because of https://github.com/rayon-rs/rayon/issues/690
383-
.collect::<Result<Vec<_>, _>>()?;
384-
385-
// Minimize amount of LinkCollector instances created. This impacts parallelism but
386-
// `LinkCollector::merge` is rather slow.
387-
let min_len = entries.len() / rayon::current_num_threads();
388-
Ok(entries.into_par_iter().with_min_len(min_len))
389383
}
390384

391385
fn extract_html_links<C: LinkCollector<P::Paragraph>, P: ParagraphWalker>(
392386
base_path: &Path,
393387
check_anchors: bool,
394388
get_paragraphs: bool,
395389
) -> Result<HtmlResult<C>, Error> {
396-
let result: Result<_, Error> = walk_files(base_path)?
390+
let result: Result<_, Error> = walk_files(base_path)
397391
.try_fold(
398392
|| (DocumentBuffers::default(), C::new(), 0, 0),
399393
|(mut doc_buf, mut collector, mut documents_count, mut file_count), entry| {
394+
let entry = entry?;
400395
let path = entry.path();
401396
let document = Document::new(base_path, &path);
402397

@@ -458,8 +453,9 @@ type MarkdownResult<P> = BTreeMap<P, Vec<(DocumentSource, usize)>>;
458453
fn extract_markdown_paragraphs<P: ParagraphWalker>(
459454
sources_path: &Path,
460455
) -> Result<MarkdownResult<P::Paragraph>, Error> {
461-
let results: Vec<Result<_, Error>> = walk_files(sources_path)?
456+
let results: Vec<Result<_, Error>> = walk_files(sources_path)
462457
.try_fold(Vec::new, |mut paragraphs, entry| {
458+
let entry = entry?;
463459
let source = DocumentSource::new(entry.path());
464460

465461
if !source

0 commit comments

Comments
 (0)