Skip to content

Commit 374f139

Browse files
committed
Add SubCaptureMatches iterator on Captures.
1 parent dd120a9 commit 374f139

File tree

4 files changed

+92
-3
lines changed

4 files changed

+92
-3
lines changed

src/lib.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -463,7 +463,7 @@ pub use re_set::unicode::*;
463463
pub use re_trait::Locations;
464464
pub use re_unicode::{
465465
Regex, Match, Captures,
466-
CaptureNames, Matches, CaptureMatches,
466+
CaptureNames, Matches, CaptureMatches, SubCaptureMatches,
467467
Replacer, NoExpand, Split, SplitN,
468468
escape,
469469
};

src/re_bytes.rs

Lines changed: 36 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ use exec::{Exec, ExecNoSync};
2121
use expand::expand_bytes;
2222
use error::Error;
2323
use re_builder::bytes::RegexBuilder;
24-
use re_trait::{self, RegularExpression, Locations};
24+
use re_trait::{self, RegularExpression, Locations, SubCapturesPosIter};
2525

2626
/// Match represents a single match of a regex in a haystack.
2727
///
@@ -789,6 +789,18 @@ impl<'t> Captures<'t> {
789789
self.named_groups.get(name).and_then(|&i| self.get(i))
790790
}
791791

792+
/// An iterator that yields all capturing matches in the order in which
793+
/// they appear in the regex. If a particular capture group didn't
794+
/// participate in the match, then `None` is yielded for that capture.
795+
///
796+
/// The first match always corresponds to the overall match of the regex.
///
/// The returned iterator borrows this `Captures` value for `'c`; the
/// `Match` values it yields are tied to the matched text lifetime `'t`.
797+
pub fn iter<'c>(&'c self) -> SubCaptureMatches<'c, 't> {
798+
SubCaptureMatches {
799+
// Kept so the iterator can read the captured text when building
// each `Match`.
caps: self,
800+
// Walks the capture locations recorded in `self.locs`.
it: self.locs.iter(),
801+
}
802+
}
803+
792804
/// Expands all instances of `$name` in `text` to the corresponding capture
793805
/// group `name`, and writes them to the `dst` buffer given.
794806
///
@@ -902,6 +914,29 @@ impl<'t, 'i> Index<&'i str> for Captures<'t> {
902914
}
903915
}
904916

917+
/// An iterator that yields all capturing matches in the order in which they
918+
/// appear in the regex.
919+
///
920+
/// If a particular capture group didn't participate in the match, then `None`
921+
/// is yielded for that capture. The first match always corresponds to the
922+
/// overall match of the regex.
923+
///
924+
/// The lifetime `'c` corresponds to the lifetime of the `Captures` value, and
925+
/// the lifetime `'t` corresponds to the originally matched text.
///
/// Values of this type are created by `Captures::iter`.
926+
pub struct SubCaptureMatches<'c, 't: 'c> {
927+
// The captures being iterated; its `text` is used to build each `Match`.
caps: &'c Captures<'t>,
928+
// Underlying iterator over capture locations, obtained from
// `Captures::locs`.
it: SubCapturesPosIter<'c>,
929+
}
930+
931+
impl<'c, 't> Iterator for SubCaptureMatches<'c, 't> {
932+
// `Some(m)` for a group that has a recorded location, `None` for a group
// without one.
type Item = Option<Match<'t>>;
933+
934+
/// Advances the underlying location iterator and converts the next
/// location, if any, into a `Match` over the captured text.
///
/// The outer `Option` signals the end of iteration; the inner `Option`
/// is `None` when the underlying iterator yields no location for a
/// group.
fn next(&mut self) -> Option<Option<Match<'t>>> {
935+
self.it.next()
936+
// Inner `map`: a present `(s, e)` pair becomes a `Match` built from
// the captured text and that pair; an absent pair stays `None`.
.map(|cap| cap.map(|(s, e)| Match::new(self.caps.text, s, e)))
937+
}
938+
}
939+
905940
/// Replacer describes types that can be used to replace matches in a byte
906941
/// string.
907942
///

src/re_unicode.rs

Lines changed: 36 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ use exec::{Exec, ExecNoSyncStr};
2323
use expand::expand_str;
2424
use re_builder::unicode::RegexBuilder;
2525
use re_plugin::Plugin;
26-
use re_trait::{self, RegularExpression, Locations};
26+
use re_trait::{self, RegularExpression, Locations, SubCapturesPosIter};
2727

2828
/// Escapes all regular expression meta characters in `text`.
2929
///
@@ -927,6 +927,18 @@ impl<'t> Captures<'t> {
927927
self.named_groups.pos(name).and_then(|i| self.get(i))
928928
}
929929

930+
/// An iterator that yields all capturing matches in the order in which
931+
/// they appear in the regex. If a particular capture group didn't
932+
/// participate in the match, then `None` is yielded for that capture.
933+
///
934+
/// The first match always corresponds to the overall match of the regex.
///
/// The returned iterator borrows this `Captures` value for `'c`; the
/// `Match` values it yields are tied to the matched text lifetime `'t`.
935+
pub fn iter<'c>(&'c self) -> SubCaptureMatches<'c, 't> {
936+
SubCaptureMatches {
937+
// Kept so the iterator can read the captured text when building
// each `Match`.
caps: self,
938+
// Walks the capture locations recorded in `self.locs`.
it: self.locs.iter(),
939+
}
940+
}
941+
930942
/// Expands all instances of `$name` in `text` to the corresponding capture
931943
/// group `name`, and writes them to the `dst` buffer given.
932944
///
@@ -1025,6 +1037,29 @@ impl<'t, 'i> Index<&'i str> for Captures<'t> {
10251037
}
10261038
}
10271039

1040+
/// An iterator that yields all capturing matches in the order in which they
1041+
/// appear in the regex.
1042+
///
1043+
/// If a particular capture group didn't participate in the match, then `None`
1044+
/// is yielded for that capture. The first match always corresponds to the
1045+
/// overall match of the regex.
1046+
///
1047+
/// The lifetime `'c` corresponds to the lifetime of the `Captures` value, and
1048+
/// the lifetime `'t` corresponds to the originally matched text.
///
/// Values of this type are created by `Captures::iter`.
1049+
pub struct SubCaptureMatches<'c, 't: 'c> {
1050+
// The captures being iterated; its `text` is used to build each `Match`.
caps: &'c Captures<'t>,
1051+
// Underlying iterator over capture locations, obtained from
// `Captures::locs`.
it: SubCapturesPosIter<'c>,
1052+
}
1053+
1054+
impl<'c, 't> Iterator for SubCaptureMatches<'c, 't> {
1055+
// `Some(m)` for a group that has a recorded location, `None` for a group
// without one.
type Item = Option<Match<'t>>;
1056+
1057+
/// Advances the underlying location iterator and converts the next
/// location, if any, into a `Match` over the captured text.
///
/// The outer `Option` signals the end of iteration; the inner `Option`
/// is `None` when the underlying iterator yields no location for a
/// group.
fn next(&mut self) -> Option<Option<Match<'t>>> {
1058+
self.it.next()
1059+
// Inner `map`: a present `(s, e)` pair becomes a `Match` built from
// the captured text and that pair; an absent pair stays `None`.
.map(|cap| cap.map(|(s, e)| Match::new(self.caps.text, s, e)))
1060+
}
1061+
}
1062+
10281063
/// An iterator that yields all non-overlapping capture groups matching a
10291064
/// particular regular expression.
10301065
///

tests/api.rs

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -140,6 +140,25 @@ fn capture_misc() {
140140
assert_eq!(t!("c"), match_text!(cap.name("b").unwrap()));
141141
}
142142

143+
// Checks that `Captures::iter` yields one entry per capture group, in regex
// order, with `None` for a group that did not participate in the match.
#[test]
144+
fn sub_capture_matches() {
145+
// Four explicit groups; group 2 wraps an alternation of group 3 (a letter)
// and group 4 (a digit).
let re = regex!(r"([a-z])(([a-z])|([0-9]))");
146+
let cap = re.captures(t!("a5")).unwrap();
147+
let subs: Vec<_> = cap.iter().collect();
148+
149+
// The overall match (index 0) plus the four explicit groups.
assert_eq!(5, subs.len());
150+
assert!(subs[0].is_some());
151+
assert!(subs[1].is_some());
152+
assert!(subs[2].is_some());
153+
// The letter alternative is not taken for "a5", so group 3 is absent.
assert!(subs[3].is_none());
154+
assert!(subs[4].is_some());
155+
156+
// Index 3 is skipped below because it holds `None`.
assert_eq!(t!("a5"), match_text!(subs[0].unwrap()));
157+
assert_eq!(t!("a"), match_text!(subs[1].unwrap()));
158+
assert_eq!(t!("5"), match_text!(subs[2].unwrap()));
159+
assert_eq!(t!("5"), match_text!(subs[4].unwrap()));
160+
}
161+
143162
expand!(expand1, r"(?P<foo>\w+)", "abc", "$foo", "abc");
144163
expand!(expand2, r"(?P<foo>\w+)", "abc", "$0", "abc");
145164
expand!(expand3, r"(?P<foo>\w+)", "abc", "$1", "abc");

0 commit comments

Comments
 (0)