From ad431dc56964e47004b1520193a26b7ce7bcbd1f Mon Sep 17 00:00:00 2001
From: Hadley Wickham <h.wickham@gmail.com>
Date: Fri, 21 Jan 2022 12:14:32 -0600
Subject: [PATCH] Optionally extract a group with str_extract()

Fixes #420
---
 NEWS.md                       |  3 +++
 R/extract.r                   | 13 +++++++++++--
 man/str_extract.Rd            |  9 ++++++++-
 tests/testthat/test-extract.r |  5 +++++
 4 files changed, 27 insertions(+), 3 deletions(-)

diff --git a/NEWS.md b/NEWS.md
index 7afaf228..faf2e270 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -39,6 +39,9 @@
 
 * New `str_equal()` compares two character vectors using unicode rules,
   and optionally ignores case (#381).
+
+* `str_extract()` can now optionally extract a capturing group instead of
+  the complete match (#420).
 
 * New `str_split_1()` is tailored for the special case of splitting up a single 
   string (#409).
diff --git a/R/extract.r b/R/extract.r
index 02e92965..18892a86 100644
--- a/R/extract.r
+++ b/R/extract.r
@@ -4,6 +4,8 @@
 #'
 #' @inheritParams str_detect
 #' @return A character vector.
+#' @param group If supplied, the number of a capturing group in `pattern`;
+#'   the text matched by that group is returned instead of the complete match.
 #' @seealso [str_match()] to extract matched groups;
 #'   [stringi::stri_extract()] for the underlying implementation.
 #' @param simplify If `FALSE`, the default, returns a list of character
@@ -16,6 +18,10 @@
 #' str_extract(shopping_list, "[a-z]{1,4}")
 #' str_extract(shopping_list, "\\b[a-z]{1,4}\\b")
 #'
+#' str_extract(shopping_list, "([a-z]+) of ([a-z]+)")
+#' str_extract(shopping_list, "([a-z]+) of ([a-z]+)", group = 1)
+#' str_extract(shopping_list, "([a-z]+) of ([a-z]+)", group = 2)
+#'
 #' # Extract all matches
 #' str_extract_all(shopping_list, "[a-z]+")
 #' str_extract_all(shopping_list, "\\b[a-z]+\\b")
@@ -27,9 +33,12 @@
 #'
 #' # Extract all words
 #' str_extract_all("This is, suprisingly, a sentence.", boundary("word"))
-str_extract <- function(string, pattern) {
-  check_lengths(string, pattern)
+str_extract <- function(string, pattern, group = NULL) {
+  if (!is.null(group)) {
+    return(str_match(string, pattern)[, group + 1])
+  }
 
+  check_lengths(string, pattern)
   switch(type(pattern),
     empty = stri_extract_first_boundaries(string, pattern, opts_brkiter = opts(pattern)),
     bound = stri_extract_first_boundaries(string, pattern, opts_brkiter = opts(pattern)),
diff --git a/man/str_extract.Rd b/man/str_extract.Rd
index 9861f269..32a7cce0 100644
--- a/man/str_extract.Rd
+++ b/man/str_extract.Rd
@@ -5,7 +5,7 @@
 \alias{str_extract_all}
 \title{Extract matching patterns from a string}
 \usage{
-str_extract(string, pattern)
+str_extract(string, pattern, group = NULL)
 
 str_extract_all(string, pattern, simplify = FALSE)
 }
@@ -27,6 +27,9 @@ Match character, word, line and sentence boundaries with
 \code{\link[=boundary]{boundary()}}. An empty pattern, "", is equivalent to
 \code{boundary("character")}.}
 
+\item{group}{If supplied, the number of a capturing group in \code{pattern};
+the text matched by that group is returned instead of the complete match.}
+
 \item{simplify}{If \code{FALSE}, the default, returns a list of character
 vectors. If \code{TRUE} returns a character matrix.}
 }
@@ -43,6 +46,10 @@ str_extract(shopping_list, "[a-z]+")
 str_extract(shopping_list, "[a-z]{1,4}")
 str_extract(shopping_list, "\\\\b[a-z]{1,4}\\\\b")
 
+str_extract(shopping_list, "([a-z]+) of ([a-z]+)")
+str_extract(shopping_list, "([a-z]+) of ([a-z]+)", group = 1)
+str_extract(shopping_list, "([a-z]+) of ([a-z]+)", group = 2)
+
 # Extract all matches
 str_extract_all(shopping_list, "[a-z]+")
 str_extract_all(shopping_list, "\\\\b[a-z]+\\\\b")
diff --git a/tests/testthat/test-extract.r b/tests/testthat/test-extract.r
index fec3e0a4..e6386e89 100644
--- a/tests/testthat/test-extract.r
+++ b/tests/testthat/test-extract.r
@@ -36,3 +36,8 @@ test_that("str_extract extracts first match if found, NA otherwise", {
   expect_length(word_1_to_4, length(shopping_list))
   expect_equal(word_1_to_4[1], NA_character_)
 })
+
+test_that("can extract a group", {
+  expect_equal(str_extract("abc", "(.).(.)", group = 1), "a")
+  expect_equal(str_extract("abc", "(.).(.)", group = 2), "c")
+})