Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
# dplyr (development version)

* The `rows_*()` functions now always retain the column types of `x`. This
behavior was documented, but previously wasn't being applied correctly
(#6240).

* `rows_insert()` gained a new `conflict` argument allowing you greater control
over rows in `y` with keys that conflict with keys in `x`. A conflict arises
if a key in `y` already exists in `x`. By default, a conflict results in an
Expand Down
96 changes: 73 additions & 23 deletions R/rows.R
Original file line number Diff line number Diff line change
Expand Up @@ -127,10 +127,11 @@ rows_insert.data.frame <- function(x,

y <- auto_copy(x, y, copy = copy)

rows_check_containment(x, y)

by <- rows_check_by(by, y)

rows_check_containment(x, y)
y <- rows_cast_y(y, x)

x_key <- rows_select_key(x, by, "x")
y_key <- rows_select_key(y, by, "y")

Expand Down Expand Up @@ -169,18 +170,27 @@ rows_update.data.frame <- function(x,

y <- auto_copy(x, y, copy = copy)

rows_check_containment(x, y)

by <- rows_check_by(by, y)

rows_check_containment(x, y)

x_key <- rows_select_key(x, by, "x")
y_key <- rows_select_key(y, by, "y", unique = TRUE)
args <- vec_cast_common(x = x_key, y = y_key)
x_key <- args$x
y_key <- args$y

values_names <- setdiff(names(y), names(y_key))

x_values <- x[values_names]
y_values <- y[values_names]
y_values <- rows_cast_y(y_values, x_values)

keep <- rows_check_y_unmatched(x_key, y_key, unmatched)

if (!is.null(keep)) {
y <- dplyr_row_slice(y, keep)
y_key <- rows_select_key(y, by, "y")
y_key <- dplyr_row_slice(y_key, keep)
y_values <- dplyr_row_slice(y_values, keep)
}

loc <- vec_match(x_key, y_key)
Expand All @@ -189,9 +199,12 @@ rows_update.data.frame <- function(x,
y_loc <- loc[match]
x_loc <- which(match)

values_cols <- setdiff(names(y), names(y_key))
y_values <- dplyr_row_slice(y_values, y_loc)

x[x_loc, values_cols] <- y[y_loc, values_cols]
x_values <- vec_assign(x_values, x_loc, y_values)
x_values <- dplyr_new_list(x_values)

x <- dplyr_col_modify(x, x_values)

x
}
Expand Down Expand Up @@ -222,18 +235,27 @@ rows_patch.data.frame <- function(x,

y <- auto_copy(x, y, copy = copy)

rows_check_containment(x, y)

by <- rows_check_by(by, y)

rows_check_containment(x, y)

x_key <- rows_select_key(x, by, "x")
y_key <- rows_select_key(y, by, "y", unique = TRUE)
args <- vec_cast_common(x = x_key, y = y_key)
x_key <- args$x
y_key <- args$y

values_names <- setdiff(names(y), names(y_key))

x_values <- x[values_names]
y_values <- y[values_names]
y_values <- rows_cast_y(y_values, x_values)

keep <- rows_check_y_unmatched(x_key, y_key, unmatched)

if (!is.null(keep)) {
y <- dplyr_row_slice(y, keep)
y_key <- rows_select_key(y, by, "y")
y_key <- dplyr_row_slice(y_key, keep)
y_values <- dplyr_row_slice(y_values, keep)
}

loc <- vec_match(x_key, y_key)
Expand All @@ -242,14 +264,19 @@ rows_patch.data.frame <- function(x,
y_loc <- loc[match]
x_loc <- which(match)

values_cols <- setdiff(names(y), names(y_key))
x_slice <- dplyr_row_slice(x_values, x_loc)
x_slice <- dplyr_new_list(x_slice)

y_slice <- dplyr_row_slice(y_values, y_loc)
y_slice <- dplyr_new_list(y_slice)

x_values <- x[x_loc, values_cols]
y_values <- y[y_loc, values_cols]
x_patched <- map2(x_slice, y_slice, coalesce)
x_patched <- new_data_frame(x_patched, n = length(x_loc))

x_patched <- map2(x_values, y_values, coalesce)
x_values <- vec_assign(x_values, x_loc, x_patched)
x_values <- dplyr_new_list(x_values)

x[x_loc, values_cols] <- x_patched
x <- dplyr_col_modify(x, x_values)

x
}
Expand Down Expand Up @@ -278,27 +305,43 @@ rows_upsert.data.frame <- function(x,

y <- auto_copy(x, y, copy = copy)

rows_check_containment(x, y)

by <- rows_check_by(by, y)

rows_check_containment(x, y)

x_key <- rows_select_key(x, by, "x")
y_key <- rows_select_key(y, by, "y", unique = TRUE)
args <- vec_cast_common(x = x_key, y = y_key)
x_key <- args$x
y_key <- args$y

values_names <- setdiff(names(y), names(y_key))

x_values <- x[values_names]
y_values <- y[values_names]
y_values <- rows_cast_y(y_values, x_values)

loc <- vec_match(x_key, y_key)
match <- !is.na(loc)

y_loc <- loc[match]
x_loc <- which(match)

# Update
y_values <- dplyr_row_slice(y_values, y_loc)
x_values <- vec_assign(x_values, x_loc, y_values)
x_values <- dplyr_new_list(x_values)

x <- dplyr_col_modify(x, x_values)

# Insert
y_size <- vec_size(y_key)
y_extra <- vec_as_location_invert(y_loc, y_size)
y_extra <- dplyr_row_slice(y, y_extra)

values_cols <- setdiff(names(y), names(y_key))
y <- dplyr_row_slice(y, y_extra)
y <- rows_cast_y(y, x)

x[x_loc, values_cols] <- y[y_loc, values_cols]
x <- rows_bind(x, y_extra)
x <- rows_bind(x, y)

x
}
Expand Down Expand Up @@ -333,6 +376,9 @@ rows_delete.data.frame <- function(x,

x_key <- rows_select_key(x, by, "x")
y_key <- rows_select_key(y, by, "y")
args <- vec_cast_common(x = x_key, y = y_key)
x_key <- args$x
y_key <- args$y

keep <- rows_check_y_unmatched(x_key, y_key, unmatched)

Expand Down Expand Up @@ -404,6 +450,10 @@ rows_check_containment <- function(x, y, ..., error_call = caller_env()) {
invisible()
}

# Casts `y` to the type of `x`. The inputs are labelled "y" and "x" in any
# error raised by the cast, and that error is attributed to `call`.
# `...` is accepted but not used.
rows_cast_y <- function(y, x, ..., call = caller_env()) {
  vec_cast(
    x = y,
    to = x,
    x_arg = "y",
    to_arg = "x",
    call = call
  )
}

rows_select_key <- function(x,
by,
arg,
Expand Down
17 changes: 17 additions & 0 deletions R/utils.r
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,23 @@ dplyr_new_data_frame <- function(x = data.frame(),
)
}

# Reduces a list-like vector to a bare list: every attribute is dropped
# except `names`, which is kept when present.
dplyr_new_list <- function(x) {
  if (!is_list(x)) {
    abort("`x` must be a VECSXP.", .internal = TRUE)
  }

  # Capture the names first, since resetting the attributes discards them.
  nms <- names(x)

  attributes(x) <- if (is.null(nms)) NULL else list(names = nms)

  x
}

maybe_restart <- function(restart) {
if (!is_null(findRestart(restart))) {
invokeRestart(restart)
Expand Down
96 changes: 96 additions & 0 deletions tests/testthat/_snaps/rows.md
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,26 @@
i The following rows in `y` have keys that already exist in `x`: `c(1, 2, 3)`.
i Use `conflict = "ignore"` if you want to ignore these `y` rows.

# rows_insert() casts keys to the type of `x`

Code
(expect_error(rows_insert(x, y, "key")))
Output
<error/vctrs_error_cast_lossy>
Error in `rows_insert()`:
! Can't convert from `y$key` <double> to `x$key` <integer> due to loss of precision.
* Locations: 1

# rows_insert() casts values to the type of `x`

Code
(expect_error(rows_insert(x, y, "key")))
Output
<error/vctrs_error_cast_lossy>
Error in `rows_insert()`:
! Can't convert from `y$value` <double> to `x$value` <integer> due to loss of precision.
* Locations: 1

# `conflict` is validated

Code
Expand Down Expand Up @@ -56,6 +76,25 @@
! `y` key values must be unique.
i The following rows contain duplicate key values: `c(1, 2)`.

# rows_update() casts keys to their common type for matching but retains `x` type

Code
(expect_error(rows_update(x, y, "key")))
Output
<error/vctrs_error_incompatible_type>
Error in `rows_update()`:
! Can't combine `x$key` <integer> and `y$key` <character>.

# rows_update() casts values to the type of `x`

Code
(expect_error(rows_update(x, y, "key")))
Output
<error/vctrs_error_cast_lossy>
Error in `rows_update()`:
! Can't convert from `y$value` <double> to `x$value` <integer> due to loss of precision.
* Locations: 1

# `unmatched` is validated

Code
Expand Down Expand Up @@ -92,6 +131,25 @@
! `y` key values must be unique.
i The following rows contain duplicate key values: `c(1, 2)`.

# rows_patch() casts keys to their common type for matching but retains `x` type

Code
(expect_error(rows_patch(x, y, "key")))
Output
<error/vctrs_error_incompatible_type>
Error in `rows_patch()`:
! Can't combine `x$key` <integer> and `y$key` <character>.

# rows_patch() casts values to the type of `x`

Code
(expect_error(rows_patch(x, y, "key")))
Output
<error/vctrs_error_cast_lossy>
Error in `rows_patch()`:
! Can't convert from `y$value` <double> to `x$value` <integer> due to loss of precision.
* Locations: 1

# rows_upsert() doesn't allow `y` keys to be duplicated (#5553)

Code
Expand All @@ -102,6 +160,35 @@
! `y` key values must be unique.
i The following rows contain duplicate key values: `c(1, 2)`.

# rows_upsert() casts keys to their common type for matching but retains `x` type

Code
(expect_error(rows_upsert(x, y, "key")))
Output
<error/vctrs_error_incompatible_type>
Error in `rows_upsert()`:
! Can't combine `x$key` <integer> and `y$key` <character>.

# rows_upsert() casts keys to the type of `x`

Code
(expect_error(rows_upsert(x, y, "key")))
Output
<error/vctrs_error_cast_lossy>
Error in `rows_upsert()`:
! Can't convert from `y$key` <double> to `x$key` <integer> due to loss of precision.
* Locations: 1

# rows_upsert() casts values to the type of `x`

Code
(expect_error(rows_upsert(x, y, "key")))
Output
<error/vctrs_error_cast_lossy>
Error in `rows_upsert()`:
! Can't convert from `y$value` <double> to `x$value` <integer> due to loss of precision.
* Locations: 1

# rows_delete() ignores extra `y` columns, with a message

Code
Expand All @@ -128,6 +215,15 @@
i The following rows in `y` have keys that don't exist in `x`: `c(1, 3)`.
i Use `unmatched = "ignore"` if you want to ignore these `y` rows.

# rows_delete() casts keys to their common type for matching but retains `x` type

Code
(expect_error(rows_delete(x, y, "key")))
Output
<error/vctrs_error_incompatible_type>
Error in `rows_delete()`:
! Can't combine `x$key` <integer> and `y$key` <character>.

# rows_check_containment() checks that `y` columns are in `x`

Code
Expand Down
Loading