diff --git a/DESCRIPTION b/DESCRIPTION index 9b205f8..bcd7d95 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,21 +1,27 @@ Type: Package Package: tidySingleCellExperiment Title: Brings SingleCellExperiment to the Tidyverse -Version: 1.11.3 -Authors@R: c(person("Stefano", "Mangiola", email = "mangiolastefano@gmail.com", - role = c("aut", "cre")) ) -Description: tidySingleCellExperiment is an adapter that abstracts the 'SingleCellExperiment' container - in the form of a tibble and allows the data manipulation, plotting and nesting using 'tidyverse'. +Version: 1.11.4 +Authors@R: c(person("Stefano", "Mangiola", + email="mangiolastefano@gmail.com", + role=c("aut", "cre"))) +Description: 'tidySingleCellExperiment' is an adapter that abstracts + the 'SingleCellExperiment' container in the form of a 'tibble'. + This allows tidy data manipulation, nesting, and plotting. + For example, a 'tidySingleCellExperiment' is directly compatible + with functions from 'tidyverse' packages 'dplyr' and 'tidyr', + as well as plotting with 'ggplot2' and 'plotly'. + In addition, the package provides various utility + functions specific to single-cell omics data analysis + (e.g., aggregation of cell-level data to pseudobulks). 
License: GPL-3 Depends: R (>= 4.1.0), - ttservice (>= 0.3.6), SingleCellExperiment Imports: SummarizedExperiment, - dplyr, + dplyr, tidyr, ttservice, tibble, - tidyr, ggplot2, plotly, magrittr, @@ -38,7 +44,7 @@ Suggests: BiocStyle, testthat, knitr, - markdown, + rmarkdown, SingleCellSignalR, SingleR, scater, @@ -58,6 +64,5 @@ biocViews: AssayDomain, Infrastructure, RNASeq, DifferentialExpression, GeneExpr Encoding: UTF-8 LazyData: true RoxygenNote: 7.2.3 -Roxygen: list(markdown = TRUE) URL: https://github.com/stemangiola/tidySingleCellExperiment BugReports: https://github.com/stemangiola/tidySingleCellExperiment/issues diff --git a/NAMESPACE b/NAMESPACE old mode 100755 new mode 100644 index 68b1ef2..4df7b19 --- a/NAMESPACE +++ b/NAMESPACE @@ -33,59 +33,31 @@ S3method(select,SingleCellExperiment) S3method(separate,SingleCellExperiment) S3method(slice,SingleCellExperiment) S3method(summarise,SingleCellExperiment) +S3method(summarize,SingleCellExperiment) S3method(tbl_format_header,tidySingleCellExperiment) S3method(tidy,SingleCellExperiment) S3method(unite,SingleCellExperiment) S3method(unnest,tidySingleCellExperiment_nested) export("%>%") -export(add_count) -export(aggregate_cells) -export(arrange) -export(as_tibble) -export(bind_cols) -export(bind_rows) -export(count) -export(distinct) -export(extract) -export(filter) -export(full_join) -export(ggplot) -export(glimpse) -export(group_by) -export(inner_join) -export(join_features) +export(count.SingleCellExperiment) +export(filter.SingleCellExperiment) export(join_transcripts) -export(left_join) -export(mutate) -export(nest) -export(pivot_longer) export(plot_ly) -export(pull) -export(rename) -export(right_join) -export(rowwise) -export(sample_frac) -export(sample_n) -export(select) -export(separate) -export(slice) -export(summarise) -export(summarize) -export(tbl_format_header) +export(rename.SingleCellExperiment) +export(select.SingleCellExperiment) +export(slice.SingleCellExperiment) export(tidy) -export(unite) 
-export(unnest) export(unnest_single_cell_experiment) +exportMethods(aggregate_cells) +exportMethods(join_features) importFrom(Matrix,rowSums) importFrom(S4Vectors,"metadata<-") importFrom(S4Vectors,DataFrame) importFrom(S4Vectors,metadata) importFrom(SingleCellExperiment,cbind) -importFrom(SingleCellExperiment,counts) importFrom(SummarizedExperiment,"colData<-") importFrom(SummarizedExperiment,assays) importFrom(SummarizedExperiment,colData) -importFrom(cli,cat_line) importFrom(dplyr,add_count) importFrom(dplyr,arrange) importFrom(dplyr,contains) @@ -110,18 +82,17 @@ importFrom(dplyr,select) importFrom(dplyr,select_if) importFrom(dplyr,slice) importFrom(dplyr,summarise) +importFrom(dplyr,summarize) importFrom(dplyr,vars) importFrom(ellipsis,check_dots_unnamed) importFrom(ellipsis,check_dots_used) importFrom(fansi,strwrap_ctl) -importFrom(ggplot2,aes) importFrom(ggplot2,ggplot) importFrom(lifecycle,deprecate_warn) importFrom(magrittr,"%$%") importFrom(magrittr,"%>%") importFrom(magrittr,equals) importFrom(magrittr,set_rownames) -importFrom(methods,as) importFrom(methods,getMethod) importFrom(pillar,align) importFrom(pillar,get_extent) @@ -141,7 +112,6 @@ importFrom(rlang,enquo) importFrom(rlang,enquos) importFrom(rlang,expr) importFrom(rlang,flatten_if) -importFrom(rlang,is_empty) importFrom(rlang,is_spliced) importFrom(rlang,names2) importFrom(rlang,quo_is_null) @@ -152,7 +122,6 @@ importFrom(rlang,quo_squash) importFrom(stats,setNames) importFrom(stringr,regex) importFrom(stringr,str_detect) -importFrom(stringr,str_replace) importFrom(stringr,str_replace_all) importFrom(tibble,as_tibble) importFrom(tibble,enframe) @@ -161,7 +130,6 @@ importFrom(tidyr,extract) importFrom(tidyr,nest) importFrom(tidyr,pivot_longer) importFrom(tidyr,separate) -importFrom(tidyr,spread) importFrom(tidyr,unite) importFrom(tidyr,unnest) importFrom(tidyselect,eval_select) @@ -170,5 +138,6 @@ importFrom(ttservice,bind_cols) importFrom(ttservice,bind_rows) 
importFrom(ttservice,join_features) importFrom(utils,data) +importFrom(utils,packageDescription) importFrom(utils,tail) importFrom(vctrs,new_data_frame) diff --git a/R/attach.R b/R/attach.R new file mode 100644 index 0000000..9a11fc1 --- /dev/null +++ b/R/attach.R @@ -0,0 +1,22 @@ +core <- c("dplyr", "tidyr", "ttservice", "ggplot2") + +core_unloaded <- function() { + search <- paste0("package:", core) + core[!search %in% search()] +} + +# Attach the package from the same library it was loaded from before. +# [source: https://github.com/tidy-biology/tidyverse/issues/171] +same_library <- function(pkg) { + loc <- if (pkg %in% loadedNamespaces()) dirname(getNamespaceInfo(pkg, "path")) + library(pkg, lib.loc=loc, character.only=TRUE, warn.conflicts=FALSE) +} + +tidyverse_attach <- function() { + to_load <- core_unloaded() + + suppressPackageStartupMessages( + lapply(to_load, same_library)) + + invisible(to_load) +} diff --git a/R/dplyr_methods.R b/R/dplyr_methods.R index bd3f18e..5c92d6b 100755 --- a/R/dplyr_methods.R +++ b/R/dplyr_methods.R @@ -1,62 +1,15 @@ -#' Arrange rows by column values -#' -#' @importFrom dplyr arrange -#' -#' @description -#' `arrange()` order the rows of a data frame rows by the values of selected -#' columns. -#' -#' Unlike other dplyr verbs, `arrange()` largely ignores grouping; you -#' need to explicit mention grouping variables (or use `by_group=TRUE`) -#' in order to group by them, and functions of variables are evaluated -#' once per data frame, not once per group. -#' -#' @details -#' ## Locales -#' The sort order for character vectors will depend on the collating sequence -#' of the locale in use: see [locales()]. -#' -#' ## Missing values -#' Unlike base sorting with `sort()`, `NA` are: -#' * always sorted to the end for local data, even when wrapped with `desc()`. -#' * treated differently for remote data, depending on the backend. -#' -#' @return -#' An object of the same type as `.data`. 
-#' -#' * All rows appear in the output, but (usually) in a different place. -#' * Columns are not modified. -#' * Groups are not modified. -#' * Data frame attributes are preserved. -#' @section Methods: -#' This function is a **generic**, which means that packages can provide -#' implementations (methods) for other classes. See the documentation of -#' individual methods for extra arguments and differences in behaviour. -#' -#' The following methods are currently available in loaded packages: -#' @export -#' @param .data A data frame, data frame extension (e.g. a tibble), or a -#' lazy data frame (e.g. from dbplyr or dtplyr). See *Methods*, below, for -#' more details. -#' @param ... <[`tidy-eval`][dplyr_tidy_eval]> Variables, or functions or -#' variables. Use [desc()] to sort a variable in descending order. -#' @param .by_group If TRUE, will sort first by grouping variable. Applies to -#' grouped data frames only. -#' -#' @rdname dplyr-methods -#' @name arrange -#' +#' @rdname arrange +#' @inherit dplyr::arrange +#' @family single table verbs +#' #' @examples -#' `%>%` <- magrittr::`%>%` -#' pbmc_small %>% -#' +#' pbmc_small |> #' arrange(nFeature_RNA) -NULL - +#' #' @importFrom tibble as_tibble -#' +#' @importFrom dplyr arrange +#' @importFrom dplyr pull #' @export -#' @inheritParams arrange arrange.SingleCellExperiment <- function(.data, ..., .by_group=FALSE) { new_metadata <- .data %>% @@ -67,61 +20,23 @@ arrange.SingleCellExperiment <- function(.data, ..., .by_group=FALSE) { } - -#' Efficiently bind multiple data frames by row and column -#' -#' This is an efficient implementation of the common pattern of -#' `do.call(rbind, dfs)` or `do.call(cbind, dfs)` for binding many -#' data frames into one. -#' -#' The output of `bind_rows()` will contain a column if that column -#' appears in any of the inputs. -#' -#' @param ... Data frames to combine. -#' -#' Each argument can either be a data frame, a list that could be a data -#' frame, or a list of data frames. 
-#' -#' When row-binding, columns are matched by name, and any missing -#' columns will be filled with NA. -#' -#' When column-binding, rows are matched by position, so all data -#' frames must have the same number of rows. To match by value, not -#' position, see mutate-joins. -#' @param .id Data frame identifier. -#' -#' When `.id` is supplied, a new column of identifiers is -#' created to link each row to its original data frame. The labels -#' are taken from the named arguments to `bind_rows()`. When a -#' list of data frames is supplied, the labels are taken from the -#' names of the list. If no names are found a numeric sequence is -#' used instead. -#' @param add.cell.ids from Seurat 3.0 A character vector of length(x = c(x, y)). Appends the corresponding values to the start of each objects' cell names. -#' -#' @importFrom ttservice bind_rows -#' -#' @return `bind_rows()` and `bind_cols()` return the same type as -#' the first input, either a data frame, `tbl_df`, or `grouped_df`. 
+#' @name bind_rows +#' @aliases bind_cols +#' @inherit ttservice::bind_rows +#' #' @examples -#' `%>%` = magrittr::`%>%` -#' tt = pbmc_small -#' bind_rows( tt, tt ) -#' -#' tt_bind = tt %>% select(nCount_RNA ,nFeature_RNA) -#' tt %>% bind_cols(tt_bind) +#' tt <- pbmc_small +#' bind_rows(tt, tt) #' -#' @export +#' tt_bind <- tt |> select(nCount_RNA, nFeature_RNA) +#' tt |> bind_cols(tt_bind) #' -#' @name bind_rows -NULL - -#' @importFrom rlang dots_values #' @importFrom rlang flatten_if #' @importFrom rlang is_spliced +#' @importFrom rlang dots_values +#' @importFrom ttservice bind_rows #' @importFrom SingleCellExperiment cbind -#' #' @export -#' bind_rows.SingleCellExperiment <- function(..., .id=NULL, add.cell.ids=NULL) { tts <- flatten_if(dots_values(...), is_spliced) @@ -135,12 +50,13 @@ bind_rows.SingleCellExperiment <- function(..., .id=NULL, add.cell.ids=NULL) { new_obj } - -# Internal of bind_cols +#' @importFrom rlang flatten_if +#' @importFrom rlang is_spliced +#' @importFrom rlang dots_values +#' @importFrom ttservice bind_cols #' @importFrom SummarizedExperiment colData #' @importFrom SummarizedExperiment colData<- -#' -bind_cols_ = function(..., .id=NULL) { +bind_cols_ <- function(..., .id=NULL) { tts <- tts <- flatten_if(dots_values(...), is_spliced) colData(tts[[1]]) <- bind_cols(colData(tts[[1]]) %>% as.data.frame(), @@ -149,52 +65,18 @@ bind_cols_ = function(..., .id=NULL) { tts[[1]] } +#' @rdname bind_rows #' @export -#' -#' @importFrom ttservice bind_cols -#' @inheritParams bind_cols -#' -#' @name bind_cols -#' -#' @rdname dplyr-methods -NULL - -#' @importFrom rlang dots_values -#' @importFrom rlang flatten_if -#' @importFrom rlang is_spliced -#' @importFrom SummarizedExperiment colData -#' @importFrom SummarizedExperiment colData<- -#' -#' @export -#' bind_cols.SingleCellExperiment <- bind_cols_ -#' distinct -#' -#' @importFrom dplyr distinct -#' -#' @param .data A tbl. (See dplyr) -#' @param ... 
Data frames to combine (See dplyr) -#' @param .keep_all If TRUE, keep all variables in .data. If a combination -#' of ... is not distinct, this keeps the first row of values. (See dplyr) -#' -#' @return A tidySingleCellExperiment object -#' +#' @rdname distinct +#' @inherit dplyr::distinct +#' #' @examples -#' -#' `%>%` <- magrittr::`%>%` -#' pbmc_small %>% -#' +#' pbmc_small |> #' distinct(groups) #' -#' @rdname dplyr-methods -#' @name distinct -#' -#' @export -NULL - -#' @inheritParams distinct -#' +#' @importFrom dplyr distinct #' @export distinct.SingleCellExperiment <- function(.data, ..., .keep_all=FALSE) { message(data_frame_returned_message) @@ -212,78 +94,18 @@ distinct.SingleCellExperiment <- function(.data, ..., .keep_all=FALSE) { dplyr::distinct(..., .keep_all=.keep_all) } - -#' Subset rows using column values -#' -#' -#' `filter()` retains the rows where the conditions you provide a `TRUE`. Note -#' that, unlike base subsetting with `[`, rows where the condition evaluates -#' to `NA` are dropped. -#' -#' dplyr is not yet smart enough to optimise filtering optimisation -#' on grouped datasets that don't need grouped calculations. For this reason, -#' filtering is often considerably faster on [ungroup()]ed data. -#' -#' @importFrom dplyr filter -#' -#' @section Useful filter functions: -#' -#' * [`==`], [`>`], [`>=`] etc -#' * [`&`], [`|`], [`!`], [xor()] -#' * [is.na()] -#' * [between()], [near()] -#' -#' @section Grouped tibbles: -#' -#' Because filtering expressions are computed within groups, they may -#' yield different results on grouped tibbles. This will be the case -#' as soon as an aggregating, lagging, or ranking function is -#' involved. Compare this ungrouped filtering: -#' -#' -#' The former keeps rows with `mass` greater than the global average -#' whereas the latter keeps rows with `mass` greater than the gender -#' -#' average. -#' @family single table verbs -#' @inheritParams arrange -#' @param ... 
<[`tidy-eval`][dplyr_tidy_eval]> Logical predicates defined in -#' terms of the variables in `.data`. -#' Multiple conditions are combined with `&`. Only rows where the -#' condition evaluates to `TRUE` are kept. -#' @param .preserve when `FALSE` (the default), the grouping structure -#' is recalculated based on the resulting data, otherwise it is kept as is. -#' @return -#' An object of the same type as `.data`. -#' -#' * Rows are a subset of the input, but appear in the same order. -#' * Columns are not modified. -#' * The number of groups may be reduced (if `.preserve` is not `TRUE`). -#' * Data frame attributes are preserved. -#' @section Methods: -#' This function is a **generic**, which means that packages can provide -#' implementations (methods) for other classes. See the documentation of -#' individual methods for extra arguments and differences in behaviour. -#' -#' The following methods are currently available in loaded packages: -#' @seealso [filter_all()], [filter_if()] and [filter_at()]. +#' @rdname filter +#' @inherit dplyr::filter +#' #' @examples -#' -#' `%>%` <- magrittr::`%>%` -#' pbmc_small %>% -#' +#' pbmc_small |> #' filter(groups == "g1") #' #' # Learn more in ?dplyr_tidy_eval -#' -#' @rdname dplyr-methods -#' @name filter -#' -#' @export -NULL - -#' @inheritParams filter -#' +#' +#' @importFrom purrr map +#' @importFrom dplyr filter +#' @export filter.SingleCellExperiment #' @export filter.SingleCellExperiment <- function(.data, ..., .preserve=FALSE) { @@ -311,53 +133,16 @@ filter.SingleCellExperiment <- function(.data, ..., .preserve=FALSE) { } - -#' Group by one or more variables -#' -#' @importFrom dplyr group_by -#' @importFrom dplyr group_by_drop_default -#' -#' -#' @description -#' Most data operations are done on groups defined by variables. -#' `group_by()` takes an existing tbl and converts it into a grouped tbl -#' where operations are performed "by group". `ungroup()` removes grouping. 
-#' -#' @family grouping functions -#' @inheritParams arrange -#' @param ... In `group_by()`, variables or computations to group by. -#' In `ungroup()`, variables to remove from the grouping. -#' @param .add When `FALSE`, the default, `group_by()` will -#' override existing groups. To add to the existing groups, use -#' `.add=TRUE`. -#' -#' This argument was previously called `add`, but that prevented -#' creating a new grouping variable called `add`, and conflicts with -#' our naming conventions. -#' @param .drop When `.drop=TRUE`, empty groups are dropped. See -#' [group_by_drop_default()] for what the default value is for this argument. -#' @return A [grouped data frame][grouped_df()], unless the combination of -#' `...` and `add` yields a non empty set of grouping columns, a -#' regular (ungrouped) data frame otherwise. -#' @section Methods: -#' These function are **generic**s, which means that packages can provide -#' implementations (methods) for other classes. See the documentation of -#' individual methods for extra arguments and differences in behaviour. -#' -#' Methods available in currently loaded packages: +#' @rdname group_by +#' @inherit dplyr::group_by +#' @family grouping functions #' #' @examples -#' `%>%` <- magrittr::`%>%` -#' pbmc_small %>% -#' +#' pbmc_small |> #' group_by(groups) -#' -#' @rdname dplyr-methods -#' @name group_by -#' -#' @export -NULL - +#' +#' @importFrom dplyr group_by_drop_default +#' @importFrom dplyr group_by #' @export group_by.SingleCellExperiment <- function(.data, ..., .add=FALSE, .drop=group_by_drop_default(.data)) { message(data_frame_returned_message) @@ -376,82 +161,17 @@ group_by.SingleCellExperiment <- function(.data, ..., .add=FALSE, .drop=group_by } -#' Summarise each group to fewer rows -#' -#' @importFrom dplyr summarise -#' -#' @description -#' `summarise()` creates a new data frame. 
It will have one (or more) rows for -#' each combination of grouping variables; if there are no grouping variables, -#' the output will have a single row summarising all observations in the input. -#' It will contain one column for each grouping variable and one column -#' for each of the summary statistics that you have specified. -#' -#' `summarise()` and `summarize()` are synonyms. -#' -#' @section Useful functions: -#' -#' * Center: [mean()], [median()] -#' * Spread: [sd()], [IQR()], [mad()] -#' * Range: [min()], [max()], [quantile()] -#' * Position: [first()], [last()], [nth()], -#' * Count: [n()], [n_distinct()] -#' * Logical: [any()], [all()] -#' -#' @section Backend variations: -#' -#' The data frame backend supports creating a variable and using it in the -#' same summary. This means that previously created summary variables can be -#' further transformed or combined within the summary, as in [mutate()]. -#' However, it also means that summary variables with the same names as previous -#' variables overwrite them, making those variables unavailable to later summary -#' variables. -#' -#' This behaviour may not be supported in other backends. To avoid unexpected -#' results, consider using new names for your summary variables, especially when -#' creating multiple summaries. -#' -#' @inheritParams arrange -#' @param ... <[`tidy-eval`][dplyr_tidy_eval]> Name-value pairs of summary -#' functions. The name will be the name of the variable in the result. -#' -#' The value can be: -#' -#' * A vector of length 1, e.g. `min(x)`, `n()`, or `sum(is.na(y))`. -#' * A vector of length `n`, e.g. `quantile()`. -#' * A data frame, to add multiple columns from a single expression. +#' @rdname summarise +#' @aliases summarize +#' @inherit dplyr::summarise #' @family single table verbs -#' @return -#' An object _usually_ of the same type as `.data`. -#' -#' * The rows come from the underlying `group_keys()`. 
-#' * The columns are a combination of the grouping keys and the summary -#' expressions that you provide. -#' * If `x` is grouped by more than one variable, the output will be another -#' [grouped_df] with the right-most group removed. -#' * If `x` is grouped by one variable, or is not grouped, the output will -#' be a [tibble]. -#' * Data frame attributes are **not** preserved, because `summarise()` -#' fundamentally creates a new data frame. -#' @section Methods: -#' This function is a **generic**, which means that packages can provide -#' implementations (methods) for other classes. See the documentation of -#' individual methods for extra arguments and differences in behaviour. -#' -#' The following methods are currently available in loaded packages: +#' #' @examples -#' `%>%` <- magrittr::`%>%` -#' pbmc_small %>% -#' +#' pbmc_small |> #' summarise(mean(nCount_RNA)) #' -#' @rdname dplyr-methods -#' @name summarise -#' -#' -#' @export -NULL - +#' @importFrom dplyr summarise +#' @importFrom purrr map #' @export summarise.SingleCellExperiment <- function(.data, ...) { message(data_frame_returned_message) @@ -469,106 +189,24 @@ summarise.SingleCellExperiment <- function(.data, ...) { dplyr::summarise(...) } -#' @rdname dplyr-methods -#' @name summarise +#' @rdname summarise +#' @importFrom dplyr summarize #' @export -summarize <- summarise - +summarize.SingleCellExperiment <- summarise.SingleCellExperiment -#' Create, modify, and delete columns -#' -#' `mutate()` adds new variables and preserves existing ones; -#' `transmute()` adds new variables and drops existing ones. -#' New variables overwrite existing variables of the same name. -#' Variables can be removed by setting their value to `NULL`. 
-#' -#' @importFrom dplyr mutate -#' -#' @section Useful mutate functions: -#' -#' * [`+`], [`-`], [log()], etc., for their usual mathematical meanings -#' -#' * [lead()], [lag()] -#' -#' * [dense_rank()], [min_rank()], [percent_rank()], [row_number()], -#' [cume_dist()], [ntile()] -#' -#' * [cumsum()], [cummean()], [cummin()], [cummax()], [cumany()], [cumall()] -#' -#' * [na_if()], [coalesce()] -#' -#' * [if_else()], [recode()], [case_when()] -#' -#' @section Grouped tibbles: -#' -#' Because mutating expressions are computed within groups, they may -#' yield different results on grouped tibbles. This will be the case -#' as soon as an aggregating, lagging, or ranking function is -#' involved. Compare this ungrouped mutate: -#' -#' With the grouped equivalent: -#' -#' The former normalises `mass` by the global average whereas the -#' latter normalises by the averages within gender levels. -#' -#' @inheritParams arrange -#' @param ... <[`tidy-eval`][dplyr_tidy_eval]> Name-value pairs. -#' The name gives the name of the column in the output. -#' -#' The value can be: -#' -#' * A vector of length 1, which will be recycled to the correct length. -#' * A vector the same length as the current group (or the whole data frame -#' if ungrouped). -#' * `NULL`, to remove the column. -#' * A data frame or tibble, to create multiple columns in the output. -#' +#' @rdname mutate +#' @inherit dplyr::mutate #' @family single table verbs -#' @return -#' An object of the same type as `.data`. -#' -#' For `mutate()`: -#' -#' * Rows are not affected. -#' * Existing columns will be preserved unless explicitly modified. -#' * New columns will be added to the right of existing columns. -#' * Columns given value `NULL` will be removed -#' * Groups will be recomputed if a grouping variable is mutated. -#' * Data frame attributes are preserved. -#' -#' For `transmute()`: -#' -#' * Rows are not affected. 
-#' * Apart from grouping variables, existing columns will be remove unless -#' explicitly kept. -#' * Column order matches order of expressions. -#' * Groups will be recomputed if a grouping variable is mutated. -#' * Data frame attributes are preserved. -#' @section Methods: -#' These function are **generic**s, which means that packages can provide -#' implementations (methods) for other classes. See the documentation of -#' individual methods for extra arguments and differences in behaviour. -#' -#' Methods available in currently loaded packages: #' #' @examples -#' `%>%` <- magrittr::`%>%` -#' pbmc_small %>% -#' +#' pbmc_small |> #' mutate(nFeature_RNA=1) #' -#' @rdname dplyr-methods -#' @name mutate -#' -#' @export -NULL - - -#' @importFrom dplyr mutate -#' @importFrom rlang enquos #' @importFrom SummarizedExperiment colData #' @importFrom SummarizedExperiment colData<- -#' +#' @importFrom rlang enquos +#' @importFrom dplyr mutate +#' @importFrom purrr map #' @export mutate.SingleCellExperiment <- function(.data, ...) { @@ -614,52 +252,19 @@ mutate.SingleCellExperiment <- function(.data, ...) { .data } - - -#' Rename columns -#' -#' -#' Rename individual variables using `new_name=old_name` syntax. -#' -#' @importFrom dplyr rename -#' -#' @section Scoped selection and renaming: -#' -#' Use the three scoped variants ([rename_all()], [rename_if()], [rename_at()]) -#' to renaming a set of variables with a function. -#' -#' @inheritParams arrange -#' @param ... <[`tidy-select`][dplyr_tidy_select]> Use `new_name=old_name` -#' to rename selected variables. -#' @return -#' An object of the same type as `.data`. -#' * Rows are not affected. -#' * Column names are changed; column order is preserved -#' * Data frame attributes are preserved. -#' * Groups are updated to reflect new names. -#' @section Methods: -#' This function is a **generic**, which means that packages can provide -#' implementations (methods) for other classes. 
See the documentation of -#' individual methods for extra arguments and differences in behaviour. -#' -#' The following methods are currently available in loaded packages: +#' @rdname rename +#' @inherit dplyr::rename #' @family single table verbs -#' @export -#' @examples -#' `%>%` <- magrittr::`%>%` -#' pbmc_small %>% #' +#' @examples +#' pbmc_small |> #' rename(s_score=nFeature_RNA) #' -#' @rdname dplyr-methods -#' @name rename -#' -#' @export -NULL - -#' @importFrom tidyselect eval_select #' @importFrom SummarizedExperiment colData #' @importFrom SummarizedExperiment colData<- +#' @importFrom tidyselect eval_select +#' @importFrom dplyr rename +#' @export rename.SingleCellExperiment #' @export rename.SingleCellExperiment <- function(.data, ...) { @@ -694,40 +299,13 @@ rename.SingleCellExperiment <- function(.data, ...) { .data } - -#' Group input by rows -#' -#' -#' See [this repository](https://github.com/jennybc/row-oriented-workflows) -#' for alternative ways to perform row-wise operations. -#' -#' `rowwise()` is used for the results of [do()] when you -#' create list-variables. It is also useful to support arbitrary -#' complex operations that need to be applied to each row. -#' -#' Currently, rowwise grouping only works with data frames. Its -#' main impact is to allow you to work with list-variables in -#' [summarise()] and [mutate()] without having to -#' use \code{[[1]]}. This makes `summarise()` on a rowwise tbl -#' effectively equivalent to [plyr::ldply()]. -#' -#' @importFrom dplyr rowwise -#' -#' @param .data Input data frame. -#' @param ... See dplyr::rowwise -#' @return A `tbl` -#' -#' A `tbl` +#' @rdname rowwise +#' @inherit dplyr::rowwise #' #' @examples -#' `%>%` <- magrittr::`%>%` +#' # TODO #' -#' @rdname dplyr-methods -#' @name rowwise -#' -#' @export -NULL - +#' @importFrom dplyr rowwise #' @export rowwise.SingleCellExperiment <- function(data, ...) 
{ message(data_frame_returned_message) @@ -737,37 +315,18 @@ rowwise.SingleCellExperiment <- function(data, ...) { dplyr::rowwise(...) } - -#' Left join datasets -#' -#' @importFrom dplyr count -#' @importFrom dplyr left_join -#' -#' @param x tbls to join. (See dplyr) -#' @param y tbls to join. (See dplyr) -#' @param by A character vector of variables to join by. (See dplyr) -#' @param copy If x and y are not from the same data source, and copy is TRUE, -#' then y will be copied into the same src as x. (See dplyr) -#' @param suffix If there are non-joined duplicate variables in x and y, these -#' suffixes will be added to the output to disambiguate them. Should be a -#' character vector of length 2. (See dplyr) -#' @param ... Data frames to combine (See dplyr) -#' -#' @return A tidySingleCellExperiment object -#' -#' @rdname dplyr-methods -#' @name left_join -#' -#' @export +#' @rdname left_join +#' @inherit dplyr::left_join #' #' @examples -#' `%>%` <- magrittr::`%>%` -#' #' tt <- pbmc_small -#' tt %>% left_join(tt %>% distinct(groups) %>% mutate(new_column=1:2)) -NULL - +#' tt |> left_join(tt |> +#' distinct(groups) |> +#' mutate(new_column=1:2)) +#' #' @importFrom SummarizedExperiment colData +#' @importFrom dplyr left_join +#' @importFrom dplyr count #' @export left_join.SingleCellExperiment <- function(x, y, by=NULL, copy=FALSE, suffix=c(".x", ".y"), ...) { @@ -799,33 +358,19 @@ left_join.SingleCellExperiment <- function(x, y, by=NULL, copy=FALSE, suffix=c(" ) } -#' Inner join datasets -#' -#' @importFrom dplyr pull -#' @importFrom dplyr inner_join -#' -#' @param x tbls to join. (See dplyr) -#' @param y tbls to join. (See dplyr) -#' @param by A character vector of variables to join by. (See dplyr) -#' @param copy If x and y are not from the same data source, and copy is TRUE, then y will be copied into the same src as x. 
(See dplyr) -#' @param suffix If there are non-joined duplicate variables in x and y, these suffixes will be added to the output to disambiguate them. Should be a character vector of length 2. (See dplyr) -#' @param ... Data frames to combine (See dplyr) -#' -#' @return A tidySingleCellExperiment object +#' @rdname inner_join +#' @inherit dplyr::inner_join #' #' @examples -#' `%>%` <- magrittr::`%>%` -#' #' tt <- pbmc_small -#' tt %>% inner_join(tt %>% distinct(groups) %>% mutate(new_column=1:2) %>% slice(1)) -#' -#' @rdname dplyr-methods -#' @name inner_join +#' tt |> inner_join(tt |> +#' distinct(groups) |> +#' mutate(new_column=1:2) |> +#' slice(1)) #' -#' @export -NULL - #' @importFrom SummarizedExperiment colData +#' @importFrom dplyr inner_join +#' @importFrom dplyr pull #' @export inner_join.SingleCellExperiment <- function(x, y, by=NULL, copy=FALSE, suffix=c(".x", ".y"), ...) { @@ -857,36 +402,19 @@ inner_join.SingleCellExperiment <- function(x, y, by=NULL, copy=FALSE, suffix=c( ) } -#' Right join datasets -#' -#' @importFrom dplyr pull -#' @importFrom dplyr right_join -#' -#' @param x tbls to join. (See dplyr) -#' @param y tbls to join. (See dplyr) -#' @param by A character vector of variables to join by. (See dplyr) -#' @param copy If x and y are not from the same data source, and copy is TRUE, -#' then y will be copied into the same src as x. (See dplyr) -#' @param suffix If there are non-joined duplicate variables in x and y, these -#' suffixes will be added to the output to disambiguate them. Should be a -#' character vector of length 2. (See dplyr) -#' @param ... 
Data frames to combine (See dplyr) -#' -#' @return A tidySingleCellExperiment object +#' @rdname right_join +#' @inherit dplyr::right_join #' #' @examples -#' `%>%` <- magrittr::`%>%` -#' #' tt <- pbmc_small -#' tt %>% right_join(tt %>% distinct(groups) %>% mutate(new_column=1:2) %>% slice(1)) -#' -#' @rdname dplyr-methods -#' @name right_join +#' tt |> right_join(tt |> +#' distinct(groups) |> +#' mutate(new_column=1:2) |> +#' slice(1)) #' -#' @export -NULL - #' @importFrom SummarizedExperiment colData +#' @importFrom dplyr right_join +#' @importFrom dplyr pull #' @export right_join.SingleCellExperiment <- function(x, y, by=NULL, copy=FALSE, suffix=c(".x", ".y"), ...) { @@ -919,36 +447,15 @@ right_join.SingleCellExperiment <- function(x, y, by=NULL, copy=FALSE, suffix=c( ) } - -#' Full join datasets -#' -#' @importFrom dplyr pull -#' @importFrom dplyr full_join -#' -#' @param x tbls to join. (See dplyr) -#' @param y tbls to join. (See dplyr) -#' @param by A character vector of variables to join by. (See dplyr) -#' @param copy If x and y are not from the same data source, and copy is TRUE, -#' then y will be copied into the same src as x. (See dplyr) -#' @param suffix If there are non-joined duplicate variables in x and y, these -#' suffixes will be added to the output to disambiguate them. Should be a -#' character vector of length 2. (See dplyr) -#' @param ... Data frames to combine (See dplyr) -#' -#' @return A tidySingleCellExperiment object +#' @rdname full_join +#' @inherit dplyr::full_join #' #' @examples -#' `%>%` <- magrittr::`%>%` -#' #' tt <- pbmc_small -#' tt %>% full_join(tibble::tibble(groups="g1", other=1:4)) -#' -#' @rdname dplyr-methods -#' @name full_join +#' tt |> full_join(tibble::tibble(groups="g1", other=1:4)) #' -#' @export -NULL - +#' @importFrom dplyr full_join +#' @importFrom dplyr pull #' @export full_join.SingleCellExperiment <- function(x, y, by=NULL, copy=FALSE, suffix=c(".x", ".y"), ...) 
{ @@ -981,141 +488,36 @@ full_join.SingleCellExperiment <- function(x, y, by=NULL, copy=FALSE, suffix=c(" ) } -#' Subset rows using their positions -#' -#' @importFrom dplyr slice -#' -#' @description -#' `slice()` lets you index rows by their (integer) locations. It allows you -#' to select, remove, and duplicate rows. It is accompanied by a number of -#' helpers for common use cases: -#' -#' * `slice_head()` and `slice_tail()` select the first or last rows. -#' * `slice_sample()` randomly selects rows. -#' * `slice_min()` and `slice_max()` select rows with highest or lowest values -#' of a variable. -#' -#' If `.data` is a [grouped_df], the operation will be performed on each group, -#' so that (e.g.) `slice_head(df, n=5)` will select the first five rows in -#' each group. -#' -#' @details -#' Slice does not work with relational databases because they have no -#' intrinsic notion of row order. If you want to perform the equivalent -#' operation, use [filter()] and [row_number()]. -#' +#' @rdname slice +#' @aliases slice_head slice_tail +#' slice_sample slice_min slice_max +#' @inherit dplyr::slice #' @family single table verbs -#' @inheritParams arrange -#' @inheritParams filter -#' @param ... For `slice()`: <[`data-masking`][dplyr_data_masking]> Integer row -#' values. -#' -#' Provide either positive values to keep, or negative values to drop. -#' The values provided must be either all positive or all negative. -#' Indices beyond the number of rows in the input are silently ignored. -#' -#' For `slice_helpers()`, these arguments are passed on to methods. -#' -#' -#' If `n` is greater than the number of rows in the group (or `prop > 1`), -#' the result will be silently truncated to the group size. If the -#' `prop`ortion of a group size is not an integer, it is rounded down. -#' @return -#' An object of the same type as `.data`. The output has the following -#' properties: -#' -#' * Each row may appear 0, 1, or many times in the output. 
-#' * Columns are not modified. -#' * Groups are not modified. -#' * Data frame attributes are preserved. -#' @section Methods: -#' These function are **generic**s, which means that packages can provide -#' implementations (methods) for other classes. See the documentation of -#' individual methods for extra arguments and differences in behaviour. -#' -#' Methods available in currently loaded packages: -#' -#' * `slice()`: \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("slice")}. -#' * `slice_head()`: \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("slice_head")}. -#' * `slice_tail()`: \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("slice_tail")}. -#' * `slice_min()`: \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("slice_min")}. -#' * `slice_max()`: \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("slice_max")}. -#' * `slice_sample()`: \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("slice_sample")}. -#' -#' @rdname dplyr-methods -#' @name slice -#' -#' @export +#' #' @examples +#' pbmc_small |> slice(1) #' -#' `%>%` <- magrittr::`%>%` -#' pbmc_small %>% -#' -#' slice(1) -NULL - #' @importFrom SummarizedExperiment colData +#' @importFrom dplyr slice +#' @export slice.SingleCellExperiment #' @export -slice.SingleCellExperiment <- function(.data, ..., .preserve=FALSE) { - new_meta <- dplyr::slice(colData(.data) %>% as.data.frame(), ..., .preserve=.preserve) +slice.SingleCellExperiment <- function(.data, ..., .by=NULL, .preserve=FALSE) { + new_meta <- dplyr::slice(colData(.data) %>% as.data.frame(), ..., .by=.by, .preserve=.preserve) new_obj <- .data[, rownames(new_meta)] # colData(new_obj)=new_meta new_obj } -#' Subset columns using their names and types -#' -#' @importFrom dplyr select -#' -#' @description -#' -#' Select (and optionally rename) variables in a data frame, using a concise -#' mini-language that makes it easy to refer to variables based on their name -#' (e.g. `a:f` selects all columns from `a` on the left to `f` on the -#' right). 
You can also use predicate functions like [is.numeric] to select -#' variables based on their properties. -#' -#' -#' -#' @inheritParams arrange -#' @param ... <[`tidy-select`][dplyr_tidy_select]> One or more unquoted -#' expressions separated by commas. Variable names can be used as if they -#' were positions in the data frame, so expressions like `x:y` can -#' be used to select a range of variables. -#' @return -#' An object of the same type as `.data`. The output has the following -#' properties: -#' -#' * Rows are not affected. -#' * Output columns are a subset of input columns, potentially with a different -#' order. Columns will be renamed if `new_name=old_name` form is used. -#' * Data frame attributes are preserved. -#' * Groups are maintained; you can't select off grouping variables. -#' -#' @section Methods: -#' This function is a **generic**, which means that packages can provide -#' implementations (methods) for other classes. See the documentation of -#' individual methods for extra arguments and differences in behaviour. -#' -#' The following methods are currently available in loaded packages: -#' \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("select")}. +#' @rdname select +#' @inherit dplyr::select #' #' @examples -#' -#' `%>%` <- magrittr::`%>%` -#' pbmc_small %>% -#' -#' select(cell, orig.ident) -#' @family single table verbs -#' -#' @rdname dplyr-methods -#' @name select -#' -#' @export -NULL - +#' pbmc_small |> select(cell, orig.ident) +#' #' @importFrom SummarizedExperiment colData +#' @importFrom dplyr select +#' @export select.SingleCellExperiment #' @export select.SingleCellExperiment <- function(.data, ...) { @@ -1148,61 +550,16 @@ select.SingleCellExperiment <- function(.data, ...) { ) } - -#' Sample n rows from a table -#' -#' @importFrom dplyr sample_n -#' -#' @description -#' \Sexpr[results=rd, stage=render]{lifecycle::badge("superseded")} -#' `sample_n()` and `sample_frac()` have been superseded in favour of -#' [slice_sample()]. 
While they will not be deprecated in the near future, -#' retirement means that we will only perform critical bug fixes, so we recommend -#' moving to the newer alternative. -#' -#' These functions were superseded because we realised it was more convenient to -#' have two mutually exclusive arguments to one function, rather than two -#' separate functions. This also made it to clean up a few other smaller -#' design issues with `sample_n()`/`sample_frac`: -#' -#' * The connection to `slice()` was not obvious. -#' * The name of the first argument, `tbl`, is inconsistent with other -#' single table verbs which use `.data`. -#' * The `size` argument uses tidy evaluation, which is surprising and -#' undocumented. -#' * It was easier to remove the deprecated `.env` argument. -#' * `...` was in a suboptimal position. -#' -#' @keywords internal -#' @param tbl A data.frame. -#' @param size <[`tidy-select`][dplyr_tidy_select]> -#' For `sample_n()`, the number of rows to select. -#' For `sample_frac()`, the fraction of rows to select. -#' If `tbl` is grouped, `size` applies to each group. -#' @param replace Sample with or without replacement? -#' @param weight <[`tidy-select`][dplyr_tidy_select]> Sampling weights. -#' This must evaluate to a vector of non-negative numbers the same length as -#' the input. Weights are automatically standardised to sum to 1. -#' @param .env DEPRECATED. -#' @param ... 
ignored +#' @rdname sample_n +#' @aliases sample_frac +#' @inherit dplyr::sample_n +#' #' @examples -#' -#' `%>%` <- magrittr::`%>%` -#' pbmc_small %>% -#' -#' sample_n(50) -#' pbmc_small %>% -#' -#' sample_frac(0.1) -#' @return A tidySingleCellExperiment object -#' -#' @rdname dplyr-methods -#' @name sample_n -#' -#' @export -NULL - +#' pbmc_small |> sample_n(50) +#' pbmc_small |> sample_frac(0.1) +#' #' @importFrom SummarizedExperiment colData +#' @importFrom dplyr sample_n #' @export sample_n.SingleCellExperiment <- function(tbl, size, replace=FALSE, weight=NULL, .env=NULL, ...) { @@ -1227,19 +584,9 @@ sample_n.SingleCellExperiment <- function(tbl, size, replace=FALSE, } } - - - - -#' @importFrom dplyr sample_frac -#' -#' @rdname dplyr-methods -#' @name sample_frac -#' -#' @export -NULL - +#' @rdname sample_n #' @importFrom SummarizedExperiment colData +#' @importFrom dplyr sample_frac #' @export sample_frac.SingleCellExperiment <- function(tbl, size=1, replace=FALSE, weight=NULL, .env=NULL, ...) { @@ -1264,56 +611,14 @@ sample_frac.SingleCellExperiment <- function(tbl, size=1, replace=FALSE, } } - -#' Count observations by group -#' -#' @importFrom dplyr count -#' -#' @description -#' `count()` lets you quickly count the unique values of one or more variables: -#' `df %>% count(a, b)` is roughly equivalent to -#' `df %>% group_by(a, b) %>% summarise(n=n())`. -#' `count()` is paired with `tally()`, a lower-level helper that is equivalent -#' to `df %>% summarise(n=n())`. Supply `wt` to perform weighted counts, -#' switching the summary from `n=n()` to `n=sum(wt)`. -#' -#' `add_count()` are `add_tally()` are equivalents to `count()` and `tally()` -#' but use `mutate()` instead of `summarise()` so that they add a new column -#' with group-wise counts. -#' -#' @param x A data frame, data frame extension (e.g. a tibble), or a -#' lazy data frame (e.g. from dbplyr or dtplyr). -#' @param ... <[`data-masking`][dplyr_data_masking]> Variables to group by. 
-#' @param wt <[`data-masking`][dplyr_data_masking]> Frequency weights. -#' Can be `NULL` or a variable: -#' -#' * If `NULL` (the default), counts the number of rows in each group. -#' * If a variable, computes `sum(wt)` for each group. -#' @param sort If `TRUE`, will show the largest groups at the top. -#' @param name The name of the new column in the output. -#' -#' If omitted, it will default to `n`. If there's already a column called `n`, -#' it will error, and require you to specify the name. -#' @param .drop For `count()`: if `FALSE` will include counts for empty groups -#' (i.e. for levels of factors that don't exist in the data). Deprecated in -#' `add_count()` since it didn't actually affect the output. -#' @return -#' An object of the same type as `.data`. `count()` and `add_count()` -#' group transiently, so the output has the same groups as the input. -#' @export -#' -#' @rdname dplyr-methods -#' @name count -#' +#' @rdname count +#' @inherit dplyr::count +#' #' @examples -#' -#' -#' `%>%` <- magrittr::`%>%` -#' pbmc_small %>% -#' -#' count(groups) -NULL - +#' pbmc_small |> count(groups) +#' +#' @importFrom dplyr count +#' @export count.SingleCellExperiment #' @export count.SingleCellExperiment <- function(x, ..., wt=NULL, sort=FALSE, name=NULL, .drop=group_by_drop_default(x)) { message(data_frame_returned_message) @@ -1331,17 +636,9 @@ count.SingleCellExperiment <- function(x, ..., wt=NULL, sort=FALSE, name=NULL, . dplyr::count(..., wt=!!enquo(wt), sort=sort, name=name, .drop=.drop) } - -#' @export -#' -#' +#' @rdname count +#' @aliases add_count #' @importFrom dplyr add_count -#' -#' @name add_count -#' -#' @rdname dplyr-methods -NULL - #' @export add_count.SingleCellExperiment <- function(x, ..., wt = NULL, sort = FALSE, name = NULL) { @@ -1363,46 +660,14 @@ add_count.SingleCellExperiment <- function(x, ..., wt = NULL, sort = FALSE, name } - - -#' Extract a single column -#' -#' -#' `pull()` is similar to `$`. 
It's mostly useful because it looks a little -#' nicer in pipes, it also works with remote data frames, and it can optionally -#' name the output. -#' -#' @importFrom dplyr pull -#' -#' @inheritParams arrange -#' @inheritParams tidyselect::vars_pull -#' @param name An optional parameter that specifies the column to be used -#' as names for a named vector. Specified in a similar manner as \code{var}. -#' @param ... For use by methods. -#' @return A vector the same size as `.data`. -#' @section Methods: -#' This function is a **generic**, which means that packages can provide -#' implementations (methods) for other classes. See the documentation of -#' individual methods for extra arguments and differences in behaviour. -#' -#' The following methods are currently available in loaded packages: -#' \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("pull")}. -#' -#' @rdname dplyr-methods -#' @name pull -#' -#' @export -#' -#' @importFrom ellipsis check_dots_used -#' +#' @rdname pull +#' @inherit dplyr::pull +#' #' @examples -#' -#' `%>%` <- magrittr::`%>%` -#' pbmc_small %>% -#' -#' pull(groups) -NULL - +#' pbmc_small |> pull(groups) +#' +#' @importFrom ellipsis check_dots_used +#' @importFrom dplyr pull #' @export pull.SingleCellExperiment <- function(.data, var=-1, name=NULL, ...) { var <- enquo(var) diff --git a/R/ggplot2_methods.R b/R/ggplot2_methods.R index c6bb225..a3b405c 100755 --- a/R/ggplot2_methods.R +++ b/R/ggplot2_methods.R @@ -1,57 +1,16 @@ - -#' Create a new ggplot from a tidySingleCellExperiment object -#' -#' -#' `ggplot()` initializes a ggplot object. It can be used to -#' declare the input data frame for a graphic and to specify the -#' set of plot aesthetics intended to be common throughout all -#' subsequent layers unless specifically overridden. -#' -#' `ggplot()` is used to construct the initial plot object, -#' and is almost always followed by `+` to add component to the -#' plot. 
There are three common ways to invoke `ggplot()`: -#' -#' -#' The first method is recommended if all layers use the same -#' data and the same set of aesthetics, although this method -#' can also be used to add a layer using data from another -#' data frame. See the first example below. The second -#' method specifies the default data frame to use for the plot, -#' but no aesthetics are defined up front. This is useful when -#' one data frame is used predominantly as layers are added, -#' but the aesthetics may vary from one layer to another. The -#' third method initializes a skeleton `ggplot` object which -#' is fleshed out as layers are added. This method is useful when -#' multiple data frames are used to produce different layers, as -#' is often the case in complex graphics. -#' -#' @importFrom ggplot2 aes -#' @importFrom ggplot2 ggplot -#' -#' @param .data Default dataset to use for plot. If not already a data.frame, -#' will be converted to one by [fortify()]. If not specified, -#' must be supplied in each layer added to the plot. -#' @param mapping Default list of aesthetic mappings to use for plot. -#' If not specified, must be supplied in each layer added to the plot. -#' @param ... Other arguments passed on to methods. Not currently used. -#' @param environment DEPRECATED. Used prior to tidy evaluation. 
+#' @rdname ggplot +#' @inherit ggplot2::ggplot +#' @title Create a new \code{ggplot} from a \code{tidySingleCellExperiment} #' -#' @return A ggplot -#' -#' @rdname ggplot2-methods -#' @name ggplot -#' -#' @export #' @examples -#' #' library(ggplot2) -#' -#' tidySingleCellExperiment::pbmc_small %>% -#' -#' tidySingleCellExperiment::ggplot(aes(groups, nCount_RNA)) + -#' geom_boxplot() -NULL - +#' pbmc_small |> +#' ggplot(aes(groups, nCount_RNA)) + +#' geom_boxplot() +#' +#' @importFrom purrr map +#' @importFrom rlang quo_name +#' @importFrom ggplot2 ggplot #' @export ggplot.SingleCellExperiment <- function(data=NULL, mapping=aes(), ..., environment=parent.frame()) { diff --git a/R/methods.R b/R/methods.R index 98b422e..8163276 100755 --- a/R/methods.R +++ b/R/methods.R @@ -1,4 +1,3 @@ - #' @importFrom methods getMethod setMethod( f = "show", @@ -20,50 +19,22 @@ setMethod( setClass("tidySingleCellExperiment", contains = "SingleCellExperiment") -#' Extract and join information for features. -#' -#' -#' @description join_features() extracts and joins information for specified features -#' -#' @importFrom rlang enquo -#' @importFrom magrittr "%>%" -#' @importFrom ttservice join_features -#' -#' @name join_features #' @rdname join_features +#' @inherit ttservice::join_features #' -#' @param .data A SingleCellExperiment object -#' @param features A vector of feature identifiers to join -#' @param all If TRUE return all -#' @param exclude_zeros If TRUE exclude zero values -#' @param shape Format of the returned table "long" or "wide" -#' @param ... Parameters to pass to join wide, i.e. assay name to extract feature abundance from and gene prefix, for shape="wide" -#' -#' @details This function extracts information for specified features and returns the information in either long or wide format. 
-#' -#' @return An object containing the information.for the specified features +#' @return A `tidySingleCellExperiment` object +#' containing information for the specified features. #' #' @examples -#' #' data("pbmc_small") -#' pbmc_small %>% -#' join_features(features = c("HLA-DRA", "LYZ")) -#' -#' -#' @export -#' -NULL - -#' join_features +#' pbmc_small %>% join_features( +#' features=c("HLA-DRA", "LYZ")) #' +#' @importFrom magrittr "%>%" #' @importFrom dplyr contains #' @importFrom dplyr everything -#' -#' @docType methods -#' @rdname join_features -#' -#' @return An object containing the information.for the specified features -#' +#' @importFrom ttservice join_features +#' @export setMethod("join_features", "SingleCellExperiment", function(.data, features = NULL, all = FALSE, @@ -100,28 +71,22 @@ setMethod("join_features", "SingleCellExperiment", function(.data, }) - -#' tidy for SingleCellExperiment -#' -#' @param object A SingleCellExperiment object -#' -#' @return A tidySingleCellExperiment object -#' #' @name tidy +#' @title tidy for `SingleCellExperiment` #' -#' @examples +#' @param object A `SingleCellExperiment` object. +#' @return A `tidySingleCellExperiment` object. #' +#' @examples #' tidySingleCellExperiment::pbmc_small +#' #' @export tidy <- function(object) { UseMethod("tidy", object) } -#' @importFrom methods as +#' @rdname tidy #' @importFrom lifecycle deprecate_warn -#' -#' @param object A SingleCellExperiment object -#' #' @export tidy.SingleCellExperiment <- function(object) { @@ -135,42 +100,20 @@ tidy.SingleCellExperiment <- function(object) { object } -#' Aggregate cells -#' -#' @description Combine cells into groups based on shared variables and aggregate feature counts. 
-#' -#' @importFrom magrittr "%>%" -#' @importFrom rlang enquo -#' @importFrom tibble enframe -#' @importFrom Matrix rowSums -#' @importFrom ttservice aggregate_cells -#' -#' @name aggregate_cells #' @rdname aggregate_cells -#' -#' @param .data A tidySingleCellExperiment object -#' @param .sample A vector of variables by which cells are aggregated -#' @param slot The slot to which the function is applied -#' @param assays The assay to which the function is applied -#' @param aggregation_function The method of cell-feature value aggregation -#' -#' @return A SummarizedExperiment object +#' @inherit ttservice::aggregate_cells #' #' @examples #' data("pbmc_small") #' pbmc_small_pseudo_bulk <- pbmc_small |> -#' aggregate_cells(c(groups, ident), assays = "counts") +#' aggregate_cells(c(groups, ident), assays="counts") #' +#' @importFrom rlang enquo +#' @importFrom magrittr "%>%" +#' @importFrom tibble enframe +#' @importFrom Matrix rowSums +#' @importFrom ttservice aggregate_cells #' @export -NULL - -#' aggregate_cells -#' -#' @docType methods -#' @rdname aggregate_cells -#' -#' @return An object containing the information for the specified features -#' setMethod("aggregate_cells", "SingleCellExperiment", function(.data, .sample = NULL, slot = "data", diff --git a/R/print_method.R b/R/print_method.R index f7fdc94..cc95c2b 100755 --- a/R/print_method.R +++ b/R/print_method.R @@ -1,38 +1,17 @@ # This file is a replacement of the unexported functions in the tibble package, in order to specify "tibble abstraction in the header" -#' Format the header of a tibble -#' -#' @description -#' `r lifecycle::badge("experimental")` -#' -#' For easier customization, the formatting of a tibble is split -#' into three components: header, body, and footer. -#' The `tbl_format_header()` method is responsible for formatting the header -#' of a tibble. -#' -#' Override this method if you need to change the appearance -#' of the entire header. 
-#' If you only need to change or extend the components shown in the header, -#' override or extend [tbl_sum()] for your class which is called by the -#' default method. -#' -#' @importFrom pillar tbl_format_header -#' @inheritParams tbl_format_body -#' @inherit tbl_format_body return -#' -#' @rdname tbl_format_header-methods -#' @name tbl_format_header -#' -#' @export -#' -NULL - +#' @rdname tbl_format_header +#' @inherit pillar::tbl_format_header +#' +#' @examples +#' # TODO +#' #' @importFrom rlang names2 #' @importFrom pillar align #' @importFrom pillar get_extent #' @importFrom pillar style_subtle +#' @importFrom pillar tbl_format_header #' @export -#' @inheritParams tbl_format_header tbl_format_header.tidySingleCellExperiment <- function(x, setup, ...){ number_of_features = x |> attr("number_of_features") @@ -67,72 +46,18 @@ tbl_format_header.tidySingleCellExperiment <- function(x, setup, ...){ } - -#' Printing tibbles -#' -#' @description -#' `r lifecycle::badge("maturing")` -#' -#' One of the main features of the `tbl_df` class is the printing: -#' -#' * Tibbles only print as many rows and columns as fit on one screen, -#' supplemented by a summary of the remaining rows and columns. -#' * Tibble reveals the type of each column, which keeps the user informed about -#' whether a variable is, e.g., `` or `` (character versus factor). -#' -#' Printing can be tweaked for a one-off call by calling `print()` explicitly -#' and setting arguments like `n` and `width`. More persistent control is -#' available by setting the options described below. -#' -#' Only the first 5 reduced dimensions are displayed, while all of them are queriable (e.g. ggplot). All dimensions are returned/displayed if as_tibble is used. -#' -#' @inheritSection pillar::`pillar-package` Package options -#' @section Package options: -#' -#' The following options are used by the tibble and pillar packages -#' to format and print `tbl_df` objects. 
-#' Used by the formatting workhorse `trunc_mat()` and therefore, -#' indirectly, by `print.tbl()`. -#' -#' * `tibble.print_max`: Row number threshold: Maximum number of rows printed. -#' Set to `Inf` to always print all rows. Default: 20. -#' * `tibble.print_min`: Number of rows printed if row number threshold is -#' exceeded. Default: 10. -#' * `tibble.width`: Output width. Default: `NULL` (use `width` option). -#' * `tibble.max_extra_cols`: Number of extra columns printed in reduced form. -#' Default: 100. -#' -#' @importFrom rlang is_empty -#' @importFrom stringr str_replace -#' -#' @param x Object to format or print. -#' @param ... Other arguments passed on to individual methods. -#' @param n Number of rows to show. If `NULL`, the default, will print all rows -#' if less than option `tibble.print_max`. Otherwise, will print -#' `tibble.print_min` rows. -#' @param width Width of text output to generate. This defaults to `NULL`, which -#' means use `getOption("tibble.width")` or (if also `NULL`) -#' `getOption("width")`; the latter displays only the columns that fit on one -#' screen. You can also set `options(tibble.width = Inf)` to override this -#' default and always print all columns. -#' @param n_extra Number of extra columns to print abbreviated information for, -#' if the width is too small for the entire tibble. If `NULL`, the default, -#' will print information about at most `tibble.max_extra_cols` extra columns. 
-#' -#' @return Nothing +#' @rdname formatting +#' @aliases print +#' @inherit tibble::formatting #' #' @examples -#' library(dplyr) -#' pbmc_small %>% print() -#' @name print -NULL - -#' @rdname print -#' @importFrom cli cat_line -#' @importFrom SingleCellExperiment counts +#' print(pbmc_small) +#' #' @importFrom vctrs new_data_frame #' @export -print.SingleCellExperiment <- function(x, ..., n = NULL, width = NULL, n_extra = NULL) { +print.SingleCellExperiment <- function(x, ..., n = NULL, width = NULL) {#, n_extra = NULL) { + # TODO: argument 'n_extra' seems to not + # exist anymore; see ?tibble::print.tbl x |> as_tibble(n_dimensions_to_return = 5 ) |> @@ -145,5 +70,3 @@ print.SingleCellExperiment <- function(x, ..., n = NULL, width = NULL, n_extra = invisible(x) } - - diff --git a/R/tibble_methods.R b/R/tibble_methods.R index b78101e..56b386b 100755 --- a/R/tibble_methods.R +++ b/R/tibble_methods.R @@ -1,75 +1,12 @@ -#' Coerce lists, matrices, and more to data frames -#' -#' @description -#' `r lifecycle::badge("maturing")` -#' -#' `as_tibble()` turns an existing object, such as a data frame or -#' matrix, into a so-called tibble, a data frame with class [`tbl_df`]. This is -#' in contrast with [tibble()], which builds a tibble from individual columns. -#' `as_tibble()` is to [`tibble()`] as [base::as.data.frame()] is to -#' [base::data.frame()]. -#' -#' `as_tibble()` is an S3 generic, with methods for: -#' * [`data.frame`][base::data.frame()]: Thin wrapper around the `list` method -#' that implements tibble's treatment of [rownames]. -#' * [`matrix`][methods::matrix-class], [`poly`][stats::poly()], -#' [`ts`][stats::ts()], [`table`][base::table()] -#' * Default: Other inputs are first coerced with [base::as.data.frame()]. -#' -#' @importFrom tibble as_tibble -#' -#' @section Row names: -#' The default behavior is to silently remove row names. -#' -#' New code should explicitly convert row names to a new column using the -#' `rownames` argument. 
-#' -#' For existing code that relies on the retention of row names, call -#' `pkgconfig::set_config("tibble::rownames"=NA)` in your script or in your -#' package's [.onLoad()] function. -#' -#' @section Life cycle: -#' Using `as_tibble()` for vectors is superseded as of version 3.0.0, -#' prefer the more expressive maturing `as_tibble_row()` and -#' `as_tibble_col()` variants for new code. -#' -#' @seealso [tibble()] constructs a tibble from individual columns. [enframe()] -#' converts a named vector to a tibble with a column of names and column of -#' values. Name repair is implemented using [vctrs::vec_as_names()]. -#' -#' @param x A data frame, list, matrix, or other object that could reasonably be -#' coerced to a tibble. -#' @param ... Unused, for extensibility. -#' @param rownames How to treat existing row names of a data frame or matrix: -#' * `NULL`: remove row names. This is the default. -#' * `NA`: keep row names. -#' * A string: the name of a new column. Existing rownames are transferred -#' into this column and the `row.names` attribute is deleted. -#' Read more in [rownames]. -#' @param .name_repair see tidyr -#' -#' For compatibility only, do not use for new code. 
-#' @return A tibble -#' -#' @rdname tibble-methods -#' @name as_tibble -#' -#' @export +#' @rdname as_tibble +#' @inherit tibble::as_tibble +#' #' @examples -#' pbmc_small %>% -#' -#' as_tibble() -NULL - - -#' @export -#' @importFrom purrr reduce -#' @importFrom purrr map -#' @importFrom tidyr spread -#' @importFrom tibble enframe +#' pbmc_small |> as_tibble() +#' +#' @importFrom tibble as_tibble #' @importFrom SummarizedExperiment colData -#' -#' +#' @export as_tibble.SingleCellExperiment <- function(x, ..., .name_repair=c("check_unique", "unique", "universal", "minimal"), rownames=pkgconfig::get_config("tibble::rownames", NULL)) { @@ -91,47 +28,14 @@ as_tibble.SingleCellExperiment <- function(x, ..., ) } -#' Get a glimpse of your data -#' -#' @description -#' `r lifecycle::badge("maturing")` -#' -#' `glimpse()` is like a transposed version of `print()`: -#' columns run down the page, and data runs across. -#' This makes it possible to see every column in a data frame. -#' It's a little like [str()] applied to a data frame -#' but it tries to show you as much data as possible. -#' (And it always shows the underlying data, even when applied -#' to a remote data source.) -#' -#' This generic will be moved to \pkg{pillar}, and reexported from there -#' as soon as it becomes available. -#' -#' @section S3 methods: -#' `glimpse` is an S3 generic with a customised method for `tbl`s and -#' `data.frames`, and a default method that calls [str()]. -#' -#' @param x An object to glimpse at. -#' @param width Width of output: defaults to the setting of the option -#' `tibble.width` (if finite) or the width of the console. -#' @param ... Unused, for extensibility. -#' @return x original x is (invisibly) returned, allowing `glimpse()` to be -#' used within a data pipe line. 
+#' @rdname glimpse +#' @inherit pillar::glimpse #' -#' @rdname tibble-methods -#' @name glimpse -#' -#' @export #' @examples -#' pbmc_small %>% tidy %>% glimpse() -#' -#' -NULL - -#' @export +#' pbmc_small |> glimpse() +#' #' @importFrom tibble glimpse -#' -#' +#' @export glimpse.tidySingleCellExperiment = function(x, width = NULL, ...){ x %>% as_tibble() %>% diff --git a/R/tidyr_methods.R b/R/tidyr_methods.R index cf656d3..69f07e2 100755 --- a/R/tidyr_methods.R +++ b/R/tidyr_methods.R @@ -1,63 +1,14 @@ -#' unnest -#' -#' @importFrom tidyr unnest -#' @importFrom purrr when -#' -#' @param data A tbl. (See tidyr) -#' @param cols <[`tidy-select`][tidyr_tidy_select]> Columns to unnest. -#' If you `unnest()` multiple columns, parallel entries must be of -#' compatible sizes, i.e. they're either equal or length 1 (following the -#' standard tidyverse recycling rules). -#' @param ... <[`tidy-select`][tidyr_tidy_select]> Columns to nest, specified -#' using name-variable pairs of the form `new_col=c(col1, col2, col3)`. -#' The right hand side can be any valid tidy select expression. -#' -#' \Sexpr[results=rd, stage=render]{lifecycle::badge("deprecated")}: -#' previously you could write `df %>% nest(x, y, z)` and `df %>% -#' unnest(x, y, z)`. Convert to `df %>% nest(data=c(x, y, z))`. -#' and `df %>% unnest(c(x, y, z))`. -#' -#' If you previously created new variable in `unnest()` you'll now need to -#' do it explicitly with `mutate()`. Convert `df %>% unnest(y=fun(x, y, z))` -#' to `df %>% mutate(y=fun(x, y, z)) %>% unnest(y)`. -#' @param names_sep If `NULL`, the default, the names will be left -#' as is. In `nest()`, inner names will come from the former outer names; -#' in `unnest()`, the new outer names will come from the inner names. -#' -#' If a string, the inner and outer names will be used together. In `nest()`, -#' the names of the new outer columns will be formed by pasting together the -#' outer and the inner column names, separated by `names_sep`. 
In `unnest()`, -#' the new inner names will have the outer names (+ `names_sep`) automatically -#' stripped. This makes `names_sep` roughly symmetric between nesting and unnesting. -#' @param keep_empty See tidyr::unnest -#' @param names_repair See tidyr::unnest -#' @param ptype See tidyr::unnest -#' @param .drop See tidyr::unnest -#' @param .id tidyr::unnest -#' @param sep tidyr::unnest -#' @param .preserve See tidyr::unnest -#' -#' -#' @return A tidySingleCellExperiment objector a tibble depending on input -#' +#' @rdname unnest +#' @inherit tidyr::unnest +#' @aliases unnest_single_cell_experiment +#' #' @examples -#' -#' library(dplyr) -#' pbmc_small %>% -#' -#' nest(data=-groups) %>% +#' pbmc_small |> +#' nest(data=-groups) |> #' unnest(data) #' -#' @rdname unnest-methods -#' @name unnest -#' -#' @export -NULL - - -#' @rdname unnest-methods -#' @name unnest -#' +#' @importFrom tidyr unnest +#' @importFrom purrr when #' @export unnest.tidySingleCellExperiment_nested <- function(data, cols, ..., keep_empty=FALSE, ptype=NULL, names_sep=NULL, names_repair="check_unique", .drop, .id, .sep, .preserve) { @@ -68,64 +19,13 @@ unnest.tidySingleCellExperiment_nested <- function(data, cols, ..., keep_empty=F names_sep=names_sep, names_repair=names_repair) } - - -#' unnest_single_cell_experiment -#' +#' @rdname unnest #' @importFrom tidyr unnest +#' @importFrom rlang quo_name +#' @importFrom rlang enquo +#' @importFrom purrr reduce #' @importFrom purrr when -#' @importFrom rlang quo_name #' @importFrom purrr imap -#' -#' @param data A tbl. (See tidyr) -#' @param cols <[`tidy-select`][tidyr_tidy_select]> Columns to unnest. -#' If you `unnest()` multiple columns, parallel entries must be of -#' compatible sizes, i.e. they're either equal or length 1 (following the -#' standard tidyverse recycling rules). -#' @param ... <[`tidy-select`][tidyr_tidy_select]> Columns to nest, specified -#' using name-variable pairs of the form `new_col=c(col1, col2, col3)`. 
-#' The right hand side can be any valid tidy select expression. -#' -#' \Sexpr[results=rd, stage=render]{lifecycle::badge("deprecated")}: -#' previously you could write `df %>% nest(x, y, z)` and `df %>% -#' unnest(x, y, z)`. Convert to `df %>% nest(data=c(x, y, z))`. -#' and `df %>% unnest(c(x, y, z))`. -#' -#' If you previously created new variable in `unnest()` you'll now need to -#' do it explicitly with `mutate()`. Convert `df %>% unnest(y=fun(x, y, z))` -#' to `df %>% mutate(y=fun(x, y, z)) %>% unnest(y)`. -#' @param names_sep If `NULL`, the default, the names will be left -#' as is. In `nest()`, inner names will come from the former outer names; -#' in `unnest()`, the new outer names will come from the inner names. -#' -#' If a string, the inner and outer names will be used together. In `nest()`, -#' the names of the new outer columns will be formed by pasting together the -#' outer and the inner column names, separated by `names_sep`. In `unnest()`, -#' the new inner names will have the outer names (+ `names_sep`) automatically -#' stripped. This makes `names_sep` roughly symmetric between nesting and unnesting. 
-#' @param keep_empty See tidyr::unnest -#' @param names_repair See tidyr::unnest -#' @param ptype See tidyr::unnest -#' @param .drop See tidyr::unnest -#' @param .id tidyr::unnest -#' @param .sep tidyr::unnest -#' @param .preserve See tidyr::unnest -#' -#' @return A tidySingleCellExperiment objector a tibble depending on input -#' -#' @examples -#' -#' library(dplyr) -#' pbmc_small %>% -#' -#' nest(data=-groups) %>% -#' unnest_single_cell_experiment(data) -#' -#' @rdname unnest-methods -#' @name unnest_single_cell_experiment -#' -#' -#' #' @export unnest_single_cell_experiment <- function(data, cols, ..., keep_empty=FALSE, ptype=NULL, names_sep=NULL, names_repair="check_unique", .drop, .id, .sep, .preserve) { @@ -164,35 +64,17 @@ unnest_single_cell_experiment <- function(data, cols, ..., keep_empty=FALSE, p ) } - - - -#' nest -#' -#' @importFrom tidyr nest -#' -#' @param .data A tbl. (See tidyr) -#' @param ... Name-variable pairs of the form new_col=c(col1, col2, col3) (See tidyr) -#' @param .names_sep See ?tidyr::nest -#' -#' @return A tidySingleCellExperiment objector a tibble depending on input +#' @rdname nest +#' @inherit tidyr::nest #' #' @examples -#' -#' library(dplyr) -#' pbmc_small %>% -#' -#' nest(data=-groups) %>% +#' pbmc_small |> +#' nest(data=-groups) |> #' unnest(data) -#' @rdname nest-methods -#' @name nest -#' -#' @export -NULL - +#' +#' @importFrom tidyr nest #' @importFrom rlang enquos #' @importFrom rlang := -#' #' @export nest.SingleCellExperiment <- function(.data, ..., .names_sep = NULL) { cols <- enquos(...) @@ -230,56 +112,19 @@ nest.SingleCellExperiment <- function(.data, ..., .names_sep = NULL) { add_class("tidySingleCellExperiment_nested") } -#' Extract a character column into multiple columns using regular -#' expression groups -#' -#' Given a regular expression with capturing groups, `extract()` turns -#' each group into a new column. If the groups don't match, or the input -#' is NA, the output will be NA. 
-#' -#' @importFrom tidyr extract -#' -#' @param data A tidySingleCellExperiment object -#' @param col Column name or position. This is passed to -#' [tidyselect::vars_pull()]. -#' -#' This argument is passed by expression and supports -#' [quasiquotation][rlang::quasiquotation] (you can unquote column -#' names or column positions). -#' @param into Names of new variables to create as character vector. -#' Use `NA` to omit the variable in the output. -#' @param regex a regular expression used to extract the desired values. -#' There should be one group (defined by `()`) for each element of `into`. -#' @param remove If `TRUE`, remove input column from output data frame. -#' @param convert If `TRUE`, will run [type.convert()] with -#' `as.is=TRUE` on new columns. This is useful if the component -#' columns are integer, numeric or logical. -#' -#' NB: this will cause string `"NA"`s to be converted to `NA`s. -#' @param ... Additional arguments passed on to methods. -#' @seealso [separate()] to split up by a separator. -#' @export +#' @rdname extract +#' @inherit tidyr::extract +#' #' @examples -#' -#' pbmc_small %>% -#' -#' extract(groups, into="g", regex="g([0-9])", convert=TRUE) -#' @return A tidySingleCellExperiment objector a tibble depending on input -#' -#' @importFrom tidyr extract -#' -#' @rdname extract-methods -#' @name extract -#' -#' @export -NULL - +#' pbmc_small |> +#' extract(groups, +#' into="g", +#' regex="g([0-9])", +#' convert=TRUE) +#' #' @importFrom SummarizedExperiment colData #' @importFrom SummarizedExperiment colData<- -#' -#' @rdname extract-methods -#' @name extract -#' +#' @importFrom tidyr extract #' @export extract.SingleCellExperiment <- function(data, col, into, regex="([[:alnum:]]+)", remove=TRUE, convert=FALSE, ...)
{ @@ -303,123 +148,18 @@ extract.SingleCellExperiment <- function(data, col, into, regex="([[:alnum:]]+)" data } -#' Pivot data from wide to long -#' -#' @description -#' \Sexpr[results=rd, stage=render]{lifecycle::badge("maturing")} -#' -#' `pivot_longer()` "lengthens" data, increasing the number of rows and -#' decreasing the number of columns. The inverse transformation is -#' [pivot_wider()] -#' -#' Learn more in `vignette("pivot")`. -#' -#' @details -#' `pivot_longer()` is an updated approach to [gather()], designed to be both -#' simpler to use and to handle more use cases. We recommend you use -#' `pivot_longer()` for new code; `gather()` isn't going away but is no longer -#' under active development. -#' -#' @importFrom ellipsis check_dots_used -#' @importFrom tidyr pivot_longer -#' -#' @param data A data frame to pivot. -#' @param cols <[`tidy-select`][tidyr_tidy_select]> Columns to pivot into -#' longer format. -#' @param cols_vary When pivoting `cols` into longer format, how should the -#' output rows be arranged relative to their original row number? -#' -#' * `"fastest"`, the default, keeps individual rows from `cols` close -#' together in the output. This often produces intuitively ordered output -#' when you have at least one key column from `data` that is not involved in -#' the pivoting process. -#' -#' * `"slowest"` keeps individual columns from `cols` close together in the -#' output. This often produces intuitively ordered output when you utilize -#' all of the columns from `data` in the pivoting process. -#' @param names_to A character vector specifying the new column or columns to -#' create from the information stored in the column names of `data` specified -#' by `cols`. -#' -#' * If length 0, or if `NULL` is supplied, no columns will be created. -#' -#' * If length 1, a single column will be created which will contain the -#' column names specified by `cols`. -#' -#' * If length >1, multiple columns will be created. 
In this case, one of -#' `names_sep` or `names_pattern` must be supplied to specify how the -#' column names should be split. There are also two additional character -#' values you can take advantage of: -#' -#' * `NA` will discard the corresponding component of the column name. -#' -#' * `".value"` indicates that the corresponding component of the column -#' name defines the name of the output column containing the cell values, -#' overriding `values_to` entirely. -#' @param names_prefix A regular expression used to remove matching text -#' from the start of each variable name. -#' @param names_sep,names_pattern If `names_to` contains multiple values, -#' these arguments control how the column name is broken up. -#' -#' `names_sep` takes the same specification as [separate()], and can either -#' be a numeric vector (specifying positions to break on), or a single string -#' (specifying a regular expression to split on). -#' -#' `names_pattern` takes the same specification as [extract()], a regular -#' expression containing matching groups (`()`). -#' -#' If these arguments do not give you enough control, use -#' `pivot_longer_spec()` to create a spec object and process manually as -#' needed. -#' @param names_repair What happens if the output has invalid column names? -#' The default, `"check_unique"` is to error if the columns are duplicated. -#' Use `"minimal"` to allow duplicates in the output, or `"unique"` to -#' de-duplicated by adding numeric suffixes. See [vctrs::vec_as_names()] -#' for more options. -#' @param values_to A string specifying the name of the column to create -#' from the data stored in cell values. If `names_to` is a character -#' containing the special `.value` sentinel, this value will be ignored, -#' and the name of the value column will be derived from part of the -#' existing column names. -#' @param values_drop_na If `TRUE`, will drop rows that contain only `NA`s -#' in the `value_to` column. 
This effectively converts explicit missing values -#' to implicit missing values, and should generally be used only when missing -#' values in `data` were created by its structure. -#' @param names_transform,values_transform Optionally, a list of column -#' name-function pairs. Alternatively, a single function can be supplied, -#' which will be applied to all columns. Use these arguments if you need to -#' change the types of specific columns. For example, `names_transform = -#' list(week = as.integer)` would convert a character variable called `week` -#' to an integer. -#' -#' If not specified, the type of the columns generated from `names_to` will -#' be character, and the type of the variables generated from `values_to` -#' will be the common type of the input columns used to generate them. -#' @param names_ptypes,values_ptypes Optionally, a list of column name-prototype -#' pairs. Alternatively, a single empty prototype can be supplied, which will -#' be applied to all columns. A prototype (or ptype for short) is a -#' zero-length vector (like `integer()` or `numeric()`) that defines the type, -#' class, and attributes of a vector. Use these arguments if you want to -#' confirm that the created columns are the types that you expect. Note that -#' if you want to change (instead of confirm) the types of specific columns, -#' you should use `names_transform` or `values_transform` instead. -#' @param ... Additional arguments passed on to methods. 
-#' -#' @return A tidySingleCellExperiment objector a tibble depending on input -#' -#' @rdname pivot-methods -#' @name pivot_longer -#' +#' @rdname pivot_longer +#' @inherit tidyr::pivot_longer +#' #' @export #' @examples #' # See vignette("pivot") for examples and explanation -#' -#' library(dplyr) -#' pbmc_small %>% -#' -#' pivot_longer(c(orig.ident, groups), names_to="name", values_to="value") -NULL - +#' pbmc_small |> pivot_longer( +#' cols=c(orig.ident, groups), +#' names_to="name", values_to="value") +#' +#' @importFrom ellipsis check_dots_used +#' @importFrom tidyr pivot_longer #' @export pivot_longer.SingleCellExperiment <- function(data, cols, ..., cols_vary = "fastest", names_to = "name", @@ -458,44 +198,18 @@ pivot_longer.SingleCellExperiment <- function(data, ) } -#' Unite multiple columns into one by pasting strings together -#' -#' Convenience function to paste together multiple columns into one. -#' -#' @importFrom ellipsis check_dots_unnamed -#' @importFrom tidyr unite -#' -#' @param data A data frame. -#' @param col The name of the new column, as a string or symbol. -#' -#' This argument is passed by expression and supports -#' [quasiquotation][rlang::quasiquotation] (you can unquote strings -#' and symbols). The name is captured from the expression with -#' [rlang::ensym()] (note that this kind of interface where -#' symbols do not represent actual objects is now discouraged in the -#' tidyverse; we support it here for backward compatibility). -#' @param ... <[`tidy-select`][tidyr_tidy_select]> Columns to unite -#' @param sep Separator to use between values. -#' @param na.rm If `TRUE`, missing values will be remove prior to uniting -#' each value. -#' @param remove If `TRUE`, remove input columns from output data frame. -#' @seealso [separate()], the complement. 
-#' -#' @return A tidySingleCellExperiment objector a tibble depending on input -#' -#' @rdname unite-methods -#' @name unite -#' -#' @export +#' @rdname unite +#' @inherit tidyr::unite +#' #' @examples -#' -#' pbmc_small %>% -#' -#' unite("new_col", c(orig.ident, groups)) -NULL - +#' pbmc_small |> unite( +#' col="new_col", +#' c(orig.ident, groups)) +#' #' @importFrom SummarizedExperiment colData #' @importFrom SummarizedExperiment colData<- +#' @importFrom ellipsis check_dots_unnamed +#' @importFrom tidyr unite #' @export unite.SingleCellExperiment <- function(data, col, ..., sep="_", remove=TRUE, na.rm=FALSE) { @@ -540,58 +254,17 @@ unite.SingleCellExperiment <- function(data, col, ..., sep="_", remove=TRUE, na. data } -#' Separate a character column into multiple columns with a regular -#' expression or numeric locations -#' -#' Given either a regular expression or a vector of character positions, -#' `separate()` turns a single character column into multiple columns. -#' -#' @importFrom ellipsis check_dots_used -#' @importFrom tidyr separate -#' -#' @inheritParams extract -#' @param sep Separator between columns. -#' -#' If character, `sep` is interpreted as a regular expression. The default -#' value is a regular expression that matches any sequence of -#' non-alphanumeric values. -#' -#' If numeric, `sep` is interpreted as character positions to split at. Positive -#' values start at 1 at the far-left of the string; negative value start at -1 at -#' the far-right of the string. The length of `sep` should be one less than -#' `into`. -#' @param extra If `sep` is a character vector, this controls what -#' happens when there are too many pieces. There are three valid options: -#' -#' * "warn" (the default): emit a warning and drop extra values. -#' * "drop": drop any extra values without a warning. 
-#' * "merge": only splits at most `length(into)` times -#' @param fill If `sep` is a character vector, this controls what -#' happens when there are not enough pieces. There are three valid options: -#' -#' * "warn" (the default): emit a warning and fill from the right -#' * "right": fill with missing values on the right -#' * "left": fill with missing values on the left -#' @seealso [unite()], the complement, [extract()] which uses regular -#' expression capturing groups. -#' -#' @return A tidySingleCellExperiment objector a tibble depending on input -#' -#' @rdname separate-methods -#' @name separate -#' -#' @export +#' @rdname separate +#' @inherit tidyr::separate +#' #' @examples -#' -#' un <- pbmc_small %>% -#' -#' unite("new_col", c(orig.ident, groups)) -#' un %>% separate(col=new_col, into=c("orig.ident", "groups")) -NULL - +#' un <- pbmc_small |> unite("new_col", c(orig.ident, groups)) +#' un |> separate(new_col, c("orig.ident", "groups")) +#' #' @importFrom SummarizedExperiment colData #' @importFrom SummarizedExperiment colData<- -#' +#' @importFrom ellipsis check_dots_used +#' @importFrom tidyr separate #' @export separate.SingleCellExperiment <- function(data, col, into, sep="[^[:alnum:]]+", remove=TRUE, convert=FALSE, extra="warn", fill="warn", ...) 
{ diff --git a/R/utilities.R b/R/utilities.R index 273e64e..d947188 100755 --- a/R/utilities.R +++ b/R/utilities.R @@ -419,6 +419,8 @@ add_attr = function(var, attribute, name) { var } +#' @importFrom purrr reduce +#' @importFrom tibble enframe special_datasets_to_tibble = function(.singleCellExperiment, ...){ x = .singleCellExperiment |> diff --git a/R/zzz.R b/R/zzz.R new file mode 100644 index 0000000..ece89a4 --- /dev/null +++ b/R/zzz.R @@ -0,0 +1,30 @@ +#' @importFrom utils packageDescription +.onAttach <- function(libname, pkgname) { + version <- packageDescription(pkgname, fields="Version") + + # msg = paste0("======================================== + # ", pkgname, " version ", version, " + # If you use TIDYBULK in published research, please cite: + # + # Mangiola et al. tidybulk: an R tidy framework for modular + # transcriptomic data analysis. Genome Biology 2021. + # + # This message can be suppressed by: + # suppressPackageStartupMessages(library(tidybulk)) + # ======================================== + # ") + # + # packageStartupMessage(msg) + + # Attach tidyverse + attached <- tidyverse_attach() + +} + +# rv = R.Version() +# +# if(getRversion() >= "4.0.0" && as.numeric(rv$`svn rev`) >= 77889) { +# unitType = get("unitType", envir = asNamespace("grid")) +# } else { +# unitType = function(x, recurse = TRUE) attr(x, "unit") +# } \ No newline at end of file diff --git a/man/aggregate_cells.Rd b/man/aggregate_cells.Rd index 60c7e9a..53e3790 100644 --- a/man/aggregate_cells.Rd +++ b/man/aggregate_cells.Rd @@ -1,8 +1,6 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/methods.R -\docType{methods} -\name{aggregate_cells} -\alias{aggregate_cells} +\name{aggregate_cells,SingleCellExperiment-method} \alias{aggregate_cells,SingleCellExperiment-method} \title{Aggregate cells} \usage{ @@ -26,9 +24,7 @@ \item{aggregation_function}{The method of cell-feature value aggregation} } \value{ -A SummarizedExperiment object - -An object 
containing the information for the specified features +A tibble object } \description{ Combine cells into groups based on shared variables and aggregate feature counts. @@ -36,6 +32,6 @@ Combine cells into groups based on shared variables and aggregate feature counts \examples{ data("pbmc_small") pbmc_small_pseudo_bulk <- pbmc_small |> - aggregate_cells(c(groups, ident), assays = "counts") + aggregate_cells(c(groups, ident), assays="counts") } diff --git a/man/arrange.Rd b/man/arrange.Rd new file mode 100644 index 0000000..78d62f8 --- /dev/null +++ b/man/arrange.Rd @@ -0,0 +1,74 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/dplyr_methods.R +\name{arrange.SingleCellExperiment} +\alias{arrange.SingleCellExperiment} +\title{Order rows using column values} +\usage{ +\method{arrange}{SingleCellExperiment}(.data, ..., .by_group = FALSE) +} +\arguments{ +\item{.data}{A data frame, data frame extension (e.g. a tibble), or a +lazy data frame (e.g. from dbplyr or dtplyr). See \emph{Methods}, below, for +more details.} + +\item{...}{<\code{\link[rlang:args_data_masking]{data-masking}}> Variables, or +functions of variables. Use \code{\link[dplyr:desc]{desc()}} to sort a variable in descending +order.} + +\item{.by_group}{If \code{TRUE}, will sort first by grouping variable. Applies to +grouped data frames only.} +} +\value{ +An object of the same type as \code{.data}. The output has the following +properties: +\itemize{ +\item All rows appear in the output, but (usually) in a different place. +\item Columns are not modified. +\item Groups are not modified. +\item Data frame attributes are preserved. +} +} +\description{ +\code{arrange()} orders the rows of a data frame by the values of selected +columns. 
+ +Unlike other dplyr verbs, \code{arrange()} largely ignores grouping; you +need to explicitly mention grouping variables (or use \code{.by_group = TRUE}) +in order to group by them, and functions of variables are evaluated +once per data frame, not once per group. +} +\details{ +\subsection{Missing values}{ + +Unlike base sorting with \code{sort()}, \code{NA} are: +\itemize{ +\item always sorted to the end for local data, even when wrapped with \code{desc()}. +\item treated differently for remote data, depending on the backend. +} +} +} +\section{Methods}{ + + +This function is a \strong{generic}, which means that packages can provide +implementations (methods) for other classes. See the documentation of +individual methods for extra arguments and differences in behaviour. + +The following methods are currently available in loaded packages: +\Sexpr[stage=render,results=rd]{dplyr:::methods_rd("arrange")}. + +} + +\examples{ +pbmc_small |> + arrange(nFeature_RNA) + +} +\seealso{ +Other single table verbs: +\code{\link{mutate.SingleCellExperiment}()}, +\code{\link{rename.SingleCellExperiment}()}, +\code{\link{slice.SingleCellExperiment}()}, +\code{\link{summarise.SingleCellExperiment}()} +} +\concept{single table verbs} diff --git a/man/as_tibble.Rd b/man/as_tibble.Rd new file mode 100644 index 0000000..28ec49b --- /dev/null +++ b/man/as_tibble.Rd @@ -0,0 +1,101 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/tibble_methods.R +\name{as_tibble.SingleCellExperiment} +\alias{as_tibble.SingleCellExperiment} +\title{Coerce lists, matrices, and more to data frames} +\usage{ +\method{as_tibble}{SingleCellExperiment}( + x, + ..., + .name_repair = c("check_unique", "unique", "universal", "minimal"), + rownames = pkgconfig::get_config("tibble::rownames", NULL) +) +} +\arguments{ +\item{x}{A data frame, list, matrix, or other object that could reasonably be +coerced to a tibble.} + +\item{...}{Unused, for extensibility.} + 
+\item{.name_repair}{Treatment of problematic column names: +\itemize{ +\item \code{"minimal"}: No name repair or checks, beyond basic existence, +\item \code{"unique"}: Make sure names are unique and not empty, +\item \code{"check_unique"}: (default value), no name repair, but check they are +\code{unique}, +\item \code{"universal"}: Make the names \code{unique} and syntactic +\item a function: apply custom name repair (e.g., \code{.name_repair = make.names} +for names in the style of base R). +\item A purrr-style anonymous function, see \code{\link[rlang:as_function]{rlang::as_function()}} +} + +This argument is passed on as \code{repair} to \code{\link[vctrs:vec_as_names]{vctrs::vec_as_names()}}. +See there for more details on these terms and the strategies used +to enforce them.} + +\item{rownames}{How to treat existing row names of a data frame or matrix: +\itemize{ +\item \code{NULL}: remove row names. This is the default. +\item \code{NA}: keep row names. +\item A string: the name of a new column. Existing rownames are transferred +into this column and the \code{row.names} attribute is deleted. +No name repair is applied to the new column name, even if \code{x} already contains +a column of that name. +Use \code{as_tibble(rownames_to_column(...))} to safeguard against this case. +} + +Read more in \link[tibble]{rownames}.} +} +\description{ +\code{as_tibble()} turns an existing object, such as a data frame or +matrix, into a so-called tibble, a data frame with class \code{\link[tibble]{tbl_df}}. This is +in contrast with \code{\link[tibble:tibble]{tibble()}}, which builds a tibble from individual columns. +\code{as_tibble()} is to \code{\link[tibble:tibble]{tibble()}} as \code{\link[base:as.data.frame]{base::as.data.frame()}} is to +\code{\link[base:data.frame]{base::data.frame()}}. 
+ +\code{as_tibble()} is an S3 generic, with methods for: +\itemize{ +\item \code{\link[base:data.frame]{data.frame}}: Thin wrapper around the \code{list} method +that implements tibble's treatment of \link[tibble]{rownames}. +\item \code{\link[base:matrix]{matrix}}, \code{\link[stats:poly]{poly}}, +\code{\link[stats:ts]{ts}}, \code{\link[base:table]{table}} +\item Default: Other inputs are first coerced with \code{\link[base:as.data.frame]{base::as.data.frame()}}. +} + +\code{as_tibble_row()} converts a vector to a tibble with one row. +If the input is a list, all elements must have size one. + +\code{as_tibble_col()} converts a vector to a tibble with one column. +} +\section{Row names}{ + + +The default behavior is to silently remove row names. + +New code should explicitly convert row names to a new column using the +\code{rownames} argument. + +For existing code that relies on the retention of row names, call +\code{pkgconfig::set_config("tibble::rownames" = NA)} in your script or in your +package's \code{\link[=.onLoad]{.onLoad()}} function. + +} + +\section{Life cycle}{ + + +Using \code{as_tibble()} for vectors is superseded as of version 3.0.0, +prefer the more expressive \code{as_tibble_row()} and +\code{as_tibble_col()} variants for new code. + +} + +\examples{ +pbmc_small |> as_tibble() + +} +\seealso{ +\code{\link[tibble:tibble]{tibble()}} constructs a tibble from individual columns. \code{\link[tibble:enframe]{enframe()}} +converts a named vector to a tibble with a column of names and column of +values. Name repair is implemented using \code{\link[vctrs:vec_as_names]{vctrs::vec_as_names()}}. 
+} diff --git a/man/bind_rows.Rd b/man/bind_rows.Rd index 0efd4e9..167191f 100644 --- a/man/bind_rows.Rd +++ b/man/bind_rows.Rd @@ -2,50 +2,67 @@ % Please edit documentation in R/dplyr_methods.R \name{bind_rows} \alias{bind_rows} -\title{Efficiently bind multiple data frames by row and column} +\alias{bind_rows.SingleCellExperiment} +\alias{bind_cols} +\alias{bind_cols.SingleCellExperiment} +\title{Efficiently bind multiple data frames by row and column} +\usage{ +\method{bind_rows}{SingleCellExperiment}(..., .id = NULL, add.cell.ids = NULL) + +\method{bind_cols}{SingleCellExperiment}(..., .id = NULL) +} \arguments{ \item{...}{Data frames to combine. -Each argument can either be a data frame, a list that could be a data -frame, or a list of data frames. + Each argument can either be a data frame, a list that could be a data + frame, or a list of data frames. -When row-binding, columns are matched by name, and any missing -columns will be filled with NA. + When row-binding, columns are matched by name, and any missing + columns will be filled with NA. -When column-binding, rows are matched by position, so all data -frames must have the same number of rows. To match by value, not -position, see mutate-joins.} + When column-binding, rows are matched by position, so all data + frames must have the same number of rows. To match by value, not + position, see mutate-joins.} \item{.id}{Data frame identifier. -When \code{.id} is supplied, a new column of identifiers is -created to link each row to its original data frame. The labels -are taken from the named arguments to \code{bind_rows()}. When a -list of data frames is supplied, the labels are taken from the -names of the list. If no names are found a numeric sequence is -used instead.} + When `.id` is supplied, a new column of identifiers is + created to link each row to its original data frame. The labels + are taken from the named arguments to `bind_rows()`.
When a + list of data frames is supplied, the labels are taken from the + names of the list. If no names are found a numeric sequence is + used instead.} \item{add.cell.ids}{from Seurat 3.0 A character vector of length(x = c(x, y)). Appends the corresponding values to the start of each objects' cell names.} } \value{ -\code{bind_rows()} and \code{bind_cols()} return the same type as -the first input, either a data frame, \code{tbl_df}, or \code{grouped_df}. +`bind_rows()` and `bind_cols()` return the same type as + the first input, either a data frame, `tbl_df`, or `grouped_df`. + +`bind_rows()` and `bind_cols()` return the same type as + the first input, either a data frame, `tbl_df`, or `grouped_df`. } \description{ This is an efficient implementation of the common pattern of -\code{do.call(rbind, dfs)} or \code{do.call(cbind, dfs)} for binding many +`do.call(rbind, dfs)` or `do.call(cbind, dfs)` for binding many +data frames into one. + +This is an efficient implementation of the common pattern of +`do.call(rbind, dfs)` or `do.call(cbind, dfs)` for binding many data frames into one. } \details{ -The output of \code{bind_rows()} will contain a column if that column +The output of `bind_rows()` will contain a column if that column +appears in any of the inputs. + +The output of `bind_rows()` will contain a column if that column appears in any of the inputs. } \examples{ -`\%>\%` = magrittr::`\%>\%` -tt = pbmc_small -bind_rows( tt, tt ) +tt <- pbmc_small +bind_rows(tt, tt) -tt_bind = tt \%>\% select(nCount_RNA ,nFeature_RNA) -tt \%>\% bind_cols(tt_bind) +tt_bind <- tt |> select(nCount_RNA, nFeature_RNA) +tt |> bind_cols(tt_bind) } diff --git a/man/cell_type_df.Rd b/man/cell_type_df.Rd index cab120d..53d9406 100644 --- a/man/cell_type_df.Rd +++ b/man/cell_type_df.Rd @@ -5,12 +5,12 @@ \alias{cell_type_df} \title{Cell types of 80 PBMC single cells} \format{ -A tibble containing 80 rows and 2 columns. 
Cells are a subsample of -the Peripheral Blood Mononuclear Cells (PBMC) dataset of 2,700 single -cell. Cell types were identified with SingleR. +A tibble containing 80 rows and 2 columns. Cells are a subsample of + the Peripheral Blood Mononuclear Cells (PBMC) dataset of 2,700 single + cell. Cell types were identified with SingleR. \describe{ -\item{cell}{cell identifier, barcode} -\item{first.labels}{cell type} + \item{cell}{cell identifier, barcode} + \item{first.labels}{cell type} } } \source{ diff --git a/man/count.Rd b/man/count.Rd new file mode 100644 index 0000000..f1f5994 --- /dev/null +++ b/man/count.Rd @@ -0,0 +1,70 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/dplyr_methods.R +\name{count.SingleCellExperiment} +\alias{count.SingleCellExperiment} +\alias{add_count.SingleCellExperiment} +\alias{add_count} +\title{Count the observations in each group} +\usage{ +\method{count}{SingleCellExperiment}( + x, + ..., + wt = NULL, + sort = FALSE, + name = NULL, + .drop = group_by_drop_default(x) +) + +\method{add_count}{SingleCellExperiment}(x, ..., wt = NULL, sort = FALSE, name = NULL) +} +\arguments{ +\item{x}{A data frame, data frame extension (e.g. a tibble), or a +lazy data frame (e.g. from dbplyr or dtplyr).} + +\item{...}{<\code{\link[rlang:args_data_masking]{data-masking}}> Variables to group +by.} + +\item{wt}{<\code{\link[rlang:args_data_masking]{data-masking}}> Frequency weights. +Can be \code{NULL} or a variable: +\itemize{ +\item If \code{NULL} (the default), counts the number of rows in each group. +\item If a variable, computes \code{sum(wt)} for each group. +}} + +\item{sort}{If \code{TRUE}, will show the largest groups at the top.} + +\item{name}{The name of the new column in the output. + +If omitted, it will default to \code{n}. If there's already a column called \code{n}, +it will use \code{nn}. 
If there's a column called \code{n} and \code{nn}, it'll use +\code{nnn}, and so on, adding \code{n}s until it gets a new name.} + +\item{.drop}{Handling of factor levels that don't appear in the data, passed +on to \code{\link[dplyr:group_by]{group_by()}}. + +For \code{count()}: if \code{FALSE} will include counts for empty groups (i.e. for +levels of factors that don't exist in the data). + +\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#deprecated}{\figure{lifecycle-deprecated.svg}{options: alt='[Deprecated]'}}}{\strong{[Deprecated]}} For \code{add_count()}: deprecated since it +can't actually affect the output.} +} +\value{ +An object of the same type as \code{.data}. \code{count()} and \code{add_count()} +group transiently, so the output has the same groups as the input. +} +\description{ +\code{count()} lets you quickly count the unique values of one or more variables: +\code{df \%>\% count(a, b)} is roughly equivalent to +\code{df \%>\% group_by(a, b) \%>\% summarise(n = n())}. +\code{count()} is paired with \code{tally()}, a lower-level helper that is equivalent +to \code{df \%>\% summarise(n = n())}. Supply \code{wt} to perform weighted counts, +switching the summary from \code{n = n()} to \code{n = sum(wt)}. + +\code{add_count()} and \code{add_tally()} are equivalents to \code{count()} and \code{tally()} +but use \code{mutate()} instead of \code{summarise()} so that they add a new column +with group-wise counts. 
+} +\examples{ +pbmc_small |> count(groups) + +} diff --git a/man/distinct.Rd b/man/distinct.Rd new file mode 100644 index 0000000..b1a5e81 --- /dev/null +++ b/man/distinct.Rd @@ -0,0 +1,54 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/dplyr_methods.R +\name{distinct.SingleCellExperiment} +\alias{distinct.SingleCellExperiment} +\title{Keep distinct/unique rows} +\usage{ +\method{distinct}{SingleCellExperiment}(.data, ..., .keep_all = FALSE) +} +\arguments{ +\item{.data}{A data frame, data frame extension (e.g. a tibble), or a +lazy data frame (e.g. from dbplyr or dtplyr). See \emph{Methods}, below, for +more details.} + +\item{...}{<\code{\link[rlang:args_data_masking]{data-masking}}> Optional variables to +use when determining uniqueness. If there are multiple rows for a given +combination of inputs, only the first row will be preserved. If omitted, +will use all variables in the data frame.} + +\item{.keep_all}{If \code{TRUE}, keep all variables in \code{.data}. +If a combination of \code{...} is not distinct, this keeps the +first row of values.} +} +\value{ +An object of the same type as \code{.data}. The output has the following +properties: +\itemize{ +\item Rows are a subset of the input but appear in the same order. +\item Columns are not modified if \code{...} is empty or \code{.keep_all} is \code{TRUE}. +Otherwise, \code{distinct()} first calls \code{mutate()} to create new columns. +\item Groups are not modified. +\item Data frame attributes are preserved. +} +} +\description{ +Keep only unique/distinct rows from a data frame. This is similar +to \code{\link[=unique.data.frame]{unique.data.frame()}} but considerably faster. +} +\section{Methods}{ + + +This function is a \strong{generic}, which means that packages can provide +implementations (methods) for other classes. See the documentation of +individual methods for extra arguments and differences in behaviour. 
+ +The following methods are currently available in loaded packages: +\Sexpr[stage=render,results=rd]{dplyr:::methods_rd("distinct")}. + +} + +\examples{ +pbmc_small |> + distinct(groups) + +} diff --git a/man/dplyr-methods.Rd b/man/dplyr-methods.Rd deleted file mode 100644 index 7b905bc..0000000 --- a/man/dplyr-methods.Rd +++ /dev/null @@ -1,579 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/dplyr_methods.R -\name{arrange} -\alias{arrange} -\alias{bind_cols} -\alias{distinct} -\alias{filter} -\alias{group_by} -\alias{summarise} -\alias{summarize} -\alias{mutate} -\alias{rename} -\alias{rowwise} -\alias{left_join} -\alias{inner_join} -\alias{right_join} -\alias{full_join} -\alias{slice} -\alias{select} -\alias{sample_n} -\alias{sample_frac} -\alias{count} -\alias{add_count} -\alias{pull} -\title{Arrange rows by column values} -\usage{ -summarize(.data, ..., .by = NULL, .groups = NULL) -} -\arguments{ -\item{.data}{Input data frame.} - -\item{...}{For use by methods.} - -\item{.by}{A character vector of variables to join by. (See dplyr)} - -\item{.by_group}{If TRUE, will sort first by grouping variable. Applies to -grouped data frames only.} - -\item{.keep_all}{If TRUE, keep all variables in .data. If a combination -of ... is not distinct, this keeps the first row of values. (See dplyr)} - -\item{.preserve}{when \code{FALSE} (the default), the grouping structure -is recalculated based on the resulting data, otherwise it is kept as is.} - -\item{.add}{When \code{FALSE}, the default, \code{group_by()} will -override existing groups. To add to the existing groups, use -\code{.add=TRUE}. - -This argument was previously called \code{add}, but that prevented -creating a new grouping variable called \code{add}, and conflicts with -our naming conventions.} - -\item{y}{tbls to join. (See dplyr)} - -\item{by}{A character vector of variables to join by. 
(See dplyr)} - -\item{copy}{If x and y are not from the same data source, and copy is TRUE, -then y will be copied into the same src as x. (See dplyr)} - -\item{suffix}{If there are non-joined duplicate variables in x and y, these -suffixes will be added to the output to disambiguate them. Should be a -character vector of length 2. (See dplyr)} - -\item{tbl}{A data.frame.} - -\item{size}{<\code{\link[=dplyr_tidy_select]{tidy-select}}> -For \code{sample_n()}, the number of rows to select. -For \code{sample_frac()}, the fraction of rows to select. -If \code{tbl} is grouped, \code{size} applies to each group.} - -\item{replace}{Sample with or without replacement?} - -\item{weight}{<\code{\link[=dplyr_tidy_select]{tidy-select}}> Sampling weights. -This must evaluate to a vector of non-negative numbers the same length as -the input. Weights are automatically standardised to sum to 1.} - -\item{.env}{DEPRECATED.} - -\item{x}{A data frame, data frame extension (e.g. a tibble), or a -lazy data frame (e.g. from dbplyr or dtplyr).} - -\item{wt}{<\code{\link[=dplyr_data_masking]{data-masking}}> Frequency weights. -Can be \code{NULL} or a variable: -\itemize{ -\item If \code{NULL} (the default), counts the number of rows in each group. -\item If a variable, computes \code{sum(wt)} for each group. -}} - -\item{sort}{If \code{TRUE}, will show the largest groups at the top.} - -\item{.drop}{For \code{count()}: if \code{FALSE} will include counts for empty groups -(i.e. for levels of factors that don't exist in the data). Deprecated in -\code{add_count()} since it didn't actually affect the output.} - -\item{name}{An optional parameter that specifies the column to be used -as names for a named vector. Specified in a similar manner as \code{var}.} -} -\value{ -An object of the same type as \code{.data}. -\itemize{ -\item All rows appear in the output, but (usually) in a different place. -\item Columns are not modified. -\item Groups are not modified. 
-\item Data frame attributes are preserved. -} - -A tidySingleCellExperiment object - -An object of the same type as \code{.data}. -\itemize{ -\item Rows are a subset of the input, but appear in the same order. -\item Columns are not modified. -\item The number of groups may be reduced (if \code{.preserve} is not \code{TRUE}). -\item Data frame attributes are preserved. -} - -A \link[=grouped_df]{grouped data frame}, unless the combination of -\code{...} and \code{add} yields a non empty set of grouping columns, a -regular (ungrouped) data frame otherwise. - -An object \emph{usually} of the same type as \code{.data}. -\itemize{ -\item The rows come from the underlying \code{group_keys()}. -\item The columns are a combination of the grouping keys and the summary -expressions that you provide. -\item If \code{x} is grouped by more than one variable, the output will be another -\link{grouped_df} with the right-most group removed. -\item If \code{x} is grouped by one variable, or is not grouped, the output will -be a \link{tibble}. -\item Data frame attributes are \strong{not} preserved, because \code{summarise()} -fundamentally creates a new data frame. -} - -An object of the same type as \code{.data}. - -For \code{mutate()}: -\itemize{ -\item Rows are not affected. -\item Existing columns will be preserved unless explicitly modified. -\item New columns will be added to the right of existing columns. -\item Columns given value \code{NULL} will be removed -\item Groups will be recomputed if a grouping variable is mutated. -\item Data frame attributes are preserved. -} - -For \code{transmute()}: -\itemize{ -\item Rows are not affected. -\item Apart from grouping variables, existing columns will be remove unless -explicitly kept. -\item Column order matches order of expressions. -\item Groups will be recomputed if a grouping variable is mutated. -\item Data frame attributes are preserved. -} - -An object of the same type as \code{.data}. 
-\itemize{ -\item Rows are not affected. -\item Column names are changed; column order is preserved -\item Data frame attributes are preserved. -\item Groups are updated to reflect new names. -} - -A \code{tbl} - -A \code{tbl} - -A tidySingleCellExperiment object - -A tidySingleCellExperiment object - -A tidySingleCellExperiment object - -A tidySingleCellExperiment object - -An object of the same type as \code{.data}. The output has the following -properties: -\itemize{ -\item Each row may appear 0, 1, or many times in the output. -\item Columns are not modified. -\item Groups are not modified. -\item Data frame attributes are preserved. -} - -An object of the same type as \code{.data}. The output has the following -properties: -\itemize{ -\item Rows are not affected. -\item Output columns are a subset of input columns, potentially with a different -order. Columns will be renamed if \code{new_name=old_name} form is used. -\item Data frame attributes are preserved. -\item Groups are maintained; you can't select off grouping variables. -} - -A tidySingleCellExperiment object - -An object of the same type as \code{.data}. \code{count()} and \code{add_count()} -group transiently, so the output has the same groups as the input. - -A vector the same size as \code{.data}. -} -\description{ -\code{arrange()} order the rows of a data frame rows by the values of selected -columns. - -Unlike other dplyr verbs, \code{arrange()} largely ignores grouping; you -need to explicit mention grouping variables (or use \code{by_group=TRUE}) -in order to group by them, and functions of variables are evaluated -once per data frame, not once per group. - -\code{filter()} retains the rows where the conditions you provide a \code{TRUE}. Note -that, unlike base subsetting with \code{[}, rows where the condition evaluates -to \code{NA} are dropped. - -Most data operations are done on groups defined by variables. 
-\code{group_by()} takes an existing tbl and converts it into a grouped tbl -where operations are performed "by group". \code{ungroup()} removes grouping. - -\code{summarise()} creates a new data frame. It will have one (or more) rows for -each combination of grouping variables; if there are no grouping variables, -the output will have a single row summarising all observations in the input. -It will contain one column for each grouping variable and one column -for each of the summary statistics that you have specified. - -\code{summarise()} and \code{summarize()} are synonyms. - -\code{mutate()} adds new variables and preserves existing ones; -\code{transmute()} adds new variables and drops existing ones. -New variables overwrite existing variables of the same name. -Variables can be removed by setting their value to \code{NULL}. - -Rename individual variables using \code{new_name=old_name} syntax. - -See \href{https://github.com/jennybc/row-oriented-workflows}{this repository} -for alternative ways to perform row-wise operations. - -\code{slice()} lets you index rows by their (integer) locations. It allows you -to select, remove, and duplicate rows. It is accompanied by a number of -helpers for common use cases: -\itemize{ -\item \code{slice_head()} and \code{slice_tail()} select the first or last rows. -\item \code{slice_sample()} randomly selects rows. -\item \code{slice_min()} and \code{slice_max()} select rows with highest or lowest values -of a variable. -} - -If \code{.data} is a \link{grouped_df}, the operation will be performed on each group, -so that (e.g.) \code{slice_head(df, n=5)} will select the first five rows in -each group. - -Select (and optionally rename) variables in a data frame, using a concise -mini-language that makes it easy to refer to variables based on their name -(e.g. \code{a:f} selects all columns from \code{a} on the left to \code{f} on the -right). 
You can also use predicate functions like \link{is.numeric} to select -variables based on their properties. - -\Sexpr[results=rd, stage=render]{lifecycle::badge("superseded")} -\code{sample_n()} and \code{sample_frac()} have been superseded in favour of -\code{\link[=slice_sample]{slice_sample()}}. While they will not be deprecated in the near future, -retirement means that we will only perform critical bug fixes, so we recommend -moving to the newer alternative. - -These functions were superseded because we realised it was more convenient to -have two mutually exclusive arguments to one function, rather than two -separate functions. This also made it to clean up a few other smaller -design issues with \code{sample_n()}/\code{sample_frac}: -\itemize{ -\item The connection to \code{slice()} was not obvious. -\item The name of the first argument, \code{tbl}, is inconsistent with other -single table verbs which use \code{.data}. -\item The \code{size} argument uses tidy evaluation, which is surprising and -undocumented. -\item It was easier to remove the deprecated \code{.env} argument. -\item \code{...} was in a suboptimal position. -} - -\code{count()} lets you quickly count the unique values of one or more variables: -\code{df \%>\% count(a, b)} is roughly equivalent to -\code{df \%>\% group_by(a, b) \%>\% summarise(n=n())}. -\code{count()} is paired with \code{tally()}, a lower-level helper that is equivalent -to \code{df \%>\% summarise(n=n())}. Supply \code{wt} to perform weighted counts, -switching the summary from \code{n=n()} to \code{n=sum(wt)}. - -\code{add_count()} are \code{add_tally()} are equivalents to \code{count()} and \code{tally()} -but use \code{mutate()} instead of \code{summarise()} so that they add a new column -with group-wise counts. - -\code{pull()} is similar to \code{$}. It's mostly useful because it looks a little -nicer in pipes, it also works with remote data frames, and it can optionally -name the output. 
-} -\details{ -\subsection{Locales}{ - -The sort order for character vectors will depend on the collating sequence -of the locale in use: see \code{\link[=locales]{locales()}}. -} - -\subsection{Missing values}{ - -Unlike base sorting with \code{sort()}, \code{NA} are: -\itemize{ -\item always sorted to the end for local data, even when wrapped with \code{desc()}. -\item treated differently for remote data, depending on the backend. -} -} - -dplyr is not yet smart enough to optimise filtering optimisation -on grouped datasets that don't need grouped calculations. For this reason, -filtering is often considerably faster on \code{\link[=ungroup]{ungroup()}}ed data. - -\code{rowwise()} is used for the results of \code{\link[=do]{do()}} when you -create list-variables. It is also useful to support arbitrary -complex operations that need to be applied to each row. - -Currently, rowwise grouping only works with data frames. Its -main impact is to allow you to work with list-variables in -\code{\link[=summarise]{summarise()}} and \code{\link[=mutate]{mutate()}} without having to -use \code{[[1]]}. This makes \code{summarise()} on a rowwise tbl -effectively equivalent to \code{\link[plyr:ldply]{plyr::ldply()}}. - -Slice does not work with relational databases because they have no -intrinsic notion of row order. If you want to perform the equivalent -operation, use \code{\link[=filter]{filter()}} and \code{\link[=row_number]{row_number()}}. -} -\section{Methods}{ - -This function is a \strong{generic}, which means that packages can provide -implementations (methods) for other classes. See the documentation of -individual methods for extra arguments and differences in behaviour. - -The following methods are currently available in loaded packages: - - -This function is a \strong{generic}, which means that packages can provide -implementations (methods) for other classes. See the documentation of -individual methods for extra arguments and differences in behaviour. 
- -The following methods are currently available in loaded packages: - - -These function are \strong{generic}s, which means that packages can provide -implementations (methods) for other classes. See the documentation of -individual methods for extra arguments and differences in behaviour. - -Methods available in currently loaded packages: - - -This function is a \strong{generic}, which means that packages can provide -implementations (methods) for other classes. See the documentation of -individual methods for extra arguments and differences in behaviour. - -The following methods are currently available in loaded packages: - - -These function are \strong{generic}s, which means that packages can provide -implementations (methods) for other classes. See the documentation of -individual methods for extra arguments and differences in behaviour. - -Methods available in currently loaded packages: - - -This function is a \strong{generic}, which means that packages can provide -implementations (methods) for other classes. See the documentation of -individual methods for extra arguments and differences in behaviour. - -The following methods are currently available in loaded packages: - - -These function are \strong{generic}s, which means that packages can provide -implementations (methods) for other classes. See the documentation of -individual methods for extra arguments and differences in behaviour. - -Methods available in currently loaded packages: -\itemize{ -\item \code{slice()}: \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("slice")}. -\item \code{slice_head()}: \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("slice_head")}. -\item \code{slice_tail()}: \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("slice_tail")}. -\item \code{slice_min()}: \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("slice_min")}. -\item \code{slice_max()}: \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("slice_max")}. 
-\item \code{slice_sample()}: \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("slice_sample")}. -} - - -This function is a \strong{generic}, which means that packages can provide -implementations (methods) for other classes. See the documentation of -individual methods for extra arguments and differences in behaviour. - -The following methods are currently available in loaded packages: -\Sexpr[stage=render,results=rd]{dplyr:::methods_rd("select")}. - - -This function is a \strong{generic}, which means that packages can provide -implementations (methods) for other classes. See the documentation of -individual methods for extra arguments and differences in behaviour. - -The following methods are currently available in loaded packages: -\Sexpr[stage=render,results=rd]{dplyr:::methods_rd("pull")}. -} - -\section{Useful filter functions}{ - -\itemize{ -\item \code{\link{==}}, \code{\link{>}}, \code{\link{>=}} etc -\item \code{\link{&}}, \code{\link{|}}, \code{\link{!}}, \code{\link[=xor]{xor()}} -\item \code{\link[=is.na]{is.na()}} -\item \code{\link[=between]{between()}}, \code{\link[=near]{near()}} -} -} - -\section{Grouped tibbles}{ - - -Because filtering expressions are computed within groups, they may -yield different results on grouped tibbles. This will be the case -as soon as an aggregating, lagging, or ranking function is -involved. Compare this ungrouped filtering: - -The former keeps rows with \code{mass} greater than the global average -whereas the latter keeps rows with \code{mass} greater than the gender - -average. - - - -Because mutating expressions are computed within groups, they may -yield different results on grouped tibbles. This will be the case -as soon as an aggregating, lagging, or ranking function is -involved. Compare this ungrouped mutate: - -With the grouped equivalent: - -The former normalises \code{mass} by the global average whereas the -latter normalises by the averages within gender levels. 
-} - -\section{Useful functions}{ - -\itemize{ -\item Center: \code{\link[=mean]{mean()}}, \code{\link[=median]{median()}} -\item Spread: \code{\link[=sd]{sd()}}, \code{\link[=IQR]{IQR()}}, \code{\link[=mad]{mad()}} -\item Range: \code{\link[=min]{min()}}, \code{\link[=max]{max()}}, \code{\link[=quantile]{quantile()}} -\item Position: \code{\link[=first]{first()}}, \code{\link[=last]{last()}}, \code{\link[=nth]{nth()}}, -\item Count: \code{\link[=n]{n()}}, \code{\link[=n_distinct]{n_distinct()}} -\item Logical: \code{\link[=any]{any()}}, \code{\link[=all]{all()}} -} -} - -\section{Backend variations}{ - - -The data frame backend supports creating a variable and using it in the -same summary. This means that previously created summary variables can be -further transformed or combined within the summary, as in \code{\link[=mutate]{mutate()}}. -However, it also means that summary variables with the same names as previous -variables overwrite them, making those variables unavailable to later summary -variables. - -This behaviour may not be supported in other backends. To avoid unexpected -results, consider using new names for your summary variables, especially when -creating multiple summaries. 
-} - -\section{Useful mutate functions}{ - -\itemize{ -\item \code{\link{+}}, \code{\link{-}}, \code{\link[=log]{log()}}, etc., for their usual mathematical meanings -\item \code{\link[=lead]{lead()}}, \code{\link[=lag]{lag()}} -\item \code{\link[=dense_rank]{dense_rank()}}, \code{\link[=min_rank]{min_rank()}}, \code{\link[=percent_rank]{percent_rank()}}, \code{\link[=row_number]{row_number()}}, -\code{\link[=cume_dist]{cume_dist()}}, \code{\link[=ntile]{ntile()}} -\item \code{\link[=cumsum]{cumsum()}}, \code{\link[=cummean]{cummean()}}, \code{\link[=cummin]{cummin()}}, \code{\link[=cummax]{cummax()}}, \code{\link[=cumany]{cumany()}}, \code{\link[=cumall]{cumall()}} -\item \code{\link[=na_if]{na_if()}}, \code{\link[=coalesce]{coalesce()}} -\item \code{\link[=if_else]{if_else()}}, \code{\link[=recode]{recode()}}, \code{\link[=case_when]{case_when()}} -} -} - -\section{Scoped selection and renaming}{ - - -Use the three scoped variants (\code{\link[=rename_all]{rename_all()}}, \code{\link[=rename_if]{rename_if()}}, \code{\link[=rename_at]{rename_at()}}) -to renaming a set of variables with a function. 
-} - -\examples{ -`\%>\%` <- magrittr::`\%>\%` -pbmc_small \%>\% - - arrange(nFeature_RNA) - -`\%>\%` <- magrittr::`\%>\%` -pbmc_small \%>\% - - distinct(groups) - - -`\%>\%` <- magrittr::`\%>\%` -pbmc_small \%>\% - - filter(groups == "g1") - -# Learn more in ?dplyr_tidy_eval - -`\%>\%` <- magrittr::`\%>\%` -pbmc_small \%>\% - - group_by(groups) - -`\%>\%` <- magrittr::`\%>\%` -pbmc_small \%>\% - - summarise(mean(nCount_RNA)) - -`\%>\%` <- magrittr::`\%>\%` -pbmc_small \%>\% - - mutate(nFeature_RNA=1) - -`\%>\%` <- magrittr::`\%>\%` -pbmc_small \%>\% - - rename(s_score=nFeature_RNA) - -`\%>\%` <- magrittr::`\%>\%` - -`\%>\%` <- magrittr::`\%>\%` - -tt <- pbmc_small -tt \%>\% left_join(tt \%>\% distinct(groups) \%>\% mutate(new_column=1:2)) -`\%>\%` <- magrittr::`\%>\%` - -tt <- pbmc_small -tt \%>\% inner_join(tt \%>\% distinct(groups) \%>\% mutate(new_column=1:2) \%>\% slice(1)) - -`\%>\%` <- magrittr::`\%>\%` - -tt <- pbmc_small -tt \%>\% right_join(tt \%>\% distinct(groups) \%>\% mutate(new_column=1:2) \%>\% slice(1)) - -`\%>\%` <- magrittr::`\%>\%` - -tt <- pbmc_small -tt \%>\% full_join(tibble::tibble(groups="g1", other=1:4)) - - -`\%>\%` <- magrittr::`\%>\%` -pbmc_small \%>\% - - slice(1) - -`\%>\%` <- magrittr::`\%>\%` -pbmc_small \%>\% - - select(cell, orig.ident) - -`\%>\%` <- magrittr::`\%>\%` -pbmc_small \%>\% - - sample_n(50) -pbmc_small \%>\% - - sample_frac(0.1) - - -`\%>\%` <- magrittr::`\%>\%` -pbmc_small \%>\% - - count(groups) - -`\%>\%` <- magrittr::`\%>\%` -pbmc_small \%>\% - - pull(groups) -} -\seealso{ -\code{\link[=filter_all]{filter_all()}}, \code{\link[=filter_if]{filter_if()}} and \code{\link[=filter_at]{filter_at()}}. 
-} -\concept{grouping functions} -\concept{single table verbs} -\keyword{internal} diff --git a/man/extract-methods.Rd b/man/extract.Rd similarity index 51% rename from man/extract-methods.Rd rename to man/extract.Rd index 834f879..0fa1b02 100644 --- a/man/extract-methods.Rd +++ b/man/extract.Rd @@ -1,7 +1,6 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/tidyr_methods.R -\name{extract} -\alias{extract} +\name{extract.SingleCellExperiment} \alias{extract.SingleCellExperiment} \title{Extract a character column into multiple columns using regular expression groups} @@ -17,45 +16,47 @@ expression groups} ) } \arguments{ -\item{data}{A tidySingleCellExperiment object} +\item{data}{A data frame.} -\item{col}{Column name or position. This is passed to -\code{\link[tidyselect:vars_pull]{tidyselect::vars_pull()}}. - -This argument is passed by expression and supports -\link[rlang:topic-inject]{quasiquotation} (you can unquote column -names or column positions).} +\item{col}{<\code{\link[tidyr:tidyr_tidy_select]{tidy-select}}> Column to expand.} \item{into}{Names of new variables to create as character vector. Use \code{NA} to omit the variable in the output.} -\item{regex}{a regular expression used to extract the desired values. -There should be one group (defined by \verb{()}) for each element of \code{into}.} +\item{regex}{A string representing a regular expression used to extract the +desired values. There should be one group (defined by \verb{()}) for each +element of \code{into}.} \item{remove}{If \code{TRUE}, remove input column from output data frame.} \item{convert}{If \code{TRUE}, will run \code{\link[=type.convert]{type.convert()}} with -\code{as.is=TRUE} on new columns. This is useful if the component +\code{as.is = TRUE} on new columns. This is useful if the component columns are integer, numeric or logical. 
NB: this will cause string \code{"NA"}s to be converted to \code{NA}s.} \item{...}{Additional arguments passed on to methods.} } -\value{ -A tidySingleCellExperiment objector a tibble depending on input -} \description{ +\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#superseded}{\figure{lifecycle-superseded.svg}{options: alt='[Superseded]'}}}{\strong{[Superseded]}} + +\code{extract()} has been superseded in favour of \code{\link[tidyr:separate_wider_regex]{separate_wider_regex()}} +because it has a more polished API and better handling of problems. +Superseded functions will not go away, but will only receive critical bug +fixes. + Given a regular expression with capturing groups, \code{extract()} turns each group into a new column. If the groups don't match, or the input is NA, the output will be NA. } \examples{ +pbmc_small|> + extract(groups, + into="g", + regex="g([0-9])", + convert=TRUE) -pbmc_small \%>\% - - extract(groups, into="g", regex="g([0-9])", convert=TRUE) } \seealso{ -\code{\link[=separate]{separate()}} to split up by a separator. +\code{\link[tidyr:separate]{separate()}} to split up by a separator. } diff --git a/man/filter.Rd b/man/filter.Rd new file mode 100644 index 0000000..cdf9656 --- /dev/null +++ b/man/filter.Rd @@ -0,0 +1,116 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/dplyr_methods.R +\name{filter.SingleCellExperiment} +\alias{filter.SingleCellExperiment} +\title{Keep rows that match a condition} +\usage{ +\method{filter}{SingleCellExperiment}(.data, ..., .preserve = FALSE) +} +\arguments{ +\item{.data}{A data frame, data frame extension (e.g. a tibble), or a +lazy data frame (e.g. from dbplyr or dtplyr). See \emph{Methods}, below, for +more details.} + +\item{...}{<\code{\link[rlang:args_data_masking]{data-masking}}> Expressions that +return a logical value, and are defined in terms of the variables in +\code{.data}. 
If multiple expressions are included, they are combined with the +\code{&} operator. Only rows for which all conditions evaluate to \code{TRUE} are +kept.} + +\item{.preserve}{Relevant when the \code{.data} input is grouped. +If \code{.preserve = FALSE} (the default), the grouping structure +is recalculated based on the resulting data, otherwise the grouping is kept as is.} +} +\value{ +An object of the same type as \code{.data}. The output has the following properties: +\itemize{ +\item Rows are a subset of the input, but appear in the same order. +\item Columns are not modified. +\item The number of groups may be reduced (if \code{.preserve} is not \code{TRUE}). +\item Data frame attributes are preserved. +} +} +\description{ +The \code{filter()} function is used to subset a data frame, +retaining all rows that satisfy your conditions. +To be retained, the row must produce a value of \code{TRUE} for all conditions. +Note that when a condition evaluates to \code{NA} +the row will be dropped, unlike base subsetting with \code{[}. +} +\details{ +The \code{filter()} function is used to subset the rows of +\code{.data}, applying the expressions in \code{...} to the column values to determine which +rows should be retained. It can be applied to both grouped and ungrouped data (see \code{\link[dplyr:group_by]{group_by()}} and +\code{\link[dplyr:ungroup]{ungroup()}}). However, dplyr is not yet smart enough to optimise the filtering +operation on grouped datasets that do not need grouped calculations. For this +reason, filtering is often considerably faster on ungrouped data. 
+} +\section{Useful filter functions}{ + + + +There are many functions and operators that are useful when constructing the +expressions used to filter the data: +\itemize{ +\item \code{\link{==}}, \code{\link{>}}, \code{\link{>=}} etc +\item \code{\link{&}}, \code{\link{|}}, \code{\link{!}}, \code{\link[=xor]{xor()}} +\item \code{\link[=is.na]{is.na()}} +\item \code{\link[dplyr:between]{between()}}, \code{\link[dplyr:near]{near()}} +} + +} + +\section{Grouped tibbles}{ + + + +Because filtering expressions are computed within groups, they may +yield different results on grouped tibbles. This will be the case +as soon as an aggregating, lagging, or ranking function is +involved. Compare this ungrouped filtering: + +\if{html}{\out{
<div class="sourceCode">}}\preformatted{starwars \%>\% filter(mass > mean(mass, na.rm = TRUE)) +}\if{html}{\out{</div>
}} + +With the grouped equivalent: + +\if{html}{\out{<div class="sourceCode">
}}\preformatted{starwars \%>\% group_by(gender) \%>\% filter(mass > mean(mass, na.rm = TRUE)) +}\if{html}{\out{</div>
}} + +In the ungrouped version, \code{filter()} compares the value of \code{mass} in each row to +the global average (taken over the whole data set), keeping only the rows with +\code{mass} greater than this global average. In contrast, the grouped version calculates +the average mass separately for each \code{gender} group, and keeps rows with \code{mass} greater +than the relevant within-gender average. + +} + +\section{Methods}{ + + +This function is a \strong{generic}, which means that packages can provide +implementations (methods) for other classes. See the documentation of +individual methods for extra arguments and differences in behaviour. + +The following methods are currently available in loaded packages: +\Sexpr[stage=render,results=rd]{dplyr:::methods_rd("filter")}. + +} + +\examples{ +pbmc_small |> + filter(groups == "g1") + +# Learn more in ?dplyr_tidy_eval + +} +\seealso{ +Other single table verbs: +\code{\link[dplyr]{arrange}()}, +\code{\link[dplyr]{mutate}()}, +\code{\link[dplyr]{reframe}()}, +\code{\link[dplyr]{rename}()}, +\code{\link[dplyr]{select}()}, +\code{\link[dplyr]{slice}()}, +\code{\link[dplyr]{summarise}()} +} diff --git a/man/formatting.Rd b/man/formatting.Rd new file mode 100644 index 0000000..ba4e0d8 --- /dev/null +++ b/man/formatting.Rd @@ -0,0 +1,50 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/print_method.R +\name{print.SingleCellExperiment} +\alias{print.SingleCellExperiment} +\alias{print} +\title{Printing tibbles} +\usage{ +\method{print}{SingleCellExperiment}(x, ..., n = NULL, width = NULL) +} +\arguments{ +\item{x}{Object to format or print.} + +\item{...}{Passed on to \code{\link[=tbl_format_setup]{tbl_format_setup()}}.} + +\item{n}{Number of rows to show. If \code{NULL}, the default, will print all rows +if less than the \code{print_max} \link[pillar:pillar_options]{option}. 
+Otherwise, will print as many rows as specified by the +\code{print_min} \link[pillar:pillar_options]{option}.} + +\item{width}{Width of text output to generate. This defaults to \code{NULL}, which +means use the \code{width} \link[pillar:pillar_options]{option}.} +} +\description{ +One of the main features of the \code{tbl_df} class is the printing: +\itemize{ +\item Tibbles only print as many rows and columns as fit on one screen, +supplemented by a summary of the remaining rows and columns. +\item Tibble reveals the type of each column, which keeps the user informed about +whether a variable is, e.g., \verb{<chr>} or \verb{<fct>} (character versus factor). +See \code{vignette("types")} for an overview of common +type abbreviations. +} + +Printing can be tweaked for a one-off call by calling \code{print()} explicitly +and setting arguments like \code{n} and \code{width}. More persistent control is +available by setting the options described in \link[pillar:pillar_options]{pillar::pillar_options}. +See also \code{vignette("digits")} for a comparison to base options, +and \code{vignette("numbers")} that showcases \code{\link[tibble:num]{num()}} and \code{\link[tibble:char]{char()}} +for creating columns with custom formatting options. + +As of tibble 3.1.0, printing is handled entirely by the \pkg{pillar} package. +If you implement a package that extends tibble, +the printed output can be customized in various ways. +See \code{vignette("extending", package = "pillar")} for details, +and \link[pillar:pillar_options]{pillar::pillar_options} for options that control the display in the console.
+} +\examples{ +print(pbmc_small) + +} diff --git a/man/full_join.Rd b/man/full_join.Rd new file mode 100644 index 0000000..24fc47b --- /dev/null +++ b/man/full_join.Rd @@ -0,0 +1,169 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/dplyr_methods.R +\name{full_join.SingleCellExperiment} +\alias{full_join.SingleCellExperiment} +\title{Mutating joins} +\usage{ +\method{full_join}{SingleCellExperiment}(x, y, by = NULL, copy = FALSE, suffix = c(".x", ".y"), ...) +} +\arguments{ +\item{x, y}{A pair of data frames, data frame extensions (e.g. a tibble), or +lazy data frames (e.g. from dbplyr or dtplyr). See \emph{Methods}, below, for +more details.} + +\item{by}{A join specification created with \code{\link[dplyr:join_by]{join_by()}}, or a character +vector of variables to join by. + +If \code{NULL}, the default, \verb{*_join()} will perform a natural join, using all +variables in common across \code{x} and \code{y}. A message lists the variables so +that you can check they're correct; suppress the message by supplying \code{by} +explicitly. + +To join on different variables between \code{x} and \code{y}, use a \code{\link[dplyr:join_by]{join_by()}} +specification. For example, \code{join_by(a == b)} will match \code{x$a} to \code{y$b}. + +To join by multiple variables, use a \code{\link[dplyr:join_by]{join_by()}} specification with +multiple expressions. For example, \code{join_by(a == b, c == d)} will match +\code{x$a} to \code{y$b} and \code{x$c} to \code{y$d}. If the column names are the same between +\code{x} and \code{y}, you can shorten this by listing only the variable names, like +\code{join_by(a, c)}. + +\code{\link[dplyr:join_by]{join_by()}} can also be used to perform inequality, rolling, and overlap +joins. See the documentation at \link[dplyr:join_by]{?join_by} for details on +these types of joins. + +For simple equality joins, you can alternatively specify a character vector +of variable names to join by. 
For example, \code{by = c("a", "b")} joins \code{x$a} +to \code{y$a} and \code{x$b} to \code{y$b}. If variable names differ between \code{x} and \code{y}, +use a named character vector like \code{by = c("x_a" = "y_a", "x_b" = "y_b")}. + +To perform a cross-join, generating all combinations of \code{x} and \code{y}, see +\code{\link[dplyr:cross_join]{cross_join()}}.} + +\item{copy}{If \code{x} and \code{y} are not from the same data source, +and \code{copy} is \code{TRUE}, then \code{y} will be copied into the +same src as \code{x}. This allows you to join tables across srcs, but +it is a potentially expensive operation so you must opt into it.} + +\item{suffix}{If there are non-joined duplicate variables in \code{x} and +\code{y}, these suffixes will be added to the output to disambiguate them. +Should be a character vector of length 2.} + +\item{...}{Other parameters passed onto methods.} +} +\value{ +An object of the same type as \code{x} (including the same groups). The order of +the rows and columns of \code{x} is preserved as much as possible. The output has +the following properties: +\itemize{ +\item The rows are affected by the join type. +\itemize{ +\item \code{inner_join()} returns matched \code{x} rows. +\item \code{left_join()} returns all \code{x} rows. +\item \code{right_join()} returns matched \code{x} rows, followed by unmatched \code{y} rows. +\item \code{full_join()} returns all \code{x} rows, followed by unmatched \code{y} rows. +} +\item Output columns include all columns from \code{x} and all non-key columns from +\code{y}. If \code{keep = TRUE}, the key columns from \code{y} are included as well. +\item If non-key columns in \code{x} and \code{y} have the same name, \code{suffix}es are added +to disambiguate. If \code{keep = TRUE} and key columns in \code{x} and \code{y} have +the same name, \code{suffix}es are added to disambiguate these as well. 
+\item If \code{keep = FALSE}, output columns included in \code{by} are coerced to their +common type between \code{x} and \code{y}. +} +} +\description{ +Mutating joins add columns from \code{y} to \code{x}, matching observations based on +the keys. There are four mutating joins: the inner join, and the three outer +joins. +\subsection{Inner join}{ + +An \code{inner_join()} only keeps observations from \code{x} that have a matching key +in \code{y}. + +The most important property of an inner join is that unmatched rows in either +input are not included in the result. This means that generally inner joins +are not appropriate in most analyses, because it is too easy to lose +observations. +} + +\subsection{Outer joins}{ + +The three outer joins keep observations that appear in at least one of the +data frames: +\itemize{ +\item A \code{left_join()} keeps all observations in \code{x}. +\item A \code{right_join()} keeps all observations in \code{y}. +\item A \code{full_join()} keeps all observations in \code{x} and \code{y}. +} +} +} +\section{Many-to-many relationships}{ + + + +By default, dplyr guards against many-to-many relationships in equality joins +by throwing a warning. These occur when both of the following are true: +\itemize{ +\item A row in \code{x} matches multiple rows in \code{y}. +\item A row in \code{y} matches multiple rows in \code{x}. +} + +This is typically surprising, as most joins involve a relationship of +one-to-one, one-to-many, or many-to-one, and is often the result of an +improperly specified join. Many-to-many relationships are particularly +problematic because they can result in a Cartesian explosion of the number of +rows returned from the join. + +If a many-to-many relationship is expected, silence this warning by +explicitly setting \code{relationship = "many-to-many"}. 
+ +In production code, it is best to preemptively set \code{relationship} to whatever +relationship you expect to exist between the keys of \code{x} and \code{y}, as this +forces an error to occur immediately if the data doesn't align with your +expectations. + +Inequality joins typically result in many-to-many relationships by nature, so +they don't warn on them by default, but you should still take extra care when +specifying an inequality join, because they also have the capability to +return a large number of rows. + +Rolling joins don't warn on many-to-many relationships either, but many +rolling joins follow a many-to-one relationship, so it is often useful to +set \code{relationship = "many-to-one"} to enforce this. + +Note that in SQL, most database providers won't let you specify a +many-to-many relationship between two tables, instead requiring that you +create a third \emph{junction table} that results in two one-to-many relationships +instead. + +} + +\section{Methods}{ + + +These functions are \strong{generic}s, which means that packages can provide +implementations (methods) for other classes. See the documentation of +individual methods for extra arguments and differences in behaviour. + +Methods available in currently loaded packages: +\itemize{ +\item \code{inner_join()}: \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("inner_join")}. +\item \code{left_join()}: \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("left_join")}. +\item \code{right_join()}: \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("right_join")}. +\item \code{full_join()}: \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("full_join")}. 
+} + +} + +\examples{ +tt <- pbmc_small +tt |> full_join(tibble::tibble(groups="g1", other=1:4)) + +} +\seealso{ +Other joins: +\code{\link[dplyr]{cross_join}()}, +\code{\link[dplyr]{filter-joins}}, +\code{\link[dplyr]{nest_join}()} +} diff --git a/man/ggplot.Rd b/man/ggplot.Rd new file mode 100644 index 0000000..e0dbfc0 --- /dev/null +++ b/man/ggplot.Rd @@ -0,0 +1,66 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/ggplot2_methods.R +\name{ggplot.SingleCellExperiment} +\alias{ggplot.SingleCellExperiment} +\title{Create a new \code{ggplot} from a \code{tidySingleCellExperiment}} +\usage{ +\method{ggplot}{SingleCellExperiment}(data = NULL, mapping = aes(), ..., environment = parent.frame()) +} +\arguments{ +\item{data}{Default dataset to use for plot. If not already a data.frame, +will be converted to one by \code{\link[ggplot2:fortify]{fortify()}}. If not specified, +must be supplied in each layer added to the plot.} + +\item{mapping}{Default list of aesthetic mappings to use for plot. +If not specified, must be supplied in each layer added to the plot.} + +\item{...}{Other arguments passed on to methods. Not currently used.} + +\item{environment}{\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#deprecated}{\figure{lifecycle-deprecated.svg}{options: alt='[Deprecated]'}}}{\strong{[Deprecated]}} Used prior to tidy +evaluation.} +} +\description{ +\code{ggplot()} initializes a ggplot object. It can be used to +declare the input data frame for a graphic and to specify the +set of plot aesthetics intended to be common throughout all +subsequent layers unless specifically overridden. +} +\details{ +\code{ggplot()} is used to construct the initial plot object, +and is almost always followed by a plus sign (\code{+}) to add +components to the plot. 
+ +There are three common patterns used to invoke \code{ggplot()}: +\itemize{ +\item \verb{ggplot(data = df, mapping = aes(x, y, other aesthetics))} +\item \code{ggplot(data = df)} +\item \code{ggplot()} +} + +The first pattern is recommended if all layers use the same +data and the same set of aesthetics, although this method +can also be used when adding a layer using data from another +data frame. + +The second pattern specifies the default data frame to use +for the plot, but no aesthetics are defined up front. This +is useful when one data frame is used predominantly for the +plot, but the aesthetics vary from one layer to another. + +The third pattern initializes a skeleton \code{ggplot} object, which +is fleshed out as layers are added. This is useful when +multiple data frames are used to produce different layers, as +is often the case in complex graphics. + +The \verb{data =} and \verb{mapping =} specifications in the arguments are optional +(and are often omitted in practice), so long as the data and the mapping +values are passed into the function in the right order. In the examples +below, however, they are left in place for clarity. +} +\examples{ +library(ggplot2) +pbmc_small |> + ggplot(aes(groups, nCount_RNA)) + + geom_boxplot() + +} diff --git a/man/ggplot2-methods.Rd b/man/ggplot2-methods.Rd deleted file mode 100644 index 171dd34..0000000 --- a/man/ggplot2-methods.Rd +++ /dev/null @@ -1,53 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/ggplot2_methods.R -\name{ggplot} -\alias{ggplot} -\title{Create a new ggplot from a tidySingleCellExperiment object} -\arguments{ -\item{.data}{Default dataset to use for plot. If not already a data.frame, -will be converted to one by \code{\link[=fortify]{fortify()}}. If not specified, -must be supplied in each layer added to the plot.} - -\item{mapping}{Default list of aesthetic mappings to use for plot. 
-If not specified, must be supplied in each layer added to the plot.} - -\item{...}{Other arguments passed on to methods. Not currently used.} - -\item{environment}{DEPRECATED. Used prior to tidy evaluation.} -} -\value{ -A ggplot -} -\description{ -\code{ggplot()} initializes a ggplot object. It can be used to -declare the input data frame for a graphic and to specify the -set of plot aesthetics intended to be common throughout all -subsequent layers unless specifically overridden. -} -\details{ -\code{ggplot()} is used to construct the initial plot object, -and is almost always followed by \code{+} to add component to the -plot. There are three common ways to invoke \code{ggplot()}: - -The first method is recommended if all layers use the same -data and the same set of aesthetics, although this method -can also be used to add a layer using data from another -data frame. See the first example below. The second -method specifies the default data frame to use for the plot, -but no aesthetics are defined up front. This is useful when -one data frame is used predominantly as layers are added, -but the aesthetics may vary from one layer to another. The -third method initializes a skeleton \code{ggplot} object which -is fleshed out as layers are added. This method is useful when -multiple data frames are used to produce different layers, as -is often the case in complex graphics. -} -\examples{ - -library(ggplot2) - -tidySingleCellExperiment::pbmc_small \%>\% - - tidySingleCellExperiment::ggplot(aes(groups, nCount_RNA)) + - geom_boxplot() -} diff --git a/man/glimpse.Rd b/man/glimpse.Rd new file mode 100644 index 0000000..df1fd08 --- /dev/null +++ b/man/glimpse.Rd @@ -0,0 +1,44 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/tibble_methods.R +\name{glimpse.tidySingleCellExperiment} +\alias{glimpse.tidySingleCellExperiment} +\title{Get a glimpse of your data} +\usage{ +\method{glimpse}{tidySingleCellExperiment}(x, width = NULL, ...) 
+} +\arguments{ +\item{x}{An object to glimpse at.} + +\item{width}{Width of output: defaults to the setting of the +\code{width} \link[pillar:pillar_options]{option} (if finite) +or the width of the console.} + +\item{...}{Unused, for extensibility.} +} +\value{ +x original x is (invisibly) returned, allowing \code{glimpse()} to be +used within a data pipe line. +} +\description{ +\code{glimpse()} is like a transposed version of \code{print()}: +columns run down the page, and data runs across. +This makes it possible to see every column in a data frame. +It's a little like \code{\link[=str]{str()}} applied to a data frame +but it tries to show you as much data as possible. +(And it always shows the underlying data, even when applied +to a remote data source.) + +See \code{\link[pillar:format_glimpse]{format_glimpse()}} for details on the formatting. +} +\section{S3 methods}{ + + +\code{glimpse} is an S3 generic with a customised method for \code{tbl}s and +\code{data.frames}, and a default method that calls \code{\link[=str]{str()}}. + +} + +\examples{ +pbmc_small |> glimpse() + +} diff --git a/man/group_by.Rd b/man/group_by.Rd new file mode 100644 index 0000000..20a0547 --- /dev/null +++ b/man/group_by.Rd @@ -0,0 +1,99 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/dplyr_methods.R +\name{group_by.SingleCellExperiment} +\alias{group_by.SingleCellExperiment} +\title{Group by one or more variables} +\usage{ +\method{group_by}{SingleCellExperiment}(.data, ..., .add = FALSE, .drop = group_by_drop_default(.data)) +} +\arguments{ +\item{.data}{A data frame, data frame extension (e.g. a tibble), or a +lazy data frame (e.g. from dbplyr or dtplyr). See \emph{Methods}, below, for +more details.} + +\item{...}{In \code{group_by()}, variables or computations to group by. +Computations are always done on the ungrouped data frame. 
+To perform computations on the grouped data, you need to use +a separate \code{mutate()} step before the \code{group_by()}. +Computations are not allowed in \code{nest_by()}. +In \code{ungroup()}, variables to remove from the grouping.} + +\item{.add}{When \code{FALSE}, the default, \code{group_by()} will +override existing groups. To add to the existing groups, use +\code{.add = TRUE}. + +This argument was previously called \code{add}, but that prevented +creating a new grouping variable called \code{add}, and conflicts with +our naming conventions.} + +\item{.drop}{Drop groups formed by factor levels that don't appear in the +data? The default is \code{TRUE} except when \code{.data} has been previously +grouped with \code{.drop = FALSE}. See \code{\link[dplyr:group_by_drop_default]{group_by_drop_default()}} for details.} +} +\value{ +A grouped data frame with class \code{\link[dplyr]{grouped_df}}, +unless the combination of \code{...} and \code{.add} yields an empty set of +grouping columns, in which case a tibble will be returned. +} +\description{ +Most data operations are done on groups defined by variables. +\code{group_by()} takes an existing tbl and converts it into a grouped tbl +where operations are performed "by group". \code{ungroup()} removes grouping. +} +\section{Methods}{ + + +These functions are \strong{generic}s, which means that packages can provide +implementations (methods) for other classes. See the documentation of +individual methods for extra arguments and differences in behaviour. + +Methods available in currently loaded packages: +\itemize{ +\item \code{group_by()}: \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("group_by")}. +\item \code{ungroup()}: \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("ungroup")}. +} + +} + +\section{Ordering}{ + + +Currently, \code{group_by()} internally orders the groups in ascending order.
This +results in ordered output from functions that aggregate groups, such as +\code{\link[dplyr:summarise]{summarise()}}. + +When used as grouping columns, character vectors are ordered in the C locale +for performance and reproducibility across R sessions. If the resulting +ordering of your grouped operation matters and is dependent on the locale, +you should follow up the grouped operation with an explicit call to +\code{\link[dplyr:arrange]{arrange()}} and set the \code{.locale} argument. For example: + +\if{html}{\out{
<div class="sourceCode">}}\preformatted{data \%>\% + group_by(chr) \%>\% + summarise(avg = mean(x)) \%>\% + arrange(chr, .locale = "en") +}\if{html}{\out{</div>
}} + +This is often useful as a preliminary step before generating content intended +for humans, such as an HTML table. +\subsection{Legacy behavior}{ + +Prior to dplyr 1.1.0, character vector grouping columns were ordered in the +system locale. If you need to temporarily revert to this behavior, you can +set the global option \code{dplyr.legacy_locale} to \code{TRUE}, but this should be +used sparingly and you should expect this option to be removed in a future +version of dplyr. It is better to update existing code to explicitly call +\code{arrange(.locale = )} instead. Note that setting \code{dplyr.legacy_locale} will +also force calls to \code{\link[dplyr:arrange]{arrange()}} to use the system locale. +} + +} + +\examples{ +pbmc_small |> + group_by(groups) + +} +\seealso{ +\code{} +} diff --git a/man/join_features.Rd b/man/join_features.Rd index 3463178..f302ea0 100644 --- a/man/join_features.Rd +++ b/man/join_features.Rd @@ -1,10 +1,8 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/methods.R -\docType{methods} -\name{join_features} -\alias{join_features} +\name{join_features,SingleCellExperiment-method} \alias{join_features,SingleCellExperiment-method} -\title{Extract and join information for features.} +\title{join_features} \usage{ \S4method{join_features}{SingleCellExperiment}( .data, @@ -16,7 +14,7 @@ ) } \arguments{ -\item{.data}{A SingleCellExperiment object} +\item{.data}{A tidy SingleCellExperiment object} \item{features}{A vector of feature identifiers to join} @@ -29,21 +27,20 @@ \item{...}{Parameters to pass to join wide, i.e. assay name to extract feature abundance from and gene prefix, for shape="wide"} } \value{ -An object containing the information.for the specified features - -An object containing the information.for the specified features +A `tidySingleCellExperiment` object + containing information for the specified features. 
} \description{ -join_features() extracts and joins information for specified features +join_features() extracts and joins information for specific + features } \details{ -This function extracts information for specified features and returns the information in either long or wide format. +This function extracts information for specified features and + returns the information in either long or wide format. } \examples{ - data("pbmc_small") -pbmc_small \%>\% -join_features(features = c("HLA-DRA", "LYZ")) - +pbmc_small \%>\% join_features( + features=c("HLA-DRA", "LYZ")) } diff --git a/man/join_transcripts.Rd b/man/join_transcripts.Rd index 2954e36..c356635 100644 --- a/man/join_transcripts.Rd +++ b/man/join_transcripts.Rd @@ -27,7 +27,7 @@ join_transcripts( \item{...}{Parameters to pass to join wide, i.e. assay name to extract transcript abundance from} } \value{ -A \code{tbl} containing the information.for the specified transcripts +A `tbl` containing the information.for the specified transcripts } \description{ join_transcripts() extracts and joins information for specified transcripts diff --git a/man/left_join.Rd b/man/left_join.Rd new file mode 100644 index 0000000..c65f133 --- /dev/null +++ b/man/left_join.Rd @@ -0,0 +1,180 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/dplyr_methods.R +\name{left_join.SingleCellExperiment} +\alias{left_join.SingleCellExperiment} +\alias{inner_join.SingleCellExperiment} +\title{Mutating joins} +\usage{ +\method{left_join}{SingleCellExperiment}(x, y, by = NULL, copy = FALSE, suffix = c(".x", ".y"), ...) + +\method{inner_join}{SingleCellExperiment}(x, y, by = NULL, copy = FALSE, suffix = c(".x", ".y"), ...) +} +\arguments{ +\item{x, y}{A pair of data frames, data frame extensions (e.g. a tibble), or +lazy data frames (e.g. from dbplyr or dtplyr). 
See \emph{Methods}, below, for +more details.} + +\item{by}{A join specification created with \code{\link[dplyr:join_by]{join_by()}}, or a character +vector of variables to join by. + +If \code{NULL}, the default, \verb{*_join()} will perform a natural join, using all +variables in common across \code{x} and \code{y}. A message lists the variables so +that you can check they're correct; suppress the message by supplying \code{by} +explicitly. + +To join on different variables between \code{x} and \code{y}, use a \code{\link[dplyr:join_by]{join_by()}} +specification. For example, \code{join_by(a == b)} will match \code{x$a} to \code{y$b}. + +To join by multiple variables, use a \code{\link[dplyr:join_by]{join_by()}} specification with +multiple expressions. For example, \code{join_by(a == b, c == d)} will match +\code{x$a} to \code{y$b} and \code{x$c} to \code{y$d}. If the column names are the same between +\code{x} and \code{y}, you can shorten this by listing only the variable names, like +\code{join_by(a, c)}. + +\code{\link[dplyr:join_by]{join_by()}} can also be used to perform inequality, rolling, and overlap +joins. See the documentation at \link[dplyr:join_by]{?join_by} for details on +these types of joins. + +For simple equality joins, you can alternatively specify a character vector +of variable names to join by. For example, \code{by = c("a", "b")} joins \code{x$a} +to \code{y$a} and \code{x$b} to \code{y$b}. If variable names differ between \code{x} and \code{y}, +use a named character vector like \code{by = c("x_a" = "y_a", "x_b" = "y_b")}. + +To perform a cross-join, generating all combinations of \code{x} and \code{y}, see +\code{\link[dplyr:cross_join]{cross_join()}}.} + +\item{copy}{If \code{x} and \code{y} are not from the same data source, +and \code{copy} is \code{TRUE}, then \code{y} will be copied into the +same src as \code{x}. 
This allows you to join tables across srcs, but +it is a potentially expensive operation so you must opt into it.} + +\item{suffix}{If there are non-joined duplicate variables in \code{x} and +\code{y}, these suffixes will be added to the output to disambiguate them. +Should be a character vector of length 2.} + +\item{...}{Other parameters passed onto methods.} +} +\value{ +An object of the same type as \code{x} (including the same groups). The order of +the rows and columns of \code{x} is preserved as much as possible. The output has +the following properties: +\itemize{ +\item The rows are affected by the join type. +\itemize{ +\item \code{inner_join()} returns matched \code{x} rows. +\item \code{left_join()} returns all \code{x} rows. +\item \code{right_join()} returns matched \code{x} rows, followed by unmatched \code{y} rows. +\item \code{full_join()} returns all \code{x} rows, followed by unmatched \code{y} rows. +} +\item Output columns include all columns from \code{x} and all non-key columns from +\code{y}. If \code{keep = TRUE}, the key columns from \code{y} are included as well. +\item If non-key columns in \code{x} and \code{y} have the same name, \code{suffix}es are added +to disambiguate. If \code{keep = TRUE} and key columns in \code{x} and \code{y} have +the same name, \code{suffix}es are added to disambiguate these as well. +\item If \code{keep = FALSE}, output columns included in \code{by} are coerced to their +common type between \code{x} and \code{y}. +} +} +\description{ +Mutating joins add columns from \code{y} to \code{x}, matching observations based on +the keys. There are four mutating joins: the inner join, and the three outer +joins. +\subsection{Inner join}{ + +An \code{inner_join()} only keeps observations from \code{x} that have a matching key +in \code{y}. + +The most important property of an inner join is that unmatched rows in either +input are not included in the result. 
This means that generally inner joins +are not appropriate in most analyses, because it is too easy to lose +observations. +} + +\subsection{Outer joins}{ + +The three outer joins keep observations that appear in at least one of the +data frames: +\itemize{ +\item A \code{left_join()} keeps all observations in \code{x}. +\item A \code{right_join()} keeps all observations in \code{y}. +\item A \code{full_join()} keeps all observations in \code{x} and \code{y}. +} +} +} +\section{Many-to-many relationships}{ + + + +By default, dplyr guards against many-to-many relationships in equality joins +by throwing a warning. These occur when both of the following are true: +\itemize{ +\item A row in \code{x} matches multiple rows in \code{y}. +\item A row in \code{y} matches multiple rows in \code{x}. +} + +This is typically surprising, as most joins involve a relationship of +one-to-one, one-to-many, or many-to-one, and is often the result of an +improperly specified join. Many-to-many relationships are particularly +problematic because they can result in a Cartesian explosion of the number of +rows returned from the join. + +If a many-to-many relationship is expected, silence this warning by +explicitly setting \code{relationship = "many-to-many"}. + +In production code, it is best to preemptively set \code{relationship} to whatever +relationship you expect to exist between the keys of \code{x} and \code{y}, as this +forces an error to occur immediately if the data doesn't align with your +expectations. + +Inequality joins typically result in many-to-many relationships by nature, so +they don't warn on them by default, but you should still take extra care when +specifying an inequality join, because they also have the capability to +return a large number of rows. + +Rolling joins don't warn on many-to-many relationships either, but many +rolling joins follow a many-to-one relationship, so it is often useful to +set \code{relationship = "many-to-one"} to enforce this. 
+ +Note that in SQL, most database providers won't let you specify a +many-to-many relationship between two tables, instead requiring that you +create a third \emph{junction table} that results in two one-to-many relationships +instead. + +} + +\section{Methods}{ + + +These functions are \strong{generic}s, which means that packages can provide +implementations (methods) for other classes. See the documentation of +individual methods for extra arguments and differences in behaviour. + +Methods available in currently loaded packages: +\itemize{ +\item \code{inner_join()}: \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("inner_join")}. +\item \code{left_join()}: \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("left_join")}. +\item \code{right_join()}: \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("right_join")}. +\item \code{full_join()}: \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("full_join")}. +} + +} + +\examples{ +tt <- pbmc_small +tt |> left_join(tt |> + distinct(groups) |> + mutate(new_column=1:2)) + +tt <- pbmc_small +tt |> inner_join(tt |> + distinct(groups) |> + mutate(new_column=1:2) |> + slice(1)) + +} +\seealso{ +Other joins: +\code{\link[dplyr]{cross_join}()}, +\code{\link[dplyr]{filter-joins}}, +\code{\link[dplyr]{nest_join}()} +} diff --git a/man/mutate.Rd b/man/mutate.Rd new file mode 100644 index 0000000..92c7b55 --- /dev/null +++ b/man/mutate.Rd @@ -0,0 +1,112 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/dplyr_methods.R +\name{mutate.SingleCellExperiment} +\alias{mutate.SingleCellExperiment} +\title{Create, modify, and delete columns} +\usage{ +\method{mutate}{SingleCellExperiment}(.data, ...) +} +\arguments{ +\item{.data}{A data frame, data frame extension (e.g. a tibble), or a +lazy data frame (e.g. from dbplyr or dtplyr). See \emph{Methods}, below, for +more details.} + +\item{...}{<\code{\link[rlang:args_data_masking]{data-masking}}> Name-value pairs. 
+The name gives the name of the column in the output. + +The value can be: +\itemize{ +\item A vector of length 1, which will be recycled to the correct length. +\item A vector the same length as the current group (or the whole data frame +if ungrouped). +\item \code{NULL}, to remove the column. +\item A data frame or tibble, to create multiple columns in the output. +}} +} +\value{ +An object of the same type as \code{.data}. The output has the following +properties: +\itemize{ +\item Columns from \code{.data} will be preserved according to the \code{.keep} argument. +\item Existing columns that are modified by \code{...} will always be returned in +their original location. +\item New columns created through \code{...} will be placed according to the +\code{.before} and \code{.after} arguments. +\item The number of rows is not affected. +\item Columns given the value \code{NULL} will be removed. +\item Groups will be recomputed if a grouping variable is mutated. +\item Data frame attributes are preserved. +} +} +\description{ +\code{mutate()} creates new columns that are functions of existing variables. +It can also modify (if the name is the same as an existing +column) and delete columns (by setting their value to \code{NULL}). 
+} +\section{Useful mutate functions}{ + + +\itemize{ +\item \code{\link{+}}, \code{\link{-}}, \code{\link[=log]{log()}}, etc., for their usual mathematical meanings +\item \code{\link[dplyr:lead]{lead()}}, \code{\link[dplyr:lag]{lag()}} +\item \code{\link[dplyr:dense_rank]{dense_rank()}}, \code{\link[dplyr:min_rank]{min_rank()}}, \code{\link[dplyr:percent_rank]{percent_rank()}}, \code{\link[dplyr:row_number]{row_number()}}, +\code{\link[dplyr:cume_dist]{cume_dist()}}, \code{\link[dplyr:ntile]{ntile()}} +\item \code{\link[=cumsum]{cumsum()}}, \code{\link[dplyr:cummean]{cummean()}}, \code{\link[=cummin]{cummin()}}, \code{\link[=cummax]{cummax()}}, \code{\link[dplyr:cumany]{cumany()}}, \code{\link[dplyr:cumall]{cumall()}} +\item \code{\link[dplyr:na_if]{na_if()}}, \code{\link[dplyr:coalesce]{coalesce()}} +\item \code{\link[dplyr:if_else]{if_else()}}, \code{\link[dplyr:recode]{recode()}}, \code{\link[dplyr:case_when]{case_when()}} +} + +} + +\section{Grouped tibbles}{ + + + +Because mutating expressions are computed within groups, they may +yield different results on grouped tibbles. This will be the case +as soon as an aggregating, lagging, or ranking function is +involved. Compare this ungrouped mutate: + +\if{html}{\out{
<div class="sourceCode">}}\preformatted{starwars \%>\% + select(name, mass, species) \%>\% + mutate(mass_norm = mass / mean(mass, na.rm = TRUE)) +}\if{html}{\out{</div>
}} + +With the grouped equivalent: + +\if{html}{\out{
<div class="sourceCode">}}\preformatted{starwars \%>\% + select(name, mass, species) \%>\% + group_by(species) \%>\% + mutate(mass_norm = mass / mean(mass, na.rm = TRUE)) +}\if{html}{\out{</div>
}} + +The former normalises \code{mass} by the global average whereas the +latter normalises by the averages within species levels. + +} + +\section{Methods}{ + + +This function is a \strong{generic}, which means that packages can provide +implementations (methods) for other classes. See the documentation of +individual methods for extra arguments and differences in behaviour. + +Methods available in currently loaded packages: +\Sexpr[stage=render,results=rd]{dplyr:::methods_rd("mutate")}. + +} + +\examples{ +pbmc_small |> + mutate(nFeature_RNA=1) + +} +\seealso{ +Other single table verbs: +\code{\link{arrange.SingleCellExperiment}()}, +\code{\link{rename.SingleCellExperiment}()}, +\code{\link{slice.SingleCellExperiment}()}, +\code{\link{summarise.SingleCellExperiment}()} +} +\concept{single table verbs} diff --git a/man/nest-methods.Rd b/man/nest-methods.Rd deleted file mode 100644 index fb257f9..0000000 --- a/man/nest-methods.Rd +++ /dev/null @@ -1,26 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/tidyr_methods.R -\name{nest} -\alias{nest} -\title{nest} -\arguments{ -\item{.data}{A tbl. 
(See tidyr)} - -\item{...}{Name-variable pairs of the form new_col=c(col1, col2, col3) (See tidyr)} - -\item{.names_sep}{See ?tidyr::nest} -} -\value{ -A tidySingleCellExperiment objector a tibble depending on input -} -\description{ -nest -} -\examples{ - -library(dplyr) -pbmc_small \%>\% - - nest(data=-groups) \%>\% - unnest(data) -} diff --git a/man/nest.Rd b/man/nest.Rd new file mode 100644 index 0000000..07a5834 --- /dev/null +++ b/man/nest.Rd @@ -0,0 +1,84 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/tidyr_methods.R +\name{nest.SingleCellExperiment} +\alias{nest.SingleCellExperiment} +\title{Nest rows into a list-column of data frames} +\usage{ +\method{nest}{SingleCellExperiment}(.data, ..., .names_sep = NULL) +} +\arguments{ +\item{.data}{A data frame.} + +\item{...}{<\code{\link[tidyr:tidyr_tidy_select]{tidy-select}}> Columns to nest; these will +appear in the inner data frames. + +Specified using name-variable pairs of the form +\code{new_col = c(col1, col2, col3)}. The right hand side can be any valid +tidyselect expression. + +If not supplied, then \code{...} is derived as all columns \emph{not} selected by +\code{.by}, and will use the column name from \code{.key}. + +\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#deprecated}{\figure{lifecycle-deprecated.svg}{options: alt='[Deprecated]'}}}{\strong{[Deprecated]}}: +previously you could write \code{df \%>\% nest(x, y, z)}. +Convert to \code{df \%>\% nest(data = c(x, y, z))}.} + +\item{.names_sep}{If \code{NULL}, the default, the inner names will come from +the former outer names. If a string, the new inner names will use the +outer names with \code{names_sep} automatically stripped. This makes +\code{names_sep} roughly symmetric between nesting and unnesting.} +} +\description{ +Nesting creates a list-column of data frames; unnesting flattens it back out +into regular columns. 
Nesting is implicitly a summarising operation: you +get one row for each group defined by the non-nested columns. This is useful +in conjunction with other summaries that work with whole datasets, most +notably models. + +Learn more in \code{vignette("nest")}. +} +\details{ +If neither \code{...} nor \code{.by} are supplied, \code{nest()} will nest all variables, +and will use the column name supplied through \code{.key}. +} +\section{New syntax}{ + + +tidyr 1.0.0 introduced a new syntax for \code{nest()} and \code{unnest()} that's +designed to be more similar to other functions. Converting to the new syntax +should be straightforward (guided by the message you'll receive) but if +you just need to run an old analysis, you can easily revert to the previous +behaviour using \code{\link[tidyr:nest_legacy]{nest_legacy()}} and \code{\link[tidyr:unnest_legacy]{unnest_legacy()}} as follows: + +\if{html}{\out{
}}\preformatted{library(tidyr) +nest <- nest_legacy +unnest <- unnest_legacy +}\if{html}{\out{
}} + +} + +\section{Grouped data frames}{ + + +\code{df \%>\% nest(data = c(x, y))} specifies the columns to be nested; i.e. the +columns that will appear in the inner data frame. \code{df \%>\% nest(.by = c(x, y))} specifies the columns to nest \emph{by}; i.e. the columns that will remain in +the outer data frame. An alternative way to achieve the latter is to \code{nest()} +a grouped data frame created by \code{\link[dplyr:group_by]{dplyr::group_by()}}. The grouping variables +remain in the outer data frame and the others are nested. The result +preserves the grouping of the input. + +Variables supplied to \code{nest()} will override grouping variables so that +\code{df \%>\% group_by(x, y) \%>\% nest(data = !z)} will be equivalent to +\code{df \%>\% nest(data = !z)}. + +You can't supply \code{.by} with a grouped data frame, as the groups already +represent what you are nesting by. + +} + +\examples{ +pbmc_small |> + nest(data=-groups) |> + unnest(data) + +} diff --git a/man/pbmc_small.Rd b/man/pbmc_small.Rd index c053995..8d2b9ed 100644 --- a/man/pbmc_small.Rd +++ b/man/pbmc_small.Rd @@ -5,8 +5,8 @@ \alias{pbmc_small} \title{pbmc_small} \format{ -A SingleCellExperiment object containing 80 Peripheral Blood -Mononuclear Cells (PBMC) from 10x Genomics. Generated by subsampling the PBMC dataset of 2,700 single cells. +A SingleCellExperiment object containing 80 Peripheral Blood + Mononuclear Cells (PBMC) from 10x Genomics. Generated by subsampling the PBMC dataset of 2,700 single cells. 
} \source{ \url{https://satijalab.org/seurat/v3.1/pbmc3k_tutorial.html} diff --git a/man/pbmc_small_nested_interactions.Rd b/man/pbmc_small_nested_interactions.Rd index 1d34cc7..128ed26 100644 --- a/man/pbmc_small_nested_interactions.Rd +++ b/man/pbmc_small_nested_interactions.Rd @@ -5,18 +5,18 @@ \alias{pbmc_small_nested_interactions} \title{Intercellular ligand-receptor interactions for 38 ligands from a single cell RNA-seq cluster.} \format{ -A tibble containing 100 rows and 9 columns. Cells are a subsample of -the PBMC dataset of 2,700 single cells. Cell interactions were identified with SingleCellSignalR. +A tibble containing 100 rows and 9 columns. Cells are a subsample of + the PBMC dataset of 2,700 single cells. Cell interactions were identified with SingleCellSignalR. \describe{ -\item{sample}{sample identifier} -\item{ligand}{cluster and ligand identifier} -\item{receptor}{cluster and receptor identifier} -\item{ligand.name}{ligand name} -\item{receptor.name}{receptor name} -\item{origin}{cluster containing ligand} -\item{destination}{cluster containing receptor} -\item{interaction.type}{type of interation, paracrine or autocrine} -\item{LRscore}{interaction score} + \item{sample}{sample identifier} + \item{ligand}{cluster and ligand identifier} + \item{receptor}{cluster and receptor identifier} + \item{ligand.name}{ligand name} + \item{receptor.name}{receptor name} + \item{origin}{cluster containing ligand} + \item{destination}{cluster containing receptor} + \item{interaction.type}{type of interaction, paracrine or autocrine} + \item{LRscore}{interaction score} } } \source{ @@ -26,7 +26,7 @@ the PBMC dataset of 2,700 single cells. Cell interactions were identified with S data(pbmc_small_nested_interactions) } \description{ -A dataset containing ligand-receptor interactions withibn a sample. There are 38 ligands from a single cell cluster versus +A dataset containing ligand-receptor interactions within a sample. 
There are 38 ligands from a single cell cluster versus 35 receptors in 6 other clusters. } \keyword{datasets} diff --git a/man/pipe.Rd b/man/pipe.Rd index a648c29..1f8f237 100644 --- a/man/pipe.Rd +++ b/man/pipe.Rd @@ -12,7 +12,7 @@ lhs \%>\% rhs \item{rhs}{A function call using the magrittr semantics.} } \value{ -The result of calling \code{rhs(lhs)}. +The result of calling `rhs(lhs)`. } \description{ See \code{magrittr::\link[magrittr:pipe]{\%>\%}} for details. diff --git a/man/pivot-methods.Rd b/man/pivot_longer.Rd similarity index 84% rename from man/pivot-methods.Rd rename to man/pivot_longer.Rd index b8eadf9..eb40919 100644 --- a/man/pivot-methods.Rd +++ b/man/pivot_longer.Rd @@ -1,14 +1,35 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/tidyr_methods.R -\name{pivot_longer} -\alias{pivot_longer} +\name{pivot_longer.SingleCellExperiment} +\alias{pivot_longer.SingleCellExperiment} \title{Pivot data from wide to long} +\usage{ +\method{pivot_longer}{SingleCellExperiment}( + data, + cols, + ..., + cols_vary = "fastest", + names_to = "name", + names_prefix = NULL, + names_sep = NULL, + names_pattern = NULL, + names_ptypes = NULL, + names_transform = NULL, + names_repair = "check_unique", + values_to = "value", + values_drop_na = FALSE, + values_ptypes = NULL, + values_transform = NULL +) +} \arguments{ \item{data}{A data frame to pivot.} -\item{cols}{<\code{\link[=tidyr_tidy_select]{tidy-select}}> Columns to pivot into +\item{cols}{<\code{\link[tidyr:tidyr_tidy_select]{tidy-select}}> Columns to pivot into longer format.} +\item{...}{Additional arguments passed on to methods.} + \item{cols_vary}{When pivoting \code{cols} into longer format, how should the output rows be arranged relative to their original row number? \itemize{ @@ -46,17 +67,36 @@ from the start of each variable name.} \item{names_sep, names_pattern}{If \code{names_to} contains multiple values, these arguments control how the column name is broken up. 
-\code{names_sep} takes the same specification as \code{\link[=separate]{separate()}}, and can either +\code{names_sep} takes the same specification as \code{\link[tidyr:separate]{separate()}}, and can either be a numeric vector (specifying positions to break on), or a single string (specifying a regular expression to split on). -\code{names_pattern} takes the same specification as \code{\link[=extract]{extract()}}, a regular +\code{names_pattern} takes the same specification as \code{\link[tidyr:extract]{extract()}}, a regular expression containing matching groups (\verb{()}). If these arguments do not give you enough control, use \code{pivot_longer_spec()} to create a spec object and process manually as needed.} +\item{names_ptypes, values_ptypes}{Optionally, a list of column name-prototype +pairs. Alternatively, a single empty prototype can be supplied, which will +be applied to all columns. A prototype (or ptype for short) is a +zero-length vector (like \code{integer()} or \code{numeric()}) that defines the type, +class, and attributes of a vector. Use these arguments if you want to +confirm that the created columns are the types that you expect. Note that +if you want to change (instead of confirm) the types of specific columns, +you should use \code{names_transform} or \code{values_transform} instead.} + +\item{names_transform, values_transform}{Optionally, a list of column +name-function pairs. Alternatively, a single function can be supplied, +which will be applied to all columns. Use these arguments if you need to +change the types of specific columns. For example, \code{names_transform = list(week = as.integer)} would convert a character variable called \code{week} +to an integer. 
+ +If not specified, the type of the columns generated from \code{names_to} will +be character, and the type of the variables generated from \code{values_to} +will be the common type of the input columns used to generate them.} + \item{names_repair}{What happens if the output has invalid column names? The default, \code{"check_unique"} is to error if the columns are duplicated. Use \code{"minimal"} to allow duplicates in the output, or \code{"unique"} to @@ -73,51 +113,24 @@ existing column names.} in the \code{value_to} column. This effectively converts explicit missing values to implicit missing values, and should generally be used only when missing values in \code{data} were created by its structure.} - -\item{names_transform, values_transform}{Optionally, a list of column -name-function pairs. Alternatively, a single function can be supplied, -which will be applied to all columns. Use these arguments if you need to -change the types of specific columns. For example, \code{names_transform = list(week = as.integer)} would convert a character variable called \code{week} -to an integer. - -If not specified, the type of the columns generated from \code{names_to} will -be character, and the type of the variables generated from \code{values_to} -will be the common type of the input columns used to generate them.} - -\item{names_ptypes, values_ptypes}{Optionally, a list of column name-prototype -pairs. Alternatively, a single empty prototype can be supplied, which will -be applied to all columns. A prototype (or ptype for short) is a -zero-length vector (like \code{integer()} or \code{numeric()}) that defines the type, -class, and attributes of a vector. Use these arguments if you want to -confirm that the created columns are the types that you expect. 
Note that -if you want to change (instead of confirm) the types of specific columns, -you should use \code{names_transform} or \code{values_transform} instead.} - -\item{...}{Additional arguments passed on to methods.} -} -\value{ -A tidySingleCellExperiment objector a tibble depending on input } \description{ -\Sexpr[results=rd, stage=render]{lifecycle::badge("maturing")} - \code{pivot_longer()} "lengthens" data, increasing the number of rows and decreasing the number of columns. The inverse transformation is -\code{\link[=pivot_wider]{pivot_wider()}} +\code{\link[tidyr:pivot_wider]{pivot_wider()}} Learn more in \code{vignette("pivot")}. } \details{ -\code{pivot_longer()} is an updated approach to \code{\link[=gather]{gather()}}, designed to be both +\code{pivot_longer()} is an updated approach to \code{\link[tidyr:gather]{gather()}}, designed to be both simpler to use and to handle more use cases. We recommend you use \code{pivot_longer()} for new code; \code{gather()} isn't going away but is no longer under active development. } \examples{ # See vignette("pivot") for examples and explanation +pbmc_small |> pivot_longer( + cols=c(orig.ident, groups), + names_to="name", values_to="value") -library(dplyr) -pbmc_small \%>\% - - pivot_longer(c(orig.ident, groups), names_to="name", values_to="value") } diff --git a/man/plot_ly.Rd b/man/plot_ly.Rd index 3b3d960..3975dbf 100644 --- a/man/plot_ly.Rd +++ b/man/plot_ly.Rd @@ -31,80 +31,80 @@ plot_ly( ) } \arguments{ -\item{data}{A data frame (optional) or \link[crosstalk:SharedData]{crosstalk::SharedData} object.} +\item{data}{A data frame (optional) or [crosstalk::SharedData] object.} -\item{...}{Arguments (i.e., attributes) passed along to the trace \code{type}. -See \code{\link[=schema]{schema()}} for a list of acceptable attributes for a given trace \code{type} -(by going to \code{traces} -> \code{type} -> \code{attributes}). 
Note that attributes +\item{...}{Arguments (i.e., attributes) passed along to the trace `type`. +See [schema()] for a list of acceptable attributes for a given trace `type` +(by going to `traces` -> `type` -> `attributes`). Note that attributes provided at this level may override other arguments -(e.g. \code{plot_ly(x=1:10, y=1:10, color=I("red"), marker=list(color="blue"))}).} +(e.g. `plot_ly(x=1:10, y=1:10, color=I("red"), marker=list(color="blue"))`).} \item{type}{A character string specifying the trace type -(e.g. \code{"scatter"}, \code{"bar"}, \code{"box"}, etc). -If specified, it \emph{always} creates a trace, otherwise} + (e.g. `"scatter"`, `"bar"`, `"box"`, etc). +If specified, it *always* creates a trace, otherwise} \item{name}{Values mapped to the trace's name attribute. Since a trace can -only have one name, this argument acts very much like \code{split} in that it +only have one name, this argument acts very much like `split` in that it creates one trace for every unique value.} \item{color}{Values mapped to relevant 'fill-color' attribute(s) -(e.g. \href{https://plot.ly/r/reference#scatter-fillcolor}{fillcolor}, -\href{https://plot.ly/r/reference#scatter-marker-color}{marker.color}, -\href{https://plot.ly/r/reference/#scatter-textfont-color}{textfont.color}, etc.). +(e.g. [fillcolor](https://plot.ly/r/reference#scatter-fillcolor), +[marker.color](https://plot.ly/r/reference#scatter-marker-color), +[textfont.color](https://plot.ly/r/reference/#scatter-textfont-color), etc.). The mapping from data values to color codes may be controlled using -\code{colors} and \code{alpha}, or avoided altogether via \code{\link[=I]{I()}} -(e.g., \code{color=I("red")}). -Any color understood by \code{\link[grDevices:col2rgb]{grDevices::col2rgb()}} may be used in this way.} +`colors` and `alpha`, or avoided altogether via [I()] + (e.g., `color=I("red")`). 
+Any color understood by [grDevices::col2rgb()] may be used in this way.} \item{colors}{Either a colorbrewer2.org palette name -(e.g. "YlOrRd" or "Blues"), + (e.g. "YlOrRd" or "Blues"), or a vector of colors to interpolate in hexadecimal "#RRGGBB" format, -or a color interpolation function like \code{colorRamp()}.} +or a color interpolation function like `colorRamp()`.} -\item{alpha}{A number between 0 and 1 specifying the alpha channel applied to \code{color}. -Defaults to 0.5 when mapping to \href{https://plot.ly/r/reference#scatter-fillcolor}{fillcolor} and 1 otherwise.} +\item{alpha}{A number between 0 and 1 specifying the alpha channel applied to `color`. +Defaults to 0.5 when mapping to [fillcolor](https://plot.ly/r/reference#scatter-fillcolor) and 1 otherwise.} -\item{stroke}{Similar to \code{color}, but values are mapped to relevant 'stroke-color' attribute(s) -(e.g., \href{https://plot.ly/r/reference#scatter-marker-line-color}{marker.line.color} -and \href{https://plot.ly/r/reference#scatter-line-color}{line.color} -for filled polygons). If not specified, \code{stroke} inherits from \code{color}.} +\item{stroke}{Similar to `color`, but values are mapped to relevant 'stroke-color' attribute(s) +(e.g., [marker.line.color](https://plot.ly/r/reference#scatter-marker-line-color) + and [line.color](https://plot.ly/r/reference#scatter-line-color) +for filled polygons). 
If not specified, `stroke` inherits from `color`.} -\item{strokes}{Similar to \code{colors}, but controls the \code{stroke} mapping.} +\item{strokes}{Similar to `colors`, but controls the `stroke` mapping.} -\item{alpha_stroke}{Similar to \code{alpha}, but applied to \code{stroke}.} +\item{alpha_stroke}{Similar to `alpha`, but applied to `stroke`.} \item{size}{(Numeric) values mapped to relevant 'fill-size' attribute(s) -(e.g., \href{https://plot.ly/r/reference#scatter-marker-size}{marker.size}, -\href{https://plot.ly/r/reference#scatter-textfont-size}{textfont.size}, -and \href{https://plot.ly/r/reference#scatter-error_x-width}{error_x.width}). +(e.g., [marker.size](https://plot.ly/r/reference#scatter-marker-size), +[textfont.size](https://plot.ly/r/reference#scatter-textfont-size), +and [error_x.width](https://plot.ly/r/reference#scatter-error_x-width)). The mapping from data values to symbols may be controlled using -\code{sizes}, or avoided altogether via \code{\link[=I]{I()}} (e.g., \code{size=I(30)}).} +`sizes`, or avoided altogether via [I()] (e.g., `size=I(30)`).} -\item{sizes}{A numeric vector of length 2 used to scale \code{size} to pixels.} +\item{sizes}{A numeric vector of length 2 used to scale `size` to pixels.} \item{span}{(Numeric) values mapped to relevant 'stroke-size' attribute(s) (e.g., -\href{https://plot.ly/r/reference#scatter-marker-line-width}{marker.line.width}, -\href{https://plot.ly/r/reference#scatter-line-width}{line.width} for filled polygons, -and \href{https://plot.ly/r/reference#scatter-error_x-thickness}{error_x.thickness}) +[marker.line.width](https://plot.ly/r/reference#scatter-marker-line-width), +[line.width](https://plot.ly/r/reference#scatter-line-width) for filled polygons, +and [error_x.thickness](https://plot.ly/r/reference#scatter-error_x-thickness)) The mapping from data values to symbols may be controlled using -\code{spans}, or avoided altogether via \code{\link[=I]{I()}} (e.g., \code{span=I(30)}).} +`spans`, or 
avoided altogether via [I()] (e.g., `span=I(30)`).} -\item{spans}{A numeric vector of length 2 used to scale \code{span} to pixels.} +\item{spans}{A numeric vector of length 2 used to scale `span` to pixels.} -\item{symbol}{(Discrete) values mapped to \href{https://plot.ly/r/reference#scatter-marker-symbol}{marker.symbol}. +\item{symbol}{(Discrete) values mapped to [marker.symbol](https://plot.ly/r/reference#scatter-marker-symbol). The mapping from data values to symbols may be controlled using -\code{symbols}, or avoided altogether via \code{\link[=I]{I()}} (e.g., \code{symbol=I("pentagon")}). -Any \link{pch} value or \href{https://plot.ly/r/reference#scatter-marker-symbol}{symbol name} may be used in this way.} +`symbols`, or avoided altogether via [I()] (e.g., `symbol=I("pentagon")`). +Any [pch] value or [symbol name](https://plot.ly/r/reference#scatter-marker-symbol) may be used in this way.} -\item{symbols}{A character vector of \link{pch} values or \href{https://plot.ly/r/reference#scatter-marker-symbol}{symbol names}.} +\item{symbols}{A character vector of [pch] values or [symbol names](https://plot.ly/r/reference#scatter-marker-symbol).} -\item{linetype}{(Discrete) values mapped to \href{https://plot.ly/r/reference#scatter-line-dash}{line.dash}. +\item{linetype}{(Discrete) values mapped to [line.dash](https://plot.ly/r/reference#scatter-line-dash). The mapping from data values to symbols may be controlled using -\code{linetypes}, or avoided altogether via \code{\link[=I]{I()}} (e.g., \code{linetype=I("dash")}). -Any \code{lty} (see \link{par}) value or \href{https://plot.ly/r/reference#scatter-line-dash}{dash name} may be used in this way.} +`linetypes`, or avoided altogether via [I()] (e.g., `linetype=I("dash")`). 
+Any `lty` (see [par]) value or [dash name](https://plot.ly/r/reference#scatter-line-dash) may be used in this way.} -\item{linetypes}{A character vector of \code{lty} values or \href{https://plot.ly/r/reference#scatter-line-dash}{dash names}} +\item{linetypes}{A character vector of `lty` values or [dash names](https://plot.ly/r/reference#scatter-line-dash)} \item{split}{(Discrete) values used to create multiple traces (one trace per value).} @@ -115,28 +115,28 @@ Any \code{lty} (see \link{par}) value or \href{https://plot.ly/r/reference#scatt \item{height}{Height in pixels (optional, defaults to automatic sizing).} \item{source}{a character string of length 1. Match the value of this string -with the source argument in \code{\link[=event_data]{event_data()}} to retrieve the +with the source argument in [event_data()] to retrieve the event data corresponding to a specific plot (shiny apps can have multiple plots).} } \value{ A plotly } \description{ -This function maps R objects to \href{https://plot.ly/javascript/}{plotly.js}, +This function maps R objects to [plotly.js](https://plot.ly/javascript/), an (MIT licensed) web-based interactive charting library. It provides abstractions for doing common things (e.g. mapping data values to -fill colors (via \code{color}) or creating \link{animation}s (via \code{frame})) and sets +fill colors (via `color`) or creating [animation]s (via `frame`)) and sets some different defaults to make the interface feel more 'R-like' -(i.e., closer to \code{\link[=plot]{plot()}} and \code{\link[ggplot2:qplot]{ggplot2::qplot()}}). +(i.e., closer to [plot()] and [ggplot2::qplot()]). } \details{ -Unless \code{type} is specified, this function just initiates a plotly +Unless `type` is specified, this function just initiates a plotly object with 'global' attributes that are passed onto downstream uses of -\code{\link[=add_trace]{add_trace()}} (or similar). 
A \link{formula} must always be used when -referencing column name(s) in \code{data} (e.g. \code{plot_ly(mtcars, x=~wt)}). +[add_trace()] (or similar). A [formula] must always be used when +referencing column name(s) in `data` (e.g. `plot_ly(mtcars, x=~wt)`). Formulas are optional when supplying values directly, but they do help inform default axis/scale titles -(e.g., \code{plot_ly(x=mtcars$wt)} vs \code{plot_ly(x=~mtcars$wt)}) +(e.g., `plot_ly(x=mtcars$wt)` vs `plot_ly(x=~mtcars$wt)`) } \examples{ \dontrun{ @@ -178,18 +178,18 @@ add_paths(p, linetype=~Species) } \references{ -\url{https://plotly-r.com/overview.html} + } \seealso{ \itemize{ -\item For initializing a plotly-geo object: \code{\link[=plot_geo]{plot_geo()}} -\item For initializing a plotly-mapbox object: \code{\link[=plot_mapbox]{plot_mapbox()}} -\item For translating a ggplot2 object to a plotly object: \code{\link[=ggplotly]{ggplotly()}} -\item For modifying any plotly object: \code{\link[=layout]{layout()}}, \code{\link[=add_trace]{add_trace()}}, \code{\link[=style]{style()}} -\item For linked brushing: \code{\link[=highlight]{highlight()}} -\item For arranging multiple plots: \code{\link[=subplot]{subplot()}}, \code{\link[crosstalk:bscols]{crosstalk::bscols()}} -\item For inspecting plotly objects: \code{\link[=plotly_json]{plotly_json()}} -\item For quick, accurate, and searchable plotly.js reference: \code{\link[=schema]{schema()}} + \item For initializing a plotly-geo object: [plot_geo()] + \item For initializing a plotly-mapbox object: [plot_mapbox()] + \item For translating a ggplot2 object to a plotly object: [ggplotly()] + \item For modifying any plotly object: [layout()], [add_trace()], [style()] + \item For linked brushing: [highlight()] + \item For arranging multiple plots: [subplot()], [crosstalk::bscols()] + \item For inspecting plotly objects: [plotly_json()] + \item For quick, accurate, and searchable plotly.js reference: [schema()] } } \author{ diff --git a/man/print.Rd 
b/man/print.Rd deleted file mode 100644 index bad962f..0000000 --- a/man/print.Rd +++ /dev/null @@ -1,70 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/print_method.R -\name{print} -\alias{print} -\alias{print.SingleCellExperiment} -\title{Printing tibbles} -\usage{ -\method{print}{SingleCellExperiment}(x, ..., n = NULL, width = NULL, n_extra = NULL) -} -\arguments{ -\item{x}{Object to format or print.} - -\item{...}{Other arguments passed on to individual methods.} - -\item{n}{Number of rows to show. If \code{NULL}, the default, will print all rows -if less than option \code{tibble.print_max}. Otherwise, will print -\code{tibble.print_min} rows.} - -\item{width}{Width of text output to generate. This defaults to \code{NULL}, which -means use \code{getOption("tibble.width")} or (if also \code{NULL}) -\code{getOption("width")}; the latter displays only the columns that fit on one -screen. You can also set \code{options(tibble.width = Inf)} to override this -default and always print all columns.} - -\item{n_extra}{Number of extra columns to print abbreviated information for, -if the width is too small for the entire tibble. If \code{NULL}, the default, -will print information about at most \code{tibble.max_extra_cols} extra columns.} -} -\value{ -Nothing -} -\description{ -\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#maturing}{\figure{lifecycle-maturing.svg}{options: alt='[Maturing]'}}}{\strong{[Maturing]}} - -One of the main features of the \code{tbl_df} class is the printing: -\itemize{ -\item Tibbles only print as many rows and columns as fit on one screen, -supplemented by a summary of the remaining rows and columns. -\item Tibble reveals the type of each column, which keeps the user informed about -whether a variable is, e.g., \verb{} or \verb{} (character versus factor). 
-} - -Printing can be tweaked for a one-off call by calling \code{print()} explicitly -and setting arguments like \code{n} and \code{width}. More persistent control is -available by setting the options described below. - -Only the first 5 reduced dimensions are displayed, while all of them are queriable (e.g. ggplot). All dimensions are returned/displayed if as_tibble is used. -} -\section{Package options}{ - - -The following options are used by the tibble and pillar packages -to format and print \code{tbl_df} objects. -Used by the formatting workhorse \code{trunc_mat()} and therefore, -indirectly, by \code{print.tbl()}. -\itemize{ -\item \code{tibble.print_max}: Row number threshold: Maximum number of rows printed. -Set to \code{Inf} to always print all rows. Default: 20. -\item \code{tibble.print_min}: Number of rows printed if row number threshold is -exceeded. Default: 10. -\item \code{tibble.width}: Output width. Default: \code{NULL} (use \code{width} option). -\item \code{tibble.max_extra_cols}: Number of extra columns printed in reduced form. -Default: 100. -} -} - -\examples{ -library(dplyr) -pbmc_small \%>\% print() -} diff --git a/man/pull.Rd b/man/pull.Rd new file mode 100644 index 0000000..16b5f00 --- /dev/null +++ b/man/pull.Rd @@ -0,0 +1,56 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/dplyr_methods.R +\name{pull.SingleCellExperiment} +\alias{pull.SingleCellExperiment} +\title{Extract a single column} +\usage{ +\method{pull}{SingleCellExperiment}(.data, var = -1, name = NULL, ...) +} +\arguments{ +\item{.data}{A data frame, data frame extension (e.g. a tibble), or a +lazy data frame (e.g. from dbplyr or dtplyr). See \emph{Methods}, below, for +more details.} + +\item{var}{A variable specified as: +\itemize{ +\item a literal variable name +\item a positive integer, giving the position counting from the left +\item a negative integer, giving the position counting from the right. 
+} + +The default returns the last column (on the assumption that's the +column you've created most recently). + +This argument is taken by expression and supports +\link[rlang:topic-inject]{quasiquotation} (you can unquote column +names and column locations).} + +\item{name}{An optional parameter that specifies the column to be used +as names for a named vector. Specified in a similar manner as \code{var}.} + +\item{...}{For use by methods.} +} +\value{ +A vector the same size as \code{.data}. +} +\description{ +\code{pull()} is similar to \code{$}. It's mostly useful because it looks a little +nicer in pipes, it also works with remote data frames, and it can optionally +name the output. +} +\section{Methods}{ + + +This function is a \strong{generic}, which means that packages can provide +implementations (methods) for other classes. See the documentation of +individual methods for extra arguments and differences in behaviour. + +The following methods are currently available in loaded packages: +\Sexpr[stage=render,results=rd]{dplyr:::methods_rd("pull")}. + +} + +\examples{ +pbmc_small |> pull(groups) + +} diff --git a/man/rename.Rd b/man/rename.Rd new file mode 100644 index 0000000..dfa1632 --- /dev/null +++ b/man/rename.Rd @@ -0,0 +1,58 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/dplyr_methods.R +\name{rename.SingleCellExperiment} +\alias{rename.SingleCellExperiment} +\title{Rename columns} +\usage{ +\method{rename}{SingleCellExperiment}(.data, ...) +} +\arguments{ +\item{.data}{A data frame, data frame extension (e.g. a tibble), or a +lazy data frame (e.g. from dbplyr or dtplyr). See \emph{Methods}, below, for +more details.} + +\item{...}{For \code{rename()}: <\code{\link[dplyr:dplyr_tidy_select]{tidy-select}}> Use +\code{new_name = old_name} to rename selected variables. + +For \code{rename_with()}: additional arguments passed onto \code{.fn}.} +} +\value{ +An object of the same type as \code{.data}. 
The output has the following +properties: +\itemize{ +\item Rows are not affected. +\item Column names are changed; column order is preserved. +\item Data frame attributes are preserved. +\item Groups are updated to reflect new names. +} +} +\description{ +\code{rename()} changes the names of individual variables using +\code{new_name = old_name} syntax; \code{rename_with()} renames columns using a +function. +} +\section{Methods}{ + + +This function is a \strong{generic}, which means that packages can provide +implementations (methods) for other classes. See the documentation of +individual methods for extra arguments and differences in behaviour. + +The following methods are currently available in loaded packages: +\Sexpr[stage=render,results=rd]{dplyr:::methods_rd("rename")}. + +} + +\examples{ +pbmc_small |> + rename(s_score=nFeature_RNA) + +} +\seealso{ +Other single table verbs: +\code{\link{arrange.SingleCellExperiment}()}, +\code{\link{mutate.SingleCellExperiment}()}, +\code{\link{slice.SingleCellExperiment}()}, +\code{\link{summarise.SingleCellExperiment}()} +} +\concept{single table verbs} diff --git a/man/right_join.Rd b/man/right_join.Rd new file mode 100644 index 0000000..fca8da4 --- /dev/null +++ b/man/right_join.Rd @@ -0,0 +1,172 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/dplyr_methods.R +\name{right_join.SingleCellExperiment} +\alias{right_join.SingleCellExperiment} +\title{Mutating joins} +\usage{ +\method{right_join}{SingleCellExperiment}(x, y, by = NULL, copy = FALSE, suffix = c(".x", ".y"), ...) +} +\arguments{ +\item{x, y}{A pair of data frames, data frame extensions (e.g. a tibble), or +lazy data frames (e.g. from dbplyr or dtplyr). See \emph{Methods}, below, for +more details.} + +\item{by}{A join specification created with \code{\link[dplyr:join_by]{join_by()}}, or a character +vector of variables to join by. 
+ +If \code{NULL}, the default, \verb{*_join()} will perform a natural join, using all +variables in common across \code{x} and \code{y}. A message lists the variables so +that you can check they're correct; suppress the message by supplying \code{by} +explicitly. + +To join on different variables between \code{x} and \code{y}, use a \code{\link[dplyr:join_by]{join_by()}} +specification. For example, \code{join_by(a == b)} will match \code{x$a} to \code{y$b}. + +To join by multiple variables, use a \code{\link[dplyr:join_by]{join_by()}} specification with +multiple expressions. For example, \code{join_by(a == b, c == d)} will match +\code{x$a} to \code{y$b} and \code{x$c} to \code{y$d}. If the column names are the same between +\code{x} and \code{y}, you can shorten this by listing only the variable names, like +\code{join_by(a, c)}. + +\code{\link[dplyr:join_by]{join_by()}} can also be used to perform inequality, rolling, and overlap +joins. See the documentation at \link[dplyr:join_by]{?join_by} for details on +these types of joins. + +For simple equality joins, you can alternatively specify a character vector +of variable names to join by. For example, \code{by = c("a", "b")} joins \code{x$a} +to \code{y$a} and \code{x$b} to \code{y$b}. If variable names differ between \code{x} and \code{y}, +use a named character vector like \code{by = c("x_a" = "y_a", "x_b" = "y_b")}. + +To perform a cross-join, generating all combinations of \code{x} and \code{y}, see +\code{\link[dplyr:cross_join]{cross_join()}}.} + +\item{copy}{If \code{x} and \code{y} are not from the same data source, +and \code{copy} is \code{TRUE}, then \code{y} will be copied into the +same src as \code{x}. This allows you to join tables across srcs, but +it is a potentially expensive operation so you must opt into it.} + +\item{suffix}{If there are non-joined duplicate variables in \code{x} and +\code{y}, these suffixes will be added to the output to disambiguate them. 
+Should be a character vector of length 2.} + +\item{...}{Other parameters passed onto methods.} +} +\value{ +An object of the same type as \code{x} (including the same groups). The order of +the rows and columns of \code{x} is preserved as much as possible. The output has +the following properties: +\itemize{ +\item The rows are affected by the join type. +\itemize{ +\item \code{inner_join()} returns matched \code{x} rows. +\item \code{left_join()} returns all \code{x} rows. +\item \code{right_join()} returns matched \code{x} rows, followed by unmatched \code{y} rows. +\item \code{full_join()} returns all \code{x} rows, followed by unmatched \code{y} rows. +} +\item Output columns include all columns from \code{x} and all non-key columns from +\code{y}. If \code{keep = TRUE}, the key columns from \code{y} are included as well. +\item If non-key columns in \code{x} and \code{y} have the same name, \code{suffix}es are added +to disambiguate. If \code{keep = TRUE} and key columns in \code{x} and \code{y} have +the same name, \code{suffix}es are added to disambiguate these as well. +\item If \code{keep = FALSE}, output columns included in \code{by} are coerced to their +common type between \code{x} and \code{y}. +} +} +\description{ +Mutating joins add columns from \code{y} to \code{x}, matching observations based on +the keys. There are four mutating joins: the inner join, and the three outer +joins. +\subsection{Inner join}{ + +An \code{inner_join()} only keeps observations from \code{x} that have a matching key +in \code{y}. + +The most important property of an inner join is that unmatched rows in either +input are not included in the result. This means that generally inner joins +are not appropriate in most analyses, because it is too easy to lose +observations. +} + +\subsection{Outer joins}{ + +The three outer joins keep observations that appear in at least one of the +data frames: +\itemize{ +\item A \code{left_join()} keeps all observations in \code{x}.
+\item A \code{right_join()} keeps all observations in \code{y}. +\item A \code{full_join()} keeps all observations in \code{x} and \code{y}. +} +} +} +\section{Many-to-many relationships}{ + + + +By default, dplyr guards against many-to-many relationships in equality joins +by throwing a warning. These occur when both of the following are true: +\itemize{ +\item A row in \code{x} matches multiple rows in \code{y}. +\item A row in \code{y} matches multiple rows in \code{x}. +} + +This is typically surprising, as most joins involve a relationship of +one-to-one, one-to-many, or many-to-one, and is often the result of an +improperly specified join. Many-to-many relationships are particularly +problematic because they can result in a Cartesian explosion of the number of +rows returned from the join. + +If a many-to-many relationship is expected, silence this warning by +explicitly setting \code{relationship = "many-to-many"}. + +In production code, it is best to preemptively set \code{relationship} to whatever +relationship you expect to exist between the keys of \code{x} and \code{y}, as this +forces an error to occur immediately if the data doesn't align with your +expectations. + +Inequality joins typically result in many-to-many relationships by nature, so +they don't warn on them by default, but you should still take extra care when +specifying an inequality join, because they also have the capability to +return a large number of rows. + +Rolling joins don't warn on many-to-many relationships either, but many +rolling joins follow a many-to-one relationship, so it is often useful to +set \code{relationship = "many-to-one"} to enforce this. + +Note that in SQL, most database providers won't let you specify a +many-to-many relationship between two tables, instead requiring that you +create a third \emph{junction table} that results in two one-to-many relationships +instead. 
+ +} + +\section{Methods}{ + + +These functions are \strong{generic}s, which means that packages can provide +implementations (methods) for other classes. See the documentation of +individual methods for extra arguments and differences in behaviour. + +Methods available in currently loaded packages: +\itemize{ +\item \code{inner_join()}: \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("inner_join")}. +\item \code{left_join()}: \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("left_join")}. +\item \code{right_join()}: \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("right_join")}. +\item \code{full_join()}: \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("full_join")}. +} + +} + +\examples{ +tt <- pbmc_small +tt |> right_join(tt |> + distinct(groups) |> + mutate(new_column=1:2) |> + slice(1)) + +} +\seealso{ +Other joins: +\code{\link[dplyr]{cross_join}()}, +\code{\link[dplyr]{filter-joins}}, +\code{\link[dplyr]{nest_join}()} +} diff --git a/man/rowwise.Rd b/man/rowwise.Rd new file mode 100644 index 0000000..c8d47a9 --- /dev/null +++ b/man/rowwise.Rd @@ -0,0 +1,50 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/dplyr_methods.R +\name{rowwise.SingleCellExperiment} +\alias{rowwise.SingleCellExperiment} +\title{Group input by rows} +\usage{ +\method{rowwise}{SingleCellExperiment}(data, ...) +} +\arguments{ +\item{data}{Input data frame.} + +\item{...}{<\code{\link[dplyr:dplyr_tidy_select]{tidy-select}}> Variables to be preserved +when calling \code{\link[dplyr:summarise]{summarise()}}. This is typically a set of variables whose +combination uniquely identify each row. + +\strong{NB}: unlike \code{group_by()} you can not create new variables here but +instead you can select multiple variables with (e.g.) \code{everything()}.} +} +\value{ +A row-wise data frame with class \code{rowwise_df}. Note that a +\code{rowwise_df} is implicitly grouped by row, but is not a \code{grouped_df}. 
+} +\description{ +\code{rowwise()} allows you to compute on a data frame a row-at-a-time. +This is most useful when a vectorised function doesn't exist. + +Most dplyr verbs preserve row-wise grouping. The exception is \code{\link[dplyr:summarise]{summarise()}}, +which returns a \link[dplyr]{grouped_df}. You can explicitly ungroup with \code{\link[dplyr:ungroup]{ungroup()}} +or \code{\link[dplyr:as_tibble]{as_tibble()}}, or convert to a \link[dplyr]{grouped_df} with \code{\link[dplyr:group_by]{group_by()}}. +} +\section{List-columns}{ + + +Because a rowwise has exactly one row per group it offers a small +convenience for working with list-columns. Normally, \code{summarise()} and +\code{mutate()} extract a group's worth of data with \code{[}. But when you index +a list in this way, you get back another list. When you're working with +a \code{rowwise} tibble, then dplyr will use \code{[[} instead of \code{[} to make your +life a little easier. + +} + +\examples{ +# TODO + +} +\seealso{ +\code{\link[dplyr:nest_by]{nest_by()}} for a convenient way of creating rowwise data frames +with nested data. +} diff --git a/man/sample_n.Rd b/man/sample_n.Rd new file mode 100644 index 0000000..aaddfc0 --- /dev/null +++ b/man/sample_n.Rd @@ -0,0 +1,56 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/dplyr_methods.R +\name{sample_n.SingleCellExperiment} +\alias{sample_n.SingleCellExperiment} +\alias{sample_frac} +\alias{sample_frac.SingleCellExperiment} +\title{Sample n rows from a table} +\usage{ +\method{sample_n}{SingleCellExperiment}(tbl, size, replace = FALSE, weight = NULL, .env = NULL, ...) + +\method{sample_frac}{SingleCellExperiment}(tbl, size = 1, replace = FALSE, weight = NULL, .env = NULL, ...) +} +\arguments{ +\item{tbl}{A data.frame.} + +\item{size}{<\code{\link[dplyr:dplyr_tidy_select]{tidy-select}}> +For \code{sample_n()}, the number of rows to select. +For \code{sample_frac()}, the fraction of rows to select.
+If \code{tbl} is grouped, \code{size} applies to each group.} + +\item{replace}{Sample with or without replacement?} + +\item{weight}{<\code{\link[dplyr:dplyr_tidy_select]{tidy-select}}> Sampling weights. +This must evaluate to a vector of non-negative numbers the same length as +the input. Weights are automatically standardised to sum to 1.} + +\item{.env}{DEPRECATED.} + +\item{...}{ignored} +} +\description{ +\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#superseded}{\figure{lifecycle-superseded.svg}{options: alt='[Superseded]'}}}{\strong{[Superseded]}} +\code{sample_n()} and \code{sample_frac()} have been superseded in favour of +\code{\link[dplyr:slice_sample]{slice_sample()}}. While they will not be deprecated in the near future, +retirement means that we will only perform critical bug fixes, so we recommend +moving to the newer alternative. + +These functions were superseded because we realised it was more convenient to +have two mutually exclusive arguments to one function, rather than two +separate functions. This also made it possible to clean up a few other smaller +design issues with \code{sample_n()}/\code{sample_frac()}: +\itemize{ +\item The connection to \code{slice()} was not obvious. +\item The name of the first argument, \code{tbl}, is inconsistent with other +single table verbs which use \code{.data}. +\item The \code{size} argument uses tidy evaluation, which is surprising and +undocumented. +\item It was easier to remove the deprecated \code{.env} argument. +\item \code{...} was in a suboptimal position.
+} +} +\examples{ +pbmc_small |> sample_n(50) +pbmc_small |> sample_frac(0.1) + +} diff --git a/man/select.Rd b/man/select.Rd new file mode 100644 index 0000000..ad22ab7 --- /dev/null +++ b/man/select.Rd @@ -0,0 +1,261 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/dplyr_methods.R +\name{select.SingleCellExperiment} +\alias{select.SingleCellExperiment} +\title{Keep or drop columns using their names and types} +\usage{ +\method{select}{SingleCellExperiment}(.data, ...) +} +\arguments{ +\item{.data}{A data frame, data frame extension (e.g. a tibble), or a +lazy data frame (e.g. from dbplyr or dtplyr). See \emph{Methods}, below, for +more details.} + +\item{...}{<\code{\link[dplyr:dplyr_tidy_select]{tidy-select}}> One or more unquoted +expressions separated by commas. Variable names can be used as if they +were positions in the data frame, so expressions like \code{x:y} can +be used to select a range of variables.} +} +\value{ +An object of the same type as \code{.data}. The output has the following +properties: +\itemize{ +\item Rows are not affected. +\item Output columns are a subset of input columns, potentially with a different +order. Columns will be renamed if \code{new_name = old_name} form is used. +\item Data frame attributes are preserved. +\item Groups are maintained; you can't select off grouping variables. +} +} +\description{ +Select (and optionally rename) variables in a data frame, using a concise +mini-language that makes it easy to refer to variables based on their name +(e.g. \code{a:f} selects all columns from \code{a} on the left to \code{f} on the +right) or type (e.g. \code{where(is.numeric)} selects all numeric columns). +\subsection{Overview of selection features}{ + +Tidyverse selections implement a dialect of R where operators make +it easy to select variables: +\itemize{ +\item \code{:} for selecting a range of consecutive variables. +\item \code{!} for taking the complement of a set of variables. 
+\item \code{&} and \code{|} for selecting the intersection or the union of two +sets of variables. +\item \code{c()} for combining selections. +} + +In addition, you can use \strong{selection helpers}. Some helpers select specific +columns: +\itemize{ +\item \code{\link[tidyselect:everything]{everything()}}: Matches all variables. +\item \code{\link[tidyselect:everything]{last_col()}}: Select last variable, possibly with an offset. +\item \code{\link[dplyr:group_cols]{group_cols()}}: Select all grouping columns. +} + +Other helpers select variables by matching patterns in their names: +\itemize{ +\item \code{\link[tidyselect:starts_with]{starts_with()}}: Starts with a prefix. +\item \code{\link[tidyselect:starts_with]{ends_with()}}: Ends with a suffix. +\item \code{\link[tidyselect:starts_with]{contains()}}: Contains a literal string. +\item \code{\link[tidyselect:starts_with]{matches()}}: Matches a regular expression. +\item \code{\link[tidyselect:starts_with]{num_range()}}: Matches a numerical range like x01, x02, x03. +} + +Or from variables stored in a character vector: +\itemize{ +\item \code{\link[tidyselect:all_of]{all_of()}}: Matches variable names in a character vector. All +names must be present, otherwise an out-of-bounds error is +thrown. +\item \code{\link[tidyselect:all_of]{any_of()}}: Same as \code{all_of()}, except that no error is thrown +for names that don't exist. +} + +Or using a predicate function: +\itemize{ +\item \code{\link[tidyselect:where]{where()}}: Applies a function to all variables and selects those +for which the function returns \code{TRUE}. +} +} +} +\section{Methods}{ + + +This function is a \strong{generic}, which means that packages can provide +implementations (methods) for other classes. See the documentation of +individual methods for extra arguments and differences in behaviour. + +The following methods are currently available in loaded packages: +\Sexpr[stage=render,results=rd]{dplyr:::methods_rd("select")}. 
+ +} + +\section{Examples}{ + + + +Here we show the usage for the basic selection operators. See the +specific help pages to learn about helpers like \code{\link[dplyr:starts_with]{starts_with()}}. + +The selection language can be used in functions like +\code{dplyr::select()} or \code{tidyr::pivot_longer()}. Let's first attach +the tidyverse: + +\if{html}{\out{
}}\preformatted{library(tidyverse) + +# For better printing +iris <- as_tibble(iris) +}\if{html}{\out{
}} + +Select variables by name: + +\if{html}{\out{
}}\preformatted{starwars \%>\% select(height) +#> # A tibble: 87 x 1 +#> height +#> +#> 1 172 +#> 2 167 +#> 3 96 +#> 4 202 +#> # i 83 more rows + +iris \%>\% pivot_longer(Sepal.Length) +#> # A tibble: 150 x 6 +#> Sepal.Width Petal.Length Petal.Width Species name value +#> +#> 1 3.5 1.4 0.2 setosa Sepal.Length 5.1 +#> 2 3 1.4 0.2 setosa Sepal.Length 4.9 +#> 3 3.2 1.3 0.2 setosa Sepal.Length 4.7 +#> 4 3.1 1.5 0.2 setosa Sepal.Length 4.6 +#> # i 146 more rows +}\if{html}{\out{
}} + +Select multiple variables by separating them with commas. Note how +the order of columns is determined by the order of inputs: + +\if{html}{\out{
}}\preformatted{starwars \%>\% select(homeworld, height, mass) +#> # A tibble: 87 x 3 +#> homeworld height mass +#> +#> 1 Tatooine 172 77 +#> 2 Tatooine 167 75 +#> 3 Naboo 96 32 +#> 4 Tatooine 202 136 +#> # i 83 more rows +}\if{html}{\out{
}} + +Functions like \code{tidyr::pivot_longer()} don't take variables with +dots. In this case use \code{c()} to select multiple variables: + +\if{html}{\out{
}}\preformatted{iris \%>\% pivot_longer(c(Sepal.Length, Petal.Length)) +#> # A tibble: 300 x 5 +#> Sepal.Width Petal.Width Species name value +#> +#> 1 3.5 0.2 setosa Sepal.Length 5.1 +#> 2 3.5 0.2 setosa Petal.Length 1.4 +#> 3 3 0.2 setosa Sepal.Length 4.9 +#> 4 3 0.2 setosa Petal.Length 1.4 +#> # i 296 more rows +}\if{html}{\out{
}} +\subsection{Operators:}{ + +The \code{:} operator selects a range of consecutive variables: + +\if{html}{\out{
}}\preformatted{starwars \%>\% select(name:mass) +#> # A tibble: 87 x 3 +#> name height mass +#> +#> 1 Luke Skywalker 172 77 +#> 2 C-3PO 167 75 +#> 3 R2-D2 96 32 +#> 4 Darth Vader 202 136 +#> # i 83 more rows +}\if{html}{\out{
}} + +The \code{!} operator negates a selection: + +\if{html}{\out{
}}\preformatted{starwars \%>\% select(!(name:mass)) +#> # A tibble: 87 x 11 +#> hair_color skin_color eye_color birth_year sex gender homeworld species +#> +#> 1 blond fair blue 19 male masculine Tatooine Human +#> 2 gold yellow 112 none masculine Tatooine Droid +#> 3 white, blue red 33 none masculine Naboo Droid +#> 4 none white yellow 41.9 male masculine Tatooine Human +#> # i 83 more rows +#> # i 3 more variables: films , vehicles , starships + +iris \%>\% select(!c(Sepal.Length, Petal.Length)) +#> # A tibble: 150 x 3 +#> Sepal.Width Petal.Width Species +#> +#> 1 3.5 0.2 setosa +#> 2 3 0.2 setosa +#> 3 3.2 0.2 setosa +#> 4 3.1 0.2 setosa +#> # i 146 more rows + +iris \%>\% select(!ends_with("Width")) +#> # A tibble: 150 x 3 +#> Sepal.Length Petal.Length Species +#> +#> 1 5.1 1.4 setosa +#> 2 4.9 1.4 setosa +#> 3 4.7 1.3 setosa +#> 4 4.6 1.5 setosa +#> # i 146 more rows +}\if{html}{\out{
}} + +\code{&} and \code{|} take the intersection or the union of two selections: + +\if{html}{\out{
}}\preformatted{iris \%>\% select(starts_with("Petal") & ends_with("Width")) +#> # A tibble: 150 x 1 +#> Petal.Width +#> +#> 1 0.2 +#> 2 0.2 +#> 3 0.2 +#> 4 0.2 +#> # i 146 more rows + +iris \%>\% select(starts_with("Petal") | ends_with("Width")) +#> # A tibble: 150 x 3 +#> Petal.Length Petal.Width Sepal.Width +#> +#> 1 1.4 0.2 3.5 +#> 2 1.4 0.2 3 +#> 3 1.3 0.2 3.2 +#> 4 1.5 0.2 3.1 +#> # i 146 more rows +}\if{html}{\out{
}} + +To take the difference between two selections, combine the \code{&} and +\code{!} operators: + +\if{html}{\out{
}}\preformatted{iris \%>\% select(starts_with("Petal") & !ends_with("Width")) +#> # A tibble: 150 x 1 +#> Petal.Length +#> +#> 1 1.4 +#> 2 1.4 +#> 3 1.3 +#> 4 1.5 +#> # i 146 more rows +}\if{html}{\out{
}} +} + +} + +\examples{ +pbmc_small |> select(cell, orig.ident) + +} +\seealso{ +Other single table verbs: +\code{\link[dplyr]{arrange}()}, +\code{\link[dplyr]{filter}()}, +\code{\link[dplyr]{mutate}()}, +\code{\link[dplyr]{reframe}()}, +\code{\link[dplyr]{rename}()}, +\code{\link[dplyr]{slice}()}, +\code{\link[dplyr]{summarise}()} +} diff --git a/man/separate-methods.Rd b/man/separate-methods.Rd deleted file mode 100644 index 24fd354..0000000 --- a/man/separate-methods.Rd +++ /dev/null @@ -1,52 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/tidyr_methods.R -\name{separate} -\alias{separate} -\title{Separate a character column into multiple columns with a regular -expression or numeric locations} -\arguments{ -\item{sep}{Separator between columns. - -If character, \code{sep} is interpreted as a regular expression. The default -value is a regular expression that matches any sequence of -non-alphanumeric values. - -If numeric, \code{sep} is interpreted as character positions to split at. Positive -values start at 1 at the far-left of the string; negative value start at -1 at -the far-right of the string. The length of \code{sep} should be one less than -\code{into}.} - -\item{extra}{If \code{sep} is a character vector, this controls what -happens when there are too many pieces. There are three valid options: -\itemize{ -\item "warn" (the default): emit a warning and drop extra values. -\item "drop": drop any extra values without a warning. -\item "merge": only splits at most \code{length(into)} times -}} - -\item{fill}{If \code{sep} is a character vector, this controls what -happens when there are not enough pieces. 
There are three valid options: -\itemize{ -\item "warn" (the default): emit a warning and fill from the right -\item "right": fill with missing values on the right -\item "left": fill with missing values on the left -}} -} -\value{ -A tidySingleCellExperiment objector a tibble depending on input -} -\description{ -Given either a regular expression or a vector of character positions, -\code{separate()} turns a single character column into multiple columns. -} -\examples{ - -un <- pbmc_small \%>\% - - unite("new_col", c(orig.ident, groups)) -un \%>\% separate(col=new_col, into=c("orig.ident", "groups")) -} -\seealso{ -\code{\link[=unite]{unite()}}, the complement, \code{\link[=extract]{extract()}} which uses regular -expression capturing groups. -} diff --git a/man/separate.Rd b/man/separate.Rd new file mode 100644 index 0000000..ed25fdf --- /dev/null +++ b/man/separate.Rd @@ -0,0 +1,85 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/tidyr_methods.R +\name{separate.SingleCellExperiment} +\alias{separate.SingleCellExperiment} +\title{Separate a character column into multiple columns with a regular +expression or numeric locations} +\usage{ +\method{separate}{SingleCellExperiment}( + data, + col, + into, + sep = "[^[:alnum:]]+", + remove = TRUE, + convert = FALSE, + extra = "warn", + fill = "warn", + ... +) +} +\arguments{ +\item{data}{A data frame.} + +\item{col}{<\code{\link[tidyr:tidyr_tidy_select]{tidy-select}}> Column to expand.} + +\item{into}{Names of new variables to create as character vector. +Use \code{NA} to omit the variable in the output.} + +\item{sep}{Separator between columns. + +If character, \code{sep} is interpreted as a regular expression. The default +value is a regular expression that matches any sequence of +non-alphanumeric values. + +If numeric, \code{sep} is interpreted as character positions to split at. 
Positive +values start at 1 at the far-left of the string; negative value start at -1 at +the far-right of the string. The length of \code{sep} should be one less than +\code{into}.} + +\item{remove}{If \code{TRUE}, remove input column from output data frame.} + +\item{convert}{If \code{TRUE}, will run \code{\link[=type.convert]{type.convert()}} with +\code{as.is = TRUE} on new columns. This is useful if the component +columns are integer, numeric or logical. + +NB: this will cause string \code{"NA"}s to be converted to \code{NA}s.} + +\item{extra}{If \code{sep} is a character vector, this controls what +happens when there are too many pieces. There are three valid options: +\itemize{ +\item \code{"warn"} (the default): emit a warning and drop extra values. +\item \code{"drop"}: drop any extra values without a warning. +\item \code{"merge"}: only splits at most \code{length(into)} times +}} + +\item{fill}{If \code{sep} is a character vector, this controls what +happens when there are not enough pieces. There are three valid options: +\itemize{ +\item \code{"warn"} (the default): emit a warning and fill from the right +\item \code{"right"}: fill with missing values on the right +\item \code{"left"}: fill with missing values on the left +}} + +\item{...}{Additional arguments passed on to methods.} +} +\description{ +\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#superseded}{\figure{lifecycle-superseded.svg}{options: alt='[Superseded]'}}}{\strong{[Superseded]}} + +\code{separate()} has been superseded in favour of \code{\link[tidyr:separate_wider_position]{separate_wider_position()}} +and \code{\link[tidyr:separate_wider_delim]{separate_wider_delim()}} because the two functions make the two uses +more obvious, the API is more polished, and the handling of problems is +better. Superseded functions will not go away, but will only receive +critical bug fixes. 
+ +Given either a regular expression or a vector of character positions, +\code{separate()} turns a single character column into multiple columns. +} +\examples{ +un <- pbmc_small |> unite("new_col", c(orig.ident, groups)) +un |> separate(new_col, c("orig.ident", "groups")) + +} +\seealso{ +\code{\link[tidyr:unite]{unite()}}, the complement, \code{\link[tidyr:extract]{extract()}} which uses regular +expression capturing groups. +} diff --git a/man/slice.Rd b/man/slice.Rd new file mode 100644 index 0000000..31fd310 --- /dev/null +++ b/man/slice.Rd @@ -0,0 +1,98 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/dplyr_methods.R +\name{slice.SingleCellExperiment} +\alias{slice.SingleCellExperiment} +\alias{slice_head} +\alias{slice_tail} +\alias{slice_sample} +\alias{slice_min} +\alias{slice_max} +\title{Subset rows using their positions} +\usage{ +\method{slice}{SingleCellExperiment}(.data, ..., .by = NULL, .preserve = FALSE) +} +\arguments{ +\item{.data}{A data frame, data frame extension (e.g. a tibble), or a +lazy data frame (e.g. from dbplyr or dtplyr). See \emph{Methods}, below, for +more details.} + +\item{...}{For \code{slice()}: <\code{\link[rlang:args_data_masking]{data-masking}}> +Integer row values. + +Provide either positive values to keep, or negative values to drop. +The values provided must be either all positive or all negative. +Indices beyond the number of rows in the input are silently ignored. + +For \verb{slice_*()}, these arguments are passed on to methods.} + +\item{.by}{\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#experimental}{\figure{lifecycle-experimental.svg}{options: alt='[Experimental]'}}}{\strong{[Experimental]}} + +<\code{\link[dplyr:dplyr_tidy_select]{tidy-select}}> Optionally, a selection of columns to +group by for just this operation, functioning as an alternative to \code{\link[dplyr:group_by]{group_by()}}. 
For +details and examples, see \link[dplyr:dplyr_by]{?dplyr_by}.} + +\item{.preserve}{Relevant when the \code{.data} input is grouped. +If \code{.preserve = FALSE} (the default), the grouping structure +is recalculated based on the resulting data, otherwise the grouping is kept as is.} +} +\value{ +An object of the same type as \code{.data}. The output has the following +properties: +\itemize{ +\item Each row may appear 0, 1, or many times in the output. +\item Columns are not modified. +\item Groups are not modified. +\item Data frame attributes are preserved. +} +} +\description{ +\code{slice()} lets you index rows by their (integer) locations. It allows you +to select, remove, and duplicate rows. It is accompanied by a number of +helpers for common use cases: +\itemize{ +\item \code{slice_head()} and \code{slice_tail()} select the first or last rows. +\item \code{slice_sample()} randomly selects rows. +\item \code{slice_min()} and \code{slice_max()} select rows with the lowest or highest values +of a variable. +} + +If \code{.data} is a \link[dplyr]{grouped_df}, the operation will be performed on each group, +so that (e.g.) \code{slice_head(df, n = 5)} will select the first five rows in +each group. +} +\details{ +Slice does not work with relational databases because they have no +intrinsic notion of row order. If you want to perform the equivalent +operation, use \code{\link[dplyr:filter]{filter()}} and \code{\link[dplyr:row_number]{row_number()}}. +} +\section{Methods}{ + + +These functions are \strong{generic}s, which means that packages can provide +implementations (methods) for other classes. See the documentation of +individual methods for extra arguments and differences in behaviour. + +Methods available in currently loaded packages: +\itemize{ +\item \code{slice()}: \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("slice")}. +\item \code{slice_head()}: \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("slice_head")}.
+\item \code{slice_tail()}: \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("slice_tail")}. +\item \code{slice_min()}: \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("slice_min")}. +\item \code{slice_max()}: \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("slice_max")}. +\item \code{slice_sample()}: \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("slice_sample")}. +} + +} + +\examples{ +pbmc_small |> slice(1) + +} +\seealso{ +Other single table verbs: +\code{\link{arrange.SingleCellExperiment}()}, +\code{\link{mutate.SingleCellExperiment}()}, +\code{\link{rename.SingleCellExperiment}()}, +\code{\link{summarise.SingleCellExperiment}()} +} +\concept{single table verbs} diff --git a/man/summarise.Rd b/man/summarise.Rd new file mode 100644 index 0000000..7d24d05 --- /dev/null +++ b/man/summarise.Rd @@ -0,0 +1,106 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/dplyr_methods.R +\name{summarise.SingleCellExperiment} +\alias{summarise.SingleCellExperiment} +\alias{summarize} +\alias{summarize.SingleCellExperiment} +\title{Summarise each group down to one row} +\usage{ +\method{summarise}{SingleCellExperiment}(.data, ...) + +\method{summarize}{SingleCellExperiment}(.data, ...) +} +\arguments{ +\item{.data}{A data frame, data frame extension (e.g. a tibble), or a +lazy data frame (e.g. from dbplyr or dtplyr). See \emph{Methods}, below, for +more details.} + +\item{...}{<\code{\link[rlang:args_data_masking]{data-masking}}> Name-value pairs of +summary functions. The name will be the name of the variable in the result. + +The value can be: +\itemize{ +\item A vector of length 1, e.g. \code{min(x)}, \code{n()}, or \code{sum(is.na(y))}. +\item A data frame, to add multiple columns from a single expression. 
+} + +\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#deprecated}{\figure{lifecycle-deprecated.svg}{options: alt='[Deprecated]'}}}{\strong{[Deprecated]}} Returning values with size 0 or >1 was +deprecated as of 1.1.0. Please use \code{\link[dplyr:reframe]{reframe()}} for this instead.} +} +\value{ +An object \emph{usually} of the same type as \code{.data}. +\itemize{ +\item The rows come from the underlying \code{\link[dplyr:group_keys]{group_keys()}}. +\item The columns are a combination of the grouping keys and the summary +expressions that you provide. +\item The grouping structure is controlled by the \verb{.groups=} argument, the +output may be another \link[dplyr]{grouped_df}, a \link[dplyr]{tibble} or a \link[dplyr]{rowwise} data frame. +\item Data frame attributes are \strong{not} preserved, because \code{summarise()} +fundamentally creates a new data frame. +} +} +\description{ +\code{summarise()} creates a new data frame. It returns one row for each +combination of grouping variables; if there are no grouping variables, the +output will have a single row summarising all observations in the input. It +will contain one column for each grouping variable and one column for each of +the summary statistics that you have specified. + +\code{summarise()} and \code{summarize()} are synonyms. 
+} +\section{Useful functions}{ + + +\itemize{ +\item Center: \code{\link[=mean]{mean()}}, \code{\link[=median]{median()}} +\item Spread: \code{\link[=sd]{sd()}}, \code{\link[=IQR]{IQR()}}, \code{\link[=mad]{mad()}} +\item Range: \code{\link[=min]{min()}}, \code{\link[=max]{max()}}, +\item Position: \code{\link[dplyr:first]{first()}}, \code{\link[dplyr:last]{last()}}, \code{\link[dplyr:nth]{nth()}}, +\item Count: \code{\link[dplyr:n]{n()}}, \code{\link[dplyr:n_distinct]{n_distinct()}} +\item Logical: \code{\link[=any]{any()}}, \code{\link[=all]{all()}} +} + +} + +\section{Backend variations}{ + + + +The data frame backend supports creating a variable and using it in the +same summary. This means that previously created summary variables can be +further transformed or combined within the summary, as in \code{\link[dplyr:mutate]{mutate()}}. +However, it also means that summary variables with the same names as previous +variables overwrite them, making those variables unavailable to later summary +variables. + +This behaviour may not be supported in other backends. To avoid unexpected +results, consider using new names for your summary variables, especially when +creating multiple summaries. + +} + +\section{Methods}{ + + +This function is a \strong{generic}, which means that packages can provide +implementations (methods) for other classes. See the documentation of +individual methods for extra arguments and differences in behaviour. + +The following methods are currently available in loaded packages: +\Sexpr[stage=render,results=rd]{dplyr:::methods_rd("summarise")}. 
+ +} + +\examples{ +pbmc_small |> + summarise(mean(nCount_RNA)) + +} +\seealso{ +Other single table verbs: +\code{\link{arrange.SingleCellExperiment}()}, +\code{\link{mutate.SingleCellExperiment}()}, +\code{\link{rename.SingleCellExperiment}()}, +\code{\link{slice.SingleCellExperiment}()} +} +\concept{single table verbs} diff --git a/man/tbl_format_header-methods.Rd b/man/tbl_format_header.Rd similarity index 56% rename from man/tbl_format_header-methods.Rd rename to man/tbl_format_header.Rd index a1f630c..a047e74 100644 --- a/man/tbl_format_header-methods.Rd +++ b/man/tbl_format_header.Rd @@ -1,8 +1,21 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/print_method.R -\name{tbl_format_header} -\alias{tbl_format_header} +\name{tbl_format_header.tidySingleCellExperiment} +\alias{tbl_format_header.tidySingleCellExperiment} \title{Format the header of a tibble} +\usage{ +\method{tbl_format_header}{tidySingleCellExperiment}(x, setup, ...) +} +\arguments{ +\item{x}{A tibble-like object.} + +\item{setup}{A setup object returned from \code{\link[pillar:tbl_format_setup]{tbl_format_setup()}}.} + +\item{...}{These dots are for future extensions and must be empty.} +} +\value{ +A character vector. +} \description{ \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#experimental}{\figure{lifecycle-experimental.svg}{options: alt='[Experimental]'}}}{\strong{[Experimental]}} @@ -14,6 +27,10 @@ of a tibble. Override this method if you need to change the appearance of the entire header. If you only need to change or extend the components shown in the header, -override or extend \code{\link[=tbl_sum]{tbl_sum()}} for your class which is called by the +override or extend \code{\link[pillar:tbl_sum]{tbl_sum()}} for your class which is called by the default method. 
} +\examples{ +# TODO + +} diff --git a/man/tibble-methods.Rd b/man/tibble-methods.Rd deleted file mode 100644 index f452b5c..0000000 --- a/man/tibble-methods.Rd +++ /dev/null @@ -1,102 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/tibble_methods.R -\name{as_tibble} -\alias{as_tibble} -\alias{glimpse} -\title{Coerce lists, matrices, and more to data frames} -\arguments{ -\item{rownames}{How to treat existing row names of a data frame or matrix: -\itemize{ -\item \code{NULL}: remove row names. This is the default. -\item \code{NA}: keep row names. -\item A string: the name of a new column. Existing rownames are transferred -into this column and the \code{row.names} attribute is deleted. -Read more in \link{rownames}. -}} - -\item{.name_repair}{see tidyr - -For compatibility only, do not use for new code.} - -\item{x}{An object to glimpse at.} - -\item{width}{Width of output: defaults to the setting of the option -\code{tibble.width} (if finite) or the width of the console.} - -\item{...}{Unused, for extensibility.} -} -\value{ -A tibble - -x original x is (invisibly) returned, allowing \code{glimpse()} to be -used within a data pipe line. -} -\description{ -\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#maturing}{\figure{lifecycle-maturing.svg}{options: alt='[Maturing]'}}}{\strong{[Maturing]}} - -\code{as_tibble()} turns an existing object, such as a data frame or -matrix, into a so-called tibble, a data frame with class \code{\link{tbl_df}}. This is -in contrast with \code{\link[=tibble]{tibble()}}, which builds a tibble from individual columns. -\code{as_tibble()} is to \code{\link[=tibble]{tibble()}} as \code{\link[base:as.data.frame]{base::as.data.frame()}} is to -\code{\link[base:data.frame]{base::data.frame()}}. 
- -\code{as_tibble()} is an S3 generic, with methods for: -\itemize{ -\item \code{\link[base:data.frame]{data.frame}}: Thin wrapper around the \code{list} method -that implements tibble's treatment of \link{rownames}. -\item \code{\link[methods:StructureClasses]{matrix}}, \code{\link[stats:poly]{poly}}, -\code{\link[stats:ts]{ts}}, \code{\link[base:table]{table}} -\item Default: Other inputs are first coerced with \code{\link[base:as.data.frame]{base::as.data.frame()}}. -} - -\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#maturing}{\figure{lifecycle-maturing.svg}{options: alt='[Maturing]'}}}{\strong{[Maturing]}} - -\code{glimpse()} is like a transposed version of \code{print()}: -columns run down the page, and data runs across. -This makes it possible to see every column in a data frame. -It's a little like \code{\link[=str]{str()}} applied to a data frame -but it tries to show you as much data as possible. -(And it always shows the underlying data, even when applied -to a remote data source.) - -This generic will be moved to \pkg{pillar}, and reexported from there -as soon as it becomes available. -} -\section{Row names}{ - -The default behavior is to silently remove row names. - -New code should explicitly convert row names to a new column using the -\code{rownames} argument. - -For existing code that relies on the retention of row names, call -\code{pkgconfig::set_config("tibble::rownames"=NA)} in your script or in your -package's \code{\link[=.onLoad]{.onLoad()}} function. -} - -\section{Life cycle}{ - -Using \code{as_tibble()} for vectors is superseded as of version 3.0.0, -prefer the more expressive maturing \code{as_tibble_row()} and -\code{as_tibble_col()} variants for new code. -} - -\section{S3 methods}{ - -\code{glimpse} is an S3 generic with a customised method for \code{tbl}s and -\code{data.frames}, and a default method that calls \code{\link[=str]{str()}}. 
-} - -\examples{ -pbmc_small \%>\% - - as_tibble() -pbmc_small \%>\% tidy \%>\% glimpse() - - -} -\seealso{ -\code{\link[=tibble]{tibble()}} constructs a tibble from individual columns. \code{\link[=enframe]{enframe()}} -converts a named vector to a tibble with a column of names and column of -values. Name repair is implemented using \code{\link[vctrs:vec_as_names]{vctrs::vec_as_names()}}. -} diff --git a/man/tidy.Rd b/man/tidy.Rd index 983a748..a84756a 100644 --- a/man/tidy.Rd +++ b/man/tidy.Rd @@ -2,20 +2,23 @@ % Please edit documentation in R/methods.R \name{tidy} \alias{tidy} -\title{tidy for SingleCellExperiment} +\alias{tidy.SingleCellExperiment} +\title{tidy for `SingleCellExperiment`} \usage{ tidy(object) + +\method{tidy}{SingleCellExperiment}(object) } \arguments{ -\item{object}{A SingleCellExperiment object} +\item{object}{A `SingleCellExperiment` object.} } \value{ -A tidySingleCellExperiment object +A `tidySingleCellExperiment` object. } \description{ -tidy for SingleCellExperiment +tidy for `SingleCellExperiment` } \examples{ - tidySingleCellExperiment::pbmc_small + } diff --git a/man/unite-methods.Rd b/man/unite.Rd similarity index 66% rename from man/unite-methods.Rd rename to man/unite.Rd index 0309420..fd1b8a8 100644 --- a/man/unite-methods.Rd +++ b/man/unite.Rd @@ -1,8 +1,11 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/tidyr_methods.R -\name{unite} -\alias{unite} +\name{unite.SingleCellExperiment} +\alias{unite.SingleCellExperiment} \title{Unite multiple columns into one by pasting strings together} +\usage{ +\method{unite}{SingleCellExperiment}(data, col, ..., sep = "_", remove = TRUE, na.rm = FALSE) +} \arguments{ \item{data}{A data frame.} @@ -15,27 +18,24 @@ and symbols). 
The name is captured from the expression with symbols do not represent actual objects is now discouraged in the tidyverse; we support it here for backward compatibility).} -\item{...}{<\code{\link[=tidyr_tidy_select]{tidy-select}}> Columns to unite} +\item{...}{<\code{\link[tidyr:tidyr_tidy_select]{tidy-select}}> Columns to unite} \item{sep}{Separator to use between values.} -\item{na.rm}{If \code{TRUE}, missing values will be remove prior to uniting -each value.} - \item{remove}{If \code{TRUE}, remove input columns from output data frame.} -} -\value{ -A tidySingleCellExperiment objector a tibble depending on input + +\item{na.rm}{If \code{TRUE}, missing values will be removed prior to uniting +each value.} } \description{ Convenience function to paste together multiple columns into one. } \examples{ - -pbmc_small \%>\% - - unite("new_col", c(orig.ident, groups)) +pbmc_small |> unite( + col="new_col", + c(orig.ident, groups)) + } \seealso{ -\code{\link[=separate]{separate()}}, the complement. +\code{\link[tidyr:separate]{separate()}}, the complement. } diff --git a/man/unnest-methods.Rd b/man/unnest-methods.Rd deleted file mode 100644 index 7787fa8..0000000 --- a/man/unnest-methods.Rd +++ /dev/null @@ -1,108 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/tidyr_methods.R -\name{unnest} -\alias{unnest} -\alias{unnest.tidySingleCellExperiment_nested} -\alias{unnest_single_cell_experiment} -\title{unnest} -\usage{ -\method{unnest}{tidySingleCellExperiment_nested}( - data, - cols, - ..., - keep_empty = FALSE, - ptype = NULL, - names_sep = NULL, - names_repair = "check_unique", - .drop, - .id, - .sep, - .preserve -) - -unnest_single_cell_experiment( - data, - cols, - ..., - keep_empty = FALSE, - ptype = NULL, - names_sep = NULL, - names_repair = "check_unique", - .drop, - .id, - .sep, - .preserve -) -} -\arguments{ -\item{data}{A tbl. (See tidyr)} - -\item{cols}{<\code{\link[=tidyr_tidy_select]{tidy-select}}> Columns to unnest. 
-If you \code{unnest()} multiple columns, parallel entries must be of -compatible sizes, i.e. they're either equal or length 1 (following the -standard tidyverse recycling rules).} - -\item{...}{<\code{\link[=tidyr_tidy_select]{tidy-select}}> Columns to nest, specified -using name-variable pairs of the form \code{new_col=c(col1, col2, col3)}. -The right hand side can be any valid tidy select expression. - -\Sexpr[results=rd, stage=render]{lifecycle::badge("deprecated")}: -previously you could write \code{df \%>\% nest(x, y, z)} and \code{df \%>\% unnest(x, y, z)}. Convert to \code{df \%>\% nest(data=c(x, y, z))}. -and \code{df \%>\% unnest(c(x, y, z))}. - -If you previously created new variable in \code{unnest()} you'll now need to -do it explicitly with \code{mutate()}. Convert \code{df \%>\% unnest(y=fun(x, y, z))} -to \code{df \%>\% mutate(y=fun(x, y, z)) \%>\% unnest(y)}.} - -\item{keep_empty}{See tidyr::unnest} - -\item{ptype}{See tidyr::unnest} - -\item{names_sep}{If \code{NULL}, the default, the names will be left -as is. In \code{nest()}, inner names will come from the former outer names; -in \code{unnest()}, the new outer names will come from the inner names. - -If a string, the inner and outer names will be used together. In \code{nest()}, -the names of the new outer columns will be formed by pasting together the -outer and the inner column names, separated by \code{names_sep}. In \code{unnest()}, -the new inner names will have the outer names (+ \code{names_sep}) automatically -stripped. 
This makes \code{names_sep} roughly symmetric between nesting and unnesting.} - -\item{names_repair}{See tidyr::unnest} - -\item{.drop}{See tidyr::unnest} - -\item{.id}{tidyr::unnest} - -\item{.sep}{tidyr::unnest} - -\item{.preserve}{See tidyr::unnest} - -\item{sep}{tidyr::unnest} -} -\value{ -A tidySingleCellExperiment objector a tibble depending on input - -A tidySingleCellExperiment objector a tibble depending on input -} -\description{ -unnest - -unnest_single_cell_experiment -} -\examples{ - -library(dplyr) -pbmc_small \%>\% - - nest(data=-groups) \%>\% - unnest(data) - - -library(dplyr) -pbmc_small \%>\% - - nest(data=-groups) \%>\% - unnest_single_cell_experiment(data) - -} diff --git a/man/unnest.Rd b/man/unnest.Rd new file mode 100644 index 0000000..5c04278 --- /dev/null +++ b/man/unnest.Rd @@ -0,0 +1,123 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/tidyr_methods.R +\name{unnest.tidySingleCellExperiment_nested} +\alias{unnest.tidySingleCellExperiment_nested} +\alias{unnest_single_cell_experiment} +\title{Unnest a list-column of data frames into rows and columns} +\usage{ +\method{unnest}{tidySingleCellExperiment_nested}( + data, + cols, + ..., + keep_empty = FALSE, + ptype = NULL, + names_sep = NULL, + names_repair = "check_unique", + .drop, + .id, + .sep, + .preserve +) + +unnest_single_cell_experiment( + data, + cols, + ..., + keep_empty = FALSE, + ptype = NULL, + names_sep = NULL, + names_repair = "check_unique", + .drop, + .id, + .sep, + .preserve +) +} +\arguments{ +\item{data}{A data frame.} + +\item{cols}{<\code{\link[tidyr:tidyr_tidy_select]{tidy-select}}> List-columns to unnest. 
+ +When selecting multiple columns, values from the same row will be recycled +to their common size.} + +\item{...}{\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#deprecated}{\figure{lifecycle-deprecated.svg}{options: alt='[Deprecated]'}}}{\strong{[Deprecated]}}: +previously you could write \code{df \%>\% unnest(x, y, z)}. +Convert to \code{df \%>\% unnest(c(x, y, z))}. If you previously created a new +variable in \code{unnest()} you'll now need to do it explicitly with \code{mutate()}. +Convert \code{df \%>\% unnest(y = fun(x, y, z))} +to \code{df \%>\% mutate(y = fun(x, y, z)) \%>\% unnest(y)}.} + +\item{keep_empty}{By default, you get one row of output for each element +of the list that you are unchopping/unnesting. This means that if there's a +size-0 element (like \code{NULL} or an empty data frame or vector), then that +entire row will be dropped from the output. If you want to preserve all +rows, use \code{keep_empty = TRUE} to replace size-0 elements with a single row +of missing values.} + +\item{ptype}{Optionally, a named list of column name-prototype pairs to +coerce \code{cols} to, overriding the default that will be guessed from +combining the individual values. Alternatively, a single empty ptype +can be supplied, which will be applied to all \code{cols}.} + +\item{names_sep}{If \code{NULL}, the default, the outer names will come from the +inner names. If a string, the outer names will be formed by pasting +together the outer and the inner column names, separated by \code{names_sep}.} + +\item{names_repair}{Used to check that output data frame has valid +names. 
Must be one of the following options: +\itemize{ +\item \verb{"minimal}": no name repair or checks, beyond basic existence, +\item \verb{"unique}": make sure names are unique and not empty, +\item \verb{"check_unique}": (the default), no name repair, but check they are unique, +\item \verb{"universal}": make the names unique and syntactic +\item a function: apply custom name repair. +\item \link[tidyr]{tidyr_legacy}: use the name repair from tidyr 0.8. +\item a formula: a purrr-style anonymous function (see \code{\link[rlang:as_function]{rlang::as_function()}}) +} + +See \code{\link[vctrs:vec_as_names]{vctrs::vec_as_names()}} for more details on these terms and the +strategies used to enforce them.} + +\item{.drop, .preserve}{\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#deprecated}{\figure{lifecycle-deprecated.svg}{options: alt='[Deprecated]'}}}{\strong{[Deprecated]}}: +all list-columns are now preserved; If there are any that you +don't want in the output use \code{select()} to remove them prior to +unnesting.} + +\item{.id}{\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#deprecated}{\figure{lifecycle-deprecated.svg}{options: alt='[Deprecated]'}}}{\strong{[Deprecated]}}: +convert \code{df \%>\% unnest(x, .id = "id")} to \verb{df \%>\% mutate(id = names(x)) \%>\% unnest(x))}.} + +\item{.sep}{\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#deprecated}{\figure{lifecycle-deprecated.svg}{options: alt='[Deprecated]'}}}{\strong{[Deprecated]}}: +use \code{names_sep} instead.} +} +\description{ +Unnest expands a list-column containing data frames into rows and columns. +} +\section{New syntax}{ + + +tidyr 1.0.0 introduced a new syntax for \code{nest()} and \code{unnest()} that's +designed to be more similar to other functions. 
Converting to the new syntax +should be straightforward (guided by the message you'll receive) but if +you just need to run an old analysis, you can easily revert to the previous +behaviour using \code{\link[tidyr:nest_legacy]{nest_legacy()}} and \code{\link[tidyr:unnest_legacy]{unnest_legacy()}} as follows: + +\if{html}{\out{
<div class="sourceCode">}}\preformatted{library(tidyr) +nest <- nest_legacy +unnest <- unnest_legacy +}\if{html}{\out{</div>
}} + +} + +\examples{ +pbmc_small |> + nest(data=-groups) |> + unnest(data) + +} +\seealso{ +Other rectangling: +\code{\link[tidyr]{hoist}()}, +\code{\link[tidyr]{unnest_longer}()}, +\code{\link[tidyr]{unnest_wider}()} +} diff --git a/tests/testthat/test-dplyr_methods.R b/tests/testthat/test-dplyr_methods.R index 8d5ed4d..6faee7c 100755 --- a/tests/testthat/test-dplyr_methods.R +++ b/tests/testthat/test-dplyr_methods.R @@ -3,18 +3,19 @@ context("dplyr test") library(magrittr) test_that("arrange", { + + expect_warning( tt_pca_aranged <- pbmc_small %>% arrange(groups) %>% scater::logNormCounts() %>% - scater::runPCA() + scater::runPCA()) + expect_warning( tt_pca <- pbmc_small %>% scater::logNormCounts() %>% - scater::runPCA() - - + scater::runPCA()) expect_equal( reducedDims(tt_pca_aranged)$PCA[sort(colnames(tt_pca_aranged)), 1:3] %>% abs() %>% head(), @@ -24,13 +25,15 @@ test_that("arrange", { }) test_that("bind_rows", { - tt_bind <- bind_rows(pbmc_small, pbmc_small) + expect_warning( + tt_bind <- pbmc_small %>% + bind_rows(pbmc_small)) tt_bind %>% - select(cell) %>% + select(.cell) %>% tidySingleCellExperiment:::to_tib() %>% - dplyr::count(cell) %>% - dplyr::count(n) %>% + dplyr::count(.cell) %>% + dplyr::count(n, name="m") %>% nrow() %>% expect_equal(1) }) @@ -135,7 +138,7 @@ test_that("slice", { test_that("select", { pbmc_small %>% - select(cell, orig.ident) %>% + select(.cell, orig.ident) %>% class() %>% as.character() %>% expect_equal("SingleCellExperiment") diff --git a/tests/testthat/test-methods.R b/tests/testthat/test-methods.R index 65848e4..20d142b 100644 --- a/tests/testthat/test-methods.R +++ b/tests/testthat/test-methods.R @@ -8,7 +8,7 @@ test_that("join_features",{ pbmc_small %>% join_features("CD3D") %>% slice(1) %>% - tidySingleCellExperiment::pull(.abundance_counts) %>% + pull(.abundance_counts) %>% expect_equal(4, tolerance=0.1) diff --git a/tests/testthat/test-tidyr_methods.R b/tests/testthat/test-tidyr_methods.R index 9d8c9a3..2006655 100755 --- 
a/tests/testthat/test-tidyr_methods.R +++ b/tests/testthat/test-tidyr_methods.R @@ -5,15 +5,17 @@ tt <- pbmc_small %>% mutate(col2 = "other_col") test_that("nest_unnest", { col_names <- tt %>% colData %>% colnames() %>% c("cell") + expect_warning( x <- tt %>% nest(data = -groups) %>% unnest(data) %>% scater::logNormCounts() %>% - scater::runPCA() + scater::runPCA()) + + expect_warning( y <- tt %>% scater::logNormCounts() %>% - scater::runPCA() - + scater::runPCA()) expect_equal( reducedDims(x)$PCA %>% diff --git a/vignettes/introduction.Rmd b/vignettes/introduction.Rmd index 1ce2042..b87f7fd 100755 --- a/vignettes/introduction.Rmd +++ b/vignettes/introduction.Rmd @@ -1,15 +1,15 @@ --- title: "Overview of the tidySingleCellExperiment package" +package: "`r BiocStyle::pkg_ver('tidySingleCellExperiment')`" author: "Stefano Mangiola" -date: "`r Sys.Date()`" -package: tidySingleCellExperiment output: BiocStyle::html_document: toc_float: true bibliography: tidySingleCellExperiment.bib vignette: > - %\VignetteEngine{knitr::knitr} %\VignetteIndexEntry{Overview of the tidySingleCellExperiment package} + %\VignettePackage{tidySingleCellExperiment} + %\VignetteEngine{knitr::rmarkdown} %\usepackage[UTF-8]{inputenc} --- @@ -17,37 +17,29 @@ vignette: > library(knitr) knitr::opts_chunk$set( cache=TRUE, warning=FALSE, - message=FALSE, cache.lazy=FALSE -) + message=FALSE, cache.lazy=FALSE) ``` +# Introduction {-} -# Introduction +`tidySingleCellExperiment` provides a bridge between Bioconductor single-cell packages [@amezquita2019orchestrating] and the *tidyverse* [@wickham2019welcome]. It creates an invisible layer that enables viewing the Bioconductor `r BiocStyle::Biocpkg("SingleCellExperiment")` object as a *tidyverse* `tibble`, and provides `SingleCellExperiment`-compatible `r BiocStyle::CRANpkg("dplyr")`, `r BiocStyle::CRANpkg("tidyr")`, `r BiocStyle::CRANpkg("ggplot2")` and `r BiocStyle::CRANpkg("plotly")` functions (see Table \@ref(tab:table)). 
This allows users to get the best of both Bioconductor and *tidyverse* worlds. -tidySingleCellExperiment provides a bridge between Bioconductor single-cell packages [@amezquita2019orchestrating] and the tidyverse [@wickham2019welcome]. It creates an invisible layer that enables viewing the -Bioconductor *SingleCellExperiment* object as a tidyverse tibble, and provides SingleCellExperiment-compatible *dplyr*, *tidyr*, *ggplot* and *plotly* functions. This allows users to get the best of both Bioconductor and tidyverse worlds. + | +------ | ---------- +All functions compatible with `SingleCellExperiment`s | After all, a `tidySingleCellExperiment`
<br>is a `SingleCellExperiment`, just better! +__*tidyverse*__ | +`dplyr` | All `tibble`-compatible
<br>functions (e.g., `select()`) +`tidyr` | All `tibble`-compatible
<br>functions (e.g., `pivot_longer()`) +`ggplot2` | Plotting with `ggplot()` +`plotly` | Plotting with `plot_ly()` +**Utilities** | +`tidy()` | Add an invisible `tidySingleCellExperiment`
<br>layer over a `SingleCellExperiment` object +`as_tibble()` | Convert cell-wise information to a `tbl_df` +`join_features()` | Add feature-wise information;
<br>returns a `tbl_df` +`aggregate_cells()` | Aggregate feature abundances as pseudobulks;
<br>returns a `SummarizedExperiment` +: (\#tab:table) Available `tidySingleCellExperiment` functions and utilities. -## Functions/utilities available - -SingleCellExperiment-compatible Functions | Description ------------- | ------------- -`all` | After all `tidySingleCellExperiment` is a SingleCellExperiment object, just better - -tidyverse Packages | Description ------------- | ------------- -`dplyr` | All `dplyr` tibble functions (e.g. `tidySingleCellExperiment::select`) -`tidyr` | All `tidyr` tibble functions (e.g. `tidySingleCellExperiment::pivot_longer`) -`ggplot2` | `ggplot` (`tidySingleCellExperiment::ggplot`) -`plotly` | `plot_ly` (`tidySingleCellExperiment::plot_ly`) - -Utilities | Description ------------- | ------------- -`tidy` | Add `tidySingleCellExperiment` invisible layer over a SingleCellExperiment object -`as_tibble` | Convert cell-wise information to a `tbl_df` -`join_features` | Add feature-wise information, returns a `tbl_df` -`aggregate_cells` | Aggregate cell gene-transcription abundance as pseudobulk tissue - -## Installation +# Installation {-} ```{r, eval=FALSE} if (!requireNamespace("BiocManager", quietly=TRUE)) @@ -60,49 +52,60 @@ Load libraries used in this vignette. ```{r message=FALSE} # Bioconductor single-cell packages -library(scater) library(scran) +library(scater) +library(igraph) +library(celldex) library(SingleR) library(SingleCellSignalR) # Tidyverse-compatible packages -library(ggplot2) library(purrr) +library(GGally) +library(ggplot2) library(tidyHeatmap) # Both library(tidySingleCellExperiment) + +# Other +library(Matrix) +library(dittoSeq) ``` -# Create `tidySingleCellExperiment`, the best of both worlds! +# Creating a `tidySingleCellExperiment`
-- the best of both worlds! -This is a *SingleCellExperiment* object but it is evaluated as a tibble. So it is compatible both with SingleCellExperiment and tidyverse. +This is a `SingleCellExperiment` object but it is evaluated as a tibble. +So it is compatible both with `SingleCellExperiment` and *tidyverse*. ```{r} pbmc_small_tidy <- tidySingleCellExperiment::pbmc_small ``` -**It looks like a tibble** +**It looks like a `tibble`...** ```{r} pbmc_small_tidy ``` -**But it is a SingleCellExperiment object after all** +**...but it is a `SingleCellExperiment` after all!** ```{r} -assay(pbmc_small_tidy, "counts")[1:5, 1:5] +counts(pbmc_small_tidy)[1:5, 1:4] ``` # Annotation polishing -We may have a column that contains the directory each run was taken from, such as the "file" column in `pbmc_small_tidy`. +We may have a column that contains the directory each run was taken from, +such as the "file" column in `pbmc_small_tidy`. ```{r} pbmc_small_tidy$file[1:5] ``` -We may want to extract the run/sample name out of it into a separate column. Tidyverse `extract` can be used to convert a character column into multiple columns using regular expression groups. +We may want to extract the run/sample name out of it into a separate column. +The *tidyverse* function `extract()` can be used to convert a character column +into multiple columns using regular expression groups. ```{r} # Create sample column @@ -124,33 +127,29 @@ Set colours and theme for plots. 
friendly_cols <- dittoSeq::dittoColors() # Set theme -custom_theme <- - list( - scale_fill_manual(values=friendly_cols), - scale_color_manual(values=friendly_cols), - theme_bw() + - theme( - panel.border=element_blank(), - axis.line=element_line(), - panel.grid.major=element_line(size=0.2), - panel.grid.minor=element_line(size=0.1), - text=element_text(size=12), - legend.position="bottom", - aspect.ratio=1, - strip.background=element_blank(), - axis.title.x=element_text(margin=margin(t=10, r=10, b=10, l=10)), - axis.title.y=element_text(margin=margin(t=10, r=10, b=10, l=10)) - ) - ) +custom_theme <- list( + scale_fill_manual(values=friendly_cols), + scale_color_manual(values=friendly_cols), + theme_bw() + theme( + aspect.ratio=1, + legend.position="bottom", + axis.line=element_line(), + text=element_text(size=12), + panel.border=element_blank(), + strip.background=element_blank(), + panel.grid.major=element_line(linewidth=0.2), + panel.grid.minor=element_line(linewidth=0.1), + axis.title.x=element_text(margin=margin(t=10, r=10, b=10, l=10)), + axis.title.y=element_text(margin=margin(t=10, r=10, b=10, l=10)))) ``` -We can treat `pbmc_small_polished` as a tibble for plotting. +We can treat `pbmc_small_polished` as a `tibble` for plotting. Here we plot number of features per cell. ```{r plot1} pbmc_small_polished %>% - tidySingleCellExperiment::ggplot(aes(nFeature_RNA, fill=groups)) + + ggplot(aes(nFeature_RNA, fill=groups)) + geom_histogram() + custom_theme ``` @@ -159,7 +158,7 @@ Here we plot total features per cell. 
```{r plot2} pbmc_small_polished %>% - tidySingleCellExperiment::ggplot(aes(groups, nCount_RNA, fill=groups)) + + ggplot(aes(groups, nCount_RNA, fill=groups)) + geom_boxplot(outlier.shape=NA) + geom_jitter(width=0.1) + custom_theme @@ -177,9 +176,12 @@ pbmc_small_polished %>% custom_theme ``` -# Preprocess the dataset +# Preprocessing -We can also treat `pbmc_small_polished` as a *SingleCellExperiment* object and proceed with data processing with Bioconductor packages, such as *scran* [@lun2016pooling] and *scater* [@mccarthy2017scater]. +We can also treat `pbmc_small_polished` as a `SingleCellExperiment` object +and proceed with data processing with Bioconductor packages, such as +`r BiocStyle::Biocpkg("scran")` [@lun2016pooling] and +`r BiocStyle::Biocpkg("scater")` [@mccarthy2017scater]. ```{r preprocess} # Identify variable genes with scran @@ -196,25 +198,27 @@ pbmc_small_pca <- pbmc_small_pca ``` -If a tidyverse-compatible package is not included in the tidySingleCellExperiment collection, we can use `as_tibble` to permanently convert `tidySingleCellExperiment` into a tibble. +If a *tidyverse*-compatible package is not included in the `tidySingleCellExperiment` collection, +we can use `as_tibble()` to permanently convert a `tidySingleCellExperiment` into a `tibble`. ```{r pc_plot} -# Create pairs plot with GGally +# Create pairs plot with 'GGally' pbmc_small_pca %>% as_tibble() %>% select(contains("PC"), everything()) %>% - GGally::ggpairs(columns=1:5, ggplot2::aes(colour=groups)) + + GGally::ggpairs(columns=1:5, aes(colour=groups)) + custom_theme ``` -# Identify clusters +# Clustering -We can proceed with cluster identification with *scran*. +We can proceed with cluster identification with `r BiocStyle::Biocpkg("scran")`. 
```{r cluster} pbmc_small_cluster <- pbmc_small_pca -# Assign clusters to the 'colLabels' of the SingleCellExperiment object +# Assign clusters to the 'colLabels' +# of the 'SingleCellExperiment' object colLabels(pbmc_small_cluster) <- pbmc_small_pca %>% buildSNNGraph(use.dimred="PCA") %>% @@ -223,18 +227,20 @@ colLabels(pbmc_small_cluster) <- as.factor() # Reorder columns -pbmc_small_cluster %>% select(label, everything()) +pbmc_small_cluster %>% + select(label, everything()) ``` -And interrogate the output as if it was a regular tibble. +And interrogate the output as if it was a regular `tibble`. ```{r cluster count} # Count number of cells for each cluster per group pbmc_small_cluster %>% - tidySingleCellExperiment::count(groups, label) + count(groups, label) ``` -We can identify and visualise cluster markers combining SingleCellExperiment, tidyverse functions and tidyHeatmap [@mangiola2020tidyheatmap] +We can identify and visualise cluster markers combining `SingleCellExperiment`, +*tidyverse* functions and `r BiocStyle::CRANpkg("tidyHeatmap")` [@mangiola2020tidyheatmap]. ```{r} # Identify top 10 markers per cluster @@ -251,12 +257,14 @@ marker_genes <- pbmc_small_cluster %>% join_features(features=marker_genes) %>% group_by(label) %>% - heatmap(.feature, .cell, .abundance_counts, .scale="column") + heatmap( + .row=.feature, .column=.cell, + .value=.abundance_counts, scale="column") ``` # Reduce dimensions -We can calculate the first 3 UMAP dimensions using the SingleCellExperiment framework and *scater*. +We can calculate the first 3 UMAP dimensions using `r BiocStyle::Biocpkg("scater")`. ```{r umap} pbmc_small_UMAP <- @@ -264,7 +272,7 @@ pbmc_small_UMAP <- runUMAP(ncomponents=3) ``` -And we can plot the result in 3D using plotly. +And we can plot the result in 3D using `r BiocStyle::CRANpkg("plotly")`. 
```{r umap plot, eval=FALSE} pbmc_small_UMAP %>% @@ -273,31 +281,28 @@ pbmc_small_UMAP %>% y=~`UMAP2`, z=~`UMAP3`, color=~label, - colors=friendly_cols[1:4] - ) + colors=friendly_cols[1:4]) ``` - ![plotly screenshot](../man/figures/plotly.png) # Cell type prediction -We can infer cell type identities using *SingleR* [@aran2019reference] and manipulate the output using tidyverse. +We can infer cell type identities using `r BiocStyle::Biocpkg("SingleR")` +[@aran2019reference] and manipulate the output using *tidyverse*. ```{r eval=FALSE} # Get cell type reference data blueprint <- celldex::BlueprintEncodeData() # Infer cell identities -cell_type_df <- - - assays(pbmc_small_UMAP)$logcounts %>% +cell_type_df <- + logcounts(pbmc_small_UMAP) %>% Matrix::Matrix(sparse = TRUE) %>% SingleR::SingleR( - ref = blueprint, - labels = blueprint$label.main, - method = "single" - ) %>% + ref=blueprint, + labels=blueprint$label.main, + method="single") %>% as.data.frame() %>% as_tibble(rownames="cell") %>% select(cell, first.labels) @@ -311,10 +316,11 @@ pbmc_small_cell_type <- # Reorder columns pbmc_small_cell_type %>% - tidySingleCellExperiment::select(cell, first.labels, everything()) + select(cell, first.labels, everything()) ``` -We can easily summarise the results. For example, we can see how cell type classification overlaps with cluster classification. +We can easily summarise the results. For example, we can see how +cell type classification overlaps with cluster classification. ```{r} # Count number of cells for each cell type per cluster @@ -326,17 +332,14 @@ We can easily reshape the data for building information-rich faceted plots. 
```{r} pbmc_small_cell_type %>% - # Reshape and add classifier column pivot_longer( cols=c(label, first.labels), - names_to="classifier", values_to="label" - ) %>% - + names_to="classifier", values_to="label") %>% # UMAP plots for cell type and cluster ggplot(aes(UMAP1, UMAP2, color=label)) + - geom_point() + facet_wrap(~classifier) + + geom_point() + custom_theme ``` @@ -344,15 +347,13 @@ We can easily plot gene correlation per cell category, adding multi-layer annota ```{r} pbmc_small_cell_type %>% - # Add some mitochondrial abundance values mutate(mitochondrial=rnorm(dplyr::n())) %>% - # Plot correlation join_features(features=c("CST3", "LYZ"), shape="wide") %>% - ggplot(aes(CST3 + 1, LYZ + 1, color=groups, size=mitochondrial)) + - geom_point() + + ggplot(aes(CST3+1, LYZ+1, color=groups, size=mitochondrial)) + facet_wrap(~first.labels, scales="free") + + geom_point() + scale_x_log10() + scale_y_log10() + custom_theme @@ -360,62 +361,64 @@ pbmc_small_cell_type %>% # Nested analyses -A powerful tool we can use with tidySingleCellExperiment is tidyverse `nest`. We can easily perform independent analyses on subsets of the dataset. First we classify cell types into lymphoid and myeloid, and then nest based on the new classification. +A powerful tool we can use with `tidySingleCellExperiment` is *tidyverse*'s `nest()`. +We can easily perform independent analyses on subsets of the dataset. +First, we classify cell types into lymphoid and myeloid, +and then `nest()` based on the new classification. 
```{r} pbmc_small_nested <- pbmc_small_cell_type %>% filter(first.labels != "Erythrocytes") %>% - mutate(cell_class=dplyr::if_else(`first.labels` %in% c("Macrophages", "Monocytes"), "myeloid", "lymphoid")) %>% + mutate(cell_class=if_else( + first.labels %in% c("Macrophages", "Monocytes"), + true="myeloid", false="lymphoid")) %>% nest(data=-cell_class) pbmc_small_nested ``` -Now we can independently for the lymphoid and myeloid subsets (i) find variable features, (ii) reduce dimensions, and (iii) cluster using both tidyverse and SingleCellExperiment seamlessly. +Now we can independently for the lymphoid and myeloid subsets +(i) find variable features, (ii) reduce dimensions, and (iii) +cluster using both tidyverse and SingleCellExperiment seamlessly. ```{r warning=FALSE} pbmc_small_nested_reanalysed <- pbmc_small_nested %>% - mutate(data=map( - data, ~ { - .x <- runPCA(.x, subset_row=variable_genes) - - variable_genes <- - .x %>% - modelGeneVar() %>% - getTopHVGs(prop=0.3) - - colLabels(.x) <- - .x %>% - buildSNNGraph(use.dimred="PCA") %>% - igraph::cluster_walktrap() %$% - membership %>% - as.factor() - - .x %>% runUMAP(ncomponents=3) - } - )) - + mutate(data=map(data, ~ { + # feature selection + variable_genes <- .x %>% + modelGeneVar() %>% + getTopHVGs(prop=0.3) + # dimension reduction + .x <- .x %>% + runPCA(subset_row=variable_genes) %>% + runUMAP(ncomponents=3) + # clustering + colLabels(.x) <- .x %>% + buildSNNGraph(use.dimred="PCA") %>% + cluster_walktrap() %$% + membership %>% + as.factor() + return(.x) + })) pbmc_small_nested_reanalysed ``` -We can then unnest and plot the new classification. +We can then `unnest()` and plot the new classification. ```{r} pbmc_small_nested_reanalysed %>% - - # Convert to tibble otherwise SingleCellExperiment drops reduced dimensions when unifying data sets. - mutate(data=map(data, ~ .x %>% as_tibble())) %>% + # Convert to 'tibble', else 'SingleCellExperiment' + # drops reduced dimensions when unifying data sets. 
+ mutate(data=map(data, ~as_tibble(.x))) %>% unnest(data) %>% - # Define unique clusters unite("cluster", c(cell_class, label), remove=FALSE) %>% - # Plotting ggplot(aes(UMAP1, UMAP2, color=cluster)) + - geom_point() + facet_wrap(~cell_class) + + geom_point() + custom_theme ``` @@ -424,22 +427,17 @@ We can perform a large number of functional analyses on data subsets. For exampl ```{r, eval=FALSE} pbmc_small_nested_interactions <- pbmc_small_nested_reanalysed %>% - # Unnest based on cell category unnest(data) %>% - # Create unambiguous clusters mutate(integrated_clusters=first.labels %>% as.factor() %>% as.integer()) %>% - # Nest based on sample tidySingleCellExperiment::nest(data=-sample) %>% tidySingleCellExperiment::mutate(interactions=map(data, ~ { - # Produce variables. Yuck! cluster <- colData(.x)$integrated_clusters - data <- data.frame(assays(.x) %>% as.list() %>% .[[1]] %>% as.matrix()) - - # Ligand/Receptor analysis using SingleCellSignalR + data <- data.frame(assay(.x) %>% as.matrix()) + # Ligand/Receptor analysis using 'SingleCellSignalR' data %>% cell_signaling(genes=rownames(data), cluster=cluster) %>% inter_network(data=data, signal=., genes=rownames(data), cluster=cluster) %$% @@ -452,7 +450,8 @@ pbmc_small_nested_interactions %>% unnest(interactions) ``` -If the dataset was not so small, and interactions could be identified, you would see something like below. +If the dataset were not so small, and interactions could be identified, +you would see something like the output below. ```{r} tidySingleCellExperiment::pbmc_small_nested_interactions @@ -460,13 +459,16 @@ tidySingleCellExperiment::pbmc_small_nested_interactions # Aggregating cells -Sometimes, it is necessary to aggregate the gene-transcript abundance from a group of cells into a single value. For example, when comparing groups of cells across different samples with fixed-effect models. +Sometimes, it is necessary to aggregate the gene-transcript abundance +from a group of cells into a single value.
For example, when comparing +groups of cells across different samples with fixed-effect models. -In tidySingleCellExperiment, cell aggregation can be achieved using the `aggregate_cells` function. +In `tidySingleCellExperiment`, cell aggregation can be achieved using `aggregate_cells()`, +which will return an object of class `r BiocStyle::Biocpkg("SummarizedExperiment")`. ```{r} pbmc_small_tidy %>% - aggregate_cells(groups, assays = "counts") + aggregate_cells(groups, assays="counts") ``` # Session Info @@ -475,4 +477,4 @@ pbmc_small_tidy %>% sessionInfo() ``` -# References +# References \ No newline at end of file