Skip to content

Commit f38d190

Browse files
authored
Merge pull request #86 from stemangiola/speedup-aggregate_cells
Speedup aggregate cells
2 parents 325b0a9 + dfc16a9 commit f38d190

File tree

3 files changed

+106
-41
lines changed

3 files changed

+106
-41
lines changed

NAMESPACE

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,7 @@ importFrom(Matrix,rowSums)
4949
importFrom(S4Vectors,"metadata<-")
5050
importFrom(S4Vectors,DataFrame)
5151
importFrom(S4Vectors,metadata)
52+
importFrom(S4Vectors,split)
5253
importFrom(SingleCellExperiment,cbind)
5354
importFrom(SingleCellExperiment,reducedDims)
5455
importFrom(SummarizedExperiment,"assays<-")
@@ -69,6 +70,7 @@ importFrom(dplyr,filter)
6970
importFrom(dplyr,full_join)
7071
importFrom(dplyr,group_by)
7172
importFrom(dplyr,group_by_drop_default)
73+
importFrom(dplyr,group_split)
7274
importFrom(dplyr,inner_join)
7375
importFrom(dplyr,left_join)
7476
importFrom(dplyr,mutate)
@@ -123,6 +125,7 @@ importFrom(rlang,quo_squash)
123125
importFrom(stats,setNames)
124126
importFrom(stringr,regex)
125127
importFrom(stringr,str_detect)
128+
importFrom(stringr,str_remove)
126129
importFrom(stringr,str_replace_all)
127130
importFrom(tibble,as_tibble)
128131
importFrom(tibble,enframe)

R/methods.R

Lines changed: 76 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@ setMethod("join_features", "SingleCellExperiment", function(.data,
3939
# CRAN Note
4040
.cell <- NULL
4141
.feature <- NULL
42-
42+
4343
# Shape is long
4444
if (shape == "long") {
4545
.data %>%
@@ -50,11 +50,11 @@ setMethod("join_features", "SingleCellExperiment", function(.data,
5050
features=features,
5151
all=all,
5252
exclude_zeros=exclude_zeros)) %>%
53-
select(!!c_(.data)$symbol, .feature,
53+
select(!!c_(.data)$symbol, .feature,
5454
contains(".abundance"), everything())
5555
# Shape if wide
5656
} else {
57-
.data %>%
57+
.data %>%
5858
left_join(
5959
by=c_(.data)$name,
6060
get_abundance_sc_wide(
@@ -84,7 +84,7 @@ tidy <- function(object) {
8484
#' @importFrom lifecycle deprecate_warn
8585
#' @export
8686
tidy.SingleCellExperiment <- function(object) {
87-
87+
8888
# DEPRECATE
8989
deprecate_warn(
9090
when="1.1.1",
@@ -98,8 +98,8 @@ tidy.SingleCellExperiment <- function(object) {
9898
#' @rdname aggregate_cells
9999
#' @inherit ttservice::aggregate_cells
100100
#' @aliases aggregate_cells,SingleCellExperiment-method
101-
#'
102-
#' @examples
101+
#'
102+
#' @examples
103103
#' data(pbmc_small)
104104
#' pbmc_small_pseudo_bulk <- pbmc_small |>
105105
#' aggregate_cells(c(groups, ident), assays="counts")
@@ -110,48 +110,88 @@ tidy.SingleCellExperiment <- function(object) {
110110
#' @importFrom Matrix rowSums
111111
#' @importFrom ttservice aggregate_cells
112112
#' @importFrom SummarizedExperiment assays assays<- assayNames
113+
#' @importFrom S4Vectors split
114+
#' @importFrom stringr str_remove
115+
#' @importFrom dplyr group_split
116+
#'
117+
#'
113118
#' @export
114119
setMethod("aggregate_cells", "SingleCellExperiment", function(.data,
115-
.sample=NULL, slot="data", assays=NULL,
120+
.sample=NULL, slot="data", assays=NULL,
116121
aggregation_function=Matrix::rowSums,
117122
...) {
118-
123+
119124
# Fix NOTEs
120125
feature <- NULL
121126
.sample <- enquo(.sample)
122-
127+
123128
# Subset only wanted assays
124129
if (!is.null(assays)) {
125130
assays(.data) <- assays(.data)[assays]
126131
}
127-
128-
.data %>%
129-
nest(data=-!!.sample) %>%
130-
mutate(.aggregated_cells=as.integer(map(data, ~ ncol(.x)))) %>%
131-
mutate(
132-
data=map(data, ~ {
133-
# Loop over assays
134-
map2(as.list(assays(.x)), assayNames(.x), ~ {
135-
# Get counts
136-
.x %>%
137-
aggregation_function(na.rm=TRUE) %>%
138-
enframe(
139-
name ="feature",
140-
value=sprintf("%s", .y)) %>%
141-
mutate(feature=as.character(feature))
142-
}) %>%
143-
Reduce(function(...) full_join(..., by="feature"), .)
144-
})
145-
) %>%
132+
133+
134+
grouping_factor =
135+
.data |>
136+
colData() |>
137+
as_tibble() |>
138+
select(!!.sample) |>
139+
suppressMessages() |>
140+
unite("my_id_to_split_by___", !!.sample, sep = "___") |>
141+
pull(my_id_to_split_by___) |>
142+
as.factor()
143+
144+
list_count_cells = table(grouping_factor) |> as.list()
145+
146+
# New method
147+
list_assays =
148+
.data |>
149+
assays() |>
150+
as.list() |>
151+
map(~ .x |> splitColData(grouping_factor)) |>
152+
unlist(recursive=FALSE)
153+
154+
list_assays =
155+
list_assays |>
156+
map2(names(list_assays), ~ {
157+
# Get counts
158+
.x %>%
159+
aggregation_function(na.rm=TRUE) %>%
160+
enframe(
161+
name =".feature",
162+
value="x") %>% # sprintf("%s", .y)) %>%
163+
164+
# In case we don't have rownames
165+
mutate(.feature=as.character(.feature))
166+
}) |>
167+
enframe(name = ".sample") |>
168+
169+
# Clean groups
170+
mutate(assay_name = assayNames(!!.data) |> rep(each=length(levels(grouping_factor)))) |>
171+
mutate(.sample = .sample |> str_remove(assay_name) |> str_remove("\\.")) |>
172+
group_split(.sample) |>
173+
map(~ .x |> unnest(value) |> pivot_wider(names_from = assay_name, values_from = x) ) |>
174+
175+
# Add cell count
176+
map2(
177+
list_count_cells,
178+
~ .x |> mutate(.aggregated_cells = .y)
179+
)
180+
181+
182+
do.call(rbind, list_assays) |>
183+
146184
left_join(
147-
.data %>%
148-
as_tibble() %>%
149-
subset(!!.sample),
150-
by=quo_names(.sample)) %>%
151-
unnest(data) %>%
152-
drop_class("tidySingleCellExperiment_nested") %>%
185+
.data |>
186+
colData() |>
187+
as_tibble() |>
188+
subset(!!.sample) |>
189+
unite("my_id_to_split_by___", !!.sample, remove=FALSE, sep = "___"),
190+
by= join_by(".sample" == "my_id_to_split_by___")
191+
) |>
192+
153193
as_SummarizedExperiment(
154-
.sample=!!.sample,
155-
.transcript=feature,
194+
.sample=.sample,
195+
.transcript=.feature,
156196
.abundance=!!as.symbol(names(.data@assays)))
157197
})

R/utilities.R

Lines changed: 27 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -365,7 +365,6 @@ get_special_column_name_cell <- function(name) {
365365
list(name=name, symbol=as.symbol(name))
366366
}
367367

368-
cell__ <- get_special_column_name_symbol(".cell")
369368

370369
#' @importFrom S4Vectors metadata
371370
c_ <- function(x) {
@@ -442,12 +441,17 @@ get_specific_annotation_columns <- function(.data, .col) {
442441
# x-annotation df
443442
n_x <- .data |> distinct_at(vars(!!.col)) |> nrow()
444443

444+
# Exclude columns that have more values than my .col
445+
columns_unique_length = .data |> select(-!!.col) |> lapply(function(x) unique(x) |> length())
446+
columns_unique_length = columns_unique_length[columns_unique_length<=n_x]
447+
448+
.sample = .data |> select(!!.col) |> unite(".sample", !!.col) |> pull(.sample)
449+
445450
# element wise columns
446-
.data |>
447-
select(-!!.col) |>
448-
colnames() |>
451+
columns_unique_length |>
452+
names() |>
449453
map(~ {
450-
n_.x <- .data |> distinct_at(vars(!!.col, .x)) |> nrow()
454+
n_.x <- .data |> pull(all_of(.x)) |> paste(.sample) |> unique() |> length()
451455
if (n_.x == n_x) .x else NULL
452456
}) %>%
453457
# Drop NULL
@@ -482,5 +486,23 @@ subset <- function(.data, .column) {
482486
distinct()
483487
}
484488

489+
490+
splitColData <- function(x, f) {
491+
# This is by @jma1991
492+
# at https://github.com/drisso/SingleCellExperiment/issues/55
493+
494+
i <- split(seq_along(f), f)
495+
496+
v <- vector(mode = "list", length = length(i))
497+
498+
names(v) <- names(i)
499+
500+
for (n in names(i)) { v[[n]] <- x[, i[[n]]] }
501+
502+
return(v)
503+
504+
}
505+
506+
cell__ <- get_special_column_name_symbol(".cell")
485507
feature__ <- get_special_column_name_symbol(".feature")
486508
sample__ <- get_special_column_name_symbol(".sample")

0 commit comments

Comments
 (0)