From 9dfd97e301335093806df014f5d21339d0ba42fb Mon Sep 17 00:00:00 2001 From: Boyd Tarlinton Date: Sun, 15 Oct 2023 17:18:24 +1000 Subject: [PATCH 01/20] group_split by column --- R/dplyr_methods.R | 29 ++++++++++++++++++++++++++++ tests/testthat/Rplots.pdf | Bin 0 -> 5794 bytes tests/testthat/test-dplyr_methods.R | 7 +++++++ 3 files changed, 36 insertions(+) create mode 100644 tests/testthat/Rplots.pdf diff --git a/R/dplyr_methods.R b/R/dplyr_methods.R index db079f9..9418d5a 100755 --- a/R/dplyr_methods.R +++ b/R/dplyr_methods.R @@ -875,3 +875,32 @@ pull.SingleCellExperiment <- function(.data, var=-1, name=NULL, ...) { as_tibble() %>% dplyr::pull(var=!!var, name=!!name, ...) } + +#' @name group_split +#' @rdname group_split +#' @inherit dplyr::group_split +#' +#' @param .data +#' +#' @return +#' @export +#' +#' @examples +#' data(pbmc_small) +#' pbmc_small |> group_split(pbmc_small, groups) +group_split.SingleCellExperiment <- function(.data, var) { + var <- enquo(var) + + var_list <- .data |> + as_tibble() |> + select(!!var) |> + unlist(use.names = FALSE) + + groups <- unique(var_list) + + v <- vector(mode = "list", length = length(groups)) + + for (n in seq_along(groups)) { v[[n]] <- .data[, var_list == groups[[n]]] } + + v +} diff --git a/tests/testthat/Rplots.pdf b/tests/testthat/Rplots.pdf new file mode 100644 index 0000000000000000000000000000000000000000..73b7057a8fa17bcb60fff62d6a7dc05ba57d1d47 GIT binary patch literal 5794 zcma)A2UHVXx1~sn(m_Be0}3K7BoLY)E!0S_(gYz$1QJYw(z}2(MHEDOZz3QdO;Dr? zLg+ak@6)gN-v9ntE0Z;6_PyuKU3<>#y9Ev5n&J=%X^>#>bnr|tA$ZUR z3xWWW0Os;lkfI_8td2!G;87Sf+yRdSgy9NMNvIS=5(0zBLS!IPXF*^+jMM*l&lH7s zBaSi#w6G2yE+|JFVBm@O@Wc~`8DcR`o{mT?>8`YdlmvtrJIsY4h0cP`p9dk)PNXWJ zzp4SirrsV%0IcqScW}d80lqwkW`d{;r06&-G0up@%?*hPNqyVrc$_-C^0I;STac$s8M~oAaR5lJz zTrzi%S8ASFGy1LMvBpJFuL}$x;W~+>u2E=}Ao{V(_7T|;;;szMcQ5yZfBWvD^K@Rw z)ol|H0sBeoBpV1>Qa1Aj6cJcQ8wQgYMaRlYti`w`fh~VAaj2zd-0LuVh(`Edj}q`jrm*NtL$-k@YrU~J5NQ| zs=cThT*X};xuH~_yYMS1N{@NT33n)qBup7pzd{#NQcn?TnPTSr%=n#bXh*nCThMZG zgh(Ba1)L_B+gE`866(^kUyf)r%VeH1@_3nT-_nt-(M%T|0?XnEi=oPb%!dM}#T3?` z!LoD09JNwQQR|{go01y}AyeB3XTV9AHjdU^i3!RUR7#%;>JK(<7FT=qKMlHI%NwdT=Nudp3@AlQZ){jkddo34qv_Zv&|Z~|eE zvv^L!$~+7aFNISeBv`a{X7z9EM-P|wx2|3$fD5HOH^f<}E3`ki%E!J}6+QKt?FVo_ z9vc;C*rhG^a`GrLH)Wa+553zn!4)tFL6ScVGUZSbx<)6z^*vtU<_X^)+Nn?CYoelP zCa}r%th>*QX4JoG!u3b)4IYRWYUDOb=Z<2S(lXw>HL8TqyrxU(!e=}_5xNYW1Ybv_ zOIZ57{T|?BcV49-;9`5sJpn?jwt-A~`3#{%PvEYbzOCo^kIILLe{oq_UiQD>@;$xVtdL{jE4%FHpVGvnt#G5LwC}5w z^W18=%2?Q^uE9~EF};%D1WkDT^o@RXgxBoaVr#D=Z#caq_lMg%3XY9R@BD3=mrrxv zqP6mUqm;PnAfWsNlAF>oXh2?)MRA9>nzD_P zvnm=(`Npb>+jEtk*Y#GIs;-N>$XcEs`M#?-(+M-{%q!xXDV0MmoN;P2w>25QgDb=- zP&^{VA=4JY`X|FhgacSEtdu96yeU>ve`68NePQA0$l$(N<~^;^A(MlAUP9qpA0Oq> zmfiKeqe=|u{*OG5)YNk_YhwXe!AoIb^r;7wpAfFY)rP{la;7PD-%_X&9g0a^a?cCBe4vXLFyiLmF^4qj#{H=Vfd*itnVR89Fr8c1Tw zd8b*s-fG$*&bZE6Qs<#`_)_Px1>6rr!-a4%=?}SmCY_2AcWn7IL>d^b4~T7S&c08& z#xO&raO|Fb)1@y-Oo3_9D-F+|XCyNAxu+*xYl-iX$2^?5m=*QzYIJ_a4EWvN^1F#d zu*4eQd`861Z%l;z^-cKis7UJC#=w8*s*u0>Itcu;t-}5rDw^qGSXGZT63SMY8DLv} zU%uF|k{b&$-O=V2Y0c_Whl{8(9QZ5pHOvSaH5f6TE3r_%y_7U3-XczZ)# zdI072nHFt$CSmZK<5A2Fj+YHf9S5Vdfj#eJfgJ}<^hbG4A^sOMCUzfbXNMOF29zm9 zlpYK@lpPN^4?aMIyy@l2S3ig1-h-}OQ<1^9|bm!@{i{UNFOzur33|?-971#%-rsh9+g+|%u zndc_=WeUi}i^fQ0scLjS*>ez*F7&nB$<$}#RN4*}`9g`~?)S|@wdq1Z^Er1PT^3BP z4AG}`i0?#4E; zj@$nrL)lef**XUzr44=D07*I+1EjWV3>;G47_cSk z4UFfZZUE~colp*H7%#w@)M;Uo(xjj5-=qggI{a*}M1|vMg2V&XV4}t%rbBw+Z9!m7 zqU-<^6~UTN0P;&{fHl1ES|)gs#8OlwO-D@WhQXS6I5-|gz&K+qH2`*47V++1Qil{G z<+BFssH>|v;E+y$3<+iXQ#_Hbem<}&Nv@ztNx*RAb(ABLG$~l0D2RT(e^8jb%>Vi= zAl39s*Q6!hFf|!J9zkf7Z8W^g6m_r9)y0@Xo4!#yUP1&y$E5q>4w--q+|&bZ6rWEE z75)&EqaI%&5OtH>sABO57kucN2iwU+9j8*R$!FdxJt>oX+X1c_jf8hsR{Tfm$?hD4 zQoX!jNf&f1%aUzst}QY)`ieab`LR_pN;k3(9+j19$9FU-k}3SgV%(BhcQ_jB8Gzk= zkwN8{X0jg{_v$VRK*$Uq#LTVljN5Q=ypmJ0NENIxc=h0^v|g{IZm+<_5U6Q{?K#^-d#KXD80g#LBZY#l+ilam>-~)Ipns97FLs;VSKz*$OT{eHk+0 zyAu+ha3)S};*NKPp&{Zk-ee!NUtL-q09(E$uy!-H^}e=}U^!h7$t7fC5+0kt8a+iI z=*>m9y_RHIWb=0CdLnRcOio@-(p}uWNq8vG_8{f%_Vq-& zu)t+5INwgomPJ4^Jz04^vU+?k!X3py`P$KPv z(UOS?8w7cV>sj#p%3cOTp!y|0f^pNkm5&Zy8wqlE@ep-8S+uiHKD@QFciLn3*e7DA z=cD=V$$Omn;B-?Y_?`y5JO2W3A+g%JlQohmSuI>;C*$D`W6Ss4@w&uYZ+4jb5-ln6 z_)n+=@RA9OFavbHPNID){zu!jD0h3J`;#A}?Gr-2`&-mr_}+Q^bYPsL793j3V$}QP z;ei!}G(e`ILb;%Fa-Nd)#gV5km}|)xKTuMr3P&EfW)Jxg!ghxo4sfsqRcCVE3n{ff zQ%LQZ$t)czdc1R_h<#x)wC(tRTeXi3%QHK7qXn^t>=aDC+f-Q)oU`1Tv+zleP~We(&aZ> zl{33hRoWZK(Z+>wG{0Jf%JE5r)V#RwLFvb|9A?mx^nuwDq#Rtp9K^(|kR2__eEg-R zTD;e3I%fWoJ1n^rD3v#NcysycqO{`+I<48LH8dcG7w#SH;$y!#ot>Cvo+Xo0Y(!(2 zR(O8MY)JSj=qt0HrfYUjfA2gQUQB^2Em}g;A~U6rXTEZtc~fqWojNqjF$NOH8F+zP zh`o}clB@FMV4VC-uJ#y*{0hrQ3M1T$QS@!t^2=_JryyS@Ulw1gb8-Hmr>5~9PvtJz zDC$Rwwwisg@;KvR;lWzV^^D~`(@G3u+s1158gfU|k1>E&IWB`=m^Fs(ahSH6h=X|6 z-9Ak@L@mNg^)$=ZNc~oi?4a6kOC3eHq5-#Hlc15d^J$s0!KeA%`IpY53A__%1QK3p zIx%7uFJ5@Bh8D`;pFH#2C{x5z+P6wujjZxabMU60!w0IMX5*Ujrt<3EmDuQYs*hT1Vr`Mr>QhEh!u!HkgdI)8 ziVct^bwi#*nhO^f`_r^x>dUAy0Kl?*w6B0o$RNa|GWv^ zbJ+9TJ7*?tW?mir1LC3LN5vb&GA?piwpqJZF4XL~xMR}L?_B&l!Y{MBn79l&Zj_WJ zi;q5S-+E)SZZla|GIyQekG_KLsCnynV=(i$D@N9+ zquW7_?#^j9)XsU!mtI%+v^g@~eDRCl*e(Z0KH?!Fx;w|aePd_i#9VCMLLJs-(KZi# ztOj1wh&H}qdn3*_bf1#ZpD|r4p(CuI=t}XND&dOVh@C(gL)oa?l`Y+^vzXSmgoak% zzKChok0wjFNehDPc+K#QF2Z}v&B?QYRe^VY$ox1-Zbd#3^eL!FrQ^l@V1L!zOe)o3 zRmPB>5RGOfeh+Q?&aFEQ8}xHW=Wg5#=?m&x%G!Pz(~@}8pD&J%@n{)cAg2SX5mW#3 zpnCgPA&FFF0*wOQ0xR%jxK6fD4tox3c1q5r4zG@T9omkimo3`uq6Q;bTRl4*XL4pz zrVBXs#3t%I6r0ay$xZDE%xHK=ONfnFrZ(;Du(S!L3TmaIS`ar zjVP#fTb5X6O{c-oVD7GTf#(cP*9qM}qojLX?|r&r@^rd!{JQCjb=d_I;$kqs6?3*|KOUbHgi7RKD1hD0~%5Jx-zOVw>vdy$wo#H!~#RAk4HagXpr)L-DlSQH=f%CTQ%GETW4CAKefl_&2{aW z^%dup_*g8KNmwa0wPSjMtHY{Mu7Sc;5@KM#Itgc6gLw1)huIZh0~W2L*`qYtWMyjI z3g1S)5gwDPMJ?wS>+vDH1UV3;2vOZh%qMiy&ix*X`jTA{+rPT?}oB3Yh#1v1!fjj+vqa;l^&nxd%e^-;Zgn%{N?v~Wi(~C0(vVF8 zsy?>_Oa*)nj->SOiR1<{KxZT&xu8v1^oPy<>)G)e z&%F+ylI-VT-)%Mz2Fp?|6+R#PF|dqW!H;czv*0Rowo7_u6o~sEzUcm=;lT}7sk)Vl z6;b5}0rR_@n`_GjF$NjLeO1`T{%^V)@|W-i{b}N9poxYV3QctJNV@f}D+?4_6^%pv z41mCJl(RFE=-iR4Icq=~26jiGJ#m2T-(>KilKv@k|L_P8bvD?<^D_R>G9(2dM6v7O zPE@}{?R}W^4~#a_?K%>Va&!QJe;1c@=!Hc(gCqe63?%vM0Aye=DHz}k{E9(| zyJXSltJkqm%MP(DcF`T<}av@-@E j*$E_j0E58;e^>>FI}8Ub{&1yX(hwPtprD3 + group_split("groups") + expect_equal(length(fd), length(unique(df$groups))) +}) + From b87b8dc87c40e1c72ddce917a9e260d8409eaad9 Mon Sep 17 00:00:00 2001 From: Boyd Tarlinton Date: Sun, 15 Oct 2023 17:18:24 +1000 Subject: [PATCH 02/20] group_split by column issue #35975561 --- R/dplyr_methods.R | 29 ++++++++++++++++++++++++++++ tests/testthat/Rplots.pdf | Bin 0 -> 5794 bytes tests/testthat/test-dplyr_methods.R | 7 +++++++ 3 files changed, 36 insertions(+) create mode 100644 tests/testthat/Rplots.pdf diff --git a/R/dplyr_methods.R b/R/dplyr_methods.R index db079f9..9418d5a 100755 --- a/R/dplyr_methods.R +++ b/R/dplyr_methods.R @@ -875,3 +875,32 @@ pull.SingleCellExperiment <- function(.data, var=-1, name=NULL, ...) { as_tibble() %>% dplyr::pull(var=!!var, name=!!name, ...) } + +#' @name group_split +#' @rdname group_split +#' @inherit dplyr::group_split +#' +#' @param .data +#' +#' @return +#' @export +#' +#' @examples +#' data(pbmc_small) +#' pbmc_small |> group_split(pbmc_small, groups) +group_split.SingleCellExperiment <- function(.data, var) { + var <- enquo(var) + + var_list <- .data |> + as_tibble() |> + select(!!var) |> + unlist(use.names = FALSE) + + groups <- unique(var_list) + + v <- vector(mode = "list", length = length(groups)) + + for (n in seq_along(groups)) { v[[n]] <- .data[, var_list == groups[[n]]] } + + v +} diff --git a/tests/testthat/Rplots.pdf b/tests/testthat/Rplots.pdf new file mode 100644 index 0000000000000000000000000000000000000000..73b7057a8fa17bcb60fff62d6a7dc05ba57d1d47 GIT binary patch literal 5794 zcma)A2UHVXx1~sn(m_Be0}3K7BoLY)E!0S_(gYz$1QJYw(z}2(MHEDOZz3QdO;Dr? zLg+ak@6)gN-v9ntE0Z;6_PyuKU3<>#y9Ev5n&J=%X^>#>bnr|tA$ZUR z3xWWW0Os;lkfI_8td2!G;87Sf+yRdSgy9NMNvIS=5(0zBLS!IPXF*^+jMM*l&lH7s zBaSi#w6G2yE+|JFVBm@O@Wc~`8DcR`o{mT?>8`YdlmvtrJIsY4h0cP`p9dk)PNXWJ zzp4SirrsV%0IcqScW}d80lqwkW`d{;r06&-G0up@%?*hPNqyVrc$_-C^0I;STac$s8M~oAaR5lJz zTrzi%S8ASFGy1LMvBpJFuL}$x;W~+>u2E=}Ao{V(_7T|;;;szMcQ5yZfBWvD^K@Rw z)ol|H0sBeoBpV1>Qa1Aj6cJcQ8wQgYMaRlYti`w`fh~VAaj2zd-0LuVh(`Edj}q`jrm*NtL$-k@YrU~J5NQ| zs=cThT*X};xuH~_yYMS1N{@NT33n)qBup7pzd{#NQcn?TnPTSr%=n#bXh*nCThMZG zgh(Ba1)L_B+gE`866(^kUyf)r%VeH1@_3nT-_nt-(M%T|0?XnEi=oPb%!dM}#T3?` z!LoD09JNwQQR|{go01y}AyeB3XTV9AHjdU^i3!RUR7#%;>JK(<7FT=qKMlHI%NwdT=Nudp3@AlQZ){jkddo34qv_Zv&|Z~|eE zvv^L!$~+7aFNISeBv`a{X7z9EM-P|wx2|3$fD5HOH^f<}E3`ki%E!J}6+QKt?FVo_ z9vc;C*rhG^a`GrLH)Wa+553zn!4)tFL6ScVGUZSbx<)6z^*vtU<_X^)+Nn?CYoelP zCa}r%th>*QX4JoG!u3b)4IYRWYUDOb=Z<2S(lXw>HL8TqyrxU(!e=}_5xNYW1Ybv_ zOIZ57{T|?BcV49-;9`5sJpn?jwt-A~`3#{%PvEYbzOCo^kIILLe{oq_UiQD>@;$xVtdL{jE4%FHpVGvnt#G5LwC}5w z^W18=%2?Q^uE9~EF};%D1WkDT^o@RXgxBoaVr#D=Z#caq_lMg%3XY9R@BD3=mrrxv zqP6mUqm;PnAfWsNlAF>oXh2?)MRA9>nzD_P zvnm=(`Npb>+jEtk*Y#GIs;-N>$XcEs`M#?-(+M-{%q!xXDV0MmoN;P2w>25QgDb=- zP&^{VA=4JY`X|FhgacSEtdu96yeU>ve`68NePQA0$l$(N<~^;^A(MlAUP9qpA0Oq> zmfiKeqe=|u{*OG5)YNk_YhwXe!AoIb^r;7wpAfFY)rP{la;7PD-%_X&9g0a^a?cCBe4vXLFyiLmF^4qj#{H=Vfd*itnVR89Fr8c1Tw zd8b*s-fG$*&bZE6Qs<#`_)_Px1>6rr!-a4%=?}SmCY_2AcWn7IL>d^b4~T7S&c08& z#xO&raO|Fb)1@y-Oo3_9D-F+|XCyNAxu+*xYl-iX$2^?5m=*QzYIJ_a4EWvN^1F#d zu*4eQd`861Z%l;z^-cKis7UJC#=w8*s*u0>Itcu;t-}5rDw^qGSXGZT63SMY8DLv} zU%uF|k{b&$-O=V2Y0c_Whl{8(9QZ5pHOvSaH5f6TE3r_%y_7U3-XczZ)# zdI072nHFt$CSmZK<5A2Fj+YHf9S5Vdfj#eJfgJ}<^hbG4A^sOMCUzfbXNMOF29zm9 zlpYK@lpPN^4?aMIyy@l2S3ig1-h-}OQ<1^9|bm!@{i{UNFOzur33|?-971#%-rsh9+g+|%u zndc_=WeUi}i^fQ0scLjS*>ez*F7&nB$<$}#RN4*}`9g`~?)S|@wdq1Z^Er1PT^3BP z4AG}`i0?#4E; zj@$nrL)lef**XUzr44=D07*I+1EjWV3>;G47_cSk z4UFfZZUE~colp*H7%#w@)M;Uo(xjj5-=qggI{a*}M1|vMg2V&XV4}t%rbBw+Z9!m7 zqU-<^6~UTN0P;&{fHl1ES|)gs#8OlwO-D@WhQXS6I5-|gz&K+qH2`*47V++1Qil{G z<+BFssH>|v;E+y$3<+iXQ#_Hbem<}&Nv@ztNx*RAb(ABLG$~l0D2RT(e^8jb%>Vi= zAl39s*Q6!hFf|!J9zkf7Z8W^g6m_r9)y0@Xo4!#yUP1&y$E5q>4w--q+|&bZ6rWEE z75)&EqaI%&5OtH>sABO57kucN2iwU+9j8*R$!FdxJt>oX+X1c_jf8hsR{Tfm$?hD4 zQoX!jNf&f1%aUzst}QY)`ieab`LR_pN;k3(9+j19$9FU-k}3SgV%(BhcQ_jB8Gzk= zkwN8{X0jg{_v$VRK*$Uq#LTVljN5Q=ypmJ0NENIxc=h0^v|g{IZm+<_5U6Q{?K#^-d#KXD80g#LBZY#l+ilam>-~)Ipns97FLs;VSKz*$OT{eHk+0 zyAu+ha3)S};*NKPp&{Zk-ee!NUtL-q09(E$uy!-H^}e=}U^!h7$t7fC5+0kt8a+iI z=*>m9y_RHIWb=0CdLnRcOio@-(p}uWNq8vG_8{f%_Vq-& zu)t+5INwgomPJ4^Jz04^vU+?k!X3py`P$KPv z(UOS?8w7cV>sj#p%3cOTp!y|0f^pNkm5&Zy8wqlE@ep-8S+uiHKD@QFciLn3*e7DA z=cD=V$$Omn;B-?Y_?`y5JO2W3A+g%JlQohmSuI>;C*$D`W6Ss4@w&uYZ+4jb5-ln6 z_)n+=@RA9OFavbHPNID){zu!jD0h3J`;#A}?Gr-2`&-mr_}+Q^bYPsL793j3V$}QP z;ei!}G(e`ILb;%Fa-Nd)#gV5km}|)xKTuMr3P&EfW)Jxg!ghxo4sfsqRcCVE3n{ff zQ%LQZ$t)czdc1R_h<#x)wC(tRTeXi3%QHK7qXn^t>=aDC+f-Q)oU`1Tv+zleP~We(&aZ> zl{33hRoWZK(Z+>wG{0Jf%JE5r)V#RwLFvb|9A?mx^nuwDq#Rtp9K^(|kR2__eEg-R zTD;e3I%fWoJ1n^rD3v#NcysycqO{`+I<48LH8dcG7w#SH;$y!#ot>Cvo+Xo0Y(!(2 zR(O8MY)JSj=qt0HrfYUjfA2gQUQB^2Em}g;A~U6rXTEZtc~fqWojNqjF$NOH8F+zP zh`o}clB@FMV4VC-uJ#y*{0hrQ3M1T$QS@!t^2=_JryyS@Ulw1gb8-Hmr>5~9PvtJz zDC$Rwwwisg@;KvR;lWzV^^D~`(@G3u+s1158gfU|k1>E&IWB`=m^Fs(ahSH6h=X|6 z-9Ak@L@mNg^)$=ZNc~oi?4a6kOC3eHq5-#Hlc15d^J$s0!KeA%`IpY53A__%1QK3p zIx%7uFJ5@Bh8D`;pFH#2C{x5z+P6wujjZxabMU60!w0IMX5*Ujrt<3EmDuQYs*hT1Vr`Mr>QhEh!u!HkgdI)8 ziVct^bwi#*nhO^f`_r^x>dUAy0Kl?*w6B0o$RNa|GWv^ zbJ+9TJ7*?tW?mir1LC3LN5vb&GA?piwpqJZF4XL~xMR}L?_B&l!Y{MBn79l&Zj_WJ zi;q5S-+E)SZZla|GIyQekG_KLsCnynV=(i$D@N9+ zquW7_?#^j9)XsU!mtI%+v^g@~eDRCl*e(Z0KH?!Fx;w|aePd_i#9VCMLLJs-(KZi# ztOj1wh&H}qdn3*_bf1#ZpD|r4p(CuI=t}XND&dOVh@C(gL)oa?l`Y+^vzXSmgoak% zzKChok0wjFNehDPc+K#QF2Z}v&B?QYRe^VY$ox1-Zbd#3^eL!FrQ^l@V1L!zOe)o3 zRmPB>5RGOfeh+Q?&aFEQ8}xHW=Wg5#=?m&x%G!Pz(~@}8pD&J%@n{)cAg2SX5mW#3 zpnCgPA&FFF0*wOQ0xR%jxK6fD4tox3c1q5r4zG@T9omkimo3`uq6Q;bTRl4*XL4pz zrVBXs#3t%I6r0ay$xZDE%xHK=ONfnFrZ(;Du(S!L3TmaIS`ar zjVP#fTb5X6O{c-oVD7GTf#(cP*9qM}qojLX?|r&r@^rd!{JQCjb=d_I;$kqs6?3*|KOUbHgi7RKD1hD0~%5Jx-zOVw>vdy$wo#H!~#RAk4HagXpr)L-DlSQH=f%CTQ%GETW4CAKefl_&2{aW z^%dup_*g8KNmwa0wPSjMtHY{Mu7Sc;5@KM#Itgc6gLw1)huIZh0~W2L*`qYtWMyjI z3g1S)5gwDPMJ?wS>+vDH1UV3;2vOZh%qMiy&ix*X`jTA{+rPT?}oB3Yh#1v1!fjj+vqa;l^&nxd%e^-;Zgn%{N?v~Wi(~C0(vVF8 zsy?>_Oa*)nj->SOiR1<{KxZT&xu8v1^oPy<>)G)e z&%F+ylI-VT-)%Mz2Fp?|6+R#PF|dqW!H;czv*0Rowo7_u6o~sEzUcm=;lT}7sk)Vl z6;b5}0rR_@n`_GjF$NjLeO1`T{%^V)@|W-i{b}N9poxYV3QctJNV@f}D+?4_6^%pv z41mCJl(RFE=-iR4Icq=~26jiGJ#m2T-(>KilKv@k|L_P8bvD?<^D_R>G9(2dM6v7O zPE@}{?R}W^4~#a_?K%>Va&!QJe;1c@=!Hc(gCqe63?%vM0Aye=DHz}k{E9(| zyJXSltJkqm%MP(DcF`T<}av@-@E j*$E_j0E58;e^>>FI}8Ub{&1yX(hwPtprD3 + group_split("groups") + expect_equal(length(fd), length(unique(df$groups))) +}) + From ea644372420f88ef624450c9c112286d5d7d053d Mon Sep 17 00:00:00 2001 From: Boyd Tarlinton Date: Sun, 15 Oct 2023 19:04:22 +1000 Subject: [PATCH 03/20] split groups row data --- R/dplyr_methods.R | 41 ++++++++++++++++++++-------- tests/testthat/Rplots.pdf | Bin 5794 -> 5789 bytes tests/testthat/test-dplyr_methods.R | 8 ++++++ 3 files changed, 37 insertions(+), 12 deletions(-) diff --git a/R/dplyr_methods.R b/R/dplyr_methods.R index 9418d5a..789eb97 100755 --- a/R/dplyr_methods.R +++ b/R/dplyr_methods.R @@ -889,18 +889,35 @@ pull.SingleCellExperiment <- function(.data, var=-1, name=NULL, ...) { #' data(pbmc_small) #' pbmc_small |> group_split(pbmc_small, groups) group_split.SingleCellExperiment <- function(.data, var) { - var <- enquo(var) - - var_list <- .data |> - as_tibble() |> - select(!!var) |> - unlist(use.names = FALSE) - - groups <- unique(var_list) - v <- vector(mode = "list", length = length(groups)) - - for (n in seq_along(groups)) { v[[n]] <- .data[, var_list == groups[[n]]] } + + if(any(paste(substitute(var)) == names(colData(.data)))) { + var <- enquo(var) + var_list <- .data |> + as_tibble() |> + select(!!var) |> + unlist(use.names = FALSE) + + groups <- unique(var_list) + + v <- vector(mode = "list", length = length(groups)) + + for (n in seq_along(groups)) { + v[[n]] <- .data[, var_list == groups[[n]]] + } + + return(v) + } - v + if(any(paste(substitute(var)) == names(rowData(.data)))) { + var <- enquo(var) + var_list <- .data |> + rowData() |> + as_tibble() |> + select(!!var) |> + unlist(use.names = FALSE) + + split(.data, var_list) |> + as.list() + } } diff --git a/tests/testthat/Rplots.pdf b/tests/testthat/Rplots.pdf index 73b7057a8fa17bcb60fff62d6a7dc05ba57d1d47..5b3a1bf157b9a28919010bcdc0f1d8947aabcb0b 100644 GIT binary patch delta 1315 zcmZ3aJ6Cst6|1Fzv60C{dvPcu`Yw~H!Q_c->h*>Kdw++Y*=Skv;l{nKwZ5z#6CIk? z99j5^(W$t@Lx8tq|M?%vuWzcEbXG;Ik9&6ZT+iQm&Knq-c>e^=*Q>er#(w|&!&&P& zwq8@X{x>W9aQSKBhfnVw|FXJ$|EqtDCN<$K+;wJMCc+PzPp#ima;x+D<;6`u7cy@U z+so=L!5Q=ZLcM$4*Bip0A8@~k>Xuq;sBrztBk!MF*PgpwxN+00sm{#l=Z@DCbaJM4 zGg=2frosP1x#~F10Ypk%Li_!|7n}gFQ!Ixb(BCANn$Z zYwl%J)3U}SrE}lnd_p$NSkZl>vq-eNt#a}jFX)cCMmXViEcb9 zm2l(G*|jqdZV1e8|MT$k%qOK)PeXQ@T)pvx_wv`53PG#(>0P#(JY|Z}`ZX_mjwcj9 zvv~1Q^@!NjjH0UO{`{HB-Je411>7CIE7LbVeX-(s=bq@ADW6^L>e{WURC*^Sf7IsU zx2;aqSIt7E&**qEJF|rWfVKvvf19e_ScUt5;ozh65x%h_fq-0 zSkml3L4*CH+dW_Zefj&Dt4#f%hs%Xqm*1_-S-)L;+xFM%Kdx=9fAmg{qmHx9H-i13 z2+If8CGJLTC7&zm1U5(+F}w_j4(OTZSbtl=e8Fx$Yd)(VjQ*ysPWq}_%$EI#+SBlN zs}M);D%A_!LZ2R<5!%|e_`Z(-G4 zXy4IaYxTWKp*>%GLD$4j+dN+CZ1SG?Ynz8J+gzWCcLgf#o+ol~q;jqQRx{!JvI8y= z?9ZZ@E>FD{p_wS!SUH!sb|MHH9r=HC#Kl^pfqx0cXy++e_|9kLUtfsK@ zzh_o&Rcc;JaY@l;S+*#CTO$Pn5Kzcd-~uxYjEv1KFvLtO4KTz^jf^L65w)r}G)9*) zu(UKm7c(?9H^dM#GsVzrYGjCEy{R#>9VJDHnK`LNTsAfe`oWo1lXJy%ISjc}RbBnv FxBvuyQq}+f delta 1305 zcmbQMyGVC}6|1?SnW6DSdvPcu`Yw~1@#G#hmHLBuw+(pqzSger;d|=hIc;rrSDr%y z?~SB{_C@Ww*?b2lC7o^i`!B zi7dP(wZt~J|7Pv(oe%EaseAQr?ZfrE>KSX5)}&S?EUhE?q_g%N+ol^U=6tVNcj27^k5-48(<3LD5Ybl* zZyCkR%Q)Z0-QD`};C1DbMaO-)Og}sEUd?zV;uY*%c`G8U?fm&)604rOMuoL_Pc$yJ zIzKu5=!u0FKmYE&y7uCc`VUFx^;Z~|e%2LY+vxG~h-L3RM{VVoBL5YpPAuxG4>;vv zap*pG>XO-y!h0B8Qs@6F7uwJ z7g2NjUtLw4ox|?B$YVWIl+O2h_?yk!@c8^{U(bGT1;1a|e6V;O& zn9G<+lK0x)t&KVOPDP@VBdYqA?X(Y7)rxy!>;3X& z{|JBQclh9Jkn_ljW0^*SzmokX!zHtiU-oBQy^v{w+*anJ5)xbLyNj4_MQxEO{lF`i z-Pz*km65CUacK?X?`&NGsnF;J>AGKLZ@!Wt(dWe8@5c8}f1@r_rmWVgrbpq8e$s7P z8g)WG?>9`BZq-@6tNeAIjL)lW*Dn7KT`!PrtZr`SLD4 zm0jcHym-NmUIo+Thcq}yB) z1=fd}rd^Od@>}nHwe7@am)FJ1_UHy{PS&$eeRbPt>t~JJb6cFB{giM&!I|o}{#%XT zc?I(qS5CxuC;Yo(X{BVYxZ#-lBQ=I=KRE8R?2k^Y2%X)UA~_+Jb@%cu534%->kaZZ z$UPE1b=xh@TRrKHk&s~rs{smD(?xg@~&FD`N}ToeE#y} z-tH8!-E>@ViVVaBDZ>gw;t F1pse|JdFSV diff --git a/tests/testthat/test-dplyr_methods.R b/tests/testthat/test-dplyr_methods.R index 4b5c7de..f8c1923 100755 --- a/tests/testthat/test-dplyr_methods.R +++ b/tests/testthat/test-dplyr_methods.R @@ -323,5 +323,13 @@ test_that("group_split()", { fd <- df |> group_split("groups") expect_equal(length(fd), length(unique(df$groups))) + + fd <- df |> + group_split("vst.variable") + expect_equal(length(fd), length(unique(rowData(df)$vst.variable))) + + fd <- df |> + group_split(vst.variable) + expect_equal(length(fd), length(unique(rowData(df)$vst.variable))) }) From 2da015c351e2f821e1949dc995e3dfd6e01d9eee Mon Sep 17 00:00:00 2001 From: Boyd Tarlinton Date: Sun, 15 Oct 2023 19:04:22 +1000 Subject: [PATCH 04/20] split groups row data Issue #35975561 --- R/dplyr_methods.R | 41 ++++++++++++++++++++-------- tests/testthat/Rplots.pdf | Bin 5794 -> 5789 bytes tests/testthat/test-dplyr_methods.R | 8 ++++++ 3 files changed, 37 insertions(+), 12 deletions(-) diff --git a/R/dplyr_methods.R b/R/dplyr_methods.R index 9418d5a..789eb97 100755 --- a/R/dplyr_methods.R +++ b/R/dplyr_methods.R @@ -889,18 +889,35 @@ pull.SingleCellExperiment <- function(.data, var=-1, name=NULL, ...) { #' data(pbmc_small) #' pbmc_small |> group_split(pbmc_small, groups) group_split.SingleCellExperiment <- function(.data, var) { - var <- enquo(var) - - var_list <- .data |> - as_tibble() |> - select(!!var) |> - unlist(use.names = FALSE) - - groups <- unique(var_list) - v <- vector(mode = "list", length = length(groups)) - - for (n in seq_along(groups)) { v[[n]] <- .data[, var_list == groups[[n]]] } + + if(any(paste(substitute(var)) == names(colData(.data)))) { + var <- enquo(var) + var_list <- .data |> + as_tibble() |> + select(!!var) |> + unlist(use.names = FALSE) + + groups <- unique(var_list) + + v <- vector(mode = "list", length = length(groups)) + + for (n in seq_along(groups)) { + v[[n]] <- .data[, var_list == groups[[n]]] + } + + return(v) + } - v + if(any(paste(substitute(var)) == names(rowData(.data)))) { + var <- enquo(var) + var_list <- .data |> + rowData() |> + as_tibble() |> + select(!!var) |> + unlist(use.names = FALSE) + + split(.data, var_list) |> + as.list() + } } diff --git a/tests/testthat/Rplots.pdf b/tests/testthat/Rplots.pdf index 73b7057a8fa17bcb60fff62d6a7dc05ba57d1d47..5b3a1bf157b9a28919010bcdc0f1d8947aabcb0b 100644 GIT binary patch delta 1315 zcmZ3aJ6Cst6|1Fzv60C{dvPcu`Yw~H!Q_c->h*>Kdw++Y*=Skv;l{nKwZ5z#6CIk? z99j5^(W$t@Lx8tq|M?%vuWzcEbXG;Ik9&6ZT+iQm&Knq-c>e^=*Q>er#(w|&!&&P& zwq8@X{x>W9aQSKBhfnVw|FXJ$|EqtDCN<$K+;wJMCc+PzPp#ima;x+D<;6`u7cy@U z+so=L!5Q=ZLcM$4*Bip0A8@~k>Xuq;sBrztBk!MF*PgpwxN+00sm{#l=Z@DCbaJM4 zGg=2frosP1x#~F10Ypk%Li_!|7n}gFQ!Ixb(BCANn$Z zYwl%J)3U}SrE}lnd_p$NSkZl>vq-eNt#a}jFX)cCMmXViEcb9 zm2l(G*|jqdZV1e8|MT$k%qOK)PeXQ@T)pvx_wv`53PG#(>0P#(JY|Z}`ZX_mjwcj9 zvv~1Q^@!NjjH0UO{`{HB-Je411>7CIE7LbVeX-(s=bq@ADW6^L>e{WURC*^Sf7IsU zx2;aqSIt7E&**qEJF|rWfVKvvf19e_ScUt5;ozh65x%h_fq-0 zSkml3L4*CH+dW_Zefj&Dt4#f%hs%Xqm*1_-S-)L;+xFM%Kdx=9fAmg{qmHx9H-i13 z2+If8CGJLTC7&zm1U5(+F}w_j4(OTZSbtl=e8Fx$Yd)(VjQ*ysPWq}_%$EI#+SBlN zs}M);D%A_!LZ2R<5!%|e_`Z(-G4 zXy4IaYxTWKp*>%GLD$4j+dN+CZ1SG?Ynz8J+gzWCcLgf#o+ol~q;jqQRx{!JvI8y= z?9ZZ@E>FD{p_wS!SUH!sb|MHH9r=HC#Kl^pfqx0cXy++e_|9kLUtfsK@ zzh_o&Rcc;JaY@l;S+*#CTO$Pn5Kzcd-~uxYjEv1KFvLtO4KTz^jf^L65w)r}G)9*) zu(UKm7c(?9H^dM#GsVzrYGjCEy{R#>9VJDHnK`LNTsAfe`oWo1lXJy%ISjc}RbBnv FxBvuyQq}+f delta 1305 zcmbQMyGVC}6|1?SnW6DSdvPcu`Yw~1@#G#hmHLBuw+(pqzSger;d|=hIc;rrSDr%y z?~SB{_C@Ww*?b2lC7o^i`!B zi7dP(wZt~J|7Pv(oe%EaseAQr?ZfrE>KSX5)}&S?EUhE?q_g%N+ol^U=6tVNcj27^k5-48(<3LD5Ybl* zZyCkR%Q)Z0-QD`};C1DbMaO-)Og}sEUd?zV;uY*%c`G8U?fm&)604rOMuoL_Pc$yJ zIzKu5=!u0FKmYE&y7uCc`VUFx^;Z~|e%2LY+vxG~h-L3RM{VVoBL5YpPAuxG4>;vv zap*pG>XO-y!h0B8Qs@6F7uwJ z7g2NjUtLw4ox|?B$YVWIl+O2h_?yk!@c8^{U(bGT1;1a|e6V;O& zn9G<+lK0x)t&KVOPDP@VBdYqA?X(Y7)rxy!>;3X& z{|JBQclh9Jkn_ljW0^*SzmokX!zHtiU-oBQy^v{w+*anJ5)xbLyNj4_MQxEO{lF`i z-Pz*km65CUacK?X?`&NGsnF;J>AGKLZ@!Wt(dWe8@5c8}f1@r_rmWVgrbpq8e$s7P z8g)WG?>9`BZq-@6tNeAIjL)lW*Dn7KT`!PrtZr`SLD4 zm0jcHym-NmUIo+Thcq}yB) z1=fd}rd^Od@>}nHwe7@am)FJ1_UHy{PS&$eeRbPt>t~JJb6cFB{giM&!I|o}{#%XT zc?I(qS5CxuC;Yo(X{BVYxZ#-lBQ=I=KRE8R?2k^Y2%X)UA~_+Jb@%cu534%->kaZZ z$UPE1b=xh@TRrKHk&s~rs{smD(?xg@~&FD`N}ToeE#y} z-tH8!-E>@ViVVaBDZ>gw;t F1pse|JdFSV diff --git a/tests/testthat/test-dplyr_methods.R b/tests/testthat/test-dplyr_methods.R index 4b5c7de..f8c1923 100755 --- a/tests/testthat/test-dplyr_methods.R +++ b/tests/testthat/test-dplyr_methods.R @@ -323,5 +323,13 @@ test_that("group_split()", { fd <- df |> group_split("groups") expect_equal(length(fd), length(unique(df$groups))) + + fd <- df |> + group_split("vst.variable") + expect_equal(length(fd), length(unique(rowData(df)$vst.variable))) + + fd <- df |> + group_split(vst.variable) + expect_equal(length(fd), length(unique(rowData(df)$vst.variable))) }) From 1ac4299e25b4d73bd65ec4eda84bf1e270292331 Mon Sep 17 00:00:00 2001 From: Boyd Tarlinton Date: Sat, 21 Oct 2023 13:34:02 +1000 Subject: [PATCH 05/20] tidy up code and docs --- NAMESPACE | 1 + R/dplyr_methods.R | 2 -- man/group_split.Rd | 58 ++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 59 insertions(+), 2 deletions(-) create mode 100644 man/group_split.Rd diff --git a/NAMESPACE b/NAMESPACE index 26adeae..535ebe6 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -13,6 +13,7 @@ S3method(full_join,SingleCellExperiment) S3method(ggplot,SingleCellExperiment) S3method(glimpse,tidySingleCellExperiment) S3method(group_by,SingleCellExperiment) +S3method(group_split,SingleCellExperiment) S3method(inner_join,SingleCellExperiment) S3method(join_transcripts,Seurat) S3method(join_transcripts,default) diff --git a/R/dplyr_methods.R b/R/dplyr_methods.R index 789eb97..b8cfb19 100755 --- a/R/dplyr_methods.R +++ b/R/dplyr_methods.R @@ -882,7 +882,6 @@ pull.SingleCellExperiment <- function(.data, var=-1, name=NULL, ...) { #' #' @param .data #' -#' @return #' @export #' #' @examples @@ -890,7 +889,6 @@ pull.SingleCellExperiment <- function(.data, var=-1, name=NULL, ...) { #' pbmc_small |> group_split(pbmc_small, groups) group_split.SingleCellExperiment <- function(.data, var) { - if(any(paste(substitute(var)) == names(colData(.data)))) { var <- enquo(var) var_list <- .data |> diff --git a/man/group_split.Rd b/man/group_split.Rd new file mode 100644 index 0000000..1ffd177 --- /dev/null +++ b/man/group_split.Rd @@ -0,0 +1,58 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/dplyr_methods.R +\name{group_split} +\alias{group_split} +\alias{group_split.SingleCellExperiment} +\title{Split data frame by groups} +\usage{ +\method{group_split}{SingleCellExperiment}(.data, var) +} +\arguments{ +\item{.data}{} +} +\value{ +A list of tibbles. Each tibble contains the rows of \code{.tbl} for the +associated group and all the columns, including the grouping variables. +Note that this returns a \link[vctrs:list_of]{list_of} which is slightly +stricter than a simple list but is useful for representing lists where +every element has the same type. +} +\description{ +\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#experimental}{\figure{lifecycle-experimental.svg}{options: alt='[Experimental]'}}}{\strong{[Experimental]}} + +\code{\link[dplyr:group_split]{group_split()}} works like \code{\link[base:split]{base::split()}} but: +\itemize{ +\item It uses the grouping structure from \code{\link[dplyr:group_by]{group_by()}} and therefore is subject +to the data mask +\item It does not name the elements of the list based on the grouping as this +only works well for a single character grouping variable. Instead, +use \code{\link[dplyr:group_keys]{group_keys()}} to access a data frame that defines the groups. +} + +\code{group_split()} is primarily designed to work with grouped data frames. +You can pass \code{...} to group and split an ungrouped data frame, but this +is generally not very useful as you want have easy access to the group +metadata. +} +\section{Lifecycle}{ + + +\code{group_split()} is not stable because you can achieve very similar results by +manipulating the nested column returned from +\code{\link[tidyr:nest]{tidyr::nest(.by =)}}. That also retains the group keys all +within a single data structure. \code{group_split()} may be deprecated in the +future. + +} + +\examples{ +data(pbmc_small) +pbmc_small |> group_split(pbmc_small, groups) +} +\seealso{ +Other grouping functions: +\code{\link[dplyr]{group_by}()}, +\code{\link[dplyr]{group_map}()}, +\code{\link[dplyr]{group_nest}()}, +\code{\link[dplyr]{group_trim}()} +} From 3c9648b1a72e40eea7d6ecebd58ebb9094f09302 Mon Sep 17 00:00:00 2001 From: Boyd Tarlinton Date: Sat, 21 Oct 2023 19:54:41 +1000 Subject: [PATCH 06/20] refactor group_split Issue #97 --- R/dplyr_methods.R | 55 +++++++++++++---------------- tests/testthat/test-dplyr_methods.R | 9 ++--- 2 files changed, 29 insertions(+), 35 deletions(-) diff --git a/R/dplyr_methods.R b/R/dplyr_methods.R index b8cfb19..abb3673 100755 --- a/R/dplyr_methods.R +++ b/R/dplyr_methods.R @@ -887,35 +887,28 @@ pull.SingleCellExperiment <- function(.data, var=-1, name=NULL, ...) { #' @examples #' data(pbmc_small) #' pbmc_small |> group_split(pbmc_small, groups) -group_split.SingleCellExperiment <- function(.data, var) { - - if(any(paste(substitute(var)) == names(colData(.data)))) { - var <- enquo(var) - var_list <- .data |> - as_tibble() |> - select(!!var) |> - unlist(use.names = FALSE) - - groups <- unique(var_list) - - v <- vector(mode = "list", length = length(groups)) - - for (n in seq_along(groups)) { - v[[n]] <- .data[, var_list == groups[[n]]] - } - - return(v) - } - - if(any(paste(substitute(var)) == names(rowData(.data)))) { - var <- enquo(var) - var_list <- .data |> - rowData() |> - as_tibble() |> - select(!!var) |> - unlist(use.names = FALSE) - - split(.data, var_list) |> - as.list() - } +group_split.SingleCellExperiment <- function(.data, ...) { + + var_list <- enquos(...) + + .data <- .data |> + unite("group_col", !!!var_list, remove = FALSE) + + group_list <- .data |> + as_tibble() |> + select(group_col) |> + unlist() |> + unique() + + v <- vector(mode = "list", length = length(group_list)) + + for (i in seq_along(v)) { + v[[i]] <- .data |> + filter(group_col == group_list[[i]]) |> + select(!group_col) + } + + v + } + diff --git a/tests/testthat/test-dplyr_methods.R b/tests/testthat/test-dplyr_methods.R index f8c1923..7a00291 100755 --- a/tests/testthat/test-dplyr_methods.R +++ b/tests/testthat/test-dplyr_methods.R @@ -325,11 +325,12 @@ test_that("group_split()", { expect_equal(length(fd), length(unique(df$groups))) fd <- df |> - group_split("vst.variable") - expect_equal(length(fd), length(unique(rowData(df)$vst.variable))) + group_split("groups") + expect_equal(length(fd), length(unique(df$groups))) fd <- df |> - group_split(vst.variable) - expect_equal(length(fd), length(unique(rowData(df)$vst.variable))) + group_split(groups, ident) + expect_equal(length(fd), length(unique(df$groups)) * + length(unique(df$ident))) }) From c12ec7af86b9a90ce3fd003a94e1689fbbc5b9a3 Mon Sep 17 00:00:00 2001 From: Boyd Tarlinton Date: Sun, 22 Oct 2023 11:30:40 +1000 Subject: [PATCH 07/20] docs --- R/dplyr_methods.R | 5 +++-- man/group_split.Rd | 20 ++++---------------- 2 files changed, 7 insertions(+), 18 deletions(-) diff --git a/R/dplyr_methods.R b/R/dplyr_methods.R index abb3673..374e023 100755 --- a/R/dplyr_methods.R +++ b/R/dplyr_methods.R @@ -878,9 +878,10 @@ pull.SingleCellExperiment <- function(.data, var=-1, name=NULL, ...) { #' @name group_split #' @rdname group_split -#' @inherit dplyr::group_split +#' @inherit dplyr::group_split title description details sections source #' -#' @param .data +#' @param .data A `tidySingleCellExperiment` object +#' @param ... The grouping variables by which to split the object #' #' @export #' diff --git a/man/group_split.Rd b/man/group_split.Rd index 1ffd177..b954a30 100644 --- a/man/group_split.Rd +++ b/man/group_split.Rd @@ -5,17 +5,12 @@ \alias{group_split.SingleCellExperiment} \title{Split data frame by groups} \usage{ -\method{group_split}{SingleCellExperiment}(.data, var) +\method{group_split}{SingleCellExperiment}(.data, ...) } \arguments{ -\item{.data}{} -} -\value{ -A list of tibbles. Each tibble contains the rows of \code{.tbl} for the -associated group and all the columns, including the grouping variables. -Note that this returns a \link[vctrs:list_of]{list_of} which is slightly -stricter than a simple list but is useful for representing lists where -every element has the same type. +\item{.data}{A `tidySingleCellExperiment` object} + +\item{...}{The grouping variables by which to split the object} } \description{ \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#experimental}{\figure{lifecycle-experimental.svg}{options: alt='[Experimental]'}}}{\strong{[Experimental]}} @@ -49,10 +44,3 @@ future. data(pbmc_small) pbmc_small |> group_split(pbmc_small, groups) } -\seealso{ -Other grouping functions: -\code{\link[dplyr]{group_by}()}, -\code{\link[dplyr]{group_map}()}, -\code{\link[dplyr]{group_nest}()}, -\code{\link[dplyr]{group_trim}()} -} From 9f22be8208e413d30d8b18b0f5ca0df86d12a9fe Mon Sep 17 00:00:00 2001 From: Boyd Tarlinton Date: Sun, 22 Oct 2023 12:32:38 +1000 Subject: [PATCH 08/20] fix warning, error persists I seem to be doing something that the SCE version of unite doesn't like. Maybe could avoiding adding a column in the first place. --- R/dplyr_methods.R | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/R/dplyr_methods.R b/R/dplyr_methods.R index 374e023..3d227ba 100755 --- a/R/dplyr_methods.R +++ b/R/dplyr_methods.R @@ -893,11 +893,12 @@ group_split.SingleCellExperiment <- function(.data, ...) { var_list <- enquos(...) .data <- .data |> - unite("group_col", !!!var_list, remove = FALSE) + unite.SingleCellExperiment("group_col", !!!var_list, remove = FALSE) - group_list <- .data |> - as_tibble() |> - select(group_col) |> + group_df <- .data |> + as_tibble() + + group_list <- group_df$group_col |> unlist() |> unique() @@ -905,8 +906,9 @@ group_split.SingleCellExperiment <- function(.data, ...) { for (i in seq_along(v)) { v[[i]] <- .data |> - filter(group_col == group_list[[i]]) |> - select(!group_col) + filter(group_col == group_list[[i]], ) + + v[[i]] <- select(v[[i]], !group_col) } v From dee22e5f0b95f94297b4271385f5886426b5ec46 Mon Sep 17 00:00:00 2001 From: Boyd Tarlinton Date: Sun, 22 Oct 2023 15:06:05 +1000 Subject: [PATCH 09/20] should fix error but doesn't --- R/dplyr_methods.R | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/R/dplyr_methods.R b/R/dplyr_methods.R index 3d227ba..3fa40dd 100755 --- a/R/dplyr_methods.R +++ b/R/dplyr_methods.R @@ -892,23 +892,21 @@ group_split.SingleCellExperiment <- function(.data, ...) { var_list <- enquos(...) - .data <- .data |> - unite.SingleCellExperiment("group_col", !!!var_list, remove = FALSE) - group_df <- .data |> as_tibble() - group_list <- group_df$group_col |> - unlist() |> + group_df <- group_df |> + unite("group_col", !!!var_list, remove = FALSE) + + group_list <- group_df$group_col + + groups <- group_list |> unique() - v <- vector(mode = "list", length = length(group_list)) + v <- vector(mode = "list", length = length(groups)) for (i in seq_along(v)) { - v[[i]] <- .data |> - filter(group_col == group_list[[i]], ) - - v[[i]] <- select(v[[i]], !group_col) + v[[i]] <- .data[,group_list == groups[[i]]] } v From 4b7565ed0a637ab691827c199887c2c49c23528f Mon Sep 17 00:00:00 2001 From: Boyd Tarlinton Date: Thu, 7 Dec 2023 18:22:42 +1000 Subject: [PATCH 10/20] group_column___ --- R/dplyr_methods.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/dplyr_methods.R b/R/dplyr_methods.R index 3fa40dd..77b4c67 100755 --- a/R/dplyr_methods.R +++ b/R/dplyr_methods.R @@ -896,9 +896,9 @@ group_split.SingleCellExperiment <- function(.data, ...) { as_tibble() group_df <- group_df |> - unite("group_col", !!!var_list, remove = FALSE) + unite("group_column___", !!!var_list, remove = FALSE) - group_list <- group_df$group_col + group_list <- group_df$group_column___ groups <- group_list |> unique() From 5a55e7a2490c4443bde5d29213f7935629abc963 Mon Sep 17 00:00:00 2001 From: Boyd Tarlinton Date: Thu, 7 Dec 2023 18:35:02 +1000 Subject: [PATCH 11/20] consistency with dplyr --- R/dplyr_methods.R | 15 ++++++++------- man/group_split.Rd | 23 ++++++++++++++++++++--- 2 files changed, 28 insertions(+), 10 deletions(-) diff --git a/R/dplyr_methods.R b/R/dplyr_methods.R index 77b4c67..d1d95b6 100755 --- a/R/dplyr_methods.R +++ b/R/dplyr_methods.R @@ -878,21 +878,18 @@ pull.SingleCellExperiment <- function(.data, var=-1, name=NULL, ...) { #' @name group_split #' @rdname group_split -#' @inherit dplyr::group_split title description details sections source -#' -#' @param .data A `tidySingleCellExperiment` object -#' @param ... The grouping variables by which to split the object +#' @inherit dplyr::group_split #' #' @export #' #' @examples #' data(pbmc_small) #' pbmc_small |> group_split(pbmc_small, groups) -group_split.SingleCellExperiment <- function(.data, ...) { +group_split.SingleCellExperiment <- function(.tbl, ..., .keep = TRUE) { var_list <- enquos(...) - group_df <- .data |> + group_df <- .tbl |> as_tibble() group_df <- group_df |> @@ -906,7 +903,11 @@ group_split.SingleCellExperiment <- function(.data, ...) { v <- vector(mode = "list", length = length(groups)) for (i in seq_along(v)) { - v[[i]] <- .data[,group_list == groups[[i]]] + v[[i]] <- .tbl[,group_list == groups[[i]]] + + if(.keep == FALSE) { + v[[i]] <- select(v[[i]], !!!var_list) + } } v diff --git a/man/group_split.Rd b/man/group_split.Rd index b954a30..e921cc7 100644 --- a/man/group_split.Rd +++ b/man/group_split.Rd @@ -5,12 +5,22 @@ \alias{group_split.SingleCellExperiment} \title{Split data frame by groups} \usage{ -\method{group_split}{SingleCellExperiment}(.data, ...) +\method{group_split}{SingleCellExperiment}(.tbl, ..., .keep = TRUE) } \arguments{ -\item{.data}{A `tidySingleCellExperiment` object} +\item{.tbl}{A tbl.} -\item{...}{The grouping variables by which to split the object} +\item{...}{If \code{.tbl} is an ungrouped data frame, a grouping specification, +forwarded to \code{\link[dplyr:group_by]{group_by()}}.} + +\item{.keep}{Should the grouping columns be kept?} +} +\value{ +A list of tibbles. Each tibble contains the rows of \code{.tbl} for the +associated group and all the columns, including the grouping variables. +Note that this returns a \link[vctrs:list_of]{list_of} which is slightly +stricter than a simple list but is useful for representing lists where +every element has the same type. } \description{ \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#experimental}{\figure{lifecycle-experimental.svg}{options: alt='[Experimental]'}}}{\strong{[Experimental]}} @@ -44,3 +54,10 @@ future. data(pbmc_small) pbmc_small |> group_split(pbmc_small, groups) } +\seealso{ +Other grouping functions: +\code{\link[dplyr]{group_by}()}, +\code{\link[dplyr]{group_map}()}, +\code{\link[dplyr]{group_nest}()}, +\code{\link[dplyr]{group_trim}()} +} From 1de8c85ac1416c80aa8e8eeb95c4bb3c9a8eebdc Mon Sep 17 00:00:00 2001 From: Boyd Tarlinton Date: Thu, 7 Dec 2023 18:44:37 +1000 Subject: [PATCH 12/20] simplify --- R/dplyr_methods.R | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/R/dplyr_methods.R b/R/dplyr_methods.R index d1d95b6..d513f25 100755 --- a/R/dplyr_methods.R +++ b/R/dplyr_methods.R @@ -889,13 +889,10 @@ group_split.SingleCellExperiment <- function(.tbl, ..., .keep = TRUE) { var_list <- enquos(...) - group_df <- .tbl |> - as_tibble() - - group_df <- group_df |> - unite("group_column___", !!!var_list, remove = FALSE) - - group_list <- group_df$group_column___ + group_list <- .tbl |> + as_tibble() |> + unite("group_column___", !!!var_list, remove = FALSE) |> + pull(group_column___) groups <- group_list |> unique() From 9c1cbeb9a5a29b9c76ef33b0da093f72761cdde8 Mon Sep 17 00:00:00 2001 From: Boyd Tarlinton Date: Thu, 7 Dec 2023 18:58:10 +1000 Subject: [PATCH 13/20] quotes to fix global binding issue --- R/dplyr_methods.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/dplyr_methods.R b/R/dplyr_methods.R index 37ce936..38e944c 100755 --- a/R/dplyr_methods.R +++ b/R/dplyr_methods.R @@ -837,7 +837,7 @@ group_split.SingleCellExperiment <- function(.tbl, ..., .keep = TRUE) { group_list <- .tbl |> as_tibble() |> unite("group_column___", !!!var_list, remove = FALSE) |> - pull(group_column___) + pull("group_column___") groups <- group_list |> unique() From 1ea58298e060414af7678390212dec52e78146ce Mon Sep 17 00:00:00 2001 From: Boyd Tarlinton Date: Thu, 7 Dec 2023 19:01:21 +1000 Subject: [PATCH 14/20] fix .keep Think I accidentally reverted this --- R/dplyr_methods.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/dplyr_methods.R b/R/dplyr_methods.R index 38e944c..ac10afc 100755 --- a/R/dplyr_methods.R +++ b/R/dplyr_methods.R @@ -848,7 +848,7 @@ group_split.SingleCellExperiment <- function(.tbl, ..., .keep = TRUE) { v[[i]] <- .tbl[,group_list == groups[[i]]] if(.keep == FALSE) { - v[[i]] <- select(v[[i]], !!!var_list) + v[[i]] <- select(v[[i]], !(!!!var_list)) } } From c7585e3276b44e35babbc205d885587672eb6563 Mon Sep 17 00:00:00 2001 From: Boyd Tarlinton Date: Thu, 7 Dec 2023 19:06:37 +1000 Subject: [PATCH 15/20] check dots --- R/dplyr_methods.R | 1 + 1 file changed, 1 insertion(+) diff --git a/R/dplyr_methods.R b/R/dplyr_methods.R index ac10afc..9b46a32 100755 --- a/R/dplyr_methods.R +++ b/R/dplyr_methods.R @@ -825,6 +825,7 @@ pull.SingleCellExperiment <- function(.data, var=-1, name=NULL, ...) { #' @rdname group_split #' @inherit dplyr::group_split #' +#' @importFrom ellipsis check_dots_used #' @export #' #' @examples From 2d52d9930e810de341e0fd5463ffe65aadc64d77 Mon Sep 17 00:00:00 2001 From: Boyd Tarlinton Date: Thu, 7 Dec 2023 19:35:44 +1000 Subject: [PATCH 16/20] fix example code --- R/dplyr_methods.R | 11 ++++++----- man/group_split.Rd | 3 ++- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/R/dplyr_methods.R b/R/dplyr_methods.R index 9b46a32..d081285 100755 --- a/R/dplyr_methods.R +++ b/R/dplyr_methods.R @@ -824,13 +824,14 @@ pull.SingleCellExperiment <- function(.data, var=-1, name=NULL, ...) { #' @name group_split #' @rdname group_split #' @inherit dplyr::group_split -#' -#' @importFrom ellipsis check_dots_used -#' @export -#' +#' #' @examples #' data(pbmc_small) -#' pbmc_small |> group_split(pbmc_small, groups) +#' group_split(pbmc_small, groups) +#' +#' @importFrom ellipsis check_dots_used +#' @importFrom dplyr group_split +#' @export group_split.SingleCellExperiment <- function(.tbl, ..., .keep = TRUE) { var_list <- enquos(...) diff --git a/man/group_split.Rd b/man/group_split.Rd index e921cc7..1cf3fa3 100644 --- a/man/group_split.Rd +++ b/man/group_split.Rd @@ -52,7 +52,8 @@ future. \examples{ data(pbmc_small) -pbmc_small |> group_split(pbmc_small, groups) +group_split(pbmc_small, groups) + } \seealso{ Other grouping functions: From 9347b5d2259d84cbadbf885b5a37214b665da4bc Mon Sep 17 00:00:00 2001 From: Boyd Tarlinton Date: Thu, 7 Dec 2023 19:41:36 +1000 Subject: [PATCH 17/20] consistency --- R/dplyr_methods.R | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/R/dplyr_methods.R b/R/dplyr_methods.R index d081285..be4db25 100755 --- a/R/dplyr_methods.R +++ b/R/dplyr_methods.R @@ -827,10 +827,9 @@ pull.SingleCellExperiment <- function(.data, var=-1, name=NULL, ...) { #' #' @examples #' data(pbmc_small) -#' group_split(pbmc_small, groups) +#' pbmc_small |> group_split(groups) #' #' @importFrom ellipsis check_dots_used -#' @importFrom dplyr group_split #' @export group_split.SingleCellExperiment <- function(.tbl, ..., .keep = TRUE) { From 332e30e83cb0934812220b98777b596066d5a357 Mon Sep 17 00:00:00 2001 From: Boyd Tarlinton Date: Sat, 9 Dec 2023 19:04:33 +1000 Subject: [PATCH 18/20] drop tests with qoutes --- tests/testthat/test-dplyr_methods.R | 8 -------- 1 file changed, 8 deletions(-) diff --git a/tests/testthat/test-dplyr_methods.R b/tests/testthat/test-dplyr_methods.R index 948c2f8..2dc4135 100755 --- a/tests/testthat/test-dplyr_methods.R +++ b/tests/testthat/test-dplyr_methods.R @@ -378,14 +378,6 @@ test_that("rowwise()", { }) test_that("group_split()", { - fd <- df |> - group_split("groups") - expect_equal(length(fd), length(unique(df$groups))) - - fd <- df |> - group_split("groups") - expect_equal(length(fd), length(unique(df$groups))) - fd <- df |> group_split(groups, ident) expect_equal(length(fd), length(unique(df$groups)) * From 0fb669aea1c374327d899ef3ef36dede23d6f348 Mon Sep 17 00:00:00 2001 From: Boyd Tarlinton Date: Sat, 9 Dec 2023 19:05:25 +1000 Subject: [PATCH 19/20] use dplyr functions --- NAMESPACE | 1 + R/dplyr_methods.R | 9 +++++---- man/group_split.Rd | 2 +- 3 files changed, 7 insertions(+), 5 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index 17170d6..45635a7 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -74,6 +74,7 @@ importFrom(dplyr,filter) importFrom(dplyr,full_join) importFrom(dplyr,group_by) importFrom(dplyr,group_by_drop_default) +importFrom(dplyr,group_rows) importFrom(dplyr,group_split) importFrom(dplyr,inner_join) importFrom(dplyr,left_join) diff --git a/R/dplyr_methods.R b/R/dplyr_methods.R index be4db25..8a18f54 100755 --- a/R/dplyr_methods.R +++ b/R/dplyr_methods.R @@ -830,6 +830,8 @@ pull.SingleCellExperiment <- function(.data, var=-1, name=NULL, ...) { #' pbmc_small |> group_split(groups) #' #' @importFrom ellipsis check_dots_used +#' @importFrom dplyr group_by +#' @importFrom dplyr group_rows #' @export group_split.SingleCellExperiment <- function(.tbl, ..., .keep = TRUE) { @@ -837,16 +839,15 @@ group_split.SingleCellExperiment <- function(.tbl, ..., .keep = TRUE) { group_list <- .tbl |> as_tibble() |> - unite("group_column___", !!!var_list, remove = FALSE) |> - pull("group_column___") + dplyr::group_by(!!!var_list) groups <- group_list |> - unique() + dplyr::group_rows() v <- vector(mode = "list", length = length(groups)) for (i in seq_along(v)) { - v[[i]] <- .tbl[,group_list == groups[[i]]] + v[[i]] <- .tbl[,groups[[i]]] if(.keep == FALSE) { v[[i]] <- select(v[[i]], !(!!!var_list)) diff --git a/man/group_split.Rd b/man/group_split.Rd index 1cf3fa3..11e2394 100644 --- a/man/group_split.Rd +++ b/man/group_split.Rd @@ -52,7 +52,7 @@ future. \examples{ data(pbmc_small) -group_split(pbmc_small, groups) +pbmc_small |> group_split(groups) } \seealso{ From b2a7e3db2830b3f0386f23f62b713b991e3a7ca8 Mon Sep 17 00:00:00 2001 From: Boyd Tarlinton Date: Sun, 10 Dec 2023 15:14:36 +1000 Subject: [PATCH 20/20] tests --- tests/testthat/test-dplyr_methods.R | 24 +++++++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) diff --git a/tests/testthat/test-dplyr_methods.R b/tests/testthat/test-dplyr_methods.R index 2dc4135..93f0541 100755 --- a/tests/testthat/test-dplyr_methods.R +++ b/tests/testthat/test-dplyr_methods.R @@ -377,10 +377,32 @@ test_that("rowwise()", { expect_identical(fd[[1]], sapply(df$lys, sum)) }) -test_that("group_split()", { +test_that("group_split() works for one variable", { + fd <- df |> + group_split(groups) + expect_equal(length(fd), length(unique(df$groups))) +}) + +test_that("group_split() works for combination of variables", { fd <- df |> group_split(groups, ident) expect_equal(length(fd), length(unique(df$groups)) * length(unique(df$ident))) }) +test_that("group_split() works for one logical statement", { + fd_log <- df |> + group_split(groups=="g1") + fd_var <- df |> + group_split(groups=="g1") + expect_equal(lapply(fd_var, count), lapply(fd_log, count)) +}) + +test_that("group_split() works for two logical statements", { + fd <- df |> + group_split(PC_1>0 & groups=="g1") + fd_counts <- lapply(fd, count) + expect_equal(c(fd_counts[[1]], fd_counts[[2]], use.names = FALSE), + list(75, 5)) +}) +