From 87886387dcc6bbcaee91b6f1bd590c63c434b4dc Mon Sep 17 00:00:00 2001 From: "andrei.kislitsyn" Date: Tue, 1 Jul 2025 18:17:38 +0400 Subject: [PATCH 1/2] explode docs and tests --- .../kotlinx/dataframe/api/explode.kt | 168 +++++++++++++++++- .../jetbrains/kotlinx/dataframe/api/filter.kt | 1 - .../documentation/DocumentationUrls.kt | 3 + .../kotlinx/dataframe/api/explode.kt | 88 +++++++++ 4 files changed, 253 insertions(+), 7 deletions(-) diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/explode.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/explode.kt index 8c59bba5af..4b46b545b4 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/explode.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/explode.kt @@ -10,6 +10,9 @@ import org.jetbrains.kotlinx.dataframe.annotations.Refine import org.jetbrains.kotlinx.dataframe.columns.ColumnGroup import org.jetbrains.kotlinx.dataframe.columns.ColumnReference import org.jetbrains.kotlinx.dataframe.columns.toColumnSet +import org.jetbrains.kotlinx.dataframe.documentation.DocumentationUrls +import org.jetbrains.kotlinx.dataframe.documentation.ExcludeFromSources +import org.jetbrains.kotlinx.dataframe.documentation.SelectingColumns import org.jetbrains.kotlinx.dataframe.impl.api.explodeImpl import org.jetbrains.kotlinx.dataframe.util.DEPRECATED_ACCESS_API import kotlin.reflect.KProperty @@ -19,13 +22,82 @@ private val defaultExplodeColumns: ColumnsSelector<*, *> = { } // region explode DataFrame + +/** + * Splits list-like values in the specified [\columns] and spreads them vertically — + * that is, adds a separate row for each element (one value per row). + * Values in all other columns are duplicated to preserve row context. + * + * If no [\columns] are specified, all columns (at any depth) containing + * [List] or [DataFrame] values will be exploded. + * + * If [dropEmpty] is `true`, rows with empty lists or DataFrames will be removed. 
+ * If `false`, such rows will be exploded into `null` values. + * + * Returns a new [DataFrame] with exploded columns. + * + * Each exploded column will have a new type (`List<T>` → `T`). + * When several columns are exploded in one operation, lists in different columns will be aligned. + * + * This operation is the reverse of [implode]. + * + * @include [SelectingColumns.ColumnGroupsAndNestedColumnsMention] + * + * For more information, see: {@include [DocumentationUrls.Explode]} + * + * ### This `explode` overload + */ +@ExcludeFromSources +internal interface ExplodeDocs + +/** + * {@include [ExplodeDocs]} + * {@include [SelectingColumns.Dsl]} + * + * #### Examples + * + * ```kotlin + * // Explodes all `List` and `DataFrame` columns at any depth + * df.explode() + * + * // Explodes the "tags" column of type `List` + * df.explode { tags } + * + * // Explodes all columns of type `List<*>` + * df.explode { colsOf<List<*>>() } + * ``` + * + * @param dropEmpty If `true`, removes rows with empty lists or DataFrames. + * If `false`, such rows will be exploded into `null` values. + * @param columns The [ColumnsSelector] used to select columns to explode. + * If not specified, all applicable columns will be exploded. + * @return A new [DataFrame] with exploded columns. + */ @Refine @Interpretable("Explode0") public fun DataFrame.explode( dropEmpty: Boolean = true, - selector: ColumnsSelector = defaultExplodeColumns, -): DataFrame = explodeImpl(dropEmpty, selector) - + columns: ColumnsSelector = defaultExplodeColumns, +): DataFrame = explodeImpl(dropEmpty, columns) + +/** + * {@include [ExplodeDocs]} + * {@include [SelectingColumns.ColumnNames]} + * + * #### Example + * + * ```kotlin + * // Explodes the "tags" and "scores" columns, where + * // "tags" is a `List` and "scores" is a `List` + * val exploded = df.explode("tags", "scores") + * ``` + * + * @param dropEmpty If `true`, removes rows with empty lists or DataFrames. + * If `false`, such rows will be exploded into `null` values.
+ * @param columns The [column names][String] used to select columns to explode. + * If not specified, all applicable columns will be exploded. + * @return A new [DataFrame] with exploded columns. + */ public fun DataFrame.explode(vararg columns: String, dropEmpty: Boolean = true): DataFrame = explode(dropEmpty) { columns.toColumnSet() } @@ -43,11 +115,73 @@ public fun DataFrame.explode(vararg columns: KProperty, dropEmpty: // region explode DataRow +/** + * Splits list-like values in the specified [\columns] of this [DataRow] and spreads them vertically — + * that is, adds a separate row for each element (one value per row) + * and combines them into a new [DataFrame]. + * Values in all other columns are duplicated to preserve row context. + * + * If no [\columns] are specified, all columns (at any depth) containing + * [List] or [DataFrame] values will be exploded. + * + * If [dropEmpty] is `true`, the result will exclude rows with empty lists or DataFrames. + * If `false`, such values will be exploded into `null`. + * + * Returns a new [DataFrame] expanded into multiple rows based on the exploded columns. + * + * Each exploded column will have a new type (`List<T>` → `T`). + * When several columns are exploded in one operation, lists in different columns will be aligned. + * + * @include [SelectingColumns.ColumnGroupsAndNestedColumnsMention] + * + * For more information, see: {@include [DocumentationUrls.Explode]} + * + * ### This `explode` overload + */ +@ExcludeFromSources +internal interface ExplodeDataRowDocs + +/** + * {@include [ExplodeDataRowDocs]} + * {@include [SelectingColumns.Dsl]} + * + * #### Example + * + * ```kotlin + * // Explodes the `hobbies` and `scores` values of the row, + * // of types `List` and `List`, respectively + * row.explode { hobbies and scores } + * ``` + * + * @param dropEmpty If `true`, removes rows with empty lists or DataFrames. + * If `false`, such rows will be exploded into `null` values.
+ * @param columns The [ColumnsSelector] used to select columns to explode. + * If not specified, all applicable columns will be exploded. + * @return A new [DataFrame] with exploded columns from this [DataRow]. + */ public fun DataRow.explode( dropEmpty: Boolean = true, - selector: ColumnsSelector = defaultExplodeColumns, -): DataFrame = toDataFrame().explode(dropEmpty, selector) - + columns: ColumnsSelector = defaultExplodeColumns, +): DataFrame = toDataFrame().explode(dropEmpty, columns) + +/** + * {@include [ExplodeDataRowDocs]} + * {@include [SelectingColumns.ColumnNames]} + * + * #### Example + * + * ```kotlin + * // Explodes the `hobbies` and `scores` values of the row, + * // of types `List` and `List`, respectively + * row.explode("hobbies", "scores") + * ``` + * + * @param dropEmpty If `true`, removes rows with empty lists or DataFrames. + * If `false`, such rows will be exploded into `null` values. + * @param columns The [column names][String] used to select columns to explode. + * If not specified, all applicable columns will be exploded. + * @return A new [DataFrame] with exploded columns from this [DataRow]. + */ public fun DataRow.explode(vararg columns: String, dropEmpty: Boolean = true): DataFrame = explode(dropEmpty) { columns.toColumnSet() } @@ -65,9 +199,31 @@ public fun DataRow.explode(vararg columns: KProperty, dropEmpty: Bo // region explode DataColumn +/** + * Splits list-like values in this [DataColumn] and spreads them vertically — + * that is, adds a separate row for each element (one value per row). + * + * Returns a new [DataColumn] with the exploded values. + * The resulting column will have a new type (`List` → `T`). + * + * For more information, see: {@include [DocumentationUrls.Explode]} + * + * @return A new [DataColumn] with exploded values. + */ @JvmName("explodeList") public fun DataColumn>.explode(): DataColumn = explodeImpl() as DataColumn +/** + * Explodes a [DataColumn] of [DataFrame] values into a single [ColumnGroup]. 
+ * + * Each nested [DataFrame] is unwrapped, and its columns are placed side by side + * within a column group named after the original column. + * The number of resulting rows equals the total number of rows across all nested DataFrames. + * + * For more information, see: {@include [DocumentationUrls.Explode]} + * + * @return A [ColumnGroup] containing the concatenated contents of all nested DataFrames. + */ @JvmName("explodeFrames") public fun DataColumn>.explode(): ColumnGroup = concat().asColumnGroup(name()) diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/filter.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/filter.kt index 5dbbcd3fbf..3ec2249853 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/filter.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/filter.kt @@ -16,7 +16,6 @@ import org.jetbrains.kotlinx.dataframe.columns.SingleColumn import org.jetbrains.kotlinx.dataframe.columns.asColumnSet import org.jetbrains.kotlinx.dataframe.documentation.DocumentationUrls import org.jetbrains.kotlinx.dataframe.documentation.DslGrammarTemplateColumnsSelectionDsl.DslGrammarTemplate -import org.jetbrains.kotlinx.dataframe.documentation.ExcludeFromSources import org.jetbrains.kotlinx.dataframe.documentation.Indent import org.jetbrains.kotlinx.dataframe.documentation.LineBreak import org.jetbrains.kotlinx.dataframe.documentation.SelectingColumns diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/DocumentationUrls.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/DocumentationUrls.kt index 19f2f73e30..771133046e 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/DocumentationUrls.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/DocumentationUrls.kt @@ -113,4 +113,7 @@ internal interface DocumentationUrls { /** [See `filter` on the documentation website.]({@include [Url]}/filter.html) */ interface 
Filter + + /** [See `explode` on the documentation website.]({@include [Url]}/explode.html) */ + interface Explode } diff --git a/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/api/explode.kt b/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/api/explode.kt index cd98357945..334f8c5485 100644 --- a/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/api/explode.kt +++ b/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/api/explode.kt @@ -44,4 +44,92 @@ class ExplodeTests { val df = dataFrameOf("a", "b")(1, 2) df.explode() shouldBe df } + + @Test + fun `explode multiple aligned columns`() { + val a by columnOf(listOf(1, 2), listOf(3, 4, 5)) + val b by columnOf(listOf(1, 2, 3), listOf(4, 5)) + + val df = dataFrameOf(a, b) + val exploded = df.explode { a and b } + + val expected = dataFrameOf("a", "b")( + 1, 1, + 2, 2, + null, 3, + 3, 4, + 4, 5, + 5, null, + ) + + exploded shouldBe expected + } + + @Test + fun `explode with empty list and dropEmpty true`() { + val df = dataFrameOf("a", "b")( + 1, listOf(1, 2), + 2, emptyList(), + 3, listOf(3), + ) + + val exploded = df.explode(dropEmpty = true) + + val expected = dataFrameOf("a", "b")( + 1, 1, + 1, 2, + 3, 3, + ) + + exploded shouldBe expected + } + + @Test + fun `explode with empty list and dropEmpty false`() { + val df = dataFrameOf("a", "b")( + 1, listOf(1, 2), + 2, emptyList(), + 3, listOf(3), + ) + + val exploded = df.explode(dropEmpty = false) + + val expected = dataFrameOf("a", "b")( + 1, 1, + 1, 2, + 2, null, + 3, 3, + ) + + exploded shouldBe expected + } + + @Test + fun `explode DataColumn of lists`() { + val col by columnOf(listOf(1, 2), listOf(3, 4)) + + val exploded = col.explode() + val expected = columnOf(1, 2, 3, 4) named "col" + + exploded shouldBe expected + } + + @Test + fun `explode FrameColumn into ColumnGroup`() { + val col by columnOf( + dataFrameOf("x", "y")(1, 2, 3, 4), + dataFrameOf("x", "y")(5, 6, 7, 8), + ) + + val exploded = col.explode() + + val expected = dataFrameOf("x", 
"y")( + 1, 2, + 3, 4, + 5, 6, + 7, 8, + ).asColumnGroup("col") + + exploded shouldBe expected + } } From 2a063cb526d998051777bdc59879f0837fc02f8b Mon Sep 17 00:00:00 2001 From: "andrei.kislitsyn" Date: Thu, 3 Jul 2025 18:24:39 +0400 Subject: [PATCH 2/2] explode fixes --- .../kotlin/org/jetbrains/kotlinx/dataframe/api/explode.kt | 8 ++++---- .../kotlin/org/jetbrains/kotlinx/dataframe/api/gather.kt | 4 ++++ .../kotlinx/dataframe/plugin/impl/api/explode.kt | 4 ++-- 3 files changed, 10 insertions(+), 6 deletions(-) diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/explode.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/explode.kt index 4b46b545b4..992f4f6e09 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/explode.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/explode.kt @@ -25,13 +25,13 @@ private val defaultExplodeColumns: ColumnsSelector<*, *> = { /** * Splits list-like values in the specified [\columns] and spreads them vertically — - * that is, adds a separate row for each element (one value per row). + * that is, it adds a separate row for each element (one value per row). * Values in all other columns are duplicated to preserve row context. * * If no [\columns] are specified, all columns (at any depth) containing * [List] or [DataFrame] values will be exploded. * - * If [dropEmpty] is `true`, rows with empty lists or DataFrames will be removed. + * If [dropEmpty] is `true`, rows with empty lists or [DataFrame]s will be removed. * If `false`, such rows will be exploded into `null` values. * * Returns a new [DataFrame] with exploded columns. 
@@ -117,7 +117,7 @@ public fun DataFrame.explode(vararg columns: KProperty, dropEmpty: /** * Splits list-like values in the specified [\columns] of this [DataRow] and spreads them vertically — - * that is, adds a separate row for each element (one value per row) + * that is, it adds a separate row for each element (one value per row) * and combine them into new [DataFrame]. * Values in all other columns are duplicated to preserve row context. * @@ -201,7 +201,7 @@ public fun DataRow.explode(vararg columns: KProperty, dropEmpty: Bo /** * Splits list-like values in this [DataColumn] and spreads them vertically — - * that is, adds a separate row for each element (one value per row). + * that is, it adds a separate row for each element (one value per row). * * Returns a new [DataColumn] with the exploded values. * The resulting column will have a new type (`List` → `T`). diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/gather.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/gather.kt index c432c230cc..3e505c9885 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/gather.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/gather.kt @@ -259,6 +259,8 @@ public fun Gather.notNull(): Gather = wher * .mapValues { (it + 0.5).toFloat() } * .into("series", "value") * ``` + * + * @see [explode] * @return A new [Gather] instance with exploded list values. */ @Interpretable("GatherExplodeLists") @@ -296,6 +298,8 @@ public fun Gather.explodeLists(): Gather = * .mapValues { (it + 0.5).toFloat() } * .into("series", "value") * ``` + * + * @see [explode] * @return A new [Gather] instance with exploded list values. 
*/ @JvmName("explodeListsTyped") diff --git a/plugins/kotlin-dataframe/src/org/jetbrains/kotlinx/dataframe/plugin/impl/api/explode.kt b/plugins/kotlin-dataframe/src/org/jetbrains/kotlinx/dataframe/plugin/impl/api/explode.kt index 31c3370522..21c3f6bf69 100644 --- a/plugins/kotlin-dataframe/src/org/jetbrains/kotlinx/dataframe/plugin/impl/api/explode.kt +++ b/plugins/kotlin-dataframe/src/org/jetbrains/kotlinx/dataframe/plugin/impl/api/explode.kt @@ -16,11 +16,11 @@ import org.jetbrains.kotlinx.dataframe.plugin.impl.simpleColumnOf internal class Explode0 : AbstractInterpreter() { val Arguments.dropEmpty: Boolean by arg(defaultValue = Present(true)) val Arguments.receiver: PluginDataFrameSchema by dataFrame() - val Arguments.selector: ColumnsResolver? by arg(defaultValue = Present(null)) + val Arguments.columns: ColumnsResolver? by arg(defaultValue = Present(null)) override val Arguments.startingSchema get() = receiver override fun Arguments.interpret(): PluginDataFrameSchema { - val columns = selector ?: object : ColumnsResolver { + val columns = columns ?: object : ColumnsResolver { override fun resolve(df: PluginDataFrameSchema): List { return df.flatten(includeFrames = false).filter { val column = it.column