From 644fc6617ce4179fd6b94b4ba05ed0c364325eb5 Mon Sep 17 00:00:00 2001 From: Nikita Klimenko Date: Mon, 8 May 2023 14:05:24 +0300 Subject: [PATCH 1/9] update split condition so resulting iterable has multiple elements --- .../kotlinx/dataframe/samples/api/Modify.kt | 49 +- ...nx.dataframe.samples.api.Modify.split.html | 152 +---- ...x.dataframe.samples.api.Modify.split1.html | 591 ++++++++++++++++++ ...taframe.samples.api.Modify.splitRegex.html | 96 +-- ...aframe.samples.api.Modify.splitRegex1.html | 545 ++++++++++++++++ docs/StardustDocs/topics/split.md | 54 +- 6 files changed, 1243 insertions(+), 244 deletions(-) create mode 100644 docs/StardustDocs/snippets/org.jetbrains.kotlinx.dataframe.samples.api.Modify.split1.html create mode 100644 docs/StardustDocs/snippets/org.jetbrains.kotlinx.dataframe.samples.api.Modify.splitRegex1.html diff --git a/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/samples/api/Modify.kt b/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/samples/api/Modify.kt index 1a8bf5c505..374491133b 100644 --- a/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/samples/api/Modify.kt +++ b/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/samples/api/Modify.kt @@ -496,9 +496,7 @@ class Modify : TestBase() { @TransformDataFrameExpressions fun split_properties() { // SampleStart - df.split { name }.by { it.values() }.into("nameParts") - - df.split { name.lastName }.by(" ").default("").inward { "word$it" } + df.split { name.lastName }.by { it.asIterable() }.into("char1", "char2") // SampleEnd } @@ -509,9 +507,7 @@ class Modify : TestBase() { val name by columnGroup() val lastName by name.column() - df.split { name }.by { it.values() }.into("nameParts") - - df.split { lastName }.by(" ").default("").inward { "word$it" } + df.split { lastName }.by { it.asIterable() }.into("char1", "char2") // SampleEnd } @@ -519,18 +515,53 @@ class Modify : TestBase() { @TransformDataFrameExpressions fun split_strings() { // SampleStart - df.split { name }.by { it.values() }.into("nameParts") + df.split { "name"["lastName"]() }.by { it.asIterable() }.into("char1", "char2") + // SampleEnd + } + + @Test + @TransformDataFrameExpressions + fun split1_properties() { + // SampleStart + df.split { name.lastName }.by { it.asIterable() }.default(' ').inward { "char$it" } + // SampleEnd + } + + @Test + @TransformDataFrameExpressions + fun split1_accessors() { + // SampleStart + val name by columnGroup() + val lastName by name.column() - df.split { "name"["lastName"] }.by(" ").default("").inward { "word$it" } + df.split { lastName }.by { it.asIterable() }.default(' ').inward { "char$it" } + // SampleEnd + } + + @Test + @TransformDataFrameExpressions + fun split1_strings() { + // SampleStart + df.split { "name"["lastName"]() }.by { it.asIterable() }.default(' ').inward { "char$it" } // SampleEnd } @Test @TransformDataFrameExpressions fun splitRegex() { + // SampleStart val merged = df.merge { name.lastName and name.firstName }.by { it[0] + " (" + it[1] + ")" }.into("name") - val name by column() + // SampleEnd + } + + private val merged = df.merge { name.lastName and name.firstName }.by { it[0] + " (" + it[1] + ")" }.into("name") + + @Test + @TransformDataFrameExpressions + fun splitRegex1() { // SampleStart + val name by column() + merged.split { name } .match("""(.*) \((.*)\)""") .inward("firstName", "lastName") diff --git a/docs/StardustDocs/snippets/org.jetbrains.kotlinx.dataframe.samples.api.Modify.split.html b/docs/StardustDocs/snippets/org.jetbrains.kotlinx.dataframe.samples.api.Modify.split.html index 0c7339963b..066ae32b3a 100644 --- a/docs/StardustDocs/snippets/org.jetbrains.kotlinx.dataframe.samples.api.Modify.split.html +++ b/docs/StardustDocs/snippets/org.jetbrains.kotlinx.dataframe.samples.api.Modify.split.html @@ -182,25 +182,12 @@ - - - - - - - - - - - -
- df.split { name }.by { it.values() }.into("nameParts") - +
Input DataFrame: rowsCount = 7, columnsCount = 5
@@ -217,51 +204,14 @@ Step 2: SplitWithTransform
-

-
-
- Output DataFrame: rowsCount = 7, columnsCount = 6 -
- -

-
-
-
-
- df.split { name.lastName }.by(" ").default("").inward { "word$it" } - -
- Input DataFrame: rowsCount = 7, columnsCount = 5 -
- -

-
-
- Step 1: Split -
- -

-
-
- Step 2: SplitWithTransform -
- -

-
-
- Step 3: SplitWithTransform -
-

Output DataFrame: rowsCount = 7, columnsCount = 5 -
+

-
-
\ No newline at end of file diff --git a/docs/StardustDocs/snippets/org.jetbrains.kotlinx.dataframe.samples.api.Modify.split1.html b/docs/StardustDocs/snippets/org.jetbrains.kotlinx.dataframe.samples.api.Modify.split1.html new file mode 100644 index 0000000000..169b6e326e --- /dev/null +++ b/docs/StardustDocs/snippets/org.jetbrains.kotlinx.dataframe.samples.api.Modify.split1.html @@ -0,0 +1,591 @@ + + + + + + + + +
+ Input DataFrame: rowsCount = 7, columnsCount = 5 +
+ +

+
+
+ Step 1: Split +
+ +

+
+
+ Step 2: SplitWithTransform +
+ +

+
+
+ Step 3: SplitWithTransform +
+ +

+
+
+ Output DataFrame: rowsCount = 7, columnsCount = 5 +
+ +

+
+ + + \ No newline at end of file diff --git a/docs/StardustDocs/snippets/org.jetbrains.kotlinx.dataframe.samples.api.Modify.splitRegex.html b/docs/StardustDocs/snippets/org.jetbrains.kotlinx.dataframe.samples.api.Modify.splitRegex.html index fd00a081be..13ce92efeb 100644 --- a/docs/StardustDocs/snippets/org.jetbrains.kotlinx.dataframe.samples.api.Modify.splitRegex.html +++ b/docs/StardustDocs/snippets/org.jetbrains.kotlinx.dataframe.samples.api.Modify.splitRegex.html @@ -182,23 +182,12 @@ - - - - - - - - - -
- df.merge { name.lastName and name.firstName }.by { it[0] + " (" + it[1] + ")" }.into("name") - +
Input DataFrame: rowsCount = 7, columnsCount = 5
@@ -223,37 +212,6 @@

-
-
-
- merged.split { name }.match("""(.*) \((.*)\)""").inward("firstName", "lastName") - -
- Input DataFrame: rowsCount = 7, columnsCount = 5 -
- -

-
-
- Step 1: Split -
- -

-
-
- Step 2: SplitWithTransform -
- -

-
-
- Output DataFrame: rowsCount = 7, columnsCount = 5 -
- -

-
-
-
\ No newline at end of file diff --git a/docs/StardustDocs/snippets/org.jetbrains.kotlinx.dataframe.samples.api.Modify.splitRegex1.html b/docs/StardustDocs/snippets/org.jetbrains.kotlinx.dataframe.samples.api.Modify.splitRegex1.html new file mode 100644 index 0000000000..9a7d16348d --- /dev/null +++ b/docs/StardustDocs/snippets/org.jetbrains.kotlinx.dataframe.samples.api.Modify.splitRegex1.html @@ -0,0 +1,545 @@ + + + + + + + + +
+ Input DataFrame: rowsCount = 7, columnsCount = 5 +
+ +

+
+
+ Step 1: Split +
+ +

+
+
+ Step 2: SplitWithTransform +
+ +

+
+
+ Output DataFrame: rowsCount = 7, columnsCount = 5 +
+ +

+
+ + + \ No newline at end of file diff --git a/docs/StardustDocs/topics/split.md b/docs/StardustDocs/topics/split.md index 08e2ec1b98..4a2542e141 100644 --- a/docs/StardustDocs/topics/split.md +++ b/docs/StardustDocs/topics/split.md @@ -72,9 +72,7 @@ Default `columnNamesGenerator` generates column names `split1`, `split2`... ```kotlin -df.split { name }.by { it.values() }.into("nameParts") - -df.split { name.lastName }.by(" ").default("").inward { "word$it" } +df.split { name.lastName }.by { it.asIterable() }.into("char1", "char2") ``` @@ -84,35 +82,71 @@ df.split { name.lastName }.by(" ").default("").inward { "word$it" } val name by columnGroup() val lastName by name.column() -df.split { name }.by { it.values() }.into("nameParts") - -df.split { lastName }.by(" ").default("").inward { "word$it" } +df.split { lastName }.by { it.asIterable() }.into("char1", "char2") ``` ```kotlin -df.split { name }.by { it.values() }.into("nameParts") - -df.split { "name"["lastName"] }.by(" ").default("").inward { "word$it" } +df.split { "name"["lastName"]() }.by { it.asIterable() }.into("char1", "char2") ``` + + + + +```kotlin +df.split { name.lastName }.by { it.asIterable() }.default(' ').inward { "char$it" } +``` + + + + +```kotlin +val name by columnGroup() +val lastName by name.column() + +df.split { lastName }.by { it.asIterable() }.default(' ').inward { "char$it" } +``` + + + + +```kotlin +df.split { "name"["lastName"]() }.by { it.asIterable() }.default(' ').inward { "char$it" } +``` + + + + + `String` columns can also be split into group matches of [`Regex`](https://kotlinlang.org/api/latest/jvm/stdlib/kotlin.text/-regex/) pattern: ```kotlin +val merged = df.merge { name.lastName and name.firstName }.by { it[0] + " (" + it[1] + ")" }.into("name") +``` + + + + + + +```kotlin +val name by column() + merged.split { name } .match("""(.*) \((.*)\)""") .inward("firstName", "lastName") ``` - + [`FrameColumn`](DataColumn.md#framecolumn) can be split into columns: From 783e82dcdaa6b1e14e474eb659b2523877b31533 Mon Sep 17 00:00:00 2001 From: Nikita Klimenko Date: Wed, 17 May 2023 14:13:17 +0300 Subject: [PATCH 2/9] add builder for creating df from third party data --- .../kotlinx/dataframe/api/constructors.kt | 30 +++++++++++++++++++ .../kotlinx/dataframe/impl/DataFrameImpl.kt | 2 +- .../kotlinx/dataframe/api/constructors.kt | 16 ++++++++++ .../kotlinx/dataframe/api/constructors.kt | 30 +++++++++++++++++++ .../kotlinx/dataframe/impl/DataFrameImpl.kt | 2 +- .../kotlinx/dataframe/api/constructors.kt | 16 ++++++++++ 6 files changed, 94 insertions(+), 2 deletions(-) create mode 100644 core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/api/constructors.kt create mode 100644 core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/api/constructors.kt diff --git a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/constructors.kt b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/constructors.kt index 10f1ebb536..7854580d33 100644 --- a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/constructors.kt +++ b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/constructors.kt @@ -15,6 +15,7 @@ import org.jetbrains.kotlinx.dataframe.columns.ColumnReference import org.jetbrains.kotlinx.dataframe.columns.FrameColumn import org.jetbrains.kotlinx.dataframe.exceptions.DuplicateColumnNamesException import org.jetbrains.kotlinx.dataframe.exceptions.UnequalColumnSizesException +import org.jetbrains.kotlinx.dataframe.impl.ColumnNameGenerator import org.jetbrains.kotlinx.dataframe.impl.DataFrameImpl import org.jetbrains.kotlinx.dataframe.impl.asList import org.jetbrains.kotlinx.dataframe.impl.columnName @@ -23,6 +24,7 @@ import org.jetbrains.kotlinx.dataframe.impl.columns.createColumn import org.jetbrains.kotlinx.dataframe.impl.columns.createComputedColumnReference import org.jetbrains.kotlinx.dataframe.impl.columns.forceResolve import org.jetbrains.kotlinx.dataframe.impl.columns.unbox +import org.jetbrains.kotlinx.dataframe.impl.unnamedColumnPrefix import org.jetbrains.kotlinx.dataframe.size import kotlin.random.Random import kotlin.random.nextInt @@ -348,6 +350,34 @@ public class DataFrameBuilder(private val header: List) { public fun randomBoolean(nrow: Int): AnyFrame = fillNotNull(nrow) { Random.nextBoolean() } } +/** + * Helper class for implementing operations when column names can be potentially duplicated. + * For example, operations involving multiple dataframes, computed columns or parsing some third-party data + */ +public class DynamicDataFrameBuilder { + private var cols: MutableList = mutableListOf() + private val generator = ColumnNameGenerator() + + public fun add(col: AnyCol): String { + val uniqueName = if (col.name().isEmpty()) { + generator.addUnique(unnamedColumnPrefix) + } else { + generator.addUnique(col.name()) + } + val renamed = if (uniqueName != col.name()) { + col.rename(uniqueName) + } else { + col + } + cols.add(renamed) + return uniqueName + } + + public fun toDataFrame(): AnyFrame { + return dataFrameOf(cols) + } +} + /** * Returns [DataFrame] with no rows and no columns. * diff --git a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/DataFrameImpl.kt b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/DataFrameImpl.kt index 1881b97314..93fe387f83 100644 --- a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/DataFrameImpl.kt +++ b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/DataFrameImpl.kt @@ -27,7 +27,7 @@ import org.jetbrains.kotlinx.dataframe.impl.columns.resolveSingle import org.jetbrains.kotlinx.dataframe.io.renderToString import kotlin.reflect.KProperty -private const val unnamedColumnPrefix = "untitled" +internal const val unnamedColumnPrefix = "untitled" internal open class DataFrameImpl(cols: List, val nrow: Int) : DataFrame, AggregatableInternal { diff --git a/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/api/constructors.kt b/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/api/constructors.kt new file mode 100644 index 0000000000..ceb14767e5 --- /dev/null +++ b/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/api/constructors.kt @@ -0,0 +1,16 @@ +package org.jetbrains.kotlinx.dataframe.api + +import io.kotest.matchers.shouldBe +import org.junit.Test + +class ConstructorsTests { + + @Test + fun `untitled column naming`() { + val builder = DynamicDataFrameBuilder() + repeat(5) { + builder.add(columnOf(1, 2, 3)) + } + builder.toDataFrame() shouldBe dataFrameOf(List(5) { columnOf(1, 2, 3) }) + } +} diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/constructors.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/constructors.kt index 10f1ebb536..7854580d33 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/constructors.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/constructors.kt @@ -15,6 +15,7 @@ import org.jetbrains.kotlinx.dataframe.columns.ColumnReference import org.jetbrains.kotlinx.dataframe.columns.FrameColumn import org.jetbrains.kotlinx.dataframe.exceptions.DuplicateColumnNamesException import org.jetbrains.kotlinx.dataframe.exceptions.UnequalColumnSizesException +import org.jetbrains.kotlinx.dataframe.impl.ColumnNameGenerator import org.jetbrains.kotlinx.dataframe.impl.DataFrameImpl import org.jetbrains.kotlinx.dataframe.impl.asList import org.jetbrains.kotlinx.dataframe.impl.columnName @@ -23,6 +24,7 @@ import org.jetbrains.kotlinx.dataframe.impl.columns.createColumn import org.jetbrains.kotlinx.dataframe.impl.columns.createComputedColumnReference import org.jetbrains.kotlinx.dataframe.impl.columns.forceResolve import org.jetbrains.kotlinx.dataframe.impl.columns.unbox +import org.jetbrains.kotlinx.dataframe.impl.unnamedColumnPrefix import org.jetbrains.kotlinx.dataframe.size import kotlin.random.Random import kotlin.random.nextInt @@ -348,6 +350,34 @@ public class DataFrameBuilder(private val header: List) { public fun randomBoolean(nrow: Int): AnyFrame = fillNotNull(nrow) { Random.nextBoolean() } } +/** + * Helper class for implementing operations when column names can be potentially duplicated. + * For example, operations involving multiple dataframes, computed columns or parsing some third-party data + */ +public class DynamicDataFrameBuilder { + private var cols: MutableList = mutableListOf() + private val generator = ColumnNameGenerator() + + public fun add(col: AnyCol): String { + val uniqueName = if (col.name().isEmpty()) { + generator.addUnique(unnamedColumnPrefix) + } else { + generator.addUnique(col.name()) + } + val renamed = if (uniqueName != col.name()) { + col.rename(uniqueName) + } else { + col + } + cols.add(renamed) + return uniqueName + } + + public fun toDataFrame(): AnyFrame { + return dataFrameOf(cols) + } +} + /** * Returns [DataFrame] with no rows and no columns. * diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/DataFrameImpl.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/DataFrameImpl.kt index 1881b97314..93fe387f83 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/DataFrameImpl.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/DataFrameImpl.kt @@ -27,7 +27,7 @@ import org.jetbrains.kotlinx.dataframe.impl.columns.resolveSingle import org.jetbrains.kotlinx.dataframe.io.renderToString import kotlin.reflect.KProperty -private const val unnamedColumnPrefix = "untitled" +internal const val unnamedColumnPrefix = "untitled" internal open class DataFrameImpl(cols: List, val nrow: Int) : DataFrame, AggregatableInternal { diff --git a/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/api/constructors.kt b/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/api/constructors.kt new file mode 100644 index 0000000000..ceb14767e5 --- /dev/null +++ b/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/api/constructors.kt @@ -0,0 +1,16 @@ +package org.jetbrains.kotlinx.dataframe.api + +import io.kotest.matchers.shouldBe +import org.junit.Test + +class ConstructorsTests { + + @Test + fun `untitled column naming`() { + val builder = DynamicDataFrameBuilder() + repeat(5) { + builder.add(columnOf(1, 2, 3)) + } + builder.toDataFrame() shouldBe dataFrameOf(List(5) { columnOf(1, 2, 3) }) + } +} From 19b8ee248c0878f5bd57a6ea4cf69642aea5229f Mon Sep 17 00:00:00 2001 From: Nikita Klimenko Date: Wed, 17 May 2023 14:28:21 +0300 Subject: [PATCH 3/9] documentation for DynamicDataFrameBuilder --- .../explainer/PluginCallbackProxy.kt | 69 ++- .../kotlinx/dataframe/samples/api/Create.kt | 19 + .../explainer/PluginCallbackProxy.kt | 69 ++- .../kotlinx/dataframe/samples/api/Create.kt | 19 + ...pi.Create.createDataFrameFromIterable.html | 476 ++++++++++++++++++ ...les.api.Create.createDataFrameFromMap.html | 476 ++++++++++++++++++ ....samples.api.Create.duplicatedColumns.html | 476 ++++++++++++++++++ docs/StardustDocs/topics/createDataFrame.md | 27 + 8 files changed, 1579 insertions(+), 52 deletions(-) create mode 100644 docs/StardustDocs/snippets/org.jetbrains.kotlinx.dataframe.samples.api.Create.createDataFrameFromIterable.html create mode 100644 docs/StardustDocs/snippets/org.jetbrains.kotlinx.dataframe.samples.api.Create.createDataFrameFromMap.html create mode 100644 docs/StardustDocs/snippets/org.jetbrains.kotlinx.dataframe.samples.api.Create.duplicatedColumns.html diff --git a/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/explainer/PluginCallbackProxy.kt b/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/explainer/PluginCallbackProxy.kt index c2223087ec..c3d34c30ed 100644 --- a/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/explainer/PluginCallbackProxy.kt +++ b/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/explainer/PluginCallbackProxy.kt @@ -155,50 +155,67 @@ object PluginCallbackProxy : PluginCallback { ) } + private fun List.joinToSource(): String = + joinToString(".") { it.source } + private fun statementOutput( expressions: List, ): DataFrameHtmlData { var data = DataFrameHtmlData() - if (expressions.size < 2) error("Sample without output or input (i.e. function returns some value)") - for ((i, expression) in expressions.withIndex()) { - when (i) { - 0 -> { - val table = convertToHTML(expression.df) - val description = table.copy( - body = """ + val allow = setOf( + "toDataFrame", "peek(dataFrameOf(col), dataFrameOf(col))" + ) + if (expressions.isEmpty()) { + error("No dataframe expressions in sample") + } + if (expressions.size == 1) { + if (allow.any { expressions[0].source.contains(it) }) { + val expression = expressions[0] + data += convertToHTML(expression.df) + } else { + error("${expressions.joinToSource()} Sample without output or input (i.e. function returns some value)") + } + } else { + for ((i, expression) in expressions.withIndex()) { + when (i) { + 0 -> { + val table = convertToHTML(expression.df) + val description = table.copy( + body = """
Input ${convertToDescription(expression.df)} ${table.body}
- """.trimIndent() - ) - data += description - } + """.trimIndent() + ) + data += description + } - expressions.lastIndex -> { - val table = convertToHTML(expression.df) - val description = table.copy( - body = """ + expressions.lastIndex -> { + val table = convertToHTML(expression.df) + val description = table.copy( + body = """
Output ${convertToDescription(expression.df)} ${table.body}
- """.trimIndent() - ) - data += description - } + """.trimIndent() + ) + data += description + } - else -> { - val table = convertToHTML(expression.df) - val description = table.copy( - body = """ + else -> { + val table = convertToHTML(expression.df) + val description = table.copy( + body = """
Step $i: ${convertToDescription(expression.df)} ${table.body}
- """.trimIndent() - ) - data += description + """.trimIndent() + ) + data += description + } } } } diff --git a/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/samples/api/Create.kt b/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/samples/api/Create.kt index 13a0943580..8584a85608 100644 --- a/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/samples/api/Create.kt +++ b/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/samples/api/Create.kt @@ -1,7 +1,9 @@ package org.jetbrains.kotlinx.dataframe.samples.api import io.kotest.matchers.shouldBe +import org.jetbrains.kotlinx.dataframe.AnyFrame import org.jetbrains.kotlinx.dataframe.DataFrame +import org.jetbrains.kotlinx.dataframe.api.DynamicDataFrameBuilder import org.jetbrains.kotlinx.dataframe.api.Infer import org.jetbrains.kotlinx.dataframe.api.ValueProperty import org.jetbrains.kotlinx.dataframe.api.add @@ -431,4 +433,21 @@ class Create : TestBase() { df["scores"].kind shouldBe ColumnKind.Frame df["summary"]["min score"].values() shouldBe listOf(3, 5) } + + @Test + @TransformDataFrameExpressions + fun duplicatedColumns() { + // SampleStart + fun peek(vararg dataframes: AnyFrame): AnyFrame { + val builder = DynamicDataFrameBuilder() + for (df in dataframes) { + df.columns().firstOrNull()?.let { builder.add(it) } + } + return builder.toDataFrame() + } + + val col by columnOf(1, 2, 3) + peek(dataFrameOf(col), dataFrameOf(col)) + // SampleEnd + } } diff --git a/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/explainer/PluginCallbackProxy.kt b/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/explainer/PluginCallbackProxy.kt index c2223087ec..c3d34c30ed 100644 --- a/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/explainer/PluginCallbackProxy.kt +++ b/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/explainer/PluginCallbackProxy.kt @@ -155,50 +155,67 @@ object PluginCallbackProxy : PluginCallback { ) } + private fun List.joinToSource(): String = + joinToString(".") { it.source } + private fun statementOutput( expressions: List, ): DataFrameHtmlData { var data = DataFrameHtmlData() - if (expressions.size < 2) error("Sample without output or input (i.e. function returns some value)") - for ((i, expression) in expressions.withIndex()) { - when (i) { - 0 -> { - val table = convertToHTML(expression.df) - val description = table.copy( - body = """ + val allow = setOf( + "toDataFrame", "peek(dataFrameOf(col), dataFrameOf(col))" + ) + if (expressions.isEmpty()) { + error("No dataframe expressions in sample") + } + if (expressions.size == 1) { + if (allow.any { expressions[0].source.contains(it) }) { + val expression = expressions[0] + data += convertToHTML(expression.df) + } else { + error("${expressions.joinToSource()} Sample without output or input (i.e. function returns some value)") + } + } else { + for ((i, expression) in expressions.withIndex()) { + when (i) { + 0 -> { + val table = convertToHTML(expression.df) + val description = table.copy( + body = """
Input ${convertToDescription(expression.df)} ${table.body}
- """.trimIndent() - ) - data += description - } + """.trimIndent() + ) + data += description + } - expressions.lastIndex -> { - val table = convertToHTML(expression.df) - val description = table.copy( - body = """ + expressions.lastIndex -> { + val table = convertToHTML(expression.df) + val description = table.copy( + body = """
Output ${convertToDescription(expression.df)} ${table.body}
- """.trimIndent() - ) - data += description - } + """.trimIndent() + ) + data += description + } - else -> { - val table = convertToHTML(expression.df) - val description = table.copy( - body = """ + else -> { + val table = convertToHTML(expression.df) + val description = table.copy( + body = """
Step $i: ${convertToDescription(expression.df)} ${table.body}
- """.trimIndent() - ) - data += description + """.trimIndent() + ) + data += description + } } } } diff --git a/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/samples/api/Create.kt b/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/samples/api/Create.kt index 13a0943580..8584a85608 100644 --- a/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/samples/api/Create.kt +++ b/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/samples/api/Create.kt @@ -1,7 +1,9 @@ package org.jetbrains.kotlinx.dataframe.samples.api import io.kotest.matchers.shouldBe +import org.jetbrains.kotlinx.dataframe.AnyFrame import org.jetbrains.kotlinx.dataframe.DataFrame +import org.jetbrains.kotlinx.dataframe.api.DynamicDataFrameBuilder import org.jetbrains.kotlinx.dataframe.api.Infer import org.jetbrains.kotlinx.dataframe.api.ValueProperty import org.jetbrains.kotlinx.dataframe.api.add @@ -431,4 +433,21 @@ class Create : TestBase() { df["scores"].kind shouldBe ColumnKind.Frame df["summary"]["min score"].values() shouldBe listOf(3, 5) } + + @Test + @TransformDataFrameExpressions + fun duplicatedColumns() { + // SampleStart + fun peek(vararg dataframes: AnyFrame): AnyFrame { + val builder = DynamicDataFrameBuilder() + for (df in dataframes) { + df.columns().firstOrNull()?.let { builder.add(it) } + } + return builder.toDataFrame() + } + + val col by columnOf(1, 2, 3) + peek(dataFrameOf(col), dataFrameOf(col)) + // SampleEnd + } } diff --git a/docs/StardustDocs/snippets/org.jetbrains.kotlinx.dataframe.samples.api.Create.createDataFrameFromIterable.html b/docs/StardustDocs/snippets/org.jetbrains.kotlinx.dataframe.samples.api.Create.createDataFrameFromIterable.html new file mode 100644 index 0000000000..f0f27e06d8 --- /dev/null +++ b/docs/StardustDocs/snippets/org.jetbrains.kotlinx.dataframe.samples.api.Create.createDataFrameFromIterable.html @@ -0,0 +1,476 @@ + + + + + + + + +
+ +

+ + + \ No newline at end of file diff --git a/docs/StardustDocs/snippets/org.jetbrains.kotlinx.dataframe.samples.api.Create.createDataFrameFromMap.html b/docs/StardustDocs/snippets/org.jetbrains.kotlinx.dataframe.samples.api.Create.createDataFrameFromMap.html new file mode 100644 index 0000000000..f0f27e06d8 --- /dev/null +++ b/docs/StardustDocs/snippets/org.jetbrains.kotlinx.dataframe.samples.api.Create.createDataFrameFromMap.html @@ -0,0 +1,476 @@ + + + + + + + + +
+ +

+ + + \ No newline at end of file diff --git a/docs/StardustDocs/snippets/org.jetbrains.kotlinx.dataframe.samples.api.Create.duplicatedColumns.html b/docs/StardustDocs/snippets/org.jetbrains.kotlinx.dataframe.samples.api.Create.duplicatedColumns.html new file mode 100644 index 0000000000..958449a85d --- /dev/null +++ b/docs/StardustDocs/snippets/org.jetbrains.kotlinx.dataframe.samples.api.Create.duplicatedColumns.html @@ -0,0 +1,476 @@ + + + + + + + + +
+ +

+ + + \ No newline at end of file diff --git a/docs/StardustDocs/topics/createDataFrame.md b/docs/StardustDocs/topics/createDataFrame.md index 369909be18..54bb266b92 100644 --- a/docs/StardustDocs/topics/createDataFrame.md +++ b/docs/StardustDocs/topics/createDataFrame.md @@ -119,6 +119,7 @@ val age by columnOf(15, 20, 22) listOf(name, age).toDataFrame() ``` + `DataFrame` from `Map>`: @@ -132,6 +133,7 @@ val map = mapOf("name" to listOf("Alice", "Bob", "Charlie"), "age" to listOf(15, map.toDataFrame() ``` + Creates a [`DataFrame`](DataFrame.md) from an [`Iterable`](https://kotlinlang.org/api/latest/jvm/stdlib/kotlin.collections/-iterable/) of [basic types](https://kotlinlang.org/docs/basic-types.html) (except arrays): @@ -213,3 +215,28 @@ val df = students.toDataFrame { ``` + +### DynamicDataFrameBuilder + +Previously mentioned dataframe constructors throw an exception when column names are duplicated. +When implementing a custom operation involving multiple dataframes, computed columns or parsing some third-party data, +it might be desirable to disambiguate column names instead of throwing an exception. + + + +```kotlin +fun peek(vararg dataframes: AnyFrame): AnyFrame { + val builder = DynamicDataFrameBuilder() + for (df in dataframes) { + df.columns().firstOrNull()?.let { builder.add(it) } + } + return builder.toDataFrame() +} + +val col by columnOf(1, 2, 3) +peek(dataFrameOf(col), dataFrameOf(col)) +``` + + + + From aaa585f56490e1cef5a1f150f710a3126293b787 Mon Sep 17 00:00:00 2001 From: Nikita Klimenko Date: Wed, 17 May 2023 15:30:31 +0300 Subject: [PATCH 4/9] update split docs --- .../kotlinx/dataframe/samples/api/Modify.kt | 28 ++++++++----- ...frame.samples.api.Modify.splitInplace.html | 20 +++++----- ...rame.samples.api.Modify.splitIntoRows.html | 40 +++++++++---------- docs/StardustDocs/topics/split.md | 28 ++++++++----- 4 files changed, 66 insertions(+), 50 deletions(-) diff --git a/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/samples/api/Modify.kt b/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/samples/api/Modify.kt index 374491133b..82d0a081b6 100644 --- a/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/samples/api/Modify.kt +++ b/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/samples/api/Modify.kt @@ -469,7 +469,7 @@ class Modify : TestBase() { @TransformDataFrameExpressions fun splitInplace_properties() { // SampleStart - df.split { name.firstName }.by { it.chars().toList() }.inplace() + df.split { name.firstName }.by { it.asIterable() }.inplace() // SampleEnd } @@ -480,7 +480,7 @@ class Modify : TestBase() { val name by columnGroup() val firstName by name.column() - df.split { firstName }.by { it.chars().toList() }.inplace() + df.split { firstName }.by { it.asIterable() }.inplace() // SampleEnd } @@ -488,7 +488,7 @@ class Modify : TestBase() { @TransformDataFrameExpressions fun splitInplace_strings() { // SampleStart - df.split { "name"["firstName"]() }.by { it.chars().toList() }.inplace() + df.split { "name"["firstName"]() }.by { it.asIterable() }.inplace() // SampleEnd } @@ -523,7 +523,9 @@ class Modify : TestBase() { @TransformDataFrameExpressions fun split1_properties() { // SampleStart - df.split { name.lastName }.by { it.asIterable() }.default(' ').inward { "char$it" } + df.split { name.lastName } + .by { it.asIterable() }.default(' ') + .inward { "char$it" } // SampleEnd } @@ -534,7 +536,9 @@ class Modify : TestBase() { val name by columnGroup() val lastName by name.column() - df.split { lastName }.by { it.asIterable() }.default(' ').inward { "char$it" } + df.split { lastName } + .by { it.asIterable() }.default(' ') + .inward { "char$it" } // SampleEnd } @@ -542,7 +546,9 @@ class Modify : TestBase() { @TransformDataFrameExpressions fun split1_strings() { // SampleStart - df.split { "name"["lastName"]() }.by { it.asIterable() }.default(' ').inward { "char$it" } + df.split { "name"["lastName"]() } + .by { it.asIterable() }.default(' ') + .inward { "char$it" } // SampleEnd } @@ -550,7 +556,9 @@ class Modify : TestBase() { @TransformDataFrameExpressions fun splitRegex() { // SampleStart - val merged = df.merge { name.lastName and name.firstName }.by { it[0] + " (" + it[1] + ")" }.into("name") + val merged = df.merge { name.lastName and name.firstName } + .by { it[0] + " (" + it[1] + ")" } + .into("name") // SampleEnd } @@ -593,7 +601,7 @@ class Modify : TestBase() { @TransformDataFrameExpressions fun splitIntoRows_properties() { // SampleStart - df.split { name.firstName }.by { it.chars().toList() }.intoRows() + df.split { name.firstName }.by { it.asIterable() }.intoRows() df.split { name }.by { it.values() }.intoRows() // SampleEnd @@ -606,7 +614,7 @@ class Modify : TestBase() { val name by columnGroup() val firstName by name.column() - df.split { firstName }.by { it.chars().toList() }.intoRows() + df.split { firstName }.by { it.asIterable() }.intoRows() df.split { name }.by { it.values() }.intoRows() // SampleEnd @@ -616,7 +624,7 @@ class Modify : TestBase() { @TransformDataFrameExpressions fun splitIntoRows_strings() { // SampleStart - df.split { "name"["firstName"]() }.by { it.chars().toList() }.intoRows() + df.split { "name"["firstName"]() }.by { it.asIterable() }.intoRows() df.split { colGroup("name") }.by { it.values() }.intoRows() // SampleEnd diff --git a/docs/StardustDocs/snippets/org.jetbrains.kotlinx.dataframe.samples.api.Modify.splitInplace.html b/docs/StardustDocs/snippets/org.jetbrains.kotlinx.dataframe.samples.api.Modify.splitInplace.html index cb30c9ae00..743a4e3323 100644 --- a/docs/StardustDocs/snippets/org.jetbrains.kotlinx.dataframe.samples.api.Modify.splitInplace.html +++ b/docs/StardustDocs/snippets/org.jetbrains.kotlinx.dataframe.samples.api.Modify.splitInplace.html @@ -519,15 +519,15 @@ /* ```kotlin -val merged = df.merge { name.lastName and name.firstName }.by { it[0] + " (" + it[1] + ")" }.into("name") +val merged = df.merge { name.lastName and name.firstName } + .by { it[0] + " (" + it[1] + ")" } + .into("name") ``` @@ -185,7 +193,7 @@ Use `.intoRows()` terminal operation in `split` configuration to spread split va ```kotlin -df.split { name.firstName }.by { it.chars().toList() }.intoRows() +df.split { name.firstName }.by { it.asIterable() }.intoRows() df.split { name }.by { it.values() }.intoRows() ``` @@ -197,7 +205,7 @@ df.split { name }.by { it.values() }.intoRows() val name by columnGroup() val firstName by name.column() -df.split { firstName }.by { it.chars().toList() }.intoRows() +df.split { firstName }.by { it.asIterable() }.intoRows() df.split { name }.by { it.values() }.intoRows() ``` @@ -206,7 +214,7 @@ df.split { name }.by { it.values() }.intoRows() ```kotlin -df.split { "name"["firstName"]() }.by { it.chars().toList() }.intoRows() +df.split { "name"["firstName"]() }.by { it.asIterable() }.intoRows() df.split { colGroup("name") }.by { it.values() }.intoRows() ``` From c0aedcd147f5b520e1a9199da774dad8ffec555e Mon Sep 17 00:00:00 2001 From: Nikita Klimenko Date: Fri, 19 May 2023 14:56:31 +0300 Subject: [PATCH 5/9] migrate xml schema location due to changes in Writerside --- docs/StardustDocs/c.list | 2 +- docs/StardustDocs/cfg/build-script.xml | 2 +- docs/StardustDocs/cfg/buildprofiles.xml | 2 +- docs/StardustDocs/d.tree | 2 +- docs/StardustDocs/project.ihp | 2 +- docs/StardustDocs/r.list | 2 +- docs/StardustDocs/redirection-rules.xml | 2 +- docs/StardustDocs/v.list | 2 +- 8 files changed, 8 insertions(+), 8 deletions(-) diff --git a/docs/StardustDocs/c.list b/docs/StardustDocs/c.list index e97afd856f..8339a6eb7d 100644 --- a/docs/StardustDocs/c.list +++ b/docs/StardustDocs/c.list @@ -1,6 +1,6 @@ + SYSTEM "https://resources.jetbrains.com/writerside/1.0/categories.dtd"> diff --git a/docs/StardustDocs/cfg/build-script.xml b/docs/StardustDocs/cfg/build-script.xml index 0118bbec2c..10cda55263 100644 --- a/docs/StardustDocs/cfg/build-script.xml +++ b/docs/StardustDocs/cfg/build-script.xml @@ -1,5 +1,5 @@ + SYSTEM "https://resources.jetbrains.com/writerside/1.0/build-script.dtd"> diff --git a/docs/StardustDocs/cfg/buildprofiles.xml b/docs/StardustDocs/cfg/buildprofiles.xml index a0ccbf802f..41f924fb36 100644 --- a/docs/StardustDocs/cfg/buildprofiles.xml +++ b/docs/StardustDocs/cfg/buildprofiles.xml @@ -1,6 +1,6 @@ - https://kotlin.github.io/dataframe/ diff --git a/docs/StardustDocs/d.tree b/docs/StardustDocs/d.tree index 8272d4702b..b1d1c72798 100644 --- a/docs/StardustDocs/d.tree +++ b/docs/StardustDocs/d.tree @@ -1,6 +1,6 @@ + SYSTEM "https://resources.jetbrains.com/writerside/1.0/product-profile.dtd"> - + diff --git a/docs/StardustDocs/r.list b/docs/StardustDocs/r.list index fc3c4bf694..bf01304e53 100644 --- a/docs/StardustDocs/r.list +++ b/docs/StardustDocs/r.list @@ -1,5 +1,5 @@ - + diff --git a/docs/StardustDocs/redirection-rules.xml b/docs/StardustDocs/redirection-rules.xml index 3725ab4d21..654fc8703f 100644 --- a/docs/StardustDocs/redirection-rules.xml +++ b/docs/StardustDocs/redirection-rules.xml @@ -1,5 +1,5 @@ - + Created after removal of "About Dataframe" from Dataframe diff --git a/docs/StardustDocs/v.list b/docs/StardustDocs/v.list index 245c641eba..6c3cf8012f 100644 --- a/docs/StardustDocs/v.list +++ b/docs/StardustDocs/v.list @@ -1,5 +1,5 @@ - + From b4fc60eee58c3d9d0c69bc2deba1ca3004b87da2 Mon Sep 17 00:00:00 2001 From: Nikita Klimenko Date: Fri, 19 May 2023 14:58:32 +0300 Subject: [PATCH 6/9] add spaces in title to make them easier to read --- docs/StardustDocs/topics/addRemove.md | 2 +- docs/StardustDocs/topics/appendDuplicate.md | 2 +- docs/StardustDocs/topics/explodeImplode.md | 2 +- docs/StardustDocs/topics/groupByConcat.md | 2 +- docs/StardustDocs/topics/groupUngroupFlatten.md | 2 +- docs/StardustDocs/topics/insertReplace.md | 2 +- docs/StardustDocs/topics/moveRename.md | 2 +- docs/StardustDocs/topics/pivotGather.md | 2 +- docs/StardustDocs/topics/splitMerge.md | 2 +- docs/StardustDocs/topics/updateConvert.md | 2 +- 10 files changed, 10 insertions(+), 10 deletions(-) diff --git a/docs/StardustDocs/topics/addRemove.md b/docs/StardustDocs/topics/addRemove.md index e6d4bb1b32..5b4e21ff87 100644 --- a/docs/StardustDocs/topics/addRemove.md +++ b/docs/StardustDocs/topics/addRemove.md @@ -1,4 +1,4 @@ -[//]: # (title: Add/map/remove columns) +[//]: # (title: Add / map / remove columns) * [`add`](add.md) columns to [`DataFrame`](DataFrame.md) * [`map`](map.md) columns to new [`DataFrame`](DataFrame.md) or [`DataColumn`](DataColumn.md) diff --git a/docs/StardustDocs/topics/appendDuplicate.md b/docs/StardustDocs/topics/appendDuplicate.md index 48e731fee7..13653e069c 100644 --- a/docs/StardustDocs/topics/appendDuplicate.md +++ b/docs/StardustDocs/topics/appendDuplicate.md @@ -1,4 +1,4 @@ -[//]: # (title: Append/duplicate rows) +[//]: # (title: Append / duplicate rows) * [`append`](append.md) — append new rows * [`duplicate`](duplicate.md) — duplicate selected rows diff --git a/docs/StardustDocs/topics/explodeImplode.md b/docs/StardustDocs/topics/explodeImplode.md index 8e3fb04072..96a20663f4 100644 --- a/docs/StardustDocs/topics/explodeImplode.md +++ b/docs/StardustDocs/topics/explodeImplode.md @@ -1,4 +1,4 @@ -[//]: # (title: Explode/implode columns) +[//]: # (title: Explode / implode columns) * [`explode`](explode.md) — distributes lists of values or [`DataFrames`](DataFrame.md) in given columns vertically, replicating data in other columns * [`implode`](implode.md) — collects column values in given columns into lists or [`DataFrames`](DataFrame.md), grouping by other columns diff --git a/docs/StardustDocs/topics/groupByConcat.md b/docs/StardustDocs/topics/groupByConcat.md index 55dea66b78..267ecc753c 100644 --- a/docs/StardustDocs/topics/groupByConcat.md +++ b/docs/StardustDocs/topics/groupByConcat.md @@ -1,4 +1,4 @@ -[//]: # (title: GroupBy/concat rows) +[//]: # (title: GroupBy / concat rows) * [`groupBy`](groupBy.md) — groups rows of [`DataFrame`](DataFrame.md) by given key columns. * [`concat`](concat.md) — concatenates rows from several [`DataFrames`](DataFrame.md) into single [`DataFrame`](DataFrame.md). diff --git a/docs/StardustDocs/topics/groupUngroupFlatten.md b/docs/StardustDocs/topics/groupUngroupFlatten.md index 5b7ee5a3e0..f31e29bf96 100644 --- a/docs/StardustDocs/topics/groupUngroupFlatten.md +++ b/docs/StardustDocs/topics/groupUngroupFlatten.md @@ -1,4 +1,4 @@ -[//]: # (title: Group/ungroup/flatten columns) +[//]: # (title: Group / ungroup / flatten columns) * [`group`](group.md) — groups given columns into [`ColumnGroups`](DataColumn.md#columngroup). * [`ungroup`](ungroup.md) — ungroups given [`ColumnGroups`](DataColumn.md#columngroup) by replacing them with their children columns diff --git a/docs/StardustDocs/topics/insertReplace.md b/docs/StardustDocs/topics/insertReplace.md index b628a4c249..e8b84318c8 100644 --- a/docs/StardustDocs/topics/insertReplace.md +++ b/docs/StardustDocs/topics/insertReplace.md @@ -1,4 +1,4 @@ -[//]: # (title: Insert/replace columns) +[//]: # (title: Insert / replace columns) * [`insert`](insert.md) — inserts new column into [`DataFrame`](DataFrame.md) * [`replace`](replace.md) — replaces columns in [`DataFrame`](DataFrame.md) diff --git a/docs/StardustDocs/topics/moveRename.md b/docs/StardustDocs/topics/moveRename.md index 9cada9439a..588cfa0e99 100644 --- a/docs/StardustDocs/topics/moveRename.md +++ b/docs/StardustDocs/topics/moveRename.md @@ -1,4 +1,4 @@ -[//]: # (title: Move/rename/reorder columns) +[//]: # (title: Move / rename / reorder columns) * [`move`](move.md) — move columns or change column grouping * [`rename`](rename.md) — rename columns diff --git a/docs/StardustDocs/topics/pivotGather.md b/docs/StardustDocs/topics/pivotGather.md index e5819ecd8b..7dd6963efe 100644 --- a/docs/StardustDocs/topics/pivotGather.md +++ b/docs/StardustDocs/topics/pivotGather.md @@ -1,4 +1,4 @@ -[//]: # (title: Pivot/gather columns) +[//]: # (title: Pivot / gather columns) * [`pivot`](pivot.md) — transforms column values into new columns (long to wide) * [`gather`](gather.md) — collects values from several columns into two `key` and `value` columns (wide to long) diff --git a/docs/StardustDocs/topics/splitMerge.md b/docs/StardustDocs/topics/splitMerge.md index 722f290c74..48dff35e3f 100644 --- a/docs/StardustDocs/topics/splitMerge.md +++ b/docs/StardustDocs/topics/splitMerge.md @@ -1,4 +1,4 @@ -[//]: # (title: Split/merge columns) +[//]: # (title: Split / merge columns) * [`split`](split.md) column values horizontally or vertically * [`merge`](merge.md) values from several columns into single column diff --git a/docs/StardustDocs/topics/updateConvert.md b/docs/StardustDocs/topics/updateConvert.md index 77575b0515..0539cb1b3f 100644 --- a/docs/StardustDocs/topics/updateConvert.md +++ b/docs/StardustDocs/topics/updateConvert.md @@ -1,4 +1,4 @@ -[//]: # (title: Update/convert values) +[//]: # (title: Update / convert values) Both [`update`](update.md) and [`convert`](convert.md) can be used to change columns values in `DataFrame`. From be6ad2a819888c87fa25a0efe3a6db1abdd83221 Mon Sep 17 00:00:00 2001 From: Nikita Klimenko Date: Fri, 19 May 2023 15:01:55 +0300 Subject: [PATCH 7/9] grammar fix --- docs/StardustDocs/topics/updateConvert.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/StardustDocs/topics/updateConvert.md b/docs/StardustDocs/topics/updateConvert.md index 0539cb1b3f..47c5dce7cc 100644 --- a/docs/StardustDocs/topics/updateConvert.md +++ b/docs/StardustDocs/topics/updateConvert.md @@ -1,7 +1,7 @@ [//]: # (title: Update / convert values) -Both [`update`](update.md) and [`convert`](convert.md) can be used to change columns values in `DataFrame`. +Both [`update`](update.md) and [`convert`](convert.md) can be used to change column values in a `DataFrame`. Difference between these operations: -* `convert` allows to change the type of the column, `update` doesn't -* `update` allows to filter cells to be updated, `convert` doesn't +* `convert` allows changing the type of the column, `update` doesn't +* `update` allows filtering cells to be updated, `convert` doesn't From 3e51444e199da1d437b5b906f89f44a7ca1a959e Mon Sep 17 00:00:00 2001 From: Nikita Klimenko Date: Tue, 1 Aug 2023 15:13:34 +0300 Subject: [PATCH 8/9] update generated sources and korro --- .../explainer/PluginCallbackProxy.kt | 2 +- .../kotlinx/dataframe/samples/api/Modify.kt | 71 ++++++++++++++----- ...rame.samples.api.Modify.splitIntoRows.html | 20 +++--- 3 files changed, 66 insertions(+), 27 deletions(-) diff --git a/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/explainer/PluginCallbackProxy.kt b/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/explainer/PluginCallbackProxy.kt index c3d34c30ed..8ee639f9e0 100644 --- a/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/explainer/PluginCallbackProxy.kt +++ b/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/explainer/PluginCallbackProxy.kt @@ -118,7 +118,7 @@ object PluginCallbackProxy : PluginCallback { body = """
- ${expressions.joinToString(".") { it.source } + ${expressions.joinToSource() .also { if (it.length > 95) TODO("expression is too long ${it.length}. better to split sample in multiple snippets") } diff --git a/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/samples/api/Modify.kt b/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/samples/api/Modify.kt index 1a8bf5c505..82d0a081b6 100644 --- a/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/samples/api/Modify.kt +++ b/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/samples/api/Modify.kt @@ -469,7 +469,7 @@ class Modify : TestBase() { @TransformDataFrameExpressions fun splitInplace_properties() { // SampleStart - df.split { name.firstName }.by { it.chars().toList() }.inplace() + df.split { name.firstName }.by { it.asIterable() }.inplace() // SampleEnd } @@ -480,7 +480,7 @@ class Modify : TestBase() { val name by columnGroup() val firstName by name.column() - df.split { firstName }.by { it.chars().toList() }.inplace() + df.split { firstName }.by { it.asIterable() }.inplace() // SampleEnd } @@ -488,7 +488,7 @@ class Modify : TestBase() { @TransformDataFrameExpressions fun splitInplace_strings() { // SampleStart - df.split { "name"["firstName"]() }.by { it.chars().toList() }.inplace() + df.split { "name"["firstName"]() }.by { it.asIterable() }.inplace() // SampleEnd } @@ -496,9 +496,7 @@ class Modify : TestBase() { @TransformDataFrameExpressions fun split_properties() { // SampleStart - df.split { name }.by { it.values() }.into("nameParts") - - df.split { name.lastName }.by(" ").default("").inward { "word$it" } + df.split { name.lastName }.by { it.asIterable() }.into("char1", "char2") // SampleEnd } @@ -509,9 +507,7 @@ class Modify : TestBase() { val name by columnGroup() val lastName by name.column() - df.split { name }.by { it.values() }.into("nameParts") - - df.split { lastName }.by(" ").default("").inward { "word$it" } + df.split { lastName }.by { it.asIterable() }.into("char1", "char2") // SampleEnd } @@ -519,18 +515,61 @@ class Modify : TestBase() { @TransformDataFrameExpressions fun split_strings() { // SampleStart - df.split { name }.by { it.values() }.into("nameParts") + df.split { "name"["lastName"]() }.by { it.asIterable() }.into("char1", "char2") + // SampleEnd + } + + @Test + @TransformDataFrameExpressions + fun split1_properties() { + // SampleStart + df.split { name.lastName } + .by { it.asIterable() }.default(' ') + .inward { "char$it" } + // SampleEnd + } + + @Test + @TransformDataFrameExpressions + fun split1_accessors() { + // SampleStart + val name by columnGroup() + val lastName by name.column() - df.split { "name"["lastName"] }.by(" ").default("").inward { "word$it" } + df.split { lastName } + .by { it.asIterable() }.default(' ') + .inward { "char$it" } + // SampleEnd + } + + @Test + @TransformDataFrameExpressions + fun split1_strings() { + // SampleStart + df.split { "name"["lastName"]() } + .by { it.asIterable() }.default(' ') + .inward { "char$it" } // SampleEnd } @Test @TransformDataFrameExpressions fun splitRegex() { - val merged = df.merge { name.lastName and name.firstName }.by { it[0] + " (" + it[1] + ")" }.into("name") - val name by column() // SampleStart + val merged = df.merge { name.lastName and name.firstName } + .by { it[0] + " (" + it[1] + ")" } + .into("name") + // SampleEnd + } + + private val merged = df.merge { name.lastName and name.firstName }.by { it[0] + " (" + it[1] + ")" }.into("name") + + @Test + @TransformDataFrameExpressions + fun splitRegex1() { + // SampleStart + val name by column() + merged.split { name } .match("""(.*) \((.*)\)""") .inward("firstName", "lastName") @@ -562,7 +601,7 @@ class Modify : TestBase() { @TransformDataFrameExpressions fun splitIntoRows_properties() { // SampleStart - df.split { name.firstName }.by { it.chars().toList() }.intoRows() + df.split { name.firstName }.by { it.asIterable() }.intoRows() df.split { name }.by { it.values() }.intoRows() // SampleEnd @@ -575,7 +614,7 @@ class Modify : TestBase() { val name by columnGroup() val firstName by name.column() - df.split { firstName }.by { it.chars().toList() }.intoRows() + df.split { firstName }.by { it.asIterable() }.intoRows() df.split { name }.by { it.values() }.intoRows() // SampleEnd @@ -585,7 +624,7 @@ class Modify : TestBase() { @TransformDataFrameExpressions fun splitIntoRows_strings() { // SampleStart - df.split { "name"["firstName"]() }.by { it.chars().toList() }.intoRows() + df.split { "name"["firstName"]() }.by { it.asIterable() }.intoRows() df.split { colGroup("name") }.by { it.values() }.intoRows() // SampleEnd diff --git a/docs/StardustDocs/snippets/org.jetbrains.kotlinx.dataframe.samples.api.Modify.splitIntoRows.html b/docs/StardustDocs/snippets/org.jetbrains.kotlinx.dataframe.samples.api.Modify.splitIntoRows.html index 3cc292d690..ca02559014 100644 --- a/docs/StardustDocs/snippets/org.jetbrains.kotlinx.dataframe.samples.api.Modify.splitIntoRows.html +++ b/docs/StardustDocs/snippets/org.jetbrains.kotlinx.dataframe.samples.api.Modify.splitIntoRows.html @@ -196,9 +196,9 @@ -
- df.split { name.firstName }.by { it.asIterable() }.intoRows() - +
+ df.split { name.firstName }.by { it.asIterable() }.intoRows() +
Input DataFrame: rowsCount = 7, columnsCount = 5
@@ -223,11 +223,11 @@

... showing only top 20 of 37 rows

-
-
-
- df.split { name }.by { it.values() }.intoRows() - +
+
+
+ df.split { name }.by { it.values() }.intoRows() +
Input DataFrame: rowsCount = 7, columnsCount = 5
@@ -252,8 +252,8 @@

-
-
+
+