diff --git a/core/build.gradle.kts b/core/build.gradle.kts
index 96ae9e8d62..1d1fc710b7 100644
--- a/core/build.gradle.kts
+++ b/core/build.gradle.kts
@@ -69,10 +69,12 @@ val addGeneratedSourcesToGit by tasks.creating(GitTask::class) {
 // Backup the kotlin source files location
 val kotlinMainSources = kotlin.sourceSets.main.get().kotlin.sourceDirectories
+val kotlinTestSources = kotlin.sourceSets.test.get().kotlin.sourceDirectories
 
 // Task to generate the processed documentation
 val processKDocsMain by creatingProcessDocTask(
-    sources = kotlinMainSources.filterNot { "build/generated" in it.path }, // Exclude generated sources
+    sources = (kotlinMainSources + kotlinTestSources) // Include both test and main sources for cross-referencing
+        .filterNot { "build/generated" in it.path }, // Exclude generated sources
 ) {
     target = file(generatedSourcesFolderName)
     processors = listOf(
@@ -107,8 +109,9 @@ tasks.withType {
     doFirst {
         kotlin.sourceSets.main {
             kotlin.setSrcDirs(
-                processKDocsMain.targets +
-                    kotlinMainSources.filter { "build/generated" in it.path } // Include generated sources (which were excluded above)
+                processKDocsMain.targets
+                    .filterNot { "src/test/kotlin" in it.path || "src/test/java" in it.path } // filter out test sources again
+                    .plus(kotlinMainSources.filter { "build/generated" in it.path }) // Include generated sources (which were excluded above)
             )
         }
     }
diff --git a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/aliases.kt b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/aliases.kt
index 74552bae8d..064bbeb022 100644
--- a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/aliases.kt
+++ b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/aliases.kt
@@ -74,6 +74,17 @@ public typealias RowValueExpression<T, C, R> = DataRow<T>.(it: C) -> R
  */
 public typealias RowColumnExpression<T, C, R> = (row: DataRow<T>, col: DataColumn<C>) -> R
 
+/**
+ * [ColumnExpression] is used to express or select any instance of `R` using the given instance of [DataColumn]`<C>` as
+ * `this` and `it`.
+ *
+ * Shorthand for:
+ * ```kotlin
+ * DataColumn<C>.(it: DataColumn<C>) -> R
+ * ```
+ */
+public typealias ColumnExpression<C, R> = Selector<DataColumn<C>, R>
+
 /**
  * [ColumnSelector] is used to express or select a single column, represented by [SingleColumn]`<C>`, using the
  * context of [ColumnsSelectionDsl]`<T>` as `this` and `it`.
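The new `ColumnExpression<C, R>` alias is just a `Selector` whose receiver and `it` are both the `DataColumn<C>`. As a rough, hypothetical sketch of where that shape shows up in user code: the `perCol` finisher documented further down in this diff accepts exactly such a lambda, while `dataFrameOf`, `colsOf`, `mean` and `print` are assumed from the library's core API.

```kotlin
import org.jetbrains.kotlinx.dataframe.api.*

fun main() {
    // Hypothetical data: two nullable Double columns.
    val df = dataFrameOf("length", "age")(
        1.0, null,
        null, 25.0,
        3.0, 30.0,
    )

    // perCol takes a ColumnExpression: `this`/`it` is the DataColumn being updated,
    // so a per-column replacement (here the mean of the non-null values) is computed once per column.
    val filled = df.fillNulls { colsOf<Double?>() }
        .perCol { it.mean() }

    filled.print()
}
```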
diff --git a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/ColumnsSelectionDsl.kt b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/ColumnsSelectionDsl.kt index 60ca70a447..7e837b558d 100644 --- a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/ColumnsSelectionDsl.kt +++ b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/ColumnsSelectionDsl.kt @@ -11,36 +11,27 @@ import org.jetbrains.kotlinx.dataframe.DataColumn import org.jetbrains.kotlinx.dataframe.DataFrame import org.jetbrains.kotlinx.dataframe.DataRow import org.jetbrains.kotlinx.dataframe.Predicate -import org.jetbrains.kotlinx.dataframe.columns.ColumnAccessor -import org.jetbrains.kotlinx.dataframe.columns.ColumnGroup -import org.jetbrains.kotlinx.dataframe.columns.ColumnPath -import org.jetbrains.kotlinx.dataframe.columns.ColumnReference -import org.jetbrains.kotlinx.dataframe.columns.ColumnResolutionContext -import org.jetbrains.kotlinx.dataframe.columns.ColumnSet -import org.jetbrains.kotlinx.dataframe.columns.ColumnWithPath -import org.jetbrains.kotlinx.dataframe.columns.FrameColumn -import org.jetbrains.kotlinx.dataframe.columns.SingleColumn -import org.jetbrains.kotlinx.dataframe.columns.renamedReference +import org.jetbrains.kotlinx.dataframe.columns.* +import org.jetbrains.kotlinx.dataframe.documentation.AccessApi import org.jetbrains.kotlinx.dataframe.hasNulls import org.jetbrains.kotlinx.dataframe.impl.columnName -import org.jetbrains.kotlinx.dataframe.impl.columns.ColumnsList -import org.jetbrains.kotlinx.dataframe.impl.columns.DistinctColumnSet -import org.jetbrains.kotlinx.dataframe.impl.columns.addPath -import org.jetbrains.kotlinx.dataframe.impl.columns.allColumnsExcept -import org.jetbrains.kotlinx.dataframe.impl.columns.changePath -import org.jetbrains.kotlinx.dataframe.impl.columns.createColumnSet -import org.jetbrains.kotlinx.dataframe.impl.columns.getAt -import org.jetbrains.kotlinx.dataframe.impl.columns.getChildrenAt -import org.jetbrains.kotlinx.dataframe.impl.columns.single -import org.jetbrains.kotlinx.dataframe.impl.columns.toColumns -import org.jetbrains.kotlinx.dataframe.impl.columns.top -import org.jetbrains.kotlinx.dataframe.impl.columns.transform -import org.jetbrains.kotlinx.dataframe.impl.columns.transformSingle +import org.jetbrains.kotlinx.dataframe.impl.columns.* import org.jetbrains.kotlinx.dataframe.impl.columns.tree.dfs import kotlin.reflect.KProperty import kotlin.reflect.KType import kotlin.reflect.typeOf +/** + * Referring to a column in the selection DSL can be done in several ways corresponding to all + * [Access APIs][AccessApi]: + * TODO: [Issue #286](https://github.com/Kotlin/dataframe/issues/286) + */ +private interface CommonColumnSelectionExamples + +/** [Column Selection DSL][ColumnSelectionDsl] */ +internal interface ColumnSelectionDslLink + +/** TODO: [Issue #286](https://github.com/Kotlin/dataframe/issues/286) */ public interface ColumnSelectionDsl : ColumnsContainer { public operator fun ColumnReference.invoke(): DataColumn = get(this) @@ -56,6 +47,10 @@ public interface ColumnSelectionDsl : ColumnsContainer { public operator fun String.get(column: String): ColumnPath = pathOf(this, column) } +/** [Columns Selection DSL][ColumnsSelectionDsl] */ +internal interface ColumnsSelectionDslLink + +/** TODO: [Issue #286](https://github.com/Kotlin/dataframe/issues/286) */ public interface ColumnsSelectionDsl : ColumnSelectionDsl, SingleColumn> { public fun 
ColumnSet.first(condition: ColumnFilter): SingleColumn = @@ -70,24 +65,26 @@ public interface ColumnsSelectionDsl : ColumnSelectionDsl, SingleColum public fun ColumnsContainer<*>.group(name: String): ColumnGroupReference = name.toColumnOf() - public operator fun String.rangeTo(endInclusive: String): ColumnSet<*> = toColumnAccessor().rangeTo(endInclusive.toColumnAccessor()) - - public operator fun AnyColumnReference.rangeTo(endInclusive: AnyColumnReference): ColumnSet<*> = object : ColumnSet { - override fun resolve(context: ColumnResolutionContext): List> { - val startPath = this@rangeTo.resolveSingle(context)!!.path - val endPath = endInclusive.resolveSingle(context)!!.path - val parentPath = startPath.parent()!! - require(parentPath == endPath.parent()) { "Start and end columns have different parent column paths" } - val parentCol = context.df.getColumnGroup(parentPath) - val startIndex = parentCol.getColumnIndex(startPath.name) - val endIndex = parentCol.getColumnIndex(endPath.name) - return (startIndex..endIndex).map { - parentCol.getColumn(it).let { - it.addPath(parentPath + it.name) + public operator fun String.rangeTo(endInclusive: String): ColumnSet<*> = + toColumnAccessor().rangeTo(endInclusive.toColumnAccessor()) + + public operator fun AnyColumnReference.rangeTo(endInclusive: AnyColumnReference): ColumnSet<*> = + object : ColumnSet { + override fun resolve(context: ColumnResolutionContext): List> { + val startPath = this@rangeTo.resolveSingle(context)!!.path + val endPath = endInclusive.resolveSingle(context)!!.path + val parentPath = startPath.parent()!! + require(parentPath == endPath.parent()) { "Start and end columns have different parent column paths" } + val parentCol = context.df.getColumnGroup(parentPath) + val startIndex = parentCol.getColumnIndex(startPath.name) + val endIndex = parentCol.getColumnIndex(endPath.name) + return (startIndex..endIndex).map { + parentCol.getColumn(it).let { + it.addPath(parentPath + it.name) + } } } } - } public fun none(): ColumnSet<*> = ColumnsList(emptyList()) @@ -115,7 +112,8 @@ public interface ColumnsSelectionDsl : ColumnSelectionDsl, SingleColum public fun ColumnSet>.select(vararg columns: String): ColumnSet<*> = select { columns.toColumns() } - public fun ColumnSet>.select(vararg columns: KProperty): ColumnSet = select { columns.toColumns() } + public fun ColumnSet>.select(vararg columns: KProperty): ColumnSet = + select { columns.toColumns() } public fun ColumnSet>.select(selector: ColumnsSelector): ColumnSet = createColumnSet { this@select.resolve(it).flatMap { group -> @@ -145,7 +143,8 @@ public interface ColumnsSelectionDsl : ColumnSelectionDsl, SingleColum // region allDfs - public fun ColumnSet<*>.allDfs(includeGroups: Boolean = false): ColumnSet = if (includeGroups) dfs { true } else dfs { !it.isColumnGroup() } + public fun ColumnSet<*>.allDfs(includeGroups: Boolean = false): ColumnSet = + if (includeGroups) dfs { true } else dfs { !it.isColumnGroup() } public fun String.allDfs(includeGroups: Boolean = false): ColumnSet = toColumnAccessor().allDfs(includeGroups) @@ -242,7 +241,9 @@ public interface ColumnsSelectionDsl : ColumnSelectionDsl, SingleColum public fun col(property: KProperty): ColumnAccessor = property.toColumnAccessor() - public operator fun ColumnSet<*>.get(colName: String): ColumnSet = transform { it.mapNotNull { it.getChild(colName) } } + public operator fun ColumnSet<*>.get(colName: String): ColumnSet = + transform { it.mapNotNull { it.getChild(colName) } } + public operator fun ColumnSet<*>.get(column: 
ColumnReference): ColumnSet = cols(column) public fun SingleColumn.take(n: Int): ColumnSet<*> = transformSingle { it.children().take(n) } @@ -288,8 +289,11 @@ public interface ColumnsSelectionDsl : ColumnSelectionDsl, SingleColum public infix fun ColumnReference.into(column: KProperty<*>): ColumnReference = named(column.columnName) public infix fun String.into(newName: String): ColumnReference = toColumnAccessor().into(newName) - public infix fun String.into(column: ColumnAccessor<*>): ColumnReference = toColumnAccessor().into(column.name()) - public infix fun String.into(column: KProperty<*>): ColumnReference = toColumnAccessor().into(column.columnName) + public infix fun String.into(column: ColumnAccessor<*>): ColumnReference = + toColumnAccessor().into(column.name()) + + public infix fun String.into(column: KProperty<*>): ColumnReference = + toColumnAccessor().into(column.columnName) public infix fun ColumnReference.named(newName: String): ColumnReference = renamedReference(newName) public infix fun ColumnReference.named(name: KProperty<*>): ColumnReference = named(name.columnName) @@ -311,6 +315,7 @@ public interface ColumnsSelectionDsl : ColumnSelectionDsl, SingleColum public infix fun KProperty.and(other: String): ColumnSet = toColumnAccessor() and other public infix fun KProperty.and(other: KProperty): ColumnSet = toColumnAccessor() and other.toColumnAccessor() + public infix fun KProperty.and(other: ColumnsSelector): ColumnSet = toColumnAccessor() and other() // endregion @@ -339,7 +344,7 @@ public interface ColumnsSelectionDsl : ColumnSelectionDsl, SingleColum public inline fun ColumnsSelectionDsl.expr( name: String = "", infer: Infer = Infer.Nulls, - noinline expression: AddExpression + noinline expression: AddExpression, ): DataColumn = mapToColumn(name, infer, expression) internal fun ColumnsSelector.filter(predicate: (ColumnWithPath) -> Boolean): ColumnsSelector = diff --git a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/Nulls.kt b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/Nulls.kt index 2b05fc3a93..c6e2541cd3 100644 --- a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/Nulls.kt +++ b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/Nulls.kt @@ -1,36 +1,190 @@ package org.jetbrains.kotlinx.dataframe.api -import org.jetbrains.kotlinx.dataframe.AnyCol -import org.jetbrains.kotlinx.dataframe.AnyColumnReference -import org.jetbrains.kotlinx.dataframe.AnyFrame -import org.jetbrains.kotlinx.dataframe.AnyRow -import org.jetbrains.kotlinx.dataframe.ColumnsSelector -import org.jetbrains.kotlinx.dataframe.DataColumn -import org.jetbrains.kotlinx.dataframe.DataFrame +import org.jetbrains.kotlinx.dataframe.* +import org.jetbrains.kotlinx.dataframe.api.Update.UpdateOperationArg import org.jetbrains.kotlinx.dataframe.columns.ColumnKind import org.jetbrains.kotlinx.dataframe.columns.ColumnReference +import org.jetbrains.kotlinx.dataframe.documentation.* import org.jetbrains.kotlinx.dataframe.impl.columns.toColumnSet import org.jetbrains.kotlinx.dataframe.impl.columns.toColumns -import org.jetbrains.kotlinx.dataframe.kind -import org.jetbrains.kotlinx.dataframe.typeClass import kotlin.reflect.KProperty // region fillNulls -public fun DataFrame.fillNulls(cols: ColumnsSelector): Update = - update(cols).where { it == null } +/** + * ## The Fill Nulls Operation + * + * Replaces `null` values with given value or expression. + * Specific case of [update]. 
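The reformatted `rangeTo` operator above resolves every column lying between the start and end columns, provided both share the same parent column group; together with `and` it keeps selectors compact. A small sketch with hypothetical column names (top-level columns share the root as their parent, so the range is valid; `dataFrameOf`, `select` and `print` are assumed from the core API):

```kotlin
import org.jetbrains.kotlinx.dataframe.api.*

fun main() {
    // Hypothetical flat frame with four columns.
    val df = dataFrameOf("a", "b", "c", "d")(
        1, 2, 3, 4,
        5, 6, 7, 8,
    )

    // String.rangeTo selects the contiguous columns "a", "b" and "c".
    df.select { "a".."c" }.print()

    // `and` combines independent selections into one ColumnSet.
    df.select { "a" and "d" }.print()
}
```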
+ * + * Check out the [`fillNulls` Operation Usage][FillNulls.Usage]. + * + * For more information: [See `fillNulls` on the documentation website.](https://kotlin.github.io/dataframe/fill.html#fillnulls) + */ +internal interface FillNulls { -public fun DataFrame.fillNulls(vararg cols: String): Update = - fillNulls { cols.toColumns() } + /** ## [fillNulls][org.jetbrains.kotlinx.dataframe.api.fillNulls] Operation Usage + * + * [fillNulls][org.jetbrains.kotlinx.dataframe.api.fillNulls] `{ `[columns][org.jetbrains.kotlinx.dataframe.documentation.SelectingColumns]` }` + * + * - `[.`[where][org.jetbrains.kotlinx.dataframe.api.Update.where]` { `[rowValueCondition][org.jetbrains.kotlinx.dataframe.documentation.SelectingRows.RowValueCondition.WithExample]` } ]` + * + * - `[.`[at][org.jetbrains.kotlinx.dataframe.api.Update.at]` (`[rowIndices][org.jetbrains.kotlinx.dataframe.api.CommonUpdateAtFunctionDoc.RowIndicesParam]`) ]` + * + * - `.`[with][org.jetbrains.kotlinx.dataframe.api.Update.with]` { `[rowExpression][org.jetbrains.kotlinx.dataframe.documentation.ExpressionsGivenRow.RowValueExpression.WithExample]` } + * | .`[notNull][org.jetbrains.kotlinx.dataframe.api.Update.notNull]` { `[rowExpression][org.jetbrains.kotlinx.dataframe.documentation.ExpressionsGivenRow.RowValueExpression.WithExample]` } + * | .`[perCol][org.jetbrains.kotlinx.dataframe.api.Update.perCol]` { `[colExpression][org.jetbrains.kotlinx.dataframe.documentation.ExpressionsGivenColumn.ColumnExpression.WithExample]` } + * | .`[perRowCol][org.jetbrains.kotlinx.dataframe.api.Update.perRowCol]` { `[rowColExpression][org.jetbrains.kotlinx.dataframe.documentation.ExpressionsGivenRowAndColumn.RowColumnExpression.WithExample]` } + * | .`[withValue][org.jetbrains.kotlinx.dataframe.api.Update.withValue]`(value) + * | .`[withNull][org.jetbrains.kotlinx.dataframe.api.Update.withNull]`() + * | .`[withZero][org.jetbrains.kotlinx.dataframe.api.Update.withZero]`() + * | .`[asFrame][org.jetbrains.kotlinx.dataframe.api.Update.asFrame]` { `[dataFrameExpression][org.jetbrains.kotlinx.dataframe.documentation.ExpressionsGivenDataFrame.DataFrameExpression.WithExample]` }` + */ + interface Usage +} + +private interface SetFillNullsOperationArg + +/** + * ## The Fill Nulls Operation + * + * Replaces `null` values with given value or expression. + * Specific case of [update][org.jetbrains.kotlinx.dataframe.api.update]. + * + * Check out the [`fillNulls` Operation Usage][org.jetbrains.kotlinx.dataframe.api.FillNulls.Usage]. + * + * For more information: [See `fillNulls` on the documentation website.](https://kotlin.github.io/dataframe/fill.html#fillnulls) + * ## ‎ + * The columns to update need to be selected. See [Selecting Columns][org.jetbrains.kotlinx.dataframe.documentation.SelectingColumns] for all the selecting options. + * + * ## This Fill Nulls Overload + * + */ +private interface CommonFillNullsFunctionDoc + +/** + * ## The Fill Nulls Operation + * + * Replaces `null` values with given value or expression. + * Specific case of [update][org.jetbrains.kotlinx.dataframe.api.update]. + * + * Check out the [`fillNulls` Operation Usage][org.jetbrains.kotlinx.dataframe.api.FillNulls.Usage]. + * + * For more information: [See `fillNulls` on the documentation website.](https://kotlin.github.io/dataframe/fill.html#fillnulls) + * ## ‎ + * The columns to update need to be selected. See [Selecting Columns][org.jetbrains.kotlinx.dataframe.documentation.SelectingColumns] for all the selecting options. 
+ * + * ## This Fill Nulls Overload + * + * Select or express columns using the Column(s) Selection DSL. + * (Any [Access API][org.jetbrains.kotlinx.dataframe.documentation.AccessApi]). + * + * This DSL comes in the form of either a [Column Selector][org.jetbrains.kotlinx.dataframe.ColumnSelector]- or [Columns Selector][org.jetbrains.kotlinx.dataframe.ColumnsSelector] lambda, + * which operate in the [Column Selection DSL][org.jetbrains.kotlinx.dataframe.api.ColumnSelectionDsl] or the [Columns Selection DSL][org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl] and + * expect you to return a [SingleColumn][org.jetbrains.kotlinx.dataframe.columns.SingleColumn] or [ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet], respectively. + * + * For example: + * + * `df.`[fillNulls][org.jetbrains.kotlinx.dataframe.api.fillNulls]` { length `[and][org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl.and]` age }` + * + * `df.`[fillNulls][org.jetbrains.kotlinx.dataframe.api.fillNulls]` { `[cols][org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl.cols]`(1..5) }` + * + * `df.`[fillNulls][org.jetbrains.kotlinx.dataframe.api.fillNulls]` { `[colsOf][org.jetbrains.kotlinx.dataframe.api.colsOf]`<`[Double][Double]`>() }` + * + * @param columns The [Columns selector DSL][org.jetbrains.kotlinx.dataframe.documentation.SelectingColumns.Dsl.WithExample] used to select the columns of this [DataFrame][org.jetbrains.kotlinx.dataframe.DataFrame] to update. + */ +public fun DataFrame.fillNulls(columns: ColumnsSelector): Update = + update(columns).where { it == null } -public fun DataFrame.fillNulls(vararg cols: KProperty): Update = - fillNulls { cols.toColumns() } +/** + * ## The Fill Nulls Operation + * + * Replaces `null` values with given value or expression. + * Specific case of [update][org.jetbrains.kotlinx.dataframe.api.update]. + * + * Check out the [`fillNulls` Operation Usage][org.jetbrains.kotlinx.dataframe.api.FillNulls.Usage]. + * + * For more information: [See `fillNulls` on the documentation website.](https://kotlin.github.io/dataframe/fill.html#fillnulls) + * ## ‎ + * The columns to update need to be selected. See [Selecting Columns][org.jetbrains.kotlinx.dataframe.documentation.SelectingColumns] for all the selecting options. + * + * ## This Fill Nulls Overload + * + * Select columns using their [column names][String] + * ([String API][org.jetbrains.kotlinx.dataframe.documentation.AccessApi.StringApi]). + * + * For example: + * + * `df.`[fillNulls][org.jetbrains.kotlinx.dataframe.api.fillNulls]`("length", "age")` + * + * @param columns The [Column names][org.jetbrains.kotlinx.dataframe.documentation.SelectingColumns.ColumnNames.WithExample] belonging to this [DataFrame][org.jetbrains.kotlinx.dataframe.DataFrame] to update. + */ +public fun DataFrame.fillNulls(vararg columns: String): Update = + fillNulls { columns.toColumns() } -public fun DataFrame.fillNulls(vararg cols: ColumnReference): Update = - fillNulls { cols.toColumns() } +/** + * ## The Fill Nulls Operation + * + * Replaces `null` values with given value or expression. + * Specific case of [update][org.jetbrains.kotlinx.dataframe.api.update]. + * + * Check out the [`fillNulls` Operation Usage][org.jetbrains.kotlinx.dataframe.api.FillNulls.Usage]. + * + * For more information: [See `fillNulls` on the documentation website.](https://kotlin.github.io/dataframe/fill.html#fillnulls) + * ## ‎ + * The columns to update need to be selected. 
See [Selecting Columns][org.jetbrains.kotlinx.dataframe.documentation.SelectingColumns] for all the selecting options. + * + * ## This Fill Nulls Overload + * + * Select columns using [KProperties][KProperty] ([KProperties API][org.jetbrains.kotlinx.dataframe.documentation.AccessApi.KPropertiesApi]). + * + * For example: + * ```kotlin + * data class Person(val length: Double, val age: Double) + * ``` + * + * `df.`[fillNulls][org.jetbrains.kotlinx.dataframe.api.fillNulls]`(Person::length, Person::age)` + * + * @param columns The [KProperties][org.jetbrains.kotlinx.dataframe.documentation.SelectingColumns.KProperties.WithExample] corresponding to columns of this [DataFrame][org.jetbrains.kotlinx.dataframe.DataFrame] to update. + */ +public fun DataFrame.fillNulls(vararg columns: KProperty): Update = + fillNulls { columns.toColumns() } -public fun DataFrame.fillNulls(cols: Iterable>): Update = - fillNulls { cols.toColumnSet() } +/** + * ## The Fill Nulls Operation + * + * Replaces `null` values with given value or expression. + * Specific case of [update][org.jetbrains.kotlinx.dataframe.api.update]. + * + * Check out the [`fillNulls` Operation Usage][org.jetbrains.kotlinx.dataframe.api.FillNulls.Usage]. + * + * For more information: [See `fillNulls` on the documentation website.](https://kotlin.github.io/dataframe/fill.html#fillnulls) + * ## ‎ + * The columns to update need to be selected. See [Selecting Columns][org.jetbrains.kotlinx.dataframe.documentation.SelectingColumns] for all the selecting options. + * + * ## This Fill Nulls Overload + * + * Select columns using [column accessors][org.jetbrains.kotlinx.dataframe.columns.ColumnReference] + * ([Column Accessors API][org.jetbrains.kotlinx.dataframe.documentation.AccessApi.ColumnAccessorsApi]). + * + * For example: + * + * `val length by `[column][org.jetbrains.kotlinx.dataframe.api.column]`<`[Double][Double]`>()` + * + * `val age by `[column][org.jetbrains.kotlinx.dataframe.api.column]`<`[Double][Double]`>()` + * + * `df.`[fillNulls][org.jetbrains.kotlinx.dataframe.api.fillNulls]`(length, age)` + * + * @param columns The [Column references][org.jetbrains.kotlinx.dataframe.documentation.SelectingColumns.ColumnAccessors.WithExample] of this [DataFrame][org.jetbrains.kotlinx.dataframe.DataFrame] to update. + */ +public fun DataFrame.fillNulls(vararg columns: ColumnReference): Update = + fillNulls { columns.toColumns() } + +/** + * TODO this will be deprecated [PR #286](https://github.com/Kotlin/dataframe/pull/320) + */ +public fun DataFrame.fillNulls(columns: Iterable>): Update = + fillNulls { columns.toColumnSet() } // endregion @@ -56,65 +210,553 @@ internal inline val Float?.isNA: Boolean get() = this == null || this.isNaN() // region fillNaNs -public fun DataFrame.fillNaNs(cols: ColumnsSelector): Update = - update(cols).where { it.isNaN } +/** + * ## The Fill NaNs Operation + * + * Replaces [`NaN`][NaN] values with given value or expression. + * Specific case of [update]. + * + * Check out the [`fillNaNs` Operation Usage][FillNaNs.Usage]. 
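The four documented `fillNulls` overloads differ only in how the target columns are named; each returns the same `Update` clause, which is then finished with `with`, `withValue`, `withZero`, `perRowCol`, and so on. A hedged sketch showing the same fill through each access API (the `Person` schema and column names are made up for illustration):

```kotlin
import org.jetbrains.kotlinx.dataframe.api.*

data class Person(val length: Double?, val age: Double?)

fun main() {
    val df = dataFrameOf("length", "age")(
        1.0, null,
        null, 25.0,
    )

    // Column(s) Selection DSL
    df.fillNulls { colsOf<Double?>() }.withZero().print()

    // String API
    df.fillNulls("length", "age").withValue(-1.0).print()

    // KProperties API
    df.fillNulls(Person::length, Person::age).with { 0.0 }.print()

    // Column Accessors API
    val length by column<Double?>()
    val age by column<Double?>()
    df.fillNulls(length, age).perRowCol { row, _ -> row.index().toDouble() }.print()
}
```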
+ * + * For more information: [See `fillNaNs` on the documentation website.](https://kotlin.github.io/dataframe/fill.html#fillnans) + */ +internal interface FillNaNs { + + /** ## [fillNaNs][org.jetbrains.kotlinx.dataframe.api.fillNaNs] Operation Usage + * + * [fillNaNs][org.jetbrains.kotlinx.dataframe.api.fillNaNs] `{ `[columns][org.jetbrains.kotlinx.dataframe.documentation.SelectingColumns]` }` + * + * - `[.`[where][org.jetbrains.kotlinx.dataframe.api.Update.where]` { `[rowValueCondition][org.jetbrains.kotlinx.dataframe.documentation.SelectingRows.RowValueCondition.WithExample]` } ]` + * + * - `[.`[at][org.jetbrains.kotlinx.dataframe.api.Update.at]` (`[rowIndices][org.jetbrains.kotlinx.dataframe.api.CommonUpdateAtFunctionDoc.RowIndicesParam]`) ]` + * + * - `.`[with][org.jetbrains.kotlinx.dataframe.api.Update.with]` { `[rowExpression][org.jetbrains.kotlinx.dataframe.documentation.ExpressionsGivenRow.RowValueExpression.WithExample]` } + * | .`[notNull][org.jetbrains.kotlinx.dataframe.api.Update.notNull]` { `[rowExpression][org.jetbrains.kotlinx.dataframe.documentation.ExpressionsGivenRow.RowValueExpression.WithExample]` } + * | .`[perCol][org.jetbrains.kotlinx.dataframe.api.Update.perCol]` { `[colExpression][org.jetbrains.kotlinx.dataframe.documentation.ExpressionsGivenColumn.ColumnExpression.WithExample]` } + * | .`[perRowCol][org.jetbrains.kotlinx.dataframe.api.Update.perRowCol]` { `[rowColExpression][org.jetbrains.kotlinx.dataframe.documentation.ExpressionsGivenRowAndColumn.RowColumnExpression.WithExample]` } + * | .`[withValue][org.jetbrains.kotlinx.dataframe.api.Update.withValue]`(value) + * | .`[withNull][org.jetbrains.kotlinx.dataframe.api.Update.withNull]`() + * | .`[withZero][org.jetbrains.kotlinx.dataframe.api.Update.withZero]`() + * | .`[asFrame][org.jetbrains.kotlinx.dataframe.api.Update.asFrame]` { `[dataFrameExpression][org.jetbrains.kotlinx.dataframe.documentation.ExpressionsGivenDataFrame.DataFrameExpression.WithExample]` }` + */ + interface Usage +} + +internal interface SetFillNaNsOperationArg -public fun DataFrame.fillNaNs(vararg cols: String): Update = - fillNaNs { cols.toColumns() } +/** + * ## The Fill NaNs Operation + * + * Replaces [`NaN`][org.jetbrains.kotlinx.dataframe.documentation.NaN] values with given value or expression. + * Specific case of [update][org.jetbrains.kotlinx.dataframe.api.update]. + * + * Check out the [`fillNaNs` Operation Usage][org.jetbrains.kotlinx.dataframe.api.FillNaNs.Usage]. + * + * For more information: [See `fillNaNs` on the documentation website.](https://kotlin.github.io/dataframe/fill.html#fillnans) + * ## ‎ + * The columns to update need to be selected. See [Selecting Columns][org.jetbrains.kotlinx.dataframe.documentation.SelectingColumns] for all the selecting options. + * ## This Fill NaNs Overload + */ +private interface CommonFillNaNsFunctionDoc -public fun DataFrame.fillNaNs(vararg cols: KProperty): Update = - fillNaNs { cols.toColumns() } +/** + * ## The Fill NaNs Operation + * + * Replaces [`NaN`][org.jetbrains.kotlinx.dataframe.documentation.NaN] values with given value or expression. + * Specific case of [update][org.jetbrains.kotlinx.dataframe.api.update]. + * + * Check out the [`fillNaNs` Operation Usage][org.jetbrains.kotlinx.dataframe.api.FillNaNs.Usage]. + * + * For more information: [See `fillNaNs` on the documentation website.](https://kotlin.github.io/dataframe/fill.html#fillnans) + * ## ‎ + * The columns to update need to be selected. 
See [Selecting Columns][org.jetbrains.kotlinx.dataframe.documentation.SelectingColumns] for all the selecting options. + * ## This Fill NaNs Overload + * Select or express columns using the Column(s) Selection DSL. + * (Any [Access API][org.jetbrains.kotlinx.dataframe.documentation.AccessApi]). + * + * This DSL comes in the form of either a [Column Selector][org.jetbrains.kotlinx.dataframe.ColumnSelector]- or [Columns Selector][org.jetbrains.kotlinx.dataframe.ColumnsSelector] lambda, + * which operate in the [Column Selection DSL][org.jetbrains.kotlinx.dataframe.api.ColumnSelectionDsl] or the [Columns Selection DSL][org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl] and + * expect you to return a [SingleColumn][org.jetbrains.kotlinx.dataframe.columns.SingleColumn] or [ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet], respectively. + * + * For example: + * + * `df.`[fillNaNs][org.jetbrains.kotlinx.dataframe.api.fillNaNs]` { length `[and][org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl.and]` age }` + * + * `df.`[fillNaNs][org.jetbrains.kotlinx.dataframe.api.fillNaNs]` { `[cols][org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl.cols]`(1..5) }` + * + * `df.`[fillNaNs][org.jetbrains.kotlinx.dataframe.api.fillNaNs]` { `[colsOf][org.jetbrains.kotlinx.dataframe.api.colsOf]`<`[Double][Double]`>() }` + * + * @param columns The [Columns selector DSL][org.jetbrains.kotlinx.dataframe.documentation.SelectingColumns.Dsl.WithExample] used to select the columns of this [DataFrame][org.jetbrains.kotlinx.dataframe.DataFrame] to update. + */ +public fun DataFrame.fillNaNs(columns: ColumnsSelector): Update = + update(columns).where { it.isNaN } -public fun DataFrame.fillNaNs(vararg cols: ColumnReference): Update = - fillNaNs { cols.toColumns() } +/** + * ## The Fill NaNs Operation + * + * Replaces [`NaN`][org.jetbrains.kotlinx.dataframe.documentation.NaN] values with given value or expression. + * Specific case of [update][org.jetbrains.kotlinx.dataframe.api.update]. + * + * Check out the [`fillNaNs` Operation Usage][org.jetbrains.kotlinx.dataframe.api.FillNaNs.Usage]. + * + * For more information: [See `fillNaNs` on the documentation website.](https://kotlin.github.io/dataframe/fill.html#fillnans) + * ## ‎ + * The columns to update need to be selected. See [Selecting Columns][org.jetbrains.kotlinx.dataframe.documentation.SelectingColumns] for all the selecting options. + * ## This Fill NaNs Overload + * Select columns using their [column names][String] + * ([String API][org.jetbrains.kotlinx.dataframe.documentation.AccessApi.StringApi]). + * + * For example: + * + * `df.`[fillNaNs][org.jetbrains.kotlinx.dataframe.api.fillNaNs]`("length", "age")` + * + * @param columns The [Column names][org.jetbrains.kotlinx.dataframe.documentation.SelectingColumns.ColumnNames.WithExample] belonging to this [DataFrame][org.jetbrains.kotlinx.dataframe.DataFrame] to update. + */ +public fun DataFrame.fillNaNs(vararg columns: String): Update = + fillNaNs { columns.toColumns() } -public fun DataFrame.fillNaNs(cols: Iterable>): Update = - fillNaNs { cols.toColumnSet() } +/** + * ## The Fill NaNs Operation + * + * Replaces [`NaN`][org.jetbrains.kotlinx.dataframe.documentation.NaN] values with given value or expression. + * Specific case of [update][org.jetbrains.kotlinx.dataframe.api.update]. + * + * Check out the [`fillNaNs` Operation Usage][org.jetbrains.kotlinx.dataframe.api.FillNaNs.Usage]. 
+ * + * For more information: [See `fillNaNs` on the documentation website.](https://kotlin.github.io/dataframe/fill.html#fillnans) + * ## ‎ + * The columns to update need to be selected. See [Selecting Columns][org.jetbrains.kotlinx.dataframe.documentation.SelectingColumns] for all the selecting options. + * ## This Fill NaNs Overload + * Select columns using [KProperties][KProperty] ([KProperties API][org.jetbrains.kotlinx.dataframe.documentation.AccessApi.KPropertiesApi]). + * + * For example: + * ```kotlin + * data class Person(val length: Double, val age: Double) + * ``` + * + * `df.`[fillNaNs][org.jetbrains.kotlinx.dataframe.api.fillNaNs]`(Person::length, Person::age)` + * + * @param columns The [KProperties][org.jetbrains.kotlinx.dataframe.documentation.SelectingColumns.KProperties.WithExample] corresponding to columns of this [DataFrame][org.jetbrains.kotlinx.dataframe.DataFrame] to update. + */ +public fun DataFrame.fillNaNs(vararg columns: KProperty): Update = + fillNaNs { columns.toColumns() } + +/** + * ## The Fill NaNs Operation + * + * Replaces [`NaN`][org.jetbrains.kotlinx.dataframe.documentation.NaN] values with given value or expression. + * Specific case of [update][org.jetbrains.kotlinx.dataframe.api.update]. + * + * Check out the [`fillNaNs` Operation Usage][org.jetbrains.kotlinx.dataframe.api.FillNaNs.Usage]. + * + * For more information: [See `fillNaNs` on the documentation website.](https://kotlin.github.io/dataframe/fill.html#fillnans) + * ## ‎ + * The columns to update need to be selected. See [Selecting Columns][org.jetbrains.kotlinx.dataframe.documentation.SelectingColumns] for all the selecting options. + * ## This Fill NaNs Overload + * Select columns using [column accessors][org.jetbrains.kotlinx.dataframe.columns.ColumnReference] + * ([Column Accessors API][org.jetbrains.kotlinx.dataframe.documentation.AccessApi.ColumnAccessorsApi]). + * + * For example: + * + * `val length by `[column][org.jetbrains.kotlinx.dataframe.api.column]`<`[Double][Double]`>()` + * + * `val age by `[column][org.jetbrains.kotlinx.dataframe.api.column]`<`[Double][Double]`>()` + * + * `df.`[fillNaNs][org.jetbrains.kotlinx.dataframe.api.fillNaNs]`(length, age)` + * + * @param columns The [Column references][org.jetbrains.kotlinx.dataframe.documentation.SelectingColumns.ColumnAccessors.WithExample] of this [DataFrame][org.jetbrains.kotlinx.dataframe.DataFrame] to update. + */ +public fun DataFrame.fillNaNs(vararg columns: ColumnReference): Update = + fillNaNs { columns.toColumns() } + +/** + * TODO this will be deprecated [PR #286](https://github.com/Kotlin/dataframe/pull/320) + */ +public fun DataFrame.fillNaNs(columns: Iterable>): Update = + fillNaNs { columns.toColumnSet() } // endregion // region fillNA -public fun DataFrame.fillNA(cols: ColumnsSelector): Update = - update(cols).where { it.isNA } +/** + * ## The Fill NA Operation + * + * Replaces [`NA`][NA] values with given value or expression. + * Specific case of [update]. + * + * Check out the [`fillNA` Operation Usage][FillNA.Usage]. 
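Unlike `fillNulls`, `fillNaNs` only ever has work to do in `Double`/`Float` columns, since `NaN` cannot occur elsewhere; scoping the selection with `colsOf<Double>()` makes that explicit. A minimal sketch with hypothetical data:

```kotlin
import org.jetbrains.kotlinx.dataframe.api.*

fun main() {
    val df = dataFrameOf("score", "name")(
        Double.NaN, "a",
        2.5, "b",
    )

    // Replace NaN with 0.0 in every Double column.
    df.fillNaNs { colsOf<Double>() }.withZero().print()

    // The same fill through the String API, using the row-expression form.
    df.fillNaNs("score").with { 0.0 }.print()
}
```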
+ * + * For more information: [See `fillNA` on the documentation website.](https://kotlin.github.io/dataframe/fill.html#fillna) + */ +internal interface FillNA { + + /** ## [fillNA][org.jetbrains.kotlinx.dataframe.api.fillNA] Operation Usage + * + * [fillNA][org.jetbrains.kotlinx.dataframe.api.fillNA] `{ `[columns][org.jetbrains.kotlinx.dataframe.documentation.SelectingColumns]` }` + * + * - `[.`[where][org.jetbrains.kotlinx.dataframe.api.Update.where]` { `[rowValueCondition][org.jetbrains.kotlinx.dataframe.documentation.SelectingRows.RowValueCondition.WithExample]` } ]` + * + * - `[.`[at][org.jetbrains.kotlinx.dataframe.api.Update.at]` (`[rowIndices][org.jetbrains.kotlinx.dataframe.api.CommonUpdateAtFunctionDoc.RowIndicesParam]`) ]` + * + * - `.`[with][org.jetbrains.kotlinx.dataframe.api.Update.with]` { `[rowExpression][org.jetbrains.kotlinx.dataframe.documentation.ExpressionsGivenRow.RowValueExpression.WithExample]` } + * | .`[notNull][org.jetbrains.kotlinx.dataframe.api.Update.notNull]` { `[rowExpression][org.jetbrains.kotlinx.dataframe.documentation.ExpressionsGivenRow.RowValueExpression.WithExample]` } + * | .`[perCol][org.jetbrains.kotlinx.dataframe.api.Update.perCol]` { `[colExpression][org.jetbrains.kotlinx.dataframe.documentation.ExpressionsGivenColumn.ColumnExpression.WithExample]` } + * | .`[perRowCol][org.jetbrains.kotlinx.dataframe.api.Update.perRowCol]` { `[rowColExpression][org.jetbrains.kotlinx.dataframe.documentation.ExpressionsGivenRowAndColumn.RowColumnExpression.WithExample]` } + * | .`[withValue][org.jetbrains.kotlinx.dataframe.api.Update.withValue]`(value) + * | .`[withNull][org.jetbrains.kotlinx.dataframe.api.Update.withNull]`() + * | .`[withZero][org.jetbrains.kotlinx.dataframe.api.Update.withZero]`() + * | .`[asFrame][org.jetbrains.kotlinx.dataframe.api.Update.asFrame]` { `[dataFrameExpression][org.jetbrains.kotlinx.dataframe.documentation.ExpressionsGivenDataFrame.DataFrameExpression.WithExample]` }` + */ + interface Usage +} + +internal interface SetFillNAOperationArg + +/** + * ## The Fill NA Operation + * + * Replaces [`NA`][org.jetbrains.kotlinx.dataframe.documentation.NA] values with given value or expression. + * Specific case of [update][org.jetbrains.kotlinx.dataframe.api.update]. + * + * Check out the [`fillNA` Operation Usage][org.jetbrains.kotlinx.dataframe.api.FillNA.Usage]. + * + * For more information: [See `fillNA` on the documentation website.](https://kotlin.github.io/dataframe/fill.html#fillna) + * ## ‎ + * The columns to update need to be selected. See [Selecting Columns][org.jetbrains.kotlinx.dataframe.documentation.SelectingColumns] for all the selecting options. + * ## This Fill NA Overload + */ +private interface CommonFillNAFunctionDoc -public fun DataFrame.fillNA(vararg cols: String): Update = - fillNA { cols.toColumns() } +/** + * ## The Fill NA Operation + * + * Replaces [`NA`][org.jetbrains.kotlinx.dataframe.documentation.NA] values with given value or expression. + * Specific case of [update][org.jetbrains.kotlinx.dataframe.api.update]. + * + * Check out the [`fillNA` Operation Usage][org.jetbrains.kotlinx.dataframe.api.FillNA.Usage]. + * + * For more information: [See `fillNA` on the documentation website.](https://kotlin.github.io/dataframe/fill.html#fillna) + * ## ‎ + * The columns to update need to be selected. See [Selecting Columns][org.jetbrains.kotlinx.dataframe.documentation.SelectingColumns] for all the selecting options. + * ## This Fill NA Overload + * Select or express columns using the Column(s) Selection DSL. 
+ * (Any [Access API][org.jetbrains.kotlinx.dataframe.documentation.AccessApi]). + * + * This DSL comes in the form of either a [Column Selector][org.jetbrains.kotlinx.dataframe.ColumnSelector]- or [Columns Selector][org.jetbrains.kotlinx.dataframe.ColumnsSelector] lambda, + * which operate in the [Column Selection DSL][org.jetbrains.kotlinx.dataframe.api.ColumnSelectionDsl] or the [Columns Selection DSL][org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl] and + * expect you to return a [SingleColumn][org.jetbrains.kotlinx.dataframe.columns.SingleColumn] or [ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet], respectively. + * + * For example: + * + * `df.`[fillNA][org.jetbrains.kotlinx.dataframe.api.fillNA]` { length `[and][org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl.and]` age }` + * + * `df.`[fillNA][org.jetbrains.kotlinx.dataframe.api.fillNA]` { `[cols][org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl.cols]`(1..5) }` + * + * `df.`[fillNA][org.jetbrains.kotlinx.dataframe.api.fillNA]` { `[colsOf][org.jetbrains.kotlinx.dataframe.api.colsOf]`<`[Double][Double]`>() }` + * + * @param columns The [Columns selector DSL][org.jetbrains.kotlinx.dataframe.documentation.SelectingColumns.Dsl.WithExample] used to select the columns of this [DataFrame][org.jetbrains.kotlinx.dataframe.DataFrame] to update. + */ +public fun DataFrame.fillNA(columns: ColumnsSelector): Update = + update(columns).where { it.isNA } -public fun DataFrame.fillNA(vararg cols: KProperty): Update = - fillNA { cols.toColumns() } +/** + * ## The Fill NA Operation + * + * Replaces [`NA`][org.jetbrains.kotlinx.dataframe.documentation.NA] values with given value or expression. + * Specific case of [update][org.jetbrains.kotlinx.dataframe.api.update]. + * + * Check out the [`fillNA` Operation Usage][org.jetbrains.kotlinx.dataframe.api.FillNA.Usage]. + * + * For more information: [See `fillNA` on the documentation website.](https://kotlin.github.io/dataframe/fill.html#fillna) + * ## ‎ + * The columns to update need to be selected. See [Selecting Columns][org.jetbrains.kotlinx.dataframe.documentation.SelectingColumns] for all the selecting options. + * ## This Fill NA Overload + * Select columns using their [column names][String] + * ([String API][org.jetbrains.kotlinx.dataframe.documentation.AccessApi.StringApi]). + * + * For example: + * + * `df.`[fillNA][org.jetbrains.kotlinx.dataframe.api.fillNA]`("length", "age")` + * + * @param columns The [Column names][org.jetbrains.kotlinx.dataframe.documentation.SelectingColumns.ColumnNames.WithExample] belonging to this [DataFrame][org.jetbrains.kotlinx.dataframe.DataFrame] to update. + */ +public fun DataFrame.fillNA(vararg columns: String): Update = + fillNA { columns.toColumns() } -public fun DataFrame.fillNA(vararg cols: ColumnReference): Update = - fillNA { cols.toColumns() } +/** + * ## The Fill NA Operation + * + * Replaces [`NA`][org.jetbrains.kotlinx.dataframe.documentation.NA] values with given value or expression. + * Specific case of [update][org.jetbrains.kotlinx.dataframe.api.update]. + * + * Check out the [`fillNA` Operation Usage][org.jetbrains.kotlinx.dataframe.api.FillNA.Usage]. + * + * For more information: [See `fillNA` on the documentation website.](https://kotlin.github.io/dataframe/fill.html#fillna) + * ## ‎ + * The columns to update need to be selected. See [Selecting Columns][org.jetbrains.kotlinx.dataframe.documentation.SelectingColumns] for all the selecting options. 
+ * ## This Fill NA Overload + * Select columns using [KProperties][KProperty] ([KProperties API][org.jetbrains.kotlinx.dataframe.documentation.AccessApi.KPropertiesApi]). + * + * For example: + * ```kotlin + * data class Person(val length: Double, val age: Double) + * ``` + * + * `df.`[fillNA][org.jetbrains.kotlinx.dataframe.api.fillNA]`(Person::length, Person::age)` + * + * @param columns The [KProperties][org.jetbrains.kotlinx.dataframe.documentation.SelectingColumns.KProperties.WithExample] corresponding to columns of this [DataFrame][org.jetbrains.kotlinx.dataframe.DataFrame] to update. + */ +public fun DataFrame.fillNA(vararg columns: KProperty): Update = + fillNA { columns.toColumns() } -public fun DataFrame.fillNA(cols: Iterable>): Update = - fillNA { cols.toColumnSet() } +/** + * ## The Fill NA Operation + * + * Replaces [`NA`][org.jetbrains.kotlinx.dataframe.documentation.NA] values with given value or expression. + * Specific case of [update][org.jetbrains.kotlinx.dataframe.api.update]. + * + * Check out the [`fillNA` Operation Usage][org.jetbrains.kotlinx.dataframe.api.FillNA.Usage]. + * + * For more information: [See `fillNA` on the documentation website.](https://kotlin.github.io/dataframe/fill.html#fillna) + * ## ‎ + * The columns to update need to be selected. See [Selecting Columns][org.jetbrains.kotlinx.dataframe.documentation.SelectingColumns] for all the selecting options. + * ## This Fill NA Overload + * Select columns using [column accessors][org.jetbrains.kotlinx.dataframe.columns.ColumnReference] + * ([Column Accessors API][org.jetbrains.kotlinx.dataframe.documentation.AccessApi.ColumnAccessorsApi]). + * + * For example: + * + * `val length by `[column][org.jetbrains.kotlinx.dataframe.api.column]`<`[Double][Double]`>()` + * + * `val age by `[column][org.jetbrains.kotlinx.dataframe.api.column]`<`[Double][Double]`>()` + * + * `df.`[fillNA][org.jetbrains.kotlinx.dataframe.api.fillNA]`(length, age)` + * + * @param columns The [Column references][org.jetbrains.kotlinx.dataframe.documentation.SelectingColumns.ColumnAccessors.WithExample] of this [DataFrame][org.jetbrains.kotlinx.dataframe.DataFrame] to update. + */ +public fun DataFrame.fillNA(vararg columns: ColumnReference): Update = + fillNA { columns.toColumns() } + +/** + * TODO this will be deprecated [PR #286](https://github.com/Kotlin/dataframe/pull/320) + */ +public fun DataFrame.fillNA(columns: Iterable>): Update = + fillNA { columns.toColumnSet() } // endregion +/** @param columns The [Columns selector DSL][org.jetbrains.kotlinx.dataframe.documentation.SelectingColumns.Dsl.WithExample] used to select the columns of this [DataFrame] to drop rows in. */ +private interface DropDslParam + +/** @param columns The [KProperties][org.jetbrains.kotlinx.dataframe.documentation.SelectingColumns.KProperties.WithExample] used to select the columns of this [DataFrame] to drop rows in. */ +private interface DropKPropertiesParam + +/** @param columns The [Column names][org.jetbrains.kotlinx.dataframe.documentation.SelectingColumns.ColumnNames.WithExample] used to select the columns of this [DataFrame] to drop rows in. */ +private interface DropColumnNamesParam + +/** @param columns The Select columns using [column accessors][org.jetbrains.kotlinx.dataframe.columns.ColumnReference] + * ([Column Accessors API][org.jetbrains.kotlinx.dataframe.documentation.AccessApi.ColumnAccessorsApi]). used to select the columns of this [DataFrame] to drop rows in. 
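`fillNA` combines the two previous operations: a cell counts as `NA` when it is either `null` or `NaN` (see the `isNA` helpers earlier in this file), so one pass covers both kinds of missing value. A minimal sketch with hypothetical data:

```kotlin
import org.jetbrains.kotlinx.dataframe.api.*

fun main() {
    val df = dataFrameOf("temperature")(
        21.5,
        null,        // NA because it is null
        Double.NaN,  // NA because it is NaN
    )

    // Both missing cells are replaced in a single pass.
    df.fillNA { colsOf<Double?>() }.withValue(0.0).print()
}
```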
*/ +private interface DropColumnAccessorsParam + // region dropNulls -public fun DataFrame.dropNulls(whereAllNull: Boolean = false, selector: ColumnsSelector): DataFrame { - val cols = this[selector] +/** + * ## The Drop Nulls Operation + * + * Removes rows with `null` values. Specific case of [drop][DataFrame.drop]. + * + * Optionally, you can select which columns to operate on (see [Selecting Columns][org.jetbrains.kotlinx.dataframe.documentation.SelectingColumns]). + * Also, you can supply `whereAllNull = true` to only drop rows where all selected cells are `null`. By default, + * rows are dropped if any of the selected cells are `null`. + * + * For more information: [See `dropNulls` on the documentation website.](https://kotlin.github.io/dataframe/drop.html#dropnulls) + */ +internal interface DropNulls { + + /** + * @param whereAllNull `false` by default. + * If `true`, rows are dropped if all selected cells are `null`. + * If `false`, rows are dropped if any of the selected cells is `null`. + */ + interface WhereAllNullParam +} + +private interface SetDropNullsOperationArg + +/** + * ## The Drop Nulls Operation + * + * Removes rows with `null` values. Specific case of [drop][org.jetbrains.kotlinx.dataframe.DataFrame.drop]. + * + * Optionally, you can select which columns to operate on (see [Selecting Columns][org.jetbrains.kotlinx.dataframe.documentation.SelectingColumns]). + * Also, you can supply `whereAllNull = true` to only drop rows where all selected cells are `null`. By default, + * rows are dropped if any of the selected cells are `null`. + * + * For more information: [See `dropNulls` on the documentation website.](https://kotlin.github.io/dataframe/drop.html#dropnulls) + * ## This Drop Nulls Overload + */ +private interface CommonDropNullsFunctionDoc + +/** + * ## The Drop Nulls Operation + * + * Removes rows with `null` values. Specific case of [drop][org.jetbrains.kotlinx.dataframe.DataFrame.drop]. + * + * Optionally, you can select which columns to operate on (see [Selecting Columns][org.jetbrains.kotlinx.dataframe.documentation.SelectingColumns]). + * Also, you can supply `whereAllNull = true` to only drop rows where all selected cells are `null`. By default, + * rows are dropped if any of the selected cells are `null`. + * + * For more information: [See `dropNulls` on the documentation website.](https://kotlin.github.io/dataframe/drop.html#dropnulls) + * ## This Drop Nulls Overload + * Select or express columns using the Column(s) Selection DSL. + * (Any [Access API][org.jetbrains.kotlinx.dataframe.documentation.AccessApi]). + * + * This DSL comes in the form of either a [Column Selector][org.jetbrains.kotlinx.dataframe.ColumnSelector]- or [Columns Selector][org.jetbrains.kotlinx.dataframe.ColumnsSelector] lambda, + * which operate in the [Column Selection DSL][org.jetbrains.kotlinx.dataframe.api.ColumnSelectionDsl] or the [Columns Selection DSL][org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl] and + * expect you to return a [SingleColumn][org.jetbrains.kotlinx.dataframe.columns.SingleColumn] or [ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet], respectively. 
+ * + * For example: + * + * `df.`[dropNulls][org.jetbrains.kotlinx.dataframe.api.dropNulls]` { length `[and][org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl.and]` age }` + * + * `df.`[dropNulls][org.jetbrains.kotlinx.dataframe.api.dropNulls]` { `[cols][org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl.cols]`(1..5) }` + * + * `df.`[dropNulls][org.jetbrains.kotlinx.dataframe.api.dropNulls]` { `[colsOf][org.jetbrains.kotlinx.dataframe.api.colsOf]`<`[Double][Double]`>() }` + * + * `df.`[dropNulls][org.jetbrains.kotlinx.dataframe.api.dropNulls]`(whereAllNull = true) { `[colsOf][org.jetbrains.kotlinx.dataframe.api.colsOf]`<`[Double][Double]`>() }` + * @param whereAllNull `false` by default. + * If `true`, rows are dropped if all selected cells are `null`. + * If `false`, rows are dropped if any of the selected cells is `null`. + * @param columns The [Columns selector DSL][org.jetbrains.kotlinx.dataframe.documentation.SelectingColumns.Dsl.WithExample] used to select the columns of this [DataFrame][org.jetbrains.kotlinx.dataframe.DataFrame] to drop rows in. + */ +public fun DataFrame.dropNulls(whereAllNull: Boolean = false, columns: ColumnsSelector): DataFrame { + val cols = this[columns] return if (whereAllNull) drop { row -> cols.all { col -> col[row] == null } } else drop { row -> cols.any { col -> col[row] == null } } } +/** + * ## The Drop Nulls Operation + * + * Removes rows with `null` values. Specific case of [drop][org.jetbrains.kotlinx.dataframe.DataFrame.drop]. + * + * Optionally, you can select which columns to operate on (see [Selecting Columns][org.jetbrains.kotlinx.dataframe.documentation.SelectingColumns]). + * Also, you can supply `whereAllNull = true` to only drop rows where all selected cells are `null`. By default, + * rows are dropped if any of the selected cells are `null`. + * + * For more information: [See `dropNulls` on the documentation website.](https://kotlin.github.io/dataframe/drop.html#dropnulls) + * ## This Drop Nulls Overload + * This overload operates on all columns in the [DataFrame][org.jetbrains.kotlinx.dataframe.DataFrame]. + * @param whereAllNull `false` by default. + * If `true`, rows are dropped if all selected cells are `null`. + * If `false`, rows are dropped if any of the selected cells is `null`. + */ public fun DataFrame.dropNulls(whereAllNull: Boolean = false): DataFrame = dropNulls(whereAllNull) { all() } -public fun DataFrame.dropNulls(vararg cols: KProperty<*>, whereAllNull: Boolean = false): DataFrame = - dropNulls(whereAllNull) { cols.toColumns() } +/** + * ## The Drop Nulls Operation + * + * Removes rows with `null` values. Specific case of [drop][org.jetbrains.kotlinx.dataframe.DataFrame.drop]. + * + * Optionally, you can select which columns to operate on (see [Selecting Columns][org.jetbrains.kotlinx.dataframe.documentation.SelectingColumns]). + * Also, you can supply `whereAllNull = true` to only drop rows where all selected cells are `null`. By default, + * rows are dropped if any of the selected cells are `null`. + * + * For more information: [See `dropNulls` on the documentation website.](https://kotlin.github.io/dataframe/drop.html#dropnulls) + * ## This Drop Nulls Overload + * Select columns using [KProperties][KProperty] ([KProperties API][org.jetbrains.kotlinx.dataframe.documentation.AccessApi.KPropertiesApi]). 
+ * + * For example: + * ```kotlin + * data class Person(val length: Double, val age: Double) + * ``` + * + * `df.`[dropNulls][org.jetbrains.kotlinx.dataframe.api.dropNulls]`(Person::length, Person::age)` + * + * `df.`[dropNulls][org.jetbrains.kotlinx.dataframe.api.dropNulls]`(Person::length, whereAllNull = true)` + * @param whereAllNull `false` by default. + * If `true`, rows are dropped if all selected cells are `null`. + * If `false`, rows are dropped if any of the selected cells is `null`. + * @param columns The [KProperties][org.jetbrains.kotlinx.dataframe.documentation.SelectingColumns.KProperties.WithExample] used to select the columns of this [DataFrame][org.jetbrains.kotlinx.dataframe.DataFrame] to drop rows in. + */ +public fun DataFrame.dropNulls(vararg columns: KProperty<*>, whereAllNull: Boolean = false): DataFrame = + dropNulls(whereAllNull) { columns.toColumns() } -public fun DataFrame.dropNulls(vararg cols: String, whereAllNull: Boolean = false): DataFrame = - dropNulls(whereAllNull) { cols.toColumns() } +/** + * ## The Drop Nulls Operation + * + * Removes rows with `null` values. Specific case of [drop][org.jetbrains.kotlinx.dataframe.DataFrame.drop]. + * + * Optionally, you can select which columns to operate on (see [Selecting Columns][org.jetbrains.kotlinx.dataframe.documentation.SelectingColumns]). + * Also, you can supply `whereAllNull = true` to only drop rows where all selected cells are `null`. By default, + * rows are dropped if any of the selected cells are `null`. + * + * For more information: [See `dropNulls` on the documentation website.](https://kotlin.github.io/dataframe/drop.html#dropnulls) + * ## This Drop Nulls Overload + * Select columns using their [column names][String] + * ([String API][org.jetbrains.kotlinx.dataframe.documentation.AccessApi.StringApi]). + * + * For example: + * + * `df.`[dropNulls][org.jetbrains.kotlinx.dataframe.api.dropNulls]`("length", "age")` + * + * `df.`[dropNulls][org.jetbrains.kotlinx.dataframe.api.dropNulls]`("length", whereAllNull = true)` + * @param whereAllNull `false` by default. + * If `true`, rows are dropped if all selected cells are `null`. + * If `false`, rows are dropped if any of the selected cells is `null`. + * @param columns The [Column names][org.jetbrains.kotlinx.dataframe.documentation.SelectingColumns.ColumnNames.WithExample] used to select the columns of this [DataFrame][org.jetbrains.kotlinx.dataframe.DataFrame] to drop rows in. + */ +public fun DataFrame.dropNulls(vararg columns: String, whereAllNull: Boolean = false): DataFrame = + dropNulls(whereAllNull) { columns.toColumns() } -public fun DataFrame.dropNulls(vararg cols: AnyColumnReference, whereAllNull: Boolean = false): DataFrame = - dropNulls(whereAllNull) { cols.toColumns() } +/** + * ## The Drop Nulls Operation + * + * Removes rows with `null` values. Specific case of [drop][org.jetbrains.kotlinx.dataframe.DataFrame.drop]. + * + * Optionally, you can select which columns to operate on (see [Selecting Columns][org.jetbrains.kotlinx.dataframe.documentation.SelectingColumns]). + * Also, you can supply `whereAllNull = true` to only drop rows where all selected cells are `null`. By default, + * rows are dropped if any of the selected cells are `null`. 
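The `whereAllNull` flag documented above flips the row predicate from "any selected cell is null" to "all selected cells are null". A sketch contrasting the two on hypothetical data (the comments describe the expected outcome under the documented semantics):

```kotlin
import org.jetbrains.kotlinx.dataframe.api.*

fun main() {
    val df = dataFrameOf("length", "age")(
        1.0, 20.0,   // no nulls
        null, 25.0,  // one null
        null, null,  // all nulls
    )

    // Default (whereAllNull = false): drop a row if ANY selected cell is null -> only the first row remains.
    df.dropNulls { colsOf<Double?>() }.print()

    // whereAllNull = true: drop a row only if ALL selected cells are null -> the first two rows remain.
    df.dropNulls(whereAllNull = true) { colsOf<Double?>() }.print()

    // String API: only the named column is inspected.
    df.dropNulls("length").print()
}
```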
+ * + * For more information: [See `dropNulls` on the documentation website.](https://kotlin.github.io/dataframe/drop.html#dropnulls) + * ## This Drop Nulls Overload + * Select columns using [column accessors][org.jetbrains.kotlinx.dataframe.columns.ColumnReference] + * ([Column Accessors API][org.jetbrains.kotlinx.dataframe.documentation.AccessApi.ColumnAccessorsApi]). + * + * For example: + * + * `val length by `[column][org.jetbrains.kotlinx.dataframe.api.column]`<`[Double][Double]`>()` + * + * `val age by `[column][org.jetbrains.kotlinx.dataframe.api.column]`<`[Double][Double]`>()` + * + * `df.`[dropNulls][org.jetbrains.kotlinx.dataframe.api.dropNulls]`(length, age)` + * + * `df.`[dropNulls][org.jetbrains.kotlinx.dataframe.api.dropNulls]`(length, whereAllNull = true)` + * @param whereAllNull `false` by default. + * If `true`, rows are dropped if all selected cells are `null`. + * If `false`, rows are dropped if any of the selected cells is `null`. + * @param columns The Select columns using [column accessors][org.jetbrains.kotlinx.dataframe.columns.ColumnReference] + * ([Column Accessors API][org.jetbrains.kotlinx.dataframe.documentation.AccessApi.ColumnAccessorsApi]). used to select the columns of this [DataFrame][org.jetbrains.kotlinx.dataframe.DataFrame] to drop rows in. + */ +public fun DataFrame.dropNulls(vararg columns: AnyColumnReference, whereAllNull: Boolean = false): DataFrame = + dropNulls(whereAllNull) { columns.toColumns() } -public fun DataFrame.dropNulls(cols: Iterable, whereAllNull: Boolean = false): DataFrame = - dropNulls(whereAllNull) { cols.toColumnSet() } +/** + * TODO this will be deprecated [PR #286](https://github.com/Kotlin/dataframe/pull/320) + */ +public fun DataFrame.dropNulls( + columns: Iterable, + whereAllNull: Boolean = false, +): DataFrame = + dropNulls(whereAllNull) { columns.toColumnSet() } +/** + * ## The Drop Nulls Operation + * + * Removes `null` values from this [DataColumn], adjusting the type accordingly. + */ public fun DataColumn.dropNulls(): DataColumn = (if (!hasNulls()) this else filter { it != null }) as DataColumn @@ -122,28 +764,199 @@ public fun DataColumn.dropNulls(): DataColumn = // region dropNA -public fun DataFrame.dropNA(whereAllNA: Boolean = false, selector: ColumnsSelector): DataFrame { - val cols = this[selector] +/** + * ## The Drop `NA` Operation + * + * Removes rows with [`NA`][NA] values. Specific case of [drop][DataFrame.drop]. + * + * Optionally, you can select which columns to operate on (see [Selecting Columns][org.jetbrains.kotlinx.dataframe.documentation.SelectingColumns]). + * Also, you can supply `whereAllNA = true` to only drop rows where all selected cells are [`NA`][NA]. By default, + * rows are dropped if any of the selected cells are [`NA`][NA]. + * + * For more information: [See `dropNA` on the documentation website.](https://kotlin.github.io/dataframe/drop.html#dropna) + */ +internal interface DropNA { + /** + * @param whereAllNA `false` by default. + * If `true`, rows are dropped if all selected cells are [`NA`][NA]. + * If `false`, rows are dropped if any of the selected cells is [`NA`][NA]. + */ + interface WhereAllNAParam +} + +private interface SetDropNAOperationArg + +/** + * ## The Drop `NA` Operation + * + * Removes rows with [`NA`][org.jetbrains.kotlinx.dataframe.documentation.NA] values. Specific case of [drop][org.jetbrains.kotlinx.dataframe.DataFrame.drop]. 
+ * + * Optionally, you can select which columns to operate on (see [Selecting Columns][org.jetbrains.kotlinx.dataframe.documentation.SelectingColumns]). + * Also, you can supply `whereAllNA = true` to only drop rows where all selected cells are [`NA`][org.jetbrains.kotlinx.dataframe.documentation.NA]. By default, + * rows are dropped if any of the selected cells are [`NA`][org.jetbrains.kotlinx.dataframe.documentation.NA]. + * + * For more information: [See `dropNA` on the documentation website.](https://kotlin.github.io/dataframe/drop.html#dropna) + * ## This Drop NA Overload + */ +private interface CommonDropNAFunctionDoc + +/** + * ## The Drop `NA` Operation + * + * Removes rows with [`NA`][org.jetbrains.kotlinx.dataframe.documentation.NA] values. Specific case of [drop][org.jetbrains.kotlinx.dataframe.DataFrame.drop]. + * + * Optionally, you can select which columns to operate on (see [Selecting Columns][org.jetbrains.kotlinx.dataframe.documentation.SelectingColumns]). + * Also, you can supply `whereAllNA = true` to only drop rows where all selected cells are [`NA`][org.jetbrains.kotlinx.dataframe.documentation.NA]. By default, + * rows are dropped if any of the selected cells are [`NA`][org.jetbrains.kotlinx.dataframe.documentation.NA]. + * + * For more information: [See `dropNA` on the documentation website.](https://kotlin.github.io/dataframe/drop.html#dropna) + * ## This Drop NA Overload + * Select or express columns using the Column(s) Selection DSL. + * (Any [Access API][org.jetbrains.kotlinx.dataframe.documentation.AccessApi]). + * + * This DSL comes in the form of either a [Column Selector][org.jetbrains.kotlinx.dataframe.ColumnSelector]- or [Columns Selector][org.jetbrains.kotlinx.dataframe.ColumnsSelector] lambda, + * which operate in the [Column Selection DSL][org.jetbrains.kotlinx.dataframe.api.ColumnSelectionDsl] or the [Columns Selection DSL][org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl] and + * expect you to return a [SingleColumn][org.jetbrains.kotlinx.dataframe.columns.SingleColumn] or [ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet], respectively. + * + * For example: + * + * `df.`[dropNA][org.jetbrains.kotlinx.dataframe.api.dropNA]` { length `[and][org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl.and]` age }` + * + * `df.`[dropNA][org.jetbrains.kotlinx.dataframe.api.dropNA]` { `[cols][org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl.cols]`(1..5) }` + * + * `df.`[dropNA][org.jetbrains.kotlinx.dataframe.api.dropNA]` { `[colsOf][org.jetbrains.kotlinx.dataframe.api.colsOf]`<`[Double][Double]`>() }` + * + * `df.`[dropNA][org.jetbrains.kotlinx.dataframe.api.dropNA]`(whereAllNA = true) { `[colsOf][org.jetbrains.kotlinx.dataframe.api.colsOf]`<`[Double][Double]`>() }` + * @param whereAllNA `false` by default. + * If `true`, rows are dropped if all selected cells are [`NA`][org.jetbrains.kotlinx.dataframe.documentation.NA]. + * If `false`, rows are dropped if any of the selected cells is [`NA`][org.jetbrains.kotlinx.dataframe.documentation.NA]. + * @param columns The [Columns selector DSL][org.jetbrains.kotlinx.dataframe.documentation.SelectingColumns.Dsl.WithExample] used to select the columns of this [DataFrame][org.jetbrains.kotlinx.dataframe.DataFrame] to drop rows in. 
+ */ +public fun DataFrame.dropNA(whereAllNA: Boolean = false, columns: ColumnsSelector): DataFrame { + val cols = this[columns] return if (whereAllNA) drop { cols.all { this[it].isNA } } else drop { cols.any { this[it].isNA } } } -public fun DataFrame.dropNA(vararg cols: KProperty<*>, whereAllNA: Boolean = false): DataFrame = - dropNA(whereAllNA) { cols.toColumns() } +/** + * ## The Drop `NA` Operation + * + * Removes rows with [`NA`][org.jetbrains.kotlinx.dataframe.documentation.NA] values. Specific case of [drop][org.jetbrains.kotlinx.dataframe.DataFrame.drop]. + * + * Optionally, you can select which columns to operate on (see [Selecting Columns][org.jetbrains.kotlinx.dataframe.documentation.SelectingColumns]). + * Also, you can supply `whereAllNA = true` to only drop rows where all selected cells are [`NA`][org.jetbrains.kotlinx.dataframe.documentation.NA]. By default, + * rows are dropped if any of the selected cells are [`NA`][org.jetbrains.kotlinx.dataframe.documentation.NA]. + * + * For more information: [See `dropNA` on the documentation website.](https://kotlin.github.io/dataframe/drop.html#dropna) + * ## This Drop NA Overload + * Select columns using [KProperties][KProperty] ([KProperties API][org.jetbrains.kotlinx.dataframe.documentation.AccessApi.KPropertiesApi]). + * + * For example: + * ```kotlin + * data class Person(val length: Double, val age: Double) + * ``` + * + * `df.`[dropNA][org.jetbrains.kotlinx.dataframe.api.dropNA]`(Person::length, Person::age)` + * + * `df.`[dropNA][org.jetbrains.kotlinx.dataframe.api.dropNA]`(Person::length, whereAllNA = true)` + * @param whereAllNA `false` by default. + * If `true`, rows are dropped if all selected cells are [`NA`][org.jetbrains.kotlinx.dataframe.documentation.NA]. + * If `false`, rows are dropped if any of the selected cells is [`NA`][org.jetbrains.kotlinx.dataframe.documentation.NA]. + * @param columns The [KProperties][org.jetbrains.kotlinx.dataframe.documentation.SelectingColumns.KProperties.WithExample] used to select the columns of this [DataFrame][org.jetbrains.kotlinx.dataframe.DataFrame] to drop rows in. + */ +public fun DataFrame.dropNA(vararg columns: KProperty<*>, whereAllNA: Boolean = false): DataFrame = + dropNA(whereAllNA) { columns.toColumns() } -public fun DataFrame.dropNA(vararg cols: String, whereAllNA: Boolean = false): DataFrame = - dropNA(whereAllNA) { cols.toColumns() } +/** + * ## The Drop `NA` Operation + * + * Removes rows with [`NA`][org.jetbrains.kotlinx.dataframe.documentation.NA] values. Specific case of [drop][org.jetbrains.kotlinx.dataframe.DataFrame.drop]. + * + * Optionally, you can select which columns to operate on (see [Selecting Columns][org.jetbrains.kotlinx.dataframe.documentation.SelectingColumns]). + * Also, you can supply `whereAllNA = true` to only drop rows where all selected cells are [`NA`][org.jetbrains.kotlinx.dataframe.documentation.NA]. By default, + * rows are dropped if any of the selected cells are [`NA`][org.jetbrains.kotlinx.dataframe.documentation.NA]. + * + * For more information: [See `dropNA` on the documentation website.](https://kotlin.github.io/dataframe/drop.html#dropna) + * ## This Drop NA Overload + * Select columns using their [column names][String] + * ([String API][org.jetbrains.kotlinx.dataframe.documentation.AccessApi.StringApi]). 
+ *
+ * For example:
+ *
+ * `df.`[dropNA][org.jetbrains.kotlinx.dataframe.api.dropNA]`("length", "age")`
+ *
+ * `df.`[dropNA][org.jetbrains.kotlinx.dataframe.api.dropNA]`("length", whereAllNA = true)`
+ * @param whereAllNA `false` by default.
+ *   If `true`, rows are dropped if all selected cells are [`NA`][org.jetbrains.kotlinx.dataframe.documentation.NA].
+ *   If `false`, rows are dropped if any of the selected cells is [`NA`][org.jetbrains.kotlinx.dataframe.documentation.NA].
+ * @param columns The [Column names][org.jetbrains.kotlinx.dataframe.documentation.SelectingColumns.ColumnNames.WithExample] used to select the columns of this [DataFrame][org.jetbrains.kotlinx.dataframe.DataFrame] to drop rows in.
+ */
+public fun <T> DataFrame<T>.dropNA(vararg columns: String, whereAllNA: Boolean = false): DataFrame<T> =
+    dropNA(whereAllNA) { columns.toColumns() }

-public fun <T> DataFrame<T>.dropNA(vararg cols: AnyColumnReference, whereAllNA: Boolean = false): DataFrame<T> =
-    dropNA(whereAllNA) { cols.toColumns() }

+/**
+ * ## The Drop `NA` Operation
+ *
+ * Removes rows with [`NA`][org.jetbrains.kotlinx.dataframe.documentation.NA] values. Specific case of [drop][org.jetbrains.kotlinx.dataframe.DataFrame.drop].
+ *
+ * Optionally, you can select which columns to operate on (see [Selecting Columns][org.jetbrains.kotlinx.dataframe.documentation.SelectingColumns]).
+ * Also, you can supply `whereAllNA = true` to only drop rows where all selected cells are [`NA`][org.jetbrains.kotlinx.dataframe.documentation.NA]. By default,
+ * rows are dropped if any of the selected cells are [`NA`][org.jetbrains.kotlinx.dataframe.documentation.NA].
+ *
+ * For more information: [See `dropNA` on the documentation website.](https://kotlin.github.io/dataframe/drop.html#dropna)
+ * ## This Drop NA Overload
+ * Select columns using [column accessors][org.jetbrains.kotlinx.dataframe.columns.ColumnReference]
+ * ([Column Accessors API][org.jetbrains.kotlinx.dataframe.documentation.AccessApi.ColumnAccessorsApi]).
+ *
+ * For example:
+ *
+ * `val length by `[column][org.jetbrains.kotlinx.dataframe.api.column]`<`[Double][Double]`>()`
+ *
+ * `val age by `[column][org.jetbrains.kotlinx.dataframe.api.column]`<`[Double][Double]`>()`
+ *
+ * `df.`[dropNA][org.jetbrains.kotlinx.dataframe.api.dropNA]`(length, age)`
+ *
+ * `df.`[dropNA][org.jetbrains.kotlinx.dataframe.api.dropNA]`(length, whereAllNA = true)`
+ * @param whereAllNA `false` by default.
+ *   If `true`, rows are dropped if all selected cells are [`NA`][org.jetbrains.kotlinx.dataframe.documentation.NA].
+ *   If `false`, rows are dropped if any of the selected cells is [`NA`][org.jetbrains.kotlinx.dataframe.documentation.NA].
+ * @param columns The [column accessors][org.jetbrains.kotlinx.dataframe.columns.ColumnReference]
+ *   ([Column Accessors API][org.jetbrains.kotlinx.dataframe.documentation.AccessApi.ColumnAccessorsApi]) used to select the columns of this [DataFrame][org.jetbrains.kotlinx.dataframe.DataFrame] to drop rows in.
+ */ +public fun DataFrame.dropNA(vararg columns: AnyColumnReference, whereAllNA: Boolean = false): DataFrame = + dropNA(whereAllNA) { columns.toColumns() } -public fun DataFrame.dropNA(cols: Iterable, whereAllNA: Boolean = false): DataFrame = - dropNA(whereAllNA) { cols.toColumnSet() } +/** + * TODO this will be deprecated [PR #286](https://github.com/Kotlin/dataframe/pull/320) + */ +public fun DataFrame.dropNA(columns: Iterable, whereAllNA: Boolean = false): DataFrame = + dropNA(whereAllNA) { columns.toColumnSet() } +/** + * ## The Drop `NA` Operation + * + * Removes rows with [`NA`][org.jetbrains.kotlinx.dataframe.documentation.NA] values. Specific case of [drop][org.jetbrains.kotlinx.dataframe.DataFrame.drop]. + * + * Optionally, you can select which columns to operate on (see [Selecting Columns][org.jetbrains.kotlinx.dataframe.documentation.SelectingColumns]). + * Also, you can supply `whereAllNA = true` to only drop rows where all selected cells are [`NA`][org.jetbrains.kotlinx.dataframe.documentation.NA]. By default, + * rows are dropped if any of the selected cells are [`NA`][org.jetbrains.kotlinx.dataframe.documentation.NA]. + * + * For more information: [See `dropNA` on the documentation website.](https://kotlin.github.io/dataframe/drop.html#dropna) + * ## This Drop NA Overload + * This overload operates on all columns in the [DataFrame][org.jetbrains.kotlinx.dataframe.DataFrame]. + * @param whereAllNA `false` by default. + * If `true`, rows are dropped if all selected cells are [`NA`][org.jetbrains.kotlinx.dataframe.documentation.NA]. + * If `false`, rows are dropped if any of the selected cells is [`NA`][org.jetbrains.kotlinx.dataframe.documentation.NA]. + */ public fun DataFrame.dropNA(whereAllNA: Boolean = false): DataFrame = dropNA(whereAllNA) { all() } +/** + * ## The Drop `NA` Operation + * + * Removes [`NA`][NA] values from this [DataColumn], adjusting the type accordingly. + */ public fun DataColumn.dropNA(): DataColumn = when (typeClass) { Double::class, Float::class -> filter { !it.isNA }.cast() @@ -154,28 +967,202 @@ public fun DataColumn.dropNA(): DataColumn = // region dropNaNs -public fun DataFrame.dropNaNs(whereAllNaN: Boolean = false, selector: ColumnsSelector): DataFrame { - val cols = this[selector] +/** + * ## The Drop `NaN` Operation + * + * Removes rows with [`NaN`][Double.isNaN] values. Specific case of [drop][DataFrame.drop]. + * + * Optionally, you can select which columns to operate on (see [Selecting Columns][org.jetbrains.kotlinx.dataframe.documentation.SelectingColumns]). + * Also, you can supply `whereAllNaN = true` to only drop rows where all selected cells are [`NaN`][Double.isNaN]. By default, + * rows are dropped if any of the selected cells are [`NaN`][Double.isNaN]. + * + * For more information: [See `dropNaNs` on the documentation website.](https://kotlin.github.io/dataframe/drop.html#dropnans) + */ +internal interface DropNaNs { + + /** + * @param whereAllNaN `false` by default. + * If `true`, rows are dropped if all selected cells are [`NaN`][Double.isNaN]. + * If `false`, rows are dropped if any of the selected cells is [`NaN`][Double.isNaN]. + */ + interface WhereAllNaNParam +} + +private interface SetDropNaNsOperationArg + +/** + * ## The Drop `NaN` Operation + * + * Removes rows with [`NaN`][Double.isNaN] values. Specific case of [drop][org.jetbrains.kotlinx.dataframe.DataFrame.drop]. + * + * Optionally, you can select which columns to operate on (see [Selecting Columns][org.jetbrains.kotlinx.dataframe.documentation.SelectingColumns]). 
+ * Also, you can supply `whereAllNaN = true` to only drop rows where all selected cells are [`NaN`][Double.isNaN]. By default, + * rows are dropped if any of the selected cells are [`NaN`][Double.isNaN]. + * + * For more information: [See `dropNaNs` on the documentation website.](https://kotlin.github.io/dataframe/drop.html#dropnans) + * ## This Drop NaNs Overload + */ +private interface CommonDropNaNsFunctionDoc +/** + * ## The Drop `NaN` Operation + * + * Removes rows with [`NaN`][Double.isNaN] values. Specific case of [drop][org.jetbrains.kotlinx.dataframe.DataFrame.drop]. + * + * Optionally, you can select which columns to operate on (see [Selecting Columns][org.jetbrains.kotlinx.dataframe.documentation.SelectingColumns]). + * Also, you can supply `whereAllNaN = true` to only drop rows where all selected cells are [`NaN`][Double.isNaN]. By default, + * rows are dropped if any of the selected cells are [`NaN`][Double.isNaN]. + * + * For more information: [See `dropNaNs` on the documentation website.](https://kotlin.github.io/dataframe/drop.html#dropnans) + * ## This Drop NaNs Overload + * Select or express columns using the Column(s) Selection DSL. + * (Any [Access API][org.jetbrains.kotlinx.dataframe.documentation.AccessApi]). + * + * This DSL comes in the form of either a [Column Selector][org.jetbrains.kotlinx.dataframe.ColumnSelector]- or [Columns Selector][org.jetbrains.kotlinx.dataframe.ColumnsSelector] lambda, + * which operate in the [Column Selection DSL][org.jetbrains.kotlinx.dataframe.api.ColumnSelectionDsl] or the [Columns Selection DSL][org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl] and + * expect you to return a [SingleColumn][org.jetbrains.kotlinx.dataframe.columns.SingleColumn] or [ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet], respectively. + * + * For example: + * + * `df.`[dropNaNs][org.jetbrains.kotlinx.dataframe.api.dropNaNs]` { length `[and][org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl.and]` age }` + * + * `df.`[dropNaNs][org.jetbrains.kotlinx.dataframe.api.dropNaNs]` { `[cols][org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl.cols]`(1..5) }` + * + * `df.`[dropNaNs][org.jetbrains.kotlinx.dataframe.api.dropNaNs]` { `[colsOf][org.jetbrains.kotlinx.dataframe.api.colsOf]`<`[Double][Double]`>() }` + * + * `df.`[dropNaNs][org.jetbrains.kotlinx.dataframe.api.dropNaNs]`(whereAllNaN = true) { `[colsOf][org.jetbrains.kotlinx.dataframe.api.colsOf]`<`[Double][Double]`>() }` + * @param whereAllNaN `false` by default. + * If `true`, rows are dropped if all selected cells are [`NaN`][Double.isNaN]. + * If `false`, rows are dropped if any of the selected cells is [`NaN`][Double.isNaN]. + * @param columns The [Columns selector DSL][org.jetbrains.kotlinx.dataframe.documentation.SelectingColumns.Dsl.WithExample] used to select the columns of this [DataFrame][org.jetbrains.kotlinx.dataframe.DataFrame] to drop rows in. + */ +public fun DataFrame.dropNaNs(whereAllNaN: Boolean = false, columns: ColumnsSelector): DataFrame { + val cols = this[columns] return if (whereAllNaN) drop { cols.all { this[it].isNaN } } else drop { cols.any { this[it].isNaN } } } -public fun DataFrame.dropNaNs(vararg cols: KProperty<*>, whereAllNaN: Boolean = false): DataFrame = - dropNaNs(whereAllNaN) { cols.toColumns() } +/** + * ## The Drop `NaN` Operation + * + * Removes rows with [`NaN`][Double.isNaN] values. Specific case of [drop][org.jetbrains.kotlinx.dataframe.DataFrame.drop]. 
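To make the distinction between `null`, `NaN`, and `NA` concrete, a small sketch (the single `Double?` column is hypothetical, not from this PR):

```kotlin
import org.jetbrains.kotlinx.dataframe.api.*

val df = dataFrameOf("x")(1.0, Double.NaN, null)

df.dropNulls()  // removes only the null row
df.dropNaNs()   // removes only the NaN row
df.dropNA()     // removes both: NA covers null as well as Double/Float NaN
```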
+ * + * Optionally, you can select which columns to operate on (see [Selecting Columns][org.jetbrains.kotlinx.dataframe.documentation.SelectingColumns]). + * Also, you can supply `whereAllNaN = true` to only drop rows where all selected cells are [`NaN`][Double.isNaN]. By default, + * rows are dropped if any of the selected cells are [`NaN`][Double.isNaN]. + * + * For more information: [See `dropNaNs` on the documentation website.](https://kotlin.github.io/dataframe/drop.html#dropnans) + * ## This Drop NaNs Overload + * Select columns using [KProperties][KProperty] ([KProperties API][org.jetbrains.kotlinx.dataframe.documentation.AccessApi.KPropertiesApi]). + * + * For example: + * ```kotlin + * data class Person(val length: Double, val age: Double) + * ``` + * + * `df.`[dropNaNs][org.jetbrains.kotlinx.dataframe.api.dropNaNs]`(Person::length, Person::age)` + * + * `df.`[dropNaNs][org.jetbrains.kotlinx.dataframe.api.dropNaNs]`(Person::length, whereAllNaN = true)` + * @param whereAllNaN `false` by default. + * If `true`, rows are dropped if all selected cells are [`NaN`][Double.isNaN]. + * If `false`, rows are dropped if any of the selected cells is [`NaN`][Double.isNaN]. + * @param columns The [KProperties][org.jetbrains.kotlinx.dataframe.documentation.SelectingColumns.KProperties.WithExample] used to select the columns of this [DataFrame][org.jetbrains.kotlinx.dataframe.DataFrame] to drop rows in. + */ +public fun DataFrame.dropNaNs(vararg columns: KProperty<*>, whereAllNaN: Boolean = false): DataFrame = + dropNaNs(whereAllNaN) { columns.toColumns() } -public fun DataFrame.dropNaNs(vararg cols: String, whereAllNaN: Boolean = false): DataFrame = - dropNaNs(whereAllNaN) { cols.toColumns() } +/** + * ## The Drop `NaN` Operation + * + * Removes rows with [`NaN`][Double.isNaN] values. Specific case of [drop][org.jetbrains.kotlinx.dataframe.DataFrame.drop]. + * + * Optionally, you can select which columns to operate on (see [Selecting Columns][org.jetbrains.kotlinx.dataframe.documentation.SelectingColumns]). + * Also, you can supply `whereAllNaN = true` to only drop rows where all selected cells are [`NaN`][Double.isNaN]. By default, + * rows are dropped if any of the selected cells are [`NaN`][Double.isNaN]. + * + * For more information: [See `dropNaNs` on the documentation website.](https://kotlin.github.io/dataframe/drop.html#dropnans) + * ## This Drop NaNs Overload + * Select columns using their [column names][String] + * ([String API][org.jetbrains.kotlinx.dataframe.documentation.AccessApi.StringApi]). + * + * For example: + * + * `df.`[dropNaNs][org.jetbrains.kotlinx.dataframe.api.dropNaNs]`("length", "age")` + * + * `df.`[dropNaNs][org.jetbrains.kotlinx.dataframe.api.dropNaNs]`("length", whereAllNaN = true)` + * @param whereAllNaN `false` by default. + * If `true`, rows are dropped if all selected cells are [`NaN`][Double.isNaN]. + * If `false`, rows are dropped if any of the selected cells is [`NaN`][Double.isNaN]. + * @param columns The [Column names][org.jetbrains.kotlinx.dataframe.documentation.SelectingColumns.ColumnNames.WithExample] used to select the columns of this [DataFrame][org.jetbrains.kotlinx.dataframe.DataFrame] to drop rows in. 
+ */
+public fun <T> DataFrame<T>.dropNaNs(vararg columns: String, whereAllNaN: Boolean = false): DataFrame<T> =
+    dropNaNs(whereAllNaN) { columns.toColumns() }

-public fun <T> DataFrame<T>.dropNaNs(vararg cols: AnyColumnReference, whereAllNaN: Boolean = false): DataFrame<T> =
-    dropNaNs(whereAllNaN) { cols.toColumns() }

+/**
+ * ## The Drop `NaN` Operation
+ *
+ * Removes rows with [`NaN`][Double.isNaN] values. Specific case of [drop][org.jetbrains.kotlinx.dataframe.DataFrame.drop].
+ *
+ * Optionally, you can select which columns to operate on (see [Selecting Columns][org.jetbrains.kotlinx.dataframe.documentation.SelectingColumns]).
+ * Also, you can supply `whereAllNaN = true` to only drop rows where all selected cells are [`NaN`][Double.isNaN]. By default,
+ * rows are dropped if any of the selected cells are [`NaN`][Double.isNaN].
+ *
+ * For more information: [See `dropNaNs` on the documentation website.](https://kotlin.github.io/dataframe/drop.html#dropnans)
+ * ## This Drop NaNs Overload
+ * Select columns using [column accessors][org.jetbrains.kotlinx.dataframe.columns.ColumnReference]
+ * ([Column Accessors API][org.jetbrains.kotlinx.dataframe.documentation.AccessApi.ColumnAccessorsApi]).
+ *
+ * For example:
+ *
+ * `val length by `[column][org.jetbrains.kotlinx.dataframe.api.column]`<`[Double][Double]`>()`
+ *
+ * `val age by `[column][org.jetbrains.kotlinx.dataframe.api.column]`<`[Double][Double]`>()`
+ *
+ * `df.`[dropNaNs][org.jetbrains.kotlinx.dataframe.api.dropNaNs]`(length, age)`
+ *
+ * `df.`[dropNaNs][org.jetbrains.kotlinx.dataframe.api.dropNaNs]`(length, whereAllNaN = true)`
+ * @param whereAllNaN `false` by default.
+ *   If `true`, rows are dropped if all selected cells are [`NaN`][Double.isNaN].
+ *   If `false`, rows are dropped if any of the selected cells is [`NaN`][Double.isNaN].
+ * @param columns The [column accessors][org.jetbrains.kotlinx.dataframe.columns.ColumnReference]
+ *   ([Column Accessors API][org.jetbrains.kotlinx.dataframe.documentation.AccessApi.ColumnAccessorsApi]) used to select the columns of this [DataFrame][org.jetbrains.kotlinx.dataframe.DataFrame] to drop rows in.
+ */
+public fun <T> DataFrame<T>.dropNaNs(vararg columns: AnyColumnReference, whereAllNaN: Boolean = false): DataFrame<T> =
+    dropNaNs(whereAllNaN) { columns.toColumns() }

-public fun <T> DataFrame<T>.dropNaNs(cols: Iterable<AnyColumnReference>, whereAllNaN: Boolean = false): DataFrame<T> =
-    dropNaNs(whereAllNaN) { cols.toColumnSet() }

+/**
+ * TODO this will be deprecated [PR #320](https://github.com/Kotlin/dataframe/pull/320)
+ */
+public fun <T> DataFrame<T>.dropNaNs(
+    columns: Iterable<AnyColumnReference>,
+    whereAllNaN: Boolean = false,
+): DataFrame<T> =
+    dropNaNs(whereAllNaN) { columns.toColumnSet() }

+/**
+ * ## The Drop `NaN` Operation
+ *
+ * Removes rows with [`NaN`][Double.isNaN] values. Specific case of [drop][org.jetbrains.kotlinx.dataframe.DataFrame.drop].
+ *
+ * Optionally, you can select which columns to operate on (see [Selecting Columns][org.jetbrains.kotlinx.dataframe.documentation.SelectingColumns]).
+ * Also, you can supply `whereAllNaN = true` to only drop rows where all selected cells are [`NaN`][Double.isNaN]. By default,
+ * rows are dropped if any of the selected cells are [`NaN`][Double.isNaN].
+ *
+ * For more information: [See `dropNaNs` on the documentation website.](https://kotlin.github.io/dataframe/drop.html#dropnans)
+ * ## This Drop NaNs Overload
+ * This overload operates on all columns in the [DataFrame][org.jetbrains.kotlinx.dataframe.DataFrame].
+ * @param whereAllNaN `false` by default.
+ * If `true`, rows are dropped if all selected cells are [`NaN`][Double.isNaN]. + * If `false`, rows are dropped if any of the selected cells is [`NaN`][Double.isNaN]. + */ public fun DataFrame.dropNaNs(whereAllNaN: Boolean = false): DataFrame = dropNaNs(whereAllNaN) { all() } +/** + * ## The Drop `NaN` Operation + * + * Removes [`NaN`][NaN] values from this [DataColumn], adjusting the type accordingly. + */ public fun DataColumn.dropNaNs(): DataColumn = when (typeClass) { Double::class, Float::class -> filter { !it.isNaN }.cast() diff --git a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/add.kt b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/add.kt index 58c3c48047..2a9375869b 100644 --- a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/add.kt +++ b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/add.kt @@ -50,7 +50,8 @@ public fun DataFrame.add(vararg columns: AnyBaseCol): DataFrame = addA * @throws [UnequalColumnSizesException] if columns in expected result have different sizes * @return new [DataFrame] with added columns */ -public fun DataFrame.addAll(columns: Iterable): DataFrame = dataFrameOf(columns() + columns).cast() +public fun DataFrame.addAll(columns: Iterable): DataFrame = + dataFrameOf(columns() + columns).cast() /** * Creates new [DataFrame] with all columns from given [dataFrames] added to the end of original [DataFrame.columns] list. @@ -75,7 +76,8 @@ public fun DataFrame.add(vararg dataFrames: AnyFrame): DataFrame = add * @return new [DataFrame] with added columns */ @JvmName("addAllFrames") -public fun DataFrame.addAll(dataFrames: Iterable): DataFrame = addAll(dataFrames.flatMap { it.columns() }) +public fun DataFrame.addAll(dataFrames: Iterable): DataFrame = + addAll(dataFrames.flatMap { it.columns() }) // endregion @@ -96,7 +98,16 @@ public interface AddDataRow : DataRow { public fun AnyRow.newValue(): C } -public typealias AddExpression = Selector, C> +/** + * [AddExpression] is used to express or select any instance of `R` using the given instance of [AddDataRow]`` as + * `this` and `it`. 
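A short sketch of what the [AddDataRow] receiver enables: inside an `add` expression a row can read the value that was just computed for the preceding row via `newValue()`. The frame below is hypothetical; only the row count matters.

```kotlin
import org.jetbrains.kotlinx.dataframe.api.*

val df = dataFrameOf("id")(1, 2, 3, 4, 5)

// Each "fib" cell refers to the already-computed "fib" values of earlier rows.
val withFib = df.add("fib") {
    if (index() < 2) 1
    else prev()!!.newValue<Int>() + prev()!!.prev()!!.newValue<Int>()
}
```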
+ * + * Shorthand for: + * ```kotlin + * AddDataRow.(it: AddDataRow) -> R + * ``` + */ +public typealias AddExpression = Selector, R> /** * Creates new column using row [expression] and adds it to the end of [DataFrame] @@ -166,11 +177,15 @@ public class AddDsl(@PublishedApi internal val df: DataFrame) : ColumnsCon return df.mapToColumn("", Infer.Nulls, expression) } - public inline infix fun String.from(noinline expression: RowExpression): Boolean = add(this, Infer.Nulls, expression) + public inline infix fun String.from(noinline expression: RowExpression): Boolean = + add(this, Infer.Nulls, expression) // TODO: use path instead of name - public inline infix fun ColumnAccessor.from(noinline expression: RowExpression): Boolean = name().from(expression) - public inline infix fun KProperty.from(noinline expression: RowExpression): Boolean = add(name, Infer.Nulls, expression) + public inline infix fun ColumnAccessor.from(noinline expression: RowExpression): Boolean = + name().from(expression) + + public inline infix fun KProperty.from(noinline expression: RowExpression): Boolean = + add(name, Infer.Nulls, expression) public infix fun String.from(column: AnyColumnReference): Boolean = add(column.rename(this)) public inline infix fun ColumnAccessor.from(column: ColumnReference): Boolean = name() from column diff --git a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/gather.kt b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/gather.kt index a65dfcc1f0..119ab658a7 100644 --- a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/gather.kt +++ b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/gather.kt @@ -2,10 +2,9 @@ package org.jetbrains.kotlinx.dataframe.api import org.jetbrains.kotlinx.dataframe.ColumnsSelector import org.jetbrains.kotlinx.dataframe.DataFrame -import org.jetbrains.kotlinx.dataframe.Predicate +import org.jetbrains.kotlinx.dataframe.RowValueFilter import org.jetbrains.kotlinx.dataframe.columns.ColumnAccessor import org.jetbrains.kotlinx.dataframe.columns.ColumnReference -import org.jetbrains.kotlinx.dataframe.impl.and import org.jetbrains.kotlinx.dataframe.impl.api.gatherImpl import org.jetbrains.kotlinx.dataframe.impl.columnName import org.jetbrains.kotlinx.dataframe.impl.columns.toColumns @@ -13,18 +12,33 @@ import kotlin.reflect.KProperty import kotlin.reflect.KType import kotlin.reflect.typeOf -public fun DataFrame.gather(selector: ColumnsSelector): Gather = Gather( - this, selector, null, typeOf(), - { it }, null -) -public fun DataFrame.gather(vararg columns: String): Gather = gather { columns.toColumns() } -public fun DataFrame.gather(vararg columns: ColumnReference): Gather = gather { columns.toColumns() } -public fun DataFrame.gather(vararg columns: KProperty): Gather = gather { columns.toColumns() } +public fun DataFrame.gather(selector: ColumnsSelector): Gather = + Gather( + df = this, + columns = selector, + filter = null, + keyType = typeOf(), + keyTransform = { it }, + valueTransform = null, + ) -public fun Gather.where(filter: Predicate): Gather = copy(filter = this.filter and filter) -public fun Gather.notNull(): Gather = where { it != null } as Gather +public fun DataFrame.gather(vararg columns: String): Gather = + gather { columns.toColumns() } -public fun Gather.explodeLists(): Gather = copy(explode = true) +public fun DataFrame.gather(vararg columns: ColumnReference): Gather = + gather { columns.toColumns() } + +public fun DataFrame.gather(vararg columns: 
KProperty): Gather = + gather { columns.toColumns() } + +public fun Gather.where(filter: RowValueFilter): Gather = + copy(filter = this.filter and filter) + +public fun Gather.notNull(): Gather = + where { it != null } as Gather + +public fun Gather.explodeLists(): Gather = + copy(explode = true) public inline fun Gather.mapKeys(noinline transform: (String) -> K): Gather = copy(keyTransform = transform as ((String) -> Nothing), keyType = typeOf()) as Gather @@ -35,7 +49,7 @@ public fun Gather.mapValues(transform: (C) -> R): Gathe public data class Gather( internal val df: DataFrame, internal val columns: ColumnsSelector, - internal val filter: ((C) -> Boolean)? = null, + internal val filter: RowValueFilter? = null, internal val keyType: KType? = null, internal val keyTransform: ((String) -> K), internal val valueTransform: ((C) -> R)? = null, diff --git a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/reorder.kt b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/reorder.kt index 31e8ee8695..f6eb6cdfd9 100644 --- a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/reorder.kt +++ b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/reorder.kt @@ -1,8 +1,8 @@ package org.jetbrains.kotlinx.dataframe.api import org.jetbrains.kotlinx.dataframe.AnyCol +import org.jetbrains.kotlinx.dataframe.ColumnExpression import org.jetbrains.kotlinx.dataframe.ColumnsSelector -import org.jetbrains.kotlinx.dataframe.DataColumn import org.jetbrains.kotlinx.dataframe.DataFrame import org.jetbrains.kotlinx.dataframe.Selector import org.jetbrains.kotlinx.dataframe.columns.ColumnReference @@ -21,15 +21,20 @@ public data class Reorder( } public fun DataFrame.reorder(selector: ColumnsSelector): Reorder = Reorder(this, selector, false) -public fun DataFrame.reorder(vararg columns: ColumnReference): Reorder = reorder { columns.toColumns() } +public fun DataFrame.reorder(vararg columns: ColumnReference): Reorder = + reorder { columns.toColumns() } + public fun DataFrame.reorder(vararg columns: KProperty): Reorder = reorder { columns.toColumns() } public fun DataFrame.reorder(vararg columns: String): Reorder = reorder { columns.toColumns() } -public fun > Reorder.by(expression: Selector, V>): DataFrame = reorderImpl(false, expression) +public fun > Reorder.by(expression: ColumnExpression): DataFrame = + reorderImpl(false, expression) -public fun Reorder.byName(desc: Boolean = false): DataFrame = if (desc) byDesc { it.name } else by { it.name } +public fun Reorder.byName(desc: Boolean = false): DataFrame = + if (desc) byDesc { it.name } else by { it.name } -public fun > Reorder.byDesc(expression: Selector, V>): DataFrame = reorderImpl(true, expression) +public fun > Reorder.byDesc(expression: ColumnExpression): DataFrame = + reorderImpl(true, expression) public fun > DataFrame.reorderColumnsBy( dfs: Boolean = true, @@ -37,6 +42,7 @@ public fun > DataFrame.reorderColumnsBy( expression: Selector ): DataFrame = Reorder(this, { if (dfs) allDfs(true) else all() }, dfs).reorderImpl(desc, expression) -public fun DataFrame.reorderColumnsByName(dfs: Boolean = true, desc: Boolean = false): DataFrame = reorderColumnsBy(dfs, desc) { name() } +public fun DataFrame.reorderColumnsByName(dfs: Boolean = true, desc: Boolean = false): DataFrame = + reorderColumnsBy(dfs, desc) { name() } // endregion diff --git a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/select.kt 
b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/select.kt index e134fd9dd0..4a91e748f6 100644 --- a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/select.kt +++ b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/select.kt @@ -10,12 +10,28 @@ import kotlin.reflect.KProperty // region DataFrame -public fun DataFrame.select(columns: ColumnsSelector): DataFrame = get(columns).toDataFrame().cast() -public fun DataFrame.select(vararg columns: KProperty<*>): DataFrame = select(columns.map { it.columnName }) -public fun DataFrame.select(vararg columns: String): DataFrame = select(columns.asIterable()) -public fun DataFrame.select(vararg columns: AnyColumnReference): DataFrame = select { columns.toColumns() } -@JvmName("selectT") -public fun DataFrame.select(columns: Iterable): DataFrame = columns.map { get(it) }.toDataFrame().cast() -public fun DataFrame.select(columns: Iterable): DataFrame = select { columns.toColumnSet() } +public fun DataFrame.select(columns: ColumnsSelector): DataFrame = + get(columns).toDataFrame().cast() + +public fun DataFrame.select(vararg columns: KProperty<*>): DataFrame = + select(columns.asIterable()) + +@JvmName("selectKPropertyIterable") +public fun DataFrame.select(columns: Iterable>): DataFrame = + select(columns.map { it.columnName }) + +public fun DataFrame.select(vararg columns: String): DataFrame = + select(columns.asIterable()) + +@JvmName("selectStringIterable") +public fun DataFrame.select(columns: Iterable): DataFrame = + columns.map { get(it) }.toDataFrame().cast() + +public fun DataFrame.select(vararg columns: AnyColumnReference): DataFrame = + select { columns.toColumns() } + +@JvmName("selectAnyColumnReferenceIterable") +public fun DataFrame.select(columns: Iterable): DataFrame = + select { columns.toColumnSet() } // endregion diff --git a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/toDataFrame.kt b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/toDataFrame.kt index 943c94fa6e..444661f8bf 100644 --- a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/toDataFrame.kt +++ b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/toDataFrame.kt @@ -7,6 +7,7 @@ import org.jetbrains.kotlinx.dataframe.ColumnsSelector import org.jetbrains.kotlinx.dataframe.DataColumn import org.jetbrains.kotlinx.dataframe.DataFrame import org.jetbrains.kotlinx.dataframe.DataRow +import org.jetbrains.kotlinx.dataframe.annotations.DataSchema import org.jetbrains.kotlinx.dataframe.columns.ColumnPath import org.jetbrains.kotlinx.dataframe.impl.ColumnNameGenerator import org.jetbrains.kotlinx.dataframe.impl.api.createDataFrameImpl @@ -23,7 +24,8 @@ public inline fun Iterable.toDataFrame(): DataFrame = toDataFr properties() } -public inline fun Iterable.toDataFrame(noinline body: CreateDataFrameDsl.() -> Unit): DataFrame = createDataFrameImpl(T::class, body) +public inline fun Iterable.toDataFrame(noinline body: CreateDataFrameDsl.() -> Unit): DataFrame = + createDataFrameImpl(T::class, body) public inline fun Iterable.toDataFrame(vararg props: KProperty<*>, maxDepth: Int = 0): DataFrame = toDataFrame { @@ -76,6 +78,7 @@ public fun Iterable>.toDataFrameFromPairs(): Da when (path.size) { 0 -> { } + 1 -> { val name = path[0] val uniqueName = nameGenerator.addUnique(name) @@ -85,6 +88,7 @@ public fun Iterable>.toDataFrameFromPairs(): Da columns.add(col.rename(uniqueName)) columnIndices[uniqueName] = 
index } + else -> { val name = path[0] val uniqueName = columnGroupName.getOrPut(name) { @@ -187,6 +191,83 @@ public abstract class CreateDataFrameDsl : TraversePropertiesDsl { // endregion +// region toDataFrame overloads for built-in types + +/* +Without overloads Iterable.toDataFrame produces unexpected result + + +``` +val string = listOf("aaa", "aa", null) +string.toDataFrame() +``` +=> + length +0 3 +1 2 +2 null + */ + +@JvmName("toDataFrameByte") +public inline fun Iterable.toDataFrame(): DataFrame> = toDataFrame { + ValueProperty::value from { it } +}.cast() + +@JvmName("toDataFrameShort") +public inline fun Iterable.toDataFrame(): DataFrame> = toDataFrame { + ValueProperty::value from { it } +}.cast() + +@JvmName("toDataFrameInt") +public inline fun Iterable.toDataFrame(): DataFrame> = toDataFrame { + ValueProperty::value from { it } +}.cast() + +@JvmName("toDataFrameLong") +public inline fun Iterable.toDataFrame(): DataFrame> = toDataFrame { + ValueProperty::value from { it } +}.cast() + +@JvmName("toDataFrameString") +public inline fun Iterable.toDataFrame(): DataFrame> = toDataFrame { + ValueProperty::value from { it } +}.cast() + +@JvmName("toDataFrameChar") +public inline fun Iterable.toDataFrame(): DataFrame> = toDataFrame { + ValueProperty::value from { it } +}.cast() + +@JvmName("toDataFrameBoolean") +public inline fun Iterable.toDataFrame(): DataFrame> = toDataFrame { + ValueProperty::value from { it } +}.cast() + +@JvmName("toDataFrameUByte") +public inline fun Iterable.toDataFrame(): DataFrame> = toDataFrame { + ValueProperty::value from { it } +}.cast() + +@JvmName("toDataFrameUShort") +public inline fun Iterable.toDataFrame(): DataFrame> = toDataFrame { + ValueProperty::value from { it } +}.cast() + +@JvmName("toDataFrameUInt") +public inline fun Iterable.toDataFrame(): DataFrame> = toDataFrame { + ValueProperty::value from { it } +}.cast() + +@JvmName("toDataFrameULong") +public inline fun Iterable.toDataFrame(): DataFrame> = toDataFrame { + ValueProperty::value from { it } +}.cast() + +@DataSchema +public interface ValueProperty { + public val value: T +} + // region Create DataFrame from Map public fun Map>.toDataFrame(): AnyFrame { @@ -195,7 +276,12 @@ public fun Map>.toDataFrame(): AnyFrame { @JvmName("toDataFrameColumnPathAnyNullable") public fun Map>.toDataFrame(): AnyFrame { - return map { it.key to DataColumn.createWithTypeInference(it.key.last(), it.value.asList()) }.toDataFrameFromPairs() + return map { + it.key to DataColumn.createWithTypeInference( + it.key.last(), + it.value.asList() + ) + }.toDataFrameFromPairs() } // endregion diff --git a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/update.kt b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/update.kt index 8c719d8702..72e27075bd 100644 --- a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/update.kt +++ b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/update.kt @@ -1,113 +1,761 @@ package org.jetbrains.kotlinx.dataframe.api -import org.jetbrains.kotlinx.dataframe.AnyRow -import org.jetbrains.kotlinx.dataframe.ColumnsSelector -import org.jetbrains.kotlinx.dataframe.DataColumn -import org.jetbrains.kotlinx.dataframe.DataFrame -import org.jetbrains.kotlinx.dataframe.DataFrameExpression -import org.jetbrains.kotlinx.dataframe.DataRow -import org.jetbrains.kotlinx.dataframe.RowColumnExpression -import org.jetbrains.kotlinx.dataframe.RowValueExpression -import 
org.jetbrains.kotlinx.dataframe.RowValueFilter -import org.jetbrains.kotlinx.dataframe.Selector +import org.jetbrains.kotlinx.dataframe.* +import org.jetbrains.kotlinx.dataframe.api.Update.Usage +import org.jetbrains.kotlinx.dataframe.columns.ColumnGroup import org.jetbrains.kotlinx.dataframe.columns.ColumnReference +import org.jetbrains.kotlinx.dataframe.documentation.* import org.jetbrains.kotlinx.dataframe.impl.api.asFrameImpl import org.jetbrains.kotlinx.dataframe.impl.api.updateImpl import org.jetbrains.kotlinx.dataframe.impl.api.updateWithValuePerColumnImpl import org.jetbrains.kotlinx.dataframe.impl.columns.toColumnSet import org.jetbrains.kotlinx.dataframe.impl.columns.toColumns import org.jetbrains.kotlinx.dataframe.impl.headPlusArray -import org.jetbrains.kotlinx.dataframe.index import kotlin.reflect.KProperty +/** + * ## The Update Operation + * + * Returns the [DataFrame] with changed values in some cells + * (column types can not be changed). + * + * Check out the [`update` Operation Usage][Usage]. + * + * For more information: [See `update` on the documentation website.](https://kotlin.github.io/dataframe/update.html) + */ +public data class Update( + val df: DataFrame, + val filter: RowValueFilter?, + val columns: ColumnsSelector, +) { + public fun cast(): Update = + Update(df, filter as RowValueFilter?, columns as ColumnsSelector) + + /** This argument providing the (clickable) name of the update-like function. + * Note: If clickable, make sure to [alias][your type]. + */ + internal interface UpdateOperationArg + + /** + * ## [update][update] Operation Usage + * + * [update][update] `{ `[columns][SelectingColumns]` }` + * + * - `[.`[where][Update.where]` { `[rowValueCondition][SelectingRows.RowValueCondition.WithExample]` } ]` + * + * - `[.`[at][Update.at]` (`[rowIndices][CommonUpdateAtFunctionDoc.RowIndicesParam]`) ]` + * + * - `.`[with][Update.with]` { `[rowExpression][ExpressionsGivenRow.RowValueExpression.WithExample]` } + * | .`[notNull][Update.notNull]` { `[rowExpression][ExpressionsGivenRow.RowValueExpression.WithExample]` } + * | .`[perCol][Update.perCol]` { `[colExpression][ExpressionsGivenColumn.ColumnExpression.WithExample]` } + * | .`[perRowCol][Update.perRowCol]` { `[rowColExpression][ExpressionsGivenRowAndColumn.RowColumnExpression.WithExample]` } + * | .`[withValue][Update.withValue]`(value) + * | .`[withNull][Update.withNull]`() + * | .`[withZero][Update.withZero]`() + * | .`[asFrame][Update.asFrame]` { `[dataFrameExpression][ExpressionsGivenDataFrame.DataFrameExpression.WithExample]` }` + * + */ + public interface Usage + + /** The columns to update need to be selected. See [Selecting Columns][org.jetbrains.kotlinx.dataframe.documentation.SelectingColumns] for all the selecting options. */ + public interface Columns + + /** @param columns The [Columns selector DSL][org.jetbrains.kotlinx.dataframe.documentation.SelectingColumns.Dsl.WithExample] used to select the columns of this [DataFrame] to update. */ + internal interface DslParam + + /** @param columns The [Column references][org.jetbrains.kotlinx.dataframe.documentation.SelectingColumns.ColumnAccessors.WithExample] of this [DataFrame] to update. */ + internal interface ColumnAccessorsParam + + /** @param columns The [KProperties][org.jetbrains.kotlinx.dataframe.documentation.SelectingColumns.KProperties.WithExample] corresponding to columns of this [DataFrame] to update. 
*/ + internal interface KPropertiesParam + + /** @param columns The [Column names][org.jetbrains.kotlinx.dataframe.documentation.SelectingColumns.ColumnNames.WithExample] belonging to this [DataFrame] to update. */ + internal interface ColumnNamesParam +} + +// region update + +private interface SetSelectingColumnsOperationArg + +/** + * ## The Update Operation + * + * Returns the [DataFrame][org.jetbrains.kotlinx.dataframe.DataFrame] with changed values in some cells + * (column types can not be changed). + * + * Check out the [`update` Operation Usage][org.jetbrains.kotlinx.dataframe.api.Update.Usage]. + * + * For more information: [See `update` on the documentation website.](https://kotlin.github.io/dataframe/update.html) + * ## ‎ + * The columns to update need to be selected. See [Selecting Columns][org.jetbrains.kotlinx.dataframe.documentation.SelectingColumns] for all the selecting options. + * ## This Update Overload + */ +private interface CommonUpdateFunctionDoc + +/** + * ## Optional + * Combine `df.`[update][update]`(...).`[with][Update.with]` { ... }` + * into `df.`[update][update]`(...) { ... }` + */ +private interface UpdateWithNote + +/** + * ## The Update Operation + * + * Returns the [DataFrame][org.jetbrains.kotlinx.dataframe.DataFrame] with changed values in some cells + * (column types can not be changed). + * + * Check out the [`update` Operation Usage][org.jetbrains.kotlinx.dataframe.api.Update.Usage]. + * + * For more information: [See `update` on the documentation website.](https://kotlin.github.io/dataframe/update.html) + * ## ‎ + * The columns to update need to be selected. See [Selecting Columns][org.jetbrains.kotlinx.dataframe.documentation.SelectingColumns] for all the selecting options. + * ## This Update Overload + * Select or express columns using the Column(s) Selection DSL. + * (Any [Access API][org.jetbrains.kotlinx.dataframe.documentation.AccessApi]). + * + * This DSL comes in the form of either a [Column Selector][org.jetbrains.kotlinx.dataframe.ColumnSelector]- or [Columns Selector][org.jetbrains.kotlinx.dataframe.ColumnsSelector] lambda, + * which operate in the [Column Selection DSL][org.jetbrains.kotlinx.dataframe.api.ColumnSelectionDsl] or the [Columns Selection DSL][org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl] and + * expect you to return a [SingleColumn][org.jetbrains.kotlinx.dataframe.columns.SingleColumn] or [ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet], respectively. + * + * For example: + * + * `df.`[update][org.jetbrains.kotlinx.dataframe.api.update]` { length `[and][org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl.and]` age }` + * + * `df.`[update][org.jetbrains.kotlinx.dataframe.api.update]` { `[cols][org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl.cols]`(1..5) }` + * + * `df.`[update][org.jetbrains.kotlinx.dataframe.api.update]` { `[colsOf][org.jetbrains.kotlinx.dataframe.api.colsOf]`<`[Double][Double]`>() }` + * + * @param columns The [Columns selector DSL][org.jetbrains.kotlinx.dataframe.documentation.SelectingColumns.Dsl.WithExample] used to select the columns of this [DataFrame][org.jetbrains.kotlinx.dataframe.DataFrame] to update. 
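As a compact, runnable version of the `update { ... }.where { ... }.with { ... }` chain described above (column accessor and sample data are hypothetical):

```kotlin
import org.jetbrains.kotlinx.dataframe.api.*

val df = dataFrameOf("name", "age")(
    "Alice", 28,
    "Bob", -1,
)
val age by column<Int>()

// where() narrows the update to matching cells; with() supplies the replacement per row.
df.update { age }.where { it < 0 }.with { 0 }
```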
+ */ public fun DataFrame.update(columns: ColumnsSelector): Update = Update(this, null, columns) -public fun DataFrame.update(columns: Iterable>): Update = - update { columns.toColumnSet() } - +/** + * ## The Update Operation + * + * Returns the [DataFrame][org.jetbrains.kotlinx.dataframe.DataFrame] with changed values in some cells + * (column types can not be changed). + * + * Check out the [`update` Operation Usage][org.jetbrains.kotlinx.dataframe.api.Update.Usage]. + * + * For more information: [See `update` on the documentation website.](https://kotlin.github.io/dataframe/update.html) + * ## ‎ + * The columns to update need to be selected. See [Selecting Columns][org.jetbrains.kotlinx.dataframe.documentation.SelectingColumns] for all the selecting options. + * ## This Update Overload + * Select columns using their [column names][String] + * ([String API][org.jetbrains.kotlinx.dataframe.documentation.AccessApi.StringApi]). + * + * For example: + * + * `df.`[update][org.jetbrains.kotlinx.dataframe.api.update]`("length", "age")` + * + * ## Optional + * Combine `df.`[update][org.jetbrains.kotlinx.dataframe.api.update]`(...).`[with][org.jetbrains.kotlinx.dataframe.api.Update.with]` { ... }` + * into `df.`[update][org.jetbrains.kotlinx.dataframe.api.update]`(...) { ... }` + * @param columns The [Column names][org.jetbrains.kotlinx.dataframe.documentation.SelectingColumns.ColumnNames.WithExample] belonging to this [DataFrame][org.jetbrains.kotlinx.dataframe.DataFrame] to update. + */ public fun DataFrame.update(vararg columns: String): Update = update { columns.toColumns() } + +/** + * ## The Update Operation + * + * Returns the [DataFrame][org.jetbrains.kotlinx.dataframe.DataFrame] with changed values in some cells + * (column types can not be changed). + * + * Check out the [`update` Operation Usage][org.jetbrains.kotlinx.dataframe.api.Update.Usage]. + * + * For more information: [See `update` on the documentation website.](https://kotlin.github.io/dataframe/update.html) + * ## ‎ + * The columns to update need to be selected. See [Selecting Columns][org.jetbrains.kotlinx.dataframe.documentation.SelectingColumns] for all the selecting options. + * ## This Update Overload + * Select columns using [KProperties][KProperty] ([KProperties API][org.jetbrains.kotlinx.dataframe.documentation.AccessApi.KPropertiesApi]). + * + * For example: + * ```kotlin + * data class Person(val length: Double, val age: Double) + * ``` + * + * `df.`[update][org.jetbrains.kotlinx.dataframe.api.update]`(Person::length, Person::age)` + * + * ## Optional + * Combine `df.`[update][org.jetbrains.kotlinx.dataframe.api.update]`(...).`[with][org.jetbrains.kotlinx.dataframe.api.Update.with]` { ... }` + * into `df.`[update][org.jetbrains.kotlinx.dataframe.api.update]`(...) { ... }` + * @param columns The [KProperties][org.jetbrains.kotlinx.dataframe.documentation.SelectingColumns.KProperties.WithExample] corresponding to columns of this [DataFrame][org.jetbrains.kotlinx.dataframe.DataFrame] to update. + */ public fun DataFrame.update(vararg columns: KProperty): Update = update { columns.toColumns() } + +/** + * ## The Update Operation + * + * Returns the [DataFrame][org.jetbrains.kotlinx.dataframe.DataFrame] with changed values in some cells + * (column types can not be changed). + * + * Check out the [`update` Operation Usage][org.jetbrains.kotlinx.dataframe.api.Update.Usage]. 
+ * + * For more information: [See `update` on the documentation website.](https://kotlin.github.io/dataframe/update.html) + * ## ‎ + * The columns to update need to be selected. See [Selecting Columns][org.jetbrains.kotlinx.dataframe.documentation.SelectingColumns] for all the selecting options. + * ## This Update Overload + * Select columns using [column accessors][org.jetbrains.kotlinx.dataframe.columns.ColumnReference] + * ([Column Accessors API][org.jetbrains.kotlinx.dataframe.documentation.AccessApi.ColumnAccessorsApi]). + * + * For example: + * + * `val length by `[column][org.jetbrains.kotlinx.dataframe.api.column]`<`[Double][Double]`>()` + * + * `val age by `[column][org.jetbrains.kotlinx.dataframe.api.column]`<`[Double][Double]`>()` + * + * `df.`[update][org.jetbrains.kotlinx.dataframe.api.update]`(length, age)` + * + * ## Optional + * Combine `df.`[update][org.jetbrains.kotlinx.dataframe.api.update]`(...).`[with][org.jetbrains.kotlinx.dataframe.api.Update.with]` { ... }` + * into `df.`[update][org.jetbrains.kotlinx.dataframe.api.update]`(...) { ... }` + * @param columns The [Column references][org.jetbrains.kotlinx.dataframe.documentation.SelectingColumns.ColumnAccessors.WithExample] of this [DataFrame][org.jetbrains.kotlinx.dataframe.DataFrame] to update. + */ public fun DataFrame.update(vararg columns: ColumnReference): Update = update { columns.toColumns() } -public data class Update( - val df: DataFrame, - val filter: RowValueFilter?, - val columns: ColumnsSelector -) { - public fun cast(): Update = - Update(df, filter as RowValueFilter?, columns as ColumnsSelector) -} +/** + * TODO this will be deprecated [PR #286](https://github.com/Kotlin/dataframe/pull/320) + */ +public fun DataFrame.update(columns: Iterable>): Update = + update { columns.toColumnSet() } + +// endregion +/** ## Where + * Filter or find rows to operate on after [selecting columns][org.jetbrains.kotlinx.dataframe.documentation.SelectingColumns] using a + * [row value filter][org.jetbrains.kotlinx.dataframe.RowValueFilter]. + * + * For example: + * + * `df.`[update][org.jetbrains.kotlinx.dataframe.api.update]` { length }.`[where][org.jetbrains.kotlinx.dataframe.api.where]` { it > 10.0 }` + * + * `df.`[update][org.jetbrains.kotlinx.dataframe.api.update]` { `[cols][org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl.cols]`(1..5) }.`[where][org.jetbrains.kotlinx.dataframe.api.where]` { `[index][org.jetbrains.kotlinx.dataframe.index]`() > 4 && city != "Paris" }` + * + * + * + * + * @param predicate The [row value filter][RowValueFilter] to select the rows to update. + */ public fun Update.where(predicate: RowValueFilter): Update = copy(filter = filter and predicate) +/** ## At + * Only update the columns at certain given [row indices][CommonUpdateAtFunctionDoc.RowIndicesParam]: + * + * Either a [Collection]<[Int]>, an [IntRange], or just `vararg` indices. + * + * For example: + * + * `df.`[update][update]` { city }.`[at][at]`(5..10).`[with][with]` { "Paris" }` + * + * `df.`[update][update]` { name }.`[at][at]`(1, 2, 3, 4).`[with][with]` { "Empty" }` + * + * ## This At Overload + */ +private interface CommonUpdateAtFunctionDoc { + + /** The indices of the rows to update. Either a [Collection]<[Int]>, an [IntRange], or just `vararg` indices. 
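A minimal sketch of restricting an update to specific row indices with `at` (sample frame and column accessor are made up):

```kotlin
import org.jetbrains.kotlinx.dataframe.api.*

val df = dataFrameOf("city")("Berlin", "Madrid", "Rome", "Oslo")
val city by column<String>()

// Only the rows at indices 1 and 3 are updated; all other rows stay as-is.
df.update { city }.at(1, 3).with { "Unknown" }
```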
*/ + interface RowIndicesParam +} + +/** + * ## At + * Only update the columns at certain given [row indices][org.jetbrains.kotlinx.dataframe.api.CommonUpdateAtFunctionDoc.RowIndicesParam]: + * + * Either a [Collection][Collection]<[Int][Int]>, an [IntRange][IntRange], or just `vararg` indices. + * + * For example: + * + * `df.`[update][org.jetbrains.kotlinx.dataframe.api.update]` { city }.`[at][org.jetbrains.kotlinx.dataframe.api.at]`(5..10).`[with][org.jetbrains.kotlinx.dataframe.api.with]` { "Paris" }` + * + * `df.`[update][org.jetbrains.kotlinx.dataframe.api.update]` { name }.`[at][org.jetbrains.kotlinx.dataframe.api.at]`(1, 2, 3, 4).`[with][org.jetbrains.kotlinx.dataframe.api.with]` { "Empty" }` + * + * ## This At Overload + * + * Provide a [Collection][Collection]<[Int][Int]> of row indices to update. + * + * @param rowIndices The indices of the rows to update. Either a [Collection][Collection]<[Int][Int]>, an [IntRange][IntRange], or just `vararg` indices. + */ public fun Update.at(rowIndices: Collection): Update = where { index in rowIndices } + +/** + * ## At + * Only update the columns at certain given [row indices][org.jetbrains.kotlinx.dataframe.api.CommonUpdateAtFunctionDoc.RowIndicesParam]: + * + * Either a [Collection][Collection]<[Int][Int]>, an [IntRange][IntRange], or just `vararg` indices. + * + * For example: + * + * `df.`[update][org.jetbrains.kotlinx.dataframe.api.update]` { city }.`[at][org.jetbrains.kotlinx.dataframe.api.at]`(5..10).`[with][org.jetbrains.kotlinx.dataframe.api.with]` { "Paris" }` + * + * `df.`[update][org.jetbrains.kotlinx.dataframe.api.update]` { name }.`[at][org.jetbrains.kotlinx.dataframe.api.at]`(1, 2, 3, 4).`[with][org.jetbrains.kotlinx.dataframe.api.with]` { "Empty" }` + * + * ## This At Overload + * + * Provide a `vararg` of [Ints][Int] of row indices to update. + * + * @param rowIndices The indices of the rows to update. Either a [Collection][Collection]<[Int][Int]>, an [IntRange][IntRange], or just `vararg` indices. + */ public fun Update.at(vararg rowIndices: Int): Update = at(rowIndices.toSet()) + +/** + * ## At + * Only update the columns at certain given [row indices][org.jetbrains.kotlinx.dataframe.api.CommonUpdateAtFunctionDoc.RowIndicesParam]: + * + * Either a [Collection][Collection]<[Int][Int]>, an [IntRange][IntRange], or just `vararg` indices. + * + * For example: + * + * `df.`[update][org.jetbrains.kotlinx.dataframe.api.update]` { city }.`[at][org.jetbrains.kotlinx.dataframe.api.at]`(5..10).`[with][org.jetbrains.kotlinx.dataframe.api.with]` { "Paris" }` + * + * `df.`[update][org.jetbrains.kotlinx.dataframe.api.update]` { name }.`[at][org.jetbrains.kotlinx.dataframe.api.at]`(1, 2, 3, 4).`[with][org.jetbrains.kotlinx.dataframe.api.with]` { "Empty" }` + * + * ## This At Overload + * + * Provide an [IntRange][IntRange] of row indices to update. + * + * @param rowRange The indices of the rows to update. Either a [Collection][Collection]<[Int][Int]>, an [IntRange][IntRange], or just `vararg` indices. + */ public fun Update.at(rowRange: IntRange): Update = where { index in rowRange } +/** ## Per Row Col + * Provide a new value for every selected cell given both its row and column using a [row-column expression][org.jetbrains.kotlinx.dataframe.RowColumnExpression]. 
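A small executable sketch of `perRowCol`, where each replacement value may depend on both the row and the column being updated (hypothetical numeric frame):

```kotlin
import org.jetbrains.kotlinx.dataframe.api.*

val df = dataFrameOf("a", "b")(
    1.0, 10.0,
    2.0, 20.0,
    3.0, 30.0,
)

// Scale every Double cell by the mean of its own column; `row` is the current row, `col` the column.
df.update { colsOf<Double>() }.perRowCol { row, col -> row[col] / col.mean() }
```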
+ *
+ * For example:
+ *
+ * `df.`[update][org.jetbrains.kotlinx.dataframe.api.update]` { age }.`[perRowCol][org.jetbrains.kotlinx.dataframe.api.perRowCol]` { row, col ->`
+ *
+ * `    row.age / col.`[mean][org.jetbrains.kotlinx.dataframe.DataColumn.mean]`(skipNA = true)`
+ *
+ * `}`
+ *
+ * ## See Also
+ * - [Update with][org.jetbrains.kotlinx.dataframe.api.Update.with] to provide a new value for every selected cell giving its row.
+ * - [Update per col][org.jetbrains.kotlinx.dataframe.api.Update.perCol] to provide a new value for every selected cell giving its column.
+ * @param expression The [Row Column Expression][org.jetbrains.kotlinx.dataframe.documentation.ExpressionsGivenRowAndColumn.RowColumnExpression] to provide a new value for every selected cell giving its row and column.
+ */
 public fun <T, C> Update<T, C>.perRowCol(expression: RowColumnExpression<T, C, C>): DataFrame<T> =
     updateImpl { row, column, _ -> expression(row, column) }

+/** [Update per row col][Update.perRowCol] to provide a new value for every selected cell giving its row and column. */
+private interface SeeAlsoPerRowCol
+
+/** ## Update Expression
+ * @see ExpressionsGivenRow.RowValueExpression.WithExample
+ * @see ExpressionsGivenRow.AddDataRowNote
+ */ // doc processor plugin does not work with type aliases yet
 public typealias UpdateExpression<T, C, R> = AddDataRow<T>.(C) -> R

+/** ## With
+ * Provide a new value for every selected cell given its row and its previous value using a
+ * [row value expression][org.jetbrains.kotlinx.dataframe.RowValueExpression].
+ *
+ * For example:
+ *
+ * `df.`[update][update]` { city }.`[with][with]` { name.firstName + " from " + it }`
+ *
+ * `df.`[update][update]` { city }.`[with][with]` { it.uppercase() }`
+ *
+ * ## Note
+ * [update with][org.jetbrains.kotlinx.dataframe.api.Update.with]- and [add][org.jetbrains.kotlinx.dataframe.api.add]-like expressions use [AddDataRow][org.jetbrains.kotlinx.dataframe.api.AddDataRow] instead of [DataRow][org.jetbrains.kotlinx.dataframe.DataRow] as the DSL's receiver type.
+ * This is an extension to [RowValueExpression][org.jetbrains.kotlinx.dataframe.RowValueExpression] and
+ * [RowExpression][org.jetbrains.kotlinx.dataframe.RowExpression] that provides access to
+ * the modified/generated value of the preceding row ([AddDataRow.newValue][org.jetbrains.kotlinx.dataframe.api.AddDataRow.newValue]).
+ * ## See Also
+ * - [Update per col][org.jetbrains.kotlinx.dataframe.api.Update.perCol] to provide a new value for every selected cell giving its column.
+ * - [Update per row col][org.jetbrains.kotlinx.dataframe.api.Update.perRowCol] to provide a new value for every selected cell giving its row and column.
+ * @param expression The [Row Value Expression][org.jetbrains.kotlinx.dataframe.documentation.ExpressionsGivenRow.RowValueExpression.WithExample] to update the rows with.
+ */
 public fun <T, C> Update<T, C>.with(expression: UpdateExpression<T, C, C?>): DataFrame<T> =
     updateImpl { row, _, value -> expression(row, value) }

+/** [Update with][Update.with] to provide a new value for every selected cell giving its row. */
+private interface SeeAlsoWith
+
+/** ## As Frame
+ *
+ * Updates selected [column group][ColumnGroup] as a [DataFrame] with the given [expression].
+ *
+ * Provide a new value for every selected data frame using a [dataframe expression][org.jetbrains.kotlinx.dataframe.DataFrameExpression].
+ *
+ * For example:
+ *
+ * `df.`[update][org.jetbrains.kotlinx.dataframe.api.update]` { name }.`[asFrame][org.jetbrains.kotlinx.dataframe.api.asFrame]` { `[select][org.jetbrains.kotlinx.dataframe.DataFrame.select]` { lastName } }`
+ * @param expression The [Data Frame Expression][org.jetbrains.kotlinx.dataframe.documentation.ExpressionsGivenDataFrame.DataFrameExpression] to replace the selected column group with.
+ */
 public fun <T, C, R> Update<T, DataRow<C>>.asFrame(expression: DataFrameExpression<C, DataFrame<R>>): DataFrame<T> =
     asFrameImpl(expression)

+@Deprecated(
+    "Useless unless in combination with `withValue(null)`, but then users can just use `with { null }`...",
+    ReplaceWith("this as Update<T, C?>")
+)
 public fun <T, C> Update<T, C>.asNullable(): Update<T, C?> = this as Update<T, C?>

+/** ## Per Col
+ *
+ * Per Col can be used for two different types of operations:
+ * - Provide a new value for every selected cell given its column using a [column expression][org.jetbrains.kotlinx.dataframe.ColumnExpression].
+ * - Provide a new value for every selected cell per column using a [Map][Map]`<`[colName: String][String]`, value: C>`
+ *   or [DataRow][org.jetbrains.kotlinx.dataframe.DataRow] as Map.
+ *
+ * ## See Also
+ * - [Update with][org.jetbrains.kotlinx.dataframe.api.Update.with] to provide a new value for every selected cell giving its row.
+ * - [Update per row col][org.jetbrains.kotlinx.dataframe.api.Update.perRowCol] to provide a new value for every selected cell giving its row and column.
+ * ## This Per Col Overload
+ */
+private interface CommonUpdatePerColDoc
+
+/** Provide a new value for every selected cell per column using a [Map][Map]`<`[colName: String][String]`, value: C>`
+ * or [DataRow][DataRow] as Map. */
+private interface UpdatePerColMap
+
+/**
+ * ## Per Col
+ *
+ * Per Col can be used for two different types of operations:
+ * - Provide a new value for every selected cell given its column using a [column expression][org.jetbrains.kotlinx.dataframe.ColumnExpression].
+ * - Provide a new value for every selected cell per column using a [Map][Map]`<`[colName: String][String]`, value: C>`
+ *   or [DataRow][org.jetbrains.kotlinx.dataframe.DataRow] as Map.
+ *
+ * ## See Also
+ * - [Update with][org.jetbrains.kotlinx.dataframe.api.Update.with] to provide a new value for every selected cell giving its row.
+ * - [Update per row col][org.jetbrains.kotlinx.dataframe.api.Update.perRowCol] to provide a new value for every selected cell giving its row and column.
+ * ## This Per Col Overload
+ * Provide a new value for every selected cell per column using a [Map][Map]`<`[colName: String][String]`, value: C>`
+ * or [DataRow][org.jetbrains.kotlinx.dataframe.DataRow] as Map.
+ *
+ * For example:
+ *
+ * `val defaults = {@includeArg [CommonUpdatePerColMapDoc][org.jetbrains.kotlinx.dataframe.api.CommonUpdatePerColMapDoc]}`
+ *
+ * `df.`[update][org.jetbrains.kotlinx.dataframe.api.update]` { name and age }.`[where][org.jetbrains.kotlinx.dataframe.api.Update.where]` { ... }.`[perCol][org.jetbrains.kotlinx.dataframe.api.perCol]`(defaults)`
+ *
+ * @throws IllegalArgumentException if a value for a selected cell's column is not defined in [values\].
+ */
+private interface CommonUpdatePerColMapDoc
+
+/**
+ * ## Per Col
+ *
+ * Per Col can be used for two different types of operations:
+ * - Provide a new value for every selected cell given its column using a [column expression][org.jetbrains.kotlinx.dataframe.ColumnExpression].
+ * - Provide a new value for every selected cell per column using a [Map][Map]`<`[colName: String][String]`, value: C>` + * or [DataRow][org.jetbrains.kotlinx.dataframe.DataRow] as Map. + * + * ## See Also + * - [Update with][org.jetbrains.kotlinx.dataframe.api.Update.with] to provide a new value for every selected cell giving its row. + * - [Update per row col][org.jetbrains.kotlinx.dataframe.api.Update.perRowCol] to provide a new value for every selected cell giving its row and column. + * ## This Per Col Overload + * Provide a new value for every selected cell per column using a [Map][Map]`<`[colName: String][String]`, value: C>` + * or [DataRow][org.jetbrains.kotlinx.dataframe.DataRow] as Map. + * + * For example: + * + * `val defaults = `[mapOf][mapOf]`("name" to "Empty", "age" to 0)` + * + * `df.`[update][org.jetbrains.kotlinx.dataframe.api.update]` { name and age }.`[where][org.jetbrains.kotlinx.dataframe.api.Update.where]` { ... }.`[perCol][org.jetbrains.kotlinx.dataframe.api.perCol]`(defaults)` + * + * @throws IllegalArgumentException if a value for a selected cell's column is not defined in [values][values]. + * + * + * @param values The [Map]<[String], Value> to provide a new value for every selected cell. + * For each selected column, there must be a value in the map with the same name. + */ public fun Update.perCol(values: Map): DataFrame = updateWithValuePerColumnImpl { values[it.name()] ?: throw IllegalArgumentException("Update value for column ${it.name()} is not defined") } +/** + * ## Per Col + * + * Per Col can be used for two different types of operations: + * - Provide a new value for every selected cell given its column using a [column expression][org.jetbrains.kotlinx.dataframe.ColumnExpression]. + * - Provide a new value for every selected cell per column using a [Map][Map]`<`[colName: String][String]`, value: C>` + * or [DataRow][org.jetbrains.kotlinx.dataframe.DataRow] as Map. + * + * ## See Also + * - [Update with][org.jetbrains.kotlinx.dataframe.api.Update.with] to provide a new value for every selected cell giving its row. + * - [Update per row col][org.jetbrains.kotlinx.dataframe.api.Update.perRowCol] to provide a new value for every selected cell giving its row and column. + * ## This Per Col Overload + * Provide a new value for every selected cell per column using a [Map][Map]`<`[colName: String][String]`, value: C>` + * or [DataRow][org.jetbrains.kotlinx.dataframe.DataRow] as Map. + * + * For example: + * + * `val defaults = df.`[getRows][org.jetbrains.kotlinx.dataframe.DataFrame.getRows]`(`[listOf][listOf]`(0))` + * + * `.`[update][org.jetbrains.kotlinx.dataframe.api.update]` { name ` + * + * `df.`[update][org.jetbrains.kotlinx.dataframe.api.update]` { name and age }.`[where][org.jetbrains.kotlinx.dataframe.api.Update.where]` { ... }.`[perCol][org.jetbrains.kotlinx.dataframe.api.perCol]`(defaults)` + * + * @throws IllegalArgumentException if a value for a selected cell's column is not defined in [values][values]. + * .`[with][org.jetbrains.kotlinx.dataframe.api.Update.with]` { "Empty" }` + * + * `.`[update][org.jetbrains.kotlinx.dataframe.api.update]` { age }.`[with][org.jetbrains.kotlinx.dataframe.api.Update.with]` { 0 }` + * + * `.first()} + * + * @param values The [DataRow][org.jetbrains.kotlinx.dataframe.DataRow] to provide a new value for every selected cell. 
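+ *
+ * For illustration, a minimal, compilable sketch of both `perCol` flavours, assuming hypothetical `name`/`age` columns
+ * (String API, so the cell type is `Any?`; not taken from this PR):
+ *
+ * ```kotlin
+ * import org.jetbrains.kotlinx.dataframe.api.*
+ *
+ * val df = dataFrameOf("name", "age")("Alice", 15, null, 20)
+ *
+ * // Map-based: one replacement value per selected column.
+ * val filledFromMap = df.update("name", "age").where { it == null }
+ *     .perCol(mapOf("name" to "Empty", "age" to 0))
+ *
+ * // Row-based: take the replacement values from the first row of a defaults frame.
+ * val defaults = dataFrameOf("name", "age")("Empty", 0).first()
+ * val filledFromRow = df.update("name", "age").where { it == null }.perCol(defaults)
+ * ```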
+ */ public fun Update.perCol(values: AnyRow): DataFrame = perCol(values.toMap() as Map) -public fun Update.perCol(valueSelector: Selector, C>): DataFrame = +/** + * ## Per Col + * + * Per Col can be used for two different types of operations: + * - Provide a new value for every selected cell given its column using a [column expression][org.jetbrains.kotlinx.dataframe.ColumnExpression]. + * - Provide a new value for every selected cell per column using a [Map][Map]`<`[colName: String][String]`, value: C>` + * or [DataRow][org.jetbrains.kotlinx.dataframe.DataRow] as Map. + * + * ## See Also + * - [Update with][org.jetbrains.kotlinx.dataframe.api.Update.with] to provide a new value for every selected cell giving its row. + * - [Update per row col][org.jetbrains.kotlinx.dataframe.api.Update.perRowCol] to provide a new value for every selected cell giving its row and column. + * ## This Per Col Overload + * Provide a new value for every selected cell given its column using a [column expression][org.jetbrains.kotlinx.dataframe.ColumnExpression]. + * + * For example: + * + * `df.`[update][org.jetbrains.kotlinx.dataframe.api.update]` { age ` { `[mean][org.jetbrains.kotlinx.dataframe.DataColumn.mean]`(skipNA = true) }` + * + * `df.`[update][org.jetbrains.kotlinx.dataframe.api.update]` { age ` { `[count][org.jetbrains.kotlinx.dataframe.DataColumn.count]` { it > 10 } }` + * + * .`[perCol][org.jetbrains.kotlinx.dataframe.api.perCol]} + * + * @param valueSelector The [Column Expression][org.jetbrains.kotlinx.dataframe.documentation.ExpressionsGivenColumn.ColumnExpression] to provide a new value for every selected cell giving its column. + */ +public fun Update.perCol(valueSelector: ColumnExpression): DataFrame = updateWithValuePerColumnImpl(valueSelector) +/** [Update per col][Update.perCol] to provide a new value for every selected cell giving its column. */ +private interface SeeAlsoPerCol + +/** Chains up two row value filters together. */ internal infix fun RowValueFilter?.and(other: RowValueFilter): RowValueFilter { if (this == null) return other val thisExp = this return { thisExp(this, it) && other(this, it) } } +/** ## Not Null + * + * Selects only the rows where the values in the selected columns are not null. + * + * Shorthand for: [update][org.jetbrains.kotlinx.dataframe.api.update]` { ... }.`[where][org.jetbrains.kotlinx.dataframe.api.Update.where]` { it != null }` + * + * For example: + * + * `df.`[update][org.jetbrains.kotlinx.dataframe.api.update]` { `[colsOf][org.jetbrains.kotlinx.dataframe.api.colsOf]`<`[Number][Number]`?>() }.`[notNull][org.jetbrains.kotlinx.dataframe.api.notNull]`()`.[perCol][org.jetbrains.kotlinx.dataframe.api.Update.perCol] `{ `[mean][org.jetbrains.kotlinx.dataframe.api.mean]`() }` + * + * ### Optional + * Provide an [expression][expression] to update the rows with. + * This combines [with][org.jetbrains.kotlinx.dataframe.api.Update.with] with [notNull][org.jetbrains.kotlinx.dataframe.api.notNull]. + * + * For example: + * + * `df.`[update][org.jetbrains.kotlinx.dataframe.api.update]` { city }.`[notNull][org.jetbrains.kotlinx.dataframe.api.Update.notNull]` { it.`[toUpperCase][String.toUpperCase]`() }` + * + * @param expression Optional [Row Expression][org.jetbrains.kotlinx.dataframe.documentation.ExpressionsGivenRow.RowExpression.WithExample] to update the rows with. 
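+ *
+ * For illustration, a minimal, compilable sketch assuming a hypothetical nullable `city` column (not part of this PR):
+ *
+ * ```kotlin
+ * import org.jetbrains.kotlinx.dataframe.api.*
+ *
+ * val df = dataFrameOf("city")("Berlin", null, "Paris")
+ *
+ * // Only the non-null cities are updated; null cells are left untouched.
+ * val upperCased = df.update { colsOf<String?>() }.notNull { it.uppercase() }
+ * ```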
*/ public fun Update.notNull(): Update = - copy(filter = filter and { it != null }) as Update + where { it != null } as Update -public fun Update.notNull(expression: RowValueExpression): DataFrame = - notNull().updateImpl { row, column, value -> - expression(row, value) - } +/** + * ## Not Null + * + * Selects only the rows where the values in the selected columns are not null. + * + * Shorthand for: [update][update]` { ... }.`[where][Update.where]` { it != null }` + * + * For example: + * + * `df.`[update][update]` { `[colsOf][colsOf]`<`[Number][Number]`?>() }.`[notNull][notNull]`()`.[perCol][Update.perCol] `{ `[mean][mean]`() }` + * + * ### Optional + * Provide an [expression] to update the rows with. + * This combines [with][Update.with] with [notNull]. + * + * For example: + * + * `df.`[update][update]` { city }.`[notNull][Update.notNull]` { it.`[toUpperCase][String.toUpperCase]`() }` + * + * @param expression Optional [Row Expression][org.jetbrains.kotlinx.dataframe.documentation.ExpressionsGivenRow.RowExpression.WithExample] to update the rows with. + */ +public fun Update.notNull(expression: UpdateExpression): DataFrame = + notNull().with(expression) +/** + * ## The Update Operation + * + * Returns the [DataFrame][org.jetbrains.kotlinx.dataframe.DataFrame] with changed values in some cells + * (column types can not be changed). + * + * Check out the [`update` Operation Usage][org.jetbrains.kotlinx.dataframe.api.Update.Usage]. + * + * For more information: [See `update` on the documentation website.](https://kotlin.github.io/dataframe/update.html) + * ## ‎ + * The columns to update need to be selected. See [Selecting Columns][org.jetbrains.kotlinx.dataframe.documentation.SelectingColumns] for all the selecting options. + * ## This Update Overload + * ### This overload is a combination of [update][org.jetbrains.kotlinx.dataframe.api.update] and [with][org.jetbrains.kotlinx.dataframe.api.Update.with]. + * + * Select columns using [column accessors][org.jetbrains.kotlinx.dataframe.columns.ColumnReference] + * ([Column Accessors API][org.jetbrains.kotlinx.dataframe.documentation.AccessApi.ColumnAccessorsApi]). + * + * Provide a new value for every selected cell given its row and its previous value using a + * [row value expression][org.jetbrains.kotlinx.dataframe.RowValueExpression]. + * + * For example: + * + * `df.`[update][org.jetbrains.kotlinx.dataframe.api.update]`("city")` ` { name.firstName + " from " + it }` + * + * `df.`[update][org.jetbrains.kotlinx.dataframe.api.update]`("city")` ` { it.uppercase() }` + * + * + * + * @param columns The [Column references][org.jetbrains.kotlinx.dataframe.documentation.SelectingColumns.ColumnAccessors.WithExample] of this [DataFrame][org.jetbrains.kotlinx.dataframe.DataFrame] to update. + * @param expression The [Row Value Expression][org.jetbrains.kotlinx.dataframe.documentation.ExpressionsGivenRow.RowValueExpression.WithExample] to update the rows with. + */ public fun DataFrame.update( firstCol: ColumnReference, vararg cols: ColumnReference, - expression: RowValueExpression + expression: UpdateExpression ): DataFrame = update(*headPlusArray(firstCol, cols)).with(expression) +/** + * ## The Update Operation + * + * Returns the [DataFrame][org.jetbrains.kotlinx.dataframe.DataFrame] with changed values in some cells + * (column types can not be changed). + * + * Check out the [`update` Operation Usage][org.jetbrains.kotlinx.dataframe.api.Update.Usage]. 
+ * + * For more information: [See `update` on the documentation website.](https://kotlin.github.io/dataframe/update.html) + * ## ‎ + * The columns to update need to be selected. See [Selecting Columns][org.jetbrains.kotlinx.dataframe.documentation.SelectingColumns] for all the selecting options. + * ## This Update Overload + * ### This overload is a combination of [update][org.jetbrains.kotlinx.dataframe.api.update] and [with][org.jetbrains.kotlinx.dataframe.api.Update.with]. + * + * Select columns using [KProperties][KProperty] ([KProperties API][org.jetbrains.kotlinx.dataframe.documentation.AccessApi.KPropertiesApi]). + * + * Provide a new value for every selected cell given its row and its previous value using a + * [row value expression][org.jetbrains.kotlinx.dataframe.RowValueExpression]. + * + * For example: + * + * `df.`[update][org.jetbrains.kotlinx.dataframe.api.update]`("city")` ` { name.firstName + " from " + it }` + * + * `df.`[update][org.jetbrains.kotlinx.dataframe.api.update]`("city")` ` { it.uppercase() }` + * + * + * + * @param columns The [KProperties][org.jetbrains.kotlinx.dataframe.documentation.SelectingColumns.KProperties.WithExample] corresponding to columns of this [DataFrame][org.jetbrains.kotlinx.dataframe.DataFrame] to update. + * @param expression The [Row Value Expression][org.jetbrains.kotlinx.dataframe.documentation.ExpressionsGivenRow.RowValueExpression.WithExample] to update the rows with. + */ public fun DataFrame.update( firstCol: KProperty, vararg cols: KProperty, - expression: RowValueExpression + expression: UpdateExpression ): DataFrame = update(*headPlusArray(firstCol, cols)).with(expression) +/** + * ## The Update Operation + * + * Returns the [DataFrame][org.jetbrains.kotlinx.dataframe.DataFrame] with changed values in some cells + * (column types can not be changed). + * + * Check out the [`update` Operation Usage][org.jetbrains.kotlinx.dataframe.api.Update.Usage]. + * + * For more information: [See `update` on the documentation website.](https://kotlin.github.io/dataframe/update.html) + * ## ‎ + * The columns to update need to be selected. See [Selecting Columns][org.jetbrains.kotlinx.dataframe.documentation.SelectingColumns] for all the selecting options. + * ## This Update Overload + * ### This overload is a combination of [update][org.jetbrains.kotlinx.dataframe.api.update] and [with][org.jetbrains.kotlinx.dataframe.api.Update.with]. + * + * Select columns using their [column names][String] + * ([String API][org.jetbrains.kotlinx.dataframe.documentation.AccessApi.StringApi]). + * + * Provide a new value for every selected cell given its row and its previous value using a + * [row value expression][org.jetbrains.kotlinx.dataframe.RowValueExpression]. + * + * For example: + * + * `df.`[update][org.jetbrains.kotlinx.dataframe.api.update]`("city")` ` { name.firstName + " from " + it }` + * + * `df.`[update][org.jetbrains.kotlinx.dataframe.api.update]`("city")` ` { it.uppercase() }` + * + * + * + * @param columns The [Column names][org.jetbrains.kotlinx.dataframe.documentation.SelectingColumns.ColumnNames.WithExample] belonging to this [DataFrame][org.jetbrains.kotlinx.dataframe.DataFrame] to update. + * @param expression The [Row Value Expression][org.jetbrains.kotlinx.dataframe.documentation.ExpressionsGivenRow.RowValueExpression.WithExample] to update the rows with. 
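+ *
+ * For illustration, a minimal, compilable sketch assuming hypothetical `city`/`age` columns
+ * (with the String API the cell type is `Any?`, hence the cast; not part of this PR):
+ *
+ * ```kotlin
+ * import org.jetbrains.kotlinx.dataframe.api.*
+ *
+ * val df = dataFrameOf("city", "age")("berlin", 20, "paris", 30)
+ *
+ * // Select by column name and update in a single call.
+ * val result = df.update("city") { (it as String).uppercase() }
+ * ```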
+ */ public fun DataFrame.update( firstCol: String, vararg cols: String, - expression: RowValueExpression + expression: UpdateExpression ): DataFrame = update(*headPlusArray(firstCol, cols)).with(expression) -public fun Update.withNull(): DataFrame = asNullable().withValue(null) +/** + * Specific version of [with] that simply sets the value of each selected row to {@includeArg [CommonSpecificWithDocFirstArg]}. + * + * For example: + * + * `df.`[update][update]` { id }.`[where][Update.where]` { it < 0 }.`{@includeArg [CommonSpecificWithDocSecondArg]}` + */ +private interface CommonSpecificWithDoc + +/** Arg for the resulting value */ +private interface CommonSpecificWithDocFirstArg + +/** Arg for the function call */ +private interface CommonSpecificWithDocSecondArg + +/** + * ## With Null + * Specific version of [with][org.jetbrains.kotlinx.dataframe.api.with] that simply sets the value of each selected row to `null`. + * + * For example: + * + * `df.`[update][org.jetbrains.kotlinx.dataframe.api.update]` { id }.`[where][org.jetbrains.kotlinx.dataframe.api.Update.where]` { it < 0 }.`[withNull][org.jetbrains.kotlinx.dataframe.api.withNull]`()` + * + * + */ +public fun Update.withNull(): DataFrame = with { null } +/** + * ## With Zero + * Specific version of [with][org.jetbrains.kotlinx.dataframe.api.with] that simply sets the value of each selected row to `0`. + * + * For example: + * + * `df.`[update][org.jetbrains.kotlinx.dataframe.api.update]` { id }.`[where][org.jetbrains.kotlinx.dataframe.api.Update.where]` { it < 0 }.`[withZero][org.jetbrains.kotlinx.dataframe.api.withZero]`()` + * + * + */ public fun Update.withZero(): DataFrame = updateWithValuePerColumnImpl { 0 as C } +/** + * ## With Value + * Specific version of [with][org.jetbrains.kotlinx.dataframe.api.with] that simply sets the value of each selected row to [value][org.jetbrains.kotlinx.dataframe.api.value]. + * + * For example: + * + * `df.`[update][org.jetbrains.kotlinx.dataframe.api.update]` { id }.`[where][org.jetbrains.kotlinx.dataframe.api.Update.where]` { it < 0 }.`[withValue][org.jetbrains.kotlinx.dataframe.api.withValue]`(-1)` + * + * + * + * @param value The value to set the selected rows to. In contrast to [with][Update.with], this must be the same exact type. + */ public fun Update.withValue(value: C): DataFrame = with { value } diff --git a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/AccessApi.kt b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/AccessApi.kt new file mode 100644 index 0000000000..a92955347e --- /dev/null +++ b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/AccessApi.kt @@ -0,0 +1,135 @@ +package org.jetbrains.kotlinx.dataframe.documentation + +import org.jetbrains.kotlinx.dataframe.documentation.AccessApi.* + +/** + * ## Access APIs + * + * By nature, data frames are dynamic objects, column labels depend on the input source and also new columns could be added + * or deleted while wrangling. Kotlin, in contrast, is a statically typed language and all types are defined and verified + * ahead of execution. That's why creating a flexible, handy, and, at the same time, safe API to a data frame is tricky. + * + * In `Kotlin DataFrame` we provide four different ways to access columns, and, while they're essentially different, they + * look pretty similar in the data wrangling DSL. 
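+ *
+ * For instance, the same row filter can be written with the String API or with generated extension properties
+ * (an illustrative sketch; `age` is a hypothetical column, and the commented-out form assumes a generated data schema):
+ *
+ * ```kotlin
+ * val df = dataFrameOf("age")(15, 21, 30)
+ * df.filter { "age"<Int>() > 18 }   // String API: resolved at runtime
+ * // df.filter { age > 18 }         // Extension Properties API: needs a generated schema
+ * ```
+ *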
These include: + * - [Extension Properties API][org.jetbrains.kotlinx.dataframe.documentation.AccessApi.ExtensionPropertiesApi] + * - [KProperties API][org.jetbrains.kotlinx.dataframe.documentation.AccessApi.KPropertiesApi] + * - [Column Accessors API][org.jetbrains.kotlinx.dataframe.documentation.AccessApi.ColumnAccessorsApi] + * - [String API][org.jetbrains.kotlinx.dataframe.documentation.AccessApi.StringApi] + * + * For more information: [See Access APIs on the documentation website.](https://kotlin.github.io/dataframe/apilevels.html) + * + */ +internal interface AccessApi { + + /** API: + * - [Extension Properties API][org.jetbrains.kotlinx.dataframe.documentation.AccessApi.ExtensionPropertiesApi] + * - [KProperties API][org.jetbrains.kotlinx.dataframe.documentation.AccessApi.KPropertiesApi] + * - [Column Accessors API][org.jetbrains.kotlinx.dataframe.documentation.AccessApi.ColumnAccessorsApi] + * - [String API][org.jetbrains.kotlinx.dataframe.documentation.AccessApi.StringApi] + */ + interface AnyApiLinks + + /** + * String API. + * In this [AccessApi], columns are accessed by a [String] representing their name. + * Type-checking is done at runtime, name-checking too. + * + * For more information: [See String API on the documentation website.](https://kotlin.github.io/dataframe/stringapi.html) + * + * For example: + * ```kotlin + * DataFrame.read("titanic.csv") + * .add("lastName") { "name"().split(",").last() } + * .dropNulls("age") + * .filter { + * "survived"() && + * "home"().endsWith("NY") && + * "age"() in 10..20 + * } + * ``` + */ + interface StringApi + + /** [String API][StringApi] */ + interface StringApiLink + + /** + * Column Accessors API. + * In this [AccessApi], every column has a descriptor; + * a variable that represents its name and type. + * + * For more information: [See Column Accessors API on the documentation website.](https://kotlin.github.io/dataframe/columnaccessorsapi.html) + * + * For example: + * ```kotlin + * val survived by column() + * val home by column() + * val age by column() + * val name by column() + * val lastName by column() + * + * DataFrame.read("titanic.csv") + * .add(lastName) { name().split(",").last() } + * .dropNulls { age } + * .filter { survived() && home().endsWith("NY") && age()!! in 10..20 } + * ``` + */ + interface ColumnAccessorsApi + + /** [Column Accessors API][AccessApi.ColumnAccessorsApi] */ + interface ColumnAccessorsApiLink + + /** + * KProperties API. + * In this [AccessApi], columns accessed by the + * [`KProperty`](https://kotlinlang.org/docs/reflection.html#property-references) + * of some class. + * The name and type of column should match the name and type of property, respectively. + * + * For more information: [See KProperties API on the documentation website.](https://kotlin.github.io/dataframe/kpropertiesapi.html) + * + * For example: + * ```kotlin + * data class Passenger( + * val survived: Boolean, + * val home: String, + * val age: Int, + * val lastName: String + * ) + * + * val passengers = DataFrame.read("titanic.csv") + * .add(Passenger::lastName) { "name"().split(",").last() } + * .dropNulls(Passenger::age) + * .filter { + * it[Passenger::survived] && + * it[Passenger::home].endsWith("NY") && + * it[Passenger::age] in 10..20 + * } + * .toListOf() + * ``` + */ + interface KPropertiesApi + + /** [KProperties API][KPropertiesApi] */ + interface KPropertiesApiLink + + /** + * Extension Properties API. + * In this [AccessApi], extension access properties are generated based on the dataframe schema. 
+ * The name and type of properties are inferred from the name and type of the corresponding columns. + * + * For more information: [See Extension Properties API on the documentation website.](https://kotlin.github.io/dataframe/extensionpropertiesapi.html) + * + * For example: + * ```kotlin + * val df = DataFrame.read("titanic.csv") + * ``` + */ + interface ExtensionPropertiesApi + + /** [Extension Properties API][ExtensionPropertiesApi] */ + interface ExtensionPropertiesApiLink +} + +/** [Access API][AccessApi] */ +internal interface AccessApiLink diff --git a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/DocumentationUrls.kt b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/DocumentationUrls.kt new file mode 100644 index 0000000000..7871a890d7 --- /dev/null +++ b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/DocumentationUrls.kt @@ -0,0 +1,66 @@ +package org.jetbrains.kotlinx.dataframe.documentation + +private interface DocumentationUrls { + + interface NameArg + + /** See {@includeArg [NameArg]} on the documentation website. */ + interface Text + + /** https://kotlin.github.io/dataframe */ + interface Url + + interface DataRow { + + /** [See Row Expressions on the documentation website.](https://kotlin.github.io/dataframe/datarow.html#row-expressions) */ + interface RowExpressions + + /** [See Row Conditions on the documentation website.](https://kotlin.github.io/dataframe/datarow.html#row-conditions) */ + interface RowConditions + } + + /** [See `update` on the documentation website.](https://kotlin.github.io/dataframe/update.html) */ + interface Update + + /** [See `fill` on the documentation website.](https://kotlin.github.io/dataframe/fill.html) */ + interface Fill { + + /** [See `fillNulls` on the documentation website.](https://kotlin.github.io/dataframe/fill.html#fillnulls) */ + interface FillNulls + + /** [See `fillNaNs` on the documentation website.](https://kotlin.github.io/dataframe/fill.html#fillnans) */ + interface FillNaNs + + /** [See `fillNA` on the documentation website.](https://kotlin.github.io/dataframe/fill.html#fillna) */ + interface FillNA + } + + /** [See `drop` on the documentation website.](https://kotlin.github.io/dataframe/drop.html) */ + interface Drop { + + /** [See `dropNulls` on the documentation website.](https://kotlin.github.io/dataframe/drop.html#dropnulls) */ + interface DropNulls + + /** [See `dropNaNs` on the documentation website.](https://kotlin.github.io/dataframe/drop.html#dropnans) */ + interface DropNaNs + + /** [See `dropNA` on the documentation website.](https://kotlin.github.io/dataframe/drop.html#dropna) */ + interface DropNA + } + + /** [See Access APIs on the documentation website.](https://kotlin.github.io/dataframe/apilevels.html) */ + interface AccessApis { + + /** [See String API on the documentation website.](https://kotlin.github.io/dataframe/stringapi.html) */ + interface StringApi + + /** [See Column Accessors API on the documentation website.](https://kotlin.github.io/dataframe/columnaccessorsapi.html) */ + interface ColumnAccessorsApi + + /** [See KProperties API on the documentation website.](https://kotlin.github.io/dataframe/kpropertiesapi.html) */ + interface KPropertiesApi + + /** [See Extension Properties API on the documentation website.](https://kotlin.github.io/dataframe/extensionpropertiesapi.html) */ + interface ExtensionPropertiesApi + } +} diff --git 
a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/ExpressionsGivenColumn.kt b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/ExpressionsGivenColumn.kt new file mode 100644 index 0000000000..ac95e39c2f --- /dev/null +++ b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/ExpressionsGivenColumn.kt @@ -0,0 +1,41 @@ +package org.jetbrains.kotlinx.dataframe.documentation + +import org.jetbrains.kotlinx.dataframe.* +import org.jetbrains.kotlinx.dataframe.api.* +import org.jetbrains.kotlinx.dataframe.documentation.ExpressionsGivenColumn.ColumnExpressionLink +import org.jetbrains.kotlinx.dataframe.ColumnExpression as DfColumnExpression + +/** + * ## Expressions Given Column + * Expressing values using a "Column Expression" can occur exclusively in a + * [Column Expression][org.jetbrains.kotlinx.dataframe.documentation.ExpressionsGivenColumn.ColumnExpression]. + */ +internal interface ExpressionsGivenColumn { + + /** + * The key for an @arg that will define the operation name for the examples below. + * Make sure to [alias][your examples]. + */ + interface OperationArg + + interface SetDefaultOperationArg + + /** Provide a new value for every selected cell given its column using a [column expression][DfColumnExpression]. */ + interface ColumnExpression { + + /** + * Provide a new value for every selected cell given its column using a [column expression][org.jetbrains.kotlinx.dataframe.ColumnExpression]. + * + * For example: + * + * `df.`operation` { `[mean][DataColumn.mean]`(skipNA = true) }` + * + * `df.`operation` { `[count][DataColumn.count]` { it > 10 } }` + * + */ + interface WithExample + } + + /** [Column Expression][ColumnExpression] */ + interface ColumnExpressionLink +} diff --git a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/ExpressionsGivenDataFrame.kt b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/ExpressionsGivenDataFrame.kt new file mode 100644 index 0000000000..635e0a7208 --- /dev/null +++ b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/ExpressionsGivenDataFrame.kt @@ -0,0 +1,32 @@ +package org.jetbrains.kotlinx.dataframe.documentation + +import org.jetbrains.kotlinx.dataframe.DataFrame +import org.jetbrains.kotlinx.dataframe.api.* +import org.jetbrains.kotlinx.dataframe.documentation.ExpressionsGivenDataFrame.DataFrameExpressionLink +import org.jetbrains.kotlinx.dataframe.DataFrameExpression as DfDataFrameExpression + +/** + * ## Expressions Given DataFrame + * Expressing values using a "Data Frame Expression" can occur exclusively in a + * [Data Frame Expression][org.jetbrains.kotlinx.dataframe.documentation.ExpressionsGivenDataFrame.DataFrameExpression]. + */ +internal interface ExpressionsGivenDataFrame { + + interface OperationArg + + /** Provide a new value for every selected data frame using a [dataframe expression][DfDataFrameExpression]. */ + interface DataFrameExpression { + + /** + * Provide a new value for every selected data frame using a [dataframe expression][org.jetbrains.kotlinx.dataframe.DataFrameExpression]. 
+ * + * For example: + * + * {@includeArg [OperationArg][org.jetbrains.kotlinx.dataframe.documentation.ExpressionsGivenDataFrame.OperationArg]}` { `[select][org.jetbrains.kotlinx.dataframe.DataFrame.select]` { lastName } }` + */ + interface WithExample + } + + /** [Data Frame Expression][DataFrameExpression] */ + interface DataFrameExpressionLink +} diff --git a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/ExpressionsGivenRow.kt b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/ExpressionsGivenRow.kt new file mode 100644 index 0000000000..4fcaf97090 --- /dev/null +++ b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/ExpressionsGivenRow.kt @@ -0,0 +1,93 @@ +package org.jetbrains.kotlinx.dataframe.documentation + +import org.jetbrains.kotlinx.dataframe.DataFrame +import org.jetbrains.kotlinx.dataframe.DataRow +import org.jetbrains.kotlinx.dataframe.api.* +import org.jetbrains.kotlinx.dataframe.documentation.ExpressionsGivenRow.* +import org.jetbrains.kotlinx.dataframe.RowExpression as DfRowExpression +import org.jetbrains.kotlinx.dataframe.RowValueExpression as DfRowValueExpression + +/** + * ## Expressions Given Row + * Expressing values using a "Row Expression" ([See Row Expressions on the documentation website.](https://kotlin.github.io/dataframe/datarow.html#row-expressions)) + * can occur in the following two types of operations: + * + * - Providing a new value for every selected cell given the row of that cell ([Row Expression][org.jetbrains.kotlinx.dataframe.documentation.ExpressionsGivenRow.RowExpression.WithExample]), + * for instance in [map][DataFrame.map], [add][DataFrame.add], and [insert][DataFrame.insert] + * (using [RowExpression][DfRowExpression]). + * + * - Providing a new value for every selected cell given the row of that cell and its previous value ([Row Value Expression][org.jetbrains.kotlinx.dataframe.documentation.ExpressionsGivenRow.RowValueExpression.WithExample]), + * for instance in [update.with][Update.with], and [convert.notNull][Convert.notNull] + * (using [RowValueExpression][DfRowValueExpression]). + * + * NOTE: + * + * [update with][org.jetbrains.kotlinx.dataframe.api.Update.with]- and [add][org.jetbrains.kotlinx.dataframe.api.add]-like expressions use [AddDataRow][org.jetbrains.kotlinx.dataframe.api.AddDataRow] instead of [DataRow][org.jetbrains.kotlinx.dataframe.DataRow] as the DSL's receiver type. + * This is an extension to [RowValueExpression][org.jetbrains.kotlinx.dataframe.RowValueExpression] and + * [RowExpression][org.jetbrains.kotlinx.dataframe.RowExpression] that provides access to + * the modified/generated value of the preceding row ([AddDataRow.newValue][org.jetbrains.kotlinx.dataframe.api.AddDataRow.newValue]). + * + * A [Row Expression][org.jetbrains.kotlinx.dataframe.documentation.ExpressionsGivenRow.RowExpression.WithExample] is similar to a [Row Condition][org.jetbrains.kotlinx.dataframe.documentation.SelectingRows] but that expects a [Boolean][Boolean] as result. + */ +internal interface ExpressionsGivenRow { + + /** + * The key for an @arg that will define the operation name for the examples below. + * Make sure to [alias][your examples]. + */ + interface OperationArg + + interface SetDefaultOperationArg + + /** + * [update with][org.jetbrains.kotlinx.dataframe.api.Update.with]- and [add][org.jetbrains.kotlinx.dataframe.api.add]-like expressions use [AddDataRow] instead of [DataRow] as the DSL's receiver type. 
+ * This is an extension to [RowValueExpression][DfRowValueExpression] and + * [RowExpression][DfRowExpression] that provides access to + * the modified/generated value of the preceding row ([AddDataRow.newValue]). + */ + interface AddDataRowNote + + /** Provide a new value for every selected cell given its row using a [row expression][DfRowExpression]. */ + interface RowExpression { + + /** + * Provide a new value for every selected cell given its row using a [row expression][org.jetbrains.kotlinx.dataframe.RowExpression]. + * + * For example: + * + * `df.`operation` { name.firstName + " " + name.lastName }` + * + * `df.`operation` { 2021 - age }` + * + */ + interface WithExample + } + + /** [Row Expression][RowExpression.WithExample] */ + interface RowExpressionLink + + /** Provide a new value for every selected cell given its row and its previous value using a + * [row value expression][DfRowValueExpression]. + */ + interface RowValueExpression { + + /** + * Provide a new value for every selected cell given its row and its previous value using a + * [row value expression][org.jetbrains.kotlinx.dataframe.RowValueExpression]. + * + * For example: + * + * `df.`operation` { name.firstName + " from " + it }` + * + * `df.`operation` { it.uppercase() }` + * + */ + interface WithExample + } + + /** [Row Value Expression][RowValueExpression.WithExample] */ + interface RowValueExpressionLink +} + +/** [Row Expression][ExpressionsGivenRow] */ +internal interface RowExpressionsLink diff --git a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/ExpressionsGivenRowAndColumn.kt b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/ExpressionsGivenRowAndColumn.kt new file mode 100644 index 0000000000..8f343c3e29 --- /dev/null +++ b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/ExpressionsGivenRowAndColumn.kt @@ -0,0 +1,44 @@ +package org.jetbrains.kotlinx.dataframe.documentation + +import org.jetbrains.kotlinx.dataframe.* +import org.jetbrains.kotlinx.dataframe.api.* +import org.jetbrains.kotlinx.dataframe.documentation.ExpressionsGivenRowAndColumn.RowColumnExpressionLink +import org.jetbrains.kotlinx.dataframe.RowColumnExpression as DfRowColumnExpression + +/** + * ## Expressions Given Row and Column + * Expressing values using a "Row-Column Expression" can occur exclusively in a + * [Row Column Expression][org.jetbrains.kotlinx.dataframe.documentation.ExpressionsGivenRowAndColumn.RowColumnExpression]. + */ +internal interface ExpressionsGivenRowAndColumn { + + /** + * The key for an @arg that will define the operation name for the examples below. + * Make sure to [alias][your examples]. + */ + interface OperationArg + + interface SetDefaultOperationArg + + /** Provide a new value for every selected cell given both its row and column using a [row-column expression][DfRowColumnExpression]. */ + interface RowColumnExpression { + + /** + * Provide a new value for every selected cell given both its row and column using a [row-column expression][org.jetbrains.kotlinx.dataframe.RowColumnExpression]. 
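+ *
+ * The placeholder examples below write the call site as `operation`; as a concrete, compilable variant
+ * (assuming a hypothetical integer `age` column, not part of this PR):
+ *
+ * ```kotlin
+ * import org.jetbrains.kotlinx.dataframe.api.*
+ *
+ * val df = dataFrameOf("age")(10, 20, 30)
+ *
+ * // Scale every value by the mean of its own column.
+ * val scaled = df.update { colsOf<Int>() }
+ *     .perRowCol { row, col -> (row[col] / col.mean(skipNA = true)).toInt() }
+ * ```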
+ * + * For example: + * + * `df.`operation` { row, col ->` + * + * `row.age / col.`[mean][org.jetbrains.kotlinx.dataframe.DataColumn.mean]`(skipNA = true)` + * + * `}` + * + * + */ + interface WithExample + } + + /** [Row Column Expression][RowColumnExpression] */ + interface RowColumnExpressionLink +} diff --git a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/NA.kt b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/NA.kt new file mode 100644 index 0000000000..35251f23fd --- /dev/null +++ b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/NA.kt @@ -0,0 +1,18 @@ +package org.jetbrains.kotlinx.dataframe.documentation + +import org.jetbrains.kotlinx.dataframe.api.dropNA +import org.jetbrains.kotlinx.dataframe.api.fillNA + +/** + * ## `NA` + * `NA` in Dataframe can be seen as "[NaN] or `null`". + * + * [Floats][Float] or [Doubles][Double] can be represented as [Float.NaN] or [Double.NaN], respectively, + * in cases where a mathematical operation is undefined, such as dividing by zero. + * + * You can also use [fillNA][fillNA] to replace `NAs` in certain columns with a given value or expression + * or [dropNA][dropNA] to drop rows with `NAs` in them. + * + * @see NaN + */ +internal interface NA diff --git a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/NaN.kt b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/NaN.kt new file mode 100644 index 0000000000..18b9792947 --- /dev/null +++ b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/NaN.kt @@ -0,0 +1,16 @@ +package org.jetbrains.kotlinx.dataframe.documentation + +import org.jetbrains.kotlinx.dataframe.api.dropNaNs +import org.jetbrains.kotlinx.dataframe.api.fillNaNs + +/** + * ## `NaN` + * [Floats][Float] or [Doubles][Double] can be represented as [Float.NaN] or [Double.NaN], respectively, + * in cases where a mathematical operation is undefined, such as dividing by zero. + * + * You can also use [fillNaNs][fillNaNs] to replace `NaNs` in certain columns with a given value or expression + * or [dropNaNs][dropNaNs] to drop rows with `NaNs` in them. 
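+ *
+ * For illustration, a minimal, compilable sketch assuming a hypothetical `temperature` column of [Doubles][Double]
+ * (not part of this PR):
+ *
+ * ```kotlin
+ * import org.jetbrains.kotlinx.dataframe.api.*
+ *
+ * val df = dataFrameOf("temperature")(12.3, Double.NaN, 15.8)
+ *
+ * val filled = df.fillNaNs { colsOf<Double>() }.with { 0.0 } // replace NaN with a default value
+ * val cleaned = df.dropNaNs { colsOf<Double>() }             // or drop the offending rows
+ * ```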
+ * + * @see NA + */ +internal interface NaN diff --git a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/SelectingColumns.kt b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/SelectingColumns.kt new file mode 100644 index 0000000000..85dc436dca --- /dev/null +++ b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/SelectingColumns.kt @@ -0,0 +1,165 @@ +package org.jetbrains.kotlinx.dataframe.documentation + +import org.jetbrains.kotlinx.dataframe.ColumnSelector +import org.jetbrains.kotlinx.dataframe.ColumnsSelector +import org.jetbrains.kotlinx.dataframe.DataFrame +import org.jetbrains.kotlinx.dataframe.api.* +import org.jetbrains.kotlinx.dataframe.columns.ColumnReference +import org.jetbrains.kotlinx.dataframe.columns.ColumnSet +import org.jetbrains.kotlinx.dataframe.columns.SingleColumn +import org.jetbrains.kotlinx.dataframe.documentation.SelectingColumns.* +import kotlin.reflect.KProperty + +/** [Selecting Columns][SelectingColumns] */ +internal interface SelectingColumnsLink + +/** + * ## Selecting Columns + * Selecting columns for various operations (including but not limited to + * [DataFrame.select], [DataFrame.update], [DataFrame.gather], and [DataFrame.fillNulls]) + * can be done in the following ways: + * - Select or express columns using the Column(s) Selection DSL. + * (Any [Access API][org.jetbrains.kotlinx.dataframe.documentation.AccessApi]). + * + * This DSL comes in the form of either a [Column Selector][org.jetbrains.kotlinx.dataframe.ColumnSelector]- or [Columns Selector][org.jetbrains.kotlinx.dataframe.ColumnsSelector] lambda, + * which operate in the [Column Selection DSL][org.jetbrains.kotlinx.dataframe.api.ColumnSelectionDsl] or the [Columns Selection DSL][org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl] and + * expect you to return a [SingleColumn][org.jetbrains.kotlinx.dataframe.columns.SingleColumn] or [ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet], respectively. + * + * For example: + * + * `df.`operation` { length `[and][org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl.and]` age }` + * + * `df.`operation` { `[cols][org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl.cols]`(1..5) }` + * + * `df.`operation` { `[colsOf][org.jetbrains.kotlinx.dataframe.api.colsOf]`<`[Double][Double]`>() }` + * + * - Select columns using their [column names][String] + * ([String API][org.jetbrains.kotlinx.dataframe.documentation.AccessApi.StringApi]). + * + * For example: + * + * `df.`operation`("length", "age")` + * + * - Select columns using [column accessors][org.jetbrains.kotlinx.dataframe.columns.ColumnReference] + * ([Column Accessors API][org.jetbrains.kotlinx.dataframe.documentation.AccessApi.ColumnAccessorsApi]). + * + * For example: + * + * `val length by `[column][org.jetbrains.kotlinx.dataframe.api.column]`<`[Double][Double]`>()` + * + * `val age by `[column][org.jetbrains.kotlinx.dataframe.api.column]`<`[Double][Double]`>()` + * + * `df.`operation`(length, age)` + * + * - Select columns using [KProperties][KProperty] ([KProperties API][org.jetbrains.kotlinx.dataframe.documentation.AccessApi.KPropertiesApi]). + * + * For example: + * ```kotlin + * data class Person(val length: Double, val age: Double) + * ``` + * + * `df.`operation`(Person::length, Person::age)` + * + */ +internal interface SelectingColumns { + + /** + * The key for an @arg that will define the operation name for the examples below. 
+ * Make sure to [alias][your examples]. + */ + interface OperationArg + + interface SetDefaultOperationArg + + /** Select or express columns using the Column(s) Selection DSL. + * (Any [Access API][org.jetbrains.kotlinx.dataframe.documentation.AccessApi]). + * + * This DSL comes in the form of either a [Column Selector][ColumnSelector]- or [Columns Selector][ColumnsSelector] lambda, + * which operate in the [Column Selection DSL][org.jetbrains.kotlinx.dataframe.api.ColumnSelectionDsl] or the [Columns Selection DSL][org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl] and + * expect you to return a [SingleColumn] or [ColumnSet], respectively. + */ + interface Dsl { + + /** Select or express columns using the Column(s) Selection DSL. + * (Any [Access API][org.jetbrains.kotlinx.dataframe.documentation.AccessApi]). + * + * This DSL comes in the form of either a [Column Selector][org.jetbrains.kotlinx.dataframe.ColumnSelector]- or [Columns Selector][org.jetbrains.kotlinx.dataframe.ColumnsSelector] lambda, + * which operate in the [Column Selection DSL][org.jetbrains.kotlinx.dataframe.api.ColumnSelectionDsl] or the [Columns Selection DSL][org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl] and + * expect you to return a [SingleColumn][org.jetbrains.kotlinx.dataframe.columns.SingleColumn] or [ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet], respectively. + * + * For example: + * + * `df.`operation` { length `[and][ColumnsSelectionDsl.and]` age }` + * + * `df.`operation` { `[cols][ColumnsSelectionDsl.cols]`(1..5) }` + * + * `df.`operation` { `[colsOf][colsOf]`<`[Double][Double]`>() }` + * + */ + interface WithExample + } + + /** [Columns selector DSL][Dsl.WithExample] */ + interface DslLink + + /** Select columns using their [column names][String] + * ([String API][org.jetbrains.kotlinx.dataframe.documentation.AccessApi.StringApi]). + */ + interface ColumnNames { + + /** Select columns using their [column names][String] + * ([String API][org.jetbrains.kotlinx.dataframe.documentation.AccessApi.StringApi]). + * + * For example: + * + * `df.`operation`("length", "age")` + * + */ + interface WithExample + } + + /** [Column names][ColumnNames.WithExample] */ + interface ColumnNamesLink + + /** Select columns using [column accessors][ColumnReference] + * ([Column Accessors API][org.jetbrains.kotlinx.dataframe.documentation.AccessApi.ColumnAccessorsApi]). + */ + interface ColumnAccessors { + + /** Select columns using [column accessors][org.jetbrains.kotlinx.dataframe.columns.ColumnReference] + * ([Column Accessors API][org.jetbrains.kotlinx.dataframe.documentation.AccessApi.ColumnAccessorsApi]). + * + * For example: + * + * `val length by `[column][column]`<`[Double][Double]`>()` + * + * `val age by `[column][column]`<`[Double][Double]`>()` + * + * `df.`operation`(length, age)` + * + */ + interface WithExample + } + + /** [Column references][ColumnAccessors.WithExample] */ + interface ColumnAccessorsLink + + /** Select columns using [KProperties][KProperty] ([KProperties API][org.jetbrains.kotlinx.dataframe.documentation.AccessApi.KPropertiesApi]). */ + interface KProperties { + + /** Select columns using [KProperties][KProperty] ([KProperties API][org.jetbrains.kotlinx.dataframe.documentation.AccessApi.KPropertiesApi]). 
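+ *
+ * (A concrete, compilable variant of the placeholder example below, with `update` standing in for `operation`
+ * and hypothetical data; the `Person` class mirrors the one used in the generic example.)
+ *
+ * ```kotlin
+ * import org.jetbrains.kotlinx.dataframe.api.*
+ *
+ * data class Person(val length: Double, val age: Double)
+ *
+ * val df = listOf(Person(1.75, 20.0), Person(1.80, 30.0)).toDataFrame()
+ * val older = df.update(Person::age) { it + 1 }
+ * ```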
+ * + * For example: + * ```kotlin + * data class Person(val length: Double, val age: Double) + * ``` + * + * `df.`operation`(Person::length, Person::age)` + * + */ + interface WithExample + } + + /** [KProperties][KProperties.WithExample] */ + interface KPropertiesLink +} diff --git a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/SelectingRows.kt b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/SelectingRows.kt new file mode 100644 index 0000000000..6d65f010a9 --- /dev/null +++ b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/SelectingRows.kt @@ -0,0 +1,81 @@ +package org.jetbrains.kotlinx.dataframe.documentation + +import org.jetbrains.kotlinx.dataframe.RowFilter +import org.jetbrains.kotlinx.dataframe.RowValueFilter +import org.jetbrains.kotlinx.dataframe.api.* +import org.jetbrains.kotlinx.dataframe.documentation.SelectingRows.RowConditionLink +import org.jetbrains.kotlinx.dataframe.documentation.SelectingRows.RowValueConditionLink +import org.jetbrains.kotlinx.dataframe.index + +/** + * ## Selecting Rows + * Selecting rows that satisfy a "Row Condition" ([See Row Conditions on the documentation website.](https://kotlin.github.io/dataframe/datarow.html#row-conditions)) + * can occur in the following two types of operations: + * - Selecting entire rows ([Entire-Row Condition][org.jetbrains.kotlinx.dataframe.documentation.SelectingRows.EntireRowCondition.WithExample]), for instance in [filter], [drop], [first], and [count] + * (using [RowFilter]). + * - Selecting parts of rows using a `where` operation after selecting columns ([Row-Value Condition][org.jetbrains.kotlinx.dataframe.documentation.SelectingRows.RowValueCondition.WithExample]), + * such as with [update], [gather], and [format] + * (using [RowValueFilter]). + * + * A Row Condition is similar to a [Row Expression][org.jetbrains.kotlinx.dataframe.documentation.ExpressionsGivenRow] but expects a [Boolean] as result. + */ +internal interface SelectingRows { + + /** + * The key for an @arg that will define the operation name for the examples below. + * Make sure to [alias][your examples]. + */ + interface FirstOperationArg + + /** + * The key for an @arg that will define the operation name for the examples below. + * Make sure to [alias][your examples]. + */ + interface SecondOperationArg + + interface SetDefaultOperationArg + + /** [Entire-Row Condition][EntireRowCondition.WithExample] */ + interface RowConditionLink + + /** Filter or find rows to operate on using a [row filter][RowFilter]. */ + interface EntireRowCondition { + + /** + * Filter or find rows to operate on using a [row filter][org.jetbrains.kotlinx.dataframe.RowFilter]. + * + * For example: + * + * `df.`operation` { `[index][index]`() % 2 == 0 }` + * + * `df.`operation` { `[diff][diff]` { age } == 0 }` + * + */ + interface WithExample + } + + /** [Row-Value Condition][RowValueCondition.WithExample] */ + interface RowValueConditionLink + + /** Filter or find rows to operate on after [selecting columns][SelectingColumns] using a + * [row value filter][RowValueFilter]. + */ + interface RowValueCondition { + + /** + * Filter or find rows to operate on after [selecting columns][org.jetbrains.kotlinx.dataframe.documentation.SelectingColumns] using a + * [row value filter][org.jetbrains.kotlinx.dataframe.RowValueFilter]. 
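+ *
+ * The placeholder examples below write the call site as `operation`; as a concrete, compilable variant
+ * (assuming a hypothetical `length` column of [Doubles][Double], not part of this PR):
+ *
+ * ```kotlin
+ * import org.jetbrains.kotlinx.dataframe.api.*
+ *
+ * val df = dataFrameOf("length")(5.0, 12.5, 42.0)
+ *
+ * // Row-value condition: only cells greater than 10.0 are updated.
+ * val capped = df.update { colsOf<Double>() }.where { it > 10.0 }.with { 10.0 }
+ * ```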
+ * + * For example: + * + * `df.`operation` { length }.`where` { it > 10.0 }` + * + * `df.`operation` { `[cols][ColumnsSelectionDsl.cols]`(1..5) }.`where` { `[index][index]`() > 4 && city != "Paris" }` + * + */ + interface WithExample + } +} + +/** [Row Condition][SelectingRows] */ +internal interface RowConditionLink diff --git a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/utils.kt b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/utils.kt new file mode 100644 index 0000000000..3660dd5397 --- /dev/null +++ b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/utils.kt @@ -0,0 +1,4 @@ +package org.jetbrains.kotlinx.dataframe.documentation + +/** ## ‎ */ +internal interface LineBreak diff --git a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/convertTo.kt b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/convertTo.kt index 4050782550..e11e8bc0b3 100644 --- a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/convertTo.kt +++ b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/convertTo.kt @@ -31,6 +31,7 @@ import org.jetbrains.kotlinx.dataframe.columns.ColumnPath import org.jetbrains.kotlinx.dataframe.columns.FrameColumn import org.jetbrains.kotlinx.dataframe.exceptions.ExcessiveColumnsException import org.jetbrains.kotlinx.dataframe.exceptions.TypeConversionException +import org.jetbrains.kotlinx.dataframe.impl.columns.toColumnSet import org.jetbrains.kotlinx.dataframe.impl.emptyPath import org.jetbrains.kotlinx.dataframe.impl.schema.createEmptyColumn import org.jetbrains.kotlinx.dataframe.impl.schema.createEmptyDataFrame @@ -265,8 +266,8 @@ internal fun AnyFrame.convertToImpl( dsl.fillers.forEach { filler -> val paths = result.getColumnPaths(filler.columns) - missingPaths.removeAll(paths) - result = result.update(paths).with { + missingPaths.removeAll(paths.toSet()) + result = result.update { paths.toColumnSet() }.with { filler.expr(this, this) } } diff --git a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/gather.kt b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/gather.kt index af046e5efb..4844853ddf 100644 --- a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/gather.kt +++ b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/gather.kt @@ -60,9 +60,9 @@ internal fun Gather.gatherImpl( } // explode keys and values - when { - keysColumn != null && valuesColumn != null -> df = df.explode(keysColumn, valuesColumn) - else -> df = df.explode(keysColumn ?: valuesColumn!!) + df = when { + keysColumn != null && valuesColumn != null -> df.explode(keysColumn, valuesColumn) + else -> df.explode(keysColumn ?: valuesColumn!!) 
} // explode values in lists @@ -76,7 +76,7 @@ internal fun Gather.gatherImpl( val value = col[row] when { explode && value is List<*> -> { - val filtered = (value as List).filter(filter) + val filtered = (value as List).filter { filter(it) } val transformed = valueTransform?.let { filtered.map(it) } ?: filtered keys[colIndex] to transformed } diff --git a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/reorder.kt b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/reorder.kt index db04a39c19..2ce9f4db8b 100644 --- a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/reorder.kt +++ b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/reorder.kt @@ -1,9 +1,9 @@ package org.jetbrains.kotlinx.dataframe.impl.api import org.jetbrains.kotlinx.dataframe.AnyFrame +import org.jetbrains.kotlinx.dataframe.ColumnExpression import org.jetbrains.kotlinx.dataframe.DataColumn import org.jetbrains.kotlinx.dataframe.DataFrame -import org.jetbrains.kotlinx.dataframe.Selector import org.jetbrains.kotlinx.dataframe.api.Reorder import org.jetbrains.kotlinx.dataframe.api.asColumnGroup import org.jetbrains.kotlinx.dataframe.api.cast @@ -23,7 +23,7 @@ import kotlin.reflect.typeOf internal fun > Reorder.reorderImpl( desc: Boolean, - expression: Selector, V> + expression: ColumnExpression ): DataFrame { data class ColumnInfo( val treeNode: TreeNode, diff --git a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/update.kt b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/update.kt index 02e72afa2e..9f05b8f961 100644 --- a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/update.kt +++ b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/update.kt @@ -2,12 +2,12 @@ package org.jetbrains.kotlinx.dataframe.impl.api import org.jetbrains.kotlinx.dataframe.AnyFrame import org.jetbrains.kotlinx.dataframe.AnyRow +import org.jetbrains.kotlinx.dataframe.ColumnExpression import org.jetbrains.kotlinx.dataframe.DataColumn import org.jetbrains.kotlinx.dataframe.DataFrame import org.jetbrains.kotlinx.dataframe.DataFrameExpression import org.jetbrains.kotlinx.dataframe.DataRow import org.jetbrains.kotlinx.dataframe.RowValueFilter -import org.jetbrains.kotlinx.dataframe.Selector import org.jetbrains.kotlinx.dataframe.api.AddDataRow import org.jetbrains.kotlinx.dataframe.api.Update import org.jetbrains.kotlinx.dataframe.api.asColumnGroup @@ -36,7 +36,7 @@ internal fun Update.updateImpl(expression: (AddDataRow, DataColu if (df.isEmpty()) df else df.replace(columns).with { it.updateImpl(df, filter, expression) } -internal fun Update.updateWithValuePerColumnImpl(selector: Selector, C>) = +internal fun Update.updateWithValuePerColumnImpl(selector: ColumnExpression) = if (df.isEmpty()) df else { df.replace(columns).with { diff --git a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/columns/constructors.kt b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/columns/constructors.kt index 5ba0ffaaf4..68f50e0e47 100644 --- a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/columns/constructors.kt +++ b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/columns/constructors.kt @@ -189,7 +189,7 @@ internal fun Array>.toColumns(): ColumnSet = map { it.to @PublishedApi internal fun Array>.toColumns(): 
ColumnSet = asIterable().toColumnSet() -internal fun Iterable.toColumns() = map { it.toColumnAccessor() }.toColumnSet() +internal fun Iterable.toColumns(): ColumnSet = map { it.toColumnAccessor() }.toColumnSet() // endregion diff --git a/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/Utils.kt b/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/Utils.kt new file mode 100644 index 0000000000..79effbcb53 --- /dev/null +++ b/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/Utils.kt @@ -0,0 +1,23 @@ +package org.jetbrains.kotlinx.dataframe + +import org.jetbrains.kotlinx.dataframe.api.print +import org.jetbrains.kotlinx.dataframe.api.schema +import org.jetbrains.kotlinx.dataframe.io.renderToString +import org.jetbrains.kotlinx.dataframe.types.UtilTests +import java.net.URL + +fun testResource(resourcePath: String): URL = UtilTests::class.java.classLoader.getResource(resourcePath)!! +fun testCsv(csvName: String) = testResource("$csvName.csv") +fun testJson(jsonName: String) = testResource("$jsonName.json") + +fun > T.toDebugString(rowsLimit: Int = 20) = """ + ${renderToString(borders = true, title = true, columnTypes = true, valueLimit = -1, rowsLimit = rowsLimit)} + + ${schema()} +""".trimIndent() + +fun > T.alsoDebug(println: String? = null, rowsLimit: Int = 20): T = apply { + println?.let { println(it) } + print(borders = true, title = true, columnTypes = true, valueLimit = -1, rowsLimit = rowsLimit) + schema().print() +} diff --git a/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/api/add.kt b/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/api/add.kt new file mode 100644 index 0000000000..4f164215df --- /dev/null +++ b/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/api/add.kt @@ -0,0 +1,36 @@ +package org.jetbrains.kotlinx.dataframe.api + +import io.kotest.assertions.throwables.shouldThrow +import io.kotest.matchers.shouldBe +import org.jetbrains.kotlinx.dataframe.AnyFrame +import org.junit.Test +import kotlin.reflect.typeOf + +class AddTests { + + @Test + fun `add with new`() { + val x by columnOf(7, 2, 0, 3, 4, 2, 5, 0, 3, 4) + val df = dataFrameOf(x) + val added = df.add("Y") { if (x() == 0) 0 else (prev()?.newValue() ?: 0) + 1 } + val expected = listOf(1, 2, 0, 1, 2, 3, 4, 0, 1, 2) + added["Y"].values() shouldBe expected + } + + @Test + fun `throw for newValue at the next row`() { + val x by columnOf(7, 2, 0, 3, 4, 2, 5, 0, 3, 4) + val df = dataFrameOf(x) + shouldThrow { + df.add("y") { next()?.newValue() ?: 1 } + } + } + + private fun AnyFrame.addValue(value: T) = add("value") { listOf(value) } + + @Test + fun `add with generic function`() { + val df = dataFrameOf("a")(1).addValue(2) + df["value"].type() shouldBe typeOf>() + } +} diff --git a/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/api/cast.kt b/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/api/cast.kt new file mode 100644 index 0000000000..025a68bbdd --- /dev/null +++ b/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/api/cast.kt @@ -0,0 +1,26 @@ +package org.jetbrains.kotlinx.dataframe.api + +import io.kotest.assertions.throwables.shouldThrow +import io.kotest.matchers.shouldBe +import org.jetbrains.kotlinx.dataframe.annotations.DataSchema +import org.junit.Test +import java.lang.IllegalArgumentException + +class CastTests { + + @Test + fun safeUnsafeCast() { + @DataSchema + data class Data(val a: Int, val b: String) + + 
val df = dataFrameOf("a", "b", "c")(1, "s", 2) + df.cast(verify = true) shouldBe df + + shouldThrow { + df.convert("a").toDouble().cast(verify = true) + } + val converted = df.convert("a").toDouble() + converted.cast(verify = false) shouldBe converted + converted.cast() shouldBe converted + } +} diff --git a/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/api/chunked.kt b/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/api/chunked.kt new file mode 100644 index 0000000000..fd98ad8f30 --- /dev/null +++ b/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/api/chunked.kt @@ -0,0 +1,17 @@ +package org.jetbrains.kotlinx.dataframe.api + +import io.kotest.matchers.shouldBe +import org.jetbrains.kotlinx.dataframe.size +import org.junit.Test + +class ChunkedTests { + + @Test + fun chunkedColumnGroup() { + val a by columnOf(listOf(1, 2, 3).toColumn("b"), listOf(4, 5, 6).toColumn("c")) + val chunked = a.asColumnGroup().chunked(2) + chunked.size shouldBe 2 + chunked.name() shouldBe "a" + chunked[1].rowsCount() shouldBe 1 + } +} diff --git a/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/api/concat.kt b/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/api/concat.kt new file mode 100644 index 0000000000..c290e500fb --- /dev/null +++ b/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/api/concat.kt @@ -0,0 +1,14 @@ +package org.jetbrains.kotlinx.dataframe.api + +import io.kotest.matchers.shouldBe +import org.junit.Test + +class ConcatTests { + + @Test + fun `different types`() { + val a by columnOf(1, 2) + val b by columnOf(3.0, null) + a.concat(b) shouldBe columnOf(1, 2, 3.0, null).named("a") + } +} diff --git a/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/api/contains.kt b/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/api/contains.kt new file mode 100644 index 0000000000..47513b3cff --- /dev/null +++ b/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/api/contains.kt @@ -0,0 +1,64 @@ +package org.jetbrains.kotlinx.dataframe.api + +import io.kotest.matchers.shouldBe +import org.junit.Test + +class ContainsTests { + + @Test + fun `column contains`() { + val col by columnOf(1, 3, 5) + col.contains(3) shouldBe true + col.contains(2) shouldBe false + } + + @Test + fun `column group contains`() { + val df = dataFrameOf("a", "b")(1, 2, 3, 4) + val col = df.asColumnGroup("col") + col.contains(df[0]) shouldBe true + col.contains(df.update("b").withValue(0)[0]) shouldBe false + } + + @Test + fun `contains column`() { + val a by column() + val df = dataFrameOf("a")(1, 2) + (a in df) shouldBe true + df.containsColumn(a) shouldBe true + df.containsColumn("a") shouldBe true + df.containsColumn(df["a"]) shouldBe true + val b by column() + (b in df) shouldBe false + df.containsColumn(b) shouldBe false + } + + @Test + fun `contains nested column`() { + val g by columnGroup() + val a by g.column() + + val df = dataFrameOf("a")(1, 2).group("a").into("g") + (a in df) shouldBe true + } + + @Test + fun `row contains key`() { + val a by column() + val b by column() + data class A(val a: Int, val b: Int) + + val df = dataFrameOf("a")(1, 2) + val row = df[0] + + row.containsKey("a") shouldBe true + row.containsKey(a) shouldBe true + row.containsKey(A::a) shouldBe true + (A::a in row) shouldBe true + (a in row) shouldBe true + + row.containsKey("b") shouldBe false + row.containsKey(b) shouldBe false + row.containsKey(A::b) 
shouldBe false + } +} diff --git a/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/api/convert.kt b/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/api/convert.kt new file mode 100644 index 0000000000..5269298fe5 --- /dev/null +++ b/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/api/convert.kt @@ -0,0 +1,175 @@ +package org.jetbrains.kotlinx.dataframe.api + +import io.kotest.assertions.throwables.shouldNotThrow +import io.kotest.assertions.throwables.shouldThrow +import io.kotest.matchers.shouldBe +import kotlinx.datetime.Clock +import kotlinx.datetime.Instant +import org.jetbrains.kotlinx.dataframe.DataColumn +import org.jetbrains.kotlinx.dataframe.DataFrame +import org.jetbrains.kotlinx.dataframe.annotations.DataSchema +import org.jetbrains.kotlinx.dataframe.exceptions.CellConversionException +import org.jetbrains.kotlinx.dataframe.exceptions.TypeConversionException +import org.jetbrains.kotlinx.dataframe.exceptions.TypeConverterNotFoundException +import org.jetbrains.kotlinx.dataframe.hasNulls +import org.junit.Test +import java.time.LocalTime +import kotlin.reflect.typeOf +import kotlin.time.Duration.Companion.hours + +class ConvertTests { + + @Test + fun `convert nullable strings to time`() { + val time by columnOf("11?22?33", null) + val converted = time.toDataFrame().convert { time }.toLocalTime("HH?mm?ss")[time] + converted.hasNulls shouldBe true + converted[0] shouldBe LocalTime.of(11, 22, 33) + } + + @Test + fun `nullability persistence after conversion`() { + val col by columnOf("1", null) + col.convertToInt().forEach { + } + } + + @DataSchema + data class Schema(val time: Instant) + + @Test + fun `Instant to LocalDateTime`() { + val df = listOf(Schema(Clock.System.now())).toDataFrame() + df.convert { time }.toLocalDateTime() + } + + enum class EnumClass { A, B } + + @Test + fun `convert string to enum`() { + columnOf("A", "B").convertTo() shouldBe columnOf(EnumClass.A, EnumClass.B) + } + + @JvmInline + value class IntClass(val v: Int) + + @JvmInline + value class StringClass(val s: String?) 
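The surrounding `ConvertTests` exercise column-level conversions: `convertTo` resolves a converter between the column type and the target, including enum entries matched by name and single-field `@JvmInline` value classes, and throws `TypeConverterNotFoundException` or `TypeConversionException` when no converter applies or a cell cannot be converted. A small sketch under those assumptions (`Answer` and `Percent` are illustrative types, not from this patch):

```kotlin
import org.jetbrains.kotlinx.dataframe.api.*

enum class Answer { YES, NO }

@JvmInline
value class Percent(val value: Int)

fun main() {
    // Enum conversion matches string values against entry names.
    println(columnOf("YES", "NO").convertTo<Answer>())

    // Single-field value classes are wrapped/unwrapped through their underlying type.
    println(columnOf("25", "75").convertTo<Percent>())

    // Numeric shortcuts such as convertToInt keep nulls intact.
    println(columnOf("1", null).convertToInt())
}
```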
+ + @JvmInline + value class PrivateInt(private val v: Int) + + @Test + fun `convert string to value class`() { + columnOf("1").convertTo() shouldBe columnOf(IntClass(1)) + } + + @Test + fun `convert double to value class`() { + columnOf(1.0).convertTo() shouldBe columnOf(IntClass(1)) + } + + @Test + fun `convert from value class `() { + columnOf(IntClass(1)).convertTo() shouldBe columnOf(1.0) + columnOf(StringClass("1"), StringClass(null)).convertTo() shouldBe columnOf(1.0, null) + } + + @Test + fun `convert to value class exceptions`() { + shouldThrow { + columnOf("a").convertTo() + } + + shouldThrow { + columnOf("1", "10", "a").convertTo() + }.row shouldBe 2 + + shouldThrow { + columnOf("1", "x", "2.5").convertToDouble() + }.row shouldBe 1 + + shouldThrow { + columnOf(EnumClass.A).convertTo() + } + } + + @Test + fun `convert from value class exceptions`() { + shouldThrow { + columnOf(StringClass("a")).convertTo() + }.from shouldBe typeOf() + + shouldThrow { + columnOf(IntClass(1)).convertTo() + } + + shouldThrow { + columnOf(StringClass(null)).convertTo() + } + + shouldThrow { + columnOf(PrivateInt(1)).convertTo() + } + } + + @Test + fun `convert null strings`() { + val col = columnOf("none") + + shouldThrow { + col.convertTo() + } + + shouldThrow { + col.convertTo() + } + + DataFrame.parser.addNullString("none") + + shouldThrow { + col.convertTo() + } + + col.convertTo() shouldBe DataColumn.createValueColumn("", listOf(null), typeOf()) + + DataFrame.parser.resetToDefault() + } + + @Test + fun `convert to not nullable`() { + val col = columnOf(1.0, null) + + col.convertToInt() shouldBe columnOf(1, null) + + shouldThrow { + col.cast().convertToInt() + } + + col.convertTo() shouldBe columnOf(1, null) + } + + @Test + fun `convert to nullable without nulls`() { + val col = columnOf(1.0, 2.0) + + col.convertTo().hasNulls() shouldBe false + } + + @Test + fun `convert instant`() { + println(Clock.System.now().toEpochMilliseconds()) + val kotlinxInstants = columnOf(Instant.fromEpochMilliseconds(1657283006955)) + shouldNotThrow { + val javaInstant = kotlinxInstants.convertTo() + javaInstant.convertTo() shouldBe kotlinxInstants + } + } + + @Test + fun `convert duration to string`() { + val col = columnOf(1.hours) + val res = col.convertTo() + res.print() + } +} diff --git a/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/api/convertTo.kt b/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/api/convertTo.kt new file mode 100644 index 0000000000..04f7971532 --- /dev/null +++ b/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/api/convertTo.kt @@ -0,0 +1,344 @@ +package org.jetbrains.kotlinx.dataframe.api + +import io.kotest.assertions.throwables.shouldThrow +import io.kotest.matchers.shouldBe +import org.jetbrains.kotlinx.dataframe.AnyFrame +import org.jetbrains.kotlinx.dataframe.AnyRow +import org.jetbrains.kotlinx.dataframe.DataColumn +import org.jetbrains.kotlinx.dataframe.DataFrame +import org.jetbrains.kotlinx.dataframe.alsoDebug +import org.jetbrains.kotlinx.dataframe.annotations.DataSchema +import org.jetbrains.kotlinx.dataframe.columns.ColumnKind +import org.jetbrains.kotlinx.dataframe.exceptions.TypeConverterNotFoundException +import org.jetbrains.kotlinx.dataframe.kind +import org.junit.Test +import kotlin.reflect.typeOf + +class ConvertToTests { + + @Test + fun `convert frame column with empty frames`() { + val groups by columnOf(dataFrameOf("a")("1"), DataFrame.empty()) + val df = dataFrameOf(groups) + + @DataSchema + data 
class GroupSchema(val a: Int) + + @DataSchema + data class DataFrameSchema(val groups: DataFrame) + + val converted = df.convertTo() + + converted[groups].forEach { + it["a"].type() shouldBe typeOf() + } + } + + data class A(val value: Int) + + @DataSchema + data class Schema(val a: A) + + @Test + fun `convert with parser`() { + val df = dataFrameOf("a")("1") + + shouldThrow { + df.convertTo() + } + + df.convertTo { + parser { A(it.toInt()) } + } + .single().a.value shouldBe 1 + } + + @Test + fun `convert with converter`() { + val df = dataFrameOf("a")(1) + + shouldThrow { + df.convertTo() + } + + df.convertTo { + convert().with { A(it) } + }.single().a.value shouldBe 1 + } + + @Test + fun `convert nulls to not nulls with converter`() { + val df = dataFrameOf("a")("1", null) + + val converted = df.convertTo { + convert().with { it?.let { A(it.toInt()) } ?: A(0) } + } + val expected = dataFrameOf("a")(A(1), A(0)) + + converted shouldBe expected + } + + @JvmInline + value class IntClass(val value: Int) + + @DataSchema + interface IntSchema { + val a: IntClass? + } + + @Test + fun `convert value class with converter`() { + dataFrameOf("a")("1%") + .convertTo { + parser { IntClass(it.dropLast(1).toInt()) } + } shouldBe dataFrameOf("a")(IntClass(1)) + } + + @Test + fun `convert nulls with converter`() { + dataFrameOf("a")("1%", null) + .convertTo { + parser { IntClass(it.dropLast(1).toInt()) } + } shouldBe dataFrameOf("a")(IntClass(1), null) + } + + @Test + fun `convert with nullable converter argument`() { + val df = dataFrameOf("a")("1") + + val converted = df.convertTo { + convert().with { + it?.let { IntClass(it.toInt()) } + } + } + val expected = dataFrameOf("a")(IntClass(1)) + + converted shouldBe expected + } + + @DataSchema + data class Location( + val name: String, + val gps: Gps?, + ) + + @DataSchema + data class Gps(val latitude: Double, val longitude: Double) + + // @Test TODO: https://github.com/Kotlin/dataframe/issues/177 + fun `convert df with nullable DataRow`() { + val locations: AnyFrame = dataFrameOf("name", "gps")( + "Home", Gps(0.0, 0.0), + "Away", null, + ) + + locations.print(borders = true, title = true, columnTypes = true) + locations.schema().print() + + val converted = locations.convertTo() + + converted shouldBe locations + } + + @Test + fun `convert df with nullable DataRow to itself`() { + val locations: DataFrame = listOf( + Location("Home", Gps(0.0, 0.0)), + Location("Away", null), + ).toDataFrame() + + val converted = locations.convertTo() + + converted shouldBe locations + } + + @DataSchema + data class DataSchemaWithAnyFrame(val dfs: AnyFrame?) 
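In `ConvertToTests`, `convertTo` reshapes a frame to a `@DataSchema` target and throws `TypeConverterNotFoundException` unless the conversion DSL supplies a way to build the missing types, either a `parser { ... }` for `String` sources or an explicit `convert` / `with` pair. A compact sketch of that DSL (a sketch only; `UserId` and `User` are made-up names, not from this patch):

```kotlin
import org.jetbrains.kotlinx.dataframe.api.*
import org.jetbrains.kotlinx.dataframe.annotations.DataSchema

// Made-up wrapper and target schema for illustration.
data class UserId(val raw: Int)

@DataSchema
data class User(val id: UserId)

fun main() {
    val df = dataFrameOf("id")("1", "2")

    // Without the parser block there is no String -> UserId converter and the
    // conversion would fail; the parser closes that gap for String source columns.
    val users = df.convertTo<User> {
        parser { UserId(it.toInt()) }
    }

    users.print()
}
```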
+ + @Test + fun test() { + val df1 = dataFrameOf("a")(1, 2, 3) + val df2 = dataFrameOf("b")(4, 5) + val frameColumn by columnOf(df1, df2, null) + val df = dataFrameOf(frameColumn).alsoDebug() +// ⌌---------------⌍ +// | | untitled:[]| +// |--|------------| +// | 0| [3 x 1]| +// | 1| [2 x 1]| +// | 2| [0 x 0]| +// ⌎---------------⌏ +// +// untitled: * + } + + @Test + fun `convert df with AnyFrame to itself`() { + val locationsList = listOf( + Location("Home", Gps(0.0, 0.0)), + Location("Away", null), + null, + ) + val locations = locationsList + .toDataFrame() + .alsoDebug("locations:") + + val gpsList = listOf( + Gps(0.0, 0.0), + null, + ) + val gps = gpsList + .toDataFrame() + .alsoDebug("gps:") + + val df1 = listOf( + DataSchemaWithAnyFrame(locations), + ) + .toDataFrame() + .alsoDebug("df1:") + + df1.convertTo() + + val df2 = listOf( + DataSchemaWithAnyFrame(gps), + ) + .toDataFrame() + .alsoDebug("df2:") + + df2.convertTo() + + val df3 = listOf( + DataSchemaWithAnyFrame(null), + DataSchemaWithAnyFrame(gps), + ) + .toDataFrame { properties { preserve(DataFrame::class) } } + .alsoDebug("df3 before convert:") + + df3.convertTo() + + val df4 = listOf( + DataSchemaWithAnyFrame(null), + ) + .toDataFrame { properties { preserve(DataFrame::class) } } + .alsoDebug("df4 before convert:") + + df4.convertTo() + + val df5a: DataFrame<*> = dataFrameOf( + columnOf(locations, gps, null).named("dfs"), + ).alsoDebug("df5a:") + + df5a.convertTo() + + val df5 = listOf( + DataSchemaWithAnyFrame(null), + DataSchemaWithAnyFrame(locations), + DataSchemaWithAnyFrame(gps), + ) + .toDataFrame { properties { preserve(DataFrame::class) } } + .alsoDebug("df5 before convert:") + + df5.convertTo() + .alsoDebug("df5 after convert:") + .convertTo() + .alsoDebug("df5 after second convert:") + } + + interface KeyValue { + val key: String + val value: T + } + + @DataSchema + interface MySchema : KeyValue + + @Test + fun `Convert generic interface to itself`() { + val df = dataFrameOf("key", "value")( + "a", 1, + "b", 2, + ).alsoDebug() + val converted = df.convertTo().alsoDebug() + converted shouldBe df + } + + @Test + fun `convert with missing nullable column`() { + @DataSchema + data class Result(val a: Int, val b: Int?) 
+ + val df = dataFrameOf("a")(1, 2) + val converted = df.convertTo() + converted shouldBe listOf(Result(1, null), Result(2, null)).toDataFrame() + } + + @Test + fun `convert with custom fill of missing columns`() { + val locations = listOf( + Location("Home", Gps(1.0, 1.0)), + Location("Away", null), + ).toDataFrame().cast() + + val converted = locations.remove { gps.longitude }.cast() + .convertTo { + fill { gps.longitude }.with { gps.latitude } + } + + converted shouldBe locations.update { gps.longitude }.with { gps.latitude } + } + + @Test + fun `convert column of empty lists into FrameColumn`() { + @DataSchema + data class Entry(val v: Int) + + @DataSchema + data class Result(val d: DataFrame) + + dataFrameOf("d")(emptyList(), emptyList()) + .convertTo() shouldBe + dataFrameOf("d")(DataFrame.emptyOf(), DataFrame.emptyOf()) + } + + @Test + fun `convert ColumnGroup into FrameColumn`() { + @DataSchema + data class Entry(val v: Int) + + @DataSchema + data class Result(val d: DataFrame) + + val columnGroup = DataColumn.createColumnGroup("d", dataFrameOf("v")(1, 2)) + columnGroup.kind() shouldBe ColumnKind.Group + val res = dataFrameOf(columnGroup).convertTo() + val frameColumn = res.getFrameColumn("d") + frameColumn.kind shouldBe ColumnKind.Frame + frameColumn.values() shouldBe listOf(dataFrameOf("v")(1), dataFrameOf("v")(2)) + } + + @Test + fun `convert ValueColumn of lists, nulls and frames into FrameColumn`(){ + @DataSchema + data class Entry(val v: Int) + + @DataSchema + data class Result(val d: DataFrame) + + val emptyList: List = emptyList() + val listOfRows: List = dataFrameOf("v")(1, 2).rows().toList() + val frame: DataFrame = listOf(Entry(3), Entry(4)).toDataFrame() + + val src = DataColumn.createValueColumn("d", listOf(emptyList, listOfRows, frame, null)).toDataFrame() + src["d"].kind shouldBe ColumnKind.Value + + val df = src.convertTo() + val frameColumn = df.getFrameColumn("d") + frameColumn.kind shouldBe ColumnKind.Frame + frameColumn.toList() shouldBe listOf( + DataFrame.emptyOf(), + dataFrameOf("v")(1, 2), + dataFrameOf("v")(3, 4), + DataFrame.emptyOf(), + ) + } +} diff --git a/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/api/corr.kt b/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/api/corr.kt new file mode 100644 index 0000000000..638e9ad32d --- /dev/null +++ b/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/api/corr.kt @@ -0,0 +1,54 @@ +package org.jetbrains.kotlinx.dataframe.api + +import io.kotest.matchers.doubles.ToleranceMatcher +import io.kotest.matchers.should +import io.kotest.matchers.shouldBe +import org.junit.Test + +class CorrTests { + + val df = dataFrameOf("a", "b", "c")( + 3, true, 1, + 6, false, 2 + ) + + @Test + fun `corr with boolean`() { + val corr = df.corr("a", "b").with("c") + corr.rowsCount() shouldBe 2 + corr.columnsCount() shouldBe 2 + corr.getColumn(0) shouldBe (columnOf("a", "b") named "column") + corr.getColumn(1).name() shouldBe "c" + corr["c"][0] as Double should ToleranceMatcher(1.0, 0.01) + corr["c"][1] as Double should ToleranceMatcher(-1.0, 0.01) + } + + @Test + fun `corr group`() { + val corr = df.group("a", "b").into("g") + .corr("g").with("c") + + corr shouldBe df.corr("a", "b").with("c").rename("column" to "g") + } + + @Test + fun `corr itself`() { + val corr = df.corr() + val expected = dataFrameOf("column", "a", "b", "c")( + "a", 1.0, -1.0, 1.0, + "b", -1.0, 1.0, -1.0, + "c", 1.0, -1.0, 1.0 + ) + corr.columns().zip(expected.columns()).forEach { (a, b) -> + 
a.type() shouldBe b.type() + if (a.isNumber()) { + a.name() shouldBe b.name() + a.values().zip(b.values()).forEach { (v1, v2) -> + v1 as Double should ToleranceMatcher(v2 as Double, 0.01) + } + } else { + a shouldBe b + } + } + } +} diff --git a/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/api/describe.kt b/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/api/describe.kt new file mode 100644 index 0000000000..a418e87445 --- /dev/null +++ b/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/api/describe.kt @@ -0,0 +1,14 @@ +package org.jetbrains.kotlinx.dataframe.api + +import io.kotest.matchers.shouldBe +import org.junit.Test + +class DescribeTests { + + @Test + fun `describe all nulls`() { + val a by columnOf(1, null) + val df = dataFrameOf(a).drop(1) + df.describe()["min"][0] shouldBe null + } +} diff --git a/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/api/emptyDataFrame.kt b/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/api/emptyDataFrame.kt new file mode 100644 index 0000000000..6f7d3b33a1 --- /dev/null +++ b/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/api/emptyDataFrame.kt @@ -0,0 +1,52 @@ +package org.jetbrains.kotlinx.dataframe.api + +import io.kotest.matchers.shouldBe +import org.jetbrains.kotlinx.dataframe.DataFrame +import org.jetbrains.kotlinx.dataframe.annotations.DataSchema +import org.junit.Test +import kotlin.reflect.typeOf + +class EmptyDataFrameTests { + + @Test + fun simple() { + with(DataFrame.empty()) { + rowsCount() shouldBe 0 + columnsCount() shouldBe 0 + } + } + + @Test + fun emptyWithRows() { + with(DataFrame.empty(3)) { + rowsCount() shouldBe 3 + columnsCount() shouldBe 0 + } + } + + @DataSchema + data class FrameSchema(val e: Double) + + @DataSchema + data class GroupSchema(val c: Int, val d: String) + + @DataSchema + data class Schema(val a: Int, val group: GroupSchema, val frame: List) + + @Test + fun emptyWithColumns() { + with(DataFrame.emptyOf()) { + rowsCount() shouldBe 0 + columnsCount() shouldBe 3 + columnNames() shouldBe listOf("a", "group", "frame") + get("a").type() shouldBe typeOf() + getColumnGroup("group").let { + it.columnNames() shouldBe listOf("c", "d") + it["c"].type() shouldBe typeOf() + } + getFrameColumn("frame").let { + it.schema.value.columns.keys shouldBe listOf("e") + } + } + } +} diff --git a/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/api/explode.kt b/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/api/explode.kt new file mode 100644 index 0000000000..53b7bea3cf --- /dev/null +++ b/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/api/explode.kt @@ -0,0 +1,37 @@ +package org.jetbrains.kotlinx.dataframe.api + +import io.kotest.matchers.shouldBe +import org.junit.Test + +class ExplodeTests { + + @Test + fun `explode into`() { + val df = dataFrameOf("a" to listOf(1), "b" to listOf(listOf(2, 3))) + val exploded = df.explode { "b" into "c" } + val expected = dataFrameOf("a" to listOf(1, 1), "c" to listOf(2, 3)) + exploded shouldBe expected + } + + @Test + fun `explode list and duplicate value`() { + val exploded = dataFrameOf("a", "b")(1, listOf(2, 3)).explode() + exploded shouldBe dataFrameOf("a", "b")(1, 2, 1, 3) + } + + @Test + fun `explode list and frame column`() { + val exploded = dataFrameOf("a", "b", "c", "d")(1, listOf(2, 3), dataFrameOf("x", "y")(4, 5, 6, 7), listOf(8)) + .explode().ungroup("c") + exploded shouldBe 
dataFrameOf("a", "b", "x", "y", "d")( + 1, 2, 4, 5, 8, + 1, 3, 6, 7, null + ) + } + + @Test + fun `explode nothing`() { + val df = dataFrameOf("a", "b")(1, 2) + df.explode() shouldBe df + } +} diff --git a/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/api/flatten.kt b/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/api/flatten.kt new file mode 100644 index 0000000000..7a376daa07 --- /dev/null +++ b/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/api/flatten.kt @@ -0,0 +1,77 @@ +package org.jetbrains.kotlinx.dataframe.api + +import io.kotest.matchers.shouldBe +import org.jetbrains.kotlinx.dataframe.DataRow +import org.jetbrains.kotlinx.dataframe.annotations.DataSchema +import org.junit.Test + +class FlattenTests { + + @Test + fun `flatten names`() { + val df = dataFrameOf("a", "b", "c")(1, 2, 3) + val grouped = df.group("a", "b").into("d") + grouped.flatten() shouldBe df + grouped.add("a") { 0 }.flatten().columnNames() shouldBe listOf("a1", "b", "c", "a") + } + + @DataSchema + interface TestRow { + val a: String + val b: String + val c: String + } + + @DataSchema + interface Grouped { + val d: DataRow + } + + @Test + fun `flatten access APIs`() { + val df = dataFrameOf("a", "b", "c")(1, 2, 3) + val grouped = df.group("a", "b").into("d") + + // String API + grouped.flatten("d") shouldBe df + val castedGroupedDF = grouped.cast() + + // KProperties API + castedGroupedDF.flatten(Grouped::d) shouldBe df + + // Extension properties API + castedGroupedDF.flatten { d } shouldBe df + + // Column accessors API + val d by columnGroup() + val a by d.column() + val b by d.column() + val c by d.column() + grouped.flatten(d) shouldBe df + } + + @Test + fun `flatten nested`() { + val df = dataFrameOf("a", "b", "c", "d")(1, 2, 3, 4) + val grouped = df.group("a", "b").into("e") + .group("e", "c").into("f") + + grouped.flatten() shouldBe df + val flattened = grouped.flatten { "f"["e"] } + flattened.columnNames() shouldBe listOf("f", "d") + flattened.ungroup("f") shouldBe df + + grouped.flatten { "f"["e"] and "f" } shouldBe df + } + + @Test + fun `flatten with parent name conflict`() { + val df = dataFrameOf("a", "b", "c", "d")(1, 2, 3, 4) + val grouped = df.group("a", "b").into("e") + .group("e", "c").into("f") + .rename { "f"["e"] }.into("a") + val flattened = grouped.flatten { "f"["a"] } + flattened.getColumnGroup("f").columnNames() shouldBe listOf("a", "b", "c") + flattened.ungroup("f") shouldBe df + } +} diff --git a/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/api/gather.kt b/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/api/gather.kt new file mode 100644 index 0000000000..4412e7aa43 --- /dev/null +++ b/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/api/gather.kt @@ -0,0 +1,185 @@ +package org.jetbrains.kotlinx.dataframe.api + +import io.kotest.matchers.shouldBe +import org.intellij.lang.annotations.Language +import org.jetbrains.kotlinx.dataframe.AnyFrame +import org.jetbrains.kotlinx.dataframe.DataFrame +import org.jetbrains.kotlinx.dataframe.DataRow +import org.jetbrains.kotlinx.dataframe.annotations.DataSchema +import org.jetbrains.kotlinx.dataframe.codeGen.generateCode +import org.jetbrains.kotlinx.dataframe.columns.ColumnKind +import org.jetbrains.kotlinx.dataframe.io.readJsonStr +import org.jetbrains.kotlinx.dataframe.kind +import org.junit.Test +import kotlin.reflect.typeOf + +class GatherTests { + + //region Data Source + + @Language("json") + val df = 
""" + [ + { + "name": "abc", + "normal": { + "c1": "a", + "c2": "b", + "c3": "c" + }, + "reversed": { + "c1": "c", + "c2": "b", + "c3": "a" + }, + "first": { + "c1": "c" + } + }, + { + "name": "qw", + "normal": { + "c1": "q", + "c2": "w" + }, + "reversed": { + "c1": "w", + "c2": "q" + }, + "first": { + "c1": "q" + } + } + ] + """.let { + DataFrame.readJsonStr(it) + } + + //endregion + + val generatedCode = df.generateCode("Marker") + + //region Generated code + + @DataSchema(isOpen = false) + interface Marker1 { + val c1: String + val c2: String + val c3: String? + } + + @DataSchema(isOpen = false) + interface Marker2 { + val c1: String + val c2: String + val c3: String? + } + + @DataSchema(isOpen = false) + interface Marker3 { + val c1: String + } + + @DataSchema + interface Marker { + val name: String + val normal: DataRow + val reversed: DataRow + val first: DataRow + } + + //endregion + + val typed = df.cast() + + @Test + fun gather() { + val mode by column() + val temp by column() + val gathered = typed.gather { except(name) }.cast().into(mode, temp).ungroup(temp) + + val expected = typed.groupBy { name }.updateGroups { + val cols = columns().drop(1).map { it.asColumnGroup() } // drop 'name' column + val dataRows = cols.map { it[0] } + + val newDf = listOf( + name.withValues(List(cols.size) { name[0] }), + mode.withValues(cols.map { it.name() }), + dataRows.map { it.getValueOrNull("c1") }.toColumn("c1"), + dataRows.map { it.getValueOrNull("c2") }.toColumn("c2"), + dataRows.map { it.getValueOrNull("c3") }.toColumn("c3") + ).toDataFrame() + + newDf + }.concat() + + gathered shouldBe expected + } + + @Test + fun `generated code is fully typed`() { + generatedCode.contains("<*>") shouldBe false + } + + @Test + fun `gather column group`() { + val java by columnOf(1, 2, 3) + val kotlin by columnOf(1, 2, 3) + val languages by column>() + + val df = dataFrameOf(java, kotlin).group { java and kotlin }.into("languages") + + fun AnyFrame.check() { + this["value"].kind shouldBe ColumnKind.Group + columnsCount() shouldBe 2 + rowsCount() shouldBe 3 + } + + df.gather { languages }.into("key", "value").check() + } + + @Test + fun `gather mix of columns`() { + val a by columnOf(1, 1.1) + val b by columnOf(2, 2.2) + + val df = dataFrameOf(a, b)[0..0] + + val gathered = df.gather { a and b } + .into("key", "value") + + gathered["value"].type() shouldBe typeOf() + } + + @Test + fun `gather values`() { + val a by columnOf(1, 2) + val b by columnOf(3, 4) + + var df = dataFrameOf(a, b).gather { a and b }.valuesInto("data") + df.columnsCount() shouldBe 1 + df["data"].values() shouldBe listOf(1, 3, 2, 4) + + df = dataFrameOf(a, b).gather { a and b }.where { it % 2 == 1 }.valuesInto("data") + df.columnsCount() shouldBe 1 + df["data"].values() shouldBe listOf(1, 3) + } + + @Test + fun `gather explode lists`() { + val a by columnOf(1, 2) + val b by columnOf(listOf(3, 4), listOf(5, 6)) + + val df = dataFrameOf(a, b).gather { a and b } + .explodeLists() + .cast() + .where { it % 2 == 1 } + .into("key", "value") + + df shouldBe dataFrameOf("key", "value")( + "a", 1, + "b", 3, + "b", 5 + ) + } +} diff --git a/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/api/get.kt b/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/api/get.kt new file mode 100644 index 0000000000..04a3fdf840 --- /dev/null +++ b/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/api/get.kt @@ -0,0 +1,92 @@ +package org.jetbrains.kotlinx.dataframe.api + +import 
io.kotest.assertions.throwables.shouldThrow +import io.kotest.assertions.throwables.shouldThrowAny +import io.kotest.matchers.shouldBe +import io.kotest.matchers.string.shouldContain +import org.jetbrains.kotlinx.dataframe.DataFrame +import org.jetbrains.kotlinx.dataframe.annotations.DataSchema +import org.junit.Test +import java.lang.ClassCastException +import java.lang.IllegalArgumentException + +class GetTests { + + @Test + fun `exceptions from empty dataframe`() { + val empty = DataFrame.empty() + shouldThrow { + empty.first() + } + shouldThrow { + empty.last() + } + shouldThrow { + empty[0] + } + } + + @Test + fun `get value from row`() { + val a by column() + val c by column() + data class A(val a: Int, val b: Int, val c: Int) + + val df = dataFrameOf("a", "b")(1, 2) + val row = df[0] + + row["a"] shouldBe 1 + row.getValue("a") shouldBe 1 + row.getValue(a) shouldBe 1 + row.getValue(A::a) shouldBe 1 + + row.getValueOrNull("c") shouldBe null + row.getValueOrNull(c) shouldBe null + row.getValueOrNull(A::c) shouldBe null + + shouldThrow { row.getValue("c") } + shouldThrow { row.getValue(c) } + shouldThrow { row.getValue(A::c) } + + val added = df.add(A::c) { "3" }[0] + + shouldThrow { added.getValue(c) + 1 } + shouldThrow { added.getValue("c") + 1 } + shouldThrow { added.getValue(A::c) + 1 } + } + + @DataSchema + data class Schema(val a: Int) + + @Test + fun `create typed frame column accessor`() { + val df = dataFrameOf( + columnOf( + dataFrameOf("a")(1), + dataFrameOf("a", "b")(2, 3, 4, 5) + ).named("x") + ) + val x by frameColumn() + df[x][0].a[0] shouldBe 1 + df[1][x].a[1] shouldBe 4 + } + + @Test + fun `create typed column group accessor`() { + val df = dataFrameOf( + dataFrameOf("a", "b")(1, 2, 3, 4).asColumnGroup("x") + ) + val x by columnGroup() + df[x][0].a shouldBe 1 + df[1][x].a shouldBe 3 + } + + @Test + fun `throw meaningful exception when traverse columns in DataRow`() { + val df = dataFrameOf("a")(null) + val throwable = shouldThrowAny { + df[0].getColumnGroup("a") + } + throwable.message shouldContain "Cannot cast null value of a ValueColumn to" + } +} diff --git a/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/api/groupBy.kt b/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/api/groupBy.kt new file mode 100644 index 0000000000..4f7e787483 --- /dev/null +++ b/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/api/groupBy.kt @@ -0,0 +1,58 @@ +package org.jetbrains.kotlinx.dataframe.api + +import io.kotest.matchers.shouldBe +import org.jetbrains.kotlinx.dataframe.AnyFrame +import org.junit.Test +import kotlin.reflect.typeOf + +class GroupByTests { + + @Test + fun `groupBy values with nulls`() { + val df = dataFrameOf( + "a", "b" + )( + 1, 1, + 1, null, + 2, null, + 3, 1, + ) + + df.groupBy("a").values { "b" into "c" } shouldBe + dataFrameOf( + "a", "c" + )( + 1, listOf(1, null), + 2, listOf(null), + 3, listOf(1), + ) + + df.groupBy("a").values(dropNA = true) { "b" into "c" } shouldBe + dataFrameOf( + "a", "c" + )( + 1, listOf(1), + 2, emptyList(), + 3, listOf(1), + ) + } + + @Test + fun `aggregate FrameColumns into new column`() { + val df = dataFrameOf( + "a", "b", "c" + )( + 1, 2, 3, + 4, 5, 6, + ) + val grouped = df.groupBy("a", "b").into("d") + + grouped.groupBy("a").aggregate { + getColumn("d") into "e" + }["e"].type() shouldBe typeOf>() + + grouped.groupBy("a").aggregate { + getFrameColumn("d") into "e" + }["e"].type() shouldBe typeOf>() + } +} diff --git 
a/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/api/implode.kt b/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/api/implode.kt new file mode 100644 index 0000000000..c9ff8e689f --- /dev/null +++ b/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/api/implode.kt @@ -0,0 +1,21 @@ +package org.jetbrains.kotlinx.dataframe.api + +import io.kotest.matchers.shouldBe +import org.junit.Test + +class ImplodeTests { + + @Test + fun `implode into`() { + val df = dataFrameOf("a" to listOf(1, 1), "b" to listOf(2, 3)) + val imploded = df.implode { "b" into "c" } + val expected = dataFrameOf("a" to listOf(1), "c" to listOf(listOf(2, 3))) + imploded shouldBe expected + } + + @Test + fun `implode all`() { + val df = dataFrameOf("a" to listOf(1, 1), "b" to listOf(2, 3)) + df.implode() shouldBe df.implode { all() }[0] + } +} diff --git a/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/api/inferType.kt b/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/api/inferType.kt new file mode 100644 index 0000000000..562cc3c954 --- /dev/null +++ b/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/api/inferType.kt @@ -0,0 +1,31 @@ +package org.jetbrains.kotlinx.dataframe.api + +import io.kotest.matchers.shouldBe +import org.junit.Test +import kotlin.reflect.typeOf + +class InferTypeTests { + + @Test + fun `infer type 1`() { + val col by columnOf("Alice", 1, 3.5) + col.type() shouldBe typeOf>() + val filtered = col.filter { it is String } + filtered.type() shouldBe typeOf>() + filtered.inferType().type() shouldBe typeOf() + } + + open class A(val value: T) + class B(value: T) : A(value) + + @Test + fun `infer type with argument`() { + val col by columnOf(1) + val df = dataFrameOf(col) + val converted = df.convert(col).with(Infer.None) { + B(it) as A + } + converted[col].type() shouldBe typeOf>() + converted.inferType(col)[col].type() shouldBe typeOf>() + } +} diff --git a/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/api/join.kt b/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/api/join.kt new file mode 100644 index 0000000000..ef8c1efac9 --- /dev/null +++ b/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/api/join.kt @@ -0,0 +1,33 @@ +package org.jetbrains.kotlinx.dataframe.api + +import io.kotest.matchers.nulls.shouldNotBeNull +import io.kotest.matchers.shouldBe +import org.jetbrains.kotlinx.dataframe.columns.ColumnKind +import org.jetbrains.kotlinx.dataframe.impl.columns.asAnyFrameColumn +import org.junit.Ignore +import org.junit.Test + +class JoinTests { + + @Ignore + @Test + fun `left join frame column`() { + val df1 = dataFrameOf("a")(1, 2) + val df2 = dataFrameOf("a", "b")( + 1, dataFrameOf("c")(3), + 4, dataFrameOf("c")(5) + ) + val df = df1.leftJoin(df2) + + df.rowsCount() shouldBe 2 + df.columnNames() shouldBe listOf("a", "b") + df["a"] shouldBe df1["a"] + val b = df["b"] + b.kind() shouldBe ColumnKind.Frame + b.hasNulls() shouldBe false + val f = b.asAnyFrameColumn() + f[0] shouldBe df2["b"][0] + f[1].shouldNotBeNull() + f[1].isEmpty() shouldBe true + } +} diff --git a/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/api/map.kt b/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/api/map.kt new file mode 100644 index 0000000000..4f9046b469 --- /dev/null +++ b/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/api/map.kt @@ -0,0 +1,21 @@ +package 
org.jetbrains.kotlinx.dataframe.api + +import io.kotest.matchers.shouldBe +import org.junit.Test + +class MapTests { + + @Test + fun `map frame column with empty frames`() { + val frames by columnOf(dataFrameOf("a")(1), emptyDataFrame()) + frames.map { it.firstOrNull() }.size() shouldBe frames.size() + } + + @Test + fun `map ColumnsContainer`() { + val df = dataFrameOf("a")(1, 2).add { + expr { "a"() + 1 }.cumSum() into "b" + } + df["b"][1] shouldBe 5 + } +} diff --git a/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/api/move.kt b/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/api/move.kt new file mode 100644 index 0000000000..b6b37bc99c --- /dev/null +++ b/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/api/move.kt @@ -0,0 +1,86 @@ +package org.jetbrains.kotlinx.dataframe.api + +import io.kotest.assertions.throwables.shouldNotThrowAny +import io.kotest.matchers.shouldBe +import org.jetbrains.kotlinx.dataframe.impl.columns.toColumnSet +import org.junit.Test + +class MoveTests { + + val columnNames = listOf("q", "a.b", "b.c", "w", "a.c.d", "e.f", "b.d", "r") + val columns = columnNames.map { emptyList().toColumn(it) } + val df = columns.toDataFrame() + val grouped = df.move { cols { it.name.contains(".") } }.into { it.name.split(".").toPath() } + + @Test + fun batchGrouping() { + grouped.columnNames() shouldBe listOf("q", "a", "b", "w", "e", "r") + grouped["a"].asColumnGroup().columnNames() shouldBe listOf("b", "c") + grouped["a"]["c"].asColumnGroup().columnNames() shouldBe listOf("d") + grouped["b"].asColumnGroup().columnNames() shouldBe listOf("c", "d") + grouped["e"].asColumnGroup().columnNames() shouldBe listOf("f") + } + + @Test + fun `select all`() { + grouped.getColumnsWithPaths { all() }.map { it.path.joinToString(".") } shouldBe grouped.columnNames() + } + + @Test + fun `select all dfs`() { + val selected = grouped.getColumnsWithPaths { all().allDfs() }.map { it.path.joinToString(".") } + selected shouldBe listOf("a.b", "a.c.d", "b.c", "b.d", "e.f") + } + + @Test + fun batchUngrouping() { + val ungrouped = grouped.move { dfs { it.depth() > 0 && !it.isColumnGroup() } }.into { pathOf(it.path.joinToString(".")) } + ungrouped.columnNames() shouldBe listOf("q", "a.b", "a.c.d", "b.c", "b.d", "w", "e.f", "r") + } + + @Test + fun `ungroup one`() { + val ungrouped = grouped.remove("b").ungroup { it["a"] } + ungrouped.columnNames() shouldBe listOf("q", "b", "c", "w", "e", "r") + ungrouped["c"].asColumnGroup().columnNames() shouldBe listOf("d") + } + + @Test + fun `flatten one`() { + val flattened = grouped.flatten { it["a"] } + flattened.columnNames() shouldBe listOf("q", "b1", "d", "b", "w", "e", "r") + } + + @Test + fun `flatten several`() { + val flattened = grouped.flatten { it["a"]["c"] and it["a"] and it["b"] } + flattened.columnNames() shouldBe listOf("q", "b", "d", "c", "d1", "w", "e", "r") + } + + @Test + fun `flatten all`() { + val flattened = grouped.flatten() + flattened.columnNames() shouldBe listOf("q", "b", "d", "c", "d1", "w", "f", "r") + } + + @Test + fun `selectDfs`() { + val selected = grouped.select { it["a"].dfs { !it.isColumnGroup() } } + selected.columnNames() shouldBe listOf("b", "d") + } + + @Test + fun `columnsWithPath in selector`() { + val selected = grouped.getColumnsWithPaths { it["a"] } + val actual = grouped.getColumnsWithPaths { selected.map { it.allDfs() }.toColumnSet() } + actual.map { it.path.joinToString(".") } shouldBe listOf("a.b", "a.c.d") + } + + @Test + fun `move after last`() { + 
val df = dataFrameOf("1", "2")(1, 2) + shouldNotThrowAny { + df.move("1").after("2") shouldBe dataFrameOf("2", "1")(2, 1) + } + } +} diff --git a/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/api/parse.kt b/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/api/parse.kt new file mode 100644 index 0000000000..e1fbcd607c --- /dev/null +++ b/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/api/parse.kt @@ -0,0 +1,148 @@ +package org.jetbrains.kotlinx.dataframe.api + +import io.kotest.matchers.shouldBe +import kotlinx.datetime.Instant +import kotlinx.datetime.LocalDate +import kotlinx.datetime.LocalDateTime +import org.jetbrains.kotlinx.dataframe.DataFrame +import org.jetbrains.kotlinx.dataframe.type +import org.junit.Test +import java.time.LocalTime +import java.time.Month +import java.util.Locale +import kotlin.reflect.typeOf +import kotlin.time.Duration.Companion.days +import kotlin.time.Duration.Companion.hours +import kotlin.time.Duration.Companion.minutes +import kotlin.time.Duration.Companion.seconds + +class ParseTests { + @Test + fun parseDate() { + val currentLocale = Locale.getDefault() + try { + Locale.setDefault(Locale.forLanguageTag("en-US")) + val date by columnOf("January 1, 2020") + val pattern = "MMMM d, yyyy" + + val parsed = date.parse(ParserOptions(dateTimePattern = pattern)).cast() + + parsed.type() shouldBe typeOf() + with(parsed[0]) { + month shouldBe Month.JANUARY + dayOfMonth shouldBe 1 + year shouldBe 2020 + } + + date.convertToLocalDate(pattern) shouldBe parsed + with(date.toDataFrame()) { + convert { date }.toLocalDate(pattern)[date] shouldBe parsed + parse(ParserOptions(dateTimePattern = pattern))[date] shouldBe parsed + } + + DataFrame.parser.addDateTimePattern(pattern) + + date.parse() shouldBe parsed + date.convertToLocalDate() shouldBe parsed + + DataFrame.parser.resetToDefault() + } finally { + Locale.setDefault(currentLocale) + } + } + + @Test + fun parseDateTime() { + val currentLocale = Locale.getDefault() + try { + Locale.setDefault(Locale.forLanguageTag("en-US")) + val dateTime by columnOf("3 Jun 2008 13:05:30") + val pattern = "d MMM yyyy HH:mm:ss" + val locale = Locale.forLanguageTag("en-US") + + val parsed = dateTime.parse(ParserOptions(dateTimePattern = pattern, locale = locale)).cast() + + parsed.type() shouldBe typeOf() + with(parsed[0]) { + month shouldBe Month.JUNE + dayOfMonth shouldBe 3 + year shouldBe 2008 + hour shouldBe 13 + minute shouldBe 5 + second shouldBe 30 + } + + dateTime.convertToLocalDateTime(pattern, locale) shouldBe parsed + with(dateTime.toDataFrame()) { + convert { dateTime }.toLocalDateTime(pattern)[dateTime] shouldBe parsed + parse(ParserOptions(dateTimePattern = pattern))[dateTime] shouldBe parsed + } + + DataFrame.parser.addDateTimePattern(pattern) + + dateTime.parse(ParserOptions(locale = locale)) shouldBe parsed + dateTime.convertToLocalDateTime(pattern, locale) shouldBe parsed + + DataFrame.parser.resetToDefault() + } finally { + Locale.setDefault(currentLocale) + } + } + + @Test + fun parseTime() { + val time by columnOf(" 13-05-30") + val pattern = "HH-mm-ss" + + val parsed = time.parse(ParserOptions(dateTimePattern = pattern)).cast() + + parsed.type() shouldBe typeOf() + with(parsed[0]) { + hour shouldBe 13 + minute shouldBe 5 + second shouldBe 30 + } + time.convertToLocalTime(pattern) shouldBe parsed + with(time.toDataFrame()) { + convert { time }.toLocalTime(pattern)[time] shouldBe parsed + parse(ParserOptions(dateTimePattern = pattern))[time] 
shouldBe parsed + } + + DataFrame.parser.addDateTimePattern(pattern) + + time.parse() shouldBe parsed + time.convertToLocalTime() shouldBe parsed + + DataFrame.parser.resetToDefault() + } + + @Test + fun `parse date without formatter`() { + val time by columnOf(" 2020-01-06", "2020-01-07 ") + val df = dataFrameOf(time) + val casted = df.convert(time).toLocalDate() + casted[time].type() shouldBe typeOf() + } + + @Test + fun `parse column group`() { + val df = dataFrameOf("a", "b")("1", "2") + df + .group("a", "b").into("c") + .parse("c") + .ungroup("c") shouldBe dataFrameOf("a", "b")(1, 2) + } + + @Test + fun `parse instant`() { + columnOf("2022-01-23T04:29:40Z").parse().type shouldBe typeOf() + columnOf("2022-01-23T04:29:40+01:00").parse().type shouldBe typeOf() + + columnOf("2022-01-23T04:29:40").parse().type shouldBe typeOf() + } + + @Test + fun `parse duration`() { + columnOf("1d 15m", "20h 35m 11s").parse() shouldBe columnOf(1.days + 15.minutes, 20.hours + 35.minutes + 11.seconds) + } +} diff --git a/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/api/pivot.kt b/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/api/pivot.kt new file mode 100644 index 0000000000..7271657290 --- /dev/null +++ b/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/api/pivot.kt @@ -0,0 +1,156 @@ +package org.jetbrains.kotlinx.dataframe.api + +import io.kotest.matchers.shouldBe +import org.junit.Test +import kotlin.reflect.typeOf + +class PivotTests { + + val a by columnOf(0, 1, 1) + val b by columnOf("q", "q", "w") + val c by columnOf('x', 'y', 'z') + + val df = dataFrameOf(a, b, c) + + @Test + fun `simple pivot`() { + val pivoted = df.pivot(b, inward = false).groupBy(a).values(c) + pivoted.columnNames() shouldBe listOf("a", "q", "w") + pivoted.rowsCount() shouldBe 2 + pivoted["q"].values() shouldBe listOf('x', 'y') + pivoted["w"].values() shouldBe listOf(null, 'z') + } + + @Test + fun `pivot with rename`() { + val pivoted = df.pivot(b).groupBy(a).values { c default '?' into "d" and (c into "e") } + pivoted.columnNames() shouldBe listOf("a", "b") + pivoted.rowsCount() shouldBe 2 + + pivoted["b"]["q"]["d"].values() shouldBe listOf('x', 'y') + pivoted["b"]["q"]["e"].values() shouldBe listOf('x', 'y') + pivoted["b"]["w"]["d"].values() shouldBe listOf('?', 'z') + pivoted["b"]["w"]["e"].values() shouldBe listOf(null, 'z') + } + + @Test + fun `pivot aggregate with default`() { + val pivoted = df.pivot(b, inward = false).groupBy(a).aggregate { + get(c).first() default '-' into "first" + get(c).last() into "last" default '?' + } + pivoted.columnsCount() shouldBe 3 + pivoted.rowsCount() shouldBe 2 + val cols = pivoted.getColumns { except(a).allDfs() } + cols.size shouldBe 4 + cols.forEach { + it.type() shouldBe typeOf() + } + pivoted["w"]["first"][0] shouldBe '-' + pivoted["w"]["last"][0] shouldBe '?' 
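The pivot/aggregate tests above rely on per-statement defaults: inside `aggregate { }`, `into` names the resulting column and `default` supplies the value used for pivot keys that have no matching rows in a group. A standalone sketch of the same call shape (a sketch only, reusing the column names and data from `PivotTests`):

```kotlin
import org.jetbrains.kotlinx.dataframe.api.*

fun main() {
    val a by columnOf(0, 1, 1)
    val b by columnOf("q", "q", "w")
    val c by columnOf('x', 'y', 'z')
    val df = dataFrameOf(a, b, c)

    // Group a == 0 has no "w" rows, so its "w" cells fall back to '-' and '?'.
    val pivoted = df.pivot(b, inward = false).groupBy(a).aggregate {
        get(c).first() default '-' into "first"
        get(c).last() into "last" default '?'
    }

    pivoted.print()
}
```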
+ } + + @Test + fun `pivot groupBy inward by default`() { + val df = dataFrameOf( + "a", "b", + )( + 2, "a", + 4, "b", + 3, "c", + ) + val pivoted = df.pivot("b").groupBy("a").matches() + pivoted.columnNames() shouldBe listOf("a", "b") + pivoted.getColumnGroup("b").columnNames() shouldBe listOf("a", "b", "c") + } + + @Test + fun `pivot values with nulls`() { + val df = dataFrameOf("a" to listOf(1, 2, 2, 3), "b" to listOf(1, 1, null, null)) + + df.pivot("a").values("b") shouldBe + dataFrameOf("1", "2", "3")(1, listOf(1, null), null)[0] + + df.pivot("a").values("b", dropNA = true) shouldBe + dataFrameOf("1" to listOf(1), "2" to listOf(1), "3" to listOf(null))[0] + } + + @Test + fun `pivot in aggregate`() { + val df = dataFrameOf("a" to listOf(1, 2, 2), "b" to listOf("q", "w", "q")) + + val expected = dataFrameOf("a", "q", "w")( + 1, 1, 0, + 2, 1, 1 + ).group("q", "w").into("b") + + df.groupBy("a").aggregate { + pivot("b").count() + } shouldBe expected + + df.groupBy("a").aggregate { + pivot("b").count() into "c" + } shouldBe expected.rename("b" to "c") + } + + @Test + fun `pivot two in aggregate`() { + val df = dataFrameOf( + "a" to listOf(1, 2, 2), + "b" to listOf("q", "w", "q"), + "c" to listOf("w", "q", "w"), + ) + + val expected = dataFrameOf( + columnOf(1, 2) named "a", + columnOf( + columnOf( + columnOf(1, 1) named "q", + columnOf(0, 1) named "w" + ) named "b", + columnOf( + columnOf(1, 1) named "w", + columnOf(0, 1) named "q" + ) named "c", + ) named "d" + ) + + df.groupBy("a").aggregate { + pivot("b", "c").count() into "d" + } shouldBe expected + } + + @Test + fun `pivot minBy values`() { + val df = dataFrameOf("a", "b", "c", "d")( + 1, 2, 3, 5, + 1, 0, 2, 4, + 2, 1, 3, 2, + 2, 5, 5, 3 + ) + df.pivot("a").minBy("b").values("c", "d", separate = true) shouldBe + dataFrameOf("c1", "c2", "d1", "d2")( + 2, 3, 4, 2 + ).move { all() }.into { pathOf(it.name()[0].toString(), it.name()[1].toString()) }[0] + } + + @Test + fun `pivot groupBy last with`() { + val df = dataFrameOf("a", "b", "c")( + 1, 2, 3, + 1, 0, 2, + 2, 1, 3, + 2, 1, 5 + ) + df.pivot("a", inward = false) + .groupBy("b") + .default(-1) + .last() + .with { "c"() } shouldBe + dataFrameOf("b", "1", "2")( + 2, 3, -1, + 0, 2, -1, + 1, -1, 5 + ) + } +} diff --git a/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/api/remove.kt b/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/api/remove.kt new file mode 100644 index 0000000000..6faeac9995 --- /dev/null +++ b/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/api/remove.kt @@ -0,0 +1,35 @@ +package org.jetbrains.kotlinx.dataframe.api + +import io.kotest.matchers.shouldBe +import org.jetbrains.kotlinx.dataframe.impl.api.removeImpl +import org.junit.Test + +class RemoveTests { + + val df = dataFrameOf("a", "b")(1, 2) + val b by column() + data class C(val b: Int) + + @Test + fun `simple remove`() { + val e = df.select("a") + df.remove("b") shouldBe e + df.remove { b } shouldBe e + df.remove(C::b) shouldBe e + } + + @Test + fun `remove renamed`() { + val (_, removed) = df.removeImpl { "a" named "c" } + removed[0].data.column!!.name shouldBe "c" + } + + @Test + fun `remove missing column`() { + val d = df.remove { b } + + d.remove("b") shouldBe d + d.remove { b } shouldBe d + d.remove(C::b) shouldBe d + } +} diff --git a/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/api/rename.kt b/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/api/rename.kt new file mode 100644 index 
0000000000..46890a1e25 --- /dev/null +++ b/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/api/rename.kt @@ -0,0 +1,58 @@ +package org.jetbrains.kotlinx.dataframe.api + +import io.kotest.assertions.asClue +import io.kotest.assertions.throwables.shouldNotThrowAny +import io.kotest.matchers.shouldBe +import org.jetbrains.kotlinx.dataframe.impl.columns.asAnyFrameColumn +import org.junit.Test + +class RenameTests { + companion object { + val nestedDf = dataFrameOf("test_name")(dataFrameOf("another_name")(1)) + val nestedColumnGroup = dataFrameOf("test_name")( + dataFrameOf("another_name")(1).first() + ) + val deeplyNestedDf = kotlin.run { + val df = dataFrameOf("another_name")(1) + val rowWithDf = dataFrameOf("group_name")(df).first() + dataFrameOf("test_name")(rowWithDf) + } + val deeplyNestedFrameColumn = kotlin.run { + val df = dataFrameOf("col_0")(1) + val df1 = dataFrameOf("col_1")(df) + dataFrameOf("col_2")(df1) + } + } + + @Test + fun `nested df`() { + nestedDf.renameToCamelCase() shouldBe dataFrameOf("testName")(dataFrameOf("anotherName")(1)) + } + + @Test + fun `nested row`() { + val df = nestedColumnGroup.renameToCamelCase() + df.columnNames() shouldBe listOf("testName") + df.getColumnGroup("testName").columnNames() shouldBe listOf("anotherName") + } + + @Test + fun `deeply nested df`() { + val df = deeplyNestedDf.renameToCamelCase() + df.schema().asClue { + df.columnNames() shouldBe listOf("testName") + df.getColumnGroup("testName").columnNames() shouldBe listOf("groupName") + df["testName"]["groupName"].asAnyFrameColumn()[0].columnNames() shouldBe listOf("anotherName") + } + } + + @Test + fun `deeply nested frame column`() { + val df = deeplyNestedFrameColumn.renameToCamelCase() + df.schema().asClue { + shouldNotThrowAny { + df["col2"].asAnyFrameColumn().firstOrNull()!!["col1"].asAnyFrameColumn().firstOrNull()!!["col0"] + } + } + } +} diff --git a/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/api/reorder.kt b/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/api/reorder.kt new file mode 100644 index 0000000000..87e28271cd --- /dev/null +++ b/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/api/reorder.kt @@ -0,0 +1,31 @@ +package org.jetbrains.kotlinx.dataframe.api + +import io.kotest.matchers.shouldBe +import org.junit.Test + +class ReorderTests { + + @Test + fun simple() { + val df = dataFrameOf("b", "c", "a").fill(1, 0) + df.reorder { all() }.byName().columnNames() shouldBe listOf("a", "b", "c") + df.reorder { "a" and "c" }.byName().columnNames() shouldBe listOf("b", "a", "c") + df.reorder { "a" and "b" }.byName().columnNames() shouldBe listOf("a", "c", "b") + } + + @Test + fun nested() { + val df = dataFrameOf("b", "c", "a").fill(1, 0) + .group("c", "a").into("a") + + df.reorder { all() }.byName().columnNames() shouldBe listOf("a", "b") + + val sorted1 = df.reorder { "a".all() }.byName() + sorted1.columnNames() shouldBe listOf("b", "a") + sorted1["a"].asColumnGroup().columnNames() shouldBe listOf("a", "c") + + val sorted2 = df.reorder { allDfs(true) }.byName() + sorted2.columnNames() shouldBe listOf("a", "b") + sorted2["a"].asColumnGroup().columnNames() shouldBe listOf("a", "c") + } +} diff --git a/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/api/replace.kt b/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/api/replace.kt new file mode 100644 index 0000000000..5df15bf103 --- /dev/null +++ 
b/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/api/replace.kt @@ -0,0 +1,16 @@ +package org.jetbrains.kotlinx.dataframe.api + +import io.kotest.matchers.shouldBe +import org.junit.Test +import kotlin.reflect.typeOf + +class ReplaceTests { + + @Test + fun `replace named`() { + val df = dataFrameOf("a")(1) + val conv = df.replace { "a"() named "b" }.with { it.convertToDouble() } + conv.columnNames() shouldBe listOf("b") + conv.columnTypes() shouldBe listOf(typeOf()) + } +} diff --git a/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/api/reverse.kt b/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/api/reverse.kt new file mode 100644 index 0000000000..39f351db9f --- /dev/null +++ b/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/api/reverse.kt @@ -0,0 +1,27 @@ +package org.jetbrains.kotlinx.dataframe.api + +import io.kotest.matchers.shouldBe +import org.junit.Test + +class ReverseTests { + + @Test + fun dataframe() { + val df = dataFrameOf("a", "b")(1, 2, 3, 4) + df.reverse() shouldBe dataFrameOf("a", "b")(3, 4, 1, 2) + } + + @Test + fun column() { + val col by columnOf(1, 2, 3) + col.reverse() shouldBe col.withValues(listOf(3, 2, 1)) + } + + @Test + fun columnGroup() { + val a by columnOf(1, 2) + val b by columnOf(3, 4) + val col by columnOf(a, b) + col.reverse() shouldBe columnOf(a.reverse(), b.reverse()).named("col") + } +} diff --git a/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/api/sortGroupedDataframe.kt b/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/api/sortGroupedDataframe.kt new file mode 100644 index 0000000000..76d7b07edb --- /dev/null +++ b/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/api/sortGroupedDataframe.kt @@ -0,0 +1,72 @@ +package org.jetbrains.kotlinx.dataframe.api + +import io.kotest.matchers.shouldBe +import org.jetbrains.kotlinx.dataframe.DataFrame +import org.jetbrains.kotlinx.dataframe.alsoDebug +import org.jetbrains.kotlinx.dataframe.io.read +import org.junit.Test + +class SortGroupedDataframeTests { + + @Test + fun `Sorted grouped iris dataset`() { + val irisData = DataFrame.read("src/test/resources/irisDataset.csv") + irisData.alsoDebug() + + irisData.groupBy("variety").let { + it.sortBy("petal.length").toString() shouldBe + it.sortBy { it["petal.length"] }.toString() + } + } + + enum class State { + Idle, Productive, Maintenance + } + + @Test + fun test4() { + class Event(val toolId: String, val state: State, val timestamp: Long) + + val tool1 = "tool_1" + val tool2 = "tool_2" + val tool3 = "tool_3" + + val events = listOf( + Event(tool1, State.Idle, 0), + Event(tool1, State.Productive, 5), + Event(tool2, State.Idle, 0), + Event(tool2, State.Maintenance, 10), + Event(tool2, State.Idle, 20), + Event(tool3, State.Idle, 0), + Event(tool3, State.Productive, 25), + ).toDataFrame() + + val lastTimestamp = events.maxOf { getValue("timestamp") } + val groupBy = events + .groupBy("toolId") + .sortBy("timestamp") + .add("stateDuration") { + (next()?.getValue("timestamp") ?: lastTimestamp) - getValue("timestamp") + } + + groupBy.toDataFrame().alsoDebug() + groupBy.schema().print() + groupBy.keys.print() + groupBy.keys[0].print() + + val df1 = groupBy.updateGroups { + val missingValues = State.values().asList().toDataFrame { + "state" from { it } + } + + val df = it + .fullJoin(missingValues, "state") + .fillNulls("stateDuration") + .with { 100L } + + df.groupBy("state").sumFor("stateDuration") + } + + 
df1.toDataFrame().alsoDebug().isNotEmpty() shouldBe true + } +} diff --git a/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/api/split.kt b/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/api/split.kt new file mode 100644 index 0000000000..717b64ef29 --- /dev/null +++ b/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/api/split.kt @@ -0,0 +1,168 @@ +package org.jetbrains.kotlinx.dataframe.api + +import io.kotest.matchers.shouldBe +import org.jetbrains.kotlinx.dataframe.AnyFrame +import org.jetbrains.kotlinx.dataframe.DataFrame +import org.jetbrains.kotlinx.dataframe.hasNulls +import org.jetbrains.kotlinx.dataframe.impl.DataRowImpl +import org.jetbrains.kotlinx.dataframe.type +import org.junit.Test +import kotlin.reflect.typeOf + +class SplitTests { + + val stringPairDf = dataFrameOf("first", "second")("22-65", "22-66") + val listPairDf = dataFrameOf("first", "second")(listOf("22", "65"), listOf("22", "66")) + + @Test + fun `split with default`() { + val recentDelays = listOf(listOf(23, 47), listOf(), listOf(24, 43, 87), listOf(13), listOf(67, 32)).toColumn("RecentDelays") + val df = dataFrameOf(recentDelays) + val split = df.split(recentDelays).default(0).into { "delay$it" } + split.columns().forEach { + it.hasNulls() shouldBe false + } + split.values().count { it == 0 } shouldBe 7 + } + + @Test + fun `split with regex`() { + val title by columnOf( + "Toy Story (1995)", + "Jumanji (1995)", + "Grumpier Old Men (1995)", + "Waiting to Exhale (1995)" + ) + + val regex = """(.*) \((\d{4})\)""".toRegex() + val split = title.toDataFrame() + .split { title } + .match(regex) + .into("title", "year") + .parse() + split.schema().print() + split["title"].hasNulls shouldBe false + split["year"].type shouldBe typeOf() + } + + @Test + fun `split into columns`() { + val df = dataFrameOf("a", "b", "c")( + 1, 2, 3, + 1, 4, 5, + 2, 3, 4, + 3, 6, 7 + ) + val res = df.groupBy("a").updateGroups { it.remove("a") }.into("g") + .update("g").at(1).with { DataFrame.empty() } + .update("g").at(2).withNull() + .split { "g"() }.intoColumns() + .ungroup("g") + res shouldBe dataFrameOf("a", "b", "c")( + 1, listOf(2, 4), listOf(3, 5), + 2, emptyList(), emptyList(), + 3, emptyList(), emptyList() + ) + } + + @Test + fun `split string by delimiter inward`() { + val res = stringPairDf.split("first", "second").by("-").inward("left", "right") + + res shouldBe dataFrameOf( + columnOf(columnOf("22") named "left", columnOf("65") named "right") named "first", + columnOf(columnOf("22") named "left", columnOf("66") named "right") named "second" + ) + } + + @Test + fun `split string by delimiter into columns with suffixes`() { + val res = stringPairDf.split("first", "second").by("-").into("left", "right") + + res shouldBe dataFrameOf( + columnOf("22") named "left", + columnOf("65") named "right", + columnOf("22") named "left1", + columnOf("66") named "right1" + ) + } + + @Test + fun `split list inward with autogenerated names`() { + val res = listPairDf.split { "first">() and "second">() }.inward() + + res shouldBe dataFrameOf( + columnOf(columnOf("22") named "split1", columnOf("65") named "split2") named "first", + columnOf(columnOf("22") named "split1", columnOf("66") named "split2") named "second" + ) + } + + @Test + fun `split list into with autogenerated names`() { + val res = listPairDf.split { "first">() and "second">() }.into() + + res shouldBe dataFrameOf( + columnOf("22") named "split1", + columnOf("65") named "split2", + columnOf("22") named "split3", + 
columnOf("66") named "split4" + ) + } + + @Test + fun `sequence of splits with autogenerated names`() { + var res = listPairDf.split { "first">() }.into() + res = res.split { "second">() }.into() + + res shouldBe dataFrameOf( + columnOf("22") named "split1", + columnOf("65") named "split2", + columnOf("22") named "split3", + columnOf("66") named "split4" + ) + } + + @Test + fun `split column group inward`() { + val df = stringPairDf.group("first", "second").into("group") + + // Note: this operation replaces original columns in group so there is no name conflict + val res = df.split { "group">() } + .by { it -> listOf(it[1], it[0]) } // swap columns + .inward("first", "second") // no name conflict + + res shouldBe dataFrameOf( + columnOf(columnOf("22-66") named "first", columnOf("22-65") named "second") named "group" + ) + } + + @Test + fun `split column group into hierarchy with correct names`() { + val df = dataFrameOf( + columnOf( + columnOf("a") named "first", + columnOf( + columnOf("b") named "first", + columnOf("c") named "second" + ) named "nestedGroup" + ) named "topLevelGroup", + columnOf("d") named "first", + ) + + val topLevelGroup by columnGroup() + val nestedGroup by topLevelGroup.columnGroup() + + val res = df.split { nestedGroup } + .by { it -> listOf(it[0], it[1]) } + .into("first", "second") // name conflict + + res shouldBe dataFrameOf( + columnOf( + columnOf("a") named "first", + columnOf("b") named "first1", + columnOf("c") named "second" + ) named "topLevelGroup", + columnOf("d") named "first", + ) + } +} diff --git a/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/api/toDataFrame.kt b/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/api/toDataFrame.kt new file mode 100644 index 0000000000..5861975a9a --- /dev/null +++ b/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/api/toDataFrame.kt @@ -0,0 +1,227 @@ +package org.jetbrains.kotlinx.dataframe.api + +import io.kotest.matchers.shouldBe +import org.jetbrains.kotlinx.dataframe.DataFrame +import org.jetbrains.kotlinx.dataframe.DataRow +import org.jetbrains.kotlinx.dataframe.alsoDebug +import org.jetbrains.kotlinx.dataframe.annotations.DataSchema +import org.jetbrains.kotlinx.dataframe.columns.ColumnKind +import org.jetbrains.kotlinx.dataframe.kind +import org.jetbrains.kotlinx.dataframe.type +import org.junit.Ignore +import org.junit.Test +import kotlin.reflect.typeOf + +class CreateDataFrameTests { + + @Test + fun `visibility test`() { + class Data { + private val a = 1 + protected val b = 2 + internal val c = 3 + public val d = 4 + } + + listOf(Data()).toDataFrame() shouldBe dataFrameOf("d")(4) + } + + @Test + fun `exception test`() { + class Data { + val a: Int get() = error("Error") + val b = 1 + } + + val df = listOf(Data()).toDataFrame() + df.columnsCount() shouldBe 2 + df.rowsCount() shouldBe 1 + df.columnTypes() shouldBe listOf(typeOf(), typeOf()) + (df["a"][0] is IllegalStateException) shouldBe true + df["b"][0] shouldBe 1 + } + + @Test + fun `create frame column`() { + val df = dataFrameOf("a")(1) + val res = listOf(1, 2).toDataFrame { + "a" from { it } + "b" from { df } + "c" from { df[0] } + "d" from { if (it == 1) it else null } + "e" from { if (true) it else null } + } + res["a"].kind shouldBe ColumnKind.Value + res["a"].type() shouldBe typeOf() + res["b"].kind shouldBe ColumnKind.Frame + res["c"].kind shouldBe ColumnKind.Group + res["d"].type() shouldBe typeOf() + res["e"].type() shouldBe typeOf() + } + + @Test + fun `preserve fields 
order`() { + class B(val x: Int, val c: String, d: Double) { + val b: Int = x + val a: Double = d + } + + listOf(B(1, "a", 2.0)).toDataFrame().columnNames() shouldBe listOf("x", "c", "a", "b") + } + + @DataSchema + data class A(val v: Int) + + @DataSchema + data class B(val str: String, val frame: DataFrame, val row: DataRow, val list: List, val a: A) + + @Test + fun `preserve properties test`() { + val d1 = listOf(A(2), A(3)).toDataFrame() + val d2 = listOf(A(4), A(5)).toDataFrame() + + val data = listOf( + B("q", d1, d1[0], emptyList(), A(7)), + B("w", d2, d2[1], listOf(A(6)), A(8)) + ) + + val df = data.toDataFrame() + + df.frame.kind shouldBe ColumnKind.Frame + df.row.kind() shouldBe ColumnKind.Group + df.list.kind shouldBe ColumnKind.Frame + df.a.kind() shouldBe ColumnKind.Group + + df.str[1] shouldBe "w" + df.frame[0].v[1] shouldBe 3 + df.row[1].v shouldBe 5 + df.list[1].v[0] shouldBe 6 + df.a[0].v shouldBe 7 + + val df2 = data.toDataFrame { preserve(B::row); properties { preserve(DataFrame::class) } } + df2.frame.kind shouldBe ColumnKind.Value + df2.frame.type shouldBe typeOf>() + df2["row"].kind shouldBe ColumnKind.Value + df2["row"].type shouldBe typeOf>() + df2.list.kind shouldBe ColumnKind.Frame + df2.a.kind() shouldBe ColumnKind.Group + } + + enum class DummyEnum { A } + + @Test + fun `don't convert value types`() { + data class Entry(val a: Int, val b: String, val c: Boolean, val e: DummyEnum) + + val df = listOf(Entry(1, "s", true, DummyEnum.A)).toDataFrame(maxDepth = 100) + df.columns().forEach { + it.kind shouldBe ColumnKind.Value + } + } + + @Test + fun `convert type with no properties`() { + class Child + class Entry(val a: Int, val child: Child) + + val df = listOf(Entry(1, Child())).toDataFrame(maxDepth = 100) + df.rowsCount() shouldBe 1 + + val childCol = df[Entry::child] + childCol.kind() shouldBe ColumnKind.Group + childCol.asColumnGroup().columnsCount() shouldBe 0 + } + + @Test + fun `convert child schemas`() { + class Child2(val s: String) + + @DataSchema + class Child1(val child: Child2) + + @DataSchema + class Entry(val a: Int, val child: Child1) + + val df = listOf(Entry(1, Child1(Child2("s")))).toDataFrame() + df.rowsCount() shouldBe 1 + + val child1 = df[Entry::child] + child1.kind shouldBe ColumnKind.Group + + val child2 = child1.asColumnGroup()[Child1::child] + child2.kind shouldBe ColumnKind.Value + } + + @Test + fun inferredTypeForPropertyWithGenericIterableType() { + class Container(val data: Set) + + val element = Container(setOf(1)) + val value = listOf(element).toDataFrame(maxDepth = 10) + + value["data"].type() shouldBe typeOf>() + } + + @Test + fun inferredNullableTypeForPropertyWithGenericIterableType() { + class Container(val data: List) + + val element = Container(listOf(1, null)) + val value = listOf(element).toDataFrame(maxDepth = 10) + + value["data"].type() shouldBe typeOf>() + } + + @Suppress("unused") + @Test + fun treatErasedGenericAsAny() { + class IncompatibleVersionErrorData(val expected: T, val actual: T) + class DeserializedContainerSource(val incompatibility: IncompatibleVersionErrorData<*>) + val functions = listOf(DeserializedContainerSource(IncompatibleVersionErrorData(1, 2))) + + val df = functions.toDataFrame(maxDepth = 2) + + val col = df.getColumnGroup(DeserializedContainerSource::incompatibility) + col[IncompatibleVersionErrorData<*>::actual].type() shouldBe typeOf() + col[IncompatibleVersionErrorData<*>::expected].type() shouldBe typeOf() + } + + interface Named { + val name: String get() = "default impl" + } + + class 
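// A sketch of how nested objects are handled, following the maxDepth and
// preserve() tests above; the `Employee`/`Address` names are illustrative only.
class Address(val city: String)
class Employee(val name: String, val address: Address)
val employees = listOf(Employee("Alice", Address("Paris")))

// With maxDepth > 0, `address` becomes a column group with a nested "city" column.
val nestedEmployees = employees.toDataFrame(maxDepth = 2)

// With preserve(), Address instances stay as plain values in a single column.
val preservedEmployees = employees.toDataFrame { properties { preserve(Address::class) } }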
Data(override val name: String) : Named + + @Test + fun simpleInheritance() { + val name = "temp" + val df = listOf(Data(name)).toDataFrame(maxDepth = 1) + + df["name"][0] shouldBe name + } + + @Test + fun builtInTypes() { + val string = listOf("aaa", "aa", null) + string.toDataFrame().also { it.print() } shouldBe dataFrameOf("value")(*string.toTypedArray()) + + val int = listOf(1, 2, 3) + int.toDataFrame().alsoDebug() shouldBe dataFrameOf("value")(*int.toTypedArray()) + } + + @Ignore + @Test + fun generateBuiltInsOverrides() { + listOf("Byte", "Short", "Int", "Long", "String", "Char", "Boolean", "UByte", "UShort", "UInt", "ULong").forEach { type -> + val typeParameter = type.first() + val func = """ + @JvmName("toDataFrame$type") + public inline fun Iterable<$typeParameter>.toDataFrame(): DataFrame> = toDataFrame { + ValueProperty<$typeParameter>::value from { it } + }.cast() + """.trimIndent() + println(func) + println() + } + } +} diff --git a/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/api/toList.kt b/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/api/toList.kt new file mode 100644 index 0000000000..3ed621e191 --- /dev/null +++ b/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/api/toList.kt @@ -0,0 +1,28 @@ +package org.jetbrains.kotlinx.dataframe.api + +import io.kotest.matchers.shouldBe +import org.junit.Test + +class DataClassesTests { + + @Test + fun convertDataClasses() { + data class Record(val sex: String, val grade: Int, val count: Int) + + data class PivotedRecord(val grade: Int, val male: Int, val female: Int) + + listOf( + Record("male", 5, 10), + Record("male", 6, 15), + Record("female", 5, 20), + Record("female", 6, 15) + ) + .toDataFrame() + .pivot(Record::sex, inward = false).groupBy(Record::grade).values(Record::count) + .toListOf() shouldBe + listOf( + PivotedRecord(5, 10, 20), + PivotedRecord(6, 15, 15) + ) + } +} diff --git a/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/api/update.kt b/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/api/update.kt new file mode 100644 index 0000000000..4818f0a2c0 --- /dev/null +++ b/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/api/update.kt @@ -0,0 +1,86 @@ +package org.jetbrains.kotlinx.dataframe.api + +import io.kotest.matchers.shouldBe +import org.jetbrains.kotlinx.dataframe.DataFrame +import org.jetbrains.kotlinx.dataframe.annotations.DataSchema +import org.jetbrains.kotlinx.dataframe.size +import org.junit.Test + +class UpdateTests { + + @Test + fun `update empty dataframe with missing column`() { + val df = DataFrame.Empty + val col by column() + df.update { col }.with { 2 } shouldBe df + } + + @DataSchema + interface DataPart { + val a: Int + val b: String + } + + @DataSchema + data class Data( + override val a: Int, + override val b: String, + val c: Boolean, + ) : DataPart + + @Test + fun `update asFrame`() { + val df = listOf( + Data(1, "a", true), + Data(2, "b", false), + ).toDataFrame() + + val group by columnGroup() named "Some Group" + val groupedDf = df.group { a and b }.into { group } + + val res = groupedDf + .update { group } + .where { !c } + .asFrame { + // size should still be full df size + size.nrow shouldBe 2 + + // this will only apply to rows where `.where { !c }` holds + update { a }.with { 0 } + } + + val (first, second) = res[{ group }].map { it.a }.toList() + first shouldBe 1 + second shouldBe 0 + + res[{ group }].name() shouldBe "Some Group" + } + + 
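// A compact sketch of the update DSL used in UpdateTests above (api.* imports
// assumed; data is illustrative): `update` picks the columns, `where` limits
// the affected rows, and `with` supplies the replacement value.
val sample = dataFrameOf("a", "b")(
    1, "x",
    2, "y",
)
// Set "a" to 0 only in rows where "b" == "y"; other rows keep their value.
val updatedSample = sample.update("a").where { it["b"] == "y" }.with { 0 }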
@DataSchema + interface SchemaA { + val i: Int? + } + + @DataSchema + interface SchemaB { + val i: Int + } + + @Test + fun `fillNulls update`() { + val df = dataFrameOf("i")(1, null) + + df.fillNulls(SchemaA::i).with { 42 } + + df.fillNulls(SchemaB::i).with { 42 } + } + + @Test + fun `fillNA update`() { + val df = dataFrameOf("i")(1, null) + + df.fillNA(SchemaA::i).with { 42 } + + df.fillNA(SchemaB::i).with { 42 } + } +} diff --git a/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/codeGen/CodeGenerationTests.kt b/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/codeGen/CodeGenerationTests.kt new file mode 100644 index 0000000000..70c09b5d80 --- /dev/null +++ b/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/codeGen/CodeGenerationTests.kt @@ -0,0 +1,316 @@ +package org.jetbrains.kotlinx.dataframe.codeGen + +import io.kotest.matchers.shouldBe +import org.jetbrains.dataframe.impl.codeGen.CodeGenerator +import org.jetbrains.dataframe.impl.codeGen.InterfaceGenerationMode +import org.jetbrains.dataframe.impl.codeGen.ReplCodeGenerator +import org.jetbrains.dataframe.impl.codeGen.generate +import org.jetbrains.kotlinx.dataframe.AnyRow +import org.jetbrains.kotlinx.dataframe.ColumnsContainer +import org.jetbrains.kotlinx.dataframe.DataColumn +import org.jetbrains.kotlinx.dataframe.DataRow +import org.jetbrains.kotlinx.dataframe.api.dataFrameOf +import org.jetbrains.kotlinx.dataframe.api.dropNulls +import org.jetbrains.kotlinx.dataframe.api.move +import org.jetbrains.kotlinx.dataframe.api.schema +import org.jetbrains.kotlinx.dataframe.api.under +import org.jetbrains.kotlinx.dataframe.columns.ColumnGroup +import org.jetbrains.kotlinx.dataframe.impl.codeGen.ReplCodeGeneratorImpl +import org.jetbrains.kotlinx.dataframe.testSets.person.BaseTest +import org.jetbrains.kotlinx.dataframe.testSets.person.Person +import org.junit.Test + +class CodeGenerationTests : BaseTest() { + + val personClassName = Person::class.qualifiedName!! + + val personShortName = Person::class.simpleName!! + + val dfName = (ColumnsContainer::class).simpleName!! + val dfRowName = (DataRow::class).simpleName!! + val dataCol = (DataColumn::class).simpleName!! + val dataRow = (DataRow::class).simpleName!! + val colGroup = (ColumnGroup::class).simpleName!! + val stringName = String::class.simpleName!! + val intName = Int::class.simpleName!! + + fun expectedProperties(fullTypeName: String, shortTypeName: String) = """ + val $dfName<$fullTypeName>.age: $dataCol<$intName> @JvmName("${shortTypeName}_age") get() = this["age"] as $dataCol<$intName> + val $dfRowName<$fullTypeName>.age: $intName @JvmName("${shortTypeName}_age") get() = this["age"] as $intName + val $dfName<$fullTypeName?>.age: $dataCol<$intName?> @JvmName("Nullable${shortTypeName}_age") get() = this["age"] as $dataCol<$intName?> + val $dfRowName<$fullTypeName?>.age: $intName? @JvmName("Nullable${shortTypeName}_age") get() = this["age"] as $intName? + val $dfName<$fullTypeName>.city: $dataCol<$stringName?> @JvmName("${shortTypeName}_city") get() = this["city"] as $dataCol<$stringName?> + val $dfRowName<$fullTypeName>.city: $stringName? @JvmName("${shortTypeName}_city") get() = this["city"] as $stringName? + val $dfName<$fullTypeName?>.city: $dataCol<$stringName?> @JvmName("Nullable${shortTypeName}_city") get() = this["city"] as $dataCol<$stringName?> + val $dfRowName<$fullTypeName?>.city: $stringName? @JvmName("Nullable${shortTypeName}_city") get() = this["city"] as $stringName? 
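// A sketch contrasting the two fill operations tested above, assuming the
// name-based overloads work like the `update("g")` call earlier in this file:
// `fillNulls` targets null cells only, while `fillNA` also covers NaN values
// in floating-point columns.
val withGaps = dataFrameOf("i", "x")(
    1, 1.0,
    null, Double.NaN,
)
val filledNulls = withGaps.fillNulls("i").with { 0 }    // replaces the null in "i"
val filledNa = withGaps.fillNA("x").with { 0.0 }        // replaces the NaN in "x" (and any nulls)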
+ val $dfName<$fullTypeName>.name: $dataCol<$stringName> @JvmName("${shortTypeName}_name") get() = this["name"] as $dataCol<$stringName> + val $dfRowName<$fullTypeName>.name: $stringName @JvmName("${shortTypeName}_name") get() = this["name"] as $stringName + val $dfName<$fullTypeName?>.name: $dataCol<$stringName?> @JvmName("Nullable${shortTypeName}_name") get() = this["name"] as $dataCol<$stringName?> + val $dfRowName<$fullTypeName?>.name: $stringName? @JvmName("Nullable${shortTypeName}_name") get() = this["name"] as $stringName? + val $dfName<$fullTypeName>.weight: $dataCol<$intName?> @JvmName("${shortTypeName}_weight") get() = this["weight"] as $dataCol<$intName?> + val $dfRowName<$fullTypeName>.weight: $intName? @JvmName("${shortTypeName}_weight") get() = this["weight"] as $intName? + val $dfName<$fullTypeName?>.weight: $dataCol<$intName?> @JvmName("Nullable${shortTypeName}_weight") get() = this["weight"] as $dataCol<$intName?> + val $dfRowName<$fullTypeName?>.weight: $intName? @JvmName("Nullable${shortTypeName}_weight") get() = this["weight"] as $intName? + """.trimIndent() + + @Test + fun `generate marker interface`() { + val codeGen = ReplCodeGenerator.create() + val generated = codeGen.process(df, ::df) + val typeName = ReplCodeGeneratorImpl.markerInterfacePrefix + val expectedDeclaration = """ + @DataSchema + interface $typeName + + """.trimIndent() + "\n" + expectedProperties(typeName, typeName) + + val expectedConverter = "it.cast<$typeName>()" + + generated.declarations shouldBe expectedDeclaration + generated.converter("it") shouldBe expectedConverter + + val rowGenerated = codeGen.process(df[0], ::typedRow) + rowGenerated.hasDeclarations shouldBe true + rowGenerated.hasConverter shouldBe true + } + + val row: AnyRow? = null + + val typedRow: DataRow = typed[0] + + @Test + fun `generate marker interface for row`() { + val property = ::row + val generated = ReplCodeGenerator.create().process(df[0], property) + val typeName = ReplCodeGeneratorImpl.markerInterfacePrefix + val expectedDeclaration = """ + @DataSchema + interface $typeName + + """.trimIndent() + "\n" + expectedProperties(typeName, typeName) + + val expectedConverter = "it.cast<$typeName>()" + + generated.declarations shouldBe expectedDeclaration + generated.converter("it") shouldBe expectedConverter + } + + @Test + fun `generate marker interface for nested data frame`() { + val property = ::df + val grouped = df.move { name and city }.under("nameAndCity") + val generated = ReplCodeGenerator.create().process(grouped, property) + val type1 = ReplCodeGeneratorImpl.markerInterfacePrefix + "1" + val type2 = ReplCodeGeneratorImpl.markerInterfacePrefix + val declaration1 = """ + @DataSchema(isOpen = false) + interface $type1 + + val $dfName<$type1>.city: $dataCol<$stringName?> @JvmName("${type1}_city") get() = this["city"] as $dataCol<$stringName?> + val $dfRowName<$type1>.city: $stringName? @JvmName("${type1}_city") get() = this["city"] as $stringName? + val $dfName<$type1?>.city: $dataCol<$stringName?> @JvmName("Nullable${type1}_city") get() = this["city"] as $dataCol<$stringName?> + val $dfRowName<$type1?>.city: $stringName? @JvmName("Nullable${type1}_city") get() = this["city"] as $stringName? 
+ val $dfName<$type1>.name: $dataCol<$stringName> @JvmName("${type1}_name") get() = this["name"] as $dataCol<$stringName> + val $dfRowName<$type1>.name: $stringName @JvmName("${type1}_name") get() = this["name"] as $stringName + val $dfName<$type1?>.name: $dataCol<$stringName?> @JvmName("Nullable${type1}_name") get() = this["name"] as $dataCol<$stringName?> + val $dfRowName<$type1?>.name: $stringName? @JvmName("Nullable${type1}_name") get() = this["name"] as $stringName? + + """.trimIndent() + + val declaration2 = """ + @DataSchema + interface $type2 + + val $dfName<$type2>.age: $dataCol<$intName> @JvmName("${type2}_age") get() = this["age"] as $dataCol<$intName> + val $dfRowName<$type2>.age: $intName @JvmName("${type2}_age") get() = this["age"] as $intName + val $dfName<$type2?>.age: $dataCol<$intName?> @JvmName("Nullable${type2}_age") get() = this["age"] as $dataCol<$intName?> + val $dfRowName<$type2?>.age: $intName? @JvmName("Nullable${type2}_age") get() = this["age"] as $intName? + val $dfName<$type2>.nameAndCity: $colGroup<$type1> @JvmName("${type2}_nameAndCity") get() = this["nameAndCity"] as $colGroup<$type1> + val $dfRowName<$type2>.nameAndCity: $dataRow<$type1> @JvmName("${type2}_nameAndCity") get() = this["nameAndCity"] as $dataRow<$type1> + val $dfName<$type2?>.nameAndCity: $colGroup<$type1?> @JvmName("Nullable${type2}_nameAndCity") get() = this["nameAndCity"] as $colGroup<$type1?> + val $dfRowName<$type2?>.nameAndCity: $dataRow<$type1?> @JvmName("Nullable${type2}_nameAndCity") get() = this["nameAndCity"] as $dataRow<$type1?> + val $dfName<$type2>.weight: $dataCol<$intName?> @JvmName("${type2}_weight") get() = this["weight"] as $dataCol<$intName?> + val $dfRowName<$type2>.weight: $intName? @JvmName("${type2}_weight") get() = this["weight"] as $intName? + val $dfName<$type2?>.weight: $dataCol<$intName?> @JvmName("Nullable${type2}_weight") get() = this["weight"] as $dataCol<$intName?> + val $dfRowName<$type2?>.weight: $intName? @JvmName("Nullable${type2}_weight") get() = this["weight"] as $intName? + """.trimIndent() + + val expectedConverter = "it.cast<$type2>()" + + generated.declarations shouldBe declaration1 + "\n" + declaration2 + generated.converter("it") shouldBe expectedConverter + } + + @Test + fun `generate extension properties`() { + val personClass = (Person::class).qualifiedName!! 
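// A minimal sketch of the REPL code-generation round trip these tests verify:
// process() inspects a DataFrame and returns generated @DataSchema declarations
// plus a converter expression that casts the value to the new marker type.
// (`df` here stands for any top-level DataFrame property; imports as in this file.)
val codeGen = ReplCodeGenerator.create()
val generated = codeGen.process(df, ::df)
println(generated.declarations)     // "@DataSchema\ninterface _DataFrameType" plus extension accessors
println(generated.converter("it"))  // "it.cast<_DataFrameType>()"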
+ val expected = """ + @DataSchema + interface $personClass + """.trimIndent() + "\n" + expectedProperties(personClassName, personShortName) + + val code = CodeGenerator.create(useFqNames = false) + .generate(InterfaceGenerationMode.NoFields, extensionProperties = true).declarations + code shouldBe expected + } + + @Test + fun `frame to markers`() { + val f = SchemaProcessor.create("Temp") + val marker = f.process(df.schema(), true) + marker.isOpen shouldBe true + f.generatedMarkers shouldBe listOf(marker) + } + + @Test + fun `generate derived interface`() { + val codeGen = CodeGenerator.create() + val schema = df.dropNulls().schema() + val code = codeGen.generate( + schema = schema, + name = "ValidPerson", + fields = true, + extensionProperties = true, + isOpen = true, + visibility = MarkerVisibility.IMPLICIT_PUBLIC, + knownMarkers = listOf( + MarkersExtractor.get() + ) + ).code.declarations + val packageName = "org.jetbrains.kotlinx.dataframe" + val expected = """ + @DataSchema + interface ValidPerson : $personClassName { + override val city: kotlin.String + override val weight: kotlin.Int + } + + val $packageName.ColumnsContainer.city: $packageName.DataColumn @JvmName("ValidPerson_city") get() = this["city"] as $packageName.DataColumn + val $packageName.DataRow.city: kotlin.String @JvmName("ValidPerson_city") get() = this["city"] as kotlin.String + val $packageName.ColumnsContainer.city: $packageName.DataColumn @JvmName("NullableValidPerson_city") get() = this["city"] as $packageName.DataColumn + val $packageName.DataRow.city: kotlin.String? @JvmName("NullableValidPerson_city") get() = this["city"] as kotlin.String? + val $packageName.ColumnsContainer.weight: $packageName.DataColumn @JvmName("ValidPerson_weight") get() = this["weight"] as $packageName.DataColumn + val $packageName.DataRow.weight: kotlin.Int @JvmName("ValidPerson_weight") get() = this["weight"] as kotlin.Int + val $packageName.ColumnsContainer.weight: $packageName.DataColumn @JvmName("NullableValidPerson_weight") get() = this["weight"] as $packageName.DataColumn + val $packageName.DataRow.weight: kotlin.Int? @JvmName("NullableValidPerson_weight") get() = this["weight"] as kotlin.Int? + """.trimIndent() + code shouldBe expected + } + + @Test + fun `empty interface with properties`() { + val codeGen = CodeGenerator.create(useFqNames = false) + val code = codeGen.generate(df.schema(), "Person", false, true, true).code.declarations + val expected = """ + @DataSchema + interface Person + + """.trimIndent() + "\n" + expectedProperties("Person", "Person") + code shouldBe expected + } + + @Test + fun `interface with fields`() { + val repl = CodeGenerator.create() + val code = repl.generate(typed.schema(), "DataType", true, false, false).code.declarations + code shouldBe """ + @DataSchema(isOpen = false) + interface DataType { + val age: kotlin.Int + val city: kotlin.String? + val name: kotlin.String + val weight: kotlin.Int? + } + """.trimIndent() + } + + @Test + fun `declaration with internal visibility`() { + val repl = CodeGenerator.create() + val code = + repl.generate(typed.schema(), "DataType", true, true, false, MarkerVisibility.INTERNAL).code.declarations + val packageName = "org.jetbrains.kotlinx.dataframe" + code shouldBe """ + @DataSchema(isOpen = false) + internal interface DataType { + val age: kotlin.Int + val city: kotlin.String? + val name: kotlin.String + val weight: kotlin.Int? 
+ } + + internal val $packageName.ColumnsContainer.age: $packageName.DataColumn @JvmName("DataType_age") get() = this["age"] as $packageName.DataColumn + internal val $packageName.DataRow.age: kotlin.Int @JvmName("DataType_age") get() = this["age"] as kotlin.Int + internal val $packageName.ColumnsContainer.age: $packageName.DataColumn @JvmName("NullableDataType_age") get() = this["age"] as $packageName.DataColumn + internal val $packageName.DataRow.age: kotlin.Int? @JvmName("NullableDataType_age") get() = this["age"] as kotlin.Int? + internal val $packageName.ColumnsContainer.city: $packageName.DataColumn @JvmName("DataType_city") get() = this["city"] as $packageName.DataColumn + internal val $packageName.DataRow.city: kotlin.String? @JvmName("DataType_city") get() = this["city"] as kotlin.String? + internal val $packageName.ColumnsContainer.city: $packageName.DataColumn @JvmName("NullableDataType_city") get() = this["city"] as $packageName.DataColumn + internal val $packageName.DataRow.city: kotlin.String? @JvmName("NullableDataType_city") get() = this["city"] as kotlin.String? + internal val $packageName.ColumnsContainer.name: $packageName.DataColumn @JvmName("DataType_name") get() = this["name"] as $packageName.DataColumn + internal val $packageName.DataRow.name: kotlin.String @JvmName("DataType_name") get() = this["name"] as kotlin.String + internal val $packageName.ColumnsContainer.name: $packageName.DataColumn @JvmName("NullableDataType_name") get() = this["name"] as $packageName.DataColumn + internal val $packageName.DataRow.name: kotlin.String? @JvmName("NullableDataType_name") get() = this["name"] as kotlin.String? + internal val $packageName.ColumnsContainer.weight: $packageName.DataColumn @JvmName("DataType_weight") get() = this["weight"] as $packageName.DataColumn + internal val $packageName.DataRow.weight: kotlin.Int? @JvmName("DataType_weight") get() = this["weight"] as kotlin.Int? + internal val $packageName.ColumnsContainer.weight: $packageName.DataColumn @JvmName("NullableDataType_weight") get() = this["weight"] as $packageName.DataColumn + internal val $packageName.DataRow.weight: kotlin.Int? @JvmName("NullableDataType_weight") get() = this["weight"] as kotlin.Int? + """.trimIndent() + } + + @Test + fun `declaration with explicit public visibility`() { + val repl = CodeGenerator.create() + val code = repl.generate( + typed.schema(), + "DataType", + true, + true, + false, + MarkerVisibility.EXPLICIT_PUBLIC + ).code.declarations + val packageName = "org.jetbrains.kotlinx.dataframe" + code shouldBe """ + @DataSchema(isOpen = false) + public interface DataType { + public val age: kotlin.Int + public val city: kotlin.String? + public val name: kotlin.String + public val weight: kotlin.Int? + } + + public val $packageName.ColumnsContainer.age: $packageName.DataColumn @JvmName("DataType_age") get() = this["age"] as $packageName.DataColumn + public val $packageName.DataRow.age: kotlin.Int @JvmName("DataType_age") get() = this["age"] as kotlin.Int + public val $packageName.ColumnsContainer.age: $packageName.DataColumn @JvmName("NullableDataType_age") get() = this["age"] as $packageName.DataColumn + public val $packageName.DataRow.age: kotlin.Int? @JvmName("NullableDataType_age") get() = this["age"] as kotlin.Int? + public val $packageName.ColumnsContainer.city: $packageName.DataColumn @JvmName("DataType_city") get() = this["city"] as $packageName.DataColumn + public val $packageName.DataRow.city: kotlin.String? @JvmName("DataType_city") get() = this["city"] as kotlin.String? 
+ public val $packageName.ColumnsContainer.city: $packageName.DataColumn @JvmName("NullableDataType_city") get() = this["city"] as $packageName.DataColumn + public val $packageName.DataRow.city: kotlin.String? @JvmName("NullableDataType_city") get() = this["city"] as kotlin.String? + public val $packageName.ColumnsContainer.name: $packageName.DataColumn @JvmName("DataType_name") get() = this["name"] as $packageName.DataColumn + public val $packageName.DataRow.name: kotlin.String @JvmName("DataType_name") get() = this["name"] as kotlin.String + public val $packageName.ColumnsContainer.name: $packageName.DataColumn @JvmName("NullableDataType_name") get() = this["name"] as $packageName.DataColumn + public val $packageName.DataRow.name: kotlin.String? @JvmName("NullableDataType_name") get() = this["name"] as kotlin.String? + public val $packageName.ColumnsContainer.weight: $packageName.DataColumn @JvmName("DataType_weight") get() = this["weight"] as $packageName.DataColumn + public val $packageName.DataRow.weight: kotlin.Int? @JvmName("DataType_weight") get() = this["weight"] as kotlin.Int? + public val $packageName.ColumnsContainer.weight: $packageName.DataColumn @JvmName("NullableDataType_weight") get() = this["weight"] as $packageName.DataColumn + public val $packageName.DataRow.weight: kotlin.Int? @JvmName("NullableDataType_weight") get() = this["weight"] as kotlin.Int? + """.trimIndent() + } + + @Test + fun `column starts with number`() { + val df = dataFrameOf("1a", "-b", "?c")(1, 2, 3) + val repl = CodeGenerator.create() + val declarations = repl.generate(df.schema(), "DataType", false, true, false).code.declarations + df.columnNames().forEach { + val matches = "`$it`".toRegex().findAll(declarations).toList() + matches.size shouldBe 4 + } + } + + @Test + fun patterns() { + """^[\d]""".toRegex().matches("3fds") + } +} diff --git a/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/codeGen/MatchSchemeTests.kt b/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/codeGen/MatchSchemeTests.kt new file mode 100644 index 0000000000..8a4bdd7df2 --- /dev/null +++ b/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/codeGen/MatchSchemeTests.kt @@ -0,0 +1,100 @@ +package org.jetbrains.kotlinx.dataframe.codeGen + +import io.kotest.matchers.shouldBe +import org.jetbrains.dataframe.impl.codeGen.ReplCodeGenerator +import org.jetbrains.kotlinx.dataframe.DataFrame +import org.jetbrains.kotlinx.dataframe.DataRow +import org.jetbrains.kotlinx.dataframe.annotations.DataSchema +import org.jetbrains.kotlinx.dataframe.api.add +import org.jetbrains.kotlinx.dataframe.api.cast +import org.jetbrains.kotlinx.dataframe.io.readJsonStr +import org.junit.Test + +class MatchSchemeTests { + + @DataSchema(isOpen = false) + interface Snippet { + val position: Int + val info: String + } + + @DataSchema(isOpen = false) + interface Item { + val kind: String + val id: String + val snippet: DataRow + } + + @DataSchema(isOpen = false) + interface PageInfo { + val totalResults: Int + val resultsPerPage: Int + val snippets: DataFrame + } + + @DataSchema + interface DataRecord { + val kind: String + val items: DataFrame + val pageInfo: DataRow + } + + val json = """ + { + "kind": "qq", + "pageInfo": { + "totalResults": 2, + "resultsPerPage": 3, + "snippets": [ + { + "position": 3, + "info": "str" + }, + { + "position": 5, + "info": "txt" + } + ] + }, + "items": [ + { + "kind": "asd", + "id": "zxc", + "snippet": { + "position": 2, + "info": "qwe" + } + } + ] + } + 
""".trimIndent() + + val df = DataFrame.readJsonStr(json) + + val typed = df.cast() + + @Test + fun `marker is reused`() { + val codeGen = ReplCodeGenerator.create() + codeGen.process(DataRecord::class) + codeGen.process(typed, ::typed).hasConverter shouldBe false + val generated = codeGen.process(df, ::df) + generated.declarations.split("\n").size shouldBe 1 + } + + val modified = df.add("new") { 4 } + + @Test + fun `marker is implemented`() { + val codeGen = ReplCodeGenerator.create() + codeGen.process(DataRecord::class) + val generated = codeGen.process(modified, ::modified) + generated.declarations.contains(DataRecord::class.simpleName!!) shouldBe true + } + + @Test + fun printSchema() { + val res = df.generateCode(false, true) + println(res) + } +} diff --git a/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/codeGen/NameGenerationTests.kt b/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/codeGen/NameGenerationTests.kt new file mode 100644 index 0000000000..dce359f3b1 --- /dev/null +++ b/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/codeGen/NameGenerationTests.kt @@ -0,0 +1,53 @@ +package org.jetbrains.kotlinx.dataframe.codeGen + +import io.kotest.matchers.shouldBe +import org.jetbrains.dataframe.impl.codeGen.CodeGenerator +import org.jetbrains.dataframe.impl.codeGen.ReplCodeGenerator +import org.jetbrains.dataframe.impl.codeGen.process +import org.jetbrains.kotlinx.dataframe.annotations.ColumnName +import org.jetbrains.kotlinx.dataframe.annotations.DataSchema +import org.jetbrains.kotlinx.dataframe.api.dataFrameOf +import org.jetbrains.kotlinx.dataframe.api.schema +import org.junit.Test + +class NameGenerationTests { + + val df = dataFrameOf("first column", "second_column", "____")(3, 5, 7) + + @Test + fun `interface generation`() { + val codeGen = CodeGenerator.create() + val code = codeGen.generate(df.schema(), "DataType", true, false, isOpen = false, MarkerVisibility.IMPLICIT_PUBLIC, emptyList()).code + + val expected = """ + @DataSchema(isOpen = false) + interface DataType { + @ColumnName("____") + val `____`: kotlin.Int + @ColumnName("first column") + val `first column`: kotlin.Int + val second_column: kotlin.Int + } + """.trimIndent() + + code.declarations shouldBe expected + } + + @DataSchema + interface DataRecord { + @ColumnName("first column") + val `first column`: Int + @ColumnName("second column") + val `second column`: Int + } + + @Test + fun `properties generation`() { + val codeGen = ReplCodeGenerator.create() + val code = codeGen.process().split("\n") + code.size shouldBe 8 + code.forEach { + it.count { it == '`' } shouldBe 2 + } + } +} diff --git a/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/codeGen/ReplCodeGenTests.kt b/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/codeGen/ReplCodeGenTests.kt new file mode 100644 index 0000000000..50a3fbbf2f --- /dev/null +++ b/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/codeGen/ReplCodeGenTests.kt @@ -0,0 +1,222 @@ +package org.jetbrains.kotlinx.dataframe.codeGen + +import io.kotest.matchers.shouldBe +import io.kotest.matchers.string.shouldNotBeEmpty +import org.jetbrains.dataframe.impl.codeGen.ReplCodeGenerator +import org.jetbrains.dataframe.impl.codeGen.process +import org.jetbrains.kotlinx.dataframe.ColumnsContainer +import org.jetbrains.kotlinx.dataframe.DataColumn +import org.jetbrains.kotlinx.dataframe.DataRow +import org.jetbrains.kotlinx.dataframe.annotations.DataSchema 
+import org.jetbrains.kotlinx.dataframe.api.dataFrameOf +import org.jetbrains.kotlinx.dataframe.api.filter +import org.jetbrains.kotlinx.dataframe.api.select +import org.jetbrains.kotlinx.dataframe.impl.codeGen.ReplCodeGeneratorImpl +import org.jetbrains.kotlinx.dataframe.testSets.person.BaseTest +import org.jetbrains.kotlinx.dataframe.testSets.person.city +import org.jetbrains.kotlinx.dataframe.testSets.person.weight +import org.junit.Test + +class ReplCodeGenTests : BaseTest() { + + val dfName = (ColumnsContainer::class).simpleName!! + val dfRowName = (DataRow::class).simpleName!! + val dataCol = (DataColumn::class).simpleName!! + val intName = Int::class.simpleName!! + val stringName = String::class.simpleName!! + + class Test1 { + @DataSchema + interface _DataFrameType + + @DataSchema(isOpen = false) + interface _DataFrameType1 : _DataFrameType + + @DataSchema(isOpen = false) + interface _DataFrameType2 : _DataFrameType + } + + class Test2 { + @DataSchema + interface _DataFrameType + + @DataSchema + interface _DataFrameType1 + + @DataSchema(isOpen = false) + interface _DataFrameType2 : _DataFrameType, _DataFrameType1 + } + + object Test3 { + @DataSchema + interface A { val x: List<*> } + + @DataSchema + interface B : A + + @DataSchema(isOpen = false) + interface C : B { + override val x: List + } + + @DataSchema + interface D : A + + val df = dataFrameOf("x")(listOf(1)) + } + + @Test + fun `process derived markers`() { + val repl = ReplCodeGenerator.create() + val code = repl.process(df).declarations + + val marker = ReplCodeGeneratorImpl.markerInterfacePrefix + val markerFull = Test1._DataFrameType::class.qualifiedName!! + + val expected = """ + @DataSchema + interface $marker + + val $dfName<$marker>.age: $dataCol<$intName> @JvmName("${marker}_age") get() = this["age"] as $dataCol<$intName> + val $dfRowName<$marker>.age: $intName @JvmName("${marker}_age") get() = this["age"] as $intName + val $dfName<$marker?>.age: $dataCol<$intName?> @JvmName("Nullable${marker}_age") get() = this["age"] as $dataCol<$intName?> + val $dfRowName<$marker?>.age: $intName? @JvmName("Nullable${marker}_age") get() = this["age"] as $intName? + val $dfName<$marker>.city: $dataCol<$stringName?> @JvmName("${marker}_city") get() = this["city"] as $dataCol<$stringName?> + val $dfRowName<$marker>.city: $stringName? @JvmName("${marker}_city") get() = this["city"] as $stringName? + val $dfName<$marker?>.city: $dataCol<$stringName?> @JvmName("Nullable${marker}_city") get() = this["city"] as $dataCol<$stringName?> + val $dfRowName<$marker?>.city: $stringName? @JvmName("Nullable${marker}_city") get() = this["city"] as $stringName? + val $dfName<$marker>.name: $dataCol<$stringName> @JvmName("${marker}_name") get() = this["name"] as $dataCol<$stringName> + val $dfRowName<$marker>.name: $stringName @JvmName("${marker}_name") get() = this["name"] as $stringName + val $dfName<$marker?>.name: $dataCol<$stringName?> @JvmName("Nullable${marker}_name") get() = this["name"] as $dataCol<$stringName?> + val $dfRowName<$marker?>.name: $stringName? @JvmName("Nullable${marker}_name") get() = this["name"] as $stringName? + val $dfName<$marker>.weight: $dataCol<$intName?> @JvmName("${marker}_weight") get() = this["weight"] as $dataCol<$intName?> + val $dfRowName<$marker>.weight: $intName? @JvmName("${marker}_weight") get() = this["weight"] as $intName? + val $dfName<$marker?>.weight: $dataCol<$intName?> @JvmName("Nullable${marker}_weight") get() = this["weight"] as $dataCol<$intName?> + val $dfRowName<$marker?>.weight: $intName? 
@JvmName("Nullable${marker}_weight") get() = this["weight"] as $intName? + """.trimIndent() + code shouldBe expected + + val code2 = repl.process() + code2 shouldBe "" + + val df3 = typed.filter { city != null } + val code3 = repl.process(df3).declarations + val marker3 = marker + "1" + val expected3 = """ + @DataSchema + interface $marker3 : $markerFull + + val $dfName<$marker3>.city: $dataCol<$stringName> @JvmName("${marker3}_city") get() = this["city"] as $dataCol<$stringName> + val $dfRowName<$marker3>.city: $stringName @JvmName("${marker3}_city") get() = this["city"] as $stringName + val $dfName<$marker3?>.city: $dataCol<$stringName?> @JvmName("Nullable${marker3}_city") get() = this["city"] as $dataCol<$stringName?> + val $dfRowName<$marker3?>.city: $stringName? @JvmName("Nullable${marker3}_city") get() = this["city"] as $stringName? + """.trimIndent() + + code3 shouldBe expected3 + + val code4 = repl.process() + code4 shouldBe "" + + val df5 = typed.filter { weight != null } + val code5 = repl.process(df5).declarations + val marker5 = marker + "2" + val expected5 = """ + @DataSchema + interface $marker5 : $markerFull + + val $dfName<$marker5>.weight: $dataCol<$intName> @JvmName("${marker5}_weight") get() = this["weight"] as $dataCol<$intName> + val $dfRowName<$marker5>.weight: $intName @JvmName("${marker5}_weight") get() = this["weight"] as $intName + val $dfName<$marker5?>.weight: $dataCol<$intName?> @JvmName("Nullable${marker5}_weight") get() = this["weight"] as $dataCol<$intName?> + val $dfRowName<$marker5?>.weight: $intName? @JvmName("Nullable${marker5}_weight") get() = this["weight"] as $intName? + """.trimIndent() + code5 shouldBe expected5 + + val code6 = repl.process() + code6 shouldBe "" + } + + @Test + fun `process markers union`() { + val repl = ReplCodeGenerator.create() + repl.process(typed.select { age and name }) + repl.process() shouldBe "" + repl.process(typed.select { city and weight }) + repl.process() shouldBe "" + + val expected = """ + @DataSchema + interface ${Test2._DataFrameType2::class.simpleName!!} : ${Test2._DataFrameType::class.qualifiedName}, ${Test2._DataFrameType1::class.qualifiedName} + + """.trimIndent() + + val code = repl.process(typed).declarations.trimIndent() + code shouldBe expected + } + + @Test + fun `process wrong marker inheritance`() { + val repl = ReplCodeGenerator.create() + repl.process(typed.select { age and name }) + repl.process() shouldBe "" + repl.process(typed.select { city and weight }) + repl.process() shouldBe "" // processed wrong marker (doesn't implement Test2.DataFrameType) + + val marker = Test2._DataFrameType2::class.simpleName!! + val expected = """ + @DataSchema + interface $marker : ${Test2._DataFrameType::class.qualifiedName} + + val $dfName<$marker>.city: $dataCol<$stringName?> @JvmName("${marker}_city") get() = this["city"] as $dataCol<$stringName?> + val $dfRowName<$marker>.city: $stringName? @JvmName("${marker}_city") get() = this["city"] as $stringName? + val $dfName<$marker?>.city: $dataCol<$stringName?> @JvmName("Nullable${marker}_city") get() = this["city"] as $dataCol<$stringName?> + val $dfRowName<$marker?>.city: $stringName? @JvmName("Nullable${marker}_city") get() = this["city"] as $stringName? + val $dfName<$marker>.weight: $dataCol<$intName?> @JvmName("${marker}_weight") get() = this["weight"] as $dataCol<$intName?> + val $dfRowName<$marker>.weight: $intName? @JvmName("${marker}_weight") get() = this["weight"] as $intName? 
+ val $dfName<$marker?>.weight: $dataCol<$intName?> @JvmName("Nullable${marker}_weight") get() = this["weight"] as $dataCol<$intName?> + val $dfRowName<$marker?>.weight: $intName? @JvmName("Nullable${marker}_weight") get() = this["weight"] as $intName? + """.trimIndent() + + val code = repl.process(typed).declarations.trimIndent() + code shouldBe expected + } + + @Test + fun `process overridden property`() { + val repl = ReplCodeGenerator.create() + repl.process() + repl.process() + repl.process() + val c = repl.process(Test3.df, Test3::df) + c.declarations.shouldNotBeEmpty() + } + + @Test + fun `process diamond inheritance`() { + val repl = ReplCodeGenerator.create() + repl.process() + repl.process() + repl.process() + val c = repl.process(Test3.df, Test3::df) + """val .*ColumnsContainer<\w*>.x:""".toRegex().findAll(c.declarations).count() shouldBe 1 + } + + object Test4 { + + @DataSchema + interface A { val a: Int? } + + @DataSchema + interface B { val a: Int? } + + val df = dataFrameOf("a")(1) + } + + @Test + fun `process duplicate override`() { + val repl = ReplCodeGenerator.create() + repl.process() + repl.process() + val c = repl.process(Test4.df, Test4::df) + """val .*ColumnsContainer<\w*>.a:""".toRegex().findAll(c.declarations).count() shouldBe 1 + } +} diff --git a/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/codeGen/ShortNamesRenderingTest.kt b/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/codeGen/ShortNamesRenderingTest.kt new file mode 100644 index 0000000000..07e6837998 --- /dev/null +++ b/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/codeGen/ShortNamesRenderingTest.kt @@ -0,0 +1,137 @@ +package org.jetbrains.kotlinx.dataframe.internal.codeGen + +import io.kotest.assertions.asClue +import io.kotest.matchers.shouldBe +import org.jetbrains.kotlinx.dataframe.DataFrame +import org.jetbrains.kotlinx.dataframe.DataRow +import org.jetbrains.kotlinx.dataframe.annotations.DataSchema +import org.jetbrains.kotlinx.dataframe.codeGen.MarkersExtractor +import org.jetbrains.kotlinx.dataframe.impl.codeGen.ShortNames +import org.jetbrains.kotlinx.dataframe.impl.codeGen.TypeRenderingStrategy +import org.junit.Test + +internal class ShortNamesRenderingTest : TypeRenderingStrategy by ShortNames { + + interface Marker + + @DataSchema + interface DataSchemaMarker + + interface A { + val a: DataSchemaMarker + val b: Int + val c: List + val d: () -> Unit + val e: DataRow + val f: DataFrame + } + + private val fields by lazy { + MarkersExtractor.get(A::class).allFields.associateBy { it.fieldName.unquoted } + } + + @Test + fun `data schema type`() { + fields.keys.asClue { + fields["a"]!!.renderAccessorFieldType() shouldBe "DataRow" + fields["a"]!!.renderFieldType() shouldBe "org.jetbrains.kotlinx.dataframe.internal.codeGen.ShortNamesRenderingTest.DataSchemaMarker" + } + } + + @Test + fun `builtin type`() { + fields.keys.asClue { + fields["b"]!!.renderAccessorFieldType() shouldBe "Int" + fields["b"]!!.renderFieldType() shouldBe "Int" + } + } + + @Test + fun `list parametrized by data schema type`() { + fields.keys.asClue { + fields["c"]!!.renderAccessorFieldType() shouldBe "DataFrame" + fields["c"]!!.renderFieldType() shouldBe "DataFrame" + } + } + + @Test + fun `short functional types are not supported`() { + fields.keys.asClue { + fields["d"]!!.renderAccessorFieldType() shouldBe "() -> kotlin.Unit" + fields["d"]!!.renderFieldType() shouldBe "() -> kotlin.Unit" + } + } + + @Test + fun `data row`() { + fields.keys.asClue { + 
fields["e"]!!.renderAccessorFieldType() shouldBe "DataRow" + fields["e"]!!.renderFieldType() shouldBe "org.jetbrains.kotlinx.dataframe.internal.codeGen.ShortNamesRenderingTest.Marker" + } + } + + @Test + fun `data frame`() { + fields.keys.asClue { + fields["f"]!!.renderAccessorFieldType() shouldBe "DataFrame" + fields["f"]!!.renderFieldType() shouldBe "DataFrame" + } + } + + @Test + fun `column for data schema type`() { + fields.keys.asClue { + fields["a"]!!.renderColumnType() shouldBe "ColumnGroup" + } + } + + @Test + fun `builtin type column`() { + fields.keys.asClue { + fields["b"]!!.renderColumnType() shouldBe "DataColumn" + } + } + + @Test + fun `column for list parametrized by data schema type`() { + fields.keys.asClue { + fields["c"]!!.renderColumnType() shouldBe "DataColumn>" + } + } + + @Test + fun `functional type column`() { + fields.keys.asClue { + fields["d"]!!.renderColumnType() shouldBe "DataColumn<() -> kotlin.Unit>" + } + } + + @Test + fun `data row column`() { + fields.keys.asClue { + fields["e"]!!.renderColumnType() shouldBe "ColumnGroup" + } + } + + @Test + fun `data frame column`() { + fields.keys.asClue { + fields["f"]!!.renderColumnType() shouldBe "DataColumn>" + } + } + + interface GenericDataSchema { + val a: A + } + + @Test + fun `generic field`() { + MarkersExtractor.get(GenericDataSchema::class).allFields[0].renderAccessorFieldType() shouldBe "A" + MarkersExtractor.get(GenericDataSchema::class).allFields[0].renderFieldType() shouldBe "A" + } + + @Test + fun `generic column`() { + MarkersExtractor.get(GenericDataSchema::class).allFields[0].renderColumnType() shouldBe "DataColumn" + } +} diff --git a/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/columns/ColumnGroups.kt b/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/columns/ColumnGroups.kt new file mode 100644 index 0000000000..5169e7c1c3 --- /dev/null +++ b/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/columns/ColumnGroups.kt @@ -0,0 +1,19 @@ +package org.jetbrains.kotlinx.dataframe.columns + +import io.kotest.matchers.shouldBe +import org.jetbrains.kotlinx.dataframe.DataColumn +import org.jetbrains.kotlinx.dataframe.DataFrame +import org.junit.Test + +class ColumnGroupTests { + + @Test + fun emptyColumnGroup() { + val df = DataFrame.empty(2) + val group = DataColumn.createColumnGroup("a", df) + group.size() shouldBe 2 + group.columnsCount() shouldBe 0 + group.rowsCount() shouldBe 2 + group.distinct().rowsCount() shouldBe 0 + } +} diff --git a/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/CsvTests.kt b/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/CsvTests.kt new file mode 100644 index 0000000000..e9c80d28a2 --- /dev/null +++ b/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/CsvTests.kt @@ -0,0 +1,258 @@ +package org.jetbrains.kotlinx.dataframe.io + +import io.kotest.assertions.throwables.shouldNotThrowAny +import io.kotest.matchers.nulls.shouldNotBeNull +import io.kotest.matchers.shouldBe +import kotlinx.datetime.LocalDateTime +import org.apache.commons.csv.CSVFormat +import org.jetbrains.kotlinx.dataframe.DataFrame +import org.jetbrains.kotlinx.dataframe.api.ParserOptions +import org.jetbrains.kotlinx.dataframe.api.allNulls +import org.jetbrains.kotlinx.dataframe.api.convert +import org.jetbrains.kotlinx.dataframe.api.dataFrameOf +import org.jetbrains.kotlinx.dataframe.api.group +import org.jetbrains.kotlinx.dataframe.api.groupBy +import 
org.jetbrains.kotlinx.dataframe.api.into +import org.jetbrains.kotlinx.dataframe.api.schema +import org.jetbrains.kotlinx.dataframe.api.toStr +import org.jetbrains.kotlinx.dataframe.ncol +import org.jetbrains.kotlinx.dataframe.nrow +import org.jetbrains.kotlinx.dataframe.testCsv +import org.jetbrains.kotlinx.dataframe.testResource +import org.junit.Test +import java.io.File +import java.io.StringWriter +import java.util.Locale +import kotlin.reflect.KClass +import kotlin.reflect.typeOf + +class CsvTests { + + @Test + fun readNulls() { + val src = """ + first,second + 2,, + 3,, + """.trimIndent() + val df = DataFrame.readDelimStr(src) + df.nrow shouldBe 2 + df.ncol shouldBe 2 + df["first"].type() shouldBe typeOf() + df["second"].allNulls() shouldBe true + df["second"].type() shouldBe typeOf() + } + + @Test + fun write() { + val df = dataFrameOf("col1", "col2")( + 1, + null, + 2, + null + ).convert("col2").toStr() + + val str = StringWriter() + df.writeCSV(str) + + val res = DataFrame.readDelimStr(str.buffer.toString()) + + res shouldBe df + } + + @Test + fun readCSV() { + val df = DataFrame.read(simpleCsv) + + df.ncol shouldBe 11 + df.nrow shouldBe 5 + df.columnNames()[5] shouldBe "duplicate1" + df.columnNames()[6] shouldBe "duplicate11" + df["duplicate1"].type() shouldBe typeOf() + df["double"].type() shouldBe typeOf() + df["time"].type() shouldBe typeOf() + + println(df) + } + + @Test + fun readCsvWithFrenchLocaleAndAlternativeDelimiter() { + val df = DataFrame.readCSV( + url = csvWithFrenchLocale, + delimiter = ';', + parserOptions = ParserOptions(locale = Locale.FRENCH), + ) + + df.ncol shouldBe 11 + df.nrow shouldBe 5 + df.columnNames()[5] shouldBe "duplicate1" + df.columnNames()[6] shouldBe "duplicate11" + df["duplicate1"].type() shouldBe typeOf() + df["double"].type() shouldBe typeOf() + df["number"].type() shouldBe typeOf() + df["time"].type() shouldBe typeOf() + + println(df) + } + + @Test + fun readCsvWithFloats() { + val df = DataFrame.readCSV(wineCsv, delimiter = ';') + val schema = df.schema() + fun assertColumnType(columnName: String, kClass: KClass<*>) { + val col = schema.columns[columnName] + col.shouldNotBeNull() + col.type.classifier shouldBe kClass + } + + assertColumnType("citric acid", Double::class) + assertColumnType("alcohol", Double::class) + assertColumnType("quality", Int::class) + } + + @Test + fun `read standard CSV with floats when user has alternative locale`() { + val currentLocale = Locale.getDefault() + try { + Locale.setDefault(Locale.forLanguageTag("ru-RU")) + val df = DataFrame.readCSV(wineCsv, delimiter = ';') + val schema = df.schema() + fun assertColumnType(columnName: String, kClass: KClass<*>) { + val col = schema.columns[columnName] + col.shouldNotBeNull() + col.type.classifier shouldBe kClass + } + + assertColumnType("citric acid", Double::class) + assertColumnType("alcohol", Double::class) + assertColumnType("quality", Int::class) + } finally { + Locale.setDefault(currentLocale) + } + } + + @Test + fun `read with custom header`() { + val header = ('A'..'K').map { it.toString() } + val df = DataFrame.readCSV(simpleCsv, header = header, skipLines = 1) + df.columnNames() shouldBe header + df["B"].type() shouldBe typeOf() + + val headerShort = ('A'..'E').map { it.toString() } + val dfShort = DataFrame.readCSV(simpleCsv, header = headerShort, skipLines = 1) + dfShort.ncol shouldBe 5 + dfShort.columnNames() shouldBe headerShort + } + + @Test + fun `read first rows`() { + val expected = + listOf( + "untitled", + "user_id", + "name", + "duplicate", + 
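// A sketch of locale-aware CSV reading as in readCsvWithFrenchLocaleAndAlternativeDelimiter
// above; the file path is hypothetical. A ';'-separated file using French number
// formatting still yields Double columns when the locale is passed via ParserOptions.
val frenchCsv = DataFrame.readCSV(
    "numbers-fr.csv",                                        // hypothetical path
    delimiter = ';',
    parserOptions = ParserOptions(locale = Locale.FRENCH),
)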
"username", + "duplicate1", + "duplicate11", + "double", + "number", + "time", + "empty", + ) + val dfHeader = DataFrame.readCSV(simpleCsv, readLines = 0) + dfHeader.nrow shouldBe 0 + dfHeader.columnNames() shouldBe expected + + val dfThree = DataFrame.readCSV(simpleCsv, readLines = 3) + dfThree.nrow shouldBe 3 + + val dfFull = DataFrame.readCSV(simpleCsv, readLines = 10) + dfFull.nrow shouldBe 5 + } + + @Test + fun `if string starts with a number, it should be parsed as a string anyway`() { + val df = DataFrame.readCSV(durationCsv) + df["duration"].type() shouldBe typeOf() + df["floatDuration"].type() shouldBe typeOf() + } + + @Test + fun `if record has fewer columns than header then pad it with nulls`() { + val csvContent = """col1,col2,col3 + 568,801,587 + 780,588 + """.trimIndent() + + val df = shouldNotThrowAny { + DataFrame.readDelimStr(csvContent) + } + + df shouldBe dataFrameOf("col1", "col2", "col3")( + 568, 801, 587, + 780, 588, null + ) + } + + @Test + fun `write and read frame column`() { + val df = dataFrameOf("a", "b", "c")( + 1, 2, 3, + 1, 3, 2, + 2, 1, 3 + ) + val grouped = df.groupBy("a").into("g") + val str = grouped.toCsv() + val res = DataFrame.readDelimStr(str) + res shouldBe grouped + } + + @Test + fun `write and read column group`() { + val df = dataFrameOf("a", "b", "c")( + 1, 2, 3, + 1, 3, 2 + ) + val grouped = df.group("b", "c").into("d") + val str = grouped.toCsv() + val res = DataFrame.readDelimStr(str) + res shouldBe grouped + } + + @Test + fun `CSV String of saved dataframe starts with column name`() { + val df = dataFrameOf("a")(1) + df.toCsv().first() shouldBe 'a' + } + + @Test + fun `guess tsv`() { + val df = DataFrame.read(testResource("abc.tsv")) + df.columnsCount() shouldBe 3 + df.rowsCount() shouldBe 2 + } + + @Test + fun `write csv without header produce correct file`() { + val df = dataFrameOf("a", "b", "c")( + 1, 2, 3, + 1, 3, 2, + ) + df.writeCSV( + "src/test/resources/without_header.csv", + CSVFormat.DEFAULT.withSkipHeaderRecord(), + ) + val producedFile = File("src/test/resources/without_header.csv") + producedFile.exists() shouldBe true + producedFile.readText() shouldBe "1,2,3\r\n1,3,2\r\n" + producedFile.delete() + } + + companion object { + private val simpleCsv = testCsv("testCSV") + private val csvWithFrenchLocale = testCsv("testCSVwithFrenchLocale") + private val wineCsv = testCsv("wine") + private val durationCsv = testCsv("duration") + } +} diff --git a/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/ParserTests.kt b/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/ParserTests.kt new file mode 100644 index 0000000000..f4a8ab1606 --- /dev/null +++ b/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/ParserTests.kt @@ -0,0 +1,206 @@ +package org.jetbrains.kotlinx.dataframe.io + +import io.kotest.assertions.throwables.shouldThrow +import io.kotest.matchers.shouldBe +import kotlinx.datetime.LocalDateTime +import kotlinx.datetime.TimeZone +import kotlinx.datetime.toKotlinLocalDate +import kotlinx.datetime.toKotlinLocalDateTime +import org.jetbrains.kotlinx.dataframe.DataColumn +import org.jetbrains.kotlinx.dataframe.DataFrame +import org.jetbrains.kotlinx.dataframe.api.cast +import org.jetbrains.kotlinx.dataframe.api.columnOf +import org.jetbrains.kotlinx.dataframe.api.convertTo +import org.jetbrains.kotlinx.dataframe.api.convertToDouble +import org.jetbrains.kotlinx.dataframe.api.convertToLocalDate +import org.jetbrains.kotlinx.dataframe.api.convertToLocalDateTime 
+import org.jetbrains.kotlinx.dataframe.api.convertToLocalTime +import org.jetbrains.kotlinx.dataframe.api.parse +import org.jetbrains.kotlinx.dataframe.api.parser +import org.jetbrains.kotlinx.dataframe.api.plus +import org.jetbrains.kotlinx.dataframe.api.times +import org.jetbrains.kotlinx.dataframe.api.tryParse +import org.jetbrains.kotlinx.dataframe.exceptions.TypeConversionException +import org.junit.Test +import java.math.BigDecimal +import java.time.LocalTime +import java.util.Locale +import kotlin.reflect.typeOf + +class ParserTests { + + @Test + fun `parse datetime with custom format`() { + val col by columnOf("04.02.2021 -- 19:44:32") + col.tryParse().type() shouldBe typeOf() + DataFrame.parser.addDateTimePattern("dd.MM.uuuu -- HH:mm:ss") + val parsed = col.parse() + parsed.type() shouldBe typeOf() + parsed.cast()[0].year shouldBe 2021 + DataFrame.parser.resetToDefault() + } + + @Test(expected = IllegalStateException::class) + fun `parse should throw`() { + val col by columnOf("a", "b") + col.parse() + } + + @Test(expected = TypeConversionException::class) + fun `converter should throw`() { + val col by columnOf("a", "b") + col.convertTo() + } + + @Test(expected = TypeConversionException::class) + fun `converter for mixed column should throw`() { + val col by columnOf(1, "a") + col.convertTo() + } + + @Test + fun `convert mixed column`() { + val col by columnOf(1.0, "1") + val converted = col.convertTo() + converted.type() shouldBe typeOf() + converted[0] shouldBe 1 + converted[1] shouldBe 1 + } + + @Test + fun `convert BigDecimal column`() { + val col by columnOf(BigDecimal(1.0), BigDecimal(0.321)) + val converted = col.convertTo() + converted.type() shouldBe typeOf() + converted[0] shouldBe 1.0f + converted[1] shouldBe 0.321f + } + + @Test + fun `convert to Boolean`() { + val col by columnOf(BigDecimal(1.0), BigDecimal(0.0), 0, 1, 10L, 0.0, 0.1) + col.convertTo().shouldBe( + DataColumn.createValueColumn("col", listOf(true, false, false, true, true, false, true), typeOf()) + ) + } + + @Test + fun `convert to date and time`() { + val daysToStandardMillis = 24 * 60 * 60 * 1000L + val longCol = columnOf(1L, 60L, 3600L).times(1000L).plus(daysToStandardMillis * 366) + val datetimeCol = longCol.convertToLocalDateTime(TimeZone.UTC) + + datetimeCol.shouldBe( + columnOf( + java.time.LocalDateTime.of(1971, 1, 2, 0, 0, 1).toKotlinLocalDateTime(), + java.time.LocalDateTime.of(1971, 1, 2, 0, 1, 0).toKotlinLocalDateTime(), + java.time.LocalDateTime.of(1971, 1, 2, 1, 0, 0).toKotlinLocalDateTime() + ) + ) + longCol.convertToLocalDate(TimeZone.UTC).shouldBe( + columnOf( + java.time.LocalDate.of(1971, 1, 2).toKotlinLocalDate(), + java.time.LocalDate.of(1971, 1, 2).toKotlinLocalDate(), + java.time.LocalDate.of(1971, 1, 2).toKotlinLocalDate() + ) + ) + longCol.convertToLocalTime(TimeZone.UTC).shouldBe( + columnOf( + LocalTime.of(0, 0, 1), + LocalTime.of(0, 1, 0), + LocalTime.of(1, 0, 0) + ) + ) + + datetimeCol.convertToLocalDate().shouldBe( + columnOf( + java.time.LocalDate.of(1971, 1, 2).toKotlinLocalDate(), + java.time.LocalDate.of(1971, 1, 2).toKotlinLocalDate(), + java.time.LocalDate.of(1971, 1, 2).toKotlinLocalDate() + ) + ) + datetimeCol.convertToLocalTime().shouldBe( + columnOf( + LocalTime.of(0, 0, 1), + LocalTime.of(0, 1, 0), + LocalTime.of(1, 0, 0) + ) + ) + } + + @Test + fun `converting String to Double in different locales`() { + val currentLocale = Locale.getDefault() + try { + // Test 36 behaviour combinations: + + // 3 source columns + val columnDot = columnOf("12.345", "67.890") + 
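// A sketch of explicit-locale number parsing, matching the combinations this
// locale test walks through: the same strings convert differently depending on
// which locale's decimal separator is assumed.
val dotted = columnOf("12.345", "67.890")
dotted.convertToDouble(Locale.forLanguageTag("en-US"))  // [12.345, 67.89]
dotted.convertToDouble(Locale.forLanguageTag("ru-RU"))  // throws TypeConversionException: ',' is the decimal separator there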
val columnComma = columnOf("12,345", "67,890") + val columnMixed = columnOf("12.345", "67,890") + // * + // (3 locales as converting parameter + original converting) + val parsingLocaleNotDefined: Locale? = null + val parsingLocaleUsesDot: Locale = Locale.forLanguageTag("en-US") + val parsingLocaleUsesComma: Locale = Locale.forLanguageTag("ru-RU") + // * + // 3 system locales + + Locale.setDefault(Locale.forLanguageTag("C.UTF-8")) + + columnDot.convertTo().shouldBe(columnOf(12.345, 67.89)) + columnComma.convertTo().shouldBe(columnOf(12345.0, 67890.0)) + columnMixed.convertTo().shouldBe(columnOf(12.345, 67890.0)) + + columnDot.convertToDouble(parsingLocaleNotDefined).shouldBe(columnOf(12.345, 67.89)) + columnComma.convertToDouble(parsingLocaleNotDefined).shouldBe(columnOf(12345.0, 67890.0)) + columnMixed.convertToDouble(parsingLocaleNotDefined).shouldBe(columnOf(12.345, 67890.0)) + + columnDot.convertToDouble(parsingLocaleUsesDot).shouldBe(columnOf(12.345, 67.89)) + columnComma.convertToDouble(parsingLocaleUsesDot).shouldBe(columnOf(12345.0, 67890.0)) + columnMixed.convertToDouble(parsingLocaleUsesDot).shouldBe(columnOf(12.345, 67890.0)) + + shouldThrow { columnDot.convertToDouble(parsingLocaleUsesComma) } + columnComma.convertToDouble(parsingLocaleUsesComma).shouldBe(columnOf(12.345, 67.89)) + shouldThrow { columnMixed.convertToDouble(parsingLocaleUsesComma) } + + Locale.setDefault(Locale.forLanguageTag("en-US")) + + columnDot.convertTo().shouldBe(columnOf(12.345, 67.89)) + columnComma.convertTo().shouldBe(columnOf(12345.0, 67890.0)) + columnMixed.convertTo().shouldBe(columnOf(12.345, 67890.0)) + + columnDot.convertToDouble(parsingLocaleNotDefined).shouldBe(columnOf(12.345, 67.89)) + columnComma.convertToDouble(parsingLocaleNotDefined).shouldBe(columnOf(12345.0, 67890.0)) + columnMixed.convertToDouble(parsingLocaleNotDefined).shouldBe(columnOf(12.345, 67890.0)) + + columnDot.convertToDouble(parsingLocaleUsesDot).shouldBe(columnOf(12.345, 67.89)) + columnComma.convertToDouble(parsingLocaleUsesDot).shouldBe(columnOf(12345.0, 67890.0)) + columnMixed.convertToDouble(parsingLocaleUsesDot).shouldBe(columnOf(12.345, 67890.0)) + + shouldThrow { columnDot.convertToDouble(parsingLocaleUsesComma) } + columnComma.convertToDouble(parsingLocaleUsesComma).shouldBe(columnOf(12.345, 67.89)) + shouldThrow { columnMixed.convertToDouble(parsingLocaleUsesComma) } + + Locale.setDefault(Locale.forLanguageTag("ru-RU")) + + columnDot.convertTo().shouldBe(columnOf(12.345, 67.89)) + columnComma.convertTo().shouldBe(columnOf(12.345, 67.89)) + columnMixed.convertTo().shouldBe(columnOf(12.345, 67890.0)) + + columnDot.convertToDouble(parsingLocaleNotDefined).shouldBe(columnOf(12.345, 67.89)) + columnComma.convertToDouble(parsingLocaleNotDefined).shouldBe(columnOf(12.345, 67.89)) + columnMixed.convertToDouble(parsingLocaleNotDefined).shouldBe(columnOf(12.345, 67890.0)) + + columnDot.convertToDouble(parsingLocaleUsesDot).shouldBe(columnOf(12.345, 67.89)) + columnComma.convertToDouble(parsingLocaleUsesDot).shouldBe(columnOf(12345.0, 67890.0)) + columnMixed.convertToDouble(parsingLocaleUsesDot).shouldBe(columnOf(12.345, 67890.0)) + + shouldThrow { columnDot.convertToDouble(parsingLocaleUsesComma) } + columnComma.convertToDouble(parsingLocaleUsesComma).shouldBe(columnOf(12.345, 67.89)) + shouldThrow { columnMixed.convertToDouble(parsingLocaleUsesComma) } + } finally { + Locale.setDefault(currentLocale) + } + } +} diff --git a/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/PlaylistJsonTest.kt 
b/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/PlaylistJsonTest.kt new file mode 100644 index 0000000000..02cdc6642e --- /dev/null +++ b/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/PlaylistJsonTest.kt @@ -0,0 +1,206 @@ +package org.jetbrains.kotlinx.dataframe.io + +import io.kotest.matchers.shouldBe +import org.jetbrains.kotlinx.dataframe.DataFrame +import org.jetbrains.kotlinx.dataframe.DataRow +import org.jetbrains.kotlinx.dataframe.annotations.DataSchema +import org.jetbrains.kotlinx.dataframe.api.asGroupBy +import org.jetbrains.kotlinx.dataframe.api.cast +import org.jetbrains.kotlinx.dataframe.api.concat +import org.jetbrains.kotlinx.dataframe.api.convert +import org.jetbrains.kotlinx.dataframe.api.getColumnGroupOrNull +import org.jetbrains.kotlinx.dataframe.api.into +import org.jetbrains.kotlinx.dataframe.api.map +import org.jetbrains.kotlinx.dataframe.api.minBy +import org.jetbrains.kotlinx.dataframe.api.move +import org.jetbrains.kotlinx.dataframe.api.name +import org.jetbrains.kotlinx.dataframe.api.remove +import org.jetbrains.kotlinx.dataframe.api.select +import org.jetbrains.kotlinx.dataframe.api.with +import org.jetbrains.kotlinx.dataframe.columns.ColumnGroup +import org.jetbrains.kotlinx.dataframe.dataTypes.IMG +import org.junit.Test +import kotlin.reflect.typeOf + +class PlaylistJsonTest { + + @DataSchema(isOpen = false) + interface DataFrameType4 { + val url: String + val width: Int + val height: Int + } + + @DataSchema(isOpen = false) + interface DataFrameType5 { + val url: String + val width: Int + val height: Int + } + + @DataSchema(isOpen = false) + interface DataFrameType6 { + val url: String + val width: Int + val height: Int + } + + @DataSchema(isOpen = false) + interface DataFrameType7 { + val url: String? + val width: Int? + val height: Int? + } + + @DataSchema(isOpen = false) + interface DataFrameType8 { + val url: String? + val width: Int? + val height: Int? 
+ } + + @DataSchema(isOpen = false) + interface DataFrameType3 { + val default: DataRow + val medium: DataRow + val high: DataRow + val standard: DataRow + val maxres: DataRow + } + + @DataSchema(isOpen = false) + interface DataFrameType9 { + val kind: String + val videoId: String + } + + @DataSchema(isOpen = false) + interface DataFrameType2 { + val publishedAt: String + val channelId: String + val title: String + val description: String + val thumbnails: DataRow + val channelTitle: String + val playlistId: String + val position: Int + val resourceId: DataRow + } + + @DataSchema(isOpen = false) + interface DataFrameType1 { + val kind: String + val etag: String + val id: String + val snippet: DataRow + } + + @DataSchema(isOpen = false) + interface DataFrameType10 { + val totalResults: Int + val resultsPerPage: Int + } + + @DataSchema + interface DataRecord { + val kind: String + val etag: String + val nextPageToken: String + val items: DataFrame + val pageInfo: DataRow + } + + val path = "../data/playlistItems.json" + val df = DataFrame.read(path) + val typed = df.cast() + val item = typed.items[0] + + @Test + fun `deep update`() { + val updated = item.convert { snippet.thumbnails.default.url }.with { IMG(it) } + updated.snippet.thumbnails.default.url.type() shouldBe typeOf() + } + + @Test + fun `deep update group`() { + val updated = item.convert { snippet.thumbnails.default }.with { it.url } + updated.snippet.thumbnails["default"].type() shouldBe typeOf() + } + + @Test + fun `deep batch update`() { + val updated = item.convert { snippet.thumbnails.default.url and snippet.thumbnails.high.url }.with { IMG(it) } + updated.snippet.thumbnails.default.url.type() shouldBe typeOf() + updated.snippet.thumbnails.high.url.type() shouldBe typeOf() + } + + @Test + fun `deep batch update all`() { + val updated = item.convert { dfs { it.name() == "url" } }.with { (it as? 
String)?.let { IMG(it) } } + updated.snippet.thumbnails.default.url.type() shouldBe typeOf() + updated.snippet.thumbnails.maxres.url.type() shouldBe typeOf() + updated.snippet.thumbnails.standard.url.type() shouldBe typeOf() + updated.snippet.thumbnails.medium.url.type() shouldBe typeOf() + updated.snippet.thumbnails.high.url.type() shouldBe typeOf() + } + + @Test + fun `select group`() { + item.select { snippet.thumbnails.default }.columnsCount() shouldBe 1 + item.select { snippet.thumbnails.default.all() }.columnsCount() shouldBe 3 + } + + @Test + fun `deep remove`() { + val item2 = item.remove { snippet.thumbnails.default and snippet.thumbnails.maxres and snippet.channelId and etag } + item2.columnsCount() shouldBe item.columnsCount() - 1 + item2.snippet.columnsCount() shouldBe item.snippet.columnsCount() - 1 + item2.snippet.thumbnails.columnsCount() shouldBe item.snippet.thumbnails.columnsCount() - 2 + } + + @Test + fun `remove all from group`() { + val item2 = item.remove { snippet.thumbnails.default and snippet.thumbnails.maxres and snippet.thumbnails.medium and snippet.thumbnails.high and snippet.thumbnails.standard } + item2.snippet.columnsCount() shouldBe item.snippet.columnsCount() - 1 + item2.snippet.getColumnGroupOrNull("thumbnails") shouldBe null + } + + @Test + fun `deep move with rename`() { + val moved = item.move { snippet.thumbnails.default }.into { snippet.path() + "movedDefault" } + moved.snippet.thumbnails.columnNames() shouldBe item.snippet.thumbnails.remove { default }.columnNames() + moved.snippet.columnsCount() shouldBe item.snippet.columnsCount() + 1 + (moved.snippet["movedDefault"] as ColumnGroup<*>).columnsCount() shouldBe item.snippet.thumbnails.default.columnsCount() + } + + @Test + fun `union`() { + val merged = item.concat(item) + merged.rowsCount() shouldBe item.rowsCount() * 2 + val group = merged.snippet + group.rowsCount() shouldBe item.snippet.rowsCount() * 2 + group.columnNames() shouldBe item.snippet.columnNames() + } + + @Test + fun `select with rename`() { + val selected = item.select { snippet.thumbnails.default.url into "default" and (snippet.thumbnails.maxres.url named "maxres") } + selected.columnNames() shouldBe listOf("default", "maxres") + selected["default"].toList() shouldBe item.snippet.thumbnails.default.url.toList() + selected["maxres"].toList() shouldBe item.snippet.thumbnails.maxres.url.toList() + } + + @Test + fun `aggregate by column`() { + val res = typed.asGroupBy { items }.aggregate { + this into "items" + minBy { snippet.publishedAt }.snippet into "earliest" + } + + res.columnsCount() shouldBe typed.columnsCount() + 1 + res.getColumnIndex("earliest") shouldBe typed.getColumnIndex("items") + 1 + + val expected = typed.items.map { it.snippet.minBy { publishedAt } }.toList() + res["earliest"].toList() shouldBe expected + } +} diff --git a/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/ReadTests.kt b/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/ReadTests.kt new file mode 100644 index 0000000000..48415c5807 --- /dev/null +++ b/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/ReadTests.kt @@ -0,0 +1,104 @@ +package org.jetbrains.kotlinx.dataframe.io + +import io.kotest.matchers.shouldBe +import org.jetbrains.kotlinx.dataframe.AnyFrame +import org.jetbrains.kotlinx.dataframe.DataFrame +import org.jetbrains.kotlinx.dataframe.api.all +import org.jetbrains.kotlinx.dataframe.api.allNulls +import org.jetbrains.kotlinx.dataframe.api.cast +import 
org.jetbrains.kotlinx.dataframe.api.column +import org.jetbrains.kotlinx.dataframe.api.getColumn +import org.jetbrains.kotlinx.dataframe.columns.ColumnKind +import org.jetbrains.kotlinx.dataframe.impl.columns.asAnyFrameColumn +import org.jetbrains.kotlinx.dataframe.impl.nothingType +import org.jetbrains.kotlinx.dataframe.ncol +import org.jetbrains.kotlinx.dataframe.nrow +import org.jetbrains.kotlinx.dataframe.schema.ColumnSchema +import org.junit.Test +import kotlin.reflect.typeOf + +class ReadTests { + + @Test + fun readJsonNulls() { + val data = """ + [{"a":null, "b":1},{"a":null, "b":2}] + """.trimIndent() + + val df = DataFrame.readJsonStr(data) + df.ncol shouldBe 2 + df.nrow shouldBe 2 + df["a"].hasNulls() shouldBe true + df["a"].allNulls() shouldBe true + df.all { it["a"] == null } shouldBe true + df["a"].type() shouldBe nothingType(nullable = true) + df["b"].hasNulls() shouldBe false + } + + @Test + fun readFrameColumn() { + val data = """ + [{"a":[{"b":[]}]},{"a":[]},{"a":[{"b":[{"c":1}]}]}] + """.trimIndent() + val df = DataFrame.readJsonStr(data) + df.nrow shouldBe 3 + val a = df["a"].asAnyFrameColumn() + a[1].nrow shouldBe 0 + a[0].nrow shouldBe 1 + a[2].nrow shouldBe 1 + val schema = a.schema.value + schema.columns.size shouldBe 1 + val schema2 = schema.columns["b"] as ColumnSchema.Frame + schema2.schema.columns.size shouldBe 1 + schema2.schema.columns["c"]!!.kind shouldBe ColumnKind.Value + } + + @Test + fun readFrameColumnEmptySlice() { + val data = """ + [ [], [ {"a": [{"q":2},{"q":3}] } ] ] + """.trimIndent() + + val df = DataFrame.readJsonStr(data) + df.nrow shouldBe 2 + df.ncol shouldBe 1 + val empty = df[0][0] as AnyFrame + empty.nrow shouldBe 0 + empty.ncol shouldBe 0 + } + + @Test + fun `read big decimal`() { + val data = """ + [[3452345234345, 7795.34000000], [12314123532, 7795.34000000]] + """.trimIndent() + val df = DataFrame.readJsonStr(data) + println(df.getColumn("array").cast>()[0][1].javaClass) + } + + @Test + fun `array of arrays`() { + val data = """ + { + "values": [[1,2,3],[4,5,6],[7,8,9]] + } + """.trimIndent() + val df = DataFrame.readJsonStr(data) + val values by column>>() + df[values][0][1][1] shouldBe 5 + } + + @Test + fun `read json with header`() { + val data = """ + [[1,2,3], + [4,5,6]] + """.trimIndent() + val header = listOf("a", "b", "c") + val df = DataFrame.readJsonStr(data, header) + df.rowsCount() shouldBe 2 + df.columnsCount() shouldBe 3 + df.columnNames() shouldBe header + df.columnTypes() shouldBe List(3) { typeOf() } + } +} diff --git a/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/TypeInferenceTest.kt b/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/TypeInferenceTest.kt new file mode 100644 index 0000000000..6582ec510e --- /dev/null +++ b/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/TypeInferenceTest.kt @@ -0,0 +1,19 @@ +package org.jetbrains.kotlinx.dataframe.io + +import io.kotest.matchers.shouldBe +import org.jetbrains.kotlinx.dataframe.api.dataFrameOf +import org.junit.Test +import kotlin.reflect.typeOf + +class TypeInferenceTest { + + open class A + + private class B : A() + + @Test + fun `private subtypes`() { + val df = dataFrameOf("col")(B(), B()) + df["col"].type() shouldBe typeOf() + } +} diff --git a/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/json.kt b/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/json.kt new file mode 100644 index 0000000000..f02d0060f3 --- /dev/null +++ 
b/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/json.kt @@ -0,0 +1,954 @@ +package org.jetbrains.kotlinx.dataframe.io + +import io.kotest.matchers.collections.shouldBeIn +import io.kotest.matchers.shouldBe +import io.kotest.matchers.string.shouldContain +import io.kotest.matchers.string.shouldNotContain +import io.kotest.matchers.types.instanceOf +import org.intellij.lang.annotations.Language +import org.jetbrains.kotlinx.dataframe.AnyFrame +import org.jetbrains.kotlinx.dataframe.DataFrame +import org.jetbrains.kotlinx.dataframe.DataRow +import org.jetbrains.kotlinx.dataframe.alsoDebug +import org.jetbrains.kotlinx.dataframe.api.JsonPath +import org.jetbrains.kotlinx.dataframe.api.allNulls +import org.jetbrains.kotlinx.dataframe.api.columnsCount +import org.jetbrains.kotlinx.dataframe.api.convert +import org.jetbrains.kotlinx.dataframe.api.dataFrameOf +import org.jetbrains.kotlinx.dataframe.api.forEach +import org.jetbrains.kotlinx.dataframe.api.getColumnGroup +import org.jetbrains.kotlinx.dataframe.api.getFrameColumn +import org.jetbrains.kotlinx.dataframe.api.schema +import org.jetbrains.kotlinx.dataframe.api.toDouble +import org.jetbrains.kotlinx.dataframe.api.toMap +import org.jetbrains.kotlinx.dataframe.columns.ColumnGroup +import org.jetbrains.kotlinx.dataframe.columns.FrameColumn +import org.jetbrains.kotlinx.dataframe.columns.ValueColumn +import org.jetbrains.kotlinx.dataframe.impl.nothingType +import org.jetbrains.kotlinx.dataframe.io.JSON.TypeClashTactic.* +import org.jetbrains.kotlinx.dataframe.type +import org.jetbrains.kotlinx.dataframe.values +import org.junit.Test +import kotlin.reflect.* + +class JsonTests { + + @Test + fun `parse json array with header`() { + @Language("json") + val json = """[ + [1, "a"], + [2, "b"], + [3, "c"], + [4, "d"], + [5, "e"] + ] + """.trimIndent() + val df = DataFrame.readJsonStr(json, header = listOf("numbers", "letters")) + .alsoDebug() + + df.columnsCount() shouldBe 2 + df.rowsCount() shouldBe 5 + df["numbers"].type() shouldBe typeOf() + df["letters"].type() shouldBe typeOf() + df["numbers"].values() shouldBe listOf(1, 2, 3, 4, 5) + df["letters"].values() shouldBe listOf("a", "b", "c", "d", "e") + } + + @Test + fun `parse json array with header Any`() { + @Language("json") + val json = """[ + [1, "a"], + [2, "b"], + [3, "c"], + [4, "d"], + [5, "e"] + ] + """.trimIndent() + val df = DataFrame.readJsonStr(json, header = listOf("numbers", "letters"), typeClashTactic = ANY_COLUMNS) + .alsoDebug() + + df.columnsCount() shouldBe 2 + df.rowsCount() shouldBe 5 + df["numbers"].type() shouldBe typeOf() + df["letters"].type() shouldBe typeOf() + df["numbers"].values() shouldBe listOf(1, 2, 3, 4, 5) + df["letters"].values() shouldBe listOf("a", "b", "c", "d", "e") + } + + @Test + fun parseJson1() { + @Language("json") + val json = """[ + {"a":1, "b":"text"}, + {"a":2, "b":5, "c":4.5} + ] + """.trimIndent() + val df = DataFrame.readJsonStr(json) + .alsoDebug() + df.columnsCount() shouldBe 3 + df.rowsCount() shouldBe 2 + df["a"].type() shouldBe typeOf() + df["b"].type() shouldBe typeOf>() + df["c"].type() shouldBe typeOf() + } + + @Test + fun parseJson1Any() { + @Language("json") + val json = """[ + {"a":1, "b":"text"}, + {"a":2, "b":5, "c":4.5} + ] + """.trimIndent() + val df = DataFrame.readJsonStr(json, typeClashTactic = ANY_COLUMNS) + .alsoDebug() + df.columnsCount() shouldBe 3 + df.rowsCount() shouldBe 2 + df["a"].type() shouldBe typeOf() + df["b"].type() shouldBe typeOf>() + df["c"].type() shouldBe typeOf() + } + + @Test + 
fun parseJson2() { + @Language("json") + val json = """[ + {"a":"text"}, + {"a":{"b":2}}, + {"a":[6,7,8]} + ] + """.trimIndent() + val df = DataFrame.readJsonStr(json) + .alsoDebug() + df.columnsCount() shouldBe 1 + df.rowsCount() shouldBe 3 + val group = df["a"] as ColumnGroup<*> + group.columnsCount() shouldBe 3 + group["b"].type() shouldBe typeOf() + group["value"].type() shouldBe typeOf() + group["array"].type() shouldBe typeOf>() + } + + @Test + fun parseJson2Any() { + @Language("json") + val json = """[ + {"a":"text"}, + {"a":{"b":2}}, + {"a":[6,7,8]} + ] + """.trimIndent() + val df = DataFrame.readJsonStr(json, typeClashTactic = ANY_COLUMNS) + .alsoDebug() + df.columnsCount() shouldBe 1 + df.rowsCount() shouldBe 3 + val a = df["a"] as ValueColumn<*> + a.type() shouldBe typeOf() + a[0] shouldBe "text" + (a[1] as DataRow<*>)["b"] shouldBe 2 + a[2] shouldBe listOf(6, 7, 8) + } + + @Test + fun parseJson3() { + @Language("json") + val json = """[ + {"a":[3, 5]}, + {}, + {"a":[3.4, 5.6]} + ] + """.trimIndent() + val df = DataFrame.readJsonStr(json) + .alsoDebug() + df.columnsCount() shouldBe 1 + df.rowsCount() shouldBe 3 + df["a"].type() shouldBe typeOf>() + df[1]["a"] shouldBe emptyList() + } + + @Test + fun parseJson3Any() { + @Language("json") + val json = """[ + {"a":[3, 5]}, + {}, + {"a":[3.4, 5.6]} + ] + """.trimIndent() + val df = DataFrame.readJsonStr(json, typeClashTactic = ANY_COLUMNS) + .alsoDebug() + df.columnsCount() shouldBe 1 + df.rowsCount() shouldBe 3 + df["a"].type() shouldBe typeOf>() + df[1]["a"] shouldBe emptyList() + } + + @Test + fun parseJson4() { + @Language("json") + val json = """[ + {"a":[ {"b":2}, {"c":3} ]}, + {"a":[ {"b":4}, {"d":5} ]} + ] + """.trimIndent() + val df = DataFrame.readJsonStr(json) + .alsoDebug() + df.columnsCount() shouldBe 1 + df.rowsCount() shouldBe 2 + val group = df["a"] as FrameColumn<*> + group[0].alsoDebug() + .let { + it.columnsCount() shouldBe 3 + it.rowsCount() shouldBe 2 + it["b"].type() shouldBe typeOf() + it["c"].type() shouldBe typeOf() + it["d"].type() shouldBe typeOf() + it["b"].values.toList() shouldBe listOf(2, null) + it["c"].values.toList() shouldBe listOf(null, 3) + it["d"].values.toList() shouldBe listOf(null, null) + } + + group[1].alsoDebug() + .let { + it.columnsCount() shouldBe 3 + it.rowsCount() shouldBe 2 + it["b"].type() shouldBe typeOf() + it["c"].type() shouldBe typeOf() + it["d"].type() shouldBe typeOf() + it["b"].values.toList() shouldBe listOf(4, null) + it["c"].values.toList() shouldBe listOf(null, null) + it["d"].values.toList() shouldBe listOf(null, 5) + } + } + + @Test + fun parseJson4Any() { + @Language("json") + val json = """[ + {"a":[ {"b":2}, {"c":3} ]}, + {"a":[ {"b":4}, {"d":5} ]} + ] + """.trimIndent() + val df = DataFrame.readJsonStr(json, typeClashTactic = ANY_COLUMNS) + .alsoDebug() + df.columnsCount() shouldBe 1 + df.rowsCount() shouldBe 2 + val group = df["a"] as FrameColumn<*> + group[0].alsoDebug() + .let { + it.columnsCount() shouldBe 3 + it.rowsCount() shouldBe 2 + it["b"].type() shouldBe typeOf() + it["c"].type() shouldBe typeOf() + it["d"].type() shouldBe typeOf() + it["b"].values.toList() shouldBe listOf(2, null) + it["c"].values.toList() shouldBe listOf(null, 3) + it["d"].values.toList() shouldBe listOf(null, null) + } + + group[1].alsoDebug() + .let { + it.columnsCount() shouldBe 3 + it.rowsCount() shouldBe 2 + it["b"].type() shouldBe typeOf() + it["c"].type() shouldBe typeOf() + it["d"].type() shouldBe typeOf() + it["b"].values.toList() shouldBe listOf(4, null) + 
it["c"].values.toList() shouldBe listOf(null, null) + it["d"].values.toList() shouldBe listOf(null, 5) + } + } + + @Test + fun `parse json with nested json array with mixed values`() { + @Language("json") + val json = """[ + {"a":"text"}, + {"a":{"b":2}}, + {"a":[6, {"a": "b"}, [1, {"a" : "b"}],8]}, + {"a":[{"a": "b"}, {"a" : "c"}, {"a" : "d"}]} + ] + """.trimIndent() + val df = DataFrame.readJsonStr(json) + .alsoDebug() + df.columnsCount() shouldBe 1 + df.rowsCount() shouldBe 4 + val group = df["a"] as ColumnGroup<*> + group.columnsCount() shouldBe 3 + group["b"].type() shouldBe typeOf() + group["value"].type() shouldBe typeOf() + group["array"].type() shouldBe typeOf>() + val nestedDf = group.getFrameColumn("array")[2] + nestedDf["a"].type() shouldBe typeOf() + nestedDf["value"].type() shouldBe typeOf() + nestedDf["array"].type() shouldBe typeOf>() + group.getFrameColumn("array")[3] + .alsoDebug() + .let { + it.columnsCount() shouldBe 3 + it.rowsCount() shouldBe 3 + it["a"].type() shouldBe typeOf() + it["a"].values.toList() shouldBe listOf("b", "c", "d") + } + } + + @Test + fun `parse json with nested json array with mixed values Any`() { + @Language("json") + val json = """[ + {"a":"text"}, + {"a":{"b":2}}, + {"a":[6, {"a": "b"}, [1, {"a" : "b"}],8]}, + {"a":[{"a": "b"}, {"a" : "c"}, {"a" : "d"}]} + ] + """.trimIndent() + val df = DataFrame.readJsonStr(json, typeClashTactic = ANY_COLUMNS).alsoDebug() + df.columnsCount() shouldBe 1 + df.rowsCount() shouldBe 4 + val a = df["a"] as ValueColumn<*> + a.type() shouldBe typeOf() + a[0] shouldBe "text" + (a[1] as DataRow<*>).let { + it.columnsCount() shouldBe 1 + it["b"] shouldBe 2 + } + (a[2] as List<*>).let { + it[0] shouldBe 6 + (it[1] as DataRow<*>).let { + it.columnsCount() shouldBe 1 + it["a"] shouldBe "b" + } + (it[2] as List<*>).let { + it[0] shouldBe 1 + (it[1] as DataRow<*>).let { + it.columnsCount() shouldBe 1 + it["a"] shouldBe "b" + } + } + it[3] shouldBe 8 + } + (a[3] as DataFrame<*>) + .alsoDebug() + .let { + it.columnsCount() shouldBe 1 + it.rowsCount() shouldBe 3 + it["a"].type() shouldBe typeOf() + it["a"].values.toList() shouldBe listOf("b", "c", "d") + } + } + + @Test + fun `write df with primitive types`() { + val df = dataFrameOf("colInt", "colDouble?", "colBoolean?")( + 1, 1.0, true, + 2, null, false, + 3, 3.0, null + ).alsoDebug("df:") + + val res = DataFrame.readJsonStr(df.toJson()).alsoDebug("res:") + res shouldBe df + } + + @Test + fun `write df with primitive types Any`() { + val df = dataFrameOf("colInt", "colDouble?", "colBoolean?")( + 1, 1.0, true, + 2, null, false, + 3, 3.0, null + ).alsoDebug("df:") + + val res = + DataFrame.readJsonStr(df.toJson(), typeClashTactic = ANY_COLUMNS).alsoDebug("res:") + res shouldBe df + } + + @Test + fun `NaN double serialization`() { + val df = dataFrameOf("v")(1.1, Double.NaN) + df["v"].type() shouldBe typeOf() + DataFrame.readJsonStr(df.toJson()) shouldBe df + } + + @Test + fun `NaN double serialization Any`() { + val df = dataFrameOf("v")(1.1, Double.NaN) + df["v"].type() shouldBe typeOf() + DataFrame.readJsonStr(df.toJson(), typeClashTactic = ANY_COLUMNS) shouldBe df + } + + @Test + fun `NaN float serialization`() { + val df = dataFrameOf("v")(1.1f, Float.NaN) + df["v"].type() shouldBe typeOf() + DataFrame.readJsonStr(df.toJson()) shouldBe df.convert("v").toDouble() + } + + @Test + fun `NaN float serialization Any`() { + val df = dataFrameOf("v")(1.1f, Float.NaN) + df["v"].type() shouldBe typeOf() + DataFrame.readJsonStr(df.toJson(), typeClashTactic = ANY_COLUMNS) shouldBe 
df.convert("v") + .toDouble() + } + + @Test + fun `NaN string serialization`() { + val df = dataFrameOf("v")("NaM", "NaN") + df["v"].type() shouldBe typeOf() + DataFrame.readJsonStr(df.toJson()) shouldBe df + } + + @Test + fun `NaN string serialization Any`() { + val df = dataFrameOf("v")("NaM", "NaN") + df["v"].type() shouldBe typeOf() + DataFrame.readJsonStr(df.toJson(), typeClashTactic = ANY_COLUMNS) shouldBe df + } + + @Test + fun `list serialization`() { + val df = dataFrameOf("a")(listOf(1, 2, 3)) + DataFrame.readJsonStr(df.toJson()) shouldBe df + } + + @Test + fun `list serialization Any`() { + val df = dataFrameOf("a")(listOf(1, 2, 3)) + DataFrame.readJsonStr(df.toJson(), typeClashTactic = ANY_COLUMNS) shouldBe df + } + + @Test + fun `list serialization with nulls`() { + val df = dataFrameOf("a")(listOf(1, 2, 3), null) + val text = df.toJson() + val df1 = DataFrame.readJsonStr(text) + df1["a"][1] shouldBe emptyList() + } + + @Test + fun `list serialization with nulls Any`() { + val df = dataFrameOf("a")(listOf(1, 2, 3), null) + val text = df.toJson() + val df1 = DataFrame.readJsonStr(text, typeClashTactic = ANY_COLUMNS) + df1["a"][1] shouldBe emptyList() + } + + @Test + fun `serialize column with name 'value'`() { + val df = dataFrameOf("a")(dataFrameOf("value")(1, 2, 3)) + + @Language("json") + val json = df.toJson() + json shouldContain "\"value\":1" + val df1 = DataFrame.readJsonStr(json) + df shouldBe df1 + } + + @Test + fun `literal json field named 'value'`() { + @Language("json") + val json = """ + { + "data": { + "source": { + "value": "123" + } + } + } + """.trimIndent() + val df = DataFrame.readJsonStr(json) + df[0].getColumnGroup("data").getColumnGroup("source")["value"] shouldBe "123" + } + + @Test + fun `array json field named 'value'`() { + @Language("json") + val json = """{ "value": ["123"] }""" + + val df = DataFrame.readJsonStr(json).alsoDebug() + df[0]["value"] shouldBe listOf("123") + } + + @Test + fun `record json field named 'value'`() { + @Language("json") + val json = """{ "value": { "test" : "123" } }""" + + val df = DataFrame.readJsonStr(json) + df[0].getColumnGroup("value")["test"] shouldBe "123" + } + + @Test + fun `json field named 'array'`() { + @Language("json") + val json = """ + { + "data": { + "source": { + "array": "123" + } + } + } + """.trimIndent() + + val df = DataFrame.readJsonStr(json) + df[0].getColumnGroup("data").getColumnGroup("source")["array"] shouldBe "123" + } + + @Test + fun `array json field named 'array'`() { + @Language("json") + val json = """ + [{ + "a": { + "value": "text", + "array": [] + } + }, { + "a": { + "b": 2, + "array": [] + } + }, { + "a": { + "array": [6, 7, 8] + } + }] + """.trimIndent() + + val df = DataFrame.readJsonStr(json).alsoDebug() + val group = df.getColumnGroup("a") + group["array"].type() shouldBe typeOf>() + group["value"].type() shouldBe typeOf() + group["b"].type() shouldBe typeOf() + } + + @Test + fun `value field name clash`() { + @Language("json") + val json = """[ + {"a":"text", "c": 1}, + {"a":{"b":2,"value":1.0, "array": null, "array1":12}}, + {"a":[6,7,8]}, + null + ] + """.trimIndent() + val df = DataFrame.readJsonStr(json) + .alsoDebug() + df.columnsCount() shouldBe 2 + df.rowsCount() shouldBe 4 + df["c"].type() shouldBe typeOf() + val group = df["a"] as ColumnGroup<*> + group.columnsCount() shouldBe 6 + group["b"].type() shouldBe typeOf() + group["value"].type() shouldBe typeOf() + group["value1"].type() shouldBe typeOf() + group["array"].type() shouldBe nothingType(nullable = true) + + val 
schema = df.schema().toString() + schema shouldContain "Nothing?" + schema shouldNotContain "Void?" + + group["array1"].type() shouldBe typeOf() + group["array2"].type() shouldBe typeOf>() + } + + @Test + fun `value field (no) name clash Any`() { + @Language("json") + val json = """[ + {"a":"text", "c": 1}, + {"a":{"b":2,"value":1.0, "array": null, "array1":12}}, + {"a":[6,7,8]}, + null + ] + """.trimIndent() + val df = DataFrame.readJsonStr(json, typeClashTactic = ANY_COLUMNS) + .alsoDebug() + df.columnsCount() shouldBe 2 + df.rowsCount() shouldBe 4 + val c = df["c"] as ValueColumn<*> + c.type() shouldBe typeOf() + c[0] shouldBe 1 + c[1..3].allNulls() shouldBe true + val a = df["a"] as ValueColumn<*> + a.type() shouldBe typeOf() + a[0] shouldBe "text" + (a[1] as DataRow<*>).let { + it.columnsCount() shouldBe 4 + it["b"] shouldBe 2 + it["value"] shouldBe 1.0 + it["array"] shouldBe null + it["array1"] shouldBe 12 + } + a[2] shouldBe listOf(6, 7, 8) + a[3] shouldBe null + } + + @Test + fun `objects with null Any`() { + @Language("json") + val json = """[ + {"a":{"b":1}}, + {"a":{"b":2}}, + {"a":{"b": null}}, + {"a": {}}, + {"a": null}, + {}, + null + ] + """.trimIndent() + val df = DataFrame.readJsonStr(json, typeClashTactic = ANY_COLUMNS) + .alsoDebug() + df.columnsCount() shouldBe 1 + df.rowsCount() shouldBe 7 + val a = df["a"] as ColumnGroup<*> + a.columnsCount() shouldBe 1 + a["b"].let { + it.type() shouldBe typeOf() + it[0] shouldBe 1 + it[1] shouldBe 2 + it[2..6].allNulls() shouldBe true + } + } + + @Test + fun `primitive arrays with null Any`() { + @Language("json") + val json = """[ + {"a":[1,2,3]}, + {"a":[null]}, + {"a":[]}, + {"a": null}, + {}, + null + ] + """.trimIndent() + val df = DataFrame.readJsonStr(json, typeClashTactic = ANY_COLUMNS) + .alsoDebug() + df.columnsCount() shouldBe 1 + df.rowsCount() shouldBe 6 + val a = df["a"] as ValueColumn<*> + a.type shouldBe typeOf>() + a[0] shouldBe listOf(1, 2, 3) + a[1] shouldBe listOf(null) + a[2..5].forEach { + it shouldBe emptyList() + } + } + + @Test + fun `non-primitive arrays with null Any`() { + @Language("json") + val json = """[ + {"a":[null, null]}, + {"a":[{"b" : 1},{"b": 2}]}, + {"a":[]}, + {"a": null}, + {}, + null + ] + """.trimIndent() + val df = DataFrame.readJsonStr(json, typeClashTactic = ANY_COLUMNS) + .alsoDebug() + df.columnsCount() shouldBe 1 + df.rowsCount() shouldBe 6 + val a = df["a"] as FrameColumn<*> + a[0].let { + it.columnsCount() shouldBe 1 + it.rowsCount() shouldBe 2 + it["b"].let { + it.type() shouldBe typeOf() + it[0] shouldBe null + it[1] shouldBe null + } + } + a[1].let { + it.columnsCount() shouldBe 1 + it.rowsCount() shouldBe 2 + it["b"].let { + it.type() shouldBe typeOf() + it[0] shouldBe 1 + it[1] shouldBe 2 + } + } + a[2..5].forEach { + it.columnsCount() shouldBe 0 + it.rowsCount() shouldBe 0 + } + } + + @Test + fun `Listification test Array Value`() { + @Language("json") + val json = """[ + {"a":[1,2,3]}, + {"a":null}, + {"a":1} + ] + """.trimIndent() + val df = DataFrame.readJsonStr( + text = json, + typeClashTactic = ARRAY_AND_VALUE_COLUMNS, + keyValuePaths = listOf(JsonPath()), + ) + .alsoDebug() + } + + @Test + fun `Listification test Any column`() { + @Language("json") + val json = """[ + {"a":[1,2,3]}, + {"a":null}, + {"a":1} + ] + """.trimIndent() + val df = DataFrame.readJsonStr( + text = json, + typeClashTactic = ANY_COLUMNS, + keyValuePaths = listOf(JsonPath()), + ) + .alsoDebug() + } + + @Test + fun `KeyValue property Array Value`() { + @Language("json") + val json = """[ + 
{"a":{"b":1}}, + {"a":{"c": 2, "d": null, "b":[1, 2, 3]}}, + {"a":{}}, + {"a": null}, + {}, + null + ] + """.trimIndent() + + // before + val noKeyValue = DataFrame.readJsonStr(json, typeClashTactic = ARRAY_AND_VALUE_COLUMNS) + .alsoDebug() + +// ⌌-------------------------------------------------------⌍ +// | | a:{b:{value:Int?, array:List}, c:Int?, d:Any?}| +// |--|----------------------------------------------------| +// | 0| { b:{ value:1, array:[] }, c:null, d:null }| +// | 1| { b:{ value:null, array:[1, 2, 3] }, c:2, d:null }| +// | 2| { b:{ value:null, array:[] }, c:null, d:null }| +// | 3| { b:{ value:null, array:[] }, c:null, d:null }| +// | 4| { b:{ value:null, array:[] }, c:null, d:null }| +// | 5| { b:{ value:null, array:[] }, c:null, d:null }| +// ⌎-------------------------------------------------------⌏ + noKeyValue.columnsCount() shouldBe 1 + noKeyValue.rowsCount() shouldBe 6 + noKeyValue["a"].also { + it shouldBe instanceOf>() + it as ColumnGroup<*> + + it["b"].type() shouldBe typeOf>() + it["b"]["value"].type() shouldBe typeOf() + it["b"]["array"].type() shouldBe typeOf>() + it["c"].type() shouldBe typeOf() + it["d"].type() shouldBe nothingType(nullable = true) + + it[0].let { + (it["b"] as DataRow<*>).toMap() shouldBe mapOf("value" to 1, "array" to emptyList()) + it["c"] shouldBe null + it["d"] shouldBe null + } + it[1].let { + (it["b"] as DataRow<*>).toMap() shouldBe mapOf("value" to null, "array" to listOf(1, 2, 3)) + it["c"] shouldBe 2 + it["d"] shouldBe null + } + (it as ColumnGroup<*>)[2..5].forEach { + it.let { + (it["b"] as DataRow<*>).toMap() shouldBe mapOf("value" to null, "array" to emptyList()) + it["c"] shouldBe null + it["d"] shouldBe null + } + } + } + + // $["a"] should be read as keyValue + val keyValuePaths = listOf( + JsonPath().append("a") + ) + + // after + val withKeyValue = + DataFrame.readJsonStr(json, keyValuePaths = keyValuePaths, typeClashTactic = ARRAY_AND_VALUE_COLUMNS) + .alsoDebug() + .also { + it["a"][1].let { it as AnyFrame }.alsoDebug() + } +// ⌌------------------------------⌍ +// | | a:[key:String, value:Any?]| +// |--|---------------------------| +// | 0| [1 x 2] { key:b, value:1 }| +// | 1| [3 x 2]| -> { key:c, value:2 } +// | 2| [0 x 2]| { key:d, value:null } +// | 3| [0 x 2]| { key:b, value:[1,2,3] } +// | 4| [0 x 2]| +// | 5| [0 x 2]| +// ⌎------------------------------⌏ + + withKeyValue.columnsCount() shouldBe 1 + withKeyValue.rowsCount() shouldBe 6 + withKeyValue["a"].also { + it shouldBe instanceOf>() + it as FrameColumn<*> + + it[0].let { + it.columnsCount() shouldBe 2 + it.rowsCount() shouldBe 1 + it["key"].let { + it.type() shouldBe typeOf() + it[0] shouldBe "b" + } + it["value"].let { + it.type() shouldBe typeOf() // tightened by values, but Int? is also valid of course + it[0] shouldBe 1 + } + } + it[1].let { + it.columnsCount() shouldBe 2 + it.rowsCount() shouldBe 3 + it["key"].let { + it.type() shouldBe typeOf() + it[0] shouldBe "c" + it[1] shouldBe "d" + } + it["value"].let { + it.type() shouldBe typeOf() + it[0] shouldBe 2 + it[1] shouldBe null + } + } + it[2..5].forEach { + it.columnsCount() shouldBe 2 + it.rowsCount() shouldBe 0 + + it["key"].type() shouldBe typeOf() + it["value"].type() shouldBeIn listOf(typeOf(), typeOf()) // no data, so Any(?) 
ValueColumn + } + } + } + + @Test + fun `KeyValue property Any`() { // TODO needs more tests + @Language("json") + val json = """[ + {"a":{"b": 1}}, + {"a":{"c": 2, "d": null, "b":[1, 2, 3]}}, + {"a":{}}, + {"a": null}, + {}, + null + ] + """.trimIndent() + + // before + val noKeyValue = DataFrame.readJsonStr(json, typeClashTactic = ANY_COLUMNS) + .alsoDebug() + +// ⌌------------------------------⌍ +// | | a:{b:Any?, c:Int?, d:Any?}| +// |--|---------------------------| +// | 0| { b:1 }| +// | 1|{ b:[1,2,3], c:2, d: null }| +// | 2| { }| +// | 3| { }| +// | 4| { }| +// | 5| { }| +// ⌎------------------------------⌏ + noKeyValue.columnsCount() shouldBe 1 + noKeyValue.rowsCount() shouldBe 6 + noKeyValue["a"].also { + it shouldBe instanceOf>() + it as ColumnGroup<*> + + it["b"].type() shouldBe typeOf() + it["c"].type() shouldBe typeOf() + it["d"].type() shouldBe typeOf() + + it[0].toMap() shouldBe mapOf("b" to 1, "c" to null, "d" to null) + it[1].toMap() shouldBe mapOf("b" to listOf(1, 2, 3), "c" to 2, "d" to null) + (it as ColumnGroup<*>)[2..5].forEach { + it.toMap() shouldBe mapOf("b" to null, "c" to null, "d" to null) + } + } + + // $["a"] should be read as keyValue + val keyValuePaths = listOf( + JsonPath().append("a") + ) + + // after + val withKeyValue = DataFrame.readJsonStr(json, keyValuePaths = keyValuePaths, typeClashTactic = ANY_COLUMNS) + .alsoDebug() + .also { + it["a"][1].let { it as AnyFrame }.alsoDebug() + } + +// ⌌------------------------------⌍ +// | | a:[key:String, value:Any?]| +// |--|---------------------------| +// | 0| [1 x 2] { key:b, value:1 }| +// | 1| [3 x 2]| -> { key:c, value:2 } +// | 2| [0 x 2]| { key:d, value:null } +// | 3| [0 x 2]| { key:b, value:[1,2,3] } +// | 4| [0 x 2]| +// | 5| [0 x 2]| +// ⌎------------------------------⌏ + withKeyValue.columnsCount() shouldBe 1 + withKeyValue.rowsCount() shouldBe 6 + withKeyValue["a"].also { + it shouldBe instanceOf>() + it as FrameColumn<*> + + it[0].let { + it.columnsCount() shouldBe 2 + it.rowsCount() shouldBe 1 + it["key"].let { + it.type() shouldBe typeOf() + it[0] shouldBe "b" + } + it["value"].let { + it.type() shouldBe typeOf() // tightened by values, but Int? is also valid of course + it[0] shouldBe 1 + } + } + it[1].let { + it.columnsCount() shouldBe 2 + it.rowsCount() shouldBe 3 + it["key"].let { + it.type() shouldBe typeOf() + it[0] shouldBe "c" + it[1] shouldBe "d" + } + it["value"].let { + it.type() shouldBe typeOf() + it[0] shouldBe 2 + it[1] shouldBe null + it[2] shouldBe listOf(1, 2, 3) + } + } + it[2..5].forEach { + it.columnsCount() shouldBe 2 + it.rowsCount() shouldBe 0 + + it["key"].type() shouldBe typeOf() + it["value"].type() shouldBeIn listOf(typeOf(), typeOf()) // no data, so Any(?) 
ValueColumn + } + } + } + + @Test + fun `nulls in columns should be encoded explicitly`() { + val df = dataFrameOf("a", "b")("1", null, "2", 12) + df.toJson(canonical = true) shouldContain "\"b\":null" + } +} diff --git a/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/jupyter/CodeGenerationTests.kt b/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/jupyter/CodeGenerationTests.kt new file mode 100644 index 0000000000..33c4b9e6e5 --- /dev/null +++ b/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/jupyter/CodeGenerationTests.kt @@ -0,0 +1,40 @@ +package org.jetbrains.kotlinx.dataframe.jupyter + +import org.jetbrains.kotlinx.jupyter.api.Code +import org.junit.Test + +class CodeGenerationTests : DataFrameJupyterTest() { + + private fun Code.checkCompilation() { + lines().forEach { + exec(it) + } + } + + @Test + fun `nullable dataframe`() { + """ + fun create(): AnyFrame? = dataFrameOf("a")(1) + val df = create() + df.a + """.checkCompilation() + } + + @Test + fun `nullable columnGroup`() { + """ + fun create(): AnyCol? = dataFrameOf("a")(1).asColumnGroup().asDataColumn() + val col = create() + col.a + """.checkCompilation() + } + + @Test + fun `nullable dataRow`() { + """ + fun create(): AnyRow? = dataFrameOf("a")(1).single() + val row = create() + row.a + """.checkCompilation() + } +} diff --git a/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/jupyter/DataFrameJupyterTest.kt b/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/jupyter/DataFrameJupyterTest.kt new file mode 100644 index 0000000000..f56be9591d --- /dev/null +++ b/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/jupyter/DataFrameJupyterTest.kt @@ -0,0 +1,51 @@ +package org.jetbrains.kotlinx.dataframe.jupyter + +import org.jetbrains.jupyter.parser.notebook.Cell +import org.jetbrains.kotlinx.jupyter.testkit.JupyterReplTestCase +import org.jetbrains.kotlinx.jupyter.testkit.ReplProvider + +abstract class DataFrameJupyterTest : JupyterReplTestCase( + ReplProvider.forLibrariesTesting(listOf("dataframe")) +) + +fun interface CodeReplacer { + fun replace(code: String): String + + companion object { + val DEFAULT = CodeReplacer { it } + + fun byMap(replacements: Map) = CodeReplacer { code -> + replacements.entries.fold(code) { acc, (key, replacement) -> + acc.replace(key, replacement) + } + } + + fun byMap(vararg replacements: Pair): CodeReplacer = byMap(mapOf(*replacements)) + } +} + +fun interface CellClause { + fun isAccepted(cell: Cell): Boolean + + companion object { + val IS_CODE = CellClause { it.type == Cell.Type.CODE } + } +} + +infix fun CellClause.and(other: CellClause): CellClause { + return CellClause { cell -> + // Prevent lazy evaluation + val acceptedThis = this.isAccepted(cell) + val acceptedOther = other.isAccepted(cell) + acceptedThis && acceptedOther + } +} + +fun CellClause.Companion.stopAfter(breakClause: CellClause) = object : CellClause { + var clauseTriggered: Boolean = false + + override fun isAccepted(cell: Cell): Boolean { + clauseTriggered = clauseTriggered || breakClause.isAccepted(cell) + return !clauseTriggered + } +} diff --git a/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/jupyter/JupyterCodegenTests.kt b/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/jupyter/JupyterCodegenTests.kt new file mode 100644 index 0000000000..1280f11919 --- /dev/null +++ 
b/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/jupyter/JupyterCodegenTests.kt @@ -0,0 +1,317 @@ +package org.jetbrains.kotlinx.dataframe.jupyter + +import io.kotest.assertions.throwables.shouldNotThrowAny +import io.kotest.matchers.should +import io.kotest.matchers.shouldBe +import io.kotest.matchers.types.shouldBeInstanceOf +import org.intellij.lang.annotations.Language +import org.jetbrains.kotlinx.dataframe.AnyFrame +import org.jetbrains.kotlinx.dataframe.api.isNotEmpty +import org.jetbrains.kotlinx.dataframe.columns.ValueColumn +import org.jetbrains.kotlinx.dataframe.type +import org.jetbrains.kotlinx.jupyter.api.MimeTypedResult +import org.jetbrains.kotlinx.jupyter.testkit.JupyterReplTestCase +import org.junit.Test +import kotlin.reflect.typeOf + +class JupyterCodegenTests : JupyterReplTestCase() { + + @Test + fun `codegen adding column with generic type function`() { + @Language("kts") + val res1 = exec( + """ + fun AnyFrame.addValue(value: T) = add("value") { listOf(value) } + val df = dataFrameOf("a")(1).addValue(2) + """.trimIndent() + ) + res1 shouldBe Unit + val res2 = execRaw("df") as AnyFrame + + res2["value"].type shouldBe typeOf>() + } + + @Test + fun `Don't inherit from data class`() { + @Language("kts") + val res1 = exec( + """ + @DataSchema + data class A(val a: Int) + """.trimIndent() + ) + + @Language("kts") + val res2 = execRaw( + """ + val df = dataFrameOf("a", "b")(1, 2) + df + """.trimIndent() + ) + + (res2 as AnyFrame).should { it.isNotEmpty() } + } + + @Test + fun `Don't inherit from non open class`() { + @Language("kts") + val res1 = exec( + """ + @DataSchema + class A(val a: Int) + """.trimIndent() + ) + + @Language("kts") + val res2 = execRaw( + """ + val df = dataFrameOf("a", "b")(1, 2) + df + """.trimIndent() + ) + + (res2 as AnyFrame).should { it.isNotEmpty() } + } + + @Test + fun `Don't inherit from open class`() { + @Language("kts") + val res1 = exec( + """ + @DataSchema + open class A(val a: Int) + """.trimIndent() + ) + + @Language("kts") + val res2 = execRaw( + """ + val df = dataFrameOf("a", "b")(1, 2) + df + """.trimIndent() + ) + + (res2 as AnyFrame).should { it.isNotEmpty() } + } + + @Test + fun `Do inherit from open interface`() { + @Language("kts") + val res1 = exec( + """ + @DataSchema + interface A { val a: Int } + """.trimIndent() + ) + + @Language("kts") + val res2 = execRaw( + """ + val df = dataFrameOf("a", "b")(1, 2) + df + """.trimIndent() + ) + + (res2 as AnyFrame).should { it.isNotEmpty() } + } + + @Test + fun `codegen for enumerated frames`() { + @Language("kts") + val res1 = exec( + """ + val names = (0..2).map { it.toString() } + val df = dataFrameOf(names)(1, 2, 3) + """.trimIndent() + ) + res1 shouldBe Unit + + val res2 = execRaw("df.`1`") + res2.shouldBeInstanceOf>() + } + + @Test + fun `codegen for complex column names`() { + @Language("kts") + val res1 = exec( + """ + val df = DataFrame.readDelimStr("[a], (b), {c}\n1, 2, 3") + df + """.trimIndent() + ) + res1.shouldBeInstanceOf() + + val res2 = exec( + """listOf(df.`{a}`[0], df.`(b)`[0], df.`{c}`[0])""" + ) + res2 shouldBe listOf(1, 2, 3) + } + + @Test + fun `codegen for '$' that is interpolator in kotlin string literals`() { + @Language("kts") + val res1 = exec( + """ + val df = DataFrame.readDelimStr("\${'$'}id\n1") + df + """.trimIndent() + ) + res1.shouldBeInstanceOf() + val res2 = exec( + "listOf(df.`\$id`[0])" + ) + res2 shouldBe listOf(1) + } + + @Test + fun `codegen for backtick that is forbidden in kotlin identifiers`() { + @Language("kts") + 
val res1 = exec( + """ + val df = DataFrame.readDelimStr("Day`s\n1") + df + """.trimIndent() + ) + res1.shouldBeInstanceOf() + println(res1.entries.joinToString()) + val res2 = exec( + "listOf(df.`Day's`[0])" + ) + res2 shouldBe listOf(1) + } + + @Test + fun `codegen for chars that is forbidden in JVM identifiers`() { + val forbiddenChar = ";" + + @Language("kts") + val res1 = exec( + """ + val df = DataFrame.readDelimStr("Test$forbiddenChar\n1") + df + """.trimIndent() + ) + res1.shouldBeInstanceOf() + println(res1.entries.joinToString()) + val res2 = exec( + "listOf(df.`Test `[0])" + ) + res2 shouldBe listOf(1) + } + + @Test + fun `codegen for chars that is forbidden in JVM identifiers 1`() { + val forbiddenChar = "\\\\" + + @Language("kts") + val res1 = exec( + """ + val df = DataFrame.readDelimStr("Test$forbiddenChar\n1") + df + """.trimIndent() + ) + res1.shouldBeInstanceOf() + println(res1.entries.joinToString()) + val res2 = exec( + "listOf(df.`Test `[0])" + ) + res2 shouldBe listOf(1) + } + + @Test + fun `generic interface`() { + val res1 = exec( + """ + @DataSchema + interface Generic { + val field: T + } + """.trimIndent() + ) + res1.shouldBeInstanceOf() + val res2 = exec( + """ + val ColumnsContainer>.test1: DataColumn get() = field + val DataRow>.test2: T get() = field + """.trimIndent() + ) + res2.shouldBeInstanceOf() + } + + @Test + fun `generic interface with upper bound`() { + val res1 = exec( + """ + @DataSchema + interface Generic { + val field: T + } + """.trimIndent() + ) + res1.shouldBeInstanceOf() + val res2 = exec( + """ + val ColumnsContainer>.test1: DataColumn get() = field + val DataRow>.test2: T get() = field + """.trimIndent() + ) + res2.shouldBeInstanceOf() + } + + @Test + fun `generic interface with variance and user type in type parameters`() { + val res1 = exec( + """ + interface UpperBound + + @DataSchema(isOpen = false) + interface Generic { + val field: T + } + """.trimIndent() + ) + res1.shouldBeInstanceOf() + val res2 = exec( + """ + val ColumnsContainer>.test1: DataColumn get() = field + val DataRow>.test2: T get() = field + """.trimIndent() + ) + res2.shouldBeInstanceOf() + } + + @Test + fun `generate a new marker when dataframe marker is not a data schema so that columns are accessible with extensions`() { + exec( + """ + enum class State { + Idle, Productive, Maintenance + } + + class Event(val toolId: String, val state: State, val timestamp: Long) + + val tool1 = "tool_1" + val tool2 = "tool_2" + val tool3 = "tool_3" + val events = listOf( + Event(tool1, State.Idle, 0), + Event(tool1, State.Productive, 5), + Event(tool2, State.Idle, 0), + Event(tool2, State.Maintenance, 10), + Event(tool2, State.Idle, 20), + Event(tool3, State.Idle, 0), + Event(tool3, State.Productive, 25), + ).toDataFrame() + """.trimIndent() + ) + shouldNotThrowAny { + exec( + """ + events.toolId + events.state + events.timestamp + """.trimIndent() + ) + } + } +} diff --git a/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/jupyter/RenderingTests.kt b/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/jupyter/RenderingTests.kt new file mode 100644 index 0000000000..11be55cb95 --- /dev/null +++ b/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/jupyter/RenderingTests.kt @@ -0,0 +1,128 @@ +package org.jetbrains.kotlinx.dataframe.jupyter + +import com.beust.klaxon.* +import io.kotest.matchers.shouldBe +import io.kotest.matchers.string.shouldContain +import io.kotest.matchers.string.shouldNotContain +import 
org.intellij.lang.annotations.Language +import org.jetbrains.kotlinx.jupyter.api.MimeTypedResult +import org.jetbrains.kotlinx.jupyter.testkit.JupyterReplTestCase +import org.junit.Test + +class RenderingTests : JupyterReplTestCase() { + @Test + fun `dataframe is rendered to html`() { + @Language("kts") + val html = execHtml( + """ + val name by column() + val height by column() + val df = dataFrameOf(name, height)( + "Bill", 135, + "Charlie", 160 + ) + df + """.trimIndent() + ) + html shouldContain "Bill" + + @Language("kts") + val useRes = exec( + """ + USE { + render { (it * 2).toString() } + } + """.trimIndent() + ) + useRes shouldBe Unit + + val html2 = execHtml("df") + html2 shouldContain (135 * 2).toString() + html2 shouldContain (160 * 2).toString() + } + + @Test + fun `rendering options`() { + @Language("kts") + val html1 = execHtml( + """ + data class Person(val age: Int, val name: String) + val df = (1..70).map { Person(it, "A".repeat(it)) }.toDataFrame() + df + """.trimIndent() + ) + html1 shouldContain "showing only top 20 of 70 rows" + + @Language("kts") + val html2 = execHtml( + """ + dataFrameConfig.display.rowsLimit = 50 + df + """.trimIndent() + ) + html2 shouldContain "showing only top 50 of 70 rows" + } + + @Test + fun `dark color scheme`() { + fun execSimpleDf() = execHtml("""dataFrameOf("a", "b")(1, 2, 3, 4)""") + + val htmlLight = execSimpleDf() + val r1 = exec("notebook.changeColorScheme(ColorScheme.DARK); 1") + val htmlDark = execSimpleDf() + + r1 shouldBe 1 + val darkClassAttribute = """theme='dark'""" + htmlLight shouldNotContain darkClassAttribute + htmlDark shouldContain darkClassAttribute + } + + @Test + fun `test kotlin notebook plugin utils rows subset`() { + @Language("kts") + val result = exec( + """ + data class Row(val id: Int) + val df = (1..100).map { Row(it) }.toDataFrame() + KotlinNotebookPluginUtils.getRowsSubsetForRendering(df, 20 , 50) + """.trimIndent() + ) + + val json = parseDataframeJson(result) + + json.int("nrow") shouldBe 30 + json.int("ncol") shouldBe 1 + + val rows = json.array>("kotlin_dataframe")!! + rows.getObj(0).int("id") shouldBe 21 + rows.getObj(rows.lastIndex).int("id") shouldBe 50 + } + + private fun parseDataframeJson(result: MimeTypedResult): JsonObject { + val parser = Parser.default() + return parser.parse(StringBuilder(result["application/kotlindataframe+json"]!!)) as JsonObject + } + + private fun JsonArray<*>.getObj(index: Int) = this.get(index) as JsonObject + + @Test + fun `test kotlin notebook plugin utils groupby`() { + @Language("kts") + val result = exec( + """ + data class Row(val id: Int, val group: Int) + val df = (1..100).map { Row(it, if (it <= 50) 1 else 2) }.toDataFrame() + KotlinNotebookPluginUtils.getRowsSubsetForRendering(df.groupBy("group"), 0, 10) + """.trimIndent() + ) + + val json = parseDataframeJson(result) + + json.int("nrow") shouldBe 2 + json.int("ncol") shouldBe 2 + + val rows = json.array>("kotlin_dataframe")!! 
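// Rough shape of the JSON asserted on here, inferred from the assertions in this test (illustrative only):
//
//     {
//       "nrow": 2,
//       "ncol": 2,
//       "kotlin_dataframe": [
//         { "group": 1, "group1": [ /* 50 nested rows */ ] },
//         { "group": 2, "group1": [ /* 50 nested rows */ ] }
//       ]
//     }
//
// i.e. grouping 100 rows into two groups of 50 renders as two rows, each carrying its nested frame.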
+ rows.getObj(0).array("group1")!!.size shouldBe 50 + rows.getObj(1).array("group1")!!.size shouldBe 50 + } +} diff --git a/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/jupyter/ResourcesTest.kt b/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/jupyter/ResourcesTest.kt new file mode 100644 index 0000000000..36077d14b8 --- /dev/null +++ b/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/jupyter/ResourcesTest.kt @@ -0,0 +1,17 @@ +package org.jetbrains.kotlinx.dataframe.jupyter + +import io.kotest.matchers.shouldNotBe +import org.jetbrains.kotlinx.dataframe.DataFrame +import org.junit.Test +import java.io.InputStreamReader + +class ResourcesTest { + + @Test + fun `resources available`() { + val res = DataFrame::class.java.getResourceAsStream("/table.html") + println(InputStreamReader(res).readText()) + + res shouldNotBe null + } +} diff --git a/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/jupyter/SampleNotebooksTests.kt b/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/jupyter/SampleNotebooksTests.kt new file mode 100644 index 0000000000..8a091f0b15 --- /dev/null +++ b/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/jupyter/SampleNotebooksTests.kt @@ -0,0 +1,126 @@ +package org.jetbrains.kotlinx.dataframe.jupyter + +import org.jetbrains.jupyter.parser.JupyterParser +import org.jetbrains.jupyter.parser.notebook.CodeCell +import org.jetbrains.jupyter.parser.notebook.Output +import org.junit.Ignore +import org.junit.Test +import java.io.File +import java.util.Locale + +class SampleNotebooksTests : DataFrameJupyterTest() { + @Test + fun puzzles() = exampleTest("puzzles", "40 puzzles") + + @Test + fun github() = exampleTest( + "github", + cellClause = CellClause.stopAfter { cell -> + "personal access token" in cell.source + }, + cleanup = { + File("jetbrains.json").delete() + } + ) + + @Test + fun titanic() = exampleTest( + "titanic", "Titanic", + replacer = CodeReplacer.byMap( + "../../idea-examples/" to "$ideaExamplesPath/" + ) + ) + + @Test + @Ignore + fun wine() = exampleTest( + "wine", "WineNetWIthKotlinDL", + replacer = CodeReplacer.byMap( + testFile("wine", "winequality-red.csv") + ) + ) + + @Test + @Ignore + fun netflix() { + val currentLocale = Locale.getDefault() + try { + // Set explicit locale as of test data contains locale-dependent values (date for parsing) + Locale.setDefault(Locale.forLanguageTag("en-US")) + + exampleTest( + "netflix", + replacer = CodeReplacer.byMap( + testFile("netflix", "country_codes.csv"), + testFile("netflix", "netflix_titles.csv"), + ) + ) + } finally { + Locale.setDefault(currentLocale) + } + } + + @Test + @Ignore + fun movies() = exampleTest( + "movies", + replacer = CodeReplacer.byMap( + "ml-latest/movies.csv" to "$ideaExamplesPath/movies/src/main/resources/movies.csv" + ), + // There is no tags data in repository + cellClause = CellClause.stopAfter { cell -> + "tags.csv" in cell.source + } + ) + + private fun doTest( + notebookPath: String, + replacer: CodeReplacer, + cellClause: CellClause, + cleanup: () -> Unit = {}, + ) { + val notebookFile = File(notebookPath) + val notebook = JupyterParser.parse(notebookFile) + val finalClause = cellClause and CellClause.IS_CODE + + val codeCellsData = notebook.cells + .filter { finalClause.isAccepted(it) } + .map { CodeCellData(it.source, (it as? 
CodeCell)?.outputs.orEmpty()) } + + try { + for (codeCellData in codeCellsData) { + val code = codeCellData.code + val codeToExecute = replacer.replace(code) + + println("Executing code:\n$codeToExecute") + val cellResult = exec(codeToExecute) + println(cellResult) + } + } finally { + cleanup() + } + } + + private fun exampleTest( + dir: String, + notebookName: String? = null, + replacer: CodeReplacer = CodeReplacer.DEFAULT, + cellClause: CellClause = CellClause { true }, + cleanup: () -> Unit = {}, + ) { + val fileName = if (notebookName == null) "$dir.ipynb" else "$notebookName.ipynb" + doTest("$jupyterExamplesPath/$dir/$fileName", replacer, cellClause, cleanup) + } + + data class CodeCellData( + val code: String, + val outputs: List, + ) + + companion object { + const val ideaExamplesPath = "../examples/idea-examples" + const val jupyterExamplesPath = "../examples/jupyter-notebooks" + + fun testFile(folder: String, fileName: String) = fileName to "$jupyterExamplesPath/$folder/$fileName" + } +} diff --git a/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/puzzles/BasicTests.kt b/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/puzzles/BasicTests.kt new file mode 100644 index 0000000000..18f13ec759 --- /dev/null +++ b/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/puzzles/BasicTests.kt @@ -0,0 +1,206 @@ +package org.jetbrains.kotlinx.dataframe.puzzles + +import io.kotest.matchers.shouldBe +import org.jetbrains.kotlinx.dataframe.api.* +import org.jetbrains.kotlinx.dataframe.api.columnOf +import org.jetbrains.kotlinx.dataframe.api.dataFrameOf +import org.junit.Test +import java.text.DecimalFormatSymbols +import kotlin.reflect.typeOf + +class BasicTests { + + private val animal by columnOf("cat", "cat", "snake", "dog", "dog", "cat", "snake", "cat", "dog", "dog") + private val age by columnOf(2.5, 3.0, 0.5, Double.NaN, 5.0, 2.0, 4.5, Double.NaN, 7.0, 3.0) + private val visits by columnOf(1, 3, 2, 3, 2, 3, 1, 1, 2, 1) + private val priority by columnOf("yes", "yes", "no", "yes", "no", "no", "no", "yes", "no", "no") + + private val df = dataFrameOf(animal, age, visits, priority) + + @Test + fun `return first 3 rows`() { + val expected = dataFrameOf("animal", "age", "visits", "priority")( + "cat", 2.5, 1, "yes", + "cat", 3.0, 3, "yes", + "snake", 0.5, 2, "no" + ) + + df[0 until 3] shouldBe expected + df.head(3) shouldBe expected + df.take(3) shouldBe expected + } + + @Test + fun `select animal and age columns from df`() { + val expected = dataFrameOf(animal, age) + + df[animal, age] shouldBe expected + df["animal", "age"] shouldBe expected + df.select { animal and age } shouldBe expected + df.select { "animal" and "age" } shouldBe expected + } + + @Test + fun `select rows (3, 4, 8) and columns (animal, age)`() { + val expected = dataFrameOf("animal", "age")( + "dog", Double.NaN, + "dog", 5.0, + "dog", 7.0 + ) + + df[3, 4, 8][animal, age] shouldBe expected + df[3, 4, 8]["animal", "age"] shouldBe expected + df.select { animal and age }[3, 4, 8] shouldBe expected + df.select { "animal" and "age" }[3, 4, 8] shouldBe expected + } + + @Test + fun `select only rows where number of visits is grater than 2`() { + val expected = dataFrameOf("animal", "age", "visits", "priority")( + "cat", 3.0, 3, "yes", + "dog", Double.NaN, 3, "yes", + "cat", 2.0, 3, "no" + ) + + df.filter { visits > 2 } shouldBe expected + df.filter { "visits"() > 2 } shouldBe expected + } + + @Test + fun `select rows where age is missing`() { + val expected = 
dataFrameOf("animal", "age", "visits", "priority")( + "dog", Double.NaN, 3, "yes", + "cat", Double.NaN, 1, "yes" + ) + + df.filter { age().isNaN() } shouldBe expected + df.filter { "age"().isNaN() } shouldBe expected + } + + @Test + fun `select rows where animal is a cat and age is less than 3`() { + val expected = dataFrameOf("animal", "age", "visits", "priority")( + "cat", 2.5, 1, "yes", + "cat", 2.0, 3, "no" + ) + + df.filter { animal() == "cat" && age() < 3 } shouldBe expected + df.filter { "animal"() == "cat" && "age"() < 3 } shouldBe expected + } + + @Test + fun `select rows where age is between 2 and 4 (inclusive)`() { + val expected = dataFrameOf("animal", "age", "visits", "priority")( + "cat", 2.5, 1, "yes", + "cat", 3.0, 3, "yes", + "cat", 2.0, 3, "no", + "dog", 3.0, 1, "no" + ) + + df.filter { age() in 2.0..4.0 } shouldBe expected + df.filter { "age"() in 2.0..4.0 } shouldBe expected + } + + @Test + fun `change age in row 5 to 1,5`() { + val dfActualAcc0 = df.update { age }.at(5).with { 1.5 } + val dfActualAcc1 = df.update { "age"() }.at(5).with { 1.5 } + + dfActualAcc0[5][age] shouldBe 1.5 + dfActualAcc1[5]["age"] shouldBe 1.5 + } + + @Test + fun `calculate sum of all visits`() { + df[visits].sum() shouldBe 19 + df.sum { visits } shouldBe 19 + + df["visits"].cast().sum() shouldBe 19 + df.sum { "visits"() } shouldBe 19 + df.sum("visits") shouldBe 19 + } + + @Test + fun `calculate mean age for each animal`() { + val expected = dataFrameOf("animal", "age")( + "cat", Double.NaN, + "snake", 2.5, + "dog", Double.NaN + ) + + df.groupBy { animal }.mean { age } shouldBe expected + df.groupBy("animal").mean("age") shouldBe expected + } + + @Test + fun `append and drop new row`() { + val modifiedDf = df.append("dog", 5.5, 2, "no") + + val d = DecimalFormatSymbols.getInstance().decimalSeparator + modifiedDf[10].toString() shouldBe "{ animal:dog, age:5${d}500000, visits:2, priority:no }" + + modifiedDf.dropLast() shouldBe df + } + + @Test + fun `count number of each type of animal`() { + val expected = dataFrameOf("animal", "count")( + "cat", 4, + "snake", 2, + "dog", 4, + ) + + df.groupBy { animal }.count() shouldBe expected + df.groupBy("animal").count() shouldBe expected + } + + @Test + fun `sort df first by the values in age in descending order, then by in visit in ascending order`() { + val expected = dataFrameOf("age", "visits")(4.5, 1, 3.0, 1, 3.0, 3) + + val sortDfAcc = df.sortBy { age.desc() and visits } + val sortDfStr = df.sortBy { "age".desc() and "visits" } + + sortDfAcc[age, visits][4..6] shouldBe expected + sortDfStr["age", "visits"][4..6] shouldBe expected + } + + @Test + fun `replace priority column to boolean values`() { + val convertedDfAcc = df.convert { priority }.with { it == "yes" } + val convertedDfStr = df.convert { "priority"() }.with { it == "yes" } + + convertedDfAcc[priority].type() shouldBe typeOf() + convertedDfAcc["priority"].type() shouldBe typeOf() + + convertedDfStr[priority][5] shouldBe false + convertedDfStr["priority"][5] shouldBe false + } + + @Test + fun `change dog to corgi`() { + val updatedDfAcc = df.update { animal }.where { it == "dog" }.with { "corgi" } + val updatedDfStr = df.update("animal").where { it == "dog" }.with { "corgi" } + + updatedDfAcc[animal][3] shouldBe "corgi" + updatedDfAcc[animal][8] shouldBe "corgi" + + updatedDfStr["animal"][3] shouldBe "corgi" + updatedDfStr["animal"][8] shouldBe "corgi" + } + + @Test + fun `find mean age for each animal type and number of visits`() { + val expected = dataFrameOf("animal", "1", "3", "2")( + 
"cat", 2.5, 2.5, null, + "snake", 4.5, null, 0.5, + "dog", 3.0, Double.NaN, 6.0 + ) + + val actualDfAcc = df.pivot(inward = false) { visits }.groupBy { animal }.mean(skipNA = true) { age } + val actualDfStr = df.pivot("visits", inward = false).groupBy("animal").mean("age", skipNA = true) + + actualDfAcc shouldBe expected + actualDfStr shouldBe expected + } +} diff --git a/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/puzzles/CleaningDataTests.kt b/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/puzzles/CleaningDataTests.kt new file mode 100644 index 0000000000..7a159f5dec --- /dev/null +++ b/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/puzzles/CleaningDataTests.kt @@ -0,0 +1,120 @@ +package org.jetbrains.kotlinx.dataframe.puzzles + +import io.kotest.matchers.shouldBe +import org.jetbrains.kotlinx.dataframe.api.* +import org.jetbrains.kotlinx.dataframe.api.column +import org.jetbrains.kotlinx.dataframe.api.columnOf +import org.jetbrains.kotlinx.dataframe.api.dataFrameOf +import org.junit.Test + +class CleaningDataTests { + + private val fromTo = listOf( + "LoNDon_paris", + "MAdrid_miLAN", + "londON_StockhOlm", + "Budapest_PaRis", + "Brussels_londOn" + ).toColumn("From_To") + private val flightNumber = listOf(10045.0, Double.NaN, 10065.0, Double.NaN, 10085.0).toColumn("FlightNumber") + private val recentDelays = + listOf(listOf(23, 47), listOf(), listOf(24, 43, 87), listOf(13), listOf(67, 32)).toColumn("RecentDelays") + private val airline = listOf( + "KLM(!)", + "{Air France} (12)", + "(British Airways. )", + "12. Air France", + "'Swiss Air'" + ).toColumn("Airline") + + private var df = dataFrameOf(fromTo, flightNumber, recentDelays, airline) + + @Test + fun `interpolate test`() { + val expected = columnOf(10045, 10055, 10065, 10075, 10085).named("FlightNumber") + + df.update { flightNumber }.where { it.isNaN() } + .with { prev()!![flightNumber] + (next()!![flightNumber] - prev()!![flightNumber]) / 2 } + .convert { flightNumber }.toInt()[flightNumber] shouldBe expected + + df.update { "FlightNumber"() }.where { it.isNaN() } + .with { + prev()!![ { "FlightNumber"() }] + (next()!![ { "FlightNumber"() }] - prev()!![ { "FlightNumber"() }]) / 2 + } + .convert { flightNumber }.toInt()["FlightNumber"] shouldBe expected + } + + @Test + fun `split From_To`() { + val expected = dataFrameOf("From", "To")( + "LoNDon", "paris", + "MAdrid", "miLAN", + "londON", "StockhOlm", + "Budapest", "PaRis", + "Brussels", "londOn" + ) + + df.split { fromTo }.by('_').into("From", "To")["From", "To"] shouldBe expected + df.split { "From_To"() }.by('_').into("From", "To")["From", "To"] shouldBe expected + } + + @Test + fun `uppercase for cities`() { + val from by column("From") + val to by column("To") + + val expected = dataFrameOf("From", "To")( + "London", "Paris", + "Madrid", "Milan", + "London", "Stockholm", + "Budapest", "Paris", + "Brussels", "London" + ) + + df + .split { fromTo }.by('_').into("From", "To")[from, to] + .update { from and to }.with { it.lowercase().replaceFirstChar(Char::uppercase) } shouldBe expected + + df + .split { "From_To"() }.by('_').into("From", "To")["From", "To"] + .update { "From"() and "To"() } + .with { it.lowercase().replaceFirstChar(Char::uppercase) } shouldBe expected + } + + @Test + fun `airline test`() { + val expected = columnOf("KLM", "Air France", "British Airways", "Air France", "Swiss Air").named("Airline") + + df.update { airline }.with { + "([a-zA-Z\\s]+)".toRegex().find(it)?.value?.trim() ?: "" + 
}[airline] shouldBe expected + + df.update { "Airline"<String>() }.with { + "([a-zA-Z\\s]+)".toRegex().find(it)?.value?.trim() ?: "" + }["Airline"] shouldBe expected + } + + @Test + fun `split delays`() { + val delay1 by column<Double>("delay_1") + val delay2 by column<Double>("delay_2") + val delay3 by column<Double>("delay_3") + + val expected = dataFrameOf("delay_1", "delay_2", "delay_3")( + 23.0, 47.0, Double.NaN, + Double.NaN, Double.NaN, Double.NaN, + 24.0, 43.0, 87.0, + 13.0, Double.NaN, Double.NaN, + 67.0, 32.0, Double.NaN + ) + + df + .convert { recentDelays }.with { it.map { d -> d.toDouble() } } + .split { recentDelays }.default(Double.NaN).into { "delay_$it" }[delay1, delay2, delay3] shouldBe expected + + df + .convert { "RecentDelays"<List<Int>>() }.with { it.map { d -> d.toDouble() } } + .split { "RecentDelays"<List<Int>>() }.default(Double.NaN) + .into { "delay_$it" }[delay1, delay2, delay3] shouldBe expected + } +} diff --git a/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/puzzles/DateTestUtils.kt b/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/puzzles/DateTestUtils.kt new file mode 100644 index 0000000000..a608786b14 --- /dev/null +++ b/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/puzzles/DateTestUtils.kt @@ -0,0 +1,41 @@ +package org.jetbrains.kotlinx.dataframe.puzzles + +import kotlinx.datetime.DateTimeUnit +import kotlinx.datetime.LocalDate +import kotlinx.datetime.daysUntil +import kotlinx.datetime.plus + +class DateRangeIterator(first: LocalDate, last: LocalDate, val step: Int) : Iterator<LocalDate> { + private val finalElement: LocalDate = last + private var hasNext: Boolean = if (step > 0) first <= last else first >= last + private var next: LocalDate = if (hasNext) first else finalElement + + override fun hasNext(): Boolean = hasNext + + override fun next(): LocalDate { + val value = next + if (value == finalElement) { + if (!hasNext) throw kotlin.NoSuchElementException() + hasNext = false + } else { + next = next.plus(step, DateTimeUnit.DayBased(1)) + } + return value + } +} + +operator fun ClosedRange<LocalDate>.iterator() = DateRangeIterator(this.start, this.endInclusive, 1) + +fun ClosedRange<LocalDate>.toList(): List<LocalDate> { + return when (val size = this.start.daysUntil(this.endInclusive)) { + 0 -> emptyList() + 1 -> listOf(iterator().next()) + else -> { + val dest = ArrayList<LocalDate>(size) + for (item in this) { + dest.add(item) + } + dest + } + } +} diff --git a/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/puzzles/DateTests.kt b/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/puzzles/DateTests.kt new file mode 100644 index 0000000000..67a5a5a93f --- /dev/null +++ b/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/puzzles/DateTests.kt @@ -0,0 +1,105 @@ +package org.jetbrains.kotlinx.dataframe.puzzles + +import io.kotest.matchers.shouldBe +import kotlinx.datetime.LocalDate +import kotlinx.datetime.Month +import kotlinx.datetime.toJavaLocalDate +import org.jetbrains.kotlinx.dataframe.api.* +import org.jetbrains.kotlinx.dataframe.api.column +import org.jetbrains.kotlinx.dataframe.api.columnOf +import org.jetbrains.kotlinx.dataframe.api.dataFrameOf +import org.junit.Test +import java.time.temporal.WeekFields +import java.util.* +import kotlin.math.round +import kotlin.random.Random + +class DateTests { + + private val start = LocalDate(2015, 1, 1) + private val end = LocalDate(2016, 1, 1) + private val dti = (start..end).toList().toColumn("dti") + private val random = Random(42) + private val s = List(dti.size()) {
random.nextDouble() }.toColumn("s") + private val df = dataFrameOf(dti, s) + + @Test + fun `sum for every Wednesday`() { + round(df.filter { dti().dayOfWeek.ordinal == 2 }.sum { s }) shouldBe 29.0 + round(df.filter { "dti"().dayOfWeek.ordinal == 2 }.sum { "s"() }) shouldBe 29.0 + } + + @Test + fun `mean for each calendar month`() { + val expected = dataFrameOf("month", "s")( + Month.JANUARY, 0.4931329003584097, + Month.FEBRUARY, 0.5712887136099648, + Month.MARCH, 0.5263142140806993, + Month.APRIL, 0.5125149149109348, + Month.MAY, 0.5030292029672427, + Month.JUNE, 0.4691575481416088, + Month.JULY, 0.5533841389695694, + Month.AUGUST, 0.5661103164088407, + Month.SEPTEMBER, 0.44344455128172383, + Month.OCTOBER, 0.41726495068242264, + Month.NOVEMBER, 0.43862977969202627, + Month.DECEMBER, 0.5130316016982762 + ) + + df.groupBy { dti.map { it.month } named "month" }.mean() shouldBe expected + df.groupBy { "dti"().map { it.month } named "month" }.mean() shouldBe expected + } + + @Test + fun `find date on which highest value`() { + val expected = dataFrameOf("month4", "dti", "month41")( + 1, LocalDate(2015, 2, 11), 1, + 2, LocalDate(2015, 8, 25), 2, + 3, LocalDate(2015, 9, 2), 3, + ) + val month4 by column() + val month41 by column() + + df.add("month4") { + when (dti().monthNumber) { + in 1..4 -> 1 + in 5..8 -> 2 + else -> 3 + } + }.groupBy("month4").aggregate { maxBy(s) into "max" }.flatten()[month4, dti, month41] shouldBe expected + + df.add("month4") { + when ("dti"().monthNumber) { + in 1..4 -> 1 + in 5..8 -> 2 + else -> 3 + } + }.groupBy("month4").aggregate { maxBy("s") into "max" }.flatten()["month4", "dti", "month41"] shouldBe expected + } + + @Test + fun `create column consisting of the third Thursday in each month`() { + val start = LocalDate(2015, 1, 1) + val end = LocalDate(2015, 12, 31) + + val expected = columnOf( + LocalDate(2015, 1, 15), + LocalDate(2015, 2, 19), + LocalDate(2015, 3, 19), + LocalDate(2015, 4, 16), + LocalDate(2015, 5, 14), + LocalDate(2015, 6, 18), + LocalDate(2015, 7, 16), + LocalDate(2015, 8, 13), + LocalDate(2015, 9, 17), + LocalDate(2015, 10, 15), + LocalDate(2015, 11, 19), + LocalDate(2015, 12, 17), + ).named("3thu") + + (start..end).toList().toColumn("3thu").filter { + it.toJavaLocalDate()[WeekFields.of(Locale.ENGLISH).weekOfMonth()] == 3 && + it.dayOfWeek.value == 4 + } shouldBe expected + } +} diff --git a/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/puzzles/HardTests.kt b/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/puzzles/HardTests.kt new file mode 100644 index 0000000000..49b9137773 --- /dev/null +++ b/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/puzzles/HardTests.kt @@ -0,0 +1,123 @@ +package org.jetbrains.kotlinx.dataframe.puzzles + +import io.kotest.matchers.shouldBe +import org.jetbrains.kotlinx.dataframe.api.* +import org.jetbrains.kotlinx.dataframe.api.column +import org.jetbrains.kotlinx.dataframe.api.columnOf +import org.jetbrains.kotlinx.dataframe.api.dataFrameOf +import org.junit.Test +import kotlin.math.round +import kotlin.random.Random + +class HardTests { + + @Test + fun `count difference back to previous zero`() { + val x = columnOf(7, 2, 0, 3, 4, 2, 5, 0, 3, 4).named("X") + val df = x.toDataFrame() + val y = columnOf(1, 2, 0, 1, 2, 3, 4, 0, 1, 2).named("Y") + + df.mapToColumn("Y") { + if (it[x] == 0) 0 else (prev()?.newValue() ?: 0) + 1 + } shouldBe y + + df.mapToColumn("Y") { + if (it["X"] == 0) 0 else (prev()?.newValue() ?: 0) + 1 + } shouldBe y + } + + @Test 
+ fun `3 largest values`() { + val names = ('a'..'h').map { it.toString() } + val random = Random(30) + val list = List(64) { random.nextInt(1, 101) } + val df = dataFrameOf(names)(*list.toTypedArray()) + val index by column() + val vals by column() + val name by column() + + val expected = dataFrameOf("index", "name")(0, "d", 2, "c", 3, "f") + + df.add("index") { index() } + .gather { dropLast() }.into("name", "vals") + .sortByDesc { vals }.take(3)[index, name] shouldBe expected + + df.add("index") { index() } + .gather { dropLast() }.into("name", "vals") + .sortByDesc("vals").take(3)["index", "name"] shouldBe expected + } + + @Test + fun `group mean and negative values`() { + val random = Random(31) + val lab = listOf("A", "B") + + val vals by columnOf(*Array(15) { random.nextInt(-30, 30) }) + val grps by columnOf(*Array(15) { lab[random.nextInt(0, 2)] }) + + val df = dataFrameOf(vals, grps) + + val expected = dataFrameOf("vals", "grps", "patched_values")( + -17, "B", 21.0, + -7, "B", 21.0, + 16, "A", 16.0, + 28, "B", 28.0, + 9, "A", 9.0, + 16, "B", 16.0, + -21, "B", 21.0, + -14, "A", 16.0, + -19, "A", 16.0, + -22, "A", 16.0, + 19, "B", 19.0, + -2, "B", 21.0, + -1, "A", 16.0, + -19, "B", 21.0, + 23, "A", 23.0 + ) + + val means = df.filter { vals >= 0 } + .groupBy { grps }.mean() + .pivot { grps }.values { vals } + + df.add("patched_values") { + if (vals() < 0) means[grps()] as Double else vals().toDouble() + } shouldBe expected + + val meansStr = df.filter { "vals"() >= 0 } + .groupBy("grps").mean() + .pivot("grps").values("vals") + + df.add("patched_values") { + if ("vals"() < 0) meansStr["grps"()] as Double else "vals"().toDouble() + } shouldBe expected + } + + @Test + fun `rolling mean`() { + val groups by columnOf("a", "a", "b", "b", "a", "b", "b", "b", "a", "b", "a", "b") + val value by columnOf(1.0, 2.0, 3.0, Double.NaN, 2.0, 3.0, Double.NaN, 1.0, 7.0, 3.0, Double.NaN, 8.0) + val df = dataFrameOf(groups, value) + + val expected = dataFrameOf("groups", "value", "res")( + "a", 1.0, 1.0, + "a", 2.0, 2.0, + "b", 3.0, 3.0, + "b", Double.NaN, 3.0, + "a", 2.0, 2.0, + "b", 3.0, 3.0, + "b", Double.NaN, 3.0, + "b", 1.0, 2.0, + "a", 7.0, 4.0, + "b", 3.0, 2.0, + "a", Double.NaN, 4.0, + "b", 8.0, 4.0, + ) + + df.add("id") { index() } + .groupBy { groups }.add("res") { + round(relative(-2..0)[value].filter { !it.isNaN() }.mean()) + }.concat() + .sortBy("id") + .remove("id") shouldBe expected + } +} diff --git a/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/puzzles/MediumTests.kt b/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/puzzles/MediumTests.kt new file mode 100644 index 0000000000..01d5155d6a --- /dev/null +++ b/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/puzzles/MediumTests.kt @@ -0,0 +1,125 @@ +package org.jetbrains.kotlinx.dataframe.puzzles + +import io.kotest.matchers.shouldBe +import org.jetbrains.kotlinx.dataframe.api.* +import org.jetbrains.kotlinx.dataframe.api.column +import org.jetbrains.kotlinx.dataframe.api.columnOf +import org.jetbrains.kotlinx.dataframe.api.dataFrameOf +import org.junit.Test +import kotlin.math.roundToInt +import kotlin.random.Random +import kotlin.random.nextInt + +class MediumTests { + + private val random = Random(42) + + @Test + fun `filter rows that contain same value as row above`() { + val df = dataFrameOf("A")(1, 2, 2, 3, 4, 5, 5, 5, 6, 7, 7) + val a by column("A") + + val expected = dataFrameOf("A")(1, 2, 3, 4, 5, 6, 7) + + df.filter { prev()?.get(a) != a() } shouldBe 
expected + df.filter { prev()?.get("A") != "A"() } shouldBe expected + + df.filter { diff { a() } != 0 } shouldBe expected + df.filter { diff { "A"() } != 0 } shouldBe expected + } + + @Test + fun `subtract row mean from each element in row`() { + val df = dataFrameOf("a", "b", "c")( + 1.0, 2.0, 3.0, + 1.3, 2.3, 3.3, + 1.57, 2.57, 3.57 + ) + + val expected = dataFrameOf("a", "b", "c")( + -1, 0, 1, + -1, 0, 1, + -1, 0, 1 + ) + + df.convert { colsOf() }.with { (it - rowMean()).roundToInt() } shouldBe expected + } + + @Test + fun `smallest sum`() { + val names = ('a'..'j').map { it.toString() } + val df = dataFrameOf(names) { List(5) { random.nextDouble() } } + + df.sum().transposeTo().minBy { value }.name shouldBe "b" + df.sum().transpose().minBy("value")["name"] shouldBe "b" + } + + @Test + fun `count unique rows`() { + val df = dataFrameOf("a", "b", "c") { List(30) { random.nextInt(0..2) } } + df.countDistinct() shouldBe 19 + } + + @Test + fun `find column which contains third NaN value`() { + val nan = Double.NaN + val names = ('a'..'j').map { it.toString() } + val df = dataFrameOf(names)( + 0.04, nan, nan, 0.25, nan, 0.43, 0.71, 0.51, nan, nan, + nan, nan, nan, 0.04, 0.76, nan, nan, 0.67, 0.76, 0.16, + nan, nan, 0.5, nan, 0.31, 0.4, nan, nan, 0.24, 0.01, + 0.49, nan, nan, 0.62, 0.73, 0.26, 0.85, nan, nan, nan, + nan, nan, 0.41, nan, 0.05, nan, 0.61, nan, 0.48, 0.68 + ) + + val expected = columnOf("e", "c", "d", "h", "d").named("res") + + df.mapToColumn("res") { + namedValuesOf().filter { it.value.isNaN }.drop(2).firstOrNull()?.name + } shouldBe expected + } + + @Test + fun `sum of three greatest values`() { + val grps by columnOf("a", "a", "a", "b", "b", "c", "a", "a", "b", "c", "c", "c", "b", "b", "c") + val vals by columnOf(12, 345, 3, 1, 45, 14, 4, 52, 54, 23, 235, 21, 57, 3, 87) + val df = dataFrameOf(grps, vals) + + val expected = dataFrameOf("grps", "res")("a", 409, "b", 156, "c", 345) + + df.groupBy { grps }.aggregate { + vals().sortDesc().take(3).sum() into "res" + } shouldBe expected + + df.groupBy { grps }.aggregate { + "vals"().sortDesc().take(3).sum() into "res" + } shouldBe expected + } + + @Test + fun `sum bins`() { + val list = List(200) { random.nextInt(1, 101) } + val df = dataFrameOf("A", "B")(*list.toTypedArray()) + val a by column("A") + val b by column("B") + + val expected = dataFrameOf("A", "B")( + "(0, 10]", 353, + "(10, 20]", 873, + "(20, 30]", 321, + "(30, 40]", 322, + "(40, 50]", 432, + "(50, 60]", 754, + "(60, 70]", 405, + "(70, 80]", 561, + "(80, 90]", 657, + "(90, 100]", 527 + ) + + df.groupBy { a.map { (it - 1) / 10 } }.sum { b } + .sortBy { a }.convert { a }.with { "(${it * 10}, ${it * 10 + 10}]" } shouldBe expected + + df.groupBy { "A"().map { (it - 1) / 10 } }.sum("B").sortBy("A") + .convert { "A"() }.with { "(${it * 10}, ${it * 10 + 10}]" } shouldBe expected + } +} diff --git a/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/rendering/PrecisionTests.kt b/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/rendering/PrecisionTests.kt new file mode 100644 index 0000000000..1cc8601118 --- /dev/null +++ b/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/rendering/PrecisionTests.kt @@ -0,0 +1,45 @@ +package org.jetbrains.kotlinx.dataframe.rendering + +import io.kotest.matchers.shouldBe +import org.jetbrains.kotlinx.dataframe.api.columnOf +import org.jetbrains.kotlinx.dataframe.api.filter +import org.jetbrains.kotlinx.dataframe.impl.scale +import 
org.jetbrains.kotlinx.dataframe.io.RendererDecimalFormat +import org.jetbrains.kotlinx.dataframe.io.defaultPrecision +import org.jetbrains.kotlinx.dataframe.io.format +import org.junit.Test +import java.text.DecimalFormatSymbols + +class PrecisionTests { + + @Test + fun precision() { + columnOf(1.2, 3.2).scale() shouldBe 1 + columnOf(1.1232, 3.2).scale() shouldBe 4 + columnOf(1.1220001, 12313).scale() shouldBe defaultPrecision + columnOf(1, 2).scale() shouldBe 0 + columnOf(1.0, 2).scale() shouldBe 1 + columnOf(123121.0, -1231.0).scale() shouldBe 1 + columnOf(123121.00001, -1231.120).scale() shouldBe 5 + columnOf(0.000343434343434343434343).scale() shouldBe defaultPrecision + columnOf(1E24).scale() shouldBe -23 + } + + @Test + fun format() { + val d = DecimalFormatSymbols.getInstance().decimalSeparator + val value = 1.2341 + val expected = "1${d}23" + val digits = 2 + val formatter = RendererDecimalFormat.fromPrecision(digits) + value.format(formatter) shouldBe expected + value.toFloat().format(formatter) shouldBe expected + value.toBigDecimal().format(formatter) shouldBe expected + } + + @Test + fun emptyColPrecision() { + val col by columnOf(1.0) + col.filter { false }.scale() shouldBe 0 + } +} diff --git a/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/rendering/RenderingTests.kt b/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/rendering/RenderingTests.kt new file mode 100644 index 0000000000..ed6f5c9f1d --- /dev/null +++ b/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/rendering/RenderingTests.kt @@ -0,0 +1,128 @@ +package org.jetbrains.kotlinx.dataframe.rendering + +import io.kotest.matchers.shouldBe +import io.kotest.matchers.string.shouldContain +import io.kotest.matchers.string.shouldNotContain +import org.jetbrains.kotlinx.dataframe.api.add +import org.jetbrains.kotlinx.dataframe.api.columnOf +import org.jetbrains.kotlinx.dataframe.api.dataFrameOf +import org.jetbrains.kotlinx.dataframe.api.group +import org.jetbrains.kotlinx.dataframe.api.into +import org.jetbrains.kotlinx.dataframe.api.move +import org.jetbrains.kotlinx.dataframe.api.parse +import org.jetbrains.kotlinx.dataframe.api.toDataFrame +import org.jetbrains.kotlinx.dataframe.io.DisplayConfiguration +import org.jetbrains.kotlinx.dataframe.io.escapeHTML +import org.jetbrains.kotlinx.dataframe.io.formatter +import org.jetbrains.kotlinx.dataframe.io.print +import org.jetbrains.kotlinx.dataframe.io.renderToString +import org.jetbrains.kotlinx.dataframe.io.renderToStringTable +import org.jetbrains.kotlinx.dataframe.io.toHTML +import org.jetbrains.kotlinx.dataframe.jupyter.DefaultCellRenderer +import org.jetbrains.kotlinx.dataframe.jupyter.RenderedContent +import org.jsoup.Jsoup +import org.junit.Test +import java.net.URL +import java.text.DecimalFormatSymbols +import kotlin.reflect.typeOf + +class RenderingTests { + + @Test + fun `render row with unicode values as table`() { + val value = "Шёл Шива по шоссе, сокрушая сущее.\r\nА на встречу Саша шла, круглое сосущая" + val col by columnOf(value) + val df = col.toDataFrame() + val rendered = df[0].renderToStringTable() + rendered shouldContain "Шива" + rendered shouldNotContain "\n" + rendered shouldNotContain "\r" + rendered shouldContain "\\r" + rendered shouldContain "А" + rendered shouldContain "..." 
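+ // the cell is rendered on a single line and cut off with an ellipsis, so text from after the line break ("Саша") must not appear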
+ rendered shouldNotContain "Саша" + } + + @Test + fun `parse url`() { + val df = dataFrameOf("url")("http://www.google.com").parse() + df["url"].type() shouldBe typeOf() + } + + @Test + fun htmlTagsAreEscaped() { + val df = dataFrameOf("name", "int")(" (12)", 1) + val html = df.toHTML().toString() + html shouldContain "<Air France>" + } + + @Test + fun unicodeEscapeSequencesAreEscaped() { + val df = dataFrameOf("content")("""Hello\nfrom \x and \y""") + val html = df.toHTML().toString() + html shouldContain "Hello\nfrom \x and \y" + } + + @Test + fun `long text is trimmed without escaping`() { + val df = dataFrameOf("text")("asdfkjasdlkjfhasljkddasdasdasdasdasdasdhf") + val html = df.toHTML().toString() + html shouldNotContain "\\\\" + html shouldNotContain """ + } + + @Test + fun `non ascii text`() { + val value = "Шёл Шива по шоссе, сокрушая сущее" + val df = dataFrameOf("text")(value) + val script = df.toHTML().script + script shouldContain value.escapeHTML() + } + + @Test + fun `empty row with nested empty row`() { + val df = dataFrameOf("a", "b", "c")(null, null, null) + val grouped = df.group("a", "b").into("d").group("c", "d").into("e")[0] + + val formatted = formatter.format(grouped, DefaultCellRenderer, DisplayConfiguration()) + Jsoup.parse(formatted).text() shouldBe "{ }" + + grouped.renderToString() shouldBe "{ }" + } + + @Test + fun `render successfully`() { + dataFrameOf("a", "b")(listOf(1, 1), listOf(2, 4)) + .group("a", "b") + .into("g") + .add("a") { 1 } + .toHTML() + } + + @Test + fun `render URL`() { + val df = dataFrameOf("url")("https://api.github.com/orgs/JetBrains") + val html = df.parse().toHTML() + html.toString() shouldNotContain RenderedContent::class.simpleName!! + } + + @Test + fun `render successfully 2`() { + val df = dataFrameOf("name", "parent", "type")("Boston (MA)", "123wazxdPag5", "Campus") + .move("parent").into { "parent"["id"] } + .group { all() }.into("Campus") + df.toHTML().print() + } + + @Test + fun `render double with exponent`() { + val d = DecimalFormatSymbols.getInstance().decimalSeparator + listOf( + dataFrameOf("col")(1E27) to "1${d}000000e+27", + dataFrameOf("col")(1.123) to "1${d}123", + dataFrameOf("col")(1.0) to "1${d}0", + ).forEach { (df, rendered) -> + df.toHTML().script shouldContain rendered + } + } +} diff --git a/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/rendering/RenderingTestsBase.kt b/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/rendering/RenderingTestsBase.kt new file mode 100644 index 0000000000..e80ec971cb --- /dev/null +++ b/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/rendering/RenderingTestsBase.kt @@ -0,0 +1,18 @@ +package org.jetbrains.kotlinx.dataframe.rendering + +import org.jetbrains.kotlinx.dataframe.api.dataFrameOf +import org.jetbrains.kotlinx.dataframe.io.DisplayConfiguration +import org.jetbrains.kotlinx.dataframe.io.formatter +import org.jetbrains.kotlinx.dataframe.jupyter.DefaultCellRenderer +import org.jsoup.Jsoup +import org.jsoup.nodes.Element + +abstract class RenderingTestsBase { + protected fun rowOf(vararg pairs: Pair) = dataFrameOf(pairs.map { it.first }).withValues(pairs.map { it.second })[0] + + protected fun Any?.truncate(limit: Int): String = format(limit).text() + + protected fun Any?.tooltip(limit: Int): String? 
= format(limit).children().singleOrNull()?.attr("title") + + protected fun Any?.format(limit: Int): Element = Jsoup.parse(formatter.format(this, DefaultCellRenderer, DisplayConfiguration(cellContentLimit = limit))).body() +} diff --git a/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/rendering/TooltipTests.kt b/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/rendering/TooltipTests.kt new file mode 100644 index 0000000000..28caf22306 --- /dev/null +++ b/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/rendering/TooltipTests.kt @@ -0,0 +1,25 @@ +package org.jetbrains.kotlinx.dataframe.rendering + +import io.kotest.matchers.shouldBe +import org.junit.Test + +class TooltipTests : RenderingTestsBase() { + + @Test + fun `long str`() { + "12345678".tooltip(5) shouldBe "12345678" + } + + @Test + fun `short str`() { + "1234".tooltip(5) shouldBe null + } + + @Test + fun row() { + val data = rowOf("name" to "Alice", "age" to 10) + val tooltip = "name: Alice\nage: 10" + data.tooltip(5) shouldBe tooltip + data.tooltip(15) shouldBe tooltip + } +} diff --git a/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/rendering/TruncateTests.kt b/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/rendering/TruncateTests.kt new file mode 100644 index 0000000000..c42bb6574a --- /dev/null +++ b/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/rendering/TruncateTests.kt @@ -0,0 +1,127 @@ +package org.jetbrains.kotlinx.dataframe.rendering + +import io.kotest.matchers.shouldBe +import org.junit.Test + +class TruncateTests : RenderingTestsBase() { + + @Test + fun `truncate str`() { + "123456789".truncate(5) shouldBe "12..." + } + + @Test + fun `truncate str to ellipsis`() { + "123456789".truncate(3) shouldBe "..." + } + + @Test + fun `no truncate`() { + "123".truncate(3) shouldBe "123" + } + + @Test + fun truncateMany() { + listOf("1", "2", "34567890").truncate(15) shouldBe "[1, 2, 3456...]" + } + + @Test + fun truncateMany2() { + listOf("1", "2345678", "9").truncate(15) shouldBe "[1, 2345678, 9]" + } + + @Test + fun truncateMany7() { + listOf("1", "23456789", "0").truncate(15) shouldBe "[1, 2345..., 0]" + } + + @Test + fun truncateMany3() { + listOf("1", "2345", "6789").truncate(15) shouldBe "[1, 2345, 6789]" + } + + @Test + fun truncateMany4() { + listOf("1", "234567", "89012").truncate(15) shouldBe "[1, 2..., 8...]" + } + + @Test + fun truncateMany5() { + listOf("1", "234567", "8901").truncate(15) shouldBe "[1, 2..., 8901]" + } + + @Test + fun truncateMany6() { + listOf("1", "234567", "8", "9").truncate(15) shouldBe "[1, 23..., ...]" + } + + @Test + fun truncateMany8() { + listOf("123456", "789", "0", "1").truncate(15) shouldBe "[123456, ...]" + } + + @Test + fun truncateMany9() { + listOf("123456789", "789", "0", "1").truncate(15) shouldBe "[12345..., ...]" + } + + @Test + fun truncateManyMany() { + listOf(1).truncate(3) shouldBe "[1]" + listOf(10).truncate(4) shouldBe "[10]" + listOf(1, 2).truncate(4) shouldBe "[..]" + listOf(100).truncate(4) shouldBe "[..]" + listOf(100).truncate(5) shouldBe "[100]" + listOf(1000).truncate(5) shouldBe "[...]" + listOf(1, 2).truncate(5) shouldBe "[...]" + listOf(1, 2).truncate(6) shouldBe "[1, 2]" + } + + @Test + fun `run truncate row`() { + testTruncates( + rowOf("name" to "Alice", "age" to 10), + listOf( + "{..}", + "{...}", + "{ ...}", + "{ ... }", + "{ n..., a... }", + "{ na..., a... }", + "{ nam..., a... }", + "{ name..., a... 
}", + "{ name:..., a... }", + "{ name: ..., a... }", + "{ name: A..., a... }", + "{ name: Alice, a... }", + "{ name: Alice, ag... }", + "{ name: Alice, age... }", + "{ name: Alice, age: 10 }" + ) + ) + } + + private fun testTruncates(value: Any?, truncates: List) { + val start = truncates[0].length + val end = truncates.last().length + val actual = (start..end).map { value.truncate(it) } + val expected = (start..end).map { len -> truncates.indexOfFirst { it.length > len }.let { if (it == -1) truncates.last() else truncates[it - 1] } } + actual shouldBe expected + } + + @Test + fun `run truncate many`() { + val value = listOf("Alice", "Bob", "Billy") + testTruncates( + value, + listOf( + "[..]", + "[...]", + "[A..., ...]", + "[Alice, ...]", + "[Alice, Bob, B...]", + "[Alice, Bob, Billy]", + ) + ) + } +} diff --git a/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/rendering/html/Browsing.kt b/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/rendering/html/Browsing.kt new file mode 100644 index 0000000000..2c456b06db --- /dev/null +++ b/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/rendering/html/Browsing.kt @@ -0,0 +1,40 @@ +package org.jetbrains.kotlinx.dataframe.rendering.html + +import org.jetbrains.kotlinx.dataframe.api.toDataFrame +import org.junit.Ignore +import org.junit.Test + +class Browsing { + + @Ignore + @Test + fun test() { + data class Name(val firstName: String, val lastName: String?) + data class Score(val subject: String, val value: Int) + data class Student(val name: Name, val age: Int, val scores: List) + + val students = listOf( + Student(Name("Alice", "Cooper"), 15, listOf(Score("math", 4), Score("biology", 3))), + Student(Name("Bob", "Marley"), 20, listOf(Score("music", 5))), + Student(Name("Null", null), 100, listOf(Score("nothing", 5))), + Student(Name("Antony", "Hover"), 20, listOf(Score("russian", 1))), + Student(Name("Sally", "Fever"), 20, listOf(Score("art", 4), Score("math", 4), Score("biology", 3))) + ) + + val df = students.toDataFrame { + "year of birth" from { 2021 - it.age } + + properties(maxDepth = 1) { +// exclude(Score::subject) // `subject` property will be skipped from object graph traversal +// preserve() // `Name` objects will be stored as-is without transformation into DataFrame + } + + "summary" { + "max score" from { it.scores.maxOf { it.value } } + "min score" from { it.scores.minOf { it.value } } + } + } + + df.browse() + } +} diff --git a/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/rendering/html/Utils.kt b/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/rendering/html/Utils.kt new file mode 100644 index 0000000000..7497d29e09 --- /dev/null +++ b/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/rendering/html/Utils.kt @@ -0,0 +1,15 @@ +package org.jetbrains.kotlinx.dataframe.rendering.html + +import org.jetbrains.kotlinx.dataframe.AnyFrame +import org.jetbrains.kotlinx.dataframe.io.initHtml +import org.jetbrains.kotlinx.dataframe.io.toHTML +import java.awt.Desktop +import java.io.File + +fun AnyFrame.browse() { + val file = File("temp.html") // File.createTempFile("df_rendering", ".html") + file.writeText(toHTML(extraHtml = initHtml()).toString()) + val uri = file.toURI() + val desktop = Desktop.getDesktop() + desktop.browse(uri) +} diff --git a/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/samples/api/Access.kt 
b/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/samples/api/Access.kt new file mode 100644 index 0000000000..46d2ab2a4f --- /dev/null +++ b/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/samples/api/Access.kt @@ -0,0 +1,916 @@ +package org.jetbrains.kotlinx.dataframe.samples.api + +import org.jetbrains.kotlinx.dataframe.api.* +import org.junit.Test + +class Access : TestBase() { + + @Test + fun getColumnByName_properties() { + // SampleStart + df.age + df.name.lastName + // SampleEnd + } + + @Test + fun getColumnByName_accessors() { + // SampleStart + val age by column() + val name by columnGroup() + val lastName by name.column() + + df[age] + df[lastName] + // SampleEnd + } + + @Test + fun getColumnByName_strings() { + // SampleStart + df["age"] + df["name"]["firstName"] + // SampleEnd + } + + @Test + fun getColumn_properties() { + // SampleStart + df.getColumn { age } + // SampleEnd + } + + @Test + fun getColumn_accessors() { + // SampleStart + val age by column() + + df.getColumn { age } + // SampleEnd + } + + @Test + fun getColumn_strings() { + // SampleStart + df.getColumn("age") + // SampleEnd + } + + @Test + fun getColumnOrNull_properties() { + // SampleStart + df.getColumnOrNull { age } + // SampleEnd + } + + @Test + fun getColumnOrNull_accessors() { + // SampleStart + val age by column() + + df.getColumnOrNull(age) + // SampleEnd + } + + @Test + fun getColumnOrNull_strings() { + // SampleStart + df.getColumnOrNull("age") + // SampleEnd + } + + @Test + fun getColumns_properties() { + // SampleStart + df.getColumns { age and name } + // SampleEnd + } + + @Test + fun getColumns_accessors() { + // SampleStart + val age by column() + val name by columnGroup() + + df.getColumns { age and name } + // SampleEnd + } + + @Test + fun getColumns_strings() { + // SampleStart + df.getColumns("age", "name") + // SampleEnd + } + + @Test + fun getColumnGroup_properties() { + // SampleStart + df.getColumnGroup { name } + // SampleEnd + } + + @Test + fun getColumnGroup_accessors() { + // SampleStart + val name by columnGroup() + + df.getColumnGroup(name) + // SampleEnd + } + + @Test + fun getColumnGroup_strings() { + // SampleStart + df.getColumnGroup("name") + // SampleEnd + } + + @Test + fun getColumnByIndex() { + // SampleStart + df.getColumn(2) + df.getColumnGroup(0).getColumn(1) + // SampleEnd + } + + @Test + fun getRowByIndex() { + // SampleStart + df[2] + // SampleEnd + } + + @Test + fun getRowByCondition_properties() { + // SampleStart + df.single { age == 45 } + df.first { weight != null } + df.minBy { age } + df.maxBy { name.firstName.length } + df.maxByOrNull { weight } + // SampleEnd + } + + @Test + fun getRowByCondition_accessors() { + // SampleStart + val age by column() + val weight by column() + val name by columnGroup() + val firstName by name.column() + + df.single { age() == 45 } + df.first { weight() != null } + df.minBy(age) + df.maxBy { firstName().length } + df.maxByOrNull { weight() } + // SampleEnd + } + + @Test + fun getRowByCondition_strings() { + // SampleStart + df.single { "age"() == 45 } + df.first { it["weight"] != null } + df.minBy("weight") + df.maxBy { "name"["firstName"]().length } + df.maxByOrNull("weight") + // SampleEnd + } + + @Test + fun getCell_strings() { + // SampleStart + df["age"][1] + df[1]["age"] + // SampleEnd + } + + @Test + fun getCell_properties() { + // SampleStart + df.age[1] + df[1].age + // SampleEnd + } + + @Test + fun getCell_accessors() { + // SampleStart + val age by column() + + df[age][1] + 
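// the same cell, addressed row-first +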
df[1][age] + // SampleEnd + } + + @Test + fun getColumnsByName_properties() { + // SampleStart + df[df.age, df.weight] + // SampleEnd + } + + @Test + fun getColumnsByName_accessors() { + // SampleStart + val age by column() + val weight by column() + + df[age, weight] + // SampleEnd + } + + @Test + fun getColumnsByName_strings() { + // SampleStart + df["age", "weight"] + // SampleEnd + } + + @Test + fun select_properties() { + // SampleStart + df.select { age and weight } + // SampleEnd + } + + @Test + fun select_accessors() { + // SampleStart + val age by column() + val weight by column() + + df.select { age and weight } + df.select(age, weight) + // SampleEnd + } + + @Test + fun select_strings() { + // SampleStart + df.select { "age" and "weight" } + df.select("age", "weight") + // SampleEnd + } + + @Test + fun getSeveralRowsByIndices() { + // SampleStart + df[0, 3, 4] + // SampleEnd + } + + @Test + fun getSeveralRowsByRanges() { + // SampleStart + df[1..2] + df[0..2, 4..5] + // SampleEnd + } + + @Test + fun getRowsColumns() { + // SampleStart + df.columns() // List + df.rows() // Iterable + df.values() // Sequence + // SampleEnd + } + + @Test + fun take() { + // SampleStart + df.take(5) + // SampleEnd + } + + @Test + fun takeLast() { + // SampleStart + df.takeLast(5) + // SampleEnd + } + + @Test + fun takeWhile() { + // SampleStart + df.takeWhile { isHappy } + // SampleEnd + } + + @Test + fun drop() { + // SampleStart + df.drop(5) + // SampleEnd + } + + @Test + fun dropLast() { + // SampleStart + df.dropLast() // default 1 + df.dropLast(5) + // SampleEnd + } + + @Test + fun dropWhile() { + // SampleStart + df.dropWhile { !isHappy } + // SampleEnd + } + + @Test + fun filter_properties() { + // SampleStart + df.filter { age > 18 && name.firstName.startsWith("A") } + // SampleEnd + } + + @Test + fun filter_accessors() { + // SampleStart + val age by column() + val name by columnGroup() + val firstName by name.column() + + df.filter { age() > 18 && firstName().startsWith("A") } + // or + df.filter { it[age] > 18 && it[firstName].startsWith("A") } + // SampleEnd + } + + @Test + fun filter_strings() { + // SampleStart + df.filter { "age"() > 18 && "name"["firstName"]().startsWith("A") } + // SampleEnd + } + + @Test + fun filterBy_properties() { + // SampleStart + df.filterBy { isHappy } + // SampleEnd + } + + @Test + fun filterBy_accessors() { + // SampleStart + val isHappy by column() + df.filterBy { isHappy } + // SampleEnd + } + + @Test + fun filterBy_strings() { + // SampleStart + df.filterBy("isHappy") + // SampleEnd + } + + @Test + fun dropWhere_properties() { + // SampleStart + df.drop { weight == null || city == null } + // SampleEnd + } + + @Test + fun dropWhere_accessors() { + // SampleStart + val name by columnGroup() + val weight by column() + val city by column() + + df.drop { weight() == null || city() == null } + // or + df.drop { it[weight] == null || it[city] == null } + // SampleEnd + } + + @Test + fun dropWhere_strings() { + // SampleStart + df.drop { it["weight"] == null || it["city"] == null } + // SampleEnd + } + + @Test + fun dropNulls() { + // SampleStart + df.dropNulls() // remove rows with null value in any column + df.dropNulls(whereAllNull = true) // remove rows with null values in all columns + df.dropNulls { city } // remove rows with null value in 'city' column + df.dropNulls { city and weight } // remove rows with null value in 'city' OR 'weight' columns + df.dropNulls(whereAllNull = true) { city and weight } // remove rows with null value in 'city' AND 
'weight' columns + // SampleEnd + } + + @Test + fun dropNaNs() { + // SampleStart + df.dropNaNs() // remove rows containing NaN in any column + df.dropNaNs(whereAllNaN = true) // remove rows with NaN in all columns + df.dropNaNs { weight } // remove rows where 'weight' is NaN + df.dropNaNs { age and weight } // remove rows where either 'age' or 'weight' is NaN + df.dropNaNs(whereAllNaN = true) { age and weight } // remove rows where both 'age' and 'weight' are NaN + // SampleEnd + } + + @Test + fun dropNA() { + // SampleStart + df.dropNA() // remove rows containing null or NaN in any column + df.dropNA(whereAllNA = true) // remove rows with null or NaN in all columns + df.dropNA { weight } // remove rows where 'weight' is null or NaN + df.dropNA { age and weight } // remove rows where either 'age' or 'weight' is null or NaN + df.dropNA(whereAllNA = true) { age and weight } // remove rows where both 'age' and 'weight' are null or NaN + // SampleEnd + } + + @Test + fun byColumn_strings() { + // SampleStart + df["name"][0] + df["name", "age"][3, 5, 6] + // SampleEnd + // TODO: df["age"][2..4] + } + + @Test + fun byColumn_accessors() { + // SampleStart + val name by column() + val age by column() + df[name][0] + df[name, age][3, 5, 6] + // SampleEnd + // TODO: df[age][2..4] + } + + @Test + fun byColumn_properties() { + // SampleStart + df.name[0] + df.select { name and age }[3, 5, 6] + df.age[2..4] + // SampleEnd + } + + @Test + fun byRow_strings() { + // SampleStart + df[0]["name"] + df[3, 5, 6]["name", "age"] + df[3..5]["age"] + // SampleEnd + } + + @Test + fun byRow_accessors() { + // SampleStart + val name by column() + val age by column() + df[0][name] + df[3, 5, 6][name, age] + df[3..5][age] + // SampleEnd + } + + @Test + fun byRow_properties() { + // SampleStart + df[0].name + df[3, 5, 6].select { name and age } + df[3..5].age + // SampleEnd + } + + @Test + fun namedAndRenameCol() { + // SampleStart + val unnamedCol = columnOf("Alice", "Bob") + val colRename = unnamedCol.rename("name") + val colNamed = columnOf("Alice", "Bob") named "name" + // SampleEnd + } + + @Test + fun namedColumnWithoutValues() { + // SampleStart + val name by column() + val col = column("name") + // SampleEnd + } + + @Test + fun colRefForTypedAccess() { + val df = dataFrameOf("name")("Alice", "Bob") + val name by column() + val col = column("name") + // SampleStart + df.filter { it[name].startsWith("A") } + df.sortBy { col } + // SampleEnd + } + + @Test + fun iterableApi() { + // SampleStart + df.forEach { println(it) } + df.take(5) + df.drop(2) + df.chunked(10) + // SampleEnd + } + + @Test + fun distinct() { + // SampleStart + df.distinct() + // SampleEnd + } + + @Test + fun distinctColumns_properties() { + // SampleStart + df.distinct { age and name } + // same as + df.select { age and name }.distinct() + // SampleEnd + } + + @Test + fun distinctColumns_accessors() { + // SampleStart + val age by column() + val name by columnGroup() + df.distinct { age and name } + // same as + df.select { age and name }.distinct() + // SampleEnd + } + + @Test + fun countDistinct() { + // SampleStart + df.countDistinct() + // SampleEnd + } + + @Test + fun countDistinctColumns_properties() { + // SampleStart + df.countDistinct { age and name } + // SampleEnd + } + + @Test + fun countDistinctColumns_accessors() { + // SampleStart + val age by column() + val name by columnGroup() + df.countDistinct { age and name } + // SampleEnd + } + + @Test + fun countDistinctColumns_strings() { + // SampleStart + df.countDistinct("age", 
"name") + // SampleEnd + } + + @Test + fun distinctColumns_strings() { + // SampleStart + df.distinct("age", "name") + // same as + df.select("age", "name").distinct() + // SampleEnd + } + + @Test + fun distinctBy_properties() { + // SampleStart + df.distinctBy { age and name } + // same as + df.groupBy { age and name }.mapToRows { group.first() } + // SampleEnd + } + + @Test + fun distinctBy_accessors() { + // SampleStart + val age by column() + val name by columnGroup() + val firstName by name.column() + + df.distinctBy { age and name } + // same as + df.groupBy { age and name }.mapToRows { group.first() } + // SampleEnd + } + + @Test + fun distinctBy_strings() { + // SampleStart + df.distinctBy("age", "name") + // same as + df.groupBy("age", "name").mapToRows { group.first() } + // SampleEnd + } + + @Test + fun columnSelectorsUsages() { + // SampleStart + df.select { age and name } + df.fillNaNs { dfsOf() }.withZero() + df.remove { cols { it.hasNulls() } } + df.update { city }.notNull { it.lowercase() } + df.gather { colsOf() }.into("key", "value") + df.move { name.firstName and name.lastName }.after { city } + // SampleEnd + } + + @Test + fun columnSelectors_properties() { + // SampleStart + // by column name + df.select { it.name } + df.select { name } + + // by column path + df.select { name.firstName } + + // with a new name + df.select { name named "Full Name" } + + // converted + df.select { name.firstName.map { it.lowercase() } } + + // column arithmetics + df.select { 2021 - age } + + // two columns + df.select { name and age } + + // range of columns + df.select { name..age } + + // all children of ColumnGroup + df.select { name.all() } + + // dfs traversal of all children columns + df.select { name.allDfs() } + + // SampleEnd + } + + @Test + fun columnSelectors_accessors() { + // SampleStart + // by column name + val name by columnGroup() + df.select { it[name] } + df.select { name } + + // by column path + val firstName by name.column() + df.select { firstName } + + // with a new name + df.select { name named "Full Name" } + + // converted + df.select { firstName.map { it.lowercase() } } + + // column arithmetics + val age by column() + df.select { 2021 - age } + + // two columns + df.select { name and age } + + // range of columns + df.select { name..age } + + // all children of ColumnGroup + df.select { name.all() } + + // dfs traversal of all children columns + df.select { name.allDfs() } + // SampleEnd + } + + @Test + fun columnSelectors_strings() { + // SampleStart + // by column name + df.select { it["name"] } + + // by column path + df.select { it["name"]["firstName"] } + df.select { "name"["firstName"] } + + // with a new name + df.select { "name" named "Full Name" } + + // converted + df.select { "name"["firstName"]().map { it.uppercase() } } + + // column arithmetics + df.select { 2021 - "age"() } + + // two columns + df.select { "name" and "age" } + + // by range of names + df.select { "name".."age" } + + // all children of ColumnGroup + df.select { "name".all() } + + // dfs traversal of all children columns + df.select { "name".allDfs() } + // SampleEnd + } + + @Test + fun columnsSelectorByIndices() { + // SampleStart + // by index + df.select { col(2) } + + // by several indices + df.select { cols(0, 1, 3) } + + // by range of indices + df.select { cols(1..4) } + // SampleEnd + } + + @Test + fun columnSelectorsMisc() { + // SampleStart + // by condition + df.select { cols { it.name().startsWith("year") } } + df.select { startsWith("year") } + + // by type + 
df.select { colsOf() } + + // by type with condition + df.select { colsOf { it.countDistinct() > 5 } } + + // all top-level columns + df.select { all() } + + // first/last n columns + df.select { take(2) } + df.select { takeLast(2) } + + // all except first/last n columns + df.select { drop(2) } + df.select { dropLast(2) } + + // dfs traversal of all columns, excluding ColumnGroups from result + df.select { allDfs() } + + // dfs traversal of all columns, including ColumnGroups in result + df.select { allDfs(includeGroups = true) } + + // dfs traversal with condition + df.select { dfs { it.name().contains(":") } } + + // dfs traversal of columns of given type + df.select { dfsOf() } + + // all columns except given column set + df.select { except { colsOf() } } + + // union of column sets + df.select { take(2) and col(3) } + // SampleEnd + } + + @Test + fun columnSelectorsModifySet() { + // SampleStart + // first/last n columns in column set + df.select { allDfs().take(3) } + df.select { allDfs().takeLast(3) } + + // all except first/last n columns in column set + df.select { allDfs().drop(3) } + df.select { allDfs().dropLast(3) } + + // filter column set by condition + df.select { allDfs().filter { it.name().startsWith("year") } } + + // exclude columns from column set + df.select { allDfs().except { age } } + + // keep only unique columns + df.select { (colsOf() and age).distinct() } + // SampleEnd + } + + @Test + fun forRows_properties() { + // SampleStart + for (row in df) { + println(row.age) + } + + df.forEach { + println(it.age) + } + + df.rows().forEach { + println(it.age) + } + // SampleEnd + } + + @Test + fun forRows_accessors() { + // SampleStart + val age by column() + + for (row in df) { + println(row[age]) + } + + df.forEach { + println(it[age]) + } + + df.rows().forEach { + println(it[age]) + } + // SampleEnd + } + + @Test + fun forRows_strings() { + // SampleStart + for (row in df) { + println(row["age"]) + } + + df.forEach { + println(it["age"]) + } + + df.rows().forEach { + println(it["age"]) + } + // SampleEnd + } + + @Test + fun forColumn() { + // SampleStart + df.columns().forEach { + println(it.name()) + } + // SampleEnd + } + + @Test + fun forCells() { + // SampleStart + // from top to bottom, then from left to right + df.values().forEach { + println(it) + } + + // from left to right, then from top to bottom + df.values(byRows = true).forEach { + println(it) + } + // SampleEnd + } + + @Test + fun xs() { + // SampleStart + df.xs("Charlie", "Chaplin") + + df.xs("Moscow", true) { city and isHappy } + // SampleEnd + } + + @Test + fun values() { + // SampleStart + df.values() + df.values(byRows = true) + df.values { age and weight } + // SampleEnd + } +} diff --git a/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/samples/api/Analyze.kt b/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/samples/api/Analyze.kt new file mode 100644 index 0000000000..912447198d --- /dev/null +++ b/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/samples/api/Analyze.kt @@ -0,0 +1,1145 @@ +package org.jetbrains.kotlinx.dataframe.samples.api + +import org.jetbrains.kotlinx.dataframe.api.aggregate +import org.jetbrains.kotlinx.dataframe.api.asComparable +import org.jetbrains.kotlinx.dataframe.api.asGroupBy +import org.jetbrains.kotlinx.dataframe.api.asNumbers +import org.jetbrains.kotlinx.dataframe.api.colsOf +import org.jetbrains.kotlinx.dataframe.api.column +import org.jetbrains.kotlinx.dataframe.api.columnGroup +import 
org.jetbrains.kotlinx.dataframe.api.columnOf +import org.jetbrains.kotlinx.dataframe.api.concat +import org.jetbrains.kotlinx.dataframe.api.count +import org.jetbrains.kotlinx.dataframe.api.cumSum +import org.jetbrains.kotlinx.dataframe.api.dataFrameOf +import org.jetbrains.kotlinx.dataframe.api.describe +import org.jetbrains.kotlinx.dataframe.api.div +import org.jetbrains.kotlinx.dataframe.api.expr +import org.jetbrains.kotlinx.dataframe.api.frames +import org.jetbrains.kotlinx.dataframe.api.groupBy +import org.jetbrains.kotlinx.dataframe.api.groupByOther +import org.jetbrains.kotlinx.dataframe.api.head +import org.jetbrains.kotlinx.dataframe.api.indices +import org.jetbrains.kotlinx.dataframe.api.length +import org.jetbrains.kotlinx.dataframe.api.matches +import org.jetbrains.kotlinx.dataframe.api.max +import org.jetbrains.kotlinx.dataframe.api.maxBy +import org.jetbrains.kotlinx.dataframe.api.maxByOrNull +import org.jetbrains.kotlinx.dataframe.api.maxFor +import org.jetbrains.kotlinx.dataframe.api.maxOf +import org.jetbrains.kotlinx.dataframe.api.maxOrNull +import org.jetbrains.kotlinx.dataframe.api.mean +import org.jetbrains.kotlinx.dataframe.api.meanFor +import org.jetbrains.kotlinx.dataframe.api.meanOf +import org.jetbrains.kotlinx.dataframe.api.median +import org.jetbrains.kotlinx.dataframe.api.medianFor +import org.jetbrains.kotlinx.dataframe.api.medianOf +import org.jetbrains.kotlinx.dataframe.api.min +import org.jetbrains.kotlinx.dataframe.api.minBy +import org.jetbrains.kotlinx.dataframe.api.minFor +import org.jetbrains.kotlinx.dataframe.api.minOf +import org.jetbrains.kotlinx.dataframe.api.minOrNull +import org.jetbrains.kotlinx.dataframe.api.pivot +import org.jetbrains.kotlinx.dataframe.api.pivotCounts +import org.jetbrains.kotlinx.dataframe.api.pivotMatches +import org.jetbrains.kotlinx.dataframe.api.schema +import org.jetbrains.kotlinx.dataframe.api.std +import org.jetbrains.kotlinx.dataframe.api.stdFor +import org.jetbrains.kotlinx.dataframe.api.stdOf +import org.jetbrains.kotlinx.dataframe.api.sum +import org.jetbrains.kotlinx.dataframe.api.sumFor +import org.jetbrains.kotlinx.dataframe.api.sumOf +import org.jetbrains.kotlinx.dataframe.api.valueCounts +import org.jetbrains.kotlinx.dataframe.api.values +import org.junit.Test +import kotlin.math.ln + +class Analyze : TestBase() { + + @Test + fun basicInfo() { + // SampleStart + df.count() // same as df.rowsCount() + df.indices() // 0 until count() + df.columnsCount() + df.columnNames() + df.head() + df.schema() + df.describe() + // SampleEnd + } + + @Test + fun head() { + // SampleStart + df.head(3) + // SampleEnd + } + + @Test + fun schema() { + // SampleStart + df.schema() + // SampleEnd + } + + @Test + fun schemaGroupBy() { + // SampleStart + df.groupBy { city }.schema() + // SampleEnd + } + + @Test + fun describe() { + // SampleStart + df.describe() + // SampleEnd + } + + @Test + fun describeColumns_properties() { + // SampleStart + df.describe { age and name.all() } + // SampleEnd + } + + @Test + fun describeColumns_accessors() { + // SampleStart + val age by column() + val name by columnGroup() + + df.describe { age and name.all() } + // SampleEnd + } + + @Test + fun describeColumns_strings() { + // SampleStart + df.describe { "age" and "name".all() } + // SampleEnd + } + + @Test + fun countCondition() { + // SampleStart + df.count { age > 15 } + // SampleEnd + } + + @Test + fun count() { + // SampleStart + df.count() + // SampleEnd + } + + @Test + fun countAggregation() { + // SampleStart + df.groupBy { city }.count() 
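+ // the same count aggregation applied to a pivot, and to a pivot combined with groupBy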
+ df.pivot { city }.count { age > 18 } + df.pivot { name.firstName }.groupBy { name.lastName }.count() + // SampleEnd + } + + @Test + fun sumAggregations() { + // SampleStart + df.age.sum() + df.groupBy { city }.sum() + df.pivot { city }.sum() + df.pivot { city }.groupBy { name.lastName }.sum() + // SampleEnd + } + + @Test + fun statisticModes() { + // SampleStart + df.sum() // sum of values per every numeric column + df.sum { age and weight } // sum of all values in `age` and `weight` + df.sumFor { age and weight } // sum of values per `age` and `weight` separately + df.sumOf { (weight ?: 0) / age } // sum of expression evaluated for every row + // SampleEnd + } + + @Test + fun minmaxModes() { + // SampleStart + df.min() // min of values per every comparable column + df.min { age and weight } // min of all values in `age` and `weight` + df.minFor { age and weight } // min of values per `age` and `weight` separately + df.minOf { (weight ?: 0) / age } // min of expression evaluated for every row + df.minBy { age } // DataRow with minimal `age` + // SampleEnd + } + + @Test + fun minmaxAggregations() { + // SampleStart + df.min() + df.age.min() + df.groupBy { city }.min() + df.pivot { city }.min() + df.pivot { city }.groupBy { name.lastName }.min() + // SampleEnd + } + + @Test + fun medianModes() { + // SampleStart + df.median() // median of values per every comparable column + df.median { age and weight } // median of all values in `age` and `weight` + df.medianFor { age and weight } // median of values per `age` and `weight` separately + df.medianOf { (weight ?: 0) / age } // median of expression evaluated for every row + // SampleEnd + } + + @Test + fun medianAggregations() { + // SampleStart + df.median() + df.age.median() + df.groupBy { city }.median() + df.pivot { city }.median() + df.pivot { city }.groupBy { name.lastName }.median() + // SampleEnd + } + + @Test + fun meanModes() { + // SampleStart + df.mean() // mean of values per every numeric column + df.mean(skipNA = true) { age and weight } // mean of all values in `age` and `weight`, skips NA + df.meanFor(skipNA = true) { age and weight } // mean of values per `age` and `weight` separately, skips NA + df.meanOf { (weight ?: 0) / age } // median of expression evaluated for every row + // SampleEnd + } + + @Test + fun meanAggregations() { + // SampleStart + df.mean() + df.age.mean() + df.groupBy { city }.mean() + df.pivot { city }.mean() + df.pivot { city }.groupBy { name.lastName }.mean() + // SampleEnd + } + + @Test + fun stdModes() { + // SampleStart + df.std() // std of values per every numeric column + df.std { age and weight } // std of all values in `age` and `weight` + df.stdFor { age and weight } // std of values per `age` and `weight` separately, skips NA + df.stdOf { (weight ?: 0) / age } // std of expression evaluated for every row + // SampleEnd + } + + @Test + fun stdAggregations() { + // SampleStart + df.std() + df.age.std() + df.groupBy { city }.std() + df.pivot { city }.std() + df.pivot { city }.groupBy { name.lastName }.std() + // SampleEnd + } + + @Test + fun meanAggregationsSkipNA() { + // SampleStart + df.mean(skipNA = true) + // SampleEnd + } + + @Test + fun statisticAggregations() { + // SampleStart + df.mean() + df.age.sum() + df.groupBy { city }.mean() + df.pivot { city }.median() + df.pivot { city }.groupBy { name.lastName }.std() + // SampleEnd + } + + @Test + fun statisticGroupBySingle() { + // SampleStart + df.groupBy { city }.mean { age } // [`city`, `mean`] + df.groupBy { city }.meanOf { age / 2 } // 
[`city`, `mean`] + // SampleEnd + } + + @Test + fun statisticGroupBySingleNamed() { + // SampleStart + df.groupBy { city }.mean("mean age") { age } // [`city`, `mean age`] + df.groupBy { city }.meanOf("custom") { age / 2 } // [`city`, `custom`] + // SampleEnd + } + + @Test + fun statisticGroupByMany() { + // SampleStart + df.groupBy { city }.meanFor { age and weight } // [`city`, `age`, `weight`] + df.groupBy { city }.mean() // [`city`, `age`, `weight`, ...] + // SampleEnd + } + + @Test + fun statisticPivotSingle_properties() { + // SampleStart + df.groupBy { city }.pivot { name.lastName }.mean { age } + df.groupBy { city }.pivot { name.lastName }.meanOf { age / 2.0 } + // SampleEnd + } + + @Test + fun statisticPivotSingle_accessors() { + // SampleStart + val city by column() + val age by column() + val name by columnGroup() + val lastName by name.column() + + df.groupBy { city }.pivot { lastName }.mean { age } + df.groupBy { city }.pivot { lastName }.meanOf { age() / 2.0 } + // SampleEnd + } + + @Test + fun statisticPivotSingle_strings() { + // SampleStart + df.groupBy("city").pivot { "name"["lastName"] }.mean("age") + df.groupBy("city").pivot { "name"["lastName"] }.meanOf { "age"() / 2.0 } + // SampleEnd + } + + @Test + fun statisticPivotMany() { + // SampleStart + df.groupBy { city }.pivot { name.lastName }.meanFor { age and weight } + df.groupBy { city }.pivot { name.lastName }.mean() + // SampleEnd + } + + @Test + fun statisticPivotManySeparate() { + // SampleStart + df.groupBy { city }.pivot { name.lastName }.meanFor(separate = true) { age and weight } + df.groupBy { city }.pivot { name.lastName }.mean(separate = true) + // SampleEnd + } + + @Test + fun columnStats_properties() { + // SampleStart + df.sum { weight } + df.min { age } + df.mean { age } + df.median { age } + + df.weight.sum() + df.age.max() + df.age.mean() + df.age.median() + // SampleEnd + } + + @Test + fun columnStats_accessors() { + // SampleStart + val weight by column() + val age by column() + + df.sum { weight } + df.min { age } + df.mean { age } + df.median { age } + + df.sum(weight) + df.min(age) + df.mean(age) + df.median(age) + + df[weight].sum() + df[age].mean() + df[age].min() + df[age].median() + // SampleEnd + } + + @Test + fun columnStats_strings() { + // SampleStart + df.sum("weight") + df.min("age") + df.mean("age") + df.median("age") + // SampleEnd + } + + @Test + fun multipleColumnsStat_properties() { + // SampleStart + df.min { colsOf() } + df.max { name.firstName and name.lastName } + df.sum { age and weight } + df.mean { cols(1, 3).asNumbers() } + df.median { name.cols().asComparable() } + // SampleEnd + } + + @Test + fun multipleColumnsStat_accessors() { + // SampleStart + val name by columnGroup() + val firstName by name.column() + val lastName by name.column() + val age by column() + val weight by column() + + df.min { colsOf() } + + df.max { firstName and lastName } + // or + df.max(firstName, lastName) + + df.sum { age and weight } + // or + df.sum(age, weight) + + df.mean { cols(1, 3).asNumbers() } + df.median { name.cols().asComparable() } + // SampleEnd + } + + @Test + fun multipleColumnsStat_strings() { + // SampleStart + + df.min { colsOf() } + + df.max { "name"["firstName"].asComparable() and "name"["lastName"].asComparable() } + + df.sum("age", "weight") + // or + df.sum { "age"() and "weight"() } + + df.mean { cols(1, 3).asNumbers() } + df.median { name.cols().asComparable() } + // SampleEnd + } + + @Test + fun columnsFor_properties() { + // SampleStart + df.minFor { colsOf() } + 
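// the -For variants compute the statistic for each selected column separately +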
df.maxFor { name.firstName and name.lastName }
+ df.sumFor { age and weight }
+ df.meanFor { cols(1, 3).asNumbers() }
+ df.medianFor { name.cols().asComparable() }
+ // SampleEnd
+ }
+
+ @Test
+ fun columnsFor_accessors() {
+ // SampleStart
+ val name by columnGroup()
+ val firstName by name.column()
+ val lastName by name.column()
+ val age by column()
+ val weight by column()
+
+ df.minFor { colsOf() }
+
+ df.maxFor { firstName and lastName }
+ // or
+ df.maxFor(firstName, lastName)
+
+ df.sumFor { age and weight }
+ // or
+ df.sumFor(age, weight)
+
+ df.meanFor { cols(1, 3).asNumbers() }
+ df.medianFor { name.cols().asComparable() }
+ // SampleEnd
+ }
+
+ @Test
+ fun columnsFor_strings() {
+ // SampleStart
+ df.minFor { colsOf() }
+ df.maxFor { "name"["firstName"].asComparable() and "name"["lastName"].asComparable() }
+
+ df.sumFor("age", "weight")
+ // or
+ df.sumFor { "age"() and "weight"() }
+
+ df.meanFor { cols(1, 3).asNumbers() }
+ df.medianFor { name.cols().asComparable() }
+ // SampleEnd
+ }
+
+ @Test
+ fun ofExpressions_properties() {
+ // SampleStart
+ df.minOf { 2021 - age }
+ df.maxOf { name.firstName.length + name.lastName.length }
+ df.sumOf { weight?.let { it - 50 } }
+ df.meanOf { ln(age.toDouble()) }
+ df.medianOf { city?.length }
+ // SampleEnd
+ }
+
+ @Test
+ fun ofExpressions_accessors() {
+ // SampleStart
+ val name by columnGroup()
+ val firstName by name.column()
+ val lastName by name.column()
+ val age by column()
+ val weight by column()
+ val city by column()
+
+ df.minOf { 2021 - age() }
+ df.maxOf { firstName().length + lastName().length }
+ df.sumOf { weight()?.let { it - 50 } }
+ df.meanOf { ln(age().toDouble()) }
+ df.medianOf { city()?.length }
+ // SampleEnd
+ }
+
+ @Test
+ fun ofExpressions_strings() {
+ // SampleStart
+ df.minOf { 2021 - "age"() }
+ df.maxOf { "name"["firstName"]().length + "name"["lastName"]().length }
+ df.sumOf { "weight"()?.let { it - 50 } }
+ df.meanOf { ln("age"().toDouble()) }
+ df.medianOf { "city"()?.length }
+ // SampleEnd
+ }
+
+ @Test
+ fun groupBy_properties() {
+ // SampleStart
+ df.groupBy { name }
+ df.groupBy { city and name.lastName }
+ df.groupBy { age / 10 named "ageDecade" }
+ df.groupBy { expr { name.firstName.length + name.lastName.length } named "nameLength" }
+ // SampleEnd
+ }
+
+ @Test
+ fun groupBy_accessors() {
+ // SampleStart
+ val name by columnGroup()
+ val lastName by name.column()
+ val firstName by name.column()
+ val age by column()
+ val city by column()
+
+ df.groupBy { name }
+ // or
+ df.groupBy(name)
+
+ df.groupBy { city and lastName }
+ // or
+ df.groupBy(city, lastName)
+
+ df.groupBy { age / 10 named "ageDecade" }
+
+ df.groupBy { expr { firstName().length + lastName().length } named "nameLength" }
+ // SampleEnd
+ }
+
+ @Test
+ fun groupBy_strings() {
+ // SampleStart
+ df.groupBy("name")
+ df.groupBy { "city" and "name"["lastName"] }
+ df.groupBy { "age"() / 10 named "ageDecade" }
+ df.groupBy { expr { "name"["firstName"]().length + "name"["lastName"]().length } named "nameLength" }
+ // SampleEnd
+ }
+
+ @Test
+ fun dataFrameToGroupBy() {
+ // SampleStart
+ val key by columnOf(1, 2) // create int column with name "key"
+ val data by columnOf(df[0..3], df[4..6]) // create frame column with name "data"
+ val df = dataFrameOf(key, data) // create dataframe with two columns
+
+ df.asGroupBy { data } // convert dataframe to GroupBy by interpreting 'data' column as groups
+ // SampleEnd
+ }
+
+ @Test
+ fun groupByToFrame() {
+ // SampleStart
+ df.groupBy { city }.toDataFrame()
+ // SampleEnd
+ }
+
+
@Test + fun groupByAggregations_properties() { + // SampleStart + df.groupBy { city }.aggregate { + count() into "total" + count { age > 18 } into "adults" + median { age } into "median age" + min { age } into "min age" + maxBy { age }.name into "oldest" + } + // SampleEnd + } + + @Test + fun groupByAggregations_accessors() { + // SampleStart + val city by column() + val age by column() + val name by columnGroup() + + df.groupBy { city }.aggregate { + count() into "total" + count { age() > 18 } into "adults" + median { age } into "median age" + min { age } into "min age" + maxBy { age() }[name] into "name of oldest" + } + // or + df.groupBy(city).aggregate { + count() into "total" + count { age > 18 } into "adults" + median(age) into "median age" + min(age) into "min age" + maxBy(age)[name] into "name of oldest" + } + // or + df.groupBy(city).aggregate { + count() into "total" + age().count { it > 18 } into "adults" + age().median() into "median age" + age().min() into "min age" + maxBy(age)[name] into "name of oldest" + } + // SampleEnd + } + + @Test + fun groupByAggregations_strings() { + // SampleStart + df.groupBy("city").aggregate { + count() into "total" + count { "age"() > 18 } into "adults" + median("age") into "median age" + min("age") into "min age" + maxBy("age")["name"] into "oldest" + } + // or + df.groupBy("city").aggregate { + count() into "total" + count { "age"() > 18 } into "adults" + "age"().median() into "median age" + "age"().min() into "min age" + maxBy("age")["name"] into "oldest" + } + // SampleEnd + } + + @Test + fun groupByAggregateWithoutInto_properties() { + // SampleStart + df.groupBy { city }.aggregate { maxBy { age }.name } + // SampleEnd + } + + @Test + fun groupByAggregateWithoutInto_accessors() { + // SampleStart + val city by column() + val age by column() + val name by columnGroup() + + df.groupBy { city }.aggregate { maxBy { age() }[name] } + // or + df.groupBy(city).aggregate { maxBy(age)[name] } + // SampleEnd + } + + @Test + fun groupByAggregateWithoutInto_strings() { + // SampleStart + df.groupBy("city").aggregate { maxBy("age")["name"] } + // SampleEnd + } + + @Test + fun groupByDirectAggregations_properties() { + // SampleStart + df.groupBy { city }.max() // max for every comparable column + df.groupBy { city }.mean() // mean for every numeric column + df.groupBy { city }.max { age } // max age into column "age" + df.groupBy { city }.sum("total weight") { weight } // sum of weights into column "total weight" + df.groupBy { city }.count() // number of rows into column "count" + df.groupBy { city } + .max { name.firstName.length() and name.lastName.length() } // maximum length of firstName or lastName into column "max" + df.groupBy { city } + .medianFor { age and weight } // median age into column "age", median weight into column "weight" + df.groupBy { city } + .minFor { (age into "min age") and (weight into "min weight") } // min age into column "min age", min weight into column "min weight" + df.groupBy { city }.meanOf("mean ratio") { weight?.div(age) } // mean of weight/age into column "mean ratio" + // SampleEnd + } + + @Test + fun groupByDirectAggregations_accessors() { + // SampleStart + val city by column() + val age by column() + val weight by column() + val name by columnGroup() + val firstName by name.column() + val lastName by name.column() + + df.groupBy { city }.max() // max for every comparable column + df.groupBy { city }.mean() // mean for every numeric column + df.groupBy { city }.max { age } // max age into column "age" + 
df.groupBy { city }.sum("total weight") { weight } // sum of weights into column "total weight" + df.groupBy { city }.count() // number of rows into column "count" + df.groupBy { city } + .max { firstName.length() and lastName.length() } // maximum length of firstName or lastName into column "max" + df.groupBy { city } + .medianFor { age and weight } // median age into column "age", median weight into column "weight" + df.groupBy { city } + .minFor { (age into "min age") and (weight into "min weight") } // min age into column "min age", min weight into column "min weight" + df.groupBy { city }.meanOf("mean ratio") { weight()?.div(age()) } // mean of weight/age into column "mean ratio" + // SampleEnd + } + + @Test + fun groupByDirectAggregations_strings() { + // SampleStart + df.groupBy("city").max() // max for every comparable column + df.groupBy("city").mean() // mean for every numeric column + df.groupBy("city").max("age") // max age into column "age" + df.groupBy("city").sum("weight", name = "total weight") // sum of weights into column "total weight" + df.groupBy("city").count() // number of rows into column "count" + df.groupBy("city").max { + "name"["firstName"]().length() and "name"["lastName"]().length() + } // maximum length of firstName or lastName into column "max" + df.groupBy("city") + .medianFor("age", "weight") // median age into column "age", median weight into column "weight" + df.groupBy("city") + .minFor { ("age"() into "min age") and ("weight"() into "min weight") } // min age into column "min age", min weight into column "min weight" + df.groupBy("city").meanOf("mean ratio") { + "weight"()?.div("age"()) + } // mean of weight/age into column "mean ratio" + // SampleEnd + } + + @Test + fun groupByWithoutAggregation_properties() { + // SampleStart + df.groupBy { city }.values() + df.groupBy { city }.values { name and age } + df.groupBy { city }.values { weight into "weights" } + // SampleEnd + } + + @Test + fun groupByWithoutAggregation_accessors() { + // SampleStart + val city by column() + val age by column() + val weight by column() + val name by columnGroup() + + df.groupBy(city).values() + df.groupBy(city).values(name, age) + df.groupBy(city).values { weight into "weights" } + // SampleEnd + } + + @Test + fun groupByWithoutAggregation_strings() { + // SampleStart + df.groupBy("city").values() + df.groupBy("city").values("name", "age") + df.groupBy("city").values { "weight" into "weights" } + // SampleEnd + } + + @Test + fun pivot_properties() { + // SampleStart + df.pivot { city } + // SampleEnd + } + + @Test + fun pivot_accessors() { + // SampleStart + val city by column() + + df.pivot { city } + // SampleEnd + } + + @Test + fun pivot_strings() { + // SampleStart + df.pivot("city") + // SampleEnd + } + + @Test + fun pivot2_properties() { + // SampleStart + df.pivot { city and name.firstName } + df.pivot { city then name.firstName } + // SampleEnd + } + + @Test + fun pivot2_accessors() { + // SampleStart + val city by column() + val name by columnGroup() + val firstName by name.column() + + df.pivot { city and firstName } + df.pivot { city then firstName } + // SampleEnd + } + + @Test + fun pivot2_strings() { + // SampleStart + df.pivot { "city" and "name"["firstName"] } + df.pivot { "city" then "name"["firstName"] } + // SampleEnd + } + + @Test + fun pivotInward_properties() { + // SampleStart + df.pivot(inward = true) { city } + // SampleEnd + } + + @Test + fun pivotInward_accessors() { + // SampleStart + val city by column() + + df.pivot(inward = true) { city } + 
// SampleEnd + } + + @Test + fun pivotInward_strings() { + // SampleStart + df.pivot("city", inward = true) + // SampleEnd + } + + @Test + fun pivotAsDataRowOrFrame() { + // SampleStart + df.pivot { city }.frames() + df.pivot { city }.groupBy { name }.frames() + // SampleEnd + } + + @Test + fun pivotGroupBy_properties() { + // SampleStart + df.pivot { city }.groupBy { name } + // same as + df.groupBy { name }.pivot { city } + // SampleEnd + } + + @Test + fun pivotGroupBy_accessors() { + // SampleStart + val city by column() + val name by columnGroup() + + df.pivot { city }.groupBy { name } + // same as + df.groupBy { name }.pivot { city } + // SampleEnd + } + + @Test + fun pivotGroupBy_strings() { + // SampleStart + df.pivot("city").groupBy("name") + // same as + df.groupBy("name").pivot("city") + // SampleEnd + } + + @Test + fun pivotGroupByOther() { + // SampleStart + df.pivot { city }.groupByOther() + // SampleEnd + } + + @Test + fun pivotAggregate_properties() { + // SampleStart + df.pivot { city }.aggregate { minBy { age }.name } + df.pivot { city }.groupBy { name.firstName }.aggregate { + meanFor { age and weight } into "means" + stdFor { age and weight } into "stds" + maxByOrNull { weight }?.name?.lastName into "biggest" + } + // SampleEnd + } + + @Test + fun pivotAggregate_accessors() { + // SampleStart + val city by column() + val name by columnGroup() + val firstName by name.column() + val age by column() + val weight by column() + + df.pivot { city }.aggregate { minBy(age).name } + + df.pivot { city }.groupBy { firstName }.aggregate { + meanFor { age and weight } into "means" + stdFor { age and weight } into "stds" + maxByOrNull(weight)?.name?.lastName into "biggest" + } + // SampleEnd + } + + @Test + fun pivotAggregate_strings() { + // SampleStart + df.pivot("city").aggregate { minBy("age")["name"] } + + df.pivot("city").groupBy { "name"["firstName"] }.aggregate { + meanFor("age", "weight") into "means" + stdFor("age", "weight") into "stds" + maxByOrNull("weight")?.getColumnGroup("name")?.get("lastName") into "biggest" + } + // SampleEnd + } + + @Test + fun pivotCommonAggregations_properties() { + // SampleStart + df.pivot { city }.maxFor { age and weight } + df.groupBy { name }.pivot { city }.median { age } + // SampleEnd + } + + @Test + fun pivotCommonAggregations_accessors() { + // SampleStart + val city by column() + val name by columnGroup() + val age by column() + val weight by column() + + df.pivot { city }.maxFor { age and weight } + df.groupBy { name }.pivot { city }.median { age } + // SampleEnd + } + + @Test + fun pivotCommonAggregations_strings() { + // SampleStart + df.pivot("city").maxFor("age", "weight") + df.groupBy("name").pivot("city").median("age") + // SampleEnd + } + + @Test + fun pivotSeparate_properties() { + // SampleStart + df.pivot { city }.maxFor(separate = true) { age and weight } + df.pivot { city }.aggregate(separate = true) { + min { age } into "min age" + maxOrNull { weight } into "max weight" + } + // SampleEnd + } + + @Test + fun pivotSeparate_accessors() { + // SampleStart + val city by column() + val age by column() + val weight by column() + + df.pivot { city }.maxFor(separate = true) { age and weight } + df.pivot { city }.aggregate(separate = true) { + min { age } into "min age" + maxOrNull { weight } into "max weight" + } + // SampleEnd + } + + @Test + fun pivotSeparate_strings() { + // SampleStart + df.pivot("city").maxFor("age", "weight", separate = true) + df.pivot("city").aggregate(separate = true) { + min("age") into "min age" + 
maxOrNull("weight") into "max weight" + } + // SampleEnd + } + + @Test + fun pivotDefault_properties() { + // SampleStart + df.pivot { city }.groupBy { name }.aggregate { min { age } default 0 } + df.pivot { city }.groupBy { name }.aggregate { + median { age } into "median age" default 0 + minOrNull { weight } into "min weight" default 100 + } + df.pivot { city }.groupBy { name }.default(0).min() + // SampleEnd + } + + @Test + fun pivotDefault_accessors() { + // SampleStart + val city by column() + val age by column() + val weight by column() + val name by columnGroup() + + df.pivot { city }.groupBy { name }.aggregate { min { age } default 0 } + df.pivot { city }.groupBy { name }.aggregate { + median { age } into "median age" default 0 + minOrNull { weight } into "min weight" default 100 + } + df.pivot { city }.groupBy { name }.default(0).min() + // SampleEnd + } + + @Test + fun pivotDefault_strings() { + // SampleStart + df.pivot("city").groupBy("name").aggregate { min("age") default 0 } + df.pivot("city").groupBy("name").aggregate { + median("age") into "median age" default 0 + minOrNull("weight") into "min weight" default 100 + } + df.pivot("city").groupBy("name").default(0).min() + // SampleEnd + } + + @Test + fun pivotInAggregate_properties() { + // SampleStart + df.groupBy { name.firstName }.aggregate { + pivot { city }.aggregate(separate = true) { + mean { age } into "mean age" + count() into "count" + } + count() into "total" + } + // SampleEnd + } + + @Test + fun pivotInAggregate_accessors() { + // SampleStart + val city by column() + val name by columnGroup() + val firstName by name.column() + val age by column() + + df.groupBy { firstName }.aggregate { + pivot { city }.aggregate(separate = true) { + mean { age } into "mean age" + count() into "count" + } + count() into "total" + } + // SampleEnd + } + + @Test + fun pivotInAggregate_strings() { + // SampleStart + df.groupBy { "name"["firstName"] }.aggregate { + pivot("city").aggregate(separate = true) { + mean("age") into "mean age" + count() into "count" + } + count() into "total" + } + // SampleEnd + } + + @Test + fun pivotCounts() { + // SampleStart + df.pivotCounts { city } + // same as + df.pivot { city }.groupByOther().count() + + df.groupBy { name }.pivotCounts { city } + // same as + df.groupBy { name }.pivot { city }.count() + // same as + df.groupBy { name }.aggregate { + pivotCounts { city } + } + // SampleEnd + } + + @Test + fun pivotMatches() { + // SampleStart + df.pivotMatches { city } + // same as + df.pivot { city }.groupByOther().matches() + + df.groupBy { name }.pivotMatches { city } + // same as + df.groupBy { name }.pivot { city }.matches() + // same as + df.groupBy { name }.aggregate { + pivotMatches { city } + } + // SampleEnd + } + + @Test + fun cumSum() { + // SampleStart + df.cumSum { weight } + df.weight.cumSum() + df.groupBy { city }.cumSum { weight }.concat() + // SampleEnd + } + + @Test + fun valueCounts() { + // SampleStart + df.city.valueCounts() + + df.valueCounts { name and city } + // SampleEnd + } +} diff --git a/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/samples/api/ApiLevels.kt b/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/samples/api/ApiLevels.kt new file mode 100644 index 0000000000..8daa74e1da --- /dev/null +++ b/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/samples/api/ApiLevels.kt @@ -0,0 +1,137 @@ +package org.jetbrains.kotlinx.dataframe.samples.api + +import org.jetbrains.kotlinx.dataframe.DataFrame +import 
org.jetbrains.kotlinx.dataframe.annotations.ColumnName +import org.jetbrains.kotlinx.dataframe.annotations.DataSchema +import org.jetbrains.kotlinx.dataframe.api.add +import org.jetbrains.kotlinx.dataframe.api.cast +import org.jetbrains.kotlinx.dataframe.api.column +import org.jetbrains.kotlinx.dataframe.api.dropNulls +import org.jetbrains.kotlinx.dataframe.api.filter +import org.jetbrains.kotlinx.dataframe.api.toListOf +import org.jetbrains.kotlinx.dataframe.io.read +import org.junit.Ignore +import org.junit.Test + +@Ignore +class ApiLevels { + + @Test + fun strings() { + // SampleStart + DataFrame.read("titanic.csv") + .add("lastName") { "name"().split(",").last() } + .dropNulls("age") + .filter { + "survived"() && + "home"().endsWith("NY") && + "age"() in 10..20 + } + // SampleEnd + } + + @Test + fun accessors1() { + // SampleStart + val survived by column() // accessor for Boolean column with name 'survived' + val home by column() + val age by column() + val name by column() + val lastName by column() + // SampleEnd + } + + @Test + fun accessors2() { + val survived by column() + val home by column() + val age by column() + val name by column() + val lastName by column() + // SampleStart + + DataFrame.read("titanic.csv") + .add(lastName) { name().split(",").last() } + .dropNulls { age } + .filter { survived() && home().endsWith("NY") && age()!! in 10..20 } + // SampleEnd + } + + @Test + fun accessors3() { + // SampleStart + val survived by column() + val home by column() + val age by column() + val name by column() + val lastName by column() + + DataFrame.read("titanic.csv") + .add(lastName) { name().split(",").last() } + .dropNulls { age } + .filter { survived() && home().endsWith("NY") && age()!! in 10..20 } + // SampleEnd + } + + @Test + fun kproperties1() { + // SampleStart + data class Passenger( + val survived: Boolean, + val home: String, + val age: Int, + val lastName: String + ) + + val passengers = DataFrame.read("titanic.csv") + .add(Passenger::lastName) { "name"().split(",").last() } + .dropNulls(Passenger::age) + .filter { + it[Passenger::survived] && + it[Passenger::home].endsWith("NY") && + it[Passenger::age] in 10..20 + } + .toListOf() + // SampleEnd + } + + @Test + fun kproperties2() { + // SampleStart + data class Passenger( + @ColumnName("survived") val isAlive: Boolean, + @ColumnName("home") val city: String, + val name: String + ) + + val passengers = DataFrame.read("titanic.csv") + .filter { it.get(Passenger::city).endsWith("NY") } + .toListOf() + // SampleEnd + } + + @DataSchema + interface TitanicPassenger { + val survived: Boolean + val home: String + val age: Int + val name: String + } + + @Test + fun extensionProperties2() { + val df = DataFrame.read("titanic.csv").cast() + // SampleStart + df.add("lastName") { name.split(",").last() } + .dropNulls { age } + .filter { survived && home.endsWith("NY") && age in 10..20 } + // SampleEnd + } + + @Test + fun extensionProperties1() { + // SampleStart + val df = DataFrame.read("titanic.csv") + // SampleEnd + } +} diff --git a/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/samples/api/Collections.kt b/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/samples/api/Collections.kt new file mode 100644 index 0000000000..5e9ff8ced9 --- /dev/null +++ b/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/samples/api/Collections.kt @@ -0,0 +1,81 @@ +package org.jetbrains.kotlinx.dataframe.samples.api + +import io.kotest.matchers.shouldBe +import 
org.jetbrains.kotlinx.dataframe.annotations.DataSchema +import org.jetbrains.kotlinx.dataframe.api.add +import org.jetbrains.kotlinx.dataframe.api.dataFrameOf +import org.jetbrains.kotlinx.dataframe.api.group +import org.jetbrains.kotlinx.dataframe.api.into +import org.jetbrains.kotlinx.dataframe.api.toDataFrame +import org.jetbrains.kotlinx.dataframe.api.toListOf +import org.junit.Test + +class Collections { + + @Test + fun listInterop1() { + // SampleStart + data class Input(val a: Int, val b: Int) + + val list = listOf(Input(1, 2), Input(3, 4)) + // SampleEnd + } + + @Test + fun listInterop2() { + data class Input(val a: Int, val b: Int) + val list = listOf(Input(1, 2), Input(3, 4)) + // SampleStart + val df = list.toDataFrame() + // SampleEnd + } + + @Test + fun listInterop3() { + val list = listOf(Input(1, 2), Input(3, 4)) + val df = list.toDataFrame() + + // SampleStart + @DataSchema + data class Input(val a: Int, val b: Int) + + val df2 = df.add("c") { a + b } + // SampleEnd + } + + @DataSchema + data class Input(val a: Int, val b: Int) + + @DataSchema + interface Input2 { + val a: Int + val b: Int + } + + @Test + fun listInterop4() { + val list = listOf(Input(1, 2), Input(3, 4)) + val df2 = list.toDataFrame().add("c") { a + b } + + // SampleStart + data class Output(val a: Int, val b: Int, val c: Int) + + val result = df2.toListOf() + // SampleEnd + } + + @Test + fun listInterop5() { + // SampleStart + val df = dataFrameOf("name", "lastName", "age")("John", "Doe", 21) + .group("name", "lastName").into("fullName") + + data class FullName(val name: String, val lastName: String) + data class Person(val fullName: FullName, val age: Int) + + val persons = df.toListOf() // [Person(fullName = FullName(name = "John", lastName = "Doe"), age = 21)] + // SampleEnd + + persons shouldBe listOf(Person(FullName("John", "Doe"), 21)) + } +} diff --git a/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/samples/api/Create.kt b/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/samples/api/Create.kt new file mode 100644 index 0000000000..347baccd5f --- /dev/null +++ b/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/samples/api/Create.kt @@ -0,0 +1,402 @@ +package org.jetbrains.kotlinx.dataframe.samples.api + +import io.kotest.matchers.shouldBe +import org.jetbrains.kotlinx.dataframe.DataFrame +import org.jetbrains.kotlinx.dataframe.api.Infer +import org.jetbrains.kotlinx.dataframe.api.ValueProperty +import org.jetbrains.kotlinx.dataframe.api.add +import org.jetbrains.kotlinx.dataframe.api.column +import org.jetbrains.kotlinx.dataframe.api.columnGroup +import org.jetbrains.kotlinx.dataframe.api.columnOf +import org.jetbrains.kotlinx.dataframe.api.dataFrameOf +import org.jetbrains.kotlinx.dataframe.api.emptyDataFrame +import org.jetbrains.kotlinx.dataframe.api.filter +import org.jetbrains.kotlinx.dataframe.api.frameColumn +import org.jetbrains.kotlinx.dataframe.api.map +import org.jetbrains.kotlinx.dataframe.api.named +import org.jetbrains.kotlinx.dataframe.api.preserve +import org.jetbrains.kotlinx.dataframe.api.print +import org.jetbrains.kotlinx.dataframe.api.sortBy +import org.jetbrains.kotlinx.dataframe.api.toColumn +import org.jetbrains.kotlinx.dataframe.api.toColumnOf +import org.jetbrains.kotlinx.dataframe.api.toDataFrame +import org.jetbrains.kotlinx.dataframe.api.value +import org.jetbrains.kotlinx.dataframe.api.withValues +import org.jetbrains.kotlinx.dataframe.columns.ColumnKind +import org.jetbrains.kotlinx.dataframe.kind 
+import org.jetbrains.kotlinx.dataframe.type +import org.junit.Test +import kotlin.reflect.typeOf + +class Create : TestBase() { + + @Test + fun createValueByColumnOf() { + // SampleStart + // Create ValueColumn with name 'student' and two elements of type String + val student by columnOf("Alice", "Bob") + // SampleEnd + } + + @Test + fun createValueByToColumn() { + // SampleStart + listOf("Alice", "Bob").toColumn("name") + // SampleEnd + } + + @Test + fun columnAccessorsUsage() { + // SampleStart + val age by column() + + // Access fourth cell in the "age" column of dataframe `df`. + // This expression returns `Int` because variable `age` has `ColumnAccessor` type. + // If dataframe `df` has no column "age" or column "age" has type which is incompatible with `Int`, + // runtime exception will be thrown. + df[age][3] + 5 + + // Access first cell in the "age" column of dataframe `df`. + df[0][age] * 2 + + // Returns new dataframe sorted by age column (ascending) + df.sortBy(age) + + // Returns new dataframe with the column "year of birth" added + df.add("year of birth") { 2021 - age } + + // Returns new dataframe containing only rows with age > 30 + df.filter { age > 30 } + // SampleEnd + } + + @Test + fun columnAccessorToColumn() { + // SampleStart + val age by column() + val ageCol1 = age.withValues(15, 20) + val ageCol2 = age.withValues(1..10) + // SampleEnd + + ageCol2.size() shouldBe 10 + } + + @Test + fun columnAccessorMap() { + // SampleStart + val age by column() + val year by age.map { 2021 - it } + + df.filter { year > 2000 } + // SampleEnd + } + + @Test + fun columnAccessorComputed_properties() { + // SampleStart + val fullName by column(df) { name.firstName + " " + name.lastName } + + df[fullName] + // SampleEnd + } + + @Test + fun columnAccessorComputed_accessors() { + // SampleStart + val name by columnGroup() + val firstName by name.column() + val lastName by name.column() + + val fullName by column { firstName() + " " + lastName() } + + df[fullName] + // SampleEnd + } + + @Test + fun columnAccessorComputed_strings() { + // SampleStart + + val fullName by column { "name"["firstName"]() + " " + "name"["lastName"]() } + + df[fullName] + // SampleEnd + } + + @Test + fun createValueColumnInferred() { + // SampleStart + val values: List = listOf(1, 2.5) + + values.toColumn("data") // type: Any? + values.toColumn("data", Infer.Type) // type: Number + values.toColumn("data", Infer.Nulls) // type: Any + // SampleEnd + } + + @Test + fun createValueColumnOfType() { + // SampleStart + val values: List = listOf(1, 2.5) + + values.toColumnOf("data") // type: Number? 
+ // SampleEnd + } + + @Test + fun createColumnRenamed() { + // SampleStart + val column = columnOf("Alice", "Bob") named "student" + // SampleEnd + } + + @Test + fun createColumnGroup() { + // SampleStart + val firstName by columnOf("Alice", "Bob") + val lastName by columnOf("Cooper", "Marley") + + // Create ColumnGroup with two nested columns + val fullName by columnOf(firstName, lastName) + // SampleEnd + } + + @Test + fun createFrameColumn() { + // SampleStart + val df1 = dataFrameOf("name", "age")("Alice", 20, "Bob", 25) + val df2 = dataFrameOf("name", "temp")("Charlie", 36.6) + + // Create FrameColumn with two elements of type DataFrame + val frames by columnOf(df1, df2) + // SampleEnd + } + + @Test + fun createColumnAccessor() { + // SampleStart + val name by column() + // SampleEnd + } + + @Test + fun createColumnAccessorRenamed() { + // SampleStart + val accessor by column("complex column name") + // SampleEnd + accessor.name() shouldBe "complex column name" + } + + @Test + fun createDeepColumnAccessor() { + // SampleStart + val name by columnGroup() + val firstName by name.column() + // SampleEnd + } + + @Test + fun createGroupOrFrameColumnAccessor() { + // SampleStart + val columns by columnGroup() + val frames by frameColumn() + // SampleEnd + } + + @Test + fun createEmptyDataFrame() { + // SampleStart + val df = emptyDataFrame() + // SampleEnd + df.columnsCount() shouldBe 0 + df.rowsCount() shouldBe 0 + } + + @Test + fun createDataFrameOf() { + // SampleStart + // DataFrame with 2 columns and 3 rows + val df = dataFrameOf("name", "age")( + "Alice", 15, + "Bob", 20, + "Charlie", 100 + ) + // SampleEnd + } + + @Test + fun createDataFrameOfPairs() { + // SampleStart + // DataFrame with 2 columns and 3 rows + val df = dataFrameOf( + "name" to listOf("Alice", "Bob", "Charlie"), + "age" to listOf(15, 20, 100) + ) + // SampleEnd + } + + @Test + fun createDataFrameWithFill() { + // SampleStart + // Multiplication table + dataFrameOf(1..10) { x -> (1..10).map { x * it } } + // SampleEnd + } + + @Test + fun createDataFrameFillConstant() { + // SampleStart + val names = listOf("first", "second", "third") + + // DataFrame with 3 columns, fill each column with 15 `true` values + val df = dataFrameOf(names).fill(15, true) + // SampleEnd + } + + @Test + fun createDataFrameWithRandom() { + // SampleStart + // 5 columns filled with 7 random double values: + val names = (1..5).map { "column$it" } + dataFrameOf(names).randomDouble(7) + + // 5 columns filled with 7 random double values between 0 and 1 (inclusive) + dataFrameOf(names).randomDouble(7, 0.0..1.0).print() + + // 5 columns filled with 7 random int values between 0 and 100 (inclusive) + dataFrameOf(names).randomInt(7, 0..100).print() + // SampleEnd + } + + @Test + fun createDataFrameFromColumns() { + // SampleStart + + val name by columnOf("Alice", "Bob", "Charlie") + val age by columnOf(15, 20, 22) + + // DataFrame with 2 columns + val df = dataFrameOf(name, age) + // SampleEnd + } + + @Test + fun createDataFrameFromMap() { + // SampleStart + val map = mapOf("name" to listOf("Alice", "Bob", "Charlie"), "age" to listOf(15, 20, 22)) + + // DataFrame with 2 columns + map.toDataFrame() + // SampleEnd + } + + @Test + fun createDataFrameFromIterable() { + // SampleStart + val name by columnOf("Alice", "Bob", "Charlie") + val age by columnOf(15, 20, 22) + + listOf(name, age).toDataFrame() + // SampleEnd + } + + @Test + fun createDataFrameFromNamesAndValues() { + // SampleStart + val names = listOf("name", "age") + val values = listOf( + 
"Alice", 15, + "Bob", 20, + "Charlie", 22 + ) + val df = dataFrameOf(names, values) + // SampleEnd + df.columnNames() shouldBe listOf("name", "age") + df.rowsCount() shouldBe 3 + df["name"].type() shouldBe typeOf() + df["age"].type() shouldBe typeOf() + } + + @Test + fun readDataFrameFromValues() { + // SampleStart + val names = listOf("Alice", "Bob", "Charlie") + val df: DataFrame> = names.toDataFrame() + df.add("length") { value.length } + // SampleEnd + df.value.toList() shouldBe names + } + + @Test + fun readDataFrameFromObject() { + // SampleStart + data class Person(val name: String, val age: Int) + + val persons = listOf(Person("Alice", 15), Person("Bob", 20), Person("Charlie", 22)) + + val df = persons.toDataFrame() + // SampleEnd + df.columnsCount() shouldBe 2 + df.rowsCount() shouldBe 3 + df["name"].type() shouldBe typeOf() + df["age"].type() shouldBe typeOf() + } + + @Test + fun readDataFrameFromDeepObject() { + // SampleStart + data class Name(val firstName: String, val lastName: String) + data class Score(val subject: String, val value: Int) + data class Student(val name: Name, val age: Int, val scores: List) + + val students = listOf( + Student(Name("Alice", "Cooper"), 15, listOf(Score("math", 4), Score("biology", 3))), + Student(Name("Bob", "Marley"), 20, listOf(Score("music", 5))) + ) + + val df = students.toDataFrame(maxDepth = 1) + // SampleEnd + df.columnsCount() shouldBe 3 + df.rowsCount() shouldBe 2 + df["name"].kind shouldBe ColumnKind.Group + df["name"]["firstName"].type() shouldBe typeOf() + df["scores"].kind shouldBe ColumnKind.Frame + } + + @Test + fun readDataFrameFromDeepObjectWithExclude() { + data class Name(val firstName: String, val lastName: String) + data class Score(val subject: String, val value: Int) + data class Student(val name: Name, val age: Int, val scores: List) + + val students = listOf( + Student(Name("Alice", "Cooper"), 15, listOf(Score("math", 4), Score("biology", 3))), + Student(Name("Bob", "Marley"), 20, listOf(Score("music", 5))) + ) + + // SampleStart + val df = students.toDataFrame { + // add column + "year of birth" from { 2021 - it.age } + + // scan all properties + properties(maxDepth = 1) { + exclude(Score::subject) // `subject` property will be skipped from object graph traversal + preserve() // `Name` objects will be stored as-is without transformation into DataFrame + } + + // add column group + "summary" { + "max score" from { it.scores.maxOf { it.value } } + "min score" from { it.scores.minOf { it.value } } + } + } + // SampleEnd + df.columnsCount() shouldBe 5 + df.rowsCount() shouldBe 2 + df["name"].kind shouldBe ColumnKind.Value + df["name"].type shouldBe typeOf() + df["scores"].kind shouldBe ColumnKind.Frame + df["summary"]["min score"].values() shouldBe listOf(3, 5) + } +} diff --git a/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/samples/api/DataRowApi.kt b/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/samples/api/DataRowApi.kt new file mode 100644 index 0000000000..275c8f0297 --- /dev/null +++ b/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/samples/api/DataRowApi.kt @@ -0,0 +1,45 @@ +package org.jetbrains.kotlinx.dataframe.samples.api + +import org.jetbrains.kotlinx.dataframe.api.add +import org.jetbrains.kotlinx.dataframe.api.at +import org.jetbrains.kotlinx.dataframe.api.diff +import org.jetbrains.kotlinx.dataframe.api.drop +import org.jetbrains.kotlinx.dataframe.api.filter +import org.jetbrains.kotlinx.dataframe.api.pivot +import 
org.jetbrains.kotlinx.dataframe.api.prev +import org.jetbrains.kotlinx.dataframe.api.update +import org.jetbrains.kotlinx.dataframe.api.where +import org.jetbrains.kotlinx.dataframe.api.with +import org.jetbrains.kotlinx.dataframe.api.withValue +import org.junit.Test + +class DataRowApi : TestBase() { + + @Test + fun expressions() { + // SampleStart + // Row expression computes values for a new column + df.add("fullName") { name.firstName + " " + name.lastName } + + // Row expression computes updated values + df.update { weight }.at(1, 3, 4).with { prev()?.weight } + + // Row expression computes cell content for values of pivoted column + df.pivot { city }.with { name.lastName.uppercase() } + // SampleEnd + } + + @Test + fun conditions() { + // SampleStart + // Row condition is used to filter rows by index + df.filter { index() % 5 == 0 } + + // Row condition is used to drop rows where `age` is the same as in previous row + df.drop { diff { age } == 0 } + + // Row condition is used to filter rows for value update + df.update { weight }.where { index() > 4 && city != "Paris" }.withValue(50) + // SampleEnd + } +} diff --git a/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/samples/api/Join.kt b/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/samples/api/Join.kt new file mode 100644 index 0000000000..675e5ced21 --- /dev/null +++ b/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/samples/api/Join.kt @@ -0,0 +1,125 @@ +package org.jetbrains.kotlinx.dataframe.samples.api + +import io.kotest.matchers.shouldBe +import org.jetbrains.kotlinx.dataframe.DataFrame +import org.jetbrains.kotlinx.dataframe.api.add +import org.jetbrains.kotlinx.dataframe.api.cast +import org.jetbrains.kotlinx.dataframe.api.column +import org.jetbrains.kotlinx.dataframe.api.columnGroup +import org.jetbrains.kotlinx.dataframe.api.excludeJoin +import org.jetbrains.kotlinx.dataframe.api.fullJoin +import org.jetbrains.kotlinx.dataframe.api.getColumnGroup +import org.jetbrains.kotlinx.dataframe.api.innerJoin +import org.jetbrains.kotlinx.dataframe.api.into +import org.jetbrains.kotlinx.dataframe.api.join +import org.jetbrains.kotlinx.dataframe.api.leftJoin +import org.jetbrains.kotlinx.dataframe.api.rename +import org.jetbrains.kotlinx.dataframe.api.rightJoin +import org.jetbrains.kotlinx.dataframe.api.select +import org.jetbrains.kotlinx.dataframe.columns.ColumnGroup +import org.junit.Test + +class Join : TestBase() { + + private val other = df.add("year") { 2021 - age }.select { name and city and "year" } + + @Test + fun join_properties() { + // SampleStart + df.join(other) { name and city } + // SampleEnd + } + + @Test + fun join_accessors() { + // SampleStart + val name by columnGroup() + val city by column() + + df.join(other) { name and city } + // SampleEnd + } + + @Test + fun join_strings() { + // SampleStart + df.join(other, "name", "city") + // SampleEnd + } + + @Test + fun joinDefault() { + // SampleStart + df.join(other) + // SampleEnd + } + + class Right + val DataFrame.fullName: ColumnGroup get() = getColumnGroup("fullName").cast() + + @Test + fun joinWithMatch_properties() { + val other = other.rename { name }.into("fullName").cast() + val joined = + // SampleStart + df.join(other) { name match right.fullName } + // SampleEnd + joined.rowsCount() shouldBe df.rowsCount() + joined.columnsCount() shouldBe df.columnsCount() + 2 + } + + @Test + fun joinWithMatch_accessors() { + val other = df.add("year") { 2021 - age }.select { name named "fullName" 
and "year" } + // SampleStart + val name by columnGroup() + val fullName by columnGroup() + + df.join(other) { name match fullName } + // SampleEnd + } + + @Test + fun joinWithMatch_strings() { + val other = df.add("year") { 2021 - age }.select { name named "fullName" and "year" } + // SampleStart + df.join(other) { "name" match "fullName" } + // SampleEnd + } + + @Test + fun joinSpecial_properties() { + // SampleStart + df.innerJoin(other) { name and city } + df.leftJoin(other) { name and city } + df.rightJoin(other) { name and city } + df.fullJoin(other) { name and city } + df.excludeJoin(other) { name and city } + // SampleEnd + } + + @Test + fun joinSpecial_accessors() { + // SampleStart + val name by columnGroup() + val city by column() + + df.innerJoin(other) { name and city } + df.leftJoin(other) { name and city } + df.rightJoin(other) { name and city } + df.fullJoin(other) { name and city } + df.excludeJoin(other) { name and city } + // SampleEnd + } + + @Test + fun joinSpecial_strings() { + // SampleStart + df.innerJoin(other, "name", "city") + df.leftJoin(other, "name", "city") + df.rightJoin(other, "name", "city") + df.fullJoin(other, "name", "city") + df.excludeJoin(other, "name", "city") + // SampleEnd + } +} diff --git a/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/samples/api/Modify.kt b/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/samples/api/Modify.kt new file mode 100644 index 0000000000..7c32b9d3e0 --- /dev/null +++ b/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/samples/api/Modify.kt @@ -0,0 +1,1256 @@ +package org.jetbrains.kotlinx.dataframe.samples.api + +import io.kotest.matchers.shouldBe +import org.jetbrains.kotlinx.dataframe.DataFrame +import org.jetbrains.kotlinx.dataframe.DataRow +import org.jetbrains.kotlinx.dataframe.alsoDebug +import org.jetbrains.kotlinx.dataframe.annotations.DataSchema +import org.jetbrains.kotlinx.dataframe.api.ParserOptions +import org.jetbrains.kotlinx.dataframe.api.add +import org.jetbrains.kotlinx.dataframe.api.after +import org.jetbrains.kotlinx.dataframe.api.asFrame +import org.jetbrains.kotlinx.dataframe.api.asGroupBy +import org.jetbrains.kotlinx.dataframe.api.at +import org.jetbrains.kotlinx.dataframe.api.by +import org.jetbrains.kotlinx.dataframe.api.byName +import org.jetbrains.kotlinx.dataframe.api.cast +import org.jetbrains.kotlinx.dataframe.api.colsOf +import org.jetbrains.kotlinx.dataframe.api.column +import org.jetbrains.kotlinx.dataframe.api.columnGroup +import org.jetbrains.kotlinx.dataframe.api.columnOf +import org.jetbrains.kotlinx.dataframe.api.concat +import org.jetbrains.kotlinx.dataframe.api.convert +import org.jetbrains.kotlinx.dataframe.api.convertTo +import org.jetbrains.kotlinx.dataframe.api.dataFrameOf +import org.jetbrains.kotlinx.dataframe.api.default +import org.jetbrains.kotlinx.dataframe.api.dfsOf +import org.jetbrains.kotlinx.dataframe.api.dropNulls +import org.jetbrains.kotlinx.dataframe.api.explode +import org.jetbrains.kotlinx.dataframe.api.fill +import org.jetbrains.kotlinx.dataframe.api.fillNA +import org.jetbrains.kotlinx.dataframe.api.fillNaNs +import org.jetbrains.kotlinx.dataframe.api.fillNulls +import org.jetbrains.kotlinx.dataframe.api.filter +import org.jetbrains.kotlinx.dataframe.api.flatten +import org.jetbrains.kotlinx.dataframe.api.gather +import org.jetbrains.kotlinx.dataframe.api.group +import org.jetbrains.kotlinx.dataframe.api.groupBy +import org.jetbrains.kotlinx.dataframe.api.gt +import 
org.jetbrains.kotlinx.dataframe.api.implode +import org.jetbrains.kotlinx.dataframe.api.inplace +import org.jetbrains.kotlinx.dataframe.api.insert +import org.jetbrains.kotlinx.dataframe.api.into +import org.jetbrains.kotlinx.dataframe.api.intoColumns +import org.jetbrains.kotlinx.dataframe.api.intoList +import org.jetbrains.kotlinx.dataframe.api.intoRows +import org.jetbrains.kotlinx.dataframe.api.inward +import org.jetbrains.kotlinx.dataframe.api.keysInto +import org.jetbrains.kotlinx.dataframe.api.length +import org.jetbrains.kotlinx.dataframe.api.lowercase +import org.jetbrains.kotlinx.dataframe.api.map +import org.jetbrains.kotlinx.dataframe.api.mapKeys +import org.jetbrains.kotlinx.dataframe.api.mapToColumn +import org.jetbrains.kotlinx.dataframe.api.mapToFrame +import org.jetbrains.kotlinx.dataframe.api.mapValues +import org.jetbrains.kotlinx.dataframe.api.match +import org.jetbrains.kotlinx.dataframe.api.max +import org.jetbrains.kotlinx.dataframe.api.mean +import org.jetbrains.kotlinx.dataframe.api.meanFor +import org.jetbrains.kotlinx.dataframe.api.merge +import org.jetbrains.kotlinx.dataframe.api.minus +import org.jetbrains.kotlinx.dataframe.api.move +import org.jetbrains.kotlinx.dataframe.api.named +import org.jetbrains.kotlinx.dataframe.api.notNull +import org.jetbrains.kotlinx.dataframe.api.parse +import org.jetbrains.kotlinx.dataframe.api.parser +import org.jetbrains.kotlinx.dataframe.api.pathOf +import org.jetbrains.kotlinx.dataframe.api.perCol +import org.jetbrains.kotlinx.dataframe.api.perRowCol +import org.jetbrains.kotlinx.dataframe.api.pivotCounts +import org.jetbrains.kotlinx.dataframe.api.prev +import org.jetbrains.kotlinx.dataframe.api.print +import org.jetbrains.kotlinx.dataframe.api.remove +import org.jetbrains.kotlinx.dataframe.api.rename +import org.jetbrains.kotlinx.dataframe.api.reorder +import org.jetbrains.kotlinx.dataframe.api.replace +import org.jetbrains.kotlinx.dataframe.api.reverse +import org.jetbrains.kotlinx.dataframe.api.schema +import org.jetbrains.kotlinx.dataframe.api.select +import org.jetbrains.kotlinx.dataframe.api.shuffle +import org.jetbrains.kotlinx.dataframe.api.sortBy +import org.jetbrains.kotlinx.dataframe.api.sortByDesc +import org.jetbrains.kotlinx.dataframe.api.sortWith +import org.jetbrains.kotlinx.dataframe.api.split +import org.jetbrains.kotlinx.dataframe.api.sum +import org.jetbrains.kotlinx.dataframe.api.to +import org.jetbrains.kotlinx.dataframe.api.toFloat +import org.jetbrains.kotlinx.dataframe.api.toLeft +import org.jetbrains.kotlinx.dataframe.api.toMap +import org.jetbrains.kotlinx.dataframe.api.toPath +import org.jetbrains.kotlinx.dataframe.api.toTop +import org.jetbrains.kotlinx.dataframe.api.under +import org.jetbrains.kotlinx.dataframe.api.unfold +import org.jetbrains.kotlinx.dataframe.api.ungroup +import org.jetbrains.kotlinx.dataframe.api.update +import org.jetbrains.kotlinx.dataframe.api.where +import org.jetbrains.kotlinx.dataframe.api.with +import org.jetbrains.kotlinx.dataframe.api.withNull +import org.jetbrains.kotlinx.dataframe.api.withValue +import org.jetbrains.kotlinx.dataframe.api.withZero +import org.jetbrains.kotlinx.dataframe.impl.api.mapNotNullValues +import org.jetbrains.kotlinx.dataframe.io.readJsonStr +import org.jetbrains.kotlinx.dataframe.io.renderToString +import org.jetbrains.kotlinx.dataframe.testResource +import org.jetbrains.kotlinx.dataframe.types.UtilTests +import org.junit.Test +import java.net.URL +import java.time.format.DateTimeFormatter +import java.util.Locale +import 
kotlin.streams.toList + +class Modify : TestBase() { + + @Test + fun update() { + // SampleStart + df.update { age }.with { it * 2 } + df.update { dfsOf() }.with { it.uppercase() } + df.update { weight }.at(1..4).notNull { it / 2 } + df.update { name.lastName and age }.at(1, 3, 4).withNull() + // SampleEnd + } + + @Test + fun updateWith() { + // SampleStart + df.update { city }.with { name.firstName + " from " + it } + // SampleEnd + } + + @Test + fun updateWithConst() { + // SampleStart + df.update { city }.where { name.firstName == "Alice" }.withValue("Paris") + // SampleEnd + } + + @Test + fun updateAsFrame() { + val res = + // SampleStart + df.update { name }.asFrame { select { lastName } } + // SampleEnd + res shouldBe df.remove { name.firstName } + } + + @Test + fun updatePerColumn() { + val updated = + // SampleStart + df.update { colsOf() }.perCol { mean(skipNA = true) } + // SampleEnd + updated.age.countDistinct() shouldBe 1 + updated.weight.countDistinct() shouldBe 1 + + val means = df.meanFor(skipNA = true) { colsOf() } + df.update { colsOf() }.perCol(means) shouldBe updated + df.update { colsOf() }.perCol(means.toMap() as Map) shouldBe updated + } + + @Test + fun updatePerRowCol() { + val updated = + // SampleStart + df.update { colsOf() }.perRowCol { row, col -> col.name() + ": " + row.index() } + // SampleEnd + } + + @Test + fun convert() { + // SampleStart + df.convert { age }.with { it.toDouble() } + df.convert { dfsOf() }.with { it.toCharArray().toList() } + // SampleEnd + } + + @Test + fun convertTo() { + // SampleStart + df.convert { age }.to() + df.convert { colsOf() }.to() + df.convert { name.firstName and name.lastName }.to { it.length() } + df.convert { weight }.toFloat() + // SampleEnd + } + + enum class Direction { + NORTH, SOUTH, WEST, EAST + } + + @Test + fun convertToEnum() { + // SampleStart + dataFrameOf("direction")("NORTH", "WEST") + .convert("direction").to() + // SampleEnd + } + + @Test + fun parseAll() { + // SampleStart + df.parse() + // SampleEnd + } + + @Test + fun parseSome() { + // SampleStart + df.parse { age and weight } + // SampleEnd + } + + @Test + fun parseWithOptions() { + // SampleStart + df.parse(options = ParserOptions(locale = Locale.CHINA, dateTimeFormatter = DateTimeFormatter.ISO_WEEK_DATE)) + // SampleEnd + } + + @Test + fun globalParserOptions() { + // SampleStart + DataFrame.parser.locale = Locale.FRANCE + DataFrame.parser.addDateTimePattern("dd.MM.uuuu HH:mm:ss") + // SampleEnd + DataFrame.parser.resetToDefault() + } + + @Test + fun replace() { + // SampleStart + df.replace { name }.with { name.firstName } + df.replace { colsOf() }.with { it.lowercase() } + df.replace { age }.with { 2021 - age named "year" } + // SampleEnd + } + + @Test + fun shuffle() { + // SampleStart + df.shuffle() + // SampleEnd + } + + @Test + fun reverse() { + // SampleStart + df.reverse() + // SampleEnd + } + + @Test + fun fillNulls() { + // SampleStart + df.fillNulls { colsOf() }.with { -1 } + // same as + df.update { colsOf() }.where { it == null }.with { -1 } + // SampleEnd + } + + @Test + fun fillNaNs() { + // SampleStart + df.fillNaNs { colsOf() }.withZero() + // SampleEnd + } + + @Test + fun fillNA() { + // SampleStart + df.fillNA { weight }.withValue(-1) + // SampleEnd + } + + @Test + fun move() { + // SampleStart + df.move { age }.toLeft() + + df.move { weight }.to(1) + + // age -> info.age + // weight -> info.weight + df.move { age and weight }.into { pathOf("info", it.name()) } + df.move { age and weight }.into { "info"[it.name()] } + df.move { age 
and weight }.under("info") + + // name.firstName -> fullName.first + // name.lastName -> fullName.last + df.move { name.firstName and name.lastName }.into { pathOf("fullName", it.name().dropLast(4)) } + + // a|b|c -> a.b.c + // a|d|e -> a.d.e + dataFrameOf("a|b|c", "a|d|e")(0, 0) + .move { all() }.into { it.name().split("|").toPath() } + + // name.firstName -> firstName + // name.lastName -> lastName + df.move { name.cols() }.toTop() + + // a.b.e -> be + // c.d.e -> de + df.move { dfs { it.name() == "e" } }.toTop { it.parentName + it.name() } + // SampleEnd + } + + @Test + fun sortBy_properties() { + // SampleStart + df.sortBy { age } + df.sortBy { age and name.firstName.desc() } + df.sortBy { weight.nullsLast() } + // SampleEnd + } + + @Test + fun sortBy_accessors() { + // SampleStart + val age by column() + val weight by column() + val name by columnGroup() + val firstName by name.column() + + df.sortBy { age } + df.sortBy { age and firstName } + df.sortBy { weight.nullsLast() } + // SampleEnd + } + + @Test + fun sortBy_strings() { + // SampleStart + df.sortBy("age") + df.sortBy { "age" and "name"["firstName"].desc() } + df.sortBy { "weight".nullsLast() } + // SampleEnd + } + + @Test + fun sortByDesc_properties() { + // SampleStart + df.sortByDesc { age and weight } + // SampleEnd + } + + @Test + fun sortByDesc_accessors() { + // SampleStart + val age by column() + val weight by column() + + df.sortByDesc { age and weight } + // SampleEnd + } + + @Test + fun sortByDesc_strings() { + // SampleStart + df.sortByDesc("age", "weight") + // SampleEnd + } + + @Test + fun sortWith() { + // SampleStart + df.sortWith { row1, row2 -> + when { + row1.age < row2.age -> -1 + row1.age > row2.age -> 1 + else -> row1.name.firstName.compareTo(row2.name.firstName) + } + } + // SampleEnd + } + + @Test + fun reorder_properties() { + // SampleStart + df.reorder { age..isHappy }.byName() + // SampleEnd + } + + @Test + fun reorder_accessors() { + // SampleStart + val age by column() + val isHappy by column() + + df.reorder { age..isHappy }.byName() + // SampleEnd + } + + @Test + fun reorder_strings() { + // SampleStart + df.reorder { "age".."isHappy" }.byName() + } + + @Test + fun reorderSome() { + // SampleStart + val df = dataFrameOf("c", "d", "a", "b")( + 3, 4, 1, 2, + 1, 1, 1, 1 + ) + df.reorder("d", "b").cast().by { sum() } // [c, b, a, d] + // SampleEnd + .columnNames() shouldBe listOf("c", "b", "a", "d") + // SampleEnd + } + + @Test + fun reorderInGroup() { + // SampleStart + df.reorder { name }.byName(desc = true) // [name.lastName, name.firstName] + // SampleEnd + .name.columnNames() shouldBe listOf("lastName", "firstName") + } + + @Test + fun splitInplace_properties() { + // SampleStart + df.split { name.firstName }.by { it.chars().toList() }.inplace() + // SampleEnd + } + + @Test + fun splitInplace_accessors() { + // SampleStart + val name by columnGroup() + val firstName by name.column() + + df.split { firstName }.by { it.chars().toList() }.inplace() + // SampleEnd + } + + @Test + fun splitInplace_strings() { + // SampleStart + df.split { "name"["firstName"]() }.by { it.chars().toList() }.inplace() + // SampleEnd + } + + @Test + fun split_properties() { + // SampleStart + df.split { name }.by { it.values() }.into("nameParts") + + df.split { name.lastName }.by(" ").default("").inward { "word$it" } + // SampleEnd + } + + @Test + fun split_accessors() { + // SampleStart + val name by columnGroup() + val lastName by name.column() + + df.split { name }.by { it.values() }.into("nameParts") + + df.split { 
lastName }.by(" ").default("").inward { "word$it" } + // SampleEnd + } + + @Test + fun split_strings() { + // SampleStart + df.split { name }.by { it.values() }.into("nameParts") + + df.split { "name"["lastName"] }.by(" ").default("").inward { "word$it" } + // SampleEnd + } + + @Test + fun splitRegex() { + val merged = df.merge { name.lastName and name.firstName }.by { it[0] + " (" + it[1] + ")" }.into("name") + val name by column() + // SampleStart + merged.split { name } + .match("""(.*) \((.*)\)""") + .inward("firstName", "lastName") + // SampleEnd + } + + @Test + fun splitFrameColumn() { + // SampleStart + val df1 = dataFrameOf("a", "b", "c")( + 1, 2, 3, + 4, 5, 6 + ) + val df2 = dataFrameOf("a", "b")( + 5, 6, + 7, 8, + 9, 10 + ) + val group by columnOf(df1, df2) + val id by columnOf("x", "y") + val df = dataFrameOf(id, group) + + df.split { group }.intoColumns() + // SampleEnd + } + + @Test + fun splitIntoRows_properties() { + // SampleStart + df.split { name.firstName }.by { it.chars().toList() }.intoRows() + + df.split { name }.by { it.values() }.intoRows() + // SampleEnd + } + + @Test + fun splitIntoRows_accessors() { + // SampleStart + val name by columnGroup() + val firstName by name.column() + + df.split { firstName }.by { it.chars().toList() }.intoRows() + + df.split { name }.by { it.values() }.intoRows() + // SampleEnd + } + + @Test + fun splitIntoRows_strings() { + // SampleStart + df.split { "name"["firstName"]() }.by { it.chars().toList() }.intoRows() + + df.split { group("name") }.by { it.values() }.intoRows() + // SampleEnd + } + + @Test + fun merge() { + // SampleStart + // Merge two columns into one column "fullName" + df.merge { name.firstName and name.lastName }.by(" ").into("fullName") + // SampleEnd + } + + @Test + fun mergeIntoList() { + // SampleStart + // Merge data from two columns into List + df.merge { name.firstName and name.lastName }.by(",").intoList() + // SampleEnd + } + + @Test + fun mergeSameWith() { + // SampleStart + df.merge { name.firstName and name.lastName } + .by { it[0] + " (" + it[1].uppercase() + ")" } + .into("fullName") + // SampleEnd + } + + @Test + fun mergeDifferentWith() { + // SampleStart + df.merge { name.firstName and age and isHappy } + .by { "${it[0]} aged ${it[1]} is " + (if (it[2] as Boolean) "" else "not ") + "happy" } + .into("status") + // SampleEnd + } + + @Test + fun mergeDefault() { + // SampleStart + df.merge { colsOf() }.into("data") + // SampleEnd + } + + @Test + fun explode_accessors() { + // SampleStart + val a by columnOf(1, 2) + val b by columnOf(listOf(1, 2), listOf(3, 4)) + + val df = dataFrameOf(a, b) + + df.explode { b } + // SampleEnd + } + + @Test + fun explode_strings() { + // SampleStart + val df = dataFrameOf("a", "b")( + 1, listOf(1, 2), + 2, listOf(3, 4) + ) + + df.explode("b") + // SampleEnd + } + + @Test + fun explodeSeveral() { + // SampleStart + val a by columnOf(listOf(1, 2), listOf(3, 4, 5)) + val b by columnOf(listOf(1, 2, 3), listOf(4, 5)) + + val df = dataFrameOf(a, b) + df.explode { a and b } + // SampleEnd + } + + @Test + fun explodeColumnList() { + // SampleStart + val col by columnOf(listOf(1, 2), listOf(3, 4)) + + col.explode() + // SampleEnd + } + + @Test + fun explodeColumnFrames() { + // SampleStart + val col by columnOf( + dataFrameOf("a", "b")(1, 2, 3, 4), + dataFrameOf("a", "b")(5, 6, 7, 8) + ) + + col.explode() + // SampleEnd + } + + @Test + fun implode() { + // SampleStart + df.implode { name and age and weight and isHappy } + // SampleEnd + } + + @Test + fun gatherNames() { + val 
pivoted = df.dropNulls { city }.pivotCounts(inward = false) { city } + // SampleStart + pivoted.gather { "London".."Tokyo" }.cast() + .where { it > 0 }.keysInto("city") + // SampleEnd + } + + @Test + fun gather() { + val pivoted = df.dropNulls { city }.pivotCounts(inward = false) { city } + // SampleStart + pivoted.gather { "London".."Tokyo" }.into("city", "population") + // SampleEnd + } + + @Test + fun gatherWithMapping() { + val pivoted = df.dropNulls { city }.pivotCounts(inward = false) { city } + // SampleStart + pivoted.gather { "London".."Tokyo" } + .cast() + .where { it > 10 } + .mapKeys { it.lowercase() } + .mapValues { 1.0 / it } + .into("city", "density") + // SampleEnd + } + + @Test + fun insert_properties() { + // SampleStart + df.insert("year of birth") { 2021 - age }.after { age } + // SampleEnd + } + + @Test + fun insert_accessors() { + // SampleStart + val year = column("year of birth") + val age by column() + + df.insert(year) { 2021 - age }.after { age } + // SampleEnd + } + + @Test + fun insert_strings() { + // SampleStart + df.insert("year of birth") { 2021 - "age"() }.after("age") + // SampleEnd + } + + @Test + fun insertColumn() { + // SampleStart + val score by columnOf(4, 5, 3, 5, 4, 5, 3) + df.insert(score).at(2) + // SampleEnd + } + + @Test + fun concatDfs() { + val df1 = df + val df2 = df + // SampleStart + df.concat(df1, df2) + // SampleEnd + } + + @Test + fun concatColumns() { + // SampleStart + val a by columnOf(1, 2) + val b by columnOf(3, 4) + a.concat(b) + // SampleEnd + .shouldBe(columnOf(1, 2, 3, 4).named("a")) + } + + @Test + fun concatColumnsIterable() { + // SampleStart + val a by columnOf(1, 2) + val b by columnOf(3, 4) + listOf(a, b).concat() + // SampleEnd + .shouldBe(columnOf(1, 2, 3, 4).named("a")) + } + + @Test + fun concatIterable() { + val df1 = df + val df2 = df + // SampleStart + listOf(df1, df2).concat() + // SampleEnd + } + + @Test + fun concatRows() { + // SampleStart + val rows = listOf(df[2], df[4], df[5]) + rows.concat() + // SampleEnd + } + + @Test + fun concatFrameColumn() { + // SampleStart + val x = dataFrameOf("a", "b")( + 1, 2, + 3, 4 + ) + val y = dataFrameOf("b", "c")( + 5, 6, + 7, 8 + ) + val frameColumn by columnOf(x, y) + frameColumn.concat() + // SampleEnd + } + + @Test + fun concatGroupBy() { + // SampleStart + df.groupBy { name }.concat() + // SampleEnd + } + + @Test + fun add_properties() { + // SampleStart + df.add("year of birth") { 2021 - age } + // SampleEnd + } + + @Test + fun add_accessors() { + // SampleStart + val age by column() + val yearOfBirth by column("year of birth") + + df.add(yearOfBirth) { 2021 - age } + // SampleEnd + val added = df.add(yearOfBirth) { 2021 - age } + added[yearOfBirth].name() shouldBe "year of birth" + } + + @Test + fun add_strings() { + // SampleStart + df.add("year of birth") { 2021 - "age"() } + // SampleEnd + } + + @Test + fun addRecurrent() { + // SampleStart + df.add("fibonacci") { + if (index() < 2) 1 + else prev()!!.newValue() + prev()!!.prev()!!.newValue() + } + // SampleEnd + } + + @Test + fun addExisting() { + // SampleStart + val score by columnOf(4, 3, 5, 2, 1, 3, 5) + + df.add(score) + df + score + // SampleEnd + } + + @Test + fun addDfs() { + val df1 = df.select { name named "name2" } + val df2 = df.select { age named "age2" } + // SampleStart + df.add(df1, df2) + // SampleEnd + } + + private class CityInfo(val city: String?, val population: Int, val location: String) + private fun queryCityInfo(city: String?): CityInfo { return CityInfo(city, city?.length ?: 0, "35.5 
32.2") } + + @Test + fun addCalculatedApi() { + // SampleStart + class CityInfo(val city: String?, val population: Int, val location: String) + fun queryCityInfo(city: String?): CityInfo { + return CityInfo(city, city?.length ?: 0, "35.5 32.2") + } + // SampleEnd + } + + @Test + fun addCalculated_properties() { + // SampleStart + val personWithCityInfo = df.add { + val cityInfo = city.map { queryCityInfo(it) } + "cityInfo" { + cityInfo.map { it.location } into CityInfo::location + cityInfo.map { it.population } into "population" + } + } + // SampleEnd + personWithCityInfo["cityInfo"]["population"] shouldBe df.city.map { it?.length ?: 0 }.named("population") + } + + @Test + fun addCalculated_accessors() { + // SampleStart + val city by column() + val personWithCityInfo = df.add { + val cityInfo = city().map { queryCityInfo(it) } + "cityInfo" { + cityInfo.map { it.location } into CityInfo::location + cityInfo.map { it.population } into "population" + } + } + // SampleEnd + personWithCityInfo["cityInfo"]["population"] shouldBe df.city.map { it?.length ?: 0 }.named("population") + } + + @Test + fun addCalculated_strings() { + // SampleStart + val personWithCityInfo = df.add { + val cityInfo = "city"().map { queryCityInfo(it) } + "cityInfo" { + cityInfo.map { it.location } into CityInfo::location + cityInfo.map { it.population } into "population" + } + } + // SampleEnd + personWithCityInfo["cityInfo"]["population"] shouldBe df.city.map { it?.length ?: 0 }.named("population") + } + + @Test + fun addMany_properties() { + // SampleStart + df.add { + "year of birth" from 2021 - age + age gt 18 into "is adult" + "details" { + name.lastName.length() into "last name length" + "full name" from { name.firstName + " " + name.lastName } + } + } + // SampleEnd + } + + @Test + fun addMany_accessors() { + // SampleStart + val yob = column("year of birth") + val lastNameLength = column("last name length") + val age by column() + val isAdult = column("is adult") + val fullName = column("full name") + val name by columnGroup() + val details by columnGroup() + val firstName by name.column() + val lastName by name.column() + + df.add { + yob from 2021 - age + age gt 18 into isAdult + details from { + lastName.length() into lastNameLength + fullName from { firstName() + " " + lastName() } + } + } + // SampleEnd + } + + @Test + fun addMany_strings() { + // SampleStart + df.add { + "year of birth" from 2021 - "age"() + "age"() gt 18 into "is adult" + "details" { + "name"["lastName"]().length() into "last name length" + "full name" from { "name"["firstName"]() + " " + "name"["lastName"]() } + } + } + // SampleEnd + } + + @Test + fun remove_properties() { + // SampleStart + df.remove { name and weight } + // SampleEnd + } + + @Test + fun remove_accessors() { + // SampleStart + val name by columnGroup() + val weight by column() + + df.remove { name and weight } + // SampleEnd + } + + @Test + fun remove_strings() { + // SampleStart + df.remove("name", "weight") + // SampleEnd + } + + @Test + fun map() { + // SampleStart + df.map { 2021 - it.age } + // SampleEnd + } + + @Test + fun mapToColumn_properties() { + // SampleStart + df.mapToColumn("year of birth") { 2021 - age } + // SampleEnd + } + + @Test + fun mapToColumn_accessors() { + // SampleStart + val age by column() + val yearOfBirth by column("year of birth") + + df.mapToColumn(yearOfBirth) { 2021 - age } + // SampleEnd + } + + @Test + fun mapToColumn_strings() { + // SampleStart + df.mapToColumn("year of birth") { 2021 - "age"() } + // SampleEnd + } + + @Test + 
fun mapMany_properties() { + // SampleStart + df.mapToFrame { + "year of birth" from 2021 - age + age gt 18 into "is adult" + name.lastName.length() into "last name length" + "full name" from { name.firstName + " " + name.lastName } + +city + } + // SampleEnd + } + + @Test + fun mapMany_accessors() { + // SampleStart + val yob = column("year of birth") + val lastNameLength = column("last name length") + val age by column() + val isAdult = column("is adult") + val fullName = column("full name") + val name by columnGroup() + val firstName by name.column() + val lastName by name.column() + val city by column() + + df.mapToFrame { + yob from 2021 - age + age gt 18 into isAdult + lastName.length() into lastNameLength + fullName from { firstName() + " " + lastName() } + +city + } + // SampleEnd + } + + @Test + fun mapMany_strings() { + // SampleStart + df.mapToFrame { + "year of birth" from 2021 - "age"() + "age"() gt 18 into "is adult" + "name"["lastName"]().length() into "last name length" + "full name" from { "name"["firstName"]() + " " + "name"["lastName"]() } + +"city" + } + // SampleEnd + } + + @Test + fun group() { + // SampleStart + df.group { age and city }.into("info") + + df.group { all() }.into { it.type().toString() }.print() + // SampleEnd + } + + @Test + fun ungroup() { + // SampleStart + // name.firstName -> firstName + // name.lastName -> lastName + df.ungroup { name } + // SampleEnd + } + + @Test + fun flatten_properties() { + // SampleStart + // name.firstName -> firstName + // name.lastName -> lastName + df.flatten { name } + // SampleEnd + } + + @Test + fun flatten_strings() { + // SampleStart + // name.firstName -> firstName + // name.lastName -> lastName + df.flatten("name") + // SampleEnd + } + + @Test + fun flatten_accessors() { + // SampleStart + val name by columnGroup() + val firstName by name.column() + val lastName by name.column() + // name.firstName -> firstName + // name.lastName -> lastName + df.flatten(name) + // SampleEnd + } + + @Test + fun flatten_KProperties() { + // SampleStart + // name.firstName -> firstName + // name.lastName -> lastName + df.flatten(df::name) + // SampleEnd + } + + @Test + fun flattenAll() { + // SampleStart + df.flatten() + // SampleEnd + } + + @Test + fun multiCallOperations() { + // SampleStart + df.update { age }.where { city == "Paris" }.with { it - 5 } + .filter { isHappy && age > 100 } + .move { name.firstName and name.lastName }.after { isHappy } + .merge { age and weight }.by { "Age: ${it[0]}, weight: ${it[1]}" }.into("info") + .rename { isHappy }.into("isOK") + // SampleEnd + } + + class MyType(val value: Int) + + @DataSchema + class MySchema(val a: MyType, val b: MyType, val c: Int) + + fun customConvertersData() { + // SampleStart + class MyType(val value: Int) + + @DataSchema + class MySchema(val a: MyType, val b: MyType, val c: Int) + + // SampleEnd + } + + @Test + fun customConverters() { + // SampleStart + val df = dataFrameOf("a", "b")(1, "2") + df.convertTo { + convert().with { MyType(it) } // converts `a` from Int to MyType + parser { MyType(it.toInt()) } // converts `b` from String to MyType + fill { c }.with { a.value + b.value } // computes missing column `c` + } + // SampleEnd + } + + @Test + fun convertToColumnGroupUseCase() { + // SampleStart + class RepositoryInfo(val data: Any) + + fun downloadRepositoryInfo(url: String) = RepositoryInfo("fancy response from the API") + // SampleEnd + } + + @Test + fun convertToColumnGroupData() { + class RepositoryInfo(val data: Any) + + fun downloadRepositoryInfo(url: 
String) = RepositoryInfo("fancy response from the API") + + // SampleStart + val interestingRepos = dataFrameOf("name", "url")( + "dataframe", "/dataframe", + "kotlin", "/kotlin", + ) + + val initialData = interestingRepos + .add("response") { downloadRepositoryInfo("url"()) } + // SampleEnd + } + + @Test + fun convertToColumnGroup() { + class RepositoryInfo(val data: Any) + + fun downloadRepositoryInfo(url: String) = RepositoryInfo("fancy response from the API") + + val interestingRepos = dataFrameOf("name", "url")( + "dataframe", "/dataframe", + "kotlin", "/kotlin", + ) + + val initialData = interestingRepos + .add("response") { downloadRepositoryInfo("url"()) } + + // SampleStart + val df = initialData.unfold("response") + // SampleEnd + df.schema().print() + } + + @DataSchema + interface Df { + val response: DataRow + } + + @DataSchema + interface Response { + val data: Any + } + + @Test + fun convertToColumnGroupBenefits() { + class RepositoryInfo(val data: Any) + + fun downloadRepositoryInfo(url: String) = RepositoryInfo("fancy response from the API") + + val interestingRepos = dataFrameOf("name", "url")( + "dataframe", "/dataframe", + "kotlin", "/kotlin", + ) + + val initialData = interestingRepos + .add("response") { downloadRepositoryInfo("url"()) } + + val df = initialData.unfold("response").cast() + + // SampleStart + df.move { response.data }.toTop() + df.rename { response.data }.into("description") + // SampleEnd + + df.move { response.data }.toTop().alsoDebug() + df.rename { response.data }.into("description").alsoDebug() + } + + @Test + fun convertToFrameColumnAPI() { + // SampleStart + fun testResource(resourcePath: String): URL = UtilTests::class.java.classLoader.getResource(resourcePath)!! + + val interestingRepos = dataFrameOf("name", "url", "contributors")( + "dataframe", "/dataframe", testResource("dataframeContributors.json"), + "kotlin", "/kotlin", testResource("kotlinContributors.json"), + ) + // SampleEnd + } + + @Test + fun customUnfoldRead() { + val interestingRepos = dataFrameOf("name", "url", "contributors")( + "dataframe", "/dataframe", testResource("dataframeContributors.json"), + "kotlin", "/kotlin", testResource("kotlinContributors.json"), + ) + + // SampleStart + val contributors by column() + + val df = interestingRepos + .replace { contributors } + .with { + it.mapNotNullValues { url -> DataFrame.readJsonStr(url.readText()) } + } + + df.asGroupBy("contributors").max("contributions") + // SampleEnd + + df.asGroupBy("contributors").max("contributions").renderToString() shouldBe + """| name url contributions + | 0 dataframe /dataframe 111 + | 1 kotlin /kotlin 180 + |""".trimMargin() + } +} diff --git a/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/samples/api/Schemas.kt b/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/samples/api/Schemas.kt new file mode 100644 index 0000000000..32a61f4373 --- /dev/null +++ b/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/samples/api/Schemas.kt @@ -0,0 +1,124 @@ +package org.jetbrains.kotlinx.dataframe.samples.api + +import org.jetbrains.kotlinx.dataframe.DataFrame +import org.jetbrains.kotlinx.dataframe.annotations.DataSchema +import org.jetbrains.kotlinx.dataframe.api.by +import org.jetbrains.kotlinx.dataframe.api.cast +import org.jetbrains.kotlinx.dataframe.api.count +import org.jetbrains.kotlinx.dataframe.api.dataFrameOf +import org.jetbrains.kotlinx.dataframe.api.filter +import org.jetbrains.kotlinx.dataframe.api.inward +import 
org.jetbrains.kotlinx.dataframe.api.maxBy +import org.jetbrains.kotlinx.dataframe.api.print +import org.jetbrains.kotlinx.dataframe.api.split +import org.jetbrains.kotlinx.dataframe.api.toList +import org.junit.Test + +class Schemas { + + @DataSchema + interface Person { + val name: String + val age: Int + } + + fun DataFrame<Person>.splitName() = split { name }.by(",").inward("firstName", "lastName") + fun DataFrame<Person>.adults() = filter { age > 18 } + + @Test + fun createDfNullable() { + // SampleStart + val df = dataFrameOf("name", "age")( + "Alice", 15, + "Bob", null + ) + // SampleEnd + } + + @Test + fun createDf() { + // SampleStart + val df = dataFrameOf("name", "age")( + "Alice", 15, + "Bob", 20 + ) + // SampleEnd + } + + @Test + fun extendedDf() { + // SampleStart + val df = dataFrameOf("name", "age", "weight")( + "Merton, Alice", 15, 60.0, + "Marley, Bob", 20, 73.5 + ) + // SampleEnd + df.print() + } + + @Test + fun splitNameWorks() { + val df = dataFrameOf("name", "age", "weight")( + "Merton, Alice", 15, 60.0, + "Marley, Bob", 20, 73.5 + ).cast<Person>() + // SampleStart + df.splitName() + // SampleEnd + } + + @Test + fun adultsWorks() { + val df = dataFrameOf("name", "age", "weight")( + "Merton, Alice", 15, 60.0, + "Marley, Bob", 20, 73.5 + ).cast<Person>() + // SampleStart + df.adults() + // SampleEnd + } + + fun DataFrame.countAdults() = count { it[Person::age] > 18 } + + @Test + fun convertTo() { + // SampleStart + @DataSchema + data class Name(val firstName: String, val lastName: String) + @DataSchema + data class Person(val name: Name, val age: Int?) + + val df = dataFrameOf("name", "age", "weight")( + "Merton, Alice", "15", 60.0, + "Marley, Bob", "20", 73.5 + ).split { "name"<String>() }.inward("firstName", "lastName") + + val persons = df.cast<Person>().toList() + // SampleEnd + } + + @Test + fun useProperties() { + // SampleStart + val df = dataFrameOf("name", "age")( + "Alice", 15, + "Bob", 20 + ).cast<Person>() + // age only available after executing `build` or `kspKotlin`! + val teens = df.filter { age in 10..19 } + teens.print() + // SampleEnd + } + + @Test + fun useInferredSchema() { + // SampleStart + // Repository.readCSV() has argument 'path' with default value https://raw.githubusercontent.com/Kotlin/dataframe/master/data/jetbrains_repositories.csv + val df = Repository.readCSV() + // Use generated properties to access data in rows + df.maxBy { stargazersCount }.print() + // Or to access columns in dataframe.
+ print(df.fullName.count { it.contains("kotlin") }) + // SampleEnd + } +} diff --git a/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/samples/api/TestBase.kt b/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/samples/api/TestBase.kt new file mode 100644 index 0000000000..4a78c8e668 --- /dev/null +++ b/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/samples/api/TestBase.kt @@ -0,0 +1,39 @@ +package org.jetbrains.kotlinx.dataframe.samples.api + +import io.kotest.matchers.shouldBe +import org.jetbrains.kotlinx.dataframe.DataRow +import org.jetbrains.kotlinx.dataframe.annotations.DataSchema +import org.jetbrains.kotlinx.dataframe.api.cast +import org.jetbrains.kotlinx.dataframe.api.dataFrameOf +import org.jetbrains.kotlinx.dataframe.api.group +import org.jetbrains.kotlinx.dataframe.api.into + +public open class TestBase { + + val df = dataFrameOf("firstName", "lastName", "age", "city", "weight", "isHappy")( + "Alice", "Cooper", 15, "London", 54, true, + "Bob", "Dylan", 45, "Dubai", 87, true, + "Charlie", "Daniels", 20, "Moscow", null, false, + "Charlie", "Chaplin", 40, "Milan", null, true, + "Bob", "Marley", 30, "Tokyo", 68, true, + "Alice", "Wolf", 20, null, 55, false, + "Charlie", "Byrd", 30, "Moscow", 90, true + ).group("firstName", "lastName").into("name").cast<Person>() + + @DataSchema + interface Name { + val firstName: String + val lastName: String + } + + @DataSchema + interface Person { + val age: Int + val city: String? + val name: DataRow<Name> + val weight: Int? + val isHappy: Boolean + } + + infix fun <T, U : T> T.willBe(expected: U?) = shouldBe(expected) +} diff --git a/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/statistics/BasicMathTests.kt b/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/statistics/BasicMathTests.kt new file mode 100644 index 0000000000..ca9f8fcd1a --- /dev/null +++ b/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/statistics/BasicMathTests.kt @@ -0,0 +1,22 @@ +package org.jetbrains.kotlinx.dataframe.statistics + +import io.kotest.matchers.shouldBe +import org.jetbrains.kotlinx.dataframe.api.columnOf +import org.jetbrains.kotlinx.dataframe.api.mean +import org.junit.Test +import kotlin.reflect.typeOf + +class BasicMathTests { + + @Test + fun `type for column with mixed numbers`() { + val col = columnOf(10, 10.0, null) + col.type() shouldBe typeOf<Number?>() + } + + @Test + fun `mean with nans and nulls`() { + columnOf(10, 20, Double.NaN, null).mean() shouldBe Double.NaN + columnOf(10, 20, Double.NaN, null).mean(skipNA = true) shouldBe 15 + } +} diff --git a/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/statistics/cumsum.kt b/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/statistics/cumsum.kt new file mode 100644 index 0000000000..6b3e6d9448 --- /dev/null +++ b/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/statistics/cumsum.kt @@ -0,0 +1,68 @@ +package org.jetbrains.kotlinx.dataframe.statistics + +import io.kotest.matchers.shouldBe +import org.jetbrains.kotlinx.dataframe.api.columnOf +import org.jetbrains.kotlinx.dataframe.api.concat +import org.jetbrains.kotlinx.dataframe.api.cumSum +import org.jetbrains.kotlinx.dataframe.api.dataFrameOf +import org.jetbrains.kotlinx.dataframe.api.groupBy +import org.junit.Test + +class CumsumTests { + + val col by columnOf(1, 2, null, 3, 4) + val expected = listOf(1, 3, null, 6, 10) + val expectedNoSkip = listOf(1, 3, null, null, null) + + @Test 
+ fun `int column`() { + col.cumSum().toList() shouldBe expected + col.cumSum(skipNA = false).toList() shouldBe expectedNoSkip + } + + @Test + fun frame() { + val str by columnOf("a", "b", "c", "d", "e") + val df = dataFrameOf(col, str) + + df.cumSum()[col].toList() shouldBe expected + df.cumSum(skipNA = false)[col].toList() shouldBe expectedNoSkip + + df.cumSum { col }[col].toList() shouldBe expected + df.cumSum(skipNA = false) { col }[col].toList() shouldBe expectedNoSkip + + df.cumSum(col)[col].toList() shouldBe expected + df.cumSum(col, skipNA = false)[col].toList() shouldBe expectedNoSkip + } + + @Test + fun `double column`() { + val doubles by columnOf(1.0, 2.0, null, Double.NaN, 4.0) + doubles.cumSum().toList() shouldBe listOf(1.0, 3.0, Double.NaN, Double.NaN, 7.0) + } + + @Test + fun `number column`() { + val doubles by columnOf(1, 2, null, Double.NaN, 4) + doubles.cumSum().toList() shouldBe listOf(1.0, 3.0, Double.NaN, Double.NaN, 7.0) + } + + @Test + fun `groupBy`() { + val df = dataFrameOf("str", "col")( + "a", 1, + "b", 2, + "c", null, + "a", 3, + "c", 4 + ) + df.groupBy("str").cumSum().concat() shouldBe + dataFrameOf("str", "col")( + "a", 1, + "a", 4, + "b", 2, + "c", null, + "c", 4 + ) + } +} diff --git a/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/statistics/median.kt b/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/statistics/median.kt new file mode 100644 index 0000000000..83648760b4 --- /dev/null +++ b/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/statistics/median.kt @@ -0,0 +1,33 @@ +package org.jetbrains.kotlinx.dataframe.statistics + +import io.kotest.matchers.shouldBe +import org.jetbrains.kotlinx.dataframe.api.Infer +import org.jetbrains.kotlinx.dataframe.api.columnOf +import org.jetbrains.kotlinx.dataframe.api.dataFrameOf +import org.jetbrains.kotlinx.dataframe.api.mapToColumn +import org.jetbrains.kotlinx.dataframe.api.median +import org.jetbrains.kotlinx.dataframe.api.rowMedian +import org.junit.Test + +class MedianTests { + + @Test + fun `median of two columns`() { + val df = dataFrameOf("a", "b")( + 1, 4, + 2, 6, + 7, 7 + ) + df.median("a", "b") shouldBe 5 + } + + @Test + fun `row median`() { + val df = dataFrameOf("a", "b")( + 1, 3, + 2, 4, + 7, 7 + ) + df.mapToColumn("", Infer.Type) { it.rowMedian() } shouldBe columnOf(2, 3, 7) + } +} diff --git a/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/statistics/quickSelect.kt b/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/statistics/quickSelect.kt new file mode 100644 index 0000000000..29ad4bf1d9 --- /dev/null +++ b/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/statistics/quickSelect.kt @@ -0,0 +1,33 @@ +package org.jetbrains.kotlinx.dataframe.statistics + +import io.kotest.assertions.throwables.shouldThrow +import io.kotest.matchers.shouldBe +import org.jetbrains.kotlinx.dataframe.math.quickSelect +import org.junit.Test +import kotlin.random.Random + +class QuickSelectTests { + + @Test + fun empty() { + shouldThrow { + listOf().quickSelect(0) + } + } + + @Test + fun short() { + val list = listOf(2, 5, 1) + list.quickSelect(0) shouldBe 1 + list.quickSelect(1) shouldBe 2 + list.quickSelect(2) shouldBe 5 + } + + @Test + fun long() { + val random = Random(120) + val list = (0..20).shuffled(random) + for (i in list) + list.quickSelect(i) shouldBe i + } +} diff --git a/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/statistics/std.kt 
b/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/statistics/std.kt new file mode 100644 index 0000000000..15c1f56e34 --- /dev/null +++ b/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/statistics/std.kt @@ -0,0 +1,38 @@ +package org.jetbrains.kotlinx.dataframe.statistics + +import io.kotest.matchers.shouldBe +import org.jetbrains.kotlinx.dataframe.api.columnOf +import org.jetbrains.kotlinx.dataframe.api.dataFrameOf +import org.jetbrains.kotlinx.dataframe.api.std +import org.jetbrains.kotlinx.dataframe.math.std +import org.junit.Test +import kotlin.reflect.typeOf + +class StdTests { + + @Test + fun `std one column`() { + val value by columnOf(1, 2, 3) + val df = dataFrameOf(value) + val expected = 1.0 + + value.values().std() shouldBe expected + value.values().std(typeOf()) shouldBe expected + value.std() shouldBe expected + df[value].std() shouldBe expected + df.std { value } shouldBe expected + } + + @Test + fun `std one double column`() { + val value by columnOf(1.0, 2.0, 3.0) + val df = dataFrameOf(value) + val expected = 1.0 + + value.values().std() shouldBe expected + value.values().std(typeOf()) shouldBe expected + value.std() shouldBe expected + df[value].std() shouldBe expected + df.std { value } shouldBe expected + } +} diff --git a/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/testSets/animals/AnimalsTests.kt b/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/testSets/animals/AnimalsTests.kt new file mode 100644 index 0000000000..ad1498a0aa --- /dev/null +++ b/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/testSets/animals/AnimalsTests.kt @@ -0,0 +1,46 @@ +package org.jetbrains.kotlinx.dataframe.testSets.animals + +import io.kotest.matchers.shouldBe +import org.jetbrains.kotlinx.dataframe.api.columnOf +import org.jetbrains.kotlinx.dataframe.api.dataFrameOf +import org.jetbrains.kotlinx.dataframe.api.mean +import org.jetbrains.kotlinx.dataframe.api.name +import org.jetbrains.kotlinx.dataframe.api.transpose +import org.jetbrains.kotlinx.dataframe.api.update +import org.jetbrains.kotlinx.dataframe.api.value +import org.jetbrains.kotlinx.dataframe.api.withNull +import org.jetbrains.kotlinx.dataframe.api.withValue +import org.junit.Test +import kotlin.reflect.typeOf + +class AnimalsTests { + + val animal by columnOf("cat", "cat", "snake", "dog", "dog", "cat", "snake", "cat", "dog", "dog") + val age by columnOf(2.5, 3.0, 0.5, Double.NaN, 5.0, 2.0, 4.5, Double.NaN, 7.0, 3.0) + val visits by columnOf(1, 3, 2, 3, 2, 3, 1, 1, 2, 1) + val priority by columnOf("yes", "yes", "no", "yes", "no", "no", "no", "yes", "no", "no") + + val df = dataFrameOf(animal, age, visits, priority) + + @Test + fun `ignore nans`() { + df.mean("age", skipNA = true) shouldBe 3.4375 + } + + @Test + fun `mean transpose`() { + val mean = df.mean().transpose() + mean.columnsCount() shouldBe 2 + mean.rowsCount() shouldBe 2 + mean.name.values() shouldBe listOf("age", "visits") + mean.value.type() shouldBe typeOf() + } + + @Test + fun `mean of empty`() { + val cleared = df.update { age }.withValue(Double.NaN).update { visits }.withNull() + val mean = cleared.mean() + mean[age] shouldBe Double.NaN + (mean[visits.name()] as Double).isNaN() shouldBe true + } +} diff --git a/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/testSets/person/BaseTest.kt b/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/testSets/person/BaseTest.kt new file mode 100644 index 
0000000000..88a7439c3b --- /dev/null +++ b/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/testSets/person/BaseTest.kt @@ -0,0 +1,40 @@ +package org.jetbrains.kotlinx.dataframe.testSets.person + +import org.jetbrains.kotlinx.dataframe.DataFrame +import org.jetbrains.kotlinx.dataframe.annotations.DataSchema +import org.jetbrains.kotlinx.dataframe.api.cast +import org.jetbrains.kotlinx.dataframe.api.column +import org.jetbrains.kotlinx.dataframe.api.dataFrameOf +import org.jetbrains.kotlinx.dataframe.api.toColumnAccessor + +@DataSchema +interface Person { + val name: String + val age: Int + val city: String? + val weight: Int? +} + +open class BaseTest { + +// Data set + + val df = dataFrameOf("name", "age", "city", "weight")( + "Alice", 15, "London", 54, + "Bob", 45, "Dubai", 87, + "Charlie", 20, "Moscow", null, + "Charlie", 40, "Milan", null, + "Bob", 30, "Tokyo", 68, + "Alice", 20, null, 55, + "Charlie", 30, "Moscow", 90 + ) + + val typed: DataFrame = df.cast() + +// Manual Column Definitions + + val name by column() + val age = column() named "age" + val city = Person::city.toColumnAccessor() + val weight by column() +} diff --git a/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/testSets/person/BuildTests.kt b/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/testSets/person/BuildTests.kt new file mode 100644 index 0000000000..42fe23861a --- /dev/null +++ b/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/testSets/person/BuildTests.kt @@ -0,0 +1,33 @@ +package org.jetbrains.kotlinx.dataframe.testSets.person + +import io.kotest.matchers.shouldBe +import org.jetbrains.kotlinx.dataframe.api.toColumn +import org.jetbrains.kotlinx.dataframe.api.toDataFrame +import org.jetbrains.kotlinx.dataframe.nrow +import org.junit.Test + +class BuildTests { + + data class Person(val name: String, val age: Int) + + val persons = listOf(Person("Alice", 15), Person("Bob", 20)) + + @Test + fun test3() { + val list = persons + listOf(null) + val df = list.toDataFrame() + df.nrow shouldBe 3 + } + + @Test(expected = IllegalArgumentException::class) + fun `unequal column sizes`() { + persons.toDataFrame() + listOf(1, 2, 3).toColumn("id") + } + + @Test + fun `create dataframe`() { + persons.toDataFrame { + expr { it.age + 4 } into "age" + } + } +} diff --git a/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/testSets/person/DataColumnTests.kt b/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/testSets/person/DataColumnTests.kt new file mode 100644 index 0000000000..83ae9780c1 --- /dev/null +++ b/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/testSets/person/DataColumnTests.kt @@ -0,0 +1,17 @@ +package org.jetbrains.kotlinx.dataframe.testSets.person + +import io.kotest.matchers.shouldBe +import org.jetbrains.kotlinx.dataframe.api.sort +import org.jetbrains.kotlinx.dataframe.api.sortBy +import org.jetbrains.kotlinx.dataframe.api.sortByDesc +import org.jetbrains.kotlinx.dataframe.api.sortDesc +import org.junit.Test + +class DataColumnTests : BaseTest() { + + @Test + fun `sort column`() { + typed.age.sort() shouldBe typed.sortBy { age }.age + typed.age.sortDesc() shouldBe typed.sortByDesc { age }.age + } +} diff --git a/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/testSets/person/DataFrameTests.kt b/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/testSets/person/DataFrameTests.kt new file mode 100644 index 
0000000000..b087f590fa --- /dev/null +++ b/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/testSets/person/DataFrameTests.kt @@ -0,0 +1,2535 @@ +package org.jetbrains.kotlinx.dataframe.testSets.person + +import io.kotest.assertions.throwables.shouldThrow +import io.kotest.matchers.doubles.ToleranceMatcher +import io.kotest.matchers.should +import io.kotest.matchers.shouldBe +import io.kotest.matchers.shouldNotBe +import org.jetbrains.kotlinx.dataframe.AnyFrame +import org.jetbrains.kotlinx.dataframe.AnyRow +import org.jetbrains.kotlinx.dataframe.DataFrame +import org.jetbrains.kotlinx.dataframe.DataRow +import org.jetbrains.kotlinx.dataframe.RowExpression +import org.jetbrains.kotlinx.dataframe.annotations.ColumnName +import org.jetbrains.kotlinx.dataframe.annotations.DataSchema +import org.jetbrains.kotlinx.dataframe.api.ExcessiveColumns +import org.jetbrains.kotlinx.dataframe.api.GroupBy +import org.jetbrains.kotlinx.dataframe.api.ParserOptions +import org.jetbrains.kotlinx.dataframe.api.add +import org.jetbrains.kotlinx.dataframe.api.addAll +import org.jetbrains.kotlinx.dataframe.api.addId +import org.jetbrains.kotlinx.dataframe.api.all +import org.jetbrains.kotlinx.dataframe.api.allNulls +import org.jetbrains.kotlinx.dataframe.api.append +import org.jetbrains.kotlinx.dataframe.api.asColumnGroup +import org.jetbrains.kotlinx.dataframe.api.asDataFrame +import org.jetbrains.kotlinx.dataframe.api.asGroupBy +import org.jetbrains.kotlinx.dataframe.api.asIterable +import org.jetbrains.kotlinx.dataframe.api.at +import org.jetbrains.kotlinx.dataframe.api.between +import org.jetbrains.kotlinx.dataframe.api.by +import org.jetbrains.kotlinx.dataframe.api.cast +import org.jetbrains.kotlinx.dataframe.api.chunked +import org.jetbrains.kotlinx.dataframe.api.colsOf +import org.jetbrains.kotlinx.dataframe.api.column +import org.jetbrains.kotlinx.dataframe.api.columnGroup +import org.jetbrains.kotlinx.dataframe.api.columnOf +import org.jetbrains.kotlinx.dataframe.api.concat +import org.jetbrains.kotlinx.dataframe.api.convert +import org.jetbrains.kotlinx.dataframe.api.convertTo +import org.jetbrains.kotlinx.dataframe.api.corr +import org.jetbrains.kotlinx.dataframe.api.count +import org.jetbrains.kotlinx.dataframe.api.countDistinct +import org.jetbrains.kotlinx.dataframe.api.dataFrameOf +import org.jetbrains.kotlinx.dataframe.api.default +import org.jetbrains.kotlinx.dataframe.api.describe +import org.jetbrains.kotlinx.dataframe.api.dfsOf +import org.jetbrains.kotlinx.dataframe.api.digitize +import org.jetbrains.kotlinx.dataframe.api.distinct +import org.jetbrains.kotlinx.dataframe.api.distinctBy +import org.jetbrains.kotlinx.dataframe.api.div +import org.jetbrains.kotlinx.dataframe.api.drop +import org.jetbrains.kotlinx.dataframe.api.dropLast +import org.jetbrains.kotlinx.dataframe.api.dropNA +import org.jetbrains.kotlinx.dataframe.api.dropNulls +import org.jetbrains.kotlinx.dataframe.api.dropWhile +import org.jetbrains.kotlinx.dataframe.api.explode +import org.jetbrains.kotlinx.dataframe.api.expr +import org.jetbrains.kotlinx.dataframe.api.fill +import org.jetbrains.kotlinx.dataframe.api.fillNulls +import org.jetbrains.kotlinx.dataframe.api.filter +import org.jetbrains.kotlinx.dataframe.api.first +import org.jetbrains.kotlinx.dataframe.api.forEach +import org.jetbrains.kotlinx.dataframe.api.forEachIndexed +import org.jetbrains.kotlinx.dataframe.api.frameColumn +import org.jetbrains.kotlinx.dataframe.api.gather +import org.jetbrains.kotlinx.dataframe.api.getColumn +import 
org.jetbrains.kotlinx.dataframe.api.getColumnGroup +import org.jetbrains.kotlinx.dataframe.api.getColumns +import org.jetbrains.kotlinx.dataframe.api.getFrameColumn +import org.jetbrains.kotlinx.dataframe.api.getValue +import org.jetbrains.kotlinx.dataframe.api.group +import org.jetbrains.kotlinx.dataframe.api.groupBy +import org.jetbrains.kotlinx.dataframe.api.implode +import org.jetbrains.kotlinx.dataframe.api.indices +import org.jetbrains.kotlinx.dataframe.api.inplace +import org.jetbrains.kotlinx.dataframe.api.into +import org.jetbrains.kotlinx.dataframe.api.intoColumns +import org.jetbrains.kotlinx.dataframe.api.intoList +import org.jetbrains.kotlinx.dataframe.api.intoRows +import org.jetbrains.kotlinx.dataframe.api.isColumnGroup +import org.jetbrains.kotlinx.dataframe.api.isFrameColumn +import org.jetbrains.kotlinx.dataframe.api.isNA +import org.jetbrains.kotlinx.dataframe.api.isNumber +import org.jetbrains.kotlinx.dataframe.api.keysInto +import org.jetbrains.kotlinx.dataframe.api.last +import org.jetbrains.kotlinx.dataframe.api.leftJoin +import org.jetbrains.kotlinx.dataframe.api.lowercase +import org.jetbrains.kotlinx.dataframe.api.map +import org.jetbrains.kotlinx.dataframe.api.mapToFrame +import org.jetbrains.kotlinx.dataframe.api.match +import org.jetbrains.kotlinx.dataframe.api.matches +import org.jetbrains.kotlinx.dataframe.api.max +import org.jetbrains.kotlinx.dataframe.api.maxBy +import org.jetbrains.kotlinx.dataframe.api.mean +import org.jetbrains.kotlinx.dataframe.api.meanFor +import org.jetbrains.kotlinx.dataframe.api.meanOf +import org.jetbrains.kotlinx.dataframe.api.median +import org.jetbrains.kotlinx.dataframe.api.merge +import org.jetbrains.kotlinx.dataframe.api.min +import org.jetbrains.kotlinx.dataframe.api.minBy +import org.jetbrains.kotlinx.dataframe.api.minOf +import org.jetbrains.kotlinx.dataframe.api.minus +import org.jetbrains.kotlinx.dataframe.api.move +import org.jetbrains.kotlinx.dataframe.api.moveTo +import org.jetbrains.kotlinx.dataframe.api.moveToLeft +import org.jetbrains.kotlinx.dataframe.api.moveToRight +import org.jetbrains.kotlinx.dataframe.api.name +import org.jetbrains.kotlinx.dataframe.api.named +import org.jetbrains.kotlinx.dataframe.api.notNull +import org.jetbrains.kotlinx.dataframe.api.nullable +import org.jetbrains.kotlinx.dataframe.api.parse +import org.jetbrains.kotlinx.dataframe.api.pathOf +import org.jetbrains.kotlinx.dataframe.api.pivot +import org.jetbrains.kotlinx.dataframe.api.print +import org.jetbrains.kotlinx.dataframe.api.remove +import org.jetbrains.kotlinx.dataframe.api.rename +import org.jetbrains.kotlinx.dataframe.api.reorderColumnsByName +import org.jetbrains.kotlinx.dataframe.api.replace +import org.jetbrains.kotlinx.dataframe.api.rows +import org.jetbrains.kotlinx.dataframe.api.select +import org.jetbrains.kotlinx.dataframe.api.single +import org.jetbrains.kotlinx.dataframe.api.sortBy +import org.jetbrains.kotlinx.dataframe.api.sortByCount +import org.jetbrains.kotlinx.dataframe.api.sortByDesc +import org.jetbrains.kotlinx.dataframe.api.sortByKey +import org.jetbrains.kotlinx.dataframe.api.sortWith +import org.jetbrains.kotlinx.dataframe.api.split +import org.jetbrains.kotlinx.dataframe.api.sum +import org.jetbrains.kotlinx.dataframe.api.sumOf +import org.jetbrains.kotlinx.dataframe.api.take +import org.jetbrains.kotlinx.dataframe.api.takeLast +import org.jetbrains.kotlinx.dataframe.api.takeWhile +import org.jetbrains.kotlinx.dataframe.api.times +import org.jetbrains.kotlinx.dataframe.api.to +import 
org.jetbrains.kotlinx.dataframe.api.toColumn +import org.jetbrains.kotlinx.dataframe.api.toColumnAccessor +import org.jetbrains.kotlinx.dataframe.api.toColumnOf +import org.jetbrains.kotlinx.dataframe.api.toDataFrame +import org.jetbrains.kotlinx.dataframe.api.toDouble +import org.jetbrains.kotlinx.dataframe.api.toInt +import org.jetbrains.kotlinx.dataframe.api.toList +import org.jetbrains.kotlinx.dataframe.api.toListOf +import org.jetbrains.kotlinx.dataframe.api.toMap +import org.jetbrains.kotlinx.dataframe.api.toRight +import org.jetbrains.kotlinx.dataframe.api.toStr +import org.jetbrains.kotlinx.dataframe.api.toValueColumn +import org.jetbrains.kotlinx.dataframe.api.transpose +import org.jetbrains.kotlinx.dataframe.api.under +import org.jetbrains.kotlinx.dataframe.api.ungroup +import org.jetbrains.kotlinx.dataframe.api.update +import org.jetbrains.kotlinx.dataframe.api.value +import org.jetbrains.kotlinx.dataframe.api.values +import org.jetbrains.kotlinx.dataframe.api.valuesNotNull +import org.jetbrains.kotlinx.dataframe.api.where +import org.jetbrains.kotlinx.dataframe.api.with +import org.jetbrains.kotlinx.dataframe.api.withNull +import org.jetbrains.kotlinx.dataframe.api.withValue +import org.jetbrains.kotlinx.dataframe.api.withValues +import org.jetbrains.kotlinx.dataframe.api.withZero +import org.jetbrains.kotlinx.dataframe.api.xs +import org.jetbrains.kotlinx.dataframe.columns.ColumnKind +import org.jetbrains.kotlinx.dataframe.columns.UnresolvedColumnsPolicy +import org.jetbrains.kotlinx.dataframe.exceptions.ExcessiveColumnsException +import org.jetbrains.kotlinx.dataframe.exceptions.TypeConversionException +import org.jetbrains.kotlinx.dataframe.hasNulls +import org.jetbrains.kotlinx.dataframe.impl.DataFrameSize +import org.jetbrains.kotlinx.dataframe.impl.api.convertToImpl +import org.jetbrains.kotlinx.dataframe.impl.between +import org.jetbrains.kotlinx.dataframe.impl.columns.isMissingColumn +import org.jetbrains.kotlinx.dataframe.impl.emptyPath +import org.jetbrains.kotlinx.dataframe.impl.getColumnsImpl +import org.jetbrains.kotlinx.dataframe.impl.nothingType +import org.jetbrains.kotlinx.dataframe.impl.trackColumnAccess +import org.jetbrains.kotlinx.dataframe.index +import org.jetbrains.kotlinx.dataframe.io.renderValueForStdout +import org.jetbrains.kotlinx.dataframe.kind +import org.jetbrains.kotlinx.dataframe.math.mean +import org.jetbrains.kotlinx.dataframe.ncol +import org.jetbrains.kotlinx.dataframe.nrow +import org.jetbrains.kotlinx.dataframe.size +import org.jetbrains.kotlinx.dataframe.type +import org.jetbrains.kotlinx.dataframe.typeClass +import org.junit.Test +import java.math.BigDecimal +import java.time.LocalDate +import kotlin.reflect.jvm.jvmErasure +import kotlin.reflect.typeOf + +class DataFrameTests : BaseTest() { + + @Test + fun `create with columns`() { + dataFrameOf("name", "age", "city", "weight")(df.columns()) shouldBe df + + dataFrameOf("name", "age", "city", "weight")( + typed.name named "bla", + typed.age named "", + typed.city.rename("qq"), + typed.weight.named("asda") + ) shouldBe df + + val c1 = typed.name.toList().toValueColumn() + val c2 = typed.age.toList().toValueColumn() + val c3 = typed.city.toList().toValueColumn() + val c4 = typed.weight.toList().toValueColumn() + + dataFrameOf("name", "age", "city", "weight")(c1, c2, c3, c4) shouldBe df + } + + @Test + fun `create with columnOf`() { + val col = columnOf("Alice", "Bob") + val d = dataFrameOf("name")(col) + d.nrow shouldBe 2 + d.columnNames() shouldBe listOf("name") + } + + @Test + fun 
`create with unnamed columns`() { + val a = columnOf("Alice", "Bob") + val b = columnOf(1, 2) + val d = dataFrameOf(a, b) + d.nrow shouldBe 2 + d.ncol shouldBe 2 + d.columnNames() shouldBe listOf("untitled", "untitled1") + d["untitled"] shouldBe d.getColumn(0) + } + + @Test(expected = IllegalArgumentException::class) + fun `create with duplicate columns`() { + val a = columnOf("Alice", "Bob") named "col" + val b = columnOf(1, 2) named "col" + val d = dataFrameOf(a, b) + } + + @Test + fun `create column reference`() { + val name by column() + val col = name.withValues("Alice", "Bob") + val df = col.toDataFrame() + df.nrow shouldBe 2 + df.columnNames() shouldBe listOf("name") + } + + @Test + fun `add values to column reference`() { + val name by column() + val values = listOf("Alice", "Bob") + val col1 = name.withValues(values) + val col2 = values.toColumn(name) + col1 shouldBe col2 + } + + @Test + fun `create from map`() { + val data = mapOf("name" to listOf("Alice", "Bob"), "age" to listOf(15, null)) + val df = data.toDataFrame() + df.ncol shouldBe 2 + df.nrow shouldBe 2 + df.columnNames() shouldBe listOf("name", "age") + df["name"].type() shouldBe typeOf() + df["age"].type() shouldBe typeOf() + } + + @Test + fun `toMap`() { + val map = df.toMap() + map.size shouldBe 4 + map.forEach { + it.value.size shouldBe df.nrow + } + } + + @Test + fun `size`() { + df.size() shouldBe DataFrameSize(df.ncol, df.nrow) + } + + @Test + fun `slicing`() { + val sliced = typed[1..2] + sliced.nrow shouldBe 2 + sliced[0].name shouldBe typed[1].name + } + + @Test + fun `access tracking`() { + trackColumnAccess { + typed[2].age + } shouldBe listOf("age") + } + + @Test + fun `indexing`() { + val expected = 45 + val i = 1 + + fun Int.check() = this shouldBe expected + + typed[i].age.check() + typed.age[i].check() + + df[i][age].check() + df[age][i].check() + + df[i][Person::age].check() + df[Person::age][i].check() + + df[i].getValue("age").check() + (df[i]["age"] as Int).check() + + df["age"].cast()[i].check() + (df["age"][i] as Int).check() + } + + @Test + fun `null indexing`() { + val i = 5 + + fun String?.check() = this shouldBe null + + typed[i].city.check() + typed.city[i].check() + + df[i][city].check() + df[city][i].check() + + df[i][Person::city].check() + df[Person::city][i].check() + + df[i].getValue("city").check() + (df[i]["city"] as String?).check() + + df["city"].cast()[i].check() + (df["city"][i] as String?).check() + } + + @Test + fun `incorrect column nullability`() { + val col = + column().named("weight") // non-nullable column definition is incorrect here, because actual dataframe has nulls in this column + + shouldThrow { + println(df[2][col]) + } + } + + @Test + fun `chunked`() { + val res = df.chunked(2) + res.size() shouldBe 4 + res.toList().dropLast(1).forEach { + it.nrow shouldBe 2 + } + } + + @Test + fun `update`() { + fun AnyFrame.check() { + getColumn(1).name() shouldBe "age" + ncol shouldBe typed.ncol + this["age"].toList() shouldBe typed.rows().map { it.age * 2 } + } + + typed.update { age }.with { it * 2 }.check() + typed.update { age }.with { it * 2 }.check() + typed.update(typed.age) { it * 2 }.check() + + df.update { age }.with { it * 2 }.check() + df.update(age) { it * 2 }.check() + df.update(age) { it * 2 }.check() + + df.update(Person::age) { it * 2 }.check() + + df.update("age") { "age"() * 2 }.check() + } + + @Test + fun `conditional update`() { + fun AnyFrame.check() { + getColumn(1).name() shouldBe "age" + ncol shouldBe typed.ncol + this["age"].toList() shouldBe 
typed.rows().map { if (it.age > 25) null else it.age } + } + + typed.update { age }.where { it > 25 }.withNull().check() + typed.update { age }.where { it > 25 }.withNull().check() + typed.update(typed.age).where { it > 25 }.withNull().check() + + df.update { age }.where { it > 25 }.withNull().check() + df.update(age).where { it > 25 }.withNull().check() + df.update(age).where { it > 25 }.withNull().check() + + df.update(Person::age).where { it > 25 }.withNull().check() + + df.update("age").where { it as Int > 25 }.withNull().check() + df.update("age").where { it as Int > 25 }.withNull().check() + } + + @Test + fun `update cells by index`() { + val res = typed.update { age }.at(2, 4).withValue(100) + val expected = typed.rows().map { if (it.index == 2 || it.index == 4) 100 else it.age } + res.age.toList() shouldBe expected + } + + @Test + fun `update cells by index range`() { + val res = typed.update { age }.at(2..4).withValue(100) + val expected = typed.rows().map { if (it.index in 2..4) 100 else it.age } + res.age.toList() shouldBe expected + } + + @Test + fun `null to zero`() { + val expected = typed.weight.toList().map { it ?: 0 } + fun AnyFrame.check() { + this["weight"].toList() shouldBe expected + } + + typed.fillNulls { weight }.withValue(0).check() + typed.fillNulls(typed.weight).withValue(0).check() + + df.fillNulls { weight }.withValue(0).check() + df.fillNulls(weight).withValue(0).check() + + df.fillNulls("weight").withValue(0).check() + + typed.fillNulls { weight }.withZero().check() + typed.fillNulls { weight }.withZero().check() + typed.fillNulls(typed.weight).withZero().check() + + df.fillNulls { weight }.withZero().check() + df.fillNulls { weight }.withZero().check() + } + + @Test + fun `resetToNull`() { + val updated = typed.update { all() }.withNull() + + updated.columns().forEach { + it.forEach { it shouldBe null } + } + } + + @Test + fun `sort`() { + val expected = listOf(null, "London", "Dubai", "Tokyo", "Milan", "Moscow", "Moscow") + + fun AnyFrame.check() = this[city].toList() shouldBe expected + + typed.sortBy { name and age.desc() }.check() + typed.sortBy { it.name and it.age.desc() }.check() + + df.sortBy { name and age.desc() }.check() + + df.sortBy { Person::name and Person::age.desc() }.check() + + df.sortBy { "name"() and "age".desc() }.check() + } + + @Test + fun `sort nulls first`() { + val expected = typed.city.toList().sortedBy { it } + + fun AnyFrame.check() = this[city].toList() shouldBe expected + + typed.sortBy { city }.check() + df.sortBy { city }.check() + df.sortBy { col(Person::city) }.check() + df.sortBy { get("city") }.check() + } + + @Test + fun `sort nulls last`() { + val expected = typed.city.toList().filterNotNull().sortedBy { it } + listOf(null) + + fun AnyFrame.check() = this[city].toList() shouldBe expected + + typed.sortBy { city.nullsLast() }.check() + df.sortBy { city.nullsLast() }.check() + df.sortBy { Person::city.nullsLast() }.check() + df.sortBy { "city".nullsLast() }.check() + } + + @Test + fun `equals`() { + typed shouldBe typed.update { age }.with { age } + } + + @Test + fun `get group by single key`() { + typed.groupBy { name }.xs("Charlie").concat() shouldBe typed.filter { name == "Charlie" }.remove { name } + } + + @Test + fun `get group by complex key`() { + typed.groupBy { city and name }.xs("Tokyo", "Bob").concat() shouldBe + typed.filter { name == "Bob" && city == "Tokyo" } + .remove { name and city } + } + + @Test + fun `get group by partial key`() { + typed.groupBy { city and name }.xs("Tokyo").toDataFrame() shouldBe + 
typed.filter { city == "Tokyo" }.remove { city }.groupBy { name }.toDataFrame() + } + + @Test + fun `group and sort`() { + val expected = typed.sortBy { name.desc() and age } + val actual = typed.groupBy { name }.sortBy { name.desc() and age }.concat() + actual shouldBe expected + } + + @Test + fun `filter`() { + val expected = listOf("Bob", "Bob", "Charlie") + fun AnyFrame.check() = this[name].toList() shouldBe expected + + val limit = 20 + + typed.filter { it.age > limit && it.weight != null }.check() + typed.filter { age > limit && it.weight != null }.check() + + df.filter { it[Person::age] > limit && it[Person::weight] != null }.check() + + df.filter { age > limit && weight() != null }.check() + df.filter { it[age] > limit && this[weight] != null }.check() + + df.filter { "age"() > limit && "weight"() != null }.check() + } + + @Test + fun `drop nulls 1`() { + fun AnyFrame.check() = rows().forEach { get("weight") shouldNotBe null } + + typed.dropNulls(typed.weight).check() + typed.dropNulls { weight }.check() + typed.dropNulls { it.weight }.check() + + df.dropNulls(weight).check() + df.dropNulls { weight }.check() + + df.dropNulls("weight").check() + } + + @Test + fun `drop where all null`() { + val filtered = typed.update { weight }.where { name == "Alice" }.withNull() + val expected = typed.nrow - 1 + + fun AnyFrame.check() = nrow shouldBe expected + + filtered.dropNulls(typed.weight.toColumnAccessor(), typed.city.toColumnAccessor(), whereAllNull = true).check() + filtered.dropNulls(whereAllNull = true) { weight and city }.check() + filtered.dropNulls(whereAllNull = true) { it.weight and it.city }.check() + + filtered.dropNulls(Person::weight, Person::city, whereAllNull = true).check() + + filtered.dropNulls(weight, city, whereAllNull = true).check() + filtered.dropNulls(whereAllNull = true) { weight and city }.check() + + filtered.dropNulls("weight", "city", whereAllNull = true).check() + } + + @Test + fun `drop where any null`() { + val filtered = typed.update { weight }.where { name == "Alice" }.withNull() + val expected = filtered.count { weight != null && city != null } + + fun AnyFrame.check() = nrow shouldBe expected + + filtered.dropNulls(typed.weight.toColumnAccessor(), typed.city.toColumnAccessor()).check() + filtered.dropNulls { weight and city }.check() + filtered.dropNulls { it.weight and it.city }.check() + + filtered.dropNulls(Person::weight, Person::city).check() + + filtered.dropNulls(weight, city).check() + filtered.dropNulls { weight and city }.check() + + filtered.dropNulls("weight", "city").check() + + filtered.dropNulls().check() + + filtered.select { weight and city }.dropNulls().check() + } + + @Test + fun `select one `() { + val expected = listOf(typed.age) + fun AnyFrame.check() = columns() shouldBe expected + + typed.select { age }.check() + typed.select { it.age }.check() + typed.select(typed.age).check() + + df.select(Person::age).check() + + df.select { age }.check() + df.select(age).check() + + df.select("age").check() + + df.select { it["age"] }.check() + df.select { get("age") }.check() + df.select { this["age"] }.check() + } + + @Test + fun `select if`() { + val expected = listOf(typed.name, typed.city) + + fun AnyFrame.check() = columns() shouldBe expected + + typed.select { cols { it.name().length == 4 } }.check() + df.select { cols { it.name().length == 4 } }.check() + } + + @Test + fun `select two`() { + val expected = listOf(typed.age, typed.city) + fun AnyFrame.check() = columns() shouldBe expected + + typed.select { age and city }.check() + 
typed.select { it.age and it.city }.check() + typed.select(typed.age, typed.city).check() + + typed.select(Person::age, Person::city).check() + + df.select { age and city }.check() + df.select(age, city).check() + df[age, city].check() + + df.select { "age" and "city" }.check() + df.select("age", "city").check() + df["age", "city"].check() + } + + @Test + fun `select by type`() { + val selected = typed.select { colsOf() } + selected shouldBe typed.select { name and city } + } + + @Test + fun `select by type not nullable`() { + val selected = typed.select { colsOf { !it.hasNulls() } } + selected shouldBe typed.select { name } + } + + @Test + fun `move one column`() { + val moved = typed.moveTo(1) { city } + val expected = typed.select { cols(name, city, age, weight) } + moved shouldBe expected + } + + @Test + fun `move several columns`() { + val moved = typed.moveTo(2) { name and city } + val expected = typed.select { cols(age, weight, name, city) } + moved shouldBe expected + } + + @Test + fun `move several columns to left`() { + val moved = typed.moveToLeft { weight and age } + val expected = typed.select { cols(weight, age, name, city) } + moved shouldBe expected + } + + @Test + fun `move several columns to right`() { + val moved = typed.moveToRight { weight and name } + val expected = typed.select { cols(age, city, weight, name) } + moved shouldBe expected + } + + @Test + fun `select with rename 2`() { + val res = typed.select { name named "Name" } + res.columnNames() shouldBe listOf("Name") + df.select { name named "Name" } shouldBe res + df.select { it["name"] named "Name" } shouldBe res + } + + @Test + fun `select with map and rename`() { + val res = typed.select { name.map { it.lowercase() } named "Name" } + res.columnNames() shouldBe listOf("Name") + res["Name"].values() shouldBe typed.name.values().map { it.lowercase() } + df.select { name.map { it.lowercase() } named "Name" } shouldBe res + df.select { it[Person::name].map { it.lowercase() } named "Name" } shouldBe res + df.select { "name"().map { it.lowercase() } named "Name" } shouldBe res + } + + @Test + fun `get column with map`() { + val converted = name.map { it.lowercase() } + val res = df[converted] + res.values() shouldBe typed.name.values().map { it.lowercase() } + } + + @Test + fun `get column by accessor`() { + val res = df[0..1][name] + res.size() shouldBe 2 + } + + @Test + fun `groupBy`() { + fun AnyFrame.check() { + nrow shouldBe 3 + this["name"].toList() shouldBe listOf("Alice", "Bob", "Charlie") + this["n"].toList() shouldBe listOf(2, 2, 3) + this["old count"].toList() shouldBe listOf(0, 2, 2) + this["median age"].toList() shouldBe listOf(17, 37, 30) + this["min age"].toList() shouldBe listOf(15, 30, 20) + this["oldest origin"].toList() shouldBe listOf(null, "Dubai", "Milan") + this["youngest origin"].toList() shouldBe listOf("London", "Tokyo", "Moscow") + this["all with weights"].toList() shouldBe listOf(true, true, false) + val cities = getColumnGroup("city") + cities["from London"].toList() shouldBe listOf(1, 0, 0) + cities["from Dubai"].toList() shouldBe listOf(0, 1, 0) + cities["from Moscow"].toList() shouldBe listOf(0, 0, 2) + cities["from Milan"].toList() shouldBe listOf(0, 0, 1) + cities["from Tokyo"].toList() shouldBe listOf(0, 1, 0) + cities["from null"].toList() shouldBe listOf(1, 0, 0) + this["ages"].toList() shouldBe listOf(listOf(15, 20), listOf(45, 30), listOf(20, 40, 30)) + } + + typed.groupBy { name }.aggregate { + count() into "n" + count { age > 25 } into "old count" + median { age } into 
"median age" + min { age } into "min age" + all { weight != null } into "all with weights" + maxBy { age }.city into "oldest origin" + sortBy { age }.first().city into "youngest origin" + pivot { city.map { "from $it" } }.count() + age.toList() into "ages" + }.check() + + typed.groupBy { it.name }.aggregate { + it.nrow into "n" + it.count { it.age > 25 } into "old count" + it.median { it.age } into "median age" + it.min { it.age } into "min age" + it.all { it.weight != null } into "all with weights" + it.maxBy { it.age }.city into "oldest origin" + it.sortBy { it.age }.first().city into "youngest origin" + it.pivot { it.city.map { "from $it" } }.count() + it.age.toList() into "ages" + }.check() + + df.groupBy(name).aggregate { + count() into "n" + count { age > 25 } into "old count" + median(age) into "median age" + min(age) into "min age" + all { weight() != null } into "all with weights" + maxBy(age)[city] into "oldest origin" + sortBy(age).first()[city] into "youngest origin" + pivot(city.map { "from $it" }).count() + it[age].toList() into "ages" + }.check() + + df.groupBy(Person::name).aggregate { + count() into "n" + count { it[Person::age] > 25 } into "old count" + median(Person::age) into "median age" + min(Person::age) into "min age" + all { it[Person::weight] != null } into "all with weights" + maxBy(Person::age)[Person::city] into "oldest origin" + sortBy(Person::age).first()[Person::city] into "youngest origin" + pivot { it[Person::city].map { "from $it" } }.count() + it[Person::age].toList() into "ages" + }.check() + + df.groupBy("name").aggregate { + count() into "n" + count { "age"() > 25 } into "old count" + median { "age"() } into "median age" + min { "age"() } into "min age" + all { it["weight"] != null } into "all with weights" + maxBy { "age"() }["city"] into "oldest origin" + sortBy("age").first()["city"] into "youngest origin" + pivot { it["city"].map { "from $it" } }.count() + it["age"].toList() into "ages" + }.check() + } + + @Test + fun `groupBy meanOf`() { + typed.groupBy { name }.meanOf { age * 2 } shouldBe typed.groupBy { name } + .aggregate { mean { age } * 2 into "mean" } + } + + @Test + fun `min`() { + val expected = 15 + + fun Any?.check() = this shouldBe expected + + typed.minOf { age }.check() + typed.min { it.age }.check() + typed.age.min().check() + + df.min { age }.check() + df.min(age).check() + df[age].min().check() + + df.min { "age"() }.check() + df.min("age").check() + df["age"].cast().min().check() + } + + @Test + fun `nullable max`() { + val expected = 90 + + fun Int?.check() = this shouldBe expected + + typed.max { weight }.check() + typed.max { it.weight }.check() + typed.weight.max().check() + + df.max { weight }.check() + df.max(weight).check() + df[weight].max().check() + + df.max { "weight"() }.check() + df["weight"].cast().max().check() + (df.max("weight") as Int?).check() + } + + @Test + fun `nullable minBy`() { + val expected = "Alice" + + fun AnyRow?.check() = this!![name] shouldBe expected + + typed.dropNulls { weight }.minBy { weight }.check() + typed.dropNulls { it.weight }.minBy { it.weight }.check() + typed.minBy { weight }.check() + + df.dropNulls(weight).minBy(weight).check() + df.minBy(weight).check() + + df.dropNulls("weight").minBy { "weight"() }.check() + df.dropNulls("weight").minBy("weight").check() + df.minBy("weight").check() + } + + @Test + fun `maxBy`() { + val expected = "Bob" + + fun AnyRow?.check() = this!![name] shouldBe expected + + typed.maxBy { age }.check() + typed.maxBy { it.age }.check() + 
typed.maxBy(typed.age).check() + + df.maxBy { age() }.check() + df.maxBy(age).check() + + df.maxBy { "age"() }.check() + df.maxBy("age").check() + } + + @Test + fun `add one column`() { + val now = 2020 + val expected = typed.rows().map { now - it.age } + + fun AnyFrame.check() = this["year"].toList() shouldBe expected + + typed.add("year") { now - age }.check() + typed.add("year") { now - it.age }.check() + + df.add("year") { now - age }.check() + + df.add("year") { now - "age"() }.check() + } + + @Test + fun `add several columns`() { + val now = 2020 + val expected = typed.rows().map { now - it.age } + val g by columnGroup() + + val df = typed.add { + "a" from { now - age } + "b" from now - age + now - age into "c" + "d" { + "f" from { now - age } + } + group { + g from { + add(age.map { now - it }.named("h")) + } + } into "e" + }.remove { allBefore("a") } + + df.columnNames() shouldBe listOf("a", "b", "c", "d", "e") + df["d"].kind() shouldBe ColumnKind.Group + df["e"].kind() shouldBe ColumnKind.Group + df.getColumnGroup("d").columnNames() shouldBe listOf("f") + df.getColumnGroup("e").getColumnGroup("g").columnNames() shouldBe listOf("h") + val cols = df.getColumns { allDfs() } + cols.size shouldBe 5 + cols.forEach { + it.toList() shouldBe expected + } + } + + @Test + fun `remove one column`() { + val expected = listOf("name", "city", "weight") + fun check(body: () -> AnyFrame) = body().columnNames() shouldBe expected + + check { typed - { age } } + check { typed - { it.age } } + check { typed.remove { age } } + check { typed.remove { it.age } } + + check { df - { age } } + check { df - age } + check { df.remove(age) } + + check { df - "age" } + check { df.remove("age") } + } + + @Test + fun `remove two columns`() { + val expected = listOf("name", "city") + fun check(body: () -> AnyFrame) = body().columnNames() shouldBe expected + + check { typed - { age and weight } } + check { typed - { it.age and it.weight } } + check { typed - { age } - { weight } } + check { typed - { it.age } - { it.weight } } + check { typed.remove { age and weight } } + check { typed.remove { it.age and it.weight } } + + check { df - { age and weight } } + check { df - age - weight } + check { df - { age } - { weight } } + check { df.remove(age, weight) } + + check { df - { "age" and "weight" } } + check { df - "age" - "weight" } + check { df.remove("age", "weight") } + } + + @Test + fun `merge similar dataframes`() { + val res = typed.concat(typed, typed) + res.name.size() shouldBe 3 * typed.nrow + res.rows().forEach { it.values() shouldBe typed[it.index % typed.nrow].values() } + } + + @Test + fun `merge different dataframes`() { + val height by column() + val heightOrNull = height.nullable() + + val other = dataFrameOf(name, height)( + "Bill", + 135, + "Charlie", + 160 + ).cast() + + val res = typed.concat(other) + res.nrow shouldBe typed.nrow + other.nrow + res.take(typed.nrow).rows().forEach { it[heightOrNull] == null } + val q = res.takeLast(other.nrow) + q.rows().forEach { it[name] shouldBe other[it.index][name] } + q.rows().forEach { it[heightOrNull] shouldBe other[it.index][height] } + } + + @Test + fun `row to frame`() { + typed[1].toDataFrame().name.size() shouldBe 1 + } + + @Test + fun `compare comparable`() { + val new = df.add("date") { LocalDate.now().minusDays(index.toLong()) } + val date by column() + new.filter { date >= LocalDate.now().minusDays(3) }.nrow shouldBe 4 + } + + @Test + fun `union dataframes with different type of the same column`() { + val df2 = dataFrameOf("age")(32.6, 56.3, null) 
+ df2["age"].type() shouldBe typeOf() + val merged = df.concat(df2) + merged["age"].type() shouldBe typeOf() + val updated = merged.convert("age") { "age"()?.toDouble() } + updated["age"].type() shouldBe typeOf() + } + + @Test + fun `distinct`() { + val expected = 6 + typed.countDistinct { name and city } shouldBe expected + typed.select { name and city }.distinct().nrow shouldBe expected + typed.select { name and city }.countDistinct() shouldBe expected + val d = typed.distinct { name and city } + d.nrow shouldBe expected + d.ncol shouldBe 2 + } + + @Test + fun `distinct by`() { + typed.distinctBy { name }.nrow shouldBe 3 + typed.distinctBy { name and city }.nrow shouldBe 6 + typed.distinctBy { expr { age / 10 } }.nrow shouldBe 4 + typed.distinctBy { age / 10 }.nrow shouldBe 4 + typed.distinctBy { expr { city?.get(0) } }.nrow shouldBe 5 + } + + @Test + fun `addRow`() { + val res = typed.append("Bob", null, "Paris", null) + res.nrow shouldBe typed.nrow + 1 + res.name.type() shouldBe typeOf() + res.age.type() shouldBe typeOf() + res.city.type() shouldBe typeOf() + res.weight.type() shouldBe typeOf() + + val row = res.last() + row.name shouldBe "Bob" + row["age"] shouldBe null + row.city shouldBe "Paris" + row.weight shouldBe null + } + + @Test + fun `rename`() { + fun AnyFrame.check() { + this["name2"].toList() shouldBe typed.name.toList() + this["age2"].toList() shouldBe typed.age.toList() + this.columnNames() shouldBe listOf("name2", "age2", "city", "weight") + this.getColumnOrNull("age") shouldBe null + } + typed.rename("name" to "name2", "age" to "age2").check() + typed.rename { name and age }.into("name2", "age2").check() + typed.rename { name and age }.into { it.name + "2" }.check() + } + + @Test + fun `select with rename`() { + val expected = typed.select { name and age }.rename { all() }.into { it.name + 2 } + typed.select { name into "name2" and age.into("age2") } shouldBe expected + } + + @Test + fun `nunique`() { + typed.name.countDistinct() shouldBe 3 + } + + @Test + fun `encode names`() { + val encoding = typed.name.distinct().addId("name_id") + val res = typed.leftJoin(encoding) + res["name_id"].toList() shouldBe listOf(0, 1, 2, 2, 1, 0, 2) + } + + @Test + fun `pivot matches`() { + val pivoted = typed.pivot { city }.groupBy { name and age and weight }.matches() + pivoted.ncol shouldBe 4 + typed.ncol + typed.city.countDistinct() - 1 + val data = pivoted.getColumnGroup("city") + for (row in 0 until typed.nrow) { + val city = typed[row][city].toString() + data[city][row] shouldBe true + for (col in 0 until data.ncol) { + val column = data.getColumn(col) + val pivotedValue = column.cast()[row] + val colName = column.name() + pivotedValue shouldBe (colName == city) + } + } + } + + @Test + fun `pivot matches equality`() { + val res1 = typed.pivot { city }.groupBy { name }.matches() + val res2 = typed.groupBy { name }.pivot { city }.matches() + val res3 = typed.groupBy { name }.aggregate { + pivot { city }.matches() + } + res2 shouldBe res1 + res3 shouldBe res1 + } + + @Test + fun `pivot matches with conversion`() { + val filtered = typed.dropNulls { city } + val res = filtered.pivot(inward = false) { city.lowercase() }.groupBy { name and age }.matches() + val cities = filtered.city.toList().map { it!!.lowercase() } + val gathered = + res.gather { colsOf { cities.contains(it.name()) } }.where { it }.keysInto("city") + val expected = filtered.select { name and age and city.map { it!!.lowercase() } }.moveToRight { city } + gathered shouldBe expected + } + + @Test + fun `pivot matches 
distinct rows`() {
+        val res = typed.pivot(inward = false) { city }.groupBy { name and age }.matches()
+        res.ncol shouldBe 2 + typed.city.countDistinct()
+        for (i in 0 until typed.nrow) {
+            val city = typed[i][city]
+            for (j in typed.ncol until res.ncol) {
+                val col = res.getColumn(j)
+                col.cast<Boolean>().get(i) shouldBe (col.name() == city.toString())
+            }
+        }
+    }
+
+    @Test
+    fun `pivot matches merged rows`() {
+        val selected = typed.select { name and city }
+        val res = typed.pivot(inward = false) { city }.groupBy { name }.matches()
+
+        res.ncol shouldBe selected.city.countDistinct() + 1
+        res.nrow shouldBe selected.name.countDistinct()
+        val trueValuesCount = res.columns().drop(1).sumOf { it.cast<Boolean>().toList().count { it } }
+        trueValuesCount shouldBe selected.distinct().nrow
+
+        val pairs = (1 until res.ncol).flatMap { i ->
+            val col = res.getColumn(i).cast<Boolean>()
+            res.filter { it[col] }.rows().map { it.name to col.name() }
+        }.toSet()
+
+        pairs shouldBe typed.rows().map { it.name to it.city.toString() }.toSet()
+    }
+
+    @Test
+    fun `pivot to matrix`() {
+        val other by column<String>()
+        val others = other.cast<List<String>>()
+        val sum by column<Int>()
+
+        val names = typed.name.distinct().toList()
+
+        val src = typed.select { name }
+            .add(others) { names }
+            .split { others }.intoRows()
+            .add(sum) { name.length + other().length }
+
+        val matrix = src.pivot { other }.groupBy { name }.with { sum }
+        matrix.getColumnGroup(other.name()).ncol shouldBe names.size
+    }
+
+    @Test
+    fun `gather bool`() {
+        val pivoted = typed.pivot { city }.groupBy { name }.matches()
+        val res = pivoted.gather { dfsOf<Boolean>() }.where { it }.keysInto("city")
+        val sorted = res.sortBy { name and city }
+        sorted shouldBe typed.select { name and city.map { it.toString() } }.distinct().sortBy { name and city }
+    }
+
+    @Test
+    fun `gather nothing`() {
+        val gat = typed.gather { city and name }
+
+        gat.where { false }
+            .into("key", "value").print()
+    }
+
+    @Test
+    fun `merge rows keep nulls`() {
+        val merged = typed.select { name and city }.implode(dropNA = false) { city }
+
+        val cityList = column<List<String?>>().named("city")
+        merged[cityList].sumOf { it.size } shouldBe typed.city.size
+        merged[cityList].type() shouldBe typeOf<List<String?>>()
+
+        val expected = typed.groupBy { name }.aggregate { it.city.toSet() into "city" }
+        val actual = merged.convert(cityList).with { it.toSet() }
+
+        actual shouldBe expected
+
+        // check that default value for 'dropNulls' is false
+        typed.select { name and city }.implode { city } shouldBe merged
+    }
+
+    @Test
+    fun `merge rows drop nulls`() {
+        val merged = typed.select { name and city }.implode(dropNA = true) { city }
+
+        val cityList = column<List<String>>().named("city")
+        merged[cityList].sumOf { it.size } shouldBe typed.city.dropNulls().size
+        merged[cityList].type() shouldBe typeOf<List<String>>()
+
+        val expected =
+            typed.dropNulls { city }.groupBy { name }.aggregate { it.city.toSet() as Set<String> into "city" }
+        val actual = merged.convert { cityList }.with { it.toSet() }
+
+        actual shouldBe expected
+    }
+
+    @Test
+    fun splitRows() {
+        val selected = typed.select { name and city }
+        val nested = selected.implode(dropNA = false) { city }
+        val mergedCity = column<List<String?>>("city")
+        val res = nested.split { mergedCity }.intoRows()
+        res.sortBy { name } shouldBe selected.sortBy { name }
+    }
+
+    @Test
+    fun mergeCols() {
+        val merged = typed.merge { age and city and weight }.into("info")
+        merged.ncol shouldBe 2
+        merged.nrow shouldBe typed.nrow
+        for (row in 0 until typed.nrow) {
+            val list = merged[row]["info"] as List<Any?>
+            list.size shouldBe 3
+            list[0] shouldBe typed.age[row]
+
list[1] shouldBe typed.city[row] + list[2] shouldBe typed.weight[row] + } + } + + @Test + fun joinColsToString() { + val merged = typed.merge { age and city and weight }.by(", ").into("info") + merged.ncol shouldBe 2 + merged.nrow shouldBe typed.nrow + for (row in 0 until typed.nrow) { + val joined = merged[row]["info"] as String + joined shouldBe typed.age[row].toString() + ", " + typed.city[row] + ", " + typed.weight[row] + } + } + + @Test + fun mergeIntoList() { + val parsed = typed + .merge { age and city and weight }.by(", ").intoList() + .toDataFrame { "data" from { it } } + .split("data").by(", ").into(age, city, weight) + .parse(ParserOptions(nullStrings = setOf("null"))) + + val expected = typed[age, city, weight] + parsed shouldBe expected + } + + @Test + fun mergeColsCustom() { + val merged = + typed.merge { name and city and age }.by { it[0].toString() + " from " + it[1] + " aged " + it[2] } + .into("info") + merged.ncol shouldBe 2 + merged.nrow shouldBe typed.nrow + merged[0]["info"] shouldBe "Alice from London aged 15" + } + + @Test + fun mergeColsCustom2() { + val merged = typed.merge { name and city and age }.by { "$name from $city aged $age" }.into("info") + merged.ncol shouldBe 2 + merged.nrow shouldBe typed.nrow + merged[0]["info"] shouldBe "Alice from London aged 15" + } + + @Test + fun splitCol() { + val merged = typed.merge { age and city and weight }.into("info") + val info by column>() + val res = merged.split(info).into("age", "city", "weight") + res shouldBe typed + } + + @Test + fun splitMergeFrameCol() { + val groups by frameColumn() + val grouped = typed.groupBy { name }.into(groups) + val split = grouped.split(groups).into { "rec$it" } + val merged = split.merge { drop(1) }.notNull().into(groups) + merged shouldBe grouped + } + + @Test + fun splitStringCol() { + val merged = typed.merge { age and city and weight }.by(" - ").into("info") + val info by column() + val res = merged.split { info }.by("-").into("age", "city", "weight") + val expected = typed.convert { age and city and weight }.with { it.toString() } + res shouldBe expected + } + + @Test + fun splitStringCol2() { + val merged = typed.merge { age and city and weight }.by(",").into("info") + val info by column() + val res = merged.split(info).into("age", "city", "weight") + val expected = typed.convert { age and city and weight }.with { it.toString() } + res shouldBe expected + } + + @Test + fun splitStringColGenerateNames() { + val merged = typed.merge { age and city and weight }.by(",").into("info") + val info by column() + val res = merged.split(info).into("age") { "extra$it" } + res.columnNames() shouldBe listOf("name", "age", "extra1", "extra2") + } + + @Test + fun splitStringColWithDefaultgenerator() { + val merged = typed.merge { age and city and weight }.by(",").into("info") + val info by column() + val res = merged.split(info).into("age") + res.columnNames() shouldBe listOf("name", "age", "split1", "split2") + } + + @Test + fun splitAgeIntoDigits() { + fun digits(num: Int) = sequence { + var k = num + while (k > 0) { + yield(k % 10) + k /= 10 + } + }.toList() + + val res = typed.split { age }.by { digits(it) }.into { "digit$it" } + } + + @Test + fun splitStringCol3() { + val merged = typed.merge { age and city and weight }.by(", ").into("info") + val info by column() + val res = merged.split(info).by(",").into("age", "city", "weight") + val expected = typed.convert { age and city and weight }.with { it.toString() } + res shouldBe expected + } + + @Test + fun splitStringCols() { + val merged = 
typed.merge { name and city }.by(", ").into("nameAndCity") + .merge { age and weight }.into("info") + val nameAndCity by column() + val info by column>() + val res = merged.split { nameAndCity }.into("name", "city").split(info).into("age", "weight") + val expected = typed.update { city }.with { it.toString() }.move { city }.to(1) + res shouldBe expected + } + + @Test + fun `split by with default`() { + val res = typed.split { city }.by('o').default("--").into { "a$it" } + res.sumOf { values().count { it == "--" } } shouldBe 7 + } + + @Test + fun `merge cols with conversion`() { + val pivoted = typed.groupBy { name }.pivot { city }.count() + val res = pivoted.merge { city.colsOf() }.by { it.filterNotNull().sum() }.into("cities") + val expected = typed.select { name and city }.groupBy { name }.count("cities") + res shouldBe expected + } + + @Test + fun `generic column type`() { + val d = typed.convert { city }.with { it?.toCharArray()?.toList() ?: emptyList() } + println(d.city.type()) + } + + @Test + fun `column group by`() { + fun DataFrame.check() { + ncol shouldBe 3 + nrow shouldBe typed.nrow + columnNames() shouldBe listOf("name", "Int", "String") + val intGroup = this["Int"].asColumnGroup() + intGroup.columnNames() shouldBe listOf("age", "weight") + + val res = listOf( + this.name, + this["Int"]["age"], + this["String"]["city"], + this["Int"]["weight"] + ).toDataFrame().cast() + res shouldBe typed + } + typed.group { cols { it != name } }.into { it.type.jvmErasure.simpleName!! }.check() + typed.group { age and city and weight }.into { it.type.jvmErasure.simpleName!! }.check() + } + + @Test + fun `column group`() { + val grouped = typed.move { age and name and city }.under("info") + grouped.ncol shouldBe 2 + grouped.columnNames() shouldBe listOf("info", "weight") + val res = listOf( + grouped["info"]["name"], + grouped["info"]["age"], + grouped["info"]["city"], + grouped.weight + ).toDataFrame().cast() + res shouldBe typed + } + + @Test + fun `column ungroup`() { + val info by columnGroup() + val res = typed.move { age and city }.under("info").ungroup { info } + res shouldBe typed + } + + @Test + fun `empty group by`() { + val ungrouped = typed.filter { false }.groupBy { name }.concat() + ungrouped.nrow shouldBe 0 + ungrouped.ncol shouldBe 0 + } + + @Test + fun `column stats`() { + typed.age.mean() shouldBe typed.age.toList().mean() + typed.age.min() shouldBe typed.age.toList().minOrNull() + typed.age.max() shouldBe typed.age.toList().maxOrNull() + typed.age.sum() shouldBe typed.age.toList().sum() + } + + @Test + fun `row to string`() { + typed[0].toString() shouldBe "{ name:Alice, age:15, city:London, weight:54 }" + } + + @Test + fun `range slice`() { + typed[3..5].name.toList() shouldBe typed.name.toList().subList(3, 6) + } + + @Test + fun `range slice two times`() { + typed[3..5][1..2].name.toList() shouldBe typed.name.toList().subList(4, 6) + } + + @Test + fun `move to position`() { + typed.getColumn(1) shouldBe typed.age + val moved = typed.move { age }.to(2) + moved.getColumn(2) shouldBe typed.age + moved.ncol shouldBe typed.ncol + } + + @Test + fun `forEachIn`() { + val pivoted = typed.pivot(inward = true) { city }.groupBy { name and weight }.with { age } + val sum = pivoted.select { "city".all() }.values().filterNotNull().sumOf { it as Int } + sum shouldBe typed.age.sum() + } + + @Test + fun `parse`() { + val toStr = typed.convert { weight }.notNull { it.toString() } + val weightStr = "weight".toColumnOf() + val parsed = toStr.convert { weightStr }.toInt() + parsed shouldBe 
typed + } + + @Test + fun digitize() { + val a = 20 + val b = 40 + val expected = typed.age.toList().map { + when { + it < a -> 0 + it < b -> 1 + else -> 2 + } + } + typed.age.digitize(a, b).toList() shouldBe expected + + val expectedRight = typed.age.toList().map { + when { + it <= a -> 0 + it <= b -> 1 + else -> 2 + } + } + typed.age.digitize(a, b, right = true).toList() shouldBe expectedRight + } + + @Test + fun corr() { + val fixed = typed.fillNulls { weight }.withValue(60) + val res = fixed.corr() + res.ncol shouldBe 3 + res.nrow shouldBe 2 + res["age"][0] shouldBe 1.0 + res["weight"][0] shouldBe res["age"][1] + res["weight"][0] as Double should ToleranceMatcher(0.9, 1.0) + } + + @Test + fun `aggregate into grouped column`() { + val d = typed.groupBy { name }.aggregate { + val row = meanFor { age and weight } + row into "mean" + } + d.ncol shouldBe 2 + d["mean"].isColumnGroup() shouldBe true + val mean = d.getColumnGroup("mean") + mean.ncol shouldBe 2 + mean.columnNames() shouldBe listOf("age", "weight") + mean.columns().forEach { + it.type() shouldBe typeOf() + } + } + + @Test + fun `mean for all columns`() { + val d = typed.groupBy { name }.mean() + d.columnNames() shouldBe listOf("name", "age", "weight") + d.nrow shouldBe typed.name.countDistinct() + d["age"].type() shouldBe typeOf() + d["weight"].type() shouldBe typeOf() + } + + @Test + fun `aggregate into table column`() { + val d = typed.groupBy { name }.aggregate { + val row = select { age and weight } + row into "info" + } + d.ncol shouldBe 2 + d["info"].isFrameColumn() shouldBe true + val info = d.getFrameColumn("info") + info.forEach { + it.ncol shouldBe 2 + it.columnNames() shouldBe listOf("age", "weight") + it.columns().forEach { + it.typeClass shouldBe Int::class + } + } + } + + @Test + fun `union table columns`() { + val grouped = typed.addId("id").groupBy { name }.toDataFrame() + val dfs = (0 until grouped.nrow).map { + grouped[it..it] + } + val dst = dfs.concat().asGroupBy().concat().sortBy("id").remove("id") + dst shouldBe typed + } + + @Test + fun `columns sum`() { + val name by columnOf("Alice", "Bob", "Charlie") + val age by columnOf(15, 20, 24) + val df = dataFrameOf(name, age) + + df.columnNames() shouldBe listOf("name", "age") + df.nrow shouldBe 3 + } + + @Test + fun convert1() { + val res = typed.convert { age }.to() + res.age.typeClass shouldBe Double::class + res["age"].all { it is Double } shouldBe true + } + + @Test + fun convert2() { + val res = typed.convert { weight }.to() + res.weight.typeClass shouldBe BigDecimal::class + res["weight"].all { it == null || it is BigDecimal } shouldBe true + } + + @Test + fun convert3() { + val res = typed.convert { all() }.to() + res.columns().forEach { it.typeClass shouldBe String::class } + res.columns().map { it.hasNulls() } shouldBe typed.columns().map { it.hasNulls() } + } + + @Test + fun replace() { + val res = typed.replace { age }.with(2021 - typed.age) + val expected = typed.update { age }.with { 2021 - age } + res shouldBe expected + } + + @Test + fun `replace with rename`() { + val res = typed.replace { age }.with { it.rename("age2") } + res shouldBe typed.rename { age }.into("age2") + } + + @Test(expected = IllegalArgumentException::class) + fun `replace exception`() { + typed.replace { colsOf() }.with(typed.name) + } + + @Test + fun `replace two columns`() { + val res = typed.replace { age and weight }.with(typed.age * 2, typed.weight * 2) + val expected = typed.update { age and weight }.with { it?.times(2) } + res shouldBe expected + } + + @Test + fun 
`replace with expression`() { + val res = typed.replace { age }.with { 2021 - age named "year" } + val expected = typed.convert { age }.with { 2021 - age }.rename { age }.into("year") + res shouldBe expected + } + + @Test + fun `add dataframe`() { + val first = typed.select { name and age } + val second = typed.select { city and weight } + first.add(second) shouldBe typed + first.addAll(second.columns()) shouldBe typed + first + second.columns() shouldBe typed + } + + @Test + fun explodeLists() { + val df = dataFrameOf("lists")(listOf(1, 2), listOf(3)) + + df.explode("lists") shouldBe dataFrameOf("lists")(1, 2, 3) + } + + @Test + fun splitUnequalLists() { + val values by columnOf(1, 2, 3, 4) + val list1 by columnOf(listOf(1, 2, 3), listOf(), listOf(1, 2), null) + val list2 by columnOf(listOf(1, 2), listOf(1, 2), listOf(1, 2), listOf(1)) + val df = dataFrameOf(values, list1, list2) + val res = df.explode { list1 and list2 } + val expected = dataFrameOf(values.name(), list1.name(), list2.name())( + 1, 1, 1, + 1, 2, 2, + 1, 3, null, + 2, null, 1, + 2, null, 2, + 3, 1, 1, + 3, 2, 2, + 4, null, 1 + ) + res shouldBe expected + } + + @Test + fun splitUnequalListAndFrames() { + val values by columnOf(1, 2, 3) + val list1 by columnOf(listOf(1, 2, 3), listOf(1), listOf(1, 2)) + val frames by listOf(listOf(1, 2), listOf(1, 2), listOf(1, 2)).map { + val data = it.toColumn("data") + val dataStr = it.map { it.toString() }.toColumn("dataStr") + dataFrameOf(data, dataStr) + }.toColumn() + frames.kind shouldBe ColumnKind.Frame + + val df = dataFrameOf(values, list1, frames) + val res = df.explode { list1 and frames }.ungroup(frames) + val expected = dataFrameOf(values.name(), list1.name(), "data", "dataStr")( + 1, 1, 1, "1", + 1, 2, 2, "2", + 1, 3, null, null, + 2, 1, 1, "1", + 2, null, 2, "2", + 3, 1, 1, "1", + 3, 2, 2, "2" + ) + res shouldBe expected + } + + @Test + fun `update nullable column with not null`() { + val df = dataFrameOf("name", "value")("Alice", 1, null, 2) + df.update("name").at(0).withValue("ALICE") + } + + @Test(expected = IllegalArgumentException::class) + fun `update with wrong type`() { + typed.update("age").with { "string" } + } + + @Test + fun `update with null`() { + val updated = typed.update { age }.at(2).withNull() + updated.age[2] shouldBe null + updated.age.hasNulls shouldBe true + } + + @Test + fun `update with two conditions`() { + fun DataFrame.check() = indices { age == 100 } shouldBe listOf(1, 3) + + typed.update { age }.at(1..3).where { it > 20 }.with { 100 }.check() + typed.update { age }.where { it > 20 }.at(1..3).with { 100 }.check() + } + + @Test + fun `update nulls`() { + typed.update { weight }.where { it == null }.with { 15 }.weight.hasNulls shouldBe false + } + + @Test + fun `mean all columns`() { + typed.mean().values() shouldBe listOf(typed.age.mean(), typed.weight.mean()) + } + + @Test + fun `mean by string`() { + typed.mean("weight") shouldBe typed.weight.mean() + } + + @Test + fun `create column with single string value`() { + val frameCol by columnOf(typed, null, typed) + frameCol.kind() shouldBe ColumnKind.Frame + frameCol.name() shouldBe "frameCol" + + val mapCol by columnOf(typed.name, typed.city) + mapCol.kind() shouldBe ColumnKind.Group + mapCol.name() shouldBe "mapCol" + + val valueCol = columnOf("Alice") named "person" + valueCol.kind() shouldBe ColumnKind.Value + valueCol.name() shouldBe "person" + } + + @Test + fun `append many`() { + val res = typed.append( + "John", + 22, + "New York", + 46, + "Philip", + 25, + "Chelyabinsk", + 36 + ) + res.nrow 
shouldBe typed.nrow + 2 + } + + @Test + fun `append wrong number of arguments`() { + shouldThrow { + dataFrameOf("name", "age")( + "Alice", 15, + "Bob", 20 + ) + .append("John") + } + } + + @Test + fun `first last`() { + typed.first() shouldBe typed[0] + typed.last() shouldBe typed[typed.nrow - 1] + typed.city.first() shouldBe typed[0].city + typed.city.last() shouldBe typed[typed.nrow - 1].city + } + + @Test + fun `select several rows`() { + df[2, 5].nrow shouldBe 2 + df[0, 3, 5] shouldBe df[listOf(0, 3, 5)] + df[3, 5][name, age] shouldBe df[name, age][3, 5] + } + + @Test + fun `select several column values`() { + typed.name[2, 5, 6] shouldBe typed.name[listOf(2, 5, 6)] + } + + @Test + fun `get by column accessors`() { + val animal by columnOf("cat", "snake", "dog") + val age by columnOf(2.5, 3.0, 0.5) + val visits by columnOf(1, 3, 2) + + val df = dataFrameOf(animal, age, visits) + + val d1 = df[1..1][animal, age] + d1.ncol shouldBe 2 + d1.nrow shouldBe 1 + + val d2 = df[0..1]["animal", "age"] + d2.ncol shouldBe 2 + d2.nrow shouldBe 2 + + val r1 = df[1][animal, age] + r1.values() shouldBe d1.single().values() + + val r2 = df[0]["animal", "age"] + r2 shouldBe df[animal, age][0] + } + + @Test + fun between() { + typed.filter { age.between(20, 40, false) }.nrow shouldBe 2 + + typed.filter { age in 20..40 }.nrow shouldBe 5 + + typed.age.between(20, 40).count { it } shouldBe 5 + } + + @Test + fun iterators() { + var counter = 0 + for (a in df) counter++ + counter shouldBe df.nrow + + var ageSum = 0 + for (a in typed.age) + ageSum += a + + ageSum shouldBe typed.age.sum() + } + + @Test + fun `create with random`() { + val df = dataFrameOf('a'..'f').randomInt(3) + df.nrow shouldBe 3 + df.ncol shouldBe ('a'..'f').count() + df.columns().forEach { it.type() shouldBe typeOf() } + } + + @Test + fun `create with list builder`() { + val df = dataFrameOf(4..10 step 2) { h -> List(10) { h } } + df.nrow shouldBe 10 + df.ncol shouldBe 4 + df.columns().forEach { col -> col.forEach { it shouldBe col.name().toInt() } } + } + + @Test + fun `create with vararg header and builder`() { + val df = dataFrameOf("first", "secon", "third") { name -> name.toCharArray().toList() } + df.nrow shouldBe 5 + df.ncol shouldBe 3 + df.columns().forEach { col -> col.name() shouldBe col.values().joinToString("") } + } + + @Test + fun `create with vararg doubles and fill equal`() { + val df = dataFrameOf(1.0.toString(), 2.5.toString()).fill(5, true) + df.nrow shouldBe 5 + df.ncol shouldBe 2 + df.columns().forEach { col -> col.forEach { it shouldBe true } } + } + + @Test + fun `create with list of names and fill nulls`() { + val names = listOf("first", "second") + val df = dataFrameOf(names).nulls(10) + df.nrow shouldBe 10 + df.ncol shouldBe 2 + df.columns().forEach { col -> (col.type() == typeOf() && col.allNulls()) shouldBe true } + } + + @Test + fun `create with list of names and fill true`() { + val first by column() + val second by column() + val df = dataFrameOf(first, second).fill(5) { true } + df.nrow shouldBe 5 + df.ncol shouldBe 2 + df.columns().forEach { col -> (col.type() == typeOf() && col.all { it == true }) shouldBe true } + } + + @Test + fun `create with int range header and int range data `() { + val df = dataFrameOf(1..5) { 1..5 } + df.nrow shouldBe 5 + df.ncol shouldBe 5 + df.columns().forEach { col -> col.forEachIndexed { row, value -> value shouldBe row + 1 } } + } + + @Test + fun `get typed column by name`() { + val col = df.getColumn("name").cast() + col[0].substring(0, 3) shouldBe "Ali" + } + + @Test + fun 
`select all after`() {
+        typed.select { allAfter(age) } shouldBe typed.select { city and weight }
+        typed.select { allSince(age) } shouldBe typed.select { age and city and weight }
+        typed.select { allBefore(age) } shouldBe typed.select { name }
+        typed.select { allUntil(age) } shouldBe typed.select { name and age }
+    }
+
+    @Test
+    fun `cols of type`() {
+        val stringCols = typed.select { colsOf<String?>() }
+        stringCols.columnNames() shouldBe listOf("name", "city")
+    }
+
+    @Test
+    fun `get row value by expression`() {
+        val expression: RowExpression<Person, Int> = { it.age * 2 }
+        val added = typed.add("new") { it[expression] }
+        added shouldBe typed.add("new") { age * 2 }
+    }
+
+    @Test
+    fun `render nested data frames to string`() {
+        val rendered = typed.drop(1).groupBy { name }.groups.asIterable()
+            .joinToString("\n") { renderValueForStdout(it).truncatedContent }
+        rendered shouldBe """
+            [2 x 4]
+            [3 x 4]
+            [1 x 4] { name:Alice, age:20, weight:55 }
+        """.trimIndent()
+    }
+
+    @Test
+    fun `drop where any na`() {
+        val updated = typed.convert { weight }.with { if (name == "Alice") Double.NaN else it?.toDouble() }
+        val expected = updated.count { city != null && !("weight"<Double?>()?.isNaN() ?: true) }
+
+        fun AnyFrame.check() = nrow shouldBe expected
+
+        updated.dropNA { city and weight }.check()
+        updated.dropNA(city, weight).check()
+        updated.dropNA("city", "weight").check()
+        updated.dropNA(Person::city, Person::weight).check()
+    }
+
+    @Test
+    fun `drop where all na`() {
+        val updated = typed.convert { weight }.with { if (name == "Alice") Double.NaN else it?.toDouble() }
+        val expected = updated.count { city != null || !("weight"<Double?>()?.isNaN() ?: true) }
+
+        fun AnyFrame.check() = nrow shouldBe expected
+
+        updated.dropNA(whereAllNA = true) { city and weight }.check()
+        updated.dropNA(city, weight, whereAllNA = true).check()
+        updated.dropNA("city", "weight", whereAllNA = true).check()
+        updated.dropNA(Person::city, Person::weight, whereAllNA = true).check()
+    }
+
+    @Test
+    fun sortWith() {
+        typed.sortWith { r1, r2 ->
+            when {
+                r1.name < r2.name -> -1
+                r1.name > r2.name -> 1
+                else -> -r1.age.compareTo(r2.age)
+            }
+        } shouldBe typed.sortBy { name and age.desc() }
+
+        val comparator = Comparator<DataRow<Person>> { r1, r2 -> -r1.name.compareTo(r2.name) }
+        typed.sortWith(comparator) shouldBe typed.sortByDesc { name }
+    }
+
+    @Test
+    fun sortByDescDesc() {
+        typed.sortByDesc { name.desc() and age } shouldBe typed.sortBy { name and age.desc() }
+    }
+
+    @Test
+    fun `get column by columnRef with data`() {
+        val col by columnOf(1, 2, 3)
+        val df = col.toDataFrame()
+        df[1..2][col].values() shouldBe listOf(2, 3)
+    }
+
+    @Test
+    fun `get by column`() {
+        typed[1..2][{ typed.age }].size() shouldBe typed.age.size()
+    }
+
+    @Test
+    fun `null column test`() {
+        val df = dataFrameOf("col")(null, null)
+        df["col"].kind() shouldBe ColumnKind.Value
+        df["col"].type() shouldBe nothingType(true)
+    }
+
+    @Test
+    fun `groupBy with map`() {
+        typed.groupBy { name.map { it.lowercase() } }.toDataFrame().name.values() shouldBe typed.name.distinct()
+            .lowercase()
+            .values()
+    }
+
+    @Test
+    fun `groupBy none`() {
+        val grouped = typed.groupBy { none() }
+        grouped.keys.ncol shouldBe 0
+        grouped.groups.size shouldBe 1
+        val values = grouped.values()
+        values.nrow shouldBe 1
+        values.columns().forEach {
+            it.typeClass shouldBe List::class
+            (it[0] as List<*>).size shouldBe typed.nrow
+        }
+        values.explode() shouldBe typed
+    }
+
+    @Test
+    fun `pivot max`() {
+        val pivoted = typed.pivot(inward = false) { city }.groupBy { name }.max { age }
+        pivoted.single {
name == "Charlie" }["Moscow"] shouldBe 30 + } + + @Test + fun `pivot all values`() { + val pivoted = typed.pivot(inward = false) { city }.groupBy { name }.values() + pivoted.ncol shouldBe 1 + typed.city.countDistinct() + pivoted.columns().drop(1).forEach { + it.kind() shouldBe ColumnKind.Group + it.asColumnGroup().columnNames() shouldBe listOf("age", "weight") + } + } + + @Test + fun `pivot mean values`() { + val pivoted = typed.pivot { city }.groupBy { name }.mean() + pivoted.getColumnGroup(1).columns().forEach { + it.kind() shouldBe ColumnKind.Group + val group = it.asColumnGroup() + group.columnNames() shouldBe listOf("age", "weight") + group.columns().forEach { + it.type() shouldBe typeOf() + } + } + } + + @Test + fun `aggregate dataframe with pivot`() { + val summary = typed.aggregate { + count() into "count" + pivot { name }.max { age } + sum { weight } into "total weight" + } + val expected = dataFrameOf("count", "Alice", "Bob", "Charlie", "total weight")(7, 20, 45, 40, 354) + summary shouldBe expected.group { cols(1..3) }.into("name")[0] + } + + @Test + fun `pivot grouped max`() { + val pivoted = typed.pivot { name }.groupBy { city }.max() + pivoted.getColumnGroup("name").columns().forEach { + it.kind() shouldBe ColumnKind.Group + val group = it.asColumnGroup() + group.columnNames() shouldBe listOf("age", "weight") + } + } + + @Test + fun `find the longest string`() { + val longestCityName = "Taumatawhakatangihangakoauauotamateaturipukakapikimaungahoronukupokaiwhenuakitanatahu" + val updated = typed.update { city }.where { it == "Dubai" }.withValue(longestCityName) + updated.valuesNotNull { colsOf() }.maxByOrNull { it.length } shouldBe longestCityName + } + + @Test + fun `sort by expression`() { + val sorted = typed.sortBy { expr { name.length }.desc() } + sorted.name.values() shouldBe typed.name.values().sortedByDescending { it.length } + } + + @Test + fun `grouped sort by count`() { + val sorted = typed.groupBy { name }.sortByCount() + sorted.toDataFrame().name.values() shouldBe typed.rows().groupBy { it.name }.toList() + .sortedByDescending { it.second.size }.map { it.first } + } + + @Test + fun `grouped sort by key`() { + val sorted = typed.groupBy { name }.sortByKey() + sorted.toDataFrame().name.values() shouldBe typed.name.distinct().values().sorted() + } + + @Test + fun `infer ColumnGroup type in convert with`() { + val g by frameColumn() + val grouped = typed.groupBy { name }.toDataFrame(g.name).convert(g).with { it.first() } + grouped[g.name].kind() shouldBe ColumnKind.Group + } + + @Test + fun `filter GroupBy by groups`() { + val grouped = typed.groupBy { name } + val filtered = grouped.filter { group.nrow > 2 }.concat() + filtered shouldBe typed.filter { name == "Charlie" } + } + + @Test + fun `split inplace`() { + val split = typed.split { name }.by { it.toCharArray().asIterable() }.inplace() + split["name"] shouldBe typed.name.map { it.toCharArray().toList() } + } + + @Test + fun `split into rows with transform`() { + val split = typed.split { city }.by { it.toCharArray().toList() }.intoRows() + split.nrow shouldBe typed.city.sumOf { it?.length ?: 0 } + } + + @Test + fun `render to string`() { + val expected = """ + name age city weight + 0 Alice 15 London 54 + 1 Bob 45 Dubai 87 + 2 Charlie 20 Moscow null + 3 Charlie 40 Milan null + 4 Bob 30 Tokyo 68 + 5 Alice 20 null 55 + 6 Charlie 30 Moscow 90 + """.trimIndent() + + typed.toString().trimIndent() shouldBe expected + } + + @Test + fun `isNumber`() { + typed.age.isNumber() shouldBe true + typed.weight.isNumber() 
shouldBe true + } + + @Test + fun `pivot null to default`() { + val pivoted = typed.groupBy { name }.pivot { city }.default(0).min { weight } + pivoted.columns().forEach { + it.hasNulls() shouldBe false + } + } + + @Test + fun `iterable to column`() { + val ref by column() + val col = listOf("a", null).toColumn(ref) + col.hasNulls() shouldBe true + } + + @Test + fun `columnAccessor map linear`() { + // SampleStart + val age by column() + var counter = 0 + val year by age.map { + counter++ + 2021 - it + } + df.filter { year > 2000 }.nrow shouldBe 3 + counter shouldBe df.nrow + // SampleEnd + } + + @Test + fun convertTo() { + data class Target( + val name: String, + val age: Int, + val city: String?, + val weight: Int?, + ) + + df.convertTo() shouldBe df + df.convert { age }.toStr().convertTo() shouldBe df + df.add("col") { 1 }.convertTo(ExcessiveColumns.Remove) shouldBe df + + val added = df.add("col") { 1 } + added.convertTo(typeOf(), ExcessiveColumns.Keep) shouldBe added + + df.remove { city }.convertTo() shouldBe df.update { city }.withNull().move { city }.toRight() + + shouldThrow { + df.remove { age }.convertTo() + } + + df.remove { age }.convertTo { + fill { age }.with { -1 } + } shouldBe df.update { age }.with { -1 }.move { age }.toRight() + + shouldThrow { + df.update { name }.at(2).withNull().convertTo() + } + + shouldThrow { + df.convert { age }.toStr().convertToImpl( + typeOf(), + allowConversion = false, + ExcessiveColumns.Remove + ) + } + + shouldThrow { + df.add("col") { 1 }.convertTo(ExcessiveColumns.Fail) shouldBe df + } + + val list = df.toListOf() + list shouldBe df.convertTo().toList() + + val listDf = list.toDataFrame() + listDf shouldBe df + listDf.toList() shouldBe list + } + + @Test + fun typedFrameColumn() { + @DataSchema + data class Student(val name: String, val age: Int, val weight: Int?) + + @DataSchema + data class Target(val city: String?, val students: List) + + val grouped = df.groupBy { city }.toDataFrame("students") + + val list = grouped.toListOf() + list shouldBe grouped.convertTo().toList() + + val listDf = list.toDataFrame(maxDepth = 2) + listDf shouldBe grouped.update { getFrameColumn("students") }.with { it.remove("city") } + + listDf.toList() shouldBe list + } + + @Test + fun reorderColumns() { + typed.reorderColumnsByName().columnNames() shouldBe typed.columnNames().sorted() + val grouped = typed.groupBy { city }.into("a").reorderColumnsByName() + grouped.columnNames() shouldBe listOf("a", "city") + grouped.getFrameColumn("a")[0].columnNames() shouldBe typed.columnNames().sorted() + } + + @Test + fun typedColumnGroup() { + @DataSchema + data class Info(val age: Int, val weight: Int?) + + @DataSchema + data class Target(val name: String, val info: Info, val city: String?) 
+ + val grouped = typed.group { age and weight }.into("info") + + val list = grouped.toListOf() + list shouldBe grouped.convertTo().toList() + + val listDf = list.toDataFrame(maxDepth = 2) + listDf shouldBe grouped + listDf.toList() shouldBe list + } + + @Test + fun splitWithRegex() { + val data by column() + val merged = typed.merge { name and city }.by("|").into(data) + merged.split { data }.match("""(.*)\|(.*)""".toRegex()).into("name", "city") shouldBe + typed.update { city }.with { it ?: "null" }.move { city }.to(1) + } + + @Test + fun splitIntoThisAndNewColumn() { + val split = typed.split { name }.by { listOf(it.dropLast(1), it.last()) }.into("name", "lastChar") + split.columnNames().sorted() shouldBe (typed.columnNames() + "lastChar").sorted() + } + + @Test + fun groupByAggregateSingleColumn() { + val agg = typed.groupBy { name }.aggregate { city into "city" } + agg shouldBe typed.groupBy { name }.values { city } + agg["city"].type shouldBe typeOf>() + } + + @Test + fun implodeWithNulls() { + val merged = typed.update { weight }.where { name == "Charlie" }.withNull() + .select { name and weight } + .implode(dropNA = true) { weight } + + merged["weight"].type() shouldBe typeOf>() + } + + @Test + fun updateWithZero() { + val updated = typed + .convert { weight }.toDouble() + .update { colsOf() }.where { name == "Charlie" }.withZero() + updated.age.type shouldBe typeOf() + updated["weight"].type shouldBe typeOf() + val filtered = updated.filter { name == "Charlie" } + filtered.nrow shouldBe 3 + filtered.age.forEach { + it shouldBe 0 + } + filtered["weight"].forEach { + it shouldBe .0 + } + } + + @Test + fun map() { + val mapped = typed.mapToFrame { + name into "name" + "year" from 2021 - age + "CITY" from { city?.uppercase() } + } + mapped.columnNames() shouldBe listOf("name", "year", "CITY") + } + + @Test + fun `groupByGroup name clash`() { + val groupName = GroupBy.groupedColumnAccessor.name() + typed.add(groupName) { name } + .groupBy(groupName) + .toDataFrame() + .ncol shouldBe 2 + } + + @Test + fun describe() { + val desc = typed.group { age and weight }.into("info").groupBy { city }.toDataFrame().describe() + desc.nrow shouldBe typed.ncol + 1 + desc.print() + } + + @Test + fun `index by column accessor`() { + val col = listOf(1, 2, 3, 4, 5).toColumn("name") + col.toDataFrame()[1..2][col].size shouldBe 2 + + val col2 = columnOf(1, 2, 3, 4, 5) named "name" + col2.toDataFrame()[1..2][col2].size shouldBe 2 + + val col3 by columnOf(1, 2, 3, 4, 5) + col3.toDataFrame()[1..2][col3].size shouldBe 2 + + val col4 by listOf(1, 2, 3, 4, 5).toColumn() + col4.toDataFrame()[1..2][col4].size shouldBe 2 + } + + @Test + fun `take drop in columns selector`() { + typed.select { take(3) } shouldBe typed.select { cols(0..2) } + typed.select { takeLast(2) } shouldBe typed.select { cols(2..3) } + typed.select { drop(1) } shouldBe typed.select { cols(1..3) } + typed.select { dropLast(1) } shouldBe typed.select { cols(0..2) } + } + + @Test + fun `except in columns selector`() { + typed.select { except { age and weight } } shouldBe typed.select { name and city } + + typed.group { age and weight }.into("info") + .select { dropLast(1) except { "info"["age"] } } shouldBe typed.select { name and weight } + } + + @Test + fun `get by empty path`() { + val all = typed[pathOf()] + + all.asColumnGroup().asDataFrame().columns() shouldBe typed.columns() + + typed.getColumn { emptyPath() } shouldBe all + } + + @Test + fun `update frame column to null`() { + val grouped = typed.groupBy { name }.toDataFrame("group") + 
grouped["group"].kind shouldBe ColumnKind.Frame + val updated = grouped.update("group").at(2).withNull() + updated["group"].kind shouldBe ColumnKind.Value + } + + @Test + fun `merge into same name`() { + typed.merge { name and city }.into("age") shouldBe + typed.merge { name and city }.into("data").remove("age").rename("data" to "age") + } + + @Test + fun `groupBy sort`() { + typed.groupBy { name }.sortByDesc { age }.xs("Charlie").concat() shouldBe typed.filter { name == "Charlie" } + .sortBy { age.desc() }.remove { name } + } + + @Test + fun `split into columns`() { + val group by frameColumn() + typed.groupBy { name }.into(group) + .split(group).intoColumns() + } + + @Test + fun `takedrop for column`() { + typed.age.take(2) shouldBe typed.age[0..1] + typed.age.drop(2) shouldBe typed.age[2 until typed.nrow] + typed.age.takeLast(2) shouldBe typed.age.drop(typed.nrow - 2) + typed.age.dropLast(2) shouldBe typed.age.take(typed.nrow - 2) + } + + @Test + fun `transpose row`() { + typed.select { age and weight }[1].transpose().maxBy { it.value as Int? }.name shouldBe "weight" + typed[2].transpose().dropNulls { value }.name.toList() shouldBe listOf("name", "age", "city") + } + + @Test + fun xs() { + typed.xs("Charlie") shouldBe typed.filter { name == "Charlie" }.remove { name } + typed.xs("Charlie", 20).nrow shouldBe 1 + typed.xs(20) { age }.nrow shouldBe 2 + shouldThrow { + typed.xs(20) { age and weight } + } + shouldThrow { + typed.xs("Charlie", 20) { name } + } + shouldThrow { + typed.xs("Charlie", 20, "Moscow", null, 1) + } + } + + @Test + fun `groupBy xs`() { + typed.groupBy { name }.xs("Charlie").concat() shouldBe typed.xs("Charlie") + typed.groupBy { name }.xs("Moscow") { city }.concat().print() + } + + @Test + fun getMissingColumn() { + val col = typed.getColumnsImpl(UnresolvedColumnsPolicy.Create) { "unknown"() } + col.size shouldBe 1 + col[0].name shouldBe "unknown" + col[0].isMissingColumn() shouldBe true + } + + @Test + fun getMissingColumn2() { + val col = typed.remove { city }.getColumnsImpl(UnresolvedColumnsPolicy.Create) { city } + col.size shouldBe 1 + col[0].name shouldBe typed.city.name() + col[0].isMissingColumn() shouldBe true + } + + @Test + fun `groupBy into accessor or kproperty`() { + val n by column() + + data class Data(@ColumnName("total") val count: Int) + + typed.groupBy { name }.aggregate { + count() into n + count() into Data::count + } shouldBe typed.groupBy { name }.count(n.name()) + .add("total") { "n"() } + } + + @Test + fun `aggregate null row`() { + val aggregated = typed.groupBy { name }.aggregate { + (if (name.first().startsWith("A")) first() else null) into "agg" + }["agg"] + + aggregated.kind shouldBe ColumnKind.Group + aggregated.size shouldBe 3 + aggregated.count { it.isNA } shouldBe 2 + } + + @Test + fun takeWhile() { + typed.takeWhile { weight != null } shouldBe typed[0..1] + typed.takeWhile { true } shouldBe typed + } + + @Test + fun dropWhile() { + typed.dropWhile { weight != null } shouldBe typed.drop(2) + typed.dropWhile { false } shouldBe typed + } + + @Test + fun takeLast() { + typed.takeLast(2) shouldBe typed[5..6] + shouldThrow { + typed.takeLast(-1) + } + typed.takeLast(20) shouldBe typed + } + + @Test + fun dropLast() { + typed.dropLast(2) shouldBe typed[0..4] + shouldThrow { + typed.dropLast(-1) + } + typed.dropLast(20) shouldBe typed.take(0) + } + + @Test + fun drop() { + typed.drop(2) shouldBe typed[2..6] + shouldThrow { + typed.drop(-1) + } + typed.drop(typed.nrow) shouldBe typed.filter { false } + typed.drop(20) shouldBe typed.filter { 
false } + } + + @Test + fun take() { + typed.take(2) shouldBe typed[0..1] + shouldThrow { + typed.take(-1) + } + typed.take(typed.nrow) shouldBe typed + typed.take(20) shouldBe typed + } + + @Test + fun `select into accessor`() { + val newName by column() + typed.select { name into newName and age }.columnNames() shouldBe listOf("newName", "age") + } +} diff --git a/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/testSets/person/DataFrameTreeTests.kt b/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/testSets/person/DataFrameTreeTests.kt new file mode 100644 index 0000000000..d76ad386cd --- /dev/null +++ b/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/testSets/person/DataFrameTreeTests.kt @@ -0,0 +1,737 @@ +package org.jetbrains.kotlinx.dataframe.testSets.person + +import io.kotest.assertions.fail +import io.kotest.assertions.throwables.shouldThrow +import io.kotest.matchers.shouldBe +import io.kotest.matchers.shouldNotBe +import org.jetbrains.dataframe.impl.codeGen.CodeGenerator +import org.jetbrains.dataframe.impl.codeGen.InterfaceGenerationMode +import org.jetbrains.dataframe.impl.codeGen.generate +import org.jetbrains.kotlinx.dataframe.* +import org.jetbrains.kotlinx.dataframe.annotations.DataSchema +import org.jetbrains.kotlinx.dataframe.api.GroupBy +import org.jetbrains.kotlinx.dataframe.api.GroupWithKey +import org.jetbrains.kotlinx.dataframe.api.add +import org.jetbrains.kotlinx.dataframe.api.addId +import org.jetbrains.kotlinx.dataframe.api.after +import org.jetbrains.kotlinx.dataframe.api.append +import org.jetbrains.kotlinx.dataframe.api.asColumnGroup +import org.jetbrains.kotlinx.dataframe.api.asDataFrame +import org.jetbrains.kotlinx.dataframe.api.asGroupBy +import org.jetbrains.kotlinx.dataframe.api.at +import org.jetbrains.kotlinx.dataframe.api.by +import org.jetbrains.kotlinx.dataframe.api.cast +import org.jetbrains.kotlinx.dataframe.api.column +import org.jetbrains.kotlinx.dataframe.api.columnGroup +import org.jetbrains.kotlinx.dataframe.api.columnOf +import org.jetbrains.kotlinx.dataframe.api.columnsCount +import org.jetbrains.kotlinx.dataframe.api.concat +import org.jetbrains.kotlinx.dataframe.api.convert +import org.jetbrains.kotlinx.dataframe.api.count +import org.jetbrains.kotlinx.dataframe.api.dataFrameOf +import org.jetbrains.kotlinx.dataframe.api.dfsOf +import org.jetbrains.kotlinx.dataframe.api.distinct +import org.jetbrains.kotlinx.dataframe.api.dropNulls +import org.jetbrains.kotlinx.dataframe.api.duplicate +import org.jetbrains.kotlinx.dataframe.api.duplicateRows +import org.jetbrains.kotlinx.dataframe.api.emptyDataFrame +import org.jetbrains.kotlinx.dataframe.api.explode +import org.jetbrains.kotlinx.dataframe.api.expr +import org.jetbrains.kotlinx.dataframe.api.filter +import org.jetbrains.kotlinx.dataframe.api.forEach +import org.jetbrains.kotlinx.dataframe.api.frameColumn +import org.jetbrains.kotlinx.dataframe.api.getColumnGroup +import org.jetbrains.kotlinx.dataframe.api.getColumnPath +import org.jetbrains.kotlinx.dataframe.api.getColumnWithPath +import org.jetbrains.kotlinx.dataframe.api.getColumns +import org.jetbrains.kotlinx.dataframe.api.getValue +import org.jetbrains.kotlinx.dataframe.api.group +import org.jetbrains.kotlinx.dataframe.api.groupBy +import org.jetbrains.kotlinx.dataframe.api.implode +import org.jetbrains.kotlinx.dataframe.api.indices +import org.jetbrains.kotlinx.dataframe.api.insert +import org.jetbrains.kotlinx.dataframe.api.into +import 
org.jetbrains.kotlinx.dataframe.api.intoRows +import org.jetbrains.kotlinx.dataframe.api.inward +import org.jetbrains.kotlinx.dataframe.api.isColumnGroup +import org.jetbrains.kotlinx.dataframe.api.isEmpty +import org.jetbrains.kotlinx.dataframe.api.isFrameColumn +import org.jetbrains.kotlinx.dataframe.api.join +import org.jetbrains.kotlinx.dataframe.api.last +import org.jetbrains.kotlinx.dataframe.api.map +import org.jetbrains.kotlinx.dataframe.api.max +import org.jetbrains.kotlinx.dataframe.api.maxBy +import org.jetbrains.kotlinx.dataframe.api.median +import org.jetbrains.kotlinx.dataframe.api.minus +import org.jetbrains.kotlinx.dataframe.api.move +import org.jetbrains.kotlinx.dataframe.api.moveTo +import org.jetbrains.kotlinx.dataframe.api.moveToLeft +import org.jetbrains.kotlinx.dataframe.api.moveToRight +import org.jetbrains.kotlinx.dataframe.api.pathOf +import org.jetbrains.kotlinx.dataframe.api.perRowCol +import org.jetbrains.kotlinx.dataframe.api.pivot +import org.jetbrains.kotlinx.dataframe.api.remove +import org.jetbrains.kotlinx.dataframe.api.rename +import org.jetbrains.kotlinx.dataframe.api.rows +import org.jetbrains.kotlinx.dataframe.api.select +import org.jetbrains.kotlinx.dataframe.api.single +import org.jetbrains.kotlinx.dataframe.api.sortBy +import org.jetbrains.kotlinx.dataframe.api.split +import org.jetbrains.kotlinx.dataframe.api.sumOf +import org.jetbrains.kotlinx.dataframe.api.toColumnAccessor +import org.jetbrains.kotlinx.dataframe.api.toTop +import org.jetbrains.kotlinx.dataframe.api.under +import org.jetbrains.kotlinx.dataframe.api.ungroup +import org.jetbrains.kotlinx.dataframe.api.update +import org.jetbrains.kotlinx.dataframe.api.values +import org.jetbrains.kotlinx.dataframe.api.with +import org.jetbrains.kotlinx.dataframe.api.withNull +import org.jetbrains.kotlinx.dataframe.api.xs +import org.jetbrains.kotlinx.dataframe.columns.ColumnGroup +import org.jetbrains.kotlinx.dataframe.columns.ColumnKind +import org.jetbrains.kotlinx.dataframe.columns.FrameColumn +import org.jetbrains.kotlinx.dataframe.columns.depth +import org.junit.Test +import kotlin.reflect.typeOf + +class DataFrameTreeTests : BaseTest() { + + @DataSchema + interface NameAndCity { + val name: String + val city: String? + } + + @DataSchema + interface GroupedPerson { + val nameAndCity: DataRow + val age: Int + val weight: Int? 
+ } + + val df2 = df.move { name and city }.under("nameAndCity") + val typed2 = df2.cast() + + val nameAndCity by columnGroup() + val nameInGroup = nameAndCity.column("name") + + @Test + fun create() { + val nameAndCity by columnOf(typed.name, typed.city) + val df3 = dataFrameOf(nameAndCity, typed.age, typed.weight) + df3 shouldBe df2 + } + + @Test + fun createFrameColumn() { + val rowsColumn by columnOf(typed[0..3], typed[4..5], typed[6..6]) + val df = dataFrameOf(rowsColumn).asGroupBy { rowsColumn } + val res = df.concat() + res shouldBe typed + } + + @Test + fun createFrameColumn2() { + val id by column(typed.indices()) + val groups by id.map { typed[it..it] } + val df = dataFrameOf(id, groups) + df.rowsCount() shouldBe typed.rowsCount() + df.forEach { + val rowId = it[id] + groups() shouldBe typed[rowId..rowId] + } + } + + @Test + fun `select dfs under group`() { + df2.select { nameAndCity.dfsOf() } shouldBe typed2.select { nameAndCity.name } + df2.select { nameAndCity.dfsOf() } shouldBe typed2.select { nameAndCity.name and nameAndCity.city } + } + + @Test + fun `selects`() { + df2.select { nameAndCity.cols() } shouldBe typed2.nameAndCity.select { all() } + df2.select { nameAndCity.cols { !it.hasNulls() } } shouldBe typed2.select { nameAndCity.name } + df2.select { nameAndCity.cols(0..1) } shouldBe typed2.nameAndCity.select { all() } + df2.select { nameAndCity.col(1) } shouldBe typed2.select { nameAndCity.city } + df2.select { nameAndCity["city"] } shouldBe typed2.select { nameAndCity.city } + df2.select { nameAndCity.cols("city", "name") } shouldBe typed2.select { nameAndCity.city and nameAndCity.name } + df2.select { nameAndCity.cols(name, city) } shouldBe typed2.select { nameAndCity.all() } + df2.select { nameAndCity[name] } shouldBe typed2.nameAndCity.select { name } + df2.select { nameAndCity.cols().drop(1) } shouldBe typed2.nameAndCity.select { city } + + typed2.select { nameAndCity.cols() } shouldBe typed2.nameAndCity.select { all() } + typed2.select { nameAndCity.cols { !it.hasNulls() } } shouldBe typed2.select { nameAndCity.name } + typed2.select { nameAndCity.cols(0..1) } shouldBe typed2.nameAndCity.select { all() } + typed2.select { nameAndCity.col(1) } shouldBe typed2.select { nameAndCity.city } + typed2.select { nameAndCity["city"] } shouldBe typed2.select { nameAndCity.city } + typed2.select { nameAndCity.cols("city", "name") } shouldBe typed2.select { nameAndCity.city and nameAndCity.name } + typed2.select { nameAndCity.cols(name, city) } shouldBe typed2.select { nameAndCity.all() } + typed2.select { nameAndCity[name] } shouldBe typed2.nameAndCity.select { name } + typed2.select { nameAndCity.cols().drop(1) } shouldBe typed2.nameAndCity.select { city } + + df2.select { col(1) } shouldBe typed2.select { age } + df2.select { nameInGroup } shouldBe typed2.nameAndCity.select { name } + + df2[nameInGroup] shouldBe typed2.nameAndCity.name + } + + @Test + fun getColumnPath() { + typed2.getColumnPath { nameAndCity["city"] }.size shouldBe 2 + typed2.getColumnPath { nameAndCity.col(1) }.size shouldBe 2 + } + + @Test + fun `group indexing`() { + df2[nameAndCity][city] shouldBe typed.city + typed2.nameAndCity.city shouldBe typed.city + df2["nameAndCity"]["city"] shouldBe typed.city + } + + @Test + fun `convert column group`() { + val expected = typed.select { city.rename("nameAndCity") and age and weight } + + df2.convert { nameAndCity }.with { it[city] } shouldBe expected + df2.convert { nameAndCity }.with { this[nameAndCity][city] } shouldBe expected + typed2.convert { nameAndCity 
}.with { nameAndCity.city } shouldBe expected + typed2.convert { nameAndCity }.with { it.city } shouldBe expected + } + + @Test + fun `slice`() { + val expected = typed[0..2].name + val actual = typed2[0..2].nameAndCity.name + actual shouldBe expected + } + + @Test + fun `filter`() { + val expected = typed.filter { city == null }.select { weight } + typed2.filter { nameAndCity.city == null }.select { weight } shouldBe expected + df2.filter { it[nameAndCity][city] == null }.select { weight } shouldBe expected + } + + @Test + fun `select`() { + val expected = typed.select { name and age } + typed2.select { nameAndCity.name and age } shouldBe expected + df2.select { it[nameAndCity][name] and age } shouldBe expected + } + + @Test + fun `sort`() { + val expected = typed.sortBy { name and age }.moveTo(1) { city } + typed2.sortBy { nameAndCity.name and age }.ungroup { nameAndCity } shouldBe expected + } + + @Test + fun `move`() { + val actual = typed2.move { nameAndCity.name }.into("name") + actual.columnNames() shouldBe listOf("nameAndCity", "name", "age", "weight") + actual.getColumnGroup("nameAndCity").columnNames() shouldBe listOf("city") + } + + @Test + fun `groupBy`() { + val expected = typed.groupBy { name }.max { age } + typed2.groupBy { nameAndCity.name }.max { age } shouldBe expected + } + + @Test + fun `distinct`() { + val duplicated = typed2.concat(typed2) + duplicated.rowsCount() shouldBe typed2.rowsCount() * 2 + val dist = duplicated.nameAndCity.distinct() + dist shouldBe typed2.nameAndCity.distinct() + dist.rowsCount() shouldBe typed2.rowsCount() - 1 + } + + @Test + fun selectDfs() { + val cols = typed2.select { dfs { it.hasNulls } } + cols shouldBe typed2.select { nameAndCity.city and weight } + } + + @Test + fun `get child column by accessor`() { + val cityCol by column("city") + val selected = typed2.getColumnWithPath { + val g = nameAndCity + val c = g.get(cityCol) + c + } + selected.path shouldBe pathOf("nameAndCity", "city") + } + + @Test + fun splitRows() { + val selected = typed2.select { nameAndCity } + val nested = selected.implode(dropNA = false) { nameAndCity.city } + val mergedCity = column>("city") + val res = nested.split { + nameAndCity[mergedCity] + }.intoRows() + val expected = selected.sortBy { nameAndCity.name } + val actual = res.sortBy { nameAndCity.name } + actual shouldBe expected + } + + @Test + fun pivot() { + val modified = df.append("Alice", 55, "Moscow", 100) + val df2 = modified.move { name and city }.under("nameAndCity") + val typed2 = df2.cast() + + fun GroupBy.map(body: Selector, R>): List = keys.rows().mapIndexedNotNull { index, row -> + val group = groups[index] + val g = GroupWithKey(row, group) + body(g, g) + } + + val expected = modified.cast().groupBy { name and city }.map { + val value = if (key.city == "Moscow") group.age.toList() + else group.age[0] + (key.name to key.city.toString()) to value + }.plus("Bob" to "Moscow" to emptyList()).toMap() + + fun DataFrame.check() { + columnsCount() shouldBe 2 + val cities = getColumnGroup("nameAndCity").getColumnGroup("city") + cities.columnsCount() shouldBe typed2.nameAndCity.city.countDistinct() + this[name] shouldBe typed.name.distinct() + val data = cities.columns() + data.forEach { + if (it.name() == "Moscow") it.type() shouldBe typeOf>() + else it.type() shouldBe typeOf() + } + + val actual = data.flatMap { col -> + val city = col.name() + rows().map { (it[name] to city) to col[it.index()] }.filter { it.second != null } + }.toMap() + actual shouldBe expected + } + + typed2.pivot { 
nameAndCity.city }.groupBy { nameAndCity.name }.values { age }.check() + df2.pivot(nameAndCity[city]).groupBy { nameAndCity[name] }.values(age).check() + df2.pivot { it[GroupedPerson::nameAndCity][NameAndCity::city] }.groupBy { it[GroupedPerson::nameAndCity][NameAndCity::name] }.values( + GroupedPerson::age + ).check() + df2.pivot { it["nameAndCity"]["city"] }.groupBy { it["nameAndCity"]["name"] }.values("age").check() + } + + @Test + fun `pivot grouped column`() { + val grouped = typed.group { age and weight }.into("info") + val pivoted = grouped.pivot { city }.groupBy { name }.values("info") + pivoted.columnsCount() shouldBe 2 + + val expected = + typed.rows().groupBy { it.name to (it.city ?: "null") }.mapValues { it.value.map { it.age to it.weight } } + val dataCols = pivoted.getColumns { col(1).all() } + + dataCols.forEach { (it.isColumnGroup() || it.isFrameColumn()) shouldBe true } + + val names = pivoted.name + dataCols.forEach { col -> + val city = col.name() + pivoted.indices().forEach { row -> + val name = names[row] + val value = col[row] + val expValues = expected[name to city] + when { + expValues == null -> when (value) { + null -> { + } + is AnyRow -> value.isEmpty() shouldBe true + is AnyFrame -> value.columnsCount() shouldBe 0 + } + expValues.size == 1 -> { + value shouldNotBe null + val single = + if (value is AnyRow) value else if (value is AnyFrame) value[0] else fail("invalid value type") + single.columnsCount() shouldBe 2 + single.getValue("age") to single.getValue("weight") shouldBe expValues[0] + } + else -> { + val df = value as? AnyFrame + df shouldNotBe null + df!!.rows().map { it["age"] as Int to it["weight"] as Int? } + .sortedBy { it.first } shouldBe expValues.sortedBy { it.first } + } + } + } + } + } + + @Test + fun splitCols() { + val split = typed2.split { nameAndCity.name }.by { it.toCharArray().toList() }.inward { "char$it" } + split.columnNames() shouldBe typed2.columnNames() + split.rowsCount() shouldBe typed2.rowsCount() + split.nameAndCity.columnNames() shouldBe typed2.nameAndCity.columnNames() + val nameGroup = split.nameAndCity.name.asColumnGroup() + nameGroup.name() shouldBe "name" + nameGroup.columnsCount() shouldBe typed2.nameAndCity.name.map { it.length }.max() + nameGroup.columnNames() shouldBe (1..nameGroup.columnsCount()).map { "char$it" } + } + + @Test + fun `split into rows`() { + val split = typed2.split { nameAndCity.name }.by { it.toCharArray().toList() }.intoRows() + val merged = split.implode { nameAndCity.name } + val joined = merged.convert { nameAndCity.name }.cast>().with { it.joinToString("") } + joined shouldBe typed2 + } + + @Test + fun `all except`() { + val info by columnGroup() + val moved = typed.group { except(name) }.into(info) + val actual = moved.select { except(info) } + actual shouldBe typed.select { name } + } + + @Test + fun `move and group`() { + val info by columnGroup() + val moved = typed.group { except(name) }.into(info) + val grouped = moved.groupBy { except(info) }.toDataFrame() + grouped.rowsCount() shouldBe typed.name.countDistinct() + } + + @Test + fun `merge rows into table`() { + val info by columnGroup() + val moved = typed.group { except(name) }.into(info) + val merged = moved.implode { info } + val grouped = typed.groupBy { name }.updateGroups { remove { name } } + val expected = grouped.toDataFrame().rename(grouped.groups).into(info) + merged shouldBe expected + } + + @Test + fun `update grouped column to table`() { + val info by columnGroup() + val grouped = typed.group { age and weight }.into(info) + 
val updated = grouped.convert(info).perRowCol { row, column -> column.asColumnGroup().asDataFrame() } + val col = updated[info.name()] + col.kind() shouldBe ColumnKind.Frame + val table = col as FrameColumn<*> + table.schema.value.columns.map { it.key }.sorted() shouldBe typed.select { age and weight }.columnNames() + .sorted() + } + + @Test + fun extensionPropertiesTest() { + val code = CodeGenerator.create().generate( + interfaceMode = InterfaceGenerationMode.None, + extensionProperties = true + ).declarations + val columnsContainer = ColumnsContainer::class.qualifiedName + val dataFrameRowBase = DataRow::class.qualifiedName + val dataFrameRow = DataRow::class.qualifiedName + val className = GroupedPerson::class.qualifiedName + val shortName = GroupedPerson::class.simpleName!! + val nameAndCity = NameAndCity::class.qualifiedName + val groupedColumn = ColumnGroup::class.qualifiedName + val columnData = DataColumn::class.qualifiedName + val expected = """ + val $columnsContainer<$className>.age: $columnData @JvmName("${shortName}_age") get() = this["age"] as $columnData + val $dataFrameRowBase<$className>.age: kotlin.Int @JvmName("${shortName}_age") get() = this["age"] as kotlin.Int + val $columnsContainer<$className?>.age: $columnData @JvmName("Nullable${shortName}_age") get() = this["age"] as $columnData + val $dataFrameRowBase<$className?>.age: kotlin.Int? @JvmName("Nullable${shortName}_age") get() = this["age"] as kotlin.Int? + val $columnsContainer<$className>.nameAndCity: $groupedColumn<$nameAndCity> @JvmName("${shortName}_nameAndCity") get() = this["nameAndCity"] as $groupedColumn<$nameAndCity> + val $dataFrameRowBase<$className>.nameAndCity: $dataFrameRow<$nameAndCity> @JvmName("${shortName}_nameAndCity") get() = this["nameAndCity"] as $dataFrameRow<$nameAndCity> + val $columnsContainer<$className?>.nameAndCity: $groupedColumn<$nameAndCity?> @JvmName("Nullable${shortName}_nameAndCity") get() = this["nameAndCity"] as $groupedColumn<$nameAndCity?> + val $dataFrameRowBase<$className?>.nameAndCity: $dataFrameRow<$nameAndCity?> @JvmName("Nullable${shortName}_nameAndCity") get() = this["nameAndCity"] as $dataFrameRow<$nameAndCity?> + val $columnsContainer<$className>.weight: $columnData @JvmName("${shortName}_weight") get() = this["weight"] as $columnData + val $dataFrameRowBase<$className>.weight: kotlin.Int? @JvmName("${shortName}_weight") get() = this["weight"] as kotlin.Int? + val $columnsContainer<$className?>.weight: $columnData @JvmName("Nullable${shortName}_weight") get() = this["weight"] as $columnData + val $dataFrameRowBase<$className?>.weight: kotlin.Int? @JvmName("Nullable${shortName}_weight") get() = this["weight"] as kotlin.Int? 
+ """.trimIndent() + code shouldBe expected + } + + @Test + fun parentColumnTest() { + val res = typed2.move { dfs { it.depth > 0 } }.toTop { it.parentName + "-" + it.name } + res.columnsCount() shouldBe 4 + res.columnNames() shouldBe listOf("nameAndCity-name", "nameAndCity-city", "age", "weight") + } + + @Test + fun `group cols`() { + val joined = typed2.move { allDfs() }.into { pathOf(it.path.joinToString(".")) } + val grouped = joined.group { nameContains(".") }.into { it.name().substringBefore(".") } + val expected = typed2.rename { nameAndCity.all() }.into { it.path.joinToString(".") } + grouped shouldBe expected + } + + @Test + fun `group into column`() { + val grouped = typed2.group { age }.into { nameAndCity } + grouped.nameAndCity.columnsCount() shouldBe 3 + grouped.columnsCount() shouldBe 2 + } + + @Test + fun rename() { + val res = typed2.rename { nameAndCity.all() }.into { it.name().capitalize() } + res.nameAndCity.columnNames() shouldBe typed2.nameAndCity.columnNames().map { it.capitalize() } + } + + @Test + fun moveAfter() { + val moved = typed2.move { age }.after { nameAndCity.name } + moved.columnsCount() shouldBe 2 + moved.nameAndCity.columnsCount() shouldBe 3 + moved.nameAndCity.select { all() } shouldBe dataFrameOf( + typed2.nameAndCity.name, + typed2.age, + typed2.nameAndCity.city + ) + } + + @Test + fun moveAfter2() { + val moved = typed2.move { nameAndCity.name }.after { age } + moved.columnsCount() shouldBe 4 + moved.nameAndCity.columnsCount() shouldBe 1 + moved.remove { nameAndCity } shouldBe typed2.select { age and nameAndCity.name and weight } + } + + @Test + fun splitFrameColumnsIntoRows() { + val grouped = typed.groupBy { city } + val groupCol = grouped.groups.name() + val plain = grouped.toDataFrame() + val res = + plain.split(grouped.groups).intoRows().remove { it[groupCol]["city"] }.ungroup(groupCol).sortBy { name and age } + res shouldBe typed.sortBy { name and age }.moveToLeft { city } + } + + @Test + fun splitFrameColumnIntoColumns() { + val grouped = typed.groupBy { city } + val groupCol = grouped.groups.name() + val plain = grouped.toDataFrame() + val res = + plain.split(grouped.groups).intoRows().remove { it[groupCol]["city"] }.ungroup(groupCol).sortBy { name and age } + res shouldBe typed.sortBy { name and age }.moveToLeft { city } + } + + @Test + fun explodeFrameColumnWithNulls() { + val grouped = typed.groupBy { city } + val groupCol = grouped.groups.toColumnAccessor() + val plain = grouped.toDataFrame() + .update { groupCol }.at(1).withNull() + .update { groupCol }.at(2).with { emptyDataFrame() } + .update { groupCol }.at(3).with { it.filter { false } } + val res = plain.explode(dropEmpty = false) { groupCol } + val expected = plain[groupCol.name()].sumOf { Math.max((it as AnyFrame?)?.rowsCount() ?: 0, 1) } + res.rowsCount() shouldBe expected + } + + @Test + fun `join with left path`() { + val joined = (typed2 - { weight }).join(typed - { city }) { nameAndCity.name.match(right.name) and age } + joined shouldBe typed2 + } + + @Test + fun `join with right path`() { + val joined = (typed - { city }).join(typed2 - { weight }) { name.match(right.nameAndCity.name) and age } + val expected = typed.moveToRight { city }.move { city }.under("nameAndCity") + joined shouldBe expected + } + + @Test + fun `join by map column`() { + val nameAndAge by columnGroup() + val cityFirst by nameAndAge.column() + val grouped = typed.group { name and age }.into(nameAndAge).add(cityFirst) { city?.get(0) } + grouped[nameAndAge].columnsCount() shouldBe 3 + + val left = grouped 
- { weight } + val right = grouped - { city } + val joined = left.join(right) { nameAndAge } + joined shouldBe grouped + } + + @Test + fun `join by frame column`() { + val left = typed.groupBy { name }.updateGroups { it.remove { name and city } } + val right = + typed.update { name }.with { it.reversed() }.groupBy { name }.updateGroups { it.remove { name and city } } + val groupCol = left.groups.toColumnAccessor() + val joined = left.toDataFrame().join(right.toDataFrame()) { groupCol } + joined.columnsCount() shouldBe 3 + val name1 by column() + joined.columnNames() shouldBe listOf(typed.name.name(), groupCol.name(), name1.name()) + joined[groupCol].kind() shouldBe ColumnKind.Frame + joined.select { cols(0, 1) } shouldBe left.toDataFrame() + joined.select { cols(2, 1) }.rename(name1).into(typed.name) shouldBe right.toDataFrame() + joined.name shouldBe left.keys.name + joined.forEach { it[name1] shouldBe it.name.reversed() } + } + + @Test + fun `add frame column`() { + val frameCol by frameColumn() + val added = typed2.add(frameCol) { nameAndCity.duplicate(3) } + added[frameCol].kind() shouldBe ColumnKind.Frame + added[frameCol].forEach { it.rowsCount() shouldBe 3 } + } + + @Test + fun `insert column`() { + val colName = "reversed" + fun DataFrame.check() { + nameAndCity.columnsCount() shouldBe 3 + nameAndCity.columnNames() shouldBe listOf( + typed2.nameAndCity.name.name(), + colName, + typed2.nameAndCity.city.name() + ) + } + + typed2.insert(colName) { nameAndCity.name.reversed() }.after { nameAndCity.name }.check() + } + + @Test + fun append() { + val res = typed2.append(listOf("Bill", "San Francisco"), null, 66) + res.rowsCount() shouldBe typed2.rowsCount() + 1 + res.nameAndCity.last().values() shouldBe listOf("Bill", "San Francisco") + res.age.hasNulls() shouldBe true + } + + @Test + fun `append nulls`() { + val res = typed2.append(null, null, null) + res.rowsCount() shouldBe typed2.rowsCount() + 1 + res.nameAndCity.last().values() shouldBe listOf(null, null) + res.age.hasNulls() shouldBe true + res.nameAndCity.name.hasNulls() shouldBe true + } + + @Test + fun `create data frame from map column`() { + val df = dataFrameOf(typed.name, typed2.nameAndCity) + df.rowsCount() shouldBe typed.rowsCount() + } + + @Test + fun `column group properties`() { + typed2.nameAndCity.name() shouldBe "nameAndCity" + val renamed = typed2.nameAndCity.rename("newName") + renamed.name() shouldBe "newName" + renamed.select { name } shouldBe typed2.select { nameAndCity.name } + renamed.filter { name.startsWith("A") }.rowsCount() shouldBe typed.count { name.startsWith("A") } + } + + @Test + fun `distinct at column group`() { + typed2.nameAndCity.distinct().filter { name.startsWith("A") }.columns() shouldBe typed.select { name and city }.distinct() + .filter { name.startsWith("A") }.columns() + } + + @Test + fun `check column path`() { + typed2.getColumnPath { nameAndCity.name }.size shouldBe 2 + } + + @Test + fun `filter not null without arguments`() { + typed2.dropNulls() shouldBe typed.dropNulls { weight }.group { name and city }.into("nameAndCity") + } + + @Test + fun `select group`() { + val groupCol = typed2[nameAndCity] + typed2.select { groupCol and age }.columnNames() shouldBe listOf("nameAndCity", "age") + } + + @Test + fun `select columns range`() { + val added = typed2.move { age }.after { nameAndCity.name } + val expected = typed2.select { nameAndCity.name and age and nameAndCity.city } + + added.select { nameAndCity.name..nameAndCity.city } shouldBe expected + + shouldThrow { + added.select { 
nameAndCity.name..weight } + } + + shouldThrow { + added.select { weight..nameAndCity.name } + } + + added.select { nameAndCity.city..nameAndCity.name }.isEmpty() shouldBe true + + added.select { nameAndCity.select { name..city } } shouldBe expected + } + + @Test + fun groupByAggregateSingleColumn() { + val agg = typed2.groupBy { age }.aggregate { nameAndCity into "nameAndCity" } + agg["nameAndCity"].kind() shouldBe ColumnKind.Frame + typed2.groupBy { age }.aggregate { nameAndCity.asDataFrame() into "nameAndCity" } shouldBe agg + typed2.groupBy { age }.values { nameAndCity } shouldBe agg + } + + @Test + fun `xs nested columns`() { + typed2.xs("Bob", "Tokyo").rowsCount() shouldBe 1 + } + + @Test + fun `duplicate dataframe`() { + typed2.duplicate(2) shouldBe columnOf(typed2, typed2) + } + + @Test + fun `duplicate row`() { + typed2[2].duplicate(2) shouldBe typed2[2, 2] + } + + @Test + fun `duplicate selected rows`() { + typed2.duplicateRows(2) { nameAndCity.name == "Alice" } shouldBe typed2[0, 0, 1, 2, 3, 4, 5, 5, 6] + } + + @Test + fun `duplicate all rows`() { + typed2.duplicateRows(2) shouldBe typed2.addId("id").let { + it.concat(it).sortBy("id").remove("id") + } + } + + @Test + fun `select column group`() { + typed2.aggregate { + nameAndCity()[2..3].name.distinct().single() into "name" + }["name"] shouldBe "Charlie" + } + + @Test + fun `select frame column`() { + val group by frameColumn() + + typed2 + .groupBy { expr { age > 30 } into "isOld" }.into(group) + .aggregate { + group().maxBy { rowsCount() }.weight.median() into "m" + }["m"] shouldBe 61 + } + + @Test + fun `column group as DataFrame`() { + val a: DataFrame = typed2["nameAndCity"].cast>().asDataFrame() + val b: DataFrame = typed2[nameAndCity].cast().asDataFrame() + } +} diff --git a/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/testSets/person/DataRowTests.kt b/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/testSets/person/DataRowTests.kt new file mode 100644 index 0000000000..d8ded86c84 --- /dev/null +++ b/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/testSets/person/DataRowTests.kt @@ -0,0 +1,108 @@ +package org.jetbrains.kotlinx.dataframe.testSets.person + +import io.kotest.matchers.shouldBe +import org.jetbrains.kotlinx.dataframe.api.by +import org.jetbrains.kotlinx.dataframe.api.columnNames +import org.jetbrains.kotlinx.dataframe.api.convert +import org.jetbrains.kotlinx.dataframe.api.dataFrameOf +import org.jetbrains.kotlinx.dataframe.api.diff +import org.jetbrains.kotlinx.dataframe.api.drop +import org.jetbrains.kotlinx.dataframe.api.dropLast +import org.jetbrains.kotlinx.dataframe.api.first +import org.jetbrains.kotlinx.dataframe.api.intoList +import org.jetbrains.kotlinx.dataframe.api.mapToColumn +import org.jetbrains.kotlinx.dataframe.api.merge +import org.jetbrains.kotlinx.dataframe.api.name +import org.jetbrains.kotlinx.dataframe.api.namedValues +import org.jetbrains.kotlinx.dataframe.api.namedValuesOf +import org.jetbrains.kotlinx.dataframe.api.next +import org.jetbrains.kotlinx.dataframe.api.prev +import org.jetbrains.kotlinx.dataframe.api.relative +import org.jetbrains.kotlinx.dataframe.api.rowMean +import org.jetbrains.kotlinx.dataframe.api.rowStd +import org.jetbrains.kotlinx.dataframe.api.rowSum +import org.jetbrains.kotlinx.dataframe.api.toDouble +import org.jetbrains.kotlinx.dataframe.api.transposeTo +import org.jetbrains.kotlinx.dataframe.api.update +import org.jetbrains.kotlinx.dataframe.api.value +import 
org.jetbrains.kotlinx.dataframe.api.valuesOf +import org.jetbrains.kotlinx.dataframe.api.with +import org.junit.Test +import kotlin.math.sqrt + +class DataRowTests : BaseTest() { + + @Test + fun prevNext() { + typed.update { age }.with { prev()?.age }.age.drop(1) shouldBe typed.age.dropLast(1) + typed.update { age }.with { next()?.age }.age.dropLast(1) shouldBe typed.age.drop(1) + } + + @Test + fun diff() { + typed.update { age }.with { diff { age } }.age.drop(1).values() shouldBe typed.age.values() + .zipWithNext { curr, next -> next - curr } + } + + @Test + fun mean() { + typed.mapToColumn("mean") { rowMean() }.values() shouldBe typed.age.values() + .zip(typed.weight.values()) { a, b -> if (b != null) (a + b) / 2.0 else a } + } + + @Test + fun std() { + typed.mapToColumn("std") { rowStd(skipNA = true, ddof = 0) }.values() shouldBe typed.age.values() + .zip(typed.weight.values()) { a, b -> + if (b == null) .0 + else { + val mean = (a + b) / 2.0 + sqrt(((a - mean) * (a - mean) + (b - mean) * (b - mean)) / 2) + } + } + } + + @Test + fun sum() { + typed.convert { weight }.toDouble() + .mapToColumn("sum") { rowSum() }.values() shouldBe typed.age.values().zip(typed.weight.values()) { a, b -> a + (b ?: 0).toDouble() } + } + + @Test + fun namedValuesOf() { + typed.mapToColumn("vals") { + namedValuesOf().map { it.value } + }.values() shouldBe typed.merge { age and weight }.by { it.filterNotNull() }.intoList() + } + + @Test + fun valuesOf() { + typed.mapToColumn("vals") { + valuesOf() + }.values() shouldBe typed.merge { name and city }.by { it.filterNotNull() }.intoList() + } + + @Test + fun namedValuesFilter() { + typed.mapToColumn("vals") { + namedValues().firstOrNull { it.value == null }?.name + } shouldBe typed.mapToColumn("vals") { + val firstNullIndex = values().indexOfFirst { it == null } + if (firstNullIndex == -1) null else columnNames()[firstNullIndex] + } + } + + @Test + fun transposeTo() { + val df = dataFrameOf("a", "b")(1, 2).first().transposeTo() + df.name.toList() shouldBe listOf("a", "b") + df.value.toList() shouldBe listOf(1, 2) + } + + @Test + fun relativeTest() { + typed[1].relative(0..0) shouldBe typed[1..1] + typed[1].relative(-2..2) shouldBe typed[0..3] + typed[1].relative(listOf(2, -1, -3, 0)) shouldBe typed[3, 0, 1] + } +} diff --git a/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/testSets/person/FormattingTests.kt b/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/testSets/person/FormattingTests.kt new file mode 100644 index 0000000000..74bd041bfb --- /dev/null +++ b/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/testSets/person/FormattingTests.kt @@ -0,0 +1,49 @@ +package org.jetbrains.kotlinx.dataframe.testSets.person + +import io.kotest.matchers.shouldBe +import io.kotest.matchers.string.shouldContain +import org.jetbrains.kotlinx.dataframe.api.FormattingDSL +import org.jetbrains.kotlinx.dataframe.api.FormattingDSL.gray +import org.jetbrains.kotlinx.dataframe.api.FormattingDSL.green +import org.jetbrains.kotlinx.dataframe.api.FormattingDSL.red +import org.jetbrains.kotlinx.dataframe.api.and +import org.jetbrains.kotlinx.dataframe.api.colsOf +import org.jetbrains.kotlinx.dataframe.api.format +import org.jetbrains.kotlinx.dataframe.api.linearBg +import org.jetbrains.kotlinx.dataframe.api.where +import org.jetbrains.kotlinx.dataframe.api.with +import org.jetbrains.kotlinx.dataframe.impl.api.encode +import org.jetbrains.kotlinx.dataframe.impl.api.linearGradient +import 
org.jetbrains.kotlinx.dataframe.index +import org.jetbrains.kotlinx.dataframe.io.DisplayConfiguration +import org.jetbrains.kotlinx.dataframe.nrow +import org.junit.Test + +class FormattingTests : BaseTest() { + + @Test + fun `conditional formatting`() { + val formattedFrame = typed.format { colsOf() }.with { + if (it > 10) background(white) and bold and italic + else textColor(linear(it, 30.5 to red, 50 to green)) and underline + } + + val formatter = formattedFrame.formatter!! + for (row in 0 until typed.nrow) + FormattingDSL.formatter(typed[row], typed.age)!!.attributes().size shouldBe if (typed[row].age > 10) 3 else 2 + + formattedFrame.toHTML(DisplayConfiguration.DEFAULT).toString() shouldContain "font-style:italic" + } + + @Test + fun `override format`() { + val formatter = typed.format { age }.linearBg(20 to green, 80 to red) + .format { age and weight }.where { index % 2 == 0 }.with { background(gray) }.formatter!! + + for (row in 0 until typed.nrow step 2) + FormattingDSL.formatter(typed[row], typed.age)!!.attributes() shouldBe listOf("background-color" to gray.encode()) + + for (row in 1 until typed.nrow step 2) + FormattingDSL.formatter(typed[row], typed.age)!!.attributes() shouldBe listOf("background-color" to linearGradient(typed[row].age.toDouble(), 20.0, green, 80.0, red).encode()) + } +} diff --git a/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/testSets/person/HtmlRenderingTests.kt b/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/testSets/person/HtmlRenderingTests.kt new file mode 100644 index 0000000000..ff55301011 --- /dev/null +++ b/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/testSets/person/HtmlRenderingTests.kt @@ -0,0 +1,43 @@ +package org.jetbrains.kotlinx.dataframe.testSets.person + +import io.kotest.matchers.shouldNotBe +import io.kotest.matchers.string.shouldContain +import org.jetbrains.kotlinx.dataframe.AnyFrame +import org.jetbrains.kotlinx.dataframe.api.dataFrameOf +import org.jetbrains.kotlinx.dataframe.api.group +import org.jetbrains.kotlinx.dataframe.api.into +import org.jetbrains.kotlinx.dataframe.api.parse +import org.jetbrains.kotlinx.dataframe.io.html +import org.jetbrains.kotlinx.dataframe.io.initHtml +import org.jetbrains.kotlinx.dataframe.io.toHTML +import org.jetbrains.kotlinx.jupyter.findNthSubstring +import org.junit.Ignore +import org.junit.Test +import java.awt.Desktop +import java.io.File + +class HtmlRenderingTests : BaseTest() { + + fun AnyFrame.browse() { + val file = File("temp.html") // File.createTempFile("df_rendering", ".html") + file.writeText(toHTML(extraHtml = initHtml()).toString()) + val uri = file.toURI() + val desktop = Desktop.getDesktop() + desktop.browse(uri) + } + + @Ignore + @Test + fun test() { + typed.group { name and age }.into("temp").browse() + } + + @Test + fun `render url`() { + val address = "http://www.google.com" + val df = dataFrameOf("url")(address).parse() + val html = df.html() + html shouldContain "href" + html.findNthSubstring(address, 2) shouldNotBe -1 + } +} diff --git a/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/testSets/person/JoinTests.kt b/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/testSets/person/JoinTests.kt new file mode 100644 index 0000000000..47fa300d4c --- /dev/null +++ b/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/testSets/person/JoinTests.kt @@ -0,0 +1,121 @@ +package org.jetbrains.kotlinx.dataframe.testSets.person + +import 
io.kotest.matchers.shouldBe +import org.jetbrains.kotlinx.dataframe.DataFrame +import org.jetbrains.kotlinx.dataframe.annotations.DataSchema +import org.jetbrains.kotlinx.dataframe.api.addId +import org.jetbrains.kotlinx.dataframe.api.all +import org.jetbrains.kotlinx.dataframe.api.append +import org.jetbrains.kotlinx.dataframe.api.cast +import org.jetbrains.kotlinx.dataframe.api.column +import org.jetbrains.kotlinx.dataframe.api.count +import org.jetbrains.kotlinx.dataframe.api.dataFrameOf +import org.jetbrains.kotlinx.dataframe.api.distinct +import org.jetbrains.kotlinx.dataframe.api.excludeJoin +import org.jetbrains.kotlinx.dataframe.api.filter +import org.jetbrains.kotlinx.dataframe.api.filterJoin +import org.jetbrains.kotlinx.dataframe.api.fullJoin +import org.jetbrains.kotlinx.dataframe.api.innerJoin +import org.jetbrains.kotlinx.dataframe.api.leftJoin +import org.jetbrains.kotlinx.dataframe.api.remove +import org.jetbrains.kotlinx.dataframe.api.rightJoin +import org.jetbrains.kotlinx.dataframe.api.select +import org.junit.Test + +class JoinTests : BaseTest() { + + val df2 = dataFrameOf("name", "origin", "grade", "age")( + "Alice", "London", 3, "young", + "Alice", "London", 5, "old", + "Bob", "Tokyo", 4, "young", + "Bob", "Paris", 5, "old", + "Charlie", "Moscow", 1, "young", + "Charlie", "Moscow", 2, "old", + "Bob", "Paris", 4, null + ) + +// Generated Code + + @DataSchema + interface Person2 { + val name: String + val origin: String? + val grade: Int + } + + val typed2: DataFrame = df2.cast() + + @Test + fun `inner join`() { + val res = typed.innerJoin(typed2) { name and it.city.match(right.origin) } + res.columnsCount() shouldBe 6 + res.rowsCount() shouldBe 7 + res["age1"].hasNulls() shouldBe false + res.count { name == "Charlie" && city == "Moscow" } shouldBe 4 + res.select { city and name }.distinct().rowsCount() shouldBe 3 + res[Person2::grade].hasNulls() shouldBe false + } + + @Test + fun `left join`() { + val res = typed.leftJoin(typed2) { name and it.city.match(right.origin) } + + res.columnsCount() shouldBe 6 + res.rowsCount() shouldBe 10 + res["age1"].hasNulls() shouldBe true + res.select { city and name }.distinct().rowsCount() shouldBe 6 + res.count { it["grade"] == null } shouldBe 3 + res.age.hasNulls() shouldBe false + } + + @Test + fun `right join`() { + val res = typed.rightJoin(typed2) { name and it.city.match(right.origin) } + + res.columnsCount() shouldBe 6 + res.rowsCount() shouldBe 9 + res["age1"].hasNulls() shouldBe true + res.select { city and name }.distinct().rowsCount() shouldBe 4 + res[Person2::grade].hasNulls() shouldBe false + res.age.hasNulls() shouldBe true + val newEntries = res.filter { it["age"] == null } + newEntries.rowsCount() shouldBe 2 + newEntries.all { name == "Bob" && city == "Paris" && weight == null } shouldBe true + } + + @Test + fun `outer join`() { + val res = typed.fullJoin(typed2) { name and it.city.match(right.origin) } + println(res) + res.columnsCount() shouldBe 6 + res.rowsCount() shouldBe 12 + res.name.hasNulls() shouldBe false + res.columns().filter { it != res.name }.all { it.hasNulls() } shouldBe true + res.select { city and name }.distinct().rowsCount() shouldBe 7 + val distinct = res.select { name and age and city and weight }.distinct() + val expected = typed.append("Bob", null, "Paris", null) + distinct shouldBe expected + } + + @Test + fun `filter join`() { + val res = typed.filterJoin(typed2) { city.match(right.origin) } + val expected = typed.innerJoin(typed2.select { origin }) { city.match(right.origin) } + res shouldBe 
expected + } + + @Test + fun `filter not join`() { + val res = typed.excludeJoin(typed2) { city.match(right.origin) } + res.rowsCount() shouldBe 3 + res.city.toSet() shouldBe typed.city.toSet() - typed2.origin.toSet() + + val indexColumn = column("__index__") + val withIndex = typed.addId(indexColumn) + val joined = withIndex.filterJoin(typed2) { city.match(right.origin) } + val joinedIndices = joined[indexColumn].toSet() + val expected = withIndex.filter { !joinedIndices.contains(it[indexColumn]) }.remove(indexColumn) + + res shouldBe expected + } +} diff --git a/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/testSets/person/PivotTests.kt b/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/testSets/person/PivotTests.kt new file mode 100644 index 0000000000..5dea9d318e --- /dev/null +++ b/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/testSets/person/PivotTests.kt @@ -0,0 +1,514 @@ +package org.jetbrains.kotlinx.dataframe.testSets.person + +import io.kotest.matchers.shouldBe +import org.jetbrains.kotlinx.dataframe.DataFrame +import org.jetbrains.kotlinx.dataframe.annotations.DataSchema +import org.jetbrains.kotlinx.dataframe.api.Infer +import org.jetbrains.kotlinx.dataframe.api.add +import org.jetbrains.kotlinx.dataframe.api.asColumnGroup +import org.jetbrains.kotlinx.dataframe.api.associate +import org.jetbrains.kotlinx.dataframe.api.cast +import org.jetbrains.kotlinx.dataframe.api.column +import org.jetbrains.kotlinx.dataframe.api.columnNames +import org.jetbrains.kotlinx.dataframe.api.columnOf +import org.jetbrains.kotlinx.dataframe.api.columnsCount +import org.jetbrains.kotlinx.dataframe.api.convert +import org.jetbrains.kotlinx.dataframe.api.count +import org.jetbrains.kotlinx.dataframe.api.dataFrameOf +import org.jetbrains.kotlinx.dataframe.api.drop +import org.jetbrains.kotlinx.dataframe.api.dropNulls +import org.jetbrains.kotlinx.dataframe.api.explodeLists +import org.jetbrains.kotlinx.dataframe.api.expr +import org.jetbrains.kotlinx.dataframe.api.filter +import org.jetbrains.kotlinx.dataframe.api.first +import org.jetbrains.kotlinx.dataframe.api.frames +import org.jetbrains.kotlinx.dataframe.api.gather +import org.jetbrains.kotlinx.dataframe.api.getColumnGroup +import org.jetbrains.kotlinx.dataframe.api.getColumns +import org.jetbrains.kotlinx.dataframe.api.getColumnsWithPaths +import org.jetbrains.kotlinx.dataframe.api.group +import org.jetbrains.kotlinx.dataframe.api.groupBy +import org.jetbrains.kotlinx.dataframe.api.groupByOther +import org.jetbrains.kotlinx.dataframe.api.implode +import org.jetbrains.kotlinx.dataframe.api.into +import org.jetbrains.kotlinx.dataframe.api.isList +import org.jetbrains.kotlinx.dataframe.api.join +import org.jetbrains.kotlinx.dataframe.api.last +import org.jetbrains.kotlinx.dataframe.api.map +import org.jetbrains.kotlinx.dataframe.api.mapKeys +import org.jetbrains.kotlinx.dataframe.api.mapValues +import org.jetbrains.kotlinx.dataframe.api.matches +import org.jetbrains.kotlinx.dataframe.api.named +import org.jetbrains.kotlinx.dataframe.api.notNull +import org.jetbrains.kotlinx.dataframe.api.pivot +import org.jetbrains.kotlinx.dataframe.api.print +import org.jetbrains.kotlinx.dataframe.api.remove +import org.jetbrains.kotlinx.dataframe.api.replace +import org.jetbrains.kotlinx.dataframe.api.rows +import org.jetbrains.kotlinx.dataframe.api.sortBy +import org.jetbrains.kotlinx.dataframe.api.sumOf +import org.jetbrains.kotlinx.dataframe.api.toInt +import 
org.jetbrains.kotlinx.dataframe.api.ungroup +import org.jetbrains.kotlinx.dataframe.api.update +import org.jetbrains.kotlinx.dataframe.api.values +import org.jetbrains.kotlinx.dataframe.api.where +import org.jetbrains.kotlinx.dataframe.api.with +import org.jetbrains.kotlinx.dataframe.columns.ColumnKind +import org.jetbrains.kotlinx.dataframe.impl.asList +import org.jetbrains.kotlinx.dataframe.impl.nothingType +import org.jetbrains.kotlinx.dataframe.io.renderToString +import org.jetbrains.kotlinx.dataframe.typeClass +import org.junit.Test +import java.util.AbstractSet +import kotlin.reflect.KClass +import kotlin.reflect.typeOf + +class PivotTests { + + val df = dataFrameOf( + "name", "key", "value" + )( + "Alice", "age", 15, + "Alice", "city", "London", + "Alice", "weight", 54, + "Bob", "age", 45, + "Bob", "weight", 87, + "Charlie", "age", 20, + "Charlie", "city", "Moscow", + "Charlie", "weight", null, + "Alice", "age", 55, + ) + + val defaultExpected = dataFrameOf( + "name", "age", "city", "weight", + )( + "Alice", listOf(15, 55), "London", 54, + "Bob", listOf(45), "-", 87, + "Charlie", listOf(20), "Moscow", "-", + ) + +// Generated Code + + @DataSchema + interface Person { + val name: String + val key: String + val value: Any? + } + + val typed: DataFrame = df.cast() + + val name by column() + val key by column() + val value by column() + +// Tests + + val keyConverter: (String) -> String = { "__$it" } + val valueConverter: (Any?) -> Any? = { (it as? Int)?.toDouble() ?: it } + + val expectedFiltered = typed.dropNulls { value }.sortBy { name and key } + + @Test + fun `pivot matches`() { + val filtered = typed.drop(1) + val res = filtered.pivot(inward = false) { key }.groupBy { name }.matches() + res.columnsCount() shouldBe 1 + filtered.key.countDistinct() + res.rowsCount() shouldBe filtered.name.countDistinct() + + val expected = filtered.rows().map { (it.name to it.key) }.toSet() + val actual = res.columns().subList(1, res.columnsCount()).flatMap { + val columnName = it.name() + res.rows().map { + val value = it[columnName] as Boolean + if (value) { + (it.name to columnName) + } else null + }.filterNotNull() + }.toSet() + + actual shouldBe expected + res["age"].type() shouldBe typeOf() + res["city"].type() shouldBe typeOf() + res["weight"].type() shouldBe typeOf() + } + + @Test + fun `simple pivot`() { + val res = typed.pivot { key }.groupBy { name }.values { value default "-" } + + res.columnsCount() shouldBe 2 + res.rowsCount() shouldBe typed.name.countDistinct() + + val data = res.getColumnGroup("key") + + data["age"].type() shouldBe typeOf>() + data["city"].type() shouldBe typeOf() + data["weight"].type() shouldBe typeOf>() + + res.renderToString(columnTypes = true, title = true) shouldBe + defaultExpected.group { drop(1) }.into("key").renderToString(columnTypes = true, title = true) + + typed.pivot { key }.groupBy { name }.default("-").values { value } shouldBe res + typed.pivot { key }.groupBy { name }.default("-").with { value } shouldBe res + df.pivot { key }.groupBy { name }.default("-").values { value } shouldBe res + df.pivot(key).groupBy(name).default("-").values(value) shouldBe res + df.pivot(key).groupBy(name).default("-").with { value } shouldBe res + typed.groupBy { name }.pivot { key }.default("-").values { value } shouldBe res + + typed.pivot { key }.groupBy { name }.default("-").with { value.toString() } + } + + @Test + fun `pivot with transform`() { + val pivoted = typed.pivot { key.map { "_$it" } }.groupBy { name }.with { value } + pivoted.getColumns { "key".all() 
}.map { it.name() }.toSet() shouldBe typed.key.distinct().map { "_$it" } + .toSet() + } + + @Test + fun `pivot with index transform`() { + val pivoted = typed.pivot { key }.groupBy { name.map { "_$it" } }.with { value } + pivoted.name shouldBe typed.name.distinct().map { "_$it" } + } + + @Test + fun `pivot with value map`() { + val pivoted = typed.pivot(inward = false) { key }.groupBy { name }.values { value.map { "_$it" } } + + pivoted shouldBe dataFrameOf("name", "age", "city", "weight")( + "Alice", listOf("_15", "_55"), "_London", "_54", + "Bob", listOf("_45"), null, "_87", + "Charlie", listOf("_20"), "_Moscow", "_null" + ) + } + + @Test + fun `pivot two values`() { + val pivoted = typed.pivot(inward = false) { key }.groupBy { name } + .values { value and (expr { value?.toString() } into "str") default "-" } + + val expected = defaultExpected.replace("age", "city", "weight").with { + columnOf( + it named "value", + it.map(Infer.Type) { + if (it is List<*>) it.map { it?.toString() }.asList() + else it?.toString() + } named "str" + ) named it.name() + } + + pivoted.renderToString(title = true, columnTypes = true) shouldBe expected.renderToString( + title = true, + columnTypes = true + ) + } + + @Test + fun `pivot two values group by value`() { + val type by column?>() + val pivoted = typed.add(type) { value?.javaClass?.kotlin } + .pivot { key }.groupBy { name }.values(separate = true) { value and type } + pivoted.columnsCount() shouldBe 3 + } + + @Test + fun `pivot two columns with then`() { + val pivoted = typed + .add("index") { 1 } + .pivot(inward = false) { name then key } + .groupBy("index") + .with { value } + + pivoted.columnNames() shouldBe listOf("index") + typed.name.distinct().values() + pivoted.rowsCount() shouldBe 1 + + val keys = typed.key.distinct().values() + pivoted.columns().drop(1).forEach { + val group = it.asColumnGroup() + group.columnNames() shouldBe if (it.name() == "Bob") keys - "city" else keys + } + + val leafColumns = pivoted.getColumnsWithPaths { all().drop(1).allDfs() } + leafColumns.size shouldBe typed.name.countDistinct() * typed.key.countDistinct() - 1 + leafColumns.forEach { it.path.size shouldBe 2 } + + val data = leafColumns.associate { it.path[0] to it.path[1] to it.data[0] } + val expected = typed.associate { name to key to value }.toMutableMap() + expected["Alice" to "age"] = listOf(15, 55) + data shouldBe expected + + val pivotedNoIndex = typed.pivot { name then key }.with { value } + pivotedNoIndex shouldBe pivoted.remove("index")[0] + } + + @Test + fun `pivot two columns with and`() { + val withIndex = typed.add("index") { 1 } + val pivoted = withIndex.pivot { name and key }.groupBy("index").with { value } + pivoted shouldBe + withIndex.pivot(inward = true) { name }.groupBy("index").with { value } + .join(withIndex.pivot(inward = true) { key }.groupBy("index").with { value }) + + val pivotedNoIndex = typed.pivot { name and key }.with { value } + pivotedNoIndex shouldBe pivoted.remove("index")[0] + } + + @Test + fun `pivot with two index columns`() { + val pivoted = typed.dropNulls { value }.pivot { value.map { it!!.javaClass.kotlin.simpleName } } + .groupBy { name and key }.with { value } + + val expected = typed.dropNulls { value }.add { + "Int" from { value as? Int } + "String" from { value as? 
String } + }.remove("value") + .implode("Int", dropNA = true) + .group("Int", "String").into("value") + + pivoted shouldBe expected + } + + @Test + fun `pivot two values without groupBy`() { + typed.print(columnTypes = true) + val pivotedRow = typed + .pivot { name then key } + .values { value and (value.map { it?.javaClass?.kotlin } into "type") } + + val pivotedDf = pivotedRow.df() + pivotedRow.columnsCount() shouldBe typed.name.countDistinct() + + val nullGroup = pivotedDf["Charlie"]["weight"].asColumnGroup() + nullGroup.columnNames() shouldBe listOf("value", "type") +// nullGroup.columnTypes() shouldBe listOf(typeOf?>(), typeOf?>()) + nullGroup.columnTypes() shouldBe listOf(nothingType(true), nothingType(true)) + + val cols = pivotedDf.getColumnsWithPaths { all().allDfs() } + cols.size shouldBe 2 * typed.name.countDistinct() * typed.key.countDistinct() - 2 + + cols.forEach { + when { + it.isList() -> it.path().dropLast(1) shouldBe listOf("Alice", "age") + it.hasNulls() -> { + it.path().dropLast(1) shouldBe listOf("Charlie", "weight") + } + + it.name() == "type" -> it.typeClass shouldBe KClass::class + else -> it.name() shouldBe "value" + } + } + pivotedRow.getColumnGroup("Bob").getColumnGroup("weight")["value"] shouldBe 87 + } + + @Test + fun `pivot two values without index group by value`() { + val pivoted = typed.pivot { name }.values(separate = true) { key and value } + pivoted.df().columnNames() shouldBe listOf("key", "value") + (pivoted.getColumnGroup("key")["Alice"] as List).size shouldBe 4 + pivoted.df().getColumnGroup("value")["Bob"].type() shouldBe typeOf>() + pivoted.getColumnGroup("value")["Bob"] shouldBe listOf(45, 87) + } + + @Test + fun `pivot in group aggregator`() { + val pivoted = typed.groupBy { name }.aggregate { + pivot { key }.with { value } into "key" + } + pivoted.columnsCount() shouldBe 2 + pivoted.print() + pivoted.ungroup("key") shouldBe typed.pivot(inward = false) { key }.groupBy { name }.with { value } + } + + @Test + fun `equal pivots`() { + val expected = typed.pivot { key }.groupBy { name }.with { value } + typed.groupBy { name }.pivot { key }.values { value } shouldBe expected + val pivoted = typed.groupBy { name }.aggregate { + pivot { key }.with { value } + } + pivoted.print() + pivoted shouldBe expected + } + + @Test + fun gather() { + val res = typed.pivot(inward = false) { key }.groupBy { name }.with { value } + val gathered = res.gather { drop(1) }.notNull().into("key", "value") + gathered shouldBe typed.dropNulls { value }.sortBy { name and "key" } + } + + @Test + fun `gather with filter`() { + val pivoted = typed.pivot { key }.groupBy { name }.with { value } + val gathered = pivoted.gather { "key".all() }.explodeLists().where { it is Int }.into("key", "value") + gathered shouldBe typed.filter { value is Int }.sortBy("name", "key").convert("value") + .toInt() // TODO: replace convert with cast + } + + @Test + fun `grouped pivot with key and value conversions`() { + val grouped = typed.groupBy { name } + + val pivoted = grouped.pivot(inward = false) { key.map(transform = keyConverter) }.with { valueConverter(value) } + + val pivoted2 = grouped.aggregate { + pivot(inward = false) { key.map(transform = keyConverter) }.with { valueConverter(value) } + } + + val pivoted3 = + typed.pivot(inward = false) { key.map(transform = keyConverter) }.groupBy { name } + .values { value.map(transform = valueConverter) } + + pivoted2 shouldBe pivoted + pivoted3 shouldBe pivoted + + val gathered = pivoted.gather { drop(1) }.notNull().into("key", "value") + .convert { 
value } + .with { it as? Comparable<*> } // cast to make the equality test succeed (values are already the same) + val expected = expectedFiltered.update { key }.with { keyConverter(it) } + .convert { value }.with { valueConverter(it) as? Comparable<*> } + gathered shouldBe expected + } + + @Test + fun `gather with value conversion`() { + val pivoted = typed.pivot { key }.groupBy { name }.with { valueConverter(value) } + val gathered = + pivoted.gather { "key".all() }.explodeLists().notNull().mapValues { (it as? Double)?.toInt() ?: it } + .into("key", "value") + gathered shouldBe expectedFiltered + } + + @Test + fun `gather doubles with value conversion`() { + val pivoted = typed.pivot { key }.groupBy { name }.with { valueConverter(value) } + val gathered = pivoted.remove { "key"["city"] }.gather { "key".all() }.explodeLists().notNull().cast() + .mapValues { it.toInt() }.into("key", "value") + val expected = typed.filter { key != "city" && value != null }.convert { value }.toInt().sortBy { name and key } + gathered shouldBe expected + } + + @Test + fun `gather with name conversion`() { + val pivoted = typed.pivot { key.map(transform = keyConverter) }.groupBy { name }.with { value } + val gathered = pivoted.gather { "key".all() }.notNull().mapKeys { it.substring(2) }.into("key", "value") + gathered shouldBe expectedFiltered + } + + @Test + fun `type arguments inference in pivot with index`() { + val id by columnOf(1, 1, 2, 2) + val name by columnOf("set", "list", "set", "list") + val data by columnOf(setOf(1), listOf(1), setOf(2), listOf(2)) + val df = dataFrameOf(id, name, data) + df[data].type() shouldBe typeOf>() + val pivoted = df.pivot(inward = false) { name }.groupBy { id }.values { data } + pivoted.rowsCount() shouldBe 2 + pivoted.columnsCount() shouldBe 3 + pivoted["set"].type() shouldBe typeOf>() + pivoted["list"].type() shouldBe typeOf>() + } + + @Test + fun `type arguments inference in pivot`() { + val name by columnOf("set", "list") + val data by columnOf(setOf(1), listOf(1)) + val df = dataFrameOf(name, data) + df[data].type() shouldBe typeOf>() + val pivoted = df.pivot { name }.values { data } + pivoted.columnsCount() shouldBe 2 + pivoted.df()["set"].type() shouldBe typeOf>() + pivoted.df()["list"].type() shouldBe typeOf>() + } + + @Test + fun `pivot with grouping`() { + val pivoted = typed.pivot(inward = true) { key }.groupBy { name }.with { value } + pivoted.columnNames() shouldBe listOf("name", "key") + pivoted["key"].asColumnGroup().columnNames() shouldBe typed.key.distinct().values() + } + + @Test + fun `pivot matches yes no`() { + val pivoted = typed.drop(1).pivot(inward = false) { key }.groupBy { name }.matches("yes", "no") + pivoted.sumOf { values().count { it == "yes" } } shouldBe typed.rowsCount() - 1 + pivoted.sumOf { values().count { it == "no" } } shouldBe 1 + } + + @Test + fun `pivot aggregate into`() { + val pivoted = typed.pivot(inward = false) { key }.groupBy { name }.aggregate { + value.first() into "value" + } + pivoted.columns().drop(1).forEach { + it.kind() shouldBe ColumnKind.Group + it.asColumnGroup().columnNames() shouldBe listOf("value") + } + } + + @Test + fun `pivot aggregate several into`() { + val pivoted = typed.pivot { key }.groupBy { name }.aggregate { + value.first() into "first value" + value.last() into "last value" + "unused" + } + pivoted.getColumnGroup("key").columns().forEach { + it.kind() shouldBe ColumnKind.Group + it.asColumnGroup().columnNames() shouldBe listOf("first value", "last value") + } + } + + @Test + fun `pivot two value 
columns into one name`() { + val type by column>() + + val pivoted = + typed.add(type) { value?.javaClass?.kotlin ?: Unit::class } + .pivot { key }.groupBy { name }.values { value and (type default Any::class) into "data" } + + pivoted.getColumnGroup("key").columns().forEach { + val group = it.asColumnGroup() + group.columnNames() shouldBe listOf("data") + group["data"].asColumnGroup().columnNames() shouldBe listOf("value", "type") + group["data"]["type"].hasNulls() shouldBe false + } + } + + @Test + fun `pivot one value without index`() { + val pivoted = typed.pivot { name then key }.with { value } + pivoted.columnNames() shouldBe typed.name.distinct().values() + pivoted.df()["Alice"].asColumnGroup().columnNames() shouldBe typed.key.distinct().values() + pivoted.df()["Bob"].asColumnGroup().columnNames() shouldBe listOf("age", "weight") + pivoted.df()["Charlie"].asColumnGroup().columnNames() shouldBe typed.key.distinct().values() + pivoted.df()["Alice"]["age"].type() shouldBe typeOf>() + pivoted.df()["Charlie"]["weight"].type() shouldBe nothingType(true) + } + + @Test + fun `pivot plain`() { + val pivoted = typed.pivot { name }.frames() + pivoted.columnNames() shouldBe typed.name.distinct().toList() + pivoted["Bob"] shouldBe typed.filter { name == "Bob" } + } + + @Test + fun `pivot columns inward`() { + typed.pivot(inward = true) { name }.count().columnsCount() shouldBe 1 + typed.pivot { name }.count() shouldBe typed.pivot(inward = false) { name }.count() + + typed.pivot { name and key }.count().columnNames() shouldBe listOf("name", "key") + typed.pivot(inward = false) { name and key }.count() + .columnsCount() shouldBe typed.name.countDistinct() + typed.key.countDistinct() + typed.pivot(inward = true) { name and key }.count() shouldBe typed.pivot { name and key }.count() + } + + @Test + fun `pivot from group`() { + val pivoted = typed.group { key and value }.into("info") + .pivot(inward = true) { "info"["value"] }.groupByOther().count() + pivoted.getColumnGroup("info").getColumnGroup("value").columnsCount() shouldBe typed.value.countDistinct() + } +} diff --git a/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/testSets/person2/Base.kt b/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/testSets/person2/Base.kt new file mode 100644 index 0000000000..42fcef20fa --- /dev/null +++ b/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/testSets/person2/Base.kt @@ -0,0 +1,35 @@ +package org.jetbrains.kotlinx.dataframe.testSets.person2 + +import org.jetbrains.kotlinx.dataframe.DataRow +import org.jetbrains.kotlinx.dataframe.annotations.DataSchema +import org.jetbrains.kotlinx.dataframe.api.cast +import org.jetbrains.kotlinx.dataframe.api.dataFrameOf +import org.jetbrains.kotlinx.dataframe.api.group +import org.jetbrains.kotlinx.dataframe.api.into + +open class Base { + val df = dataFrameOf("firstName", "lastName", "age", "city", "weight")( + "Alice", "Cooper", 15, "London", 54, + "Bob", "Dylan", 45, "Dubai", 87, + "Mark", "Antony", 20, "Moscow", null, + "Mark", "Avrely", 40, "Milan", null, + "Bob", "Marley", 30, "Tokyo", 68, + "Alice", "Lindt", 20, null, 55, + "Mark", "Petrov", 30, "Moscow", 90 + ).group("firstName", "lastName").into("name") + .cast() + + @DataSchema + interface FullName { + val firstName: String + val lastName: String + } + + @DataSchema + interface Person { + val name: DataRow + val age: Int + val city: String? + val weight: Int? 
+ } +} diff --git a/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/testSets/person2/merge.kt b/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/testSets/person2/merge.kt new file mode 100644 index 0000000000..76c49773ed --- /dev/null +++ b/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/testSets/person2/merge.kt @@ -0,0 +1,20 @@ +package org.jetbrains.kotlinx.dataframe.testSets.person2 + +import io.kotest.matchers.shouldBe +import org.jetbrains.kotlinx.dataframe.api.by +import org.jetbrains.kotlinx.dataframe.api.into +import org.jetbrains.kotlinx.dataframe.api.merge +import org.jetbrains.kotlinx.dataframe.api.remove +import org.jetbrains.kotlinx.dataframe.api.rename +import org.junit.Test + +class MergeTests : Base() { + + @Test + fun `merge inplace`() { + val merged = df.merge { name.firstName and city }.by { it[0] + " from " + it[1] }.into("name") + + merged shouldBe df.merge { name.firstName and city }.by { it[0] + " from " + it[1] }.into("name2") + .remove { name }.rename("name2" to "name") + } +} diff --git a/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/testSets/person2/replace.kt b/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/testSets/person2/replace.kt new file mode 100644 index 0000000000..8e34ef67cd --- /dev/null +++ b/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/testSets/person2/replace.kt @@ -0,0 +1,21 @@ +package org.jetbrains.kotlinx.dataframe.testSets.person2 + +import io.kotest.matchers.shouldBe +import org.jetbrains.kotlinx.dataframe.api.asColumnGroup +import org.jetbrains.kotlinx.dataframe.api.byDesc +import org.jetbrains.kotlinx.dataframe.api.reorder +import org.jetbrains.kotlinx.dataframe.api.replace +import org.jetbrains.kotlinx.dataframe.api.select +import org.jetbrains.kotlinx.dataframe.api.with +import org.junit.Test + +class ReplaceTests : Base() { + + @Test + fun `reorder columns in group`() { + val reordered = df.replace { name }.with { + it.asColumnGroup().select { lastName and firstName }.asColumnGroup(it.name()) + } + reordered shouldBe df.reorder { name.firstName and name.lastName }.byDesc { it.name() } + } +} diff --git a/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/testSets/weather/SeriesTests.kt b/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/testSets/weather/SeriesTests.kt new file mode 100644 index 0000000000..d906cc0210 --- /dev/null +++ b/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/testSets/weather/SeriesTests.kt @@ -0,0 +1,74 @@ +package org.jetbrains.kotlinx.dataframe.testSets.weather + +import io.kotest.matchers.shouldBe +import org.jetbrains.kotlinx.dataframe.annotations.DataSchema +import org.jetbrains.kotlinx.dataframe.api.add +import org.jetbrains.kotlinx.dataframe.api.cast +import org.jetbrains.kotlinx.dataframe.api.concat +import org.jetbrains.kotlinx.dataframe.api.dataFrameOf +import org.jetbrains.kotlinx.dataframe.api.diff +import org.jetbrains.kotlinx.dataframe.api.groupBy +import org.jetbrains.kotlinx.dataframe.api.movingAverage +import org.jetbrains.kotlinx.dataframe.api.rows +import org.jetbrains.kotlinx.dataframe.api.sortBy +import org.junit.Test + +class SeriesTests { + + val df = dataFrameOf("city", "day", "temp")( + "Moscow", 1, 14, + "London", 1, 10, + "Moscow", 3, 18, + "London", 3, 16, + "Moscow", 6, 16, + "London", 6, 23, + "Moscow", 4, 13, + "London", 4, 22, + "Moscow", 2, 20, + "London", 2, 15, + "Moscow", 
5, 10, + "London", 5, 18 + ) + + // Generated code + + @DataSchema + interface Weather { + val city: String + val day: Int + val temp: Int + } + + val typed = df.cast() + + @Test + fun `diff test`() { + val withDiff = typed + .sortBy { city and day } + .groupBy { city } + .add("diff") { diff { temp } ?: 0 } + .concat() + + val srcData = typed.rows().map { (it.city to it.day) to it.temp }.toMap() + val expected = typed.sortBy { city and day }.rows().map { row -> srcData[row.city to (row.day - 1)]?.let { row.temp - it } ?: 0 } + withDiff["diff"].toList() shouldBe expected + } + + @Test + fun `movingAverage`() { + val k = 3 + val withMa = typed + .groupBy { city } + .sortBy { city and day } + .add("ma_temp") { it.movingAverage(k) { it.temp } } + .concat() + + val srcData = typed.rows().map { (it.city to it.day) to it.temp }.toMap() + val expected = typed + .sortBy { city and day } + .rows() + .map { row -> (0 until k).map { srcData[row.city to row.day - it] }.filterNotNull().let { it.sum().toDouble() / it.size } } + + withMa["ma_temp"].toList() shouldBe expected + } +} diff --git a/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/types/DoubleTests.kt b/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/types/DoubleTests.kt new file mode 100644 index 0000000000..9c81304aa4 --- /dev/null +++ b/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/types/DoubleTests.kt @@ -0,0 +1,21 @@ +package org.jetbrains.kotlinx.dataframe.types + +import io.kotest.matchers.shouldBe +import org.jetbrains.kotlinx.dataframe.api.columnOf +import org.jetbrains.kotlinx.dataframe.api.dataFrameOf +import org.jetbrains.kotlinx.dataframe.api.filter +import org.jetbrains.kotlinx.dataframe.api.isNaN +import org.jetbrains.kotlinx.dataframe.nrow +import org.junit.Test + +class DoubleTests { + + @Test + fun `filter not null with nans`() { + val age by columnOf(2.3, Double.NaN, 1.0, "asd", 3, 'a') + val df = dataFrameOf(age) + df.filter { age() == Double.NaN }.nrow shouldBe 1 + df.filter { age().isNaN }.nrow shouldBe 1 + df.filter { it[age].isNaN }.nrow shouldBe 1 + } +} diff --git a/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/types/TypeProjectionTests.kt b/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/types/TypeProjectionTests.kt new file mode 100644 index 0000000000..77deb59c31 --- /dev/null +++ b/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/types/TypeProjectionTests.kt @@ -0,0 +1,58 @@ +package org.jetbrains.kotlinx.dataframe.types + +import io.kotest.matchers.shouldBe +import org.jetbrains.kotlinx.dataframe.DataRow +import org.jetbrains.kotlinx.dataframe.columns.ColumnGroup +import org.jetbrains.kotlinx.dataframe.columns.ColumnReference +import org.jetbrains.kotlinx.dataframe.columns.SingleColumn +import org.jetbrains.kotlinx.dataframe.impl.columns.ColumnGroupWithParent +import org.jetbrains.kotlinx.dataframe.impl.commonType +import org.jetbrains.kotlinx.dataframe.impl.createTypeUsing +import org.junit.Test +import kotlin.reflect.typeOf + +class TypeProjectionTests { + class TypeInferenceTest1 { + interface A + interface X : A> + + @Test + fun test() { + X::class.createTypeUsing>>() shouldBe typeOf>() + A::class.createTypeUsing>() shouldBe typeOf>>() + } + } + + class TypeInferenceTest2 { + interface A + interface B : A> + interface C : A> + interface D + interface X : C, D + + @Test + fun test() { + X::class.createTypeUsing>>>() shouldBe typeOf>() + A::class.createTypeUsing?>() 
shouldBe typeOf>?>() + } + } + + @Test + fun `collection to list projection`() { + List::class.createTypeUsing?>() shouldBe typeOf?>() + Collection::class.createTypeUsing>() shouldBe typeOf>() + } + + @Test + fun `column group projections`() { + ColumnGroup::class.createTypeUsing>>() shouldBe typeOf>() + SingleColumn::class.createTypeUsing>() shouldBe typeOf>>() + } + + @Test + fun `common type tests`() { + listOf(typeOf>(), typeOf>()).commonType() shouldBe typeOf>() + listOf(typeOf>(), typeOf>()).commonType() shouldBe typeOf>() + listOf(typeOf>(), typeOf?>()).commonType() shouldBe typeOf?>() + } +} diff --git a/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/types/UtilTests.kt b/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/types/UtilTests.kt new file mode 100644 index 0000000000..b179b779f7 --- /dev/null +++ b/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/types/UtilTests.kt @@ -0,0 +1,253 @@ +package org.jetbrains.kotlinx.dataframe.types + +import io.kotest.matchers.shouldBe +import org.jetbrains.kotlinx.dataframe.impl.commonParent +import org.jetbrains.kotlinx.dataframe.impl.commonParents +import org.jetbrains.kotlinx.dataframe.impl.commonType +import org.jetbrains.kotlinx.dataframe.impl.commonTypeListifyValues +import org.jetbrains.kotlinx.dataframe.impl.createType +import org.jetbrains.kotlinx.dataframe.impl.guessValueType +import org.jetbrains.kotlinx.dataframe.impl.nothingType +import org.junit.Test +import java.io.Serializable +import kotlin.reflect.KClass +import kotlin.reflect.KType +import kotlin.reflect.typeOf + +class UtilTests { + + @Test + fun commonParentsTests() { + commonParents(Int::class, Int::class) shouldBe listOf(Int::class) + commonParents(Double::class, Int::class) shouldBe listOf(Comparable::class, Number::class) + commonParents(Int::class, String::class) shouldBe listOf(Comparable::class, Serializable::class) + commonParents(IllegalArgumentException::class, UnsupportedOperationException::class) shouldBe + listOf(RuntimeException::class) + commonParents(Nothing::class, Nothing::class) shouldBe listOf(Nothing::class) + commonParents() shouldBe emptyList() + commonParents(List::class, Set::class) shouldBe listOf(Collection::class) + } + + @Test + fun commonParentTests() { + commonParent(Int::class, Int::class) shouldBe Int::class + commonParent(Double::class, Int::class) shouldBe Number::class + commonParent(Int::class, String::class) shouldBe Comparable::class + commonParent(String::class, Int::class) shouldBe Comparable::class + commonParent(Nothing::class, Nothing::class) shouldBe Nothing::class + commonParent(Int::class, Nothing::class) shouldBe Int::class + commonParent() shouldBe null + commonParent(List::class, Map::class) shouldBe Any::class + commonParent(List::class, Set::class) shouldBe Collection::class + } + + @Test + fun `createType test`() { + emptyList>().createType(nullable = false) shouldBe typeOf() + emptyList>().createType(nullable = true) shouldBe typeOf() + + listOf(Nothing::class).createType(nullable = false) shouldBe nothingType(nullable = false) + listOf(Nothing::class).createType(nullable = true) shouldBe nothingType(nullable = true) + } + + @Test + fun `commonType classes test`() { + emptyList>().commonType(false, typeOf>()) shouldBe typeOf>() + emptyList>().commonType(true, typeOf>()) shouldBe typeOf?>() + + listOf(Nothing::class).commonType(false) shouldBe nothingType(nullable = false) + listOf(Nothing::class).commonType(true) shouldBe nothingType(nullable = true) + 
} + + @Test + fun `guessValueType no listification`() { + guessValueType(sequenceOf(1, 2)) shouldBe typeOf() + guessValueType(sequenceOf(1, 2, null)) shouldBe typeOf() + + guessValueType(sequenceOf(1, 2.0)) shouldBe typeOf() + guessValueType(sequenceOf(1, 2.0, null)) shouldBe typeOf() + + guessValueType(sequenceOf(1, 2.0, "a")) shouldBe typeOf>() + guessValueType(sequenceOf(1, 2.0, "a", null)) shouldBe typeOf?>() + + guessValueType(sequenceOf(1, 2.0, "a", listOf(1, 2))) shouldBe typeOf() + guessValueType(sequenceOf(1, 2.0, "a", null, listOf(1, 2))) shouldBe typeOf() + + guessValueType(sequenceOf(null, null)) shouldBe nothingType(nullable = true) + + guessValueType(sequenceOf(listOf(null))) shouldBe typeOf>() + guessValueType(sequenceOf(emptyList())) shouldBe typeOf>() + guessValueType(sequenceOf(listOf(null), emptyList())) shouldBe typeOf>() + guessValueType(sequenceOf(emptyList(), null)) shouldBe typeOf?>() + + guessValueType(sequenceOf(listOf(1), emptyList())) shouldBe typeOf>() + guessValueType(sequenceOf(listOf(1, null), emptyList())) shouldBe typeOf>() + guessValueType(sequenceOf(listOf(1), listOf(null))) shouldBe typeOf>() + + guessValueType(sequenceOf(1, emptyList())) shouldBe typeOf() + + guessValueType(sequenceOf(1, 2, listOf(1), emptySet())) shouldBe typeOf() + guessValueType(sequenceOf(listOf(1), setOf(1.0, 2.0))) shouldBe typeOf>() + } + + @Test + fun `guessValueType with listification`() { + guessValueType(sequenceOf(1, 2), listifyValues = true) shouldBe typeOf() + guessValueType(sequenceOf(1, 2, null), listifyValues = true) shouldBe typeOf() + + guessValueType(sequenceOf(1, 2.0), listifyValues = true) shouldBe typeOf() + guessValueType(sequenceOf(1, 2.0, null), listifyValues = true) shouldBe typeOf() + + guessValueType(sequenceOf(1, 2.0, "a"), listifyValues = true) shouldBe typeOf>() + guessValueType(sequenceOf(1, 2.0, "a", null), listifyValues = true) shouldBe typeOf?>() + + guessValueType(sequenceOf(1, 2, listOf(1)), listifyValues = true) shouldBe typeOf>() + guessValueType(sequenceOf(1, 2, listOf(1), null), listifyValues = true) shouldBe typeOf>() + guessValueType(sequenceOf(1, 2, listOf(1, null)), listifyValues = true) shouldBe typeOf>() + guessValueType(sequenceOf(1, 2, listOf(1, null), null), listifyValues = true) shouldBe typeOf>() + + guessValueType(sequenceOf(1, 2, listOf(null)), listifyValues = true) shouldBe typeOf>() + guessValueType(sequenceOf(1, 2, listOf(null), null), listifyValues = true) shouldBe typeOf>() + + guessValueType(sequenceOf(emptyList()), listifyValues = true) shouldBe typeOf>() + guessValueType(sequenceOf(emptyList(), null), listifyValues = true) shouldBe typeOf>() + guessValueType(sequenceOf(listOf(null)), listifyValues = true) shouldBe typeOf>() + guessValueType(sequenceOf(listOf(null), null), listifyValues = true) shouldBe typeOf>() + + guessValueType( + values = sequenceOf(1, 2, listOf(1), emptySet()), + listifyValues = true, + ) shouldBe typeOf() + guessValueType( + values = sequenceOf(1, 2, listOf(1), null, emptySet()), + listifyValues = true, + ) shouldBe typeOf() + guessValueType( + values = sequenceOf(1, 2, listOf(1, null), emptySet()), + listifyValues = true, + ) shouldBe typeOf() + guessValueType( + values = sequenceOf(1, 2, listOf(1, null), null, emptySet()), + listifyValues = true, + ) shouldBe typeOf() + + guessValueType( + values = sequenceOf(1, 2, listOf(null), emptySet()), + listifyValues = true, + ) shouldBe typeOf() + guessValueType( + values = sequenceOf(1, 2, listOf(null), null, emptySet()), + listifyValues = true, + ) 
shouldBe typeOf() + + guessValueType( + values = sequenceOf(emptyList(), emptySet()), + listifyValues = true, + ) shouldBe typeOf>() + guessValueType( + values = sequenceOf(emptyList(), null, emptySet()), + listifyValues = true, + ) shouldBe typeOf?>() + guessValueType( + values = sequenceOf(listOf(null), emptySet()), + listifyValues = true, + ) shouldBe typeOf>() + guessValueType( + values = sequenceOf(listOf(null), null, emptySet()), + listifyValues = true, + ) shouldBe typeOf?>() + } + + @Test + fun `commonType ktypes test`() { + listOf(typeOf(), typeOf()).commonType() shouldBe typeOf() + listOf(typeOf(), typeOf()).commonType() shouldBe typeOf() + listOf(typeOf(), nothingType(true)).commonType() shouldBe typeOf() + listOf(typeOf(), nothingType(false)).commonType() shouldBe typeOf() + listOf(typeOf>(), typeOf>()).commonType() shouldBe typeOf>>() + listOf(typeOf>(), typeOf>()).commonType() shouldBe typeOf>>() + listOf(typeOf>(), typeOf>()).commonType() shouldBe typeOf>() + listOf(nothingType(false)).commonType() shouldBe nothingType(false) + listOf(nothingType(true)).commonType() shouldBe nothingType(true) + listOf().commonType() shouldBe typeOf() + } + + @Test + fun `commonTypeListifyValues test`() { + listOf().commonTypeListifyValues() shouldBe typeOf() + listOf(typeOf(), typeOf()).commonTypeListifyValues() shouldBe typeOf() + listOf(typeOf(), typeOf()).commonTypeListifyValues() shouldBe typeOf() + listOf(typeOf(), nothingType(true)).commonTypeListifyValues() shouldBe typeOf() + listOf(typeOf(), nothingType(false)).commonTypeListifyValues() shouldBe typeOf() + listOf(typeOf(), typeOf(), nothingType(true)).commonTypeListifyValues() shouldBe typeOf() + listOf( + typeOf>(), + typeOf>() + ).commonTypeListifyValues() shouldBe typeOf>>() + listOf( + typeOf>(), + typeOf>() + ).commonTypeListifyValues() shouldBe typeOf>>() + + listOf( + typeOf(), + typeOf>(), + ).commonTypeListifyValues() shouldBe typeOf>() + + listOf( + typeOf(), + typeOf>(), + nothingType(true), + ).commonTypeListifyValues() shouldBe typeOf>() + + listOf( + typeOf(), + typeOf>(), + nothingType(true), + ).commonTypeListifyValues() shouldBe typeOf>() + + listOf( + typeOf>(), + typeOf>(), + ).commonTypeListifyValues() shouldBe typeOf>() + + listOf( + typeOf>(), + nothingType(true), + ).commonTypeListifyValues() shouldBe typeOf>() + + listOf( + typeOf(), + typeOf>(), + typeOf>(), + ).commonTypeListifyValues() shouldBe typeOf() + + listOf( + typeOf(), + typeOf>(), + typeOf>(), + nothingType(true), + ).commonTypeListifyValues() shouldBe typeOf() + + listOf( + typeOf>(), + typeOf>(), + ).commonTypeListifyValues() shouldBe typeOf>() + + listOf( + typeOf>(), + typeOf>(), + nothingType(true) + ).commonTypeListifyValues() shouldBe typeOf?>() + + listOf( + typeOf>(), + typeOf>(), + ).commonTypeListifyValues() shouldBe typeOf>() + + listOf( + typeOf>(), + typeOf?>(), + ).commonTypeListifyValues() shouldBe typeOf?>() + } +} diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/aliases.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/aliases.kt index 74552bae8d..064bbeb022 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/aliases.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/aliases.kt @@ -74,6 +74,17 @@ public typealias RowValueExpression = DataRow.(it: C) -> R */ public typealias RowColumnExpression = (row: DataRow, col: DataColumn) -> R +/** + * [ColumnExpression] is used to express or select any instance of `R` using the given instance of [DataColumn]`` as + * `this` and `it`. 
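To make the new alias concrete, here is a minimal sketch (illustrative only; the values and names are made up) of a `ColumnExpression` that averages a nullable numeric column, which is the shape accepted by functions such as `Update.perCol` and `Reorder.by`:

```kotlin
import org.jetbrains.kotlinx.dataframe.ColumnExpression
import org.jetbrains.kotlinx.dataframe.api.mean

// `this` and `it` are both the DataColumn<Double?> being inspected.
val average: ColumnExpression<Double?, Double> = { mean() }
```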
+ * + * Shorthand for: + * ```kotlin + * DataColumn.(it: DataColumn) -> R + * ``` + */ +public typealias ColumnExpression = Selector, R> + /** * [ColumnSelector] is used to express or select a single column, represented by [SingleColumn]``, using the * context of [ColumnsSelectionDsl]`` as `this` and `it`. diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/ColumnsSelectionDsl.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/ColumnsSelectionDsl.kt index 60ca70a447..7e837b558d 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/ColumnsSelectionDsl.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/ColumnsSelectionDsl.kt @@ -11,36 +11,27 @@ import org.jetbrains.kotlinx.dataframe.DataColumn import org.jetbrains.kotlinx.dataframe.DataFrame import org.jetbrains.kotlinx.dataframe.DataRow import org.jetbrains.kotlinx.dataframe.Predicate -import org.jetbrains.kotlinx.dataframe.columns.ColumnAccessor -import org.jetbrains.kotlinx.dataframe.columns.ColumnGroup -import org.jetbrains.kotlinx.dataframe.columns.ColumnPath -import org.jetbrains.kotlinx.dataframe.columns.ColumnReference -import org.jetbrains.kotlinx.dataframe.columns.ColumnResolutionContext -import org.jetbrains.kotlinx.dataframe.columns.ColumnSet -import org.jetbrains.kotlinx.dataframe.columns.ColumnWithPath -import org.jetbrains.kotlinx.dataframe.columns.FrameColumn -import org.jetbrains.kotlinx.dataframe.columns.SingleColumn -import org.jetbrains.kotlinx.dataframe.columns.renamedReference +import org.jetbrains.kotlinx.dataframe.columns.* +import org.jetbrains.kotlinx.dataframe.documentation.AccessApi import org.jetbrains.kotlinx.dataframe.hasNulls import org.jetbrains.kotlinx.dataframe.impl.columnName -import org.jetbrains.kotlinx.dataframe.impl.columns.ColumnsList -import org.jetbrains.kotlinx.dataframe.impl.columns.DistinctColumnSet -import org.jetbrains.kotlinx.dataframe.impl.columns.addPath -import org.jetbrains.kotlinx.dataframe.impl.columns.allColumnsExcept -import org.jetbrains.kotlinx.dataframe.impl.columns.changePath -import org.jetbrains.kotlinx.dataframe.impl.columns.createColumnSet -import org.jetbrains.kotlinx.dataframe.impl.columns.getAt -import org.jetbrains.kotlinx.dataframe.impl.columns.getChildrenAt -import org.jetbrains.kotlinx.dataframe.impl.columns.single -import org.jetbrains.kotlinx.dataframe.impl.columns.toColumns -import org.jetbrains.kotlinx.dataframe.impl.columns.top -import org.jetbrains.kotlinx.dataframe.impl.columns.transform -import org.jetbrains.kotlinx.dataframe.impl.columns.transformSingle +import org.jetbrains.kotlinx.dataframe.impl.columns.* import org.jetbrains.kotlinx.dataframe.impl.columns.tree.dfs import kotlin.reflect.KProperty import kotlin.reflect.KType import kotlin.reflect.typeOf +/** + * Referring to a column in the selection DSL can be done in several ways corresponding to all + * [Access APIs][AccessApi]: + * TODO: [Issue #286](https://github.com/Kotlin/dataframe/issues/286) + */ +private interface CommonColumnSelectionExamples + +/** [Column Selection DSL][ColumnSelectionDsl] */ +internal interface ColumnSelectionDslLink + +/** TODO: [Issue #286](https://github.com/Kotlin/dataframe/issues/286) */ public interface ColumnSelectionDsl : ColumnsContainer { public operator fun ColumnReference.invoke(): DataColumn = get(this) @@ -56,6 +47,10 @@ public interface ColumnSelectionDsl : ColumnsContainer { public operator fun String.get(column: String): ColumnPath = pathOf(this, column) } +/** [Columns Selection 
DSL][ColumnsSelectionDsl] */ +internal interface ColumnsSelectionDslLink + +/** TODO: [Issue #286](https://github.com/Kotlin/dataframe/issues/286) */ public interface ColumnsSelectionDsl : ColumnSelectionDsl, SingleColumn> { public fun ColumnSet.first(condition: ColumnFilter): SingleColumn = @@ -70,24 +65,26 @@ public interface ColumnsSelectionDsl : ColumnSelectionDsl, SingleColum public fun ColumnsContainer<*>.group(name: String): ColumnGroupReference = name.toColumnOf() - public operator fun String.rangeTo(endInclusive: String): ColumnSet<*> = toColumnAccessor().rangeTo(endInclusive.toColumnAccessor()) - - public operator fun AnyColumnReference.rangeTo(endInclusive: AnyColumnReference): ColumnSet<*> = object : ColumnSet { - override fun resolve(context: ColumnResolutionContext): List> { - val startPath = this@rangeTo.resolveSingle(context)!!.path - val endPath = endInclusive.resolveSingle(context)!!.path - val parentPath = startPath.parent()!! - require(parentPath == endPath.parent()) { "Start and end columns have different parent column paths" } - val parentCol = context.df.getColumnGroup(parentPath) - val startIndex = parentCol.getColumnIndex(startPath.name) - val endIndex = parentCol.getColumnIndex(endPath.name) - return (startIndex..endIndex).map { - parentCol.getColumn(it).let { - it.addPath(parentPath + it.name) + public operator fun String.rangeTo(endInclusive: String): ColumnSet<*> = + toColumnAccessor().rangeTo(endInclusive.toColumnAccessor()) + + public operator fun AnyColumnReference.rangeTo(endInclusive: AnyColumnReference): ColumnSet<*> = + object : ColumnSet { + override fun resolve(context: ColumnResolutionContext): List> { + val startPath = this@rangeTo.resolveSingle(context)!!.path + val endPath = endInclusive.resolveSingle(context)!!.path + val parentPath = startPath.parent()!! 
+ require(parentPath == endPath.parent()) { "Start and end columns have different parent column paths" } + val parentCol = context.df.getColumnGroup(parentPath) + val startIndex = parentCol.getColumnIndex(startPath.name) + val endIndex = parentCol.getColumnIndex(endPath.name) + return (startIndex..endIndex).map { + parentCol.getColumn(it).let { + it.addPath(parentPath + it.name) + } } } } - } public fun none(): ColumnSet<*> = ColumnsList(emptyList()) @@ -115,7 +112,8 @@ public interface ColumnsSelectionDsl : ColumnSelectionDsl, SingleColum public fun ColumnSet>.select(vararg columns: String): ColumnSet<*> = select { columns.toColumns() } - public fun ColumnSet>.select(vararg columns: KProperty): ColumnSet = select { columns.toColumns() } + public fun ColumnSet>.select(vararg columns: KProperty): ColumnSet = + select { columns.toColumns() } public fun ColumnSet>.select(selector: ColumnsSelector): ColumnSet = createColumnSet { this@select.resolve(it).flatMap { group -> @@ -145,7 +143,8 @@ public interface ColumnsSelectionDsl : ColumnSelectionDsl, SingleColum // region allDfs - public fun ColumnSet<*>.allDfs(includeGroups: Boolean = false): ColumnSet = if (includeGroups) dfs { true } else dfs { !it.isColumnGroup() } + public fun ColumnSet<*>.allDfs(includeGroups: Boolean = false): ColumnSet = + if (includeGroups) dfs { true } else dfs { !it.isColumnGroup() } public fun String.allDfs(includeGroups: Boolean = false): ColumnSet = toColumnAccessor().allDfs(includeGroups) @@ -242,7 +241,9 @@ public interface ColumnsSelectionDsl : ColumnSelectionDsl, SingleColum public fun col(property: KProperty): ColumnAccessor = property.toColumnAccessor() - public operator fun ColumnSet<*>.get(colName: String): ColumnSet = transform { it.mapNotNull { it.getChild(colName) } } + public operator fun ColumnSet<*>.get(colName: String): ColumnSet = + transform { it.mapNotNull { it.getChild(colName) } } + public operator fun ColumnSet<*>.get(column: ColumnReference): ColumnSet = cols(column) public fun SingleColumn.take(n: Int): ColumnSet<*> = transformSingle { it.children().take(n) } @@ -288,8 +289,11 @@ public interface ColumnsSelectionDsl : ColumnSelectionDsl, SingleColum public infix fun ColumnReference.into(column: KProperty<*>): ColumnReference = named(column.columnName) public infix fun String.into(newName: String): ColumnReference = toColumnAccessor().into(newName) - public infix fun String.into(column: ColumnAccessor<*>): ColumnReference = toColumnAccessor().into(column.name()) - public infix fun String.into(column: KProperty<*>): ColumnReference = toColumnAccessor().into(column.columnName) + public infix fun String.into(column: ColumnAccessor<*>): ColumnReference = + toColumnAccessor().into(column.name()) + + public infix fun String.into(column: KProperty<*>): ColumnReference = + toColumnAccessor().into(column.columnName) public infix fun ColumnReference.named(newName: String): ColumnReference = renamedReference(newName) public infix fun ColumnReference.named(name: KProperty<*>): ColumnReference = named(name.columnName) @@ -311,6 +315,7 @@ public interface ColumnsSelectionDsl : ColumnSelectionDsl, SingleColum public infix fun KProperty.and(other: String): ColumnSet = toColumnAccessor() and other public infix fun KProperty.and(other: KProperty): ColumnSet = toColumnAccessor() and other.toColumnAccessor() + public infix fun KProperty.and(other: ColumnsSelector): ColumnSet = toColumnAccessor() and other() // endregion @@ -339,7 +344,7 @@ public interface ColumnsSelectionDsl : ColumnSelectionDsl, SingleColum 
public inline fun ColumnsSelectionDsl.expr( name: String = "", infer: Infer = Infer.Nulls, - noinline expression: AddExpression + noinline expression: AddExpression, ): DataColumn = mapToColumn(name, infer, expression) internal fun ColumnsSelector.filter(predicate: (ColumnWithPath) -> Boolean): ColumnsSelector = diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/Nulls.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/Nulls.kt index 2b05fc3a93..3812b65f66 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/Nulls.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/Nulls.kt @@ -1,36 +1,82 @@ package org.jetbrains.kotlinx.dataframe.api -import org.jetbrains.kotlinx.dataframe.AnyCol -import org.jetbrains.kotlinx.dataframe.AnyColumnReference -import org.jetbrains.kotlinx.dataframe.AnyFrame -import org.jetbrains.kotlinx.dataframe.AnyRow -import org.jetbrains.kotlinx.dataframe.ColumnsSelector -import org.jetbrains.kotlinx.dataframe.DataColumn -import org.jetbrains.kotlinx.dataframe.DataFrame +import org.jetbrains.kotlinx.dataframe.* +import org.jetbrains.kotlinx.dataframe.api.Update.UpdateOperationArg import org.jetbrains.kotlinx.dataframe.columns.ColumnKind import org.jetbrains.kotlinx.dataframe.columns.ColumnReference +import org.jetbrains.kotlinx.dataframe.documentation.* import org.jetbrains.kotlinx.dataframe.impl.columns.toColumnSet import org.jetbrains.kotlinx.dataframe.impl.columns.toColumns -import org.jetbrains.kotlinx.dataframe.kind -import org.jetbrains.kotlinx.dataframe.typeClass import kotlin.reflect.KProperty // region fillNulls -public fun DataFrame.fillNulls(cols: ColumnsSelector): Update = - update(cols).where { it == null } - -public fun DataFrame.fillNulls(vararg cols: String): Update = - fillNulls { cols.toColumns() } - -public fun DataFrame.fillNulls(vararg cols: KProperty): Update = - fillNulls { cols.toColumns() } - -public fun DataFrame.fillNulls(vararg cols: ColumnReference): Update = - fillNulls { cols.toColumns() } +/** + * ## The Fill Nulls Operation + * + * Replaces `null` values with given value or expression. + * Specific case of [update]. + * + * Check out the [`fillNulls` Operation Usage][FillNulls.Usage]. 
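For illustration, a minimal `fillNulls` chain might look like this (a sketch with made-up data; `df` and its columns are hypothetical):

```kotlin
import org.jetbrains.kotlinx.dataframe.api.*

// A tiny frame with a nullable Int column.
val df = dataFrameOf("name", "age")(
    "Alice", 23,
    "Bob", null,
)

// Replace nulls in all nullable Int columns with 0; other cells stay untouched.
val filled = df.fillNulls { colsOf<Int?>() }.with { 0 }
```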
+ * + * For more information: {@include [DocumentationUrls.Fill.FillNulls]} + */ +internal interface FillNulls { + + /** @include [Update.Usage] {@arg [UpdateOperationArg] [fillNulls][fillNulls]} */ + interface Usage +} -public fun DataFrame.fillNulls(cols: Iterable>): Update = - fillNulls { cols.toColumnSet() } +/** {@arg [SelectingColumns.OperationArg] [fillNulls][fillNulls]} */ +private interface SetFillNullsOperationArg + +/** + * @include [FillNulls] {@comment Description of the fillNulls operation.} + * @include [LineBreak] + * @include [Update.Columns] {@comment Description of what this function expects the user to do: select columns} + * + * ## This Fill Nulls Overload + * + */ +private interface CommonFillNullsFunctionDoc + +/** + * @include [CommonFillNullsFunctionDoc] + * @include [SelectingColumns.Dsl.WithExample] {@include [SetFillNullsOperationArg]} + * @include [Update.DslParam] + */ +public fun DataFrame.fillNulls(columns: ColumnsSelector): Update = + update(columns).where { it == null } + +/** + * @include [CommonFillNullsFunctionDoc] + * @include [SelectingColumns.ColumnNames.WithExample] {@include [SetFillNullsOperationArg]} + * @include [Update.ColumnNamesParam] + */ +public fun DataFrame.fillNulls(vararg columns: String): Update = + fillNulls { columns.toColumns() } + +/** + * @include [CommonFillNullsFunctionDoc] + * @include [SelectingColumns.KProperties.WithExample] {@include [SetFillNullsOperationArg]} + * @include [Update.KPropertiesParam] + */ +public fun DataFrame.fillNulls(vararg columns: KProperty): Update = + fillNulls { columns.toColumns() } + +/** + * @include [CommonFillNullsFunctionDoc] + * @include [SelectingColumns.ColumnAccessors.WithExample] {@include [SetFillNullsOperationArg]} + * @include [Update.ColumnAccessorsParam] + */ +public fun DataFrame.fillNulls(vararg columns: ColumnReference): Update = + fillNulls { columns.toColumns() } + +/** + * TODO this will be deprecated [PR #286](https://github.com/Kotlin/dataframe/pull/320) + */ +public fun DataFrame.fillNulls(columns: Iterable>): Update = + fillNulls { columns.toColumnSet() } // endregion @@ -56,65 +102,251 @@ internal inline val Float?.isNA: Boolean get() = this == null || this.isNaN() // region fillNaNs -public fun DataFrame.fillNaNs(cols: ColumnsSelector): Update = - update(cols).where { it.isNaN } - -public fun DataFrame.fillNaNs(vararg cols: String): Update = - fillNaNs { cols.toColumns() } - -public fun DataFrame.fillNaNs(vararg cols: KProperty): Update = - fillNaNs { cols.toColumns() } - -public fun DataFrame.fillNaNs(vararg cols: ColumnReference): Update = - fillNaNs { cols.toColumns() } +/** + * ## The Fill NaNs Operation + * + * Replaces [`NaN`][NaN] values with given value or expression. + * Specific case of [update]. + * + * Check out the [`fillNaNs` Operation Usage][FillNaNs.Usage]. 
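A quick sketch of typical usage (hypothetical data):

```kotlin
import org.jetbrains.kotlinx.dataframe.api.*

val df = dataFrameOf("x")(1.0, Double.NaN, 3.0)

// Only NaN cells are replaced; nulls are left alone (see fillNA for both).
val filled = df.fillNaNs { colsOf<Double>() }.with { 0.0 }
```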
+ * + * For more information: {@include [DocumentationUrls.Fill.FillNaNs]} + */ +internal interface FillNaNs { + + /** @include [Update.Usage] {@arg [Update.UpdateOperationArg] [fillNaNs][fillNaNs]} */ + interface Usage +} -public fun DataFrame.fillNaNs(cols: Iterable>): Update = - fillNaNs { cols.toColumnSet() } +/** {@arg [SelectingColumns.OperationArg] [fillNaNs][fillNaNs]} */ +internal interface SetFillNaNsOperationArg + +/** + * @include [FillNaNs] {@comment Description of the fillNaNs operation.} + * @include [LineBreak] + * @include [Update.Columns] {@comment Description of what this function expects the user to do: select columns} + * ## This Fill NaNs Overload + */ +private interface CommonFillNaNsFunctionDoc + +/** + * @include [CommonFillNaNsFunctionDoc] + * @include [SelectingColumns.Dsl.WithExample] {@include [SetFillNaNsOperationArg]} + * @include [Update.DslParam] + */ +public fun DataFrame.fillNaNs(columns: ColumnsSelector): Update = + update(columns).where { it.isNaN } + +/** + * @include [CommonFillNaNsFunctionDoc] + * @include [SelectingColumns.ColumnNames.WithExample] {@include [SetFillNaNsOperationArg]} + * @include [Update.ColumnNamesParam] + */ +public fun DataFrame.fillNaNs(vararg columns: String): Update = + fillNaNs { columns.toColumns() } + +/** + * @include [CommonFillNaNsFunctionDoc] + * @include [SelectingColumns.KProperties.WithExample] {@include [SetFillNaNsOperationArg]} + * @include [Update.KPropertiesParam] + */ +public fun DataFrame.fillNaNs(vararg columns: KProperty): Update = + fillNaNs { columns.toColumns() } + +/** + * @include [CommonFillNaNsFunctionDoc] + * @include [SelectingColumns.ColumnAccessors.WithExample] {@include [SetFillNaNsOperationArg]} + * @include [Update.ColumnAccessorsParam] + */ +public fun DataFrame.fillNaNs(vararg columns: ColumnReference): Update = + fillNaNs { columns.toColumns() } + +/** + * TODO this will be deprecated [PR #286](https://github.com/Kotlin/dataframe/pull/320) + */ +public fun DataFrame.fillNaNs(columns: Iterable>): Update = + fillNaNs { columns.toColumnSet() } // endregion // region fillNA -public fun DataFrame.fillNA(cols: ColumnsSelector): Update = - update(cols).where { it.isNA } +/** + * ## The Fill NA Operation + * + * Replaces [`NA`][NA] values with given value or expression. + * Specific case of [update]. + * + * Check out the [`fillNA` Operation Usage][FillNA.Usage]. 
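A quick sketch of typical usage (hypothetical data):

```kotlin
import org.jetbrains.kotlinx.dataframe.api.*

val df = dataFrameOf("x")(1.0, Double.NaN, null)

// Both the NaN cell and the null cell are replaced with 0.0.
val filled = df.fillNA { colsOf<Double?>() }.with { 0.0 }
```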
+ * + * For more information: {@include [DocumentationUrls.Fill.FillNA]} + */ +internal interface FillNA { + + /** @include [Update.Usage] {@arg [Update.UpdateOperationArg] [fillNA][fillNA]} */ + interface Usage +} -public fun DataFrame.fillNA(vararg cols: String): Update = - fillNA { cols.toColumns() } +/** {@arg [SelectingColumns.OperationArg] [fillNA][fillNA]} */ +internal interface SetFillNAOperationArg + +/** + * @include [FillNA] {@comment Description of the fillNA operation.} + * @include [LineBreak] + * @include [Update.Columns] {@comment Description of what this function expects the user to do: select columns} + * ## This Fill NA Overload + */ +private interface CommonFillNAFunctionDoc + +/** + * @include [CommonFillNAFunctionDoc] + * @include [SelectingColumns.Dsl.WithExample] {@include [SetFillNAOperationArg]} + * @include [Update.DslParam] + */ +public fun DataFrame.fillNA(columns: ColumnsSelector): Update = + update(columns).where { it.isNA } + +/** + * @include [CommonFillNAFunctionDoc] + * @include [SelectingColumns.ColumnNames.WithExample] {@include [SetFillNAOperationArg]} + * @include [Update.ColumnNamesParam] + */ +public fun DataFrame.fillNA(vararg columns: String): Update = + fillNA { columns.toColumns() } + +/** + * @include [CommonFillNAFunctionDoc] + * @include [SelectingColumns.KProperties.WithExample] {@include [SetFillNAOperationArg]} + * @include [Update.KPropertiesParam] + */ +public fun DataFrame.fillNA(vararg columns: KProperty): Update = + fillNA { columns.toColumns() } + +/** + * @include [CommonFillNAFunctionDoc] + * @include [SelectingColumns.ColumnAccessors.WithExample] {@include [SetFillNAOperationArg]} + * @include [Update.ColumnAccessorsParam] + */ +public fun DataFrame.fillNA(vararg columns: ColumnReference): Update = + fillNA { columns.toColumns() } + +/** + * TODO this will be deprecated [PR #286](https://github.com/Kotlin/dataframe/pull/320) + */ +public fun DataFrame.fillNA(columns: Iterable>): Update = + fillNA { columns.toColumnSet() } -public fun DataFrame.fillNA(vararg cols: KProperty): Update = - fillNA { cols.toColumns() } +// endregion -public fun DataFrame.fillNA(vararg cols: ColumnReference): Update = - fillNA { cols.toColumns() } +/** @param columns The {@include [SelectingColumns.DslLink]} used to select the columns of this [DataFrame] to drop rows in. */ +private interface DropDslParam -public fun DataFrame.fillNA(cols: Iterable>): Update = - fillNA { cols.toColumnSet() } +/** @param columns The {@include [SelectingColumns.KPropertiesLink]} used to select the columns of this [DataFrame] to drop rows in. */ +private interface DropKPropertiesParam -// endregion +/** @param columns The {@include [SelectingColumns.ColumnNamesLink]} used to select the columns of this [DataFrame] to drop rows in. */ +private interface DropColumnNamesParam + +/** @param columns The {@include [SelectingColumns.ColumnAccessors]} used to select the columns of this [DataFrame] to drop rows in. */ +private interface DropColumnAccessorsParam // region dropNulls -public fun DataFrame.dropNulls(whereAllNull: Boolean = false, selector: ColumnsSelector): DataFrame { - val cols = this[selector] +/** + * ## The Drop Nulls Operation + * + * Removes rows with `null` values. Specific case of [drop][DataFrame.drop]. + * + * Optionally, you can select which columns to operate on (see {@include [SelectingColumnsLink]}). + * Also, you can supply `whereAllNull = true` to only drop rows where all selected cells are `null`. 
By default, + * rows are dropped if any of the selected cells are `null`. + * + * For more information: {@include [DocumentationUrls.Drop.DropNulls]} + */ +internal interface DropNulls { + + /** + * @param whereAllNull `false` by default. + * If `true`, rows are dropped if all selected cells are `null`. + * If `false`, rows are dropped if any of the selected cells is `null`. + */ + interface WhereAllNullParam +} + +/** {@arg [SelectingColumns.OperationArg] [dropNulls][dropNulls]} */ +private interface SetDropNullsOperationArg + +/** + * @include [DropNulls] {@comment Description of the dropNulls operation.} + * ## This Drop Nulls Overload + */ +private interface CommonDropNullsFunctionDoc + +/** + * @include [CommonDropNullsFunctionDoc] + * @include [SelectingColumns.Dsl.WithExample] {@include [SetDropNullsOperationArg]} + * `df.`[dropNulls][dropNulls]`(whereAllNull = true) { `[colsOf][colsOf]`<`[Double][Double]`>() }` + * @include [DropNulls.WhereAllNullParam] + * @include [DropDslParam] + */ +public fun DataFrame.dropNulls(whereAllNull: Boolean = false, columns: ColumnsSelector): DataFrame { + val cols = this[columns] return if (whereAllNull) drop { row -> cols.all { col -> col[row] == null } } else drop { row -> cols.any { col -> col[row] == null } } } +/** + * @include [CommonDropNullsFunctionDoc] + * This overload operates on all columns in the [DataFrame]. + * @include [DropNulls.WhereAllNullParam] + */ public fun DataFrame.dropNulls(whereAllNull: Boolean = false): DataFrame = dropNulls(whereAllNull) { all() } -public fun DataFrame.dropNulls(vararg cols: KProperty<*>, whereAllNull: Boolean = false): DataFrame = - dropNulls(whereAllNull) { cols.toColumns() } - -public fun DataFrame.dropNulls(vararg cols: String, whereAllNull: Boolean = false): DataFrame = - dropNulls(whereAllNull) { cols.toColumns() } - -public fun DataFrame.dropNulls(vararg cols: AnyColumnReference, whereAllNull: Boolean = false): DataFrame = - dropNulls(whereAllNull) { cols.toColumns() } - -public fun DataFrame.dropNulls(cols: Iterable, whereAllNull: Boolean = false): DataFrame = - dropNulls(whereAllNull) { cols.toColumnSet() } - +/** + * @include [CommonDropNullsFunctionDoc] + * @include [SelectingColumns.KProperties.WithExample] {@include [SetDropNullsOperationArg]} + * `df.`[dropNulls][dropNulls]`(Person::length, whereAllNull = true)` + * @include [DropNulls.WhereAllNullParam] + * @include [DropKPropertiesParam] + */ +public fun DataFrame.dropNulls(vararg columns: KProperty<*>, whereAllNull: Boolean = false): DataFrame = + dropNulls(whereAllNull) { columns.toColumns() } + +/** + * @include [CommonDropNullsFunctionDoc] + * @include [SelectingColumns.ColumnNames.WithExample] {@include [SetDropNullsOperationArg]} + * `df.`[dropNulls][dropNulls]`("length", whereAllNull = true)` + * @include [DropNulls.WhereAllNullParam] + * @include [DropColumnNamesParam] + */ +public fun DataFrame.dropNulls(vararg columns: String, whereAllNull: Boolean = false): DataFrame = + dropNulls(whereAllNull) { columns.toColumns() } + +/** + * @include [CommonDropNullsFunctionDoc] + * @include [SelectingColumns.ColumnAccessors.WithExample] {@include [SetDropNullsOperationArg]} + * `df.`[dropNulls][dropNulls]`(length, whereAllNull = true)` + * @include [DropNulls.WhereAllNullParam] + * @include [DropColumnAccessorsParam] + */ +public fun DataFrame.dropNulls(vararg columns: AnyColumnReference, whereAllNull: Boolean = false): DataFrame = + dropNulls(whereAllNull) { columns.toColumns() } + +/** + * TODO this will be deprecated [PR 
#286](https://github.com/Kotlin/dataframe/pull/320) + */ +public fun DataFrame.dropNulls( + columns: Iterable, + whereAllNull: Boolean = false, +): DataFrame = + dropNulls(whereAllNull) { columns.toColumnSet() } + +/** + * ## The Drop Nulls Operation + * + * Removes `null` values from this [DataColumn], adjusting the type accordingly. + */ public fun DataColumn.dropNulls(): DataColumn = (if (!hasNulls()) this else filter { it != null }) as DataColumn @@ -122,28 +354,98 @@ public fun DataColumn.dropNulls(): DataColumn = // region dropNA -public fun DataFrame.dropNA(whereAllNA: Boolean = false, selector: ColumnsSelector): DataFrame { - val cols = this[selector] +/** + * ## The Drop `NA` Operation + * + * Removes rows with [`NA`][NA] values. Specific case of [drop][DataFrame.drop]. + * + * Optionally, you can select which columns to operate on (see {@include [SelectingColumnsLink]}). + * Also, you can supply `whereAllNA = true` to only drop rows where all selected cells are [`NA`][NA]. By default, + * rows are dropped if any of the selected cells are [`NA`][NA]. + * + * For more information: {@include [DocumentationUrls.Drop.DropNA]} + */ +internal interface DropNA { + + /** + * @param whereAllNA `false` by default. + * If `true`, rows are dropped if all selected cells are [`NA`][NA]. + * If `false`, rows are dropped if any of the selected cells is [`NA`][NA]. + */ + interface WhereAllNAParam +} +/** {@arg [SelectingColumns.OperationArg] [dropNA][dropNA]} */ +private interface SetDropNAOperationArg + +/** + * @include [DropNA] {@comment Description of the dropNA operation.} + * ## This Drop NA Overload + */ +private interface CommonDropNAFunctionDoc + +/** + * @include [CommonDropNAFunctionDoc] + * @include [SelectingColumns.Dsl.WithExample] {@include [SetDropNAOperationArg]} + * `df.`[dropNA][dropNA]`(whereAllNA = true) { `[colsOf][colsOf]`<`[Double][Double]`>() }` + * @include [DropNA.WhereAllNAParam] + * @include [DropDslParam] + */ +public fun DataFrame.dropNA(whereAllNA: Boolean = false, columns: ColumnsSelector): DataFrame { + val cols = this[columns] return if (whereAllNA) drop { cols.all { this[it].isNA } } else drop { cols.any { this[it].isNA } } } -public fun DataFrame.dropNA(vararg cols: KProperty<*>, whereAllNA: Boolean = false): DataFrame = - dropNA(whereAllNA) { cols.toColumns() } - -public fun DataFrame.dropNA(vararg cols: String, whereAllNA: Boolean = false): DataFrame = - dropNA(whereAllNA) { cols.toColumns() } - -public fun DataFrame.dropNA(vararg cols: AnyColumnReference, whereAllNA: Boolean = false): DataFrame = - dropNA(whereAllNA) { cols.toColumns() } - -public fun DataFrame.dropNA(cols: Iterable, whereAllNA: Boolean = false): DataFrame = - dropNA(whereAllNA) { cols.toColumnSet() } - +/** + * @include [CommonDropNAFunctionDoc] + * @include [SelectingColumns.KProperties.WithExample] {@include [SetDropNAOperationArg]} + * `df.`[dropNA][dropNA]`(Person::length, whereAllNA = true)` + * @include [DropNA.WhereAllNAParam] + * @include [DropKPropertiesParam] + */ +public fun DataFrame.dropNA(vararg columns: KProperty<*>, whereAllNA: Boolean = false): DataFrame = + dropNA(whereAllNA) { columns.toColumns() } + +/** + * @include [CommonDropNAFunctionDoc] + * @include [SelectingColumns.ColumnNames.WithExample] {@include [SetDropNAOperationArg]} + * `df.`[dropNA][dropNA]`("length", whereAllNA = true)` + * @include [DropNA.WhereAllNAParam] + * @include [DropColumnNamesParam] + */ +public fun DataFrame.dropNA(vararg columns: String, whereAllNA: Boolean = false): DataFrame = + 
dropNA(whereAllNA) { columns.toColumns() } + +/** + * @include [CommonDropNAFunctionDoc] + * @include [SelectingColumns.ColumnAccessors.WithExample] {@include [SetDropNAOperationArg]} + * `df.`[dropNA][dropNA]`(length, whereAllNA = true)` + * @include [DropNA.WhereAllNAParam] + * @include [DropColumnAccessorsParam] + */ +public fun DataFrame.dropNA(vararg columns: AnyColumnReference, whereAllNA: Boolean = false): DataFrame = + dropNA(whereAllNA) { columns.toColumns() } + +/** + * TODO this will be deprecated [PR #286](https://github.com/Kotlin/dataframe/pull/320) + */ +public fun DataFrame.dropNA(columns: Iterable, whereAllNA: Boolean = false): DataFrame = + dropNA(whereAllNA) { columns.toColumnSet() } + +/** + * @include [CommonDropNAFunctionDoc] + * This overload operates on all columns in the [DataFrame]. + * @include [DropNA.WhereAllNAParam] + */ public fun DataFrame.dropNA(whereAllNA: Boolean = false): DataFrame = dropNA(whereAllNA) { all() } +/** + * ## The Drop `NA` Operation + * + * Removes [`NA`][NA] values from this [DataColumn], adjusting the type accordingly. + */ public fun DataColumn.dropNA(): DataColumn = when (typeClass) { Double::class, Float::class -> filter { !it.isNA }.cast() @@ -154,28 +456,101 @@ public fun DataColumn.dropNA(): DataColumn = // region dropNaNs -public fun DataFrame.dropNaNs(whereAllNaN: Boolean = false, selector: ColumnsSelector): DataFrame { - val cols = this[selector] +/** + * ## The Drop `NaN` Operation + * + * Removes rows with [`NaN`][Double.isNaN] values. Specific case of [drop][DataFrame.drop]. + * + * Optionally, you can select which columns to operate on (see {@include [SelectingColumnsLink]}). + * Also, you can supply `whereAllNaN = true` to only drop rows where all selected cells are [`NaN`][Double.isNaN]. By default, + * rows are dropped if any of the selected cells are [`NaN`][Double.isNaN]. + * + * For more information: {@include [DocumentationUrls.Drop.DropNaNs]} + */ +internal interface DropNaNs { + + /** + * @param whereAllNaN `false` by default. + * If `true`, rows are dropped if all selected cells are [`NaN`][Double.isNaN]. + * If `false`, rows are dropped if any of the selected cells is [`NaN`][Double.isNaN]. 
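For example, with made-up data the flag changes which rows survive (a sketch, not part of the generated docs):

```kotlin
import org.jetbrains.kotlinx.dataframe.api.*

val df = dataFrameOf("x", "y")(
    1.0, Double.NaN,        // row 0: one NaN
    Double.NaN, Double.NaN, // row 1: all NaN
)

df.dropNaNs { colsOf<Double>() }                     // drops both rows (any selected cell is NaN)
df.dropNaNs(whereAllNaN = true) { colsOf<Double>() } // drops only row 1 (all selected cells are NaN)
```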
+ */ + interface WhereAllNaNParam +} +/** {@arg [SelectingColumns.OperationArg] [dropNaNs][dropNaNs]} */ +private interface SetDropNaNsOperationArg + +/** + * @include [DropNaNs] {@comment Description of the dropNaNs operation.} + * ## This Drop NaNs Overload + */ +private interface CommonDropNaNsFunctionDoc + +/** + * @include [CommonDropNaNsFunctionDoc] + * @include [SelectingColumns.Dsl.WithExample] {@include [SetDropNaNsOperationArg]} + * `df.`[dropNaNs][dropNaNs]`(whereAllNaN = true) { `[colsOf][colsOf]`<`[Double][Double]`>() }` + * @include [DropNaNs.WhereAllNaNParam] + * @include [DropDslParam] + */ +public fun DataFrame.dropNaNs(whereAllNaN: Boolean = false, columns: ColumnsSelector): DataFrame { + val cols = this[columns] return if (whereAllNaN) drop { cols.all { this[it].isNaN } } else drop { cols.any { this[it].isNaN } } } -public fun DataFrame.dropNaNs(vararg cols: KProperty<*>, whereAllNaN: Boolean = false): DataFrame = - dropNaNs(whereAllNaN) { cols.toColumns() } - -public fun DataFrame.dropNaNs(vararg cols: String, whereAllNaN: Boolean = false): DataFrame = - dropNaNs(whereAllNaN) { cols.toColumns() } - -public fun DataFrame.dropNaNs(vararg cols: AnyColumnReference, whereAllNaN: Boolean = false): DataFrame = - dropNaNs(whereAllNaN) { cols.toColumns() } - -public fun DataFrame.dropNaNs(cols: Iterable, whereAllNaN: Boolean = false): DataFrame = - dropNaNs(whereAllNaN) { cols.toColumnSet() } - +/** + * @include [CommonDropNaNsFunctionDoc] + * @include [SelectingColumns.KProperties.WithExample] {@include [SetDropNaNsOperationArg]} + * `df.`[dropNaNs][dropNaNs]`(Person::length, whereAllNaN = true)` + * @include [DropNaNs.WhereAllNaNParam] + * @include [DropKPropertiesParam] + */ +public fun DataFrame.dropNaNs(vararg columns: KProperty<*>, whereAllNaN: Boolean = false): DataFrame = + dropNaNs(whereAllNaN) { columns.toColumns() } + +/** + * @include [CommonDropNaNsFunctionDoc] + * @include [SelectingColumns.ColumnNames.WithExample] {@include [SetDropNaNsOperationArg]} + * `df.`[dropNaNs][dropNaNs]`("length", whereAllNaN = true)` + * @include [DropNaNs.WhereAllNaNParam] + * @include [DropColumnNamesParam] + */ +public fun DataFrame.dropNaNs(vararg columns: String, whereAllNaN: Boolean = false): DataFrame = + dropNaNs(whereAllNaN) { columns.toColumns() } + +/** + * @include [CommonDropNaNsFunctionDoc] + * @include [SelectingColumns.ColumnAccessors.WithExample] {@include [SetDropNaNsOperationArg]} + * `df.`[dropNaNs][dropNaNs]`(length, whereAllNaN = true)` + * @include [DropNaNs.WhereAllNaNParam] + * @include [DropColumnAccessorsParam] + */ +public fun DataFrame.dropNaNs(vararg columns: AnyColumnReference, whereAllNaN: Boolean = false): DataFrame = + dropNaNs(whereAllNaN) { columns.toColumns() } + +/** + * TODO this will be deprecated [PR #286](https://github.com/Kotlin/dataframe/pull/320) + */ +public fun DataFrame.dropNaNs( + columns: Iterable, + whereAllNaN: Boolean = false, +): DataFrame = + dropNaNs(whereAllNaN) { columns.toColumnSet() } + +/** + * @include [CommonDropNaNsFunctionDoc] + * This overload operates on all columns in the [DataFrame]. + * @include [DropNaNs.WhereAllNaNParam] + */ public fun DataFrame.dropNaNs(whereAllNaN: Boolean = false): DataFrame = dropNaNs(whereAllNaN) { all() } +/** + * ## The Drop `NaN` Operation + * + * Removes [`NaN`][NaN] values from this [DataColumn], adjusting the type accordingly. 
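A column-level sketch (hypothetical values):

```kotlin
import org.jetbrains.kotlinx.dataframe.api.*

val temperatures = columnOf(21.5, Double.NaN, 23.0)
val cleaned = temperatures.dropNaNs() // 21.5, 23.0
```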
+ */ public fun DataColumn.dropNaNs(): DataColumn = when (typeClass) { Double::class, Float::class -> filter { !it.isNaN }.cast() diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/add.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/add.kt index 58c3c48047..2a9375869b 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/add.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/add.kt @@ -50,7 +50,8 @@ public fun DataFrame.add(vararg columns: AnyBaseCol): DataFrame = addA * @throws [UnequalColumnSizesException] if columns in expected result have different sizes * @return new [DataFrame] with added columns */ -public fun DataFrame.addAll(columns: Iterable): DataFrame = dataFrameOf(columns() + columns).cast() +public fun DataFrame.addAll(columns: Iterable): DataFrame = + dataFrameOf(columns() + columns).cast() /** * Creates new [DataFrame] with all columns from given [dataFrames] added to the end of original [DataFrame.columns] list. @@ -75,7 +76,8 @@ public fun DataFrame.add(vararg dataFrames: AnyFrame): DataFrame = add * @return new [DataFrame] with added columns */ @JvmName("addAllFrames") -public fun DataFrame.addAll(dataFrames: Iterable): DataFrame = addAll(dataFrames.flatMap { it.columns() }) +public fun DataFrame.addAll(dataFrames: Iterable): DataFrame = + addAll(dataFrames.flatMap { it.columns() }) // endregion @@ -96,7 +98,16 @@ public interface AddDataRow : DataRow { public fun AnyRow.newValue(): C } -public typealias AddExpression = Selector, C> +/** + * [AddExpression] is used to express or select any instance of `R` using the given instance of [AddDataRow]`` as + * `this` and `it`. + * + * Shorthand for: + * ```kotlin + * AddDataRow.(it: AddDataRow) -> R + * ``` + */ +public typealias AddExpression = Selector, R> /** * Creates new column using row [expression] and adds it to the end of [DataFrame] @@ -166,11 +177,15 @@ public class AddDsl(@PublishedApi internal val df: DataFrame) : ColumnsCon return df.mapToColumn("", Infer.Nulls, expression) } - public inline infix fun String.from(noinline expression: RowExpression): Boolean = add(this, Infer.Nulls, expression) + public inline infix fun String.from(noinline expression: RowExpression): Boolean = + add(this, Infer.Nulls, expression) // TODO: use path instead of name - public inline infix fun ColumnAccessor.from(noinline expression: RowExpression): Boolean = name().from(expression) - public inline infix fun KProperty.from(noinline expression: RowExpression): Boolean = add(name, Infer.Nulls, expression) + public inline infix fun ColumnAccessor.from(noinline expression: RowExpression): Boolean = + name().from(expression) + + public inline infix fun KProperty.from(noinline expression: RowExpression): Boolean = + add(name, Infer.Nulls, expression) public infix fun String.from(column: AnyColumnReference): Boolean = add(column.rename(this)) public inline infix fun ColumnAccessor.from(column: ColumnReference): Boolean = name() from column diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/gather.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/gather.kt index a65dfcc1f0..119ab658a7 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/gather.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/gather.kt @@ -2,10 +2,9 @@ package org.jetbrains.kotlinx.dataframe.api import org.jetbrains.kotlinx.dataframe.ColumnsSelector import org.jetbrains.kotlinx.dataframe.DataFrame -import org.jetbrains.kotlinx.dataframe.Predicate 
+import org.jetbrains.kotlinx.dataframe.RowValueFilter import org.jetbrains.kotlinx.dataframe.columns.ColumnAccessor import org.jetbrains.kotlinx.dataframe.columns.ColumnReference -import org.jetbrains.kotlinx.dataframe.impl.and import org.jetbrains.kotlinx.dataframe.impl.api.gatherImpl import org.jetbrains.kotlinx.dataframe.impl.columnName import org.jetbrains.kotlinx.dataframe.impl.columns.toColumns @@ -13,18 +12,33 @@ import kotlin.reflect.KProperty import kotlin.reflect.KType import kotlin.reflect.typeOf -public fun DataFrame.gather(selector: ColumnsSelector): Gather = Gather( - this, selector, null, typeOf(), - { it }, null -) -public fun DataFrame.gather(vararg columns: String): Gather = gather { columns.toColumns() } -public fun DataFrame.gather(vararg columns: ColumnReference): Gather = gather { columns.toColumns() } -public fun DataFrame.gather(vararg columns: KProperty): Gather = gather { columns.toColumns() } +public fun DataFrame.gather(selector: ColumnsSelector): Gather = + Gather( + df = this, + columns = selector, + filter = null, + keyType = typeOf(), + keyTransform = { it }, + valueTransform = null, + ) -public fun Gather.where(filter: Predicate): Gather = copy(filter = this.filter and filter) -public fun Gather.notNull(): Gather = where { it != null } as Gather +public fun DataFrame.gather(vararg columns: String): Gather = + gather { columns.toColumns() } -public fun Gather.explodeLists(): Gather = copy(explode = true) +public fun DataFrame.gather(vararg columns: ColumnReference): Gather = + gather { columns.toColumns() } + +public fun DataFrame.gather(vararg columns: KProperty): Gather = + gather { columns.toColumns() } + +public fun Gather.where(filter: RowValueFilter): Gather = + copy(filter = this.filter and filter) + +public fun Gather.notNull(): Gather = + where { it != null } as Gather + +public fun Gather.explodeLists(): Gather = + copy(explode = true) public inline fun Gather.mapKeys(noinline transform: (String) -> K): Gather = copy(keyTransform = transform as ((String) -> Nothing), keyType = typeOf()) as Gather @@ -35,7 +49,7 @@ public fun Gather.mapValues(transform: (C) -> R): Gathe public data class Gather( internal val df: DataFrame, internal val columns: ColumnsSelector, - internal val filter: ((C) -> Boolean)? = null, + internal val filter: RowValueFilter? = null, internal val keyType: KType? = null, internal val keyTransform: ((String) -> K), internal val valueTransform: ((C) -> R)? 
= null, diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/reorder.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/reorder.kt index 31e8ee8695..f6eb6cdfd9 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/reorder.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/reorder.kt @@ -1,8 +1,8 @@ package org.jetbrains.kotlinx.dataframe.api import org.jetbrains.kotlinx.dataframe.AnyCol +import org.jetbrains.kotlinx.dataframe.ColumnExpression import org.jetbrains.kotlinx.dataframe.ColumnsSelector -import org.jetbrains.kotlinx.dataframe.DataColumn import org.jetbrains.kotlinx.dataframe.DataFrame import org.jetbrains.kotlinx.dataframe.Selector import org.jetbrains.kotlinx.dataframe.columns.ColumnReference @@ -21,15 +21,20 @@ public data class Reorder( } public fun DataFrame.reorder(selector: ColumnsSelector): Reorder = Reorder(this, selector, false) -public fun DataFrame.reorder(vararg columns: ColumnReference): Reorder = reorder { columns.toColumns() } +public fun DataFrame.reorder(vararg columns: ColumnReference): Reorder = + reorder { columns.toColumns() } + public fun DataFrame.reorder(vararg columns: KProperty): Reorder = reorder { columns.toColumns() } public fun DataFrame.reorder(vararg columns: String): Reorder = reorder { columns.toColumns() } -public fun > Reorder.by(expression: Selector, V>): DataFrame = reorderImpl(false, expression) +public fun > Reorder.by(expression: ColumnExpression): DataFrame = + reorderImpl(false, expression) -public fun Reorder.byName(desc: Boolean = false): DataFrame = if (desc) byDesc { it.name } else by { it.name } +public fun Reorder.byName(desc: Boolean = false): DataFrame = + if (desc) byDesc { it.name } else by { it.name } -public fun > Reorder.byDesc(expression: Selector, V>): DataFrame = reorderImpl(true, expression) +public fun > Reorder.byDesc(expression: ColumnExpression): DataFrame = + reorderImpl(true, expression) public fun > DataFrame.reorderColumnsBy( dfs: Boolean = true, @@ -37,6 +42,7 @@ public fun > DataFrame.reorderColumnsBy( expression: Selector ): DataFrame = Reorder(this, { if (dfs) allDfs(true) else all() }, dfs).reorderImpl(desc, expression) -public fun DataFrame.reorderColumnsByName(dfs: Boolean = true, desc: Boolean = false): DataFrame = reorderColumnsBy(dfs, desc) { name() } +public fun DataFrame.reorderColumnsByName(dfs: Boolean = true, desc: Boolean = false): DataFrame = + reorderColumnsBy(dfs, desc) { name() } // endregion diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/select.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/select.kt index e134fd9dd0..4a91e748f6 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/select.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/select.kt @@ -10,12 +10,28 @@ import kotlin.reflect.KProperty // region DataFrame -public fun DataFrame.select(columns: ColumnsSelector): DataFrame = get(columns).toDataFrame().cast() -public fun DataFrame.select(vararg columns: KProperty<*>): DataFrame = select(columns.map { it.columnName }) -public fun DataFrame.select(vararg columns: String): DataFrame = select(columns.asIterable()) -public fun DataFrame.select(vararg columns: AnyColumnReference): DataFrame = select { columns.toColumns() } -@JvmName("selectT") -public fun DataFrame.select(columns: Iterable): DataFrame = columns.map { get(it) }.toDataFrame().cast() -public fun DataFrame.select(columns: Iterable): DataFrame = select { columns.toColumnSet() } +public fun 
DataFrame.select(columns: ColumnsSelector): DataFrame = + get(columns).toDataFrame().cast() + +public fun DataFrame.select(vararg columns: KProperty<*>): DataFrame = + select(columns.asIterable()) + +@JvmName("selectKPropertyIterable") +public fun DataFrame.select(columns: Iterable>): DataFrame = + select(columns.map { it.columnName }) + +public fun DataFrame.select(vararg columns: String): DataFrame = + select(columns.asIterable()) + +@JvmName("selectStringIterable") +public fun DataFrame.select(columns: Iterable): DataFrame = + columns.map { get(it) }.toDataFrame().cast() + +public fun DataFrame.select(vararg columns: AnyColumnReference): DataFrame = + select { columns.toColumns() } + +@JvmName("selectAnyColumnReferenceIterable") +public fun DataFrame.select(columns: Iterable): DataFrame = + select { columns.toColumnSet() } // endregion diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/toDataFrame.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/toDataFrame.kt index 34e1e21158..444661f8bf 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/toDataFrame.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/toDataFrame.kt @@ -24,7 +24,8 @@ public inline fun Iterable.toDataFrame(): DataFrame = toDataFr properties() } -public inline fun Iterable.toDataFrame(noinline body: CreateDataFrameDsl.() -> Unit): DataFrame = createDataFrameImpl(T::class, body) +public inline fun Iterable.toDataFrame(noinline body: CreateDataFrameDsl.() -> Unit): DataFrame = + createDataFrameImpl(T::class, body) public inline fun Iterable.toDataFrame(vararg props: KProperty<*>, maxDepth: Int = 0): DataFrame = toDataFrame { @@ -77,6 +78,7 @@ public fun Iterable>.toDataFrameFromPairs(): Da when (path.size) { 0 -> { } + 1 -> { val name = path[0] val uniqueName = nameGenerator.addUnique(name) @@ -86,6 +88,7 @@ public fun Iterable>.toDataFrameFromPairs(): Da columns.add(col.rename(uniqueName)) columnIndices[uniqueName] = index } + else -> { val name = path[0] val uniqueName = columnGroupName.getOrPut(name) { @@ -273,7 +276,12 @@ public fun Map>.toDataFrame(): AnyFrame { @JvmName("toDataFrameColumnPathAnyNullable") public fun Map>.toDataFrame(): AnyFrame { - return map { it.key to DataColumn.createWithTypeInference(it.key.last(), it.value.asList()) }.toDataFrameFromPairs() + return map { + it.key to DataColumn.createWithTypeInference( + it.key.last(), + it.value.asList() + ) + }.toDataFrameFromPairs() } // endregion diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/update.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/update.kt index 8c719d8702..0404b3938e 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/update.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/update.kt @@ -1,113 +1,450 @@ package org.jetbrains.kotlinx.dataframe.api -import org.jetbrains.kotlinx.dataframe.AnyRow -import org.jetbrains.kotlinx.dataframe.ColumnsSelector -import org.jetbrains.kotlinx.dataframe.DataColumn -import org.jetbrains.kotlinx.dataframe.DataFrame -import org.jetbrains.kotlinx.dataframe.DataFrameExpression -import org.jetbrains.kotlinx.dataframe.DataRow -import org.jetbrains.kotlinx.dataframe.RowColumnExpression -import org.jetbrains.kotlinx.dataframe.RowValueExpression -import org.jetbrains.kotlinx.dataframe.RowValueFilter -import org.jetbrains.kotlinx.dataframe.Selector +import org.jetbrains.kotlinx.dataframe.* +import org.jetbrains.kotlinx.dataframe.api.Update.Usage +import 
org.jetbrains.kotlinx.dataframe.columns.ColumnGroup import org.jetbrains.kotlinx.dataframe.columns.ColumnReference +import org.jetbrains.kotlinx.dataframe.documentation.* import org.jetbrains.kotlinx.dataframe.impl.api.asFrameImpl import org.jetbrains.kotlinx.dataframe.impl.api.updateImpl import org.jetbrains.kotlinx.dataframe.impl.api.updateWithValuePerColumnImpl import org.jetbrains.kotlinx.dataframe.impl.columns.toColumnSet import org.jetbrains.kotlinx.dataframe.impl.columns.toColumns import org.jetbrains.kotlinx.dataframe.impl.headPlusArray -import org.jetbrains.kotlinx.dataframe.index import kotlin.reflect.KProperty +/** + * ## The Update Operation + * + * Returns the [DataFrame] with changed values in some cells + * (column types can not be changed). + * + * Check out the [`update` Operation Usage][Usage]. + * + * For more information: {@include [DocumentationUrls.Update]} + */ +public data class Update( + val df: DataFrame, + val filter: RowValueFilter?, + val columns: ColumnsSelector, +) { + public fun cast(): Update = + Update(df, filter as RowValueFilter?, columns as ColumnsSelector) + + /** This argument providing the (clickable) name of the update-like function. + * Note: If clickable, make sure to [alias][your type]. + */ + internal interface UpdateOperationArg + + /** + * ## {@includeArg [UpdateOperationArg]} Operation Usage + * + * {@includeArg [UpdateOperationArg]} `{ `[columns][SelectingColumns]` }` + * + * - `[.`[where][Update.where]` { `[rowValueCondition][SelectingRows.RowValueCondition.WithExample]` } ]` + * + * - `[.`[at][Update.at]` (`[rowIndices][CommonUpdateAtFunctionDoc.RowIndicesParam]`) ]` + * + * - `.`[with][Update.with]` { `[rowExpression][ExpressionsGivenRow.RowValueExpression.WithExample]` } + * | .`[notNull][Update.notNull]` { `[rowExpression][ExpressionsGivenRow.RowValueExpression.WithExample]` } + * | .`[perCol][Update.perCol]` { `[colExpression][ExpressionsGivenColumn.ColumnExpression.WithExample]` } + * | .`[perRowCol][Update.perRowCol]` { `[rowColExpression][ExpressionsGivenRowAndColumn.RowColumnExpression.WithExample]` } + * | .`[withValue][Update.withValue]`(value) + * | .`[withNull][Update.withNull]`() + * | .`[withZero][Update.withZero]`() + * | .`[asFrame][Update.asFrame]` { `[dataFrameExpression][ExpressionsGivenDataFrame.DataFrameExpression.WithExample]` }` + * {@arg [UpdateOperationArg] [update][update]}{@comment The default name of the `update` operation function name.} + */ + public interface Usage + + /** The columns to update need to be selected. See {@include [SelectingColumnsLink]} for all the selecting options. */ + public interface Columns + + /** @param columns The {@include [SelectingColumns.DslLink]} used to select the columns of this [DataFrame] to update. */ + internal interface DslParam + + /** @param columns The {@include [SelectingColumns.ColumnAccessorsLink]} of this [DataFrame] to update. */ + internal interface ColumnAccessorsParam + + /** @param columns The {@include [SelectingColumns.KPropertiesLink]} corresponding to columns of this [DataFrame] to update. */ + internal interface KPropertiesParam + + /** @param columns The {@include [SelectingColumns.ColumnNamesLink]} belonging to this [DataFrame] to update. 
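For instance, selecting the columns to update by name and chaining a row filter (a sketch with hypothetical data):

```kotlin
import org.jetbrains.kotlinx.dataframe.api.*

val df = dataFrameOf("name", "age")(
    "Alice", 23,
    "Bob", -1,
)

// Only rows where the selected cell equals -1 are updated.
val fixed = df.update("age").where { it == -1 }.with { 0 }
```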
*/ + internal interface ColumnNamesParam +} + +// region update + +/** {@arg [SelectingColumns.OperationArg] [update][update]} */ +private interface SetSelectingColumnsOperationArg + +/** + * @include [Update] {@comment Description of the update operation.} + * @include [LineBreak] + * @include [Update.Columns] {@comment Description of what this function expects the user to do: select columns} + * ## This Update Overload + */ +private interface CommonUpdateFunctionDoc + +/** + * ## Optional + * Combine `df.`[update][update]`(...).`[with][Update.with]` { ... }` + * into `df.`[update][update]`(...) { ... }` + */ +private interface UpdateWithNote + +/** + * @include [CommonUpdateFunctionDoc] + * @include [SelectingColumns.Dsl.WithExample] {@include [SetSelectingColumnsOperationArg]} + * @include [Update.DslParam] + */ public fun DataFrame.update(columns: ColumnsSelector): Update = Update(this, null, columns) -public fun DataFrame.update(columns: Iterable>): Update = - update { columns.toColumnSet() } - +/** + * @include [CommonUpdateFunctionDoc] + * @include [SelectingColumns.ColumnNames.WithExample] {@include [SetSelectingColumnsOperationArg]} + * @include [UpdateWithNote] + * @include [Update.ColumnNamesParam] + */ public fun DataFrame.update(vararg columns: String): Update = update { columns.toColumns() } + +/** + * @include [CommonUpdateFunctionDoc] + * @include [SelectingColumns.KProperties.WithExample] {@include [SetSelectingColumnsOperationArg]} + * @include [UpdateWithNote] + * @include [Update.KPropertiesParam] + */ public fun DataFrame.update(vararg columns: KProperty): Update = update { columns.toColumns() } + +/** + * @include [CommonUpdateFunctionDoc] + * @include [SelectingColumns.ColumnAccessors.WithExample] {@include [SetSelectingColumnsOperationArg]} + * @include [UpdateWithNote] + * @include [Update.ColumnAccessorsParam] + */ public fun DataFrame.update(vararg columns: ColumnReference): Update = update { columns.toColumns() } -public data class Update( - val df: DataFrame, - val filter: RowValueFilter?, - val columns: ColumnsSelector -) { - public fun cast(): Update = - Update(df, filter as RowValueFilter?, columns as ColumnsSelector) -} +/** + * TODO this will be deprecated [PR #286](https://github.com/Kotlin/dataframe/pull/320) + */ +public fun DataFrame.update(columns: Iterable>): Update = + update { columns.toColumnSet() } + +// endregion +/** ## Where + * @include [SelectingRows.RowValueCondition.WithExample] + * {@arg [SelectingRows.FirstOperationArg] [update][update]} + * {@arg [SelectingRows.SecondOperationArg] [where][where]} + * + * @param predicate The [row value filter][RowValueFilter] to select the rows to update. + */ public fun Update.where(predicate: RowValueFilter): Update = copy(filter = filter and predicate) +/** ## At + * Only update the columns at certain given [row indices][CommonUpdateAtFunctionDoc.RowIndicesParam]: + * + * Either a [Collection]<[Int]>, an [IntRange], or just `vararg` indices. + * + * For example: + * + * `df.`[update][update]` { city }.`[at][at]`(5..10).`[with][with]` { "Paris" }` + * + * `df.`[update][update]` { name }.`[at][at]`(1, 2, 3, 4).`[with][with]` { "Empty" }` + * + * ## This At Overload + */ +private interface CommonUpdateAtFunctionDoc { + + /** The indices of the rows to update. Either a [Collection]<[Int]>, an [IntRange], or just `vararg` indices. */ + interface RowIndicesParam +} + +/** + * @include [CommonUpdateAtFunctionDoc] + * + * Provide a [Collection]<[Int]> of row indices to update. 
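For instance (hypothetical frame and indices):

```kotlin
import org.jetbrains.kotlinx.dataframe.api.*

val df = dataFrameOf("city")("Berlin", "Paris", "Rome", "Oslo")

// Rows 1 and 3 (0-based) become "Unknown"; the Collection<Int> overload is used here.
val updated = df.update { colsOf<String>() }.at(listOf(1, 3)).with { "Unknown" }
```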
+ * + * @param rowIndices {@include [CommonUpdateAtFunctionDoc.RowIndicesParam]} + */ public fun Update.at(rowIndices: Collection): Update = where { index in rowIndices } + +/** + * @include [CommonUpdateAtFunctionDoc] + * + * Provide a `vararg` of [Ints][Int] of row indices to update. + * + * @param rowIndices {@include [CommonUpdateAtFunctionDoc.RowIndicesParam]} + */ public fun Update.at(vararg rowIndices: Int): Update = at(rowIndices.toSet()) + +/** + * @include [CommonUpdateAtFunctionDoc] + * + * Provide an [IntRange] of row indices to update. + * + * @param rowRange {@include [CommonUpdateAtFunctionDoc.RowIndicesParam]} + */ public fun Update.at(rowRange: IntRange): Update = where { index in rowRange } +/** ## Per Row Col + * @include [ExpressionsGivenRowAndColumn.RowColumnExpression.WithExample] + * {@arg [ExpressionsGivenRowAndColumn.OperationArg] [update][update]` { age \}.`[perRowCol][perRowCol]} + * + * ## See Also + * - {@include [SeeAlsoWith]} + * - {@include [SeeAlsoPerCol]} + * @param expression The {@include [ExpressionsGivenRowAndColumn.RowColumnExpressionLink]} to provide a new value for every selected cell giving its row and column. + */ public fun Update.perRowCol(expression: RowColumnExpression): DataFrame = updateImpl { row, column, _ -> expression(row, column) } +/** [Update per row col][Update.perRowCol] to provide a new value for every selected cell giving its row and column. */ +private interface SeeAlsoPerRowCol + +/** ## Update Expression + * @see ExpressionsGivenRow.RowValueExpression.WithExample + * @see ExpressionsGivenRow.AddDataRowNote + */ // doc processor plugin does not work with type aliases yet public typealias UpdateExpression = AddDataRow.(C) -> R +/** ## With + * {@include [ExpressionsGivenRow.RowValueExpression.WithExample]} + * {@arg [ExpressionsGivenRow.OperationArg] [update][update]` { city \}.`[with][with]} + * + * ## Note + * @include [ExpressionsGivenRow.AddDataRowNote] + * ## See Also + * - {@include [SeeAlsoPerCol]} + * - {@include [SeeAlsoPerRowCol]} + * @param expression The {@include [ExpressionsGivenRow.RowValueExpressionLink]} to update the rows with. + */ public fun Update.with(expression: UpdateExpression): DataFrame = updateImpl { row, _, value -> expression(row, value) } +/** [Update with][Update.with] to provide a new value for every selected cell giving its row. */ +private interface SeeAlsoWith + +/** ## As Frame + * + * Updates selected [column group][ColumnGroup] as a [DataFrame] with the given [expression]. + * + * {@include [ExpressionsGivenDataFrame.DataFrameExpression.WithExample]} + * {@arg [ExpressionsGivenDataFrame.OperationArg] `df.`[update][update]` { name \}.`[asFrame][asFrame]} + * @param expression The {@include [ExpressionsGivenDataFrame.DataFrameExpressionLink]} to replace the selected column group with. 
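A sketch of what this can look like, assuming `name` is a column group with a `firstName` child column and that accessors were generated from a data schema (all names here are hypothetical, so the snippet is not self-contained):

```kotlin
// Replace the `name` column group with a version that keeps only `firstName`.
val updated = df.update { name }.asFrame { select { firstName } }
```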
+ */ public fun Update>.asFrame(expression: DataFrameExpression>): DataFrame = asFrameImpl(expression) +@Deprecated( + "Useless unless in combination with `withValue(null)`, but then users can just use `with { null }`...", + ReplaceWith("this as Update") +) public fun Update.asNullable(): Update = this as Update +/** ## Per Col + * + * Per Col can be used for two different types of operations: + * - {@include [ExpressionsGivenColumn.ColumnExpression]} + * - {@include [UpdatePerColMap]} + * + * ## See Also + * - {@include [SeeAlsoWith]} + * - {@include [SeeAlsoPerRowCol]} + * ## This Per Col Overload + */ +private interface CommonUpdatePerColDoc + +/** Provide a new value for every selected cell per column using a [Map][Map]`<`[colName: String][String]`, value: C>` + * or [DataRow][DataRow] as Map. */ +private interface UpdatePerColMap + +/** + * @include [CommonUpdatePerColDoc] + * @include [UpdatePerColMap] + * + * For example: + * + * `val defaults = {@includeArg [CommonUpdatePerColMapDoc]}` + * + * `df.`[update][update]` { name and age }.`[where][Update.where]` { ... }.`[perCol][perCol]`(defaults)` + * + * @throws IllegalArgumentException if a value for a selected cell's column is not defined in [values\]. + */ +private interface CommonUpdatePerColMapDoc + +/** + * @include [CommonUpdatePerColMapDoc] + * {@arg [CommonUpdatePerColMapDoc] `[mapOf][mapOf]`("name" to "Empty", "age" to 0)} + * + * @param values The [Map]<[String], Value> to provide a new value for every selected cell. + * For each selected column, there must be a value in the map with the same name. + */ public fun Update.perCol(values: Map): DataFrame = updateWithValuePerColumnImpl { values[it.name()] ?: throw IllegalArgumentException("Update value for column ${it.name()} is not defined") } +/** + * @include [CommonUpdatePerColMapDoc] + * {@arg [CommonUpdatePerColMapDoc] df.`[getRows][DataFrame.getRows]`(`[listOf][listOf]`(0))` + * + * `.`[update][update]` { name \}.`[with][Update.with]` { "Empty" \}` + * + * `.`[update][update]` { age \}.`[with][Update.with]` { 0 \}` + * + * `.first()} + * + * @param values The [DataRow] to provide a new value for every selected cell. + */ public fun Update.perCol(values: AnyRow): DataFrame = perCol(values.toMap() as Map) -public fun Update.perCol(valueSelector: Selector, C>): DataFrame = +/** + * @include [CommonUpdatePerColDoc] + * @include [ExpressionsGivenColumn.ColumnExpression.WithExample] + * {@arg [ExpressionsGivenColumn.OperationArg] [update][update]` { age \}.`[perCol][perCol]} + * + * @param valueSelector The {@include [ExpressionsGivenColumn.ColumnExpressionLink]} to provide a new value for every selected cell giving its column. + */ +public fun Update.perCol(valueSelector: ColumnExpression): DataFrame = updateWithValuePerColumnImpl(valueSelector) +/** [Update per col][Update.perCol] to provide a new value for every selected cell giving its column. */ +private interface SeeAlsoPerCol + +/** Chains up two row value filters together. 
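Both `perCol` flavours documented above, sketched against the toy frame; `mean(skipNA = true)` mirrors the KDoc example and assumes nullable numeric columns are selected:

```kotlin
// Map flavour: one default value per selected column
// (every selected column must have an entry in the map).
val defaults = mapOf("name" to "Empty", "age" to 0)
val reset = df.update("name", "age").perCol(defaults)

// Column-expression flavour: derive the replacement from the column itself.
val filledWithMeans = df.update { colsOf<Number?>() }.perCol { mean(skipNA = true) }
```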
*/ internal infix fun RowValueFilter?.and(other: RowValueFilter): RowValueFilter { if (this == null) return other val thisExp = this return { thisExp(this, it) && other(this, it) } } +/** @include [Update.notNull] */ public fun Update.notNull(): Update = - copy(filter = filter and { it != null }) as Update + where { it != null } as Update -public fun Update.notNull(expression: RowValueExpression): DataFrame = - notNull().updateImpl { row, column, value -> - expression(row, value) - } +/** + * ## Not Null + * + * Selects only the rows where the values in the selected columns are not null. + * + * Shorthand for: [update][update]` { ... }.`[where][Update.where]` { it != null }` + * + * For example: + * + * `df.`[update][update]` { `[colsOf][colsOf]`<`[Number][Number]`?>() }.`[notNull][notNull]`()`.[perCol][Update.perCol] `{ `[mean][mean]`() }` + * + * ### Optional + * Provide an [expression] to update the rows with. + * This combines [with][Update.with] with [notNull]. + * + * For example: + * + * `df.`[update][update]` { city }.`[notNull][Update.notNull]` { it.`[toUpperCase][String.toUpperCase]`() }` + * + * @param expression Optional {@include [ExpressionsGivenRow.RowExpressionLink]} to update the rows with. + */ +public fun Update.notNull(expression: UpdateExpression): DataFrame = + notNull().with(expression) +/** + * @include [CommonUpdateFunctionDoc] + * ### This overload is a combination of [update] and [with][Update.with]. + * + * @include [SelectingColumns.ColumnAccessors] + * + * {@include [ExpressionsGivenRow.RowValueExpression.WithExample]} + * {@arg [ExpressionsGivenRow.OperationArg] [update][update]`("city")` } + * + * @include [Update.ColumnAccessorsParam] + * @param expression The {@include [ExpressionsGivenRow.RowValueExpressionLink]} to update the rows with. + */ public fun DataFrame.update( firstCol: ColumnReference, vararg cols: ColumnReference, - expression: RowValueExpression + expression: UpdateExpression ): DataFrame = update(*headPlusArray(firstCol, cols)).with(expression) +/** + * @include [CommonUpdateFunctionDoc] + * ### This overload is a combination of [update] and [with][Update.with]. + * + * @include [SelectingColumns.KProperties] + * + * {@include [ExpressionsGivenRow.RowValueExpression.WithExample]} + * {@arg [ExpressionsGivenRow.OperationArg] [update][update]`("city")` } + * + * @include [Update.KPropertiesParam] + * @param expression The {@include [ExpressionsGivenRow.RowValueExpressionLink]} to update the rows with. + */ public fun DataFrame.update( firstCol: KProperty, vararg cols: KProperty, - expression: RowValueExpression + expression: UpdateExpression ): DataFrame = update(*headPlusArray(firstCol, cols)).with(expression) +/** + * @include [CommonUpdateFunctionDoc] + * ### This overload is a combination of [update] and [with][Update.with]. + * + * @include [SelectingColumns.ColumnNames] + * + * {@include [ExpressionsGivenRow.RowValueExpression.WithExample]} + * {@arg [ExpressionsGivenRow.OperationArg] [update][update]`("city")` } + * + * @include [Update.ColumnNamesParam] + * @param expression The {@include [ExpressionsGivenRow.RowValueExpressionLink]} to update the rows with. 
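Both `notNull` forms from the section above, as a short sketch (the second line mirrors the KDoc example and assumes nullable numeric columns):

```kotlin
// Update only the non-null cells, leaving nulls untouched.
val shouted = df.update { colsOf<String?>() }.notNull { it.uppercase() }

// Plain notNull() simply narrows the selection, like `.where { it != null }`.
val filled = df.update { colsOf<Number?>() }.notNull().perCol { mean() }
```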
+ */ public fun DataFrame.update( firstCol: String, vararg cols: String, - expression: RowValueExpression + expression: UpdateExpression ): DataFrame = update(*headPlusArray(firstCol, cols)).with(expression) -public fun Update.withNull(): DataFrame = asNullable().withValue(null) +/** + * Specific version of [with] that simply sets the value of each selected row to {@includeArg [CommonSpecificWithDocFirstArg]}. + * + * For example: + * + * `df.`[update][update]` { id }.`[where][Update.where]` { it < 0 }.`{@includeArg [CommonSpecificWithDocSecondArg]}` + */ +private interface CommonSpecificWithDoc + +/** Arg for the resulting value */ +private interface CommonSpecificWithDocFirstArg + +/** Arg for the function call */ +private interface CommonSpecificWithDocSecondArg + +/** + * ## With Null + * @include [CommonSpecificWithDoc] + * {@arg [CommonSpecificWithDocFirstArg] `null`} + * {@arg [CommonSpecificWithDocSecondArg] [withNull][withNull]`()} + */ +public fun Update.withNull(): DataFrame = with { null } +/** + * ## With Zero + * @include [CommonSpecificWithDoc] + * {@arg [CommonSpecificWithDocFirstArg] `0`} + * {@arg [CommonSpecificWithDocSecondArg] [withZero][withZero]`()} + */ public fun Update.withZero(): DataFrame = updateWithValuePerColumnImpl { 0 as C } +/** + * ## With Value + * @include [CommonSpecificWithDoc] + * {@arg [CommonSpecificWithDocFirstArg] [value]} + * {@arg [CommonSpecificWithDocSecondArg] [withValue][withValue]`(-1)} + * + * @param value The value to set the selected rows to. In contrast to [with][Update.with], this must be the same exact type. + */ public fun Update.withValue(value: C): DataFrame = with { value } diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/AccessApi.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/AccessApi.kt new file mode 100644 index 0000000000..032a10cf19 --- /dev/null +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/AccessApi.kt @@ -0,0 +1,97 @@ +package org.jetbrains.kotlinx.dataframe.documentation + +import org.jetbrains.kotlinx.dataframe.documentation.AccessApi.* + +/** + * ## Access APIs + * + * By nature, data frames are dynamic objects, column labels depend on the input source and also new columns could be added + * or deleted while wrangling. Kotlin, in contrast, is a statically typed language and all types are defined and verified + * ahead of execution. That's why creating a flexible, handy, and, at the same time, safe API to a data frame is tricky. + * + * In `Kotlin DataFrame` we provide four different ways to access columns, and, while they're essentially different, they + * look pretty similar in the data wrangling DSL. These include: + * - {@include [ExtensionPropertiesApiLink]} + * - {@include [KPropertiesApiLink]} + * - {@include [ColumnAccessorsApiLink]} + * - {@include [StringApiLink]} + * + * For more information: {@include [DocumentationUrls.AccessApis]} + * + * @comment We can link to here whenever we want to explain the different access APIs. + */ +internal interface AccessApi { + + /** API: + * - {@include [ExtensionPropertiesApiLink]} + * - {@include [KPropertiesApiLink]} + * - {@include [ColumnAccessorsApiLink]} + * - {@include [StringApiLink]} + * */ + interface AnyApiLinks + + /** + * String API. + * In this [AccessApi], columns are accessed by a [String] representing their name. + * Type-checking is done at runtime, name-checking too. 
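The specific `with` shortcuts above (`withNull`, `withZero`, `withValue`) in one sketch; the sentinel values are assumptions for illustration:

```kotlin
val blanked = df.update("city").where { it == "N/A" }.withNull()   // same as .with { null }
val zeroed = df.update { colsOf<Int?>() }.withZero()               // every selected cell becomes 0
val capped = df.update { colsOf<Int?>() }
    .where { it != null && it > 120 }
    .withValue(120)                                                // must be exactly the column type C
```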
+     *
+     * For more information: {@include [DocumentationUrls.AccessApis.StringApi]}
+     *
+     * For example:
+     * @sample [org.jetbrains.kotlinx.dataframe.samples.api.ApiLevels.strings]
+     */
+    interface StringApi
+
+    /** [String API][StringApi] */
+    interface StringApiLink
+
+    /**
+     * Column Accessors API.
+     * In this [AccessApi], every column has a descriptor;
+     * a variable that represents its name and type.
+     *
+     * For more information: {@include [DocumentationUrls.AccessApis.ColumnAccessorsApi]}
+     *
+     * For example:
+     * @sample [org.jetbrains.kotlinx.dataframe.samples.api.ApiLevels.accessors3]
+     */
+    interface ColumnAccessorsApi
+
+    /** [Column Accessors API][AccessApi.ColumnAccessorsApi] */
+    interface ColumnAccessorsApiLink
+
+    /**
+     * KProperties API.
+     * In this [AccessApi], columns are accessed by the
+     * [`KProperty`](https://kotlinlang.org/docs/reflection.html#property-references)
+     * of some class.
+     * The name and type of the column should match the name and type of the property, respectively.
+     *
+     * For more information: {@include [DocumentationUrls.AccessApis.KPropertiesApi]}
+     *
+     * For example:
+     * @sample [org.jetbrains.kotlinx.dataframe.samples.api.ApiLevels.kproperties1]
+     */
+    interface KPropertiesApi
+
+    /** [KProperties API][KPropertiesApi] */
+    interface KPropertiesApiLink
+
+    /**
+     * Extension Properties API.
+     * In this [AccessApi], extension access properties are generated based on the dataframe schema.
+     * The name and type of properties are inferred from the name and type of the corresponding columns.
+     *
+     * For more information: {@include [DocumentationUrls.AccessApis.ExtensionPropertiesApi]}
+     *
+     * For example:
+     * @sample [org.jetbrains.kotlinx.dataframe.samples.api.ApiLevels.extensionProperties1]
+     */
+    interface ExtensionPropertiesApi
+
+    /** [Extension Properties API][ExtensionPropertiesApi] */
+    interface ExtensionPropertiesApiLink
+}
+
+/** [Access API][AccessApi] */
+internal interface AccessApiLink
diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/DocumentationUrls.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/DocumentationUrls.kt
new file mode 100644
index 0000000000..4ae40ffc14
--- /dev/null
+++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/DocumentationUrls.kt
@@ -0,0 +1,66 @@
+package org.jetbrains.kotlinx.dataframe.documentation
+
+private interface DocumentationUrls {
+
+    interface NameArg
+
+    /** See {@includeArg [NameArg]} on the documentation website.
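Side by side, the four access APIs described above boil down to four ways of referring to the same column. A sketch against the toy frame (the `Person` class is an assumption, and the extension-property line only compiles once properties have been generated for the schema):

```kotlin
val age by column<Int>()           // column accessor
data class Person(val age: Int)    // backing class for the KProperties API

val byName = df["age"]             // String API: names and types are checked at runtime
val byAccessor = df[age]           // Column Accessors API
val byProperty = df[Person::age]   // KProperties API
// val byExtension = df.age        // Extension Properties API, once code generation has run
```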
*/ + interface Text + + /** https://kotlin.github.io/dataframe */ + interface Url + + interface DataRow { + + /** [{@include [Text]}{@arg [NameArg] Row Expressions}]({@include [Url]}/datarow.html#row-expressions) */ + interface RowExpressions + + /** [{@include [Text]}{@arg [NameArg] Row Conditions}]({@include [Url]}/datarow.html#row-conditions) */ + interface RowConditions + } + + /** [{@include [Text]}{@arg [NameArg] `update`}]({@include [Url]}/update.html) */ + interface Update + + /** [{@include [Text]}{@arg [NameArg] `fill`}]({@include [Url]}/fill.html) */ + interface Fill { + + /** [{@include [Text]}{@arg [NameArg] `fillNulls`}]({@include [Url]}/fill.html#fillnulls) */ + interface FillNulls + + /** [{@include [Text]}{@arg [NameArg] `fillNaNs`}]({@include [Url]}/fill.html#fillnans) */ + interface FillNaNs + + /** [{@include [Text]}{@arg [NameArg] `fillNA`}]({@include [Url]}/fill.html#fillna) */ + interface FillNA + } + + /** [{@include [Text]}{@arg [NameArg] `drop`}]({@include [Url]}/drop.html) */ + interface Drop { + + /** [{@include [Text]}{@arg [NameArg] `dropNulls`}]({@include [Url]}/drop.html#dropnulls) */ + interface DropNulls + + /** [{@include [Text]}{@arg [NameArg] `dropNaNs`}]({@include [Url]}/drop.html#dropnans) */ + interface DropNaNs + + /** [{@include [Text]}{@arg [NameArg] `dropNA`}]({@include [Url]}/drop.html#dropna) */ + interface DropNA + } + + /** [{@include [Text]}{@arg [NameArg] Access APIs}]({@include [Url]}/apilevels.html) */ + interface AccessApis { + + /** [{@include [Text]}{@arg [NameArg] String API}]({@include [Url]}/stringapi.html) */ + interface StringApi + + /** [{@include [Text]}{@arg [NameArg] Column Accessors API}]({@include [Url]}/columnaccessorsapi.html) */ + interface ColumnAccessorsApi + + /** [{@include [Text]}{@arg [NameArg] KProperties API}]({@include [Url]}/kpropertiesapi.html) */ + interface KPropertiesApi + + /** [{@include [Text]}{@arg [NameArg] Extension Properties API}]({@include [Url]}/extensionpropertiesapi.html) */ + interface ExtensionPropertiesApi + } +} diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/ExpressionsGivenColumn.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/ExpressionsGivenColumn.kt new file mode 100644 index 0000000000..3a0f611092 --- /dev/null +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/ExpressionsGivenColumn.kt @@ -0,0 +1,42 @@ +package org.jetbrains.kotlinx.dataframe.documentation + +import org.jetbrains.kotlinx.dataframe.* +import org.jetbrains.kotlinx.dataframe.api.* +import org.jetbrains.kotlinx.dataframe.documentation.ExpressionsGivenColumn.ColumnExpressionLink +import org.jetbrains.kotlinx.dataframe.ColumnExpression as DfColumnExpression + +/** + * ## Expressions Given Column + * Expressing values using a "Column Expression" can occur exclusively in a + * {@include [ColumnExpressionLink]}. + */ +internal interface ExpressionsGivenColumn { + + /** + * The key for an @arg that will define the operation name for the examples below. + * Make sure to [alias][your examples]. + */ + interface OperationArg + + /** {@arg [OperationArg] operation} */ + interface SetDefaultOperationArg + + /** Provide a new value for every selected cell given its column using a [column expression][DfColumnExpression]. 
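To make these link templates concrete: assuming the documentation processor substitutes `{@include ...}`, `{@arg ...}`, and `{@includeArg ...}` the way the rest of this patch relies on, `DocumentationUrls.Update` should expand roughly as follows:

```kotlin
// Template, as declared above:
//   [{@include [Text]}{@arg [NameArg] `update`}]({@include [Url]}/update.html)
//
// After substituting Text ("See {@includeArg [NameArg]} on the documentation website.")
// and Url ("https://kotlin.github.io/dataframe"), the KDoc placed at the call site is roughly:
//   [See `update` on the documentation website.](https://kotlin.github.io/dataframe/update.html)
```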
*/ + interface ColumnExpression { + + /** + * {@include [ColumnExpression]} + * + * For example: + * + * `df.`{@includeArg [OperationArg]}` { `[mean][DataColumn.mean]`(skipNA = true) }` + * + * `df.`{@includeArg [OperationArg]}` { `[count][DataColumn.count]` { it > 10 } }` + * @include [SetDefaultOperationArg] + */ + interface WithExample + } + + /** [Column Expression][ColumnExpression] */ + interface ColumnExpressionLink +} diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/ExpressionsGivenDataFrame.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/ExpressionsGivenDataFrame.kt new file mode 100644 index 0000000000..eba50e1657 --- /dev/null +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/ExpressionsGivenDataFrame.kt @@ -0,0 +1,32 @@ +package org.jetbrains.kotlinx.dataframe.documentation + +import org.jetbrains.kotlinx.dataframe.DataFrame +import org.jetbrains.kotlinx.dataframe.api.* +import org.jetbrains.kotlinx.dataframe.documentation.ExpressionsGivenDataFrame.DataFrameExpressionLink +import org.jetbrains.kotlinx.dataframe.DataFrameExpression as DfDataFrameExpression + +/** + * ## Expressions Given DataFrame + * Expressing values using a "Data Frame Expression" can occur exclusively in a + * {@include [DataFrameExpressionLink]}. + */ +internal interface ExpressionsGivenDataFrame { + + interface OperationArg + + /** Provide a new value for every selected data frame using a [dataframe expression][DfDataFrameExpression]. */ + interface DataFrameExpression { + + /** + * @include [DataFrameExpression] + * + * For example: + * + * {@includeArg [OperationArg]}` { `[select][DataFrame.select]` { lastName } }` + */ + interface WithExample + } + + /** [Data Frame Expression][DataFrameExpression] */ + interface DataFrameExpressionLink +} diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/ExpressionsGivenRow.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/ExpressionsGivenRow.kt new file mode 100644 index 0000000000..aa3fa6d010 --- /dev/null +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/ExpressionsGivenRow.kt @@ -0,0 +1,90 @@ +package org.jetbrains.kotlinx.dataframe.documentation + +import org.jetbrains.kotlinx.dataframe.DataFrame +import org.jetbrains.kotlinx.dataframe.DataRow +import org.jetbrains.kotlinx.dataframe.api.* +import org.jetbrains.kotlinx.dataframe.documentation.ExpressionsGivenRow.* +import org.jetbrains.kotlinx.dataframe.RowExpression as DfRowExpression +import org.jetbrains.kotlinx.dataframe.RowValueExpression as DfRowValueExpression + +/** + * ## Expressions Given Row + * Expressing values using a "Row Expression" ({@include [DocumentationUrls.DataRow.RowExpressions]}) + * can occur in the following two types of operations: + * + * - Providing a new value for every selected cell given the row of that cell ({@include [RowExpressionLink]}), + * for instance in [map][DataFrame.map], [add][DataFrame.add], and [insert][DataFrame.insert] + * (using [RowExpression][DfRowExpression]). + * + * - Providing a new value for every selected cell given the row of that cell and its previous value ({@include [RowValueExpressionLink]}), + * for instance in [update.with][Update.with], and [convert.notNull][Convert.notNull] + * (using [RowValueExpression][DfRowValueExpression]). + * + * NOTE: + * + * @include [AddDataRowNote] + * + * A {@include [RowExpressionLink]} is similar to a {@include [RowConditionLink]} but that expects a [Boolean] as result. 
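For the data frame expression introduced a little earlier (`ExpressionsGivenDataFrame`), a rough sketch of the `update { ... }.asFrame { ... }` case; it assumes the docs' sample schema, where `name` is a column group with `firstName`/`lastName` and extension properties have been generated:

```kotlin
// Replace the selected column group with a frame computed from it.
val lastNamesOnly = df.update { name }.asFrame { select { lastName } }
```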
+ */ +internal interface ExpressionsGivenRow { + + /** + * The key for an @arg that will define the operation name for the examples below. + * Make sure to [alias][your examples]. + */ + interface OperationArg + + /** {@arg [OperationArg] operation} */ + interface SetDefaultOperationArg + + /** + * [update with][org.jetbrains.kotlinx.dataframe.api.Update.with]- and [add][org.jetbrains.kotlinx.dataframe.api.add]-like expressions use [AddDataRow] instead of [DataRow] as the DSL's receiver type. + * This is an extension to [RowValueExpression][DfRowValueExpression] and + * [RowExpression][DfRowExpression] that provides access to + * the modified/generated value of the preceding row ([AddDataRow.newValue]). + */ + interface AddDataRowNote + + /** Provide a new value for every selected cell given its row using a [row expression][DfRowExpression]. */ + interface RowExpression { + + /** + * {@include [RowExpression]} + * + * For example: + * + * `df.`{@includeArg [OperationArg]}` { name.firstName + " " + name.lastName }` + * + * `df.`{@includeArg [OperationArg]}` { 2021 - age }` + * @include [SetDefaultOperationArg] + */ + interface WithExample + } + + /** [Row Expression][RowExpression.WithExample] */ + interface RowExpressionLink + + /** Provide a new value for every selected cell given its row and its previous value using a + * [row value expression][DfRowValueExpression]. + */ + interface RowValueExpression { + + /** + * {@include [RowValueExpression]} + * + * For example: + * + * `df.`{@includeArg [OperationArg]}` { name.firstName + " from " + it }` + * + * `df.`{@includeArg [OperationArg]}` { it.uppercase() }` + * {@include [SetDefaultOperationArg]} + */ + interface WithExample + } + + /** [Row Value Expression][RowValueExpression.WithExample] */ + interface RowValueExpressionLink +} + +/** [Row Expression][ExpressionsGivenRow] */ +internal interface RowExpressionsLink diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/ExpressionsGivenRowAndColumn.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/ExpressionsGivenRowAndColumn.kt new file mode 100644 index 0000000000..c640597d6c --- /dev/null +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/ExpressionsGivenRowAndColumn.kt @@ -0,0 +1,45 @@ +package org.jetbrains.kotlinx.dataframe.documentation + +import org.jetbrains.kotlinx.dataframe.* +import org.jetbrains.kotlinx.dataframe.api.* +import org.jetbrains.kotlinx.dataframe.documentation.ExpressionsGivenRowAndColumn.RowColumnExpressionLink +import org.jetbrains.kotlinx.dataframe.RowColumnExpression as DfRowColumnExpression + +/** + * ## Expressions Given Row and Column + * Expressing values using a "Row-Column Expression" can occur exclusively in a + * {@include [RowColumnExpressionLink]}. + */ +internal interface ExpressionsGivenRowAndColumn { + + /** + * The key for an @arg that will define the operation name for the examples below. + * Make sure to [alias][your examples]. + */ + interface OperationArg + + /** {@arg [OperationArg] operation} */ + interface SetDefaultOperationArg + + /** Provide a new value for every selected cell given both its row and column using a [row-column expression][DfRowColumnExpression]. 
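The two expression shapes described above differ only in whether the previous cell value is in scope; a sketch against the toy frame:

```kotlin
// Row expression: only the row is available (add, map, insert, ...);
// `age` assumes generated extension properties, as in the KDoc example above.
val withYear = df.add("yearOfBirth") { 2021 - age }

// Row value expression: the row plus the cell's previous value (update.with, convert.notNull, ...).
val shoutedCities = df.update { colsOf<String?>() }.with { it?.uppercase() }
```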
*/ + interface RowColumnExpression { + + /** + * @include [RowColumnExpression] + * + * For example: + * + * `df.`{@includeArg [OperationArg]}` { row, col ->` + * + * `row.age / col.`[mean][DataColumn.mean]`(skipNA = true)` + * + * `}` + * + * @include [SetDefaultOperationArg] + */ + interface WithExample + } + + /** [Row Column Expression][RowColumnExpression] */ + interface RowColumnExpressionLink +} diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/NA.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/NA.kt new file mode 100644 index 0000000000..35251f23fd --- /dev/null +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/NA.kt @@ -0,0 +1,18 @@ +package org.jetbrains.kotlinx.dataframe.documentation + +import org.jetbrains.kotlinx.dataframe.api.dropNA +import org.jetbrains.kotlinx.dataframe.api.fillNA + +/** + * ## `NA` + * `NA` in Dataframe can be seen as "[NaN] or `null`". + * + * [Floats][Float] or [Doubles][Double] can be represented as [Float.NaN] or [Double.NaN], respectively, + * in cases where a mathematical operation is undefined, such as dividing by zero. + * + * You can also use [fillNA][fillNA] to replace `NAs` in certain columns with a given value or expression + * or [dropNA][dropNA] to drop rows with `NAs` in them. + * + * @see NaN + */ +internal interface NA diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/NaN.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/NaN.kt new file mode 100644 index 0000000000..18b9792947 --- /dev/null +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/NaN.kt @@ -0,0 +1,16 @@ +package org.jetbrains.kotlinx.dataframe.documentation + +import org.jetbrains.kotlinx.dataframe.api.dropNaNs +import org.jetbrains.kotlinx.dataframe.api.fillNaNs + +/** + * ## `NaN` + * [Floats][Float] or [Doubles][Double] can be represented as [Float.NaN] or [Double.NaN], respectively, + * in cases where a mathematical operation is undefined, such as dividing by zero. + * + * You can also use [fillNaNs][fillNaNs] to replace `NaNs` in certain columns with a given value or expression + * or [dropNaNs][dropNaNs] to drop rows with `NaNs` in them. 
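A small sketch of the `NaN`/`NA` helpers named above, assuming the frame has some `Double` columns:

```kotlin
// NaN handling: replace NaNs in Double columns with 0.0.
val zeroFilled = df.fillNaNs { colsOf<Double>() }.withZero()

// NA handling (null or NaN): drop rows where a selected column is NA.
val cleaned = df.dropNA { colsOf<Double?>() }
```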
+ * + * @see NA + */ +internal interface NaN diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/SelectingColumns.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/SelectingColumns.kt new file mode 100644 index 0000000000..755ae5e4ee --- /dev/null +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/SelectingColumns.kt @@ -0,0 +1,121 @@ +package org.jetbrains.kotlinx.dataframe.documentation + +import org.jetbrains.kotlinx.dataframe.ColumnSelector +import org.jetbrains.kotlinx.dataframe.ColumnsSelector +import org.jetbrains.kotlinx.dataframe.DataFrame +import org.jetbrains.kotlinx.dataframe.api.* +import org.jetbrains.kotlinx.dataframe.columns.ColumnReference +import org.jetbrains.kotlinx.dataframe.columns.ColumnSet +import org.jetbrains.kotlinx.dataframe.columns.SingleColumn +import org.jetbrains.kotlinx.dataframe.documentation.SelectingColumns.* +import kotlin.reflect.KProperty + +/** [Selecting Columns][SelectingColumns] */ +internal interface SelectingColumnsLink + +/** + * ## Selecting Columns + * Selecting columns for various operations (including but not limited to + * [DataFrame.select], [DataFrame.update], [DataFrame.gather], and [DataFrame.fillNulls]) + * can be done in the following ways: + * - {@include [Dsl.WithExample]} + * - {@include [ColumnNames.WithExample]} + * - {@include [ColumnAccessors.WithExample]} + * - {@include [KProperties.WithExample]} + */ +internal interface SelectingColumns { + + /** + * The key for an @arg that will define the operation name for the examples below. + * Make sure to [alias][your examples]. + */ + interface OperationArg + + /** {@arg [OperationArg] operation} */ + interface SetDefaultOperationArg + + /** Select or express columns using the Column(s) Selection DSL. + * (Any {@include [AccessApiLink]}). + * + * This DSL comes in the form of either a [Column Selector][ColumnSelector]- or [Columns Selector][ColumnsSelector] lambda, + * which operate in the {@include [ColumnSelectionDslLink]} or the {@include [ColumnsSelectionDslLink]} and + * expect you to return a [SingleColumn] or [ColumnSet], respectively. + */ + interface Dsl { + + /** {@include [Dsl]} + * + * For example: + * + * `df.`{@includeArg [OperationArg]}` { length `[and][ColumnsSelectionDsl.and]` age }` + * + * `df.`{@includeArg [OperationArg]}` { `[cols][ColumnsSelectionDsl.cols]`(1..5) }` + * + * `df.`{@includeArg [OperationArg]}` { `[colsOf][colsOf]`<`[Double][Double]`>() }` + * @include [SetDefaultOperationArg] + */ + interface WithExample + } + + /** [Columns selector DSL][Dsl.WithExample] */ + interface DslLink + + /** Select columns using their [column names][String] + * ({@include [AccessApi.StringApiLink]}). + */ + interface ColumnNames { + + /** {@include [ColumnNames]} + * + * For example: + * + * `df.`{@includeArg [OperationArg]}`("length", "age")` + * @include [SetDefaultOperationArg] + */ + interface WithExample + } + + /** [Column names][ColumnNames.WithExample] */ + interface ColumnNamesLink + + /** Select columns using [column accessors][ColumnReference] + * ({@include [AccessApi.ColumnAccessorsApiLink]}). 
+ */ + interface ColumnAccessors { + + /** {@include [ColumnAccessors]} + * + * For example: + * + * `val length by `[column][column]`<`[Double][Double]`>()` + * + * `val age by `[column][column]`<`[Double][Double]`>()` + * + * `df.`{@includeArg [OperationArg]}`(length, age)` + * @include [SetDefaultOperationArg] + */ + interface WithExample + } + + /** [Column references][ColumnAccessors.WithExample] */ + interface ColumnAccessorsLink + + /** Select columns using [KProperties][KProperty] ({@include [AccessApi.KPropertiesApiLink]}). */ + interface KProperties { + + /** {@include [KProperties]} + * + * For example: + * ```kotlin + * data class Person(val length: Double, val age: Double) + * ``` + * + * `df.`{@includeArg [OperationArg]}`(Person::length, Person::age)` + * @include [SetDefaultOperationArg] + */ + interface WithExample + } + + /** [KProperties][KProperties.WithExample] */ + interface KPropertiesLink +} diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/SelectingRows.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/SelectingRows.kt new file mode 100644 index 0000000000..c560e0966b --- /dev/null +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/SelectingRows.kt @@ -0,0 +1,81 @@ +package org.jetbrains.kotlinx.dataframe.documentation + +import org.jetbrains.kotlinx.dataframe.RowFilter +import org.jetbrains.kotlinx.dataframe.RowValueFilter +import org.jetbrains.kotlinx.dataframe.api.* +import org.jetbrains.kotlinx.dataframe.documentation.SelectingRows.RowConditionLink +import org.jetbrains.kotlinx.dataframe.documentation.SelectingRows.RowValueConditionLink +import org.jetbrains.kotlinx.dataframe.index + +/** + * ## Selecting Rows + * Selecting rows that satisfy a "Row Condition" ({@include [DocumentationUrls.DataRow.RowConditions]}) + * can occur in the following two types of operations: + * - Selecting entire rows ({@include [RowConditionLink]}), for instance in [filter], [drop], [first], and [count] + * (using [RowFilter]). + * - Selecting parts of rows using a `where` operation after selecting columns ({@include [RowValueConditionLink]}), + * such as with [update], [gather], and [format] + * (using [RowValueFilter]). + * + * A Row Condition is similar to a {@include [RowExpressionsLink]} but expects a [Boolean] as result. + */ +internal interface SelectingRows { + + /** + * The key for an @arg that will define the operation name for the examples below. + * Make sure to [alias][your examples]. + */ + interface FirstOperationArg + + /** + * The key for an @arg that will define the operation name for the examples below. + * Make sure to [alias][your examples]. + */ + interface SecondOperationArg + + /** {@arg [FirstOperationArg] operation}{@arg [SecondOperationArg] where} */ + interface SetDefaultOperationArg + + /** [Entire-Row Condition][EntireRowCondition.WithExample] */ + interface RowConditionLink + + /** Filter or find rows to operate on using a [row filter][RowFilter]. 
*/ + interface EntireRowCondition { + + /** + * {@include [EntireRowCondition]} + * + * For example: + * + * `df.`{@includeArg [FirstOperationArg]}` { `[index][index]`() % 2 == 0 }` + * + * `df.`{@includeArg [FirstOperationArg]}` { `[diff][diff]` { age } == 0 }` + * @include [SetDefaultOperationArg] + */ + interface WithExample + } + + /** [Row-Value Condition][RowValueCondition.WithExample] */ + interface RowValueConditionLink + + /** Filter or find rows to operate on after [selecting columns][SelectingColumns] using a + * [row value filter][RowValueFilter]. + */ + interface RowValueCondition { + + /** + * {@include [RowValueCondition]} + * + * For example: + * + * `df.`{@includeArg [FirstOperationArg]}` { length }.`{@includeArg [SecondOperationArg]}` { it > 10.0 }` + * + * `df.`{@includeArg [FirstOperationArg]}` { `[cols][ColumnsSelectionDsl.cols]`(1..5) }.`{@includeArg [SecondOperationArg]}` { `[index][index]`() > 4 && city != "Paris" }` + * @include [SetDefaultOperationArg] + */ + interface WithExample + } +} + +/** [Row Condition][SelectingRows] */ +internal interface RowConditionLink diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/utils.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/utils.kt new file mode 100644 index 0000000000..3660dd5397 --- /dev/null +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/utils.kt @@ -0,0 +1,4 @@ +package org.jetbrains.kotlinx.dataframe.documentation + +/** ## ‎ */ +internal interface LineBreak diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/convertTo.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/convertTo.kt index 4050782550..e11e8bc0b3 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/convertTo.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/convertTo.kt @@ -31,6 +31,7 @@ import org.jetbrains.kotlinx.dataframe.columns.ColumnPath import org.jetbrains.kotlinx.dataframe.columns.FrameColumn import org.jetbrains.kotlinx.dataframe.exceptions.ExcessiveColumnsException import org.jetbrains.kotlinx.dataframe.exceptions.TypeConversionException +import org.jetbrains.kotlinx.dataframe.impl.columns.toColumnSet import org.jetbrains.kotlinx.dataframe.impl.emptyPath import org.jetbrains.kotlinx.dataframe.impl.schema.createEmptyColumn import org.jetbrains.kotlinx.dataframe.impl.schema.createEmptyDataFrame @@ -265,8 +266,8 @@ internal fun AnyFrame.convertToImpl( dsl.fillers.forEach { filler -> val paths = result.getColumnPaths(filler.columns) - missingPaths.removeAll(paths) - result = result.update(paths).with { + missingPaths.removeAll(paths.toSet()) + result = result.update { paths.toColumnSet() }.with { filler.expr(this, this) } } diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/gather.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/gather.kt index af046e5efb..4844853ddf 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/gather.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/gather.kt @@ -60,9 +60,9 @@ internal fun Gather.gatherImpl( } // explode keys and values - when { - keysColumn != null && valuesColumn != null -> df = df.explode(keysColumn, valuesColumn) - else -> df = df.explode(keysColumn ?: valuesColumn!!) + df = when { + keysColumn != null && valuesColumn != null -> df.explode(keysColumn, valuesColumn) + else -> df.explode(keysColumn ?: valuesColumn!!) 
} // explode values in lists @@ -76,7 +76,7 @@ internal fun Gather.gatherImpl( val value = col[row] when { explode && value is List<*> -> { - val filtered = (value as List).filter(filter) + val filtered = (value as List).filter { filter(it) } val transformed = valueTransform?.let { filtered.map(it) } ?: filtered keys[colIndex] to transformed } diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/reorder.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/reorder.kt index db04a39c19..2ce9f4db8b 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/reorder.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/reorder.kt @@ -1,9 +1,9 @@ package org.jetbrains.kotlinx.dataframe.impl.api import org.jetbrains.kotlinx.dataframe.AnyFrame +import org.jetbrains.kotlinx.dataframe.ColumnExpression import org.jetbrains.kotlinx.dataframe.DataColumn import org.jetbrains.kotlinx.dataframe.DataFrame -import org.jetbrains.kotlinx.dataframe.Selector import org.jetbrains.kotlinx.dataframe.api.Reorder import org.jetbrains.kotlinx.dataframe.api.asColumnGroup import org.jetbrains.kotlinx.dataframe.api.cast @@ -23,7 +23,7 @@ import kotlin.reflect.typeOf internal fun > Reorder.reorderImpl( desc: Boolean, - expression: Selector, V> + expression: ColumnExpression ): DataFrame { data class ColumnInfo( val treeNode: TreeNode, diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/update.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/update.kt index 02e72afa2e..9f05b8f961 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/update.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/update.kt @@ -2,12 +2,12 @@ package org.jetbrains.kotlinx.dataframe.impl.api import org.jetbrains.kotlinx.dataframe.AnyFrame import org.jetbrains.kotlinx.dataframe.AnyRow +import org.jetbrains.kotlinx.dataframe.ColumnExpression import org.jetbrains.kotlinx.dataframe.DataColumn import org.jetbrains.kotlinx.dataframe.DataFrame import org.jetbrains.kotlinx.dataframe.DataFrameExpression import org.jetbrains.kotlinx.dataframe.DataRow import org.jetbrains.kotlinx.dataframe.RowValueFilter -import org.jetbrains.kotlinx.dataframe.Selector import org.jetbrains.kotlinx.dataframe.api.AddDataRow import org.jetbrains.kotlinx.dataframe.api.Update import org.jetbrains.kotlinx.dataframe.api.asColumnGroup @@ -36,7 +36,7 @@ internal fun Update.updateImpl(expression: (AddDataRow, DataColu if (df.isEmpty()) df else df.replace(columns).with { it.updateImpl(df, filter, expression) } -internal fun Update.updateWithValuePerColumnImpl(selector: Selector, C>) = +internal fun Update.updateWithValuePerColumnImpl(selector: ColumnExpression) = if (df.isEmpty()) df else { df.replace(columns).with { diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/columns/constructors.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/columns/constructors.kt index 5ba0ffaaf4..68f50e0e47 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/columns/constructors.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/columns/constructors.kt @@ -189,7 +189,7 @@ internal fun Array>.toColumns(): ColumnSet = map { it.to @PublishedApi internal fun Array>.toColumns(): ColumnSet = asIterable().toColumnSet() -internal fun Iterable.toColumns() = map { it.toColumnAccessor() }.toColumnSet() +internal fun Iterable.toColumns(): ColumnSet = map { it.toColumnAccessor() }.toColumnSet() // 
endregion
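Finally, the two kinds of row conditions documented in `SelectingRows` above, sketched against the toy frame:

```kotlin
// Entire-row condition (RowFilter): decides row by row.
val everySecond = df.filter { index() % 2 == 0 }

// Row-value condition (RowValueFilter): decides per selected cell, after picking columns.
val cappedAges = df.update { colsOf<Int?>() }
    .where { it != null && it > 100 }
    .withValue(100)
```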