diff --git a/core/build.gradle.kts b/core/build.gradle.kts index 1c82c1a55..5b3a57cfb 100644 --- a/core/build.gradle.kts +++ b/core/build.gradle.kts @@ -4,6 +4,7 @@ import io.github.devcrocod.korro.KorroTask import nl.jolanrensen.docProcessor.defaultProcessors.* import nl.jolanrensen.docProcessor.gradle.creatingProcessDocTask import org.gradle.jvm.tasks.Jar +import org.jetbrains.kotlin.gradle.tasks.KotlinCompile import org.jmailen.gradle.kotlinter.tasks.LintTask import xyz.ronella.gradle.plugin.simple.git.task.GitTask @@ -77,7 +78,7 @@ val samplesImplementation by configurations.getting { extendsFrom(configurations.testImplementation.get()) } -val compileSamplesKotlin = tasks.named("compileSamplesKotlin") { +val compileSamplesKotlin = tasks.named("compileSamplesKotlin") { friendPaths.from(sourceSets["main"].output.classesDirs) source(sourceSets["test"].kotlin) destinationDirectory.set(file("$buildDir/classes/testWithOutputs/kotlin")) @@ -106,6 +107,7 @@ val samplesTest = tasks.register("samplesTest") { dependsOn(compileSamplesKotlin) dependsOn(clearTestResults) + outputs.upToDateWhen { false } environment("DATAFRAME_SAVE_OUTPUTS", "") @@ -129,6 +131,12 @@ val clearSamplesOutputs by tasks.creating { } } +val addSamplesToGit by tasks.creating(GitTask::class) { + directory.set(file(".")) + command.set("add") + args.set(listOf("-A", "../docs/StardustDocs/snippets")) +} + val copySamplesOutputs = tasks.register("copySamplesOutputs") { group = "documentation" mainClass.set("org.jetbrains.kotlinx.dataframe.explainer.SampleAggregatorKt") @@ -136,6 +144,10 @@ val copySamplesOutputs = tasks.register("copySamplesOutputs") { dependsOn(clearSamplesOutputs) dependsOn(samplesTest) classpath = sourceSets.test.get().runtimeClasspath + + doLast { + addSamplesToGit.executeCommand() + } } tasks.withType { @@ -307,7 +319,7 @@ tasks.withType { targetCompatibility = JavaVersion.VERSION_1_8.toString() } -tasks.withType { +tasks.withType { kotlinOptions { freeCompilerArgs = 
freeCompilerArgs + listOf("-Xinline-classes", "-Xopt-in=kotlin.RequiresOptIn") } diff --git a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/DataColumn.kt b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/DataColumn.kt index 37d387567..76a752d7f 100644 --- a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/DataColumn.kt +++ b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/DataColumn.kt @@ -1,26 +1,8 @@ package org.jetbrains.kotlinx.dataframe -import org.jetbrains.kotlinx.dataframe.api.Infer -import org.jetbrains.kotlinx.dataframe.api.asDataColumn -import org.jetbrains.kotlinx.dataframe.api.cast -import org.jetbrains.kotlinx.dataframe.api.concat -import org.jetbrains.kotlinx.dataframe.api.filter -import org.jetbrains.kotlinx.dataframe.api.schema -import org.jetbrains.kotlinx.dataframe.api.take -import org.jetbrains.kotlinx.dataframe.columns.BaseColumn -import org.jetbrains.kotlinx.dataframe.columns.ColumnGroup -import org.jetbrains.kotlinx.dataframe.columns.ColumnKind -import org.jetbrains.kotlinx.dataframe.columns.ColumnPath -import org.jetbrains.kotlinx.dataframe.columns.ColumnResolutionContext -import org.jetbrains.kotlinx.dataframe.columns.ColumnWithPath -import org.jetbrains.kotlinx.dataframe.columns.FrameColumn -import org.jetbrains.kotlinx.dataframe.columns.ValueColumn -import org.jetbrains.kotlinx.dataframe.impl.columns.ColumnGroupImpl -import org.jetbrains.kotlinx.dataframe.impl.columns.FrameColumnImpl -import org.jetbrains.kotlinx.dataframe.impl.columns.ValueColumnImpl -import org.jetbrains.kotlinx.dataframe.impl.columns.addPath -import org.jetbrains.kotlinx.dataframe.impl.columns.guessColumnType -import org.jetbrains.kotlinx.dataframe.impl.columns.toColumnKind +import org.jetbrains.kotlinx.dataframe.api.* +import org.jetbrains.kotlinx.dataframe.columns.* +import org.jetbrains.kotlinx.dataframe.impl.columns.* import 
org.jetbrains.kotlinx.dataframe.impl.getValuesType import org.jetbrains.kotlinx.dataframe.impl.splitByIndices import org.jetbrains.kotlinx.dataframe.schema.DataFrameSchema @@ -54,7 +36,7 @@ public interface DataColumn : BaseColumn { values: List, type: KType, infer: Infer = Infer.None, - defaultValue: T? = null + defaultValue: T? = null, ): ValueColumn = ValueColumnImpl(values, name, getValuesType(values, type, infer), defaultValue) /** @@ -67,7 +49,11 @@ public interface DataColumn : BaseColumn { * @param values list of column values * @param infer column type inference mode */ - public inline fun createValueColumn(name: String, values: List, infer: Infer = Infer.None): ValueColumn = createValueColumn( + public inline fun createValueColumn( + name: String, + values: List, + infer: Infer = Infer.None, + ): ValueColumn = createValueColumn( name, values, getValuesType( values, @@ -81,17 +67,21 @@ public interface DataColumn : BaseColumn { public fun createFrameColumn( name: String, df: DataFrame, - startIndices: Iterable + startIndices: Iterable, ): FrameColumn = FrameColumnImpl(name, df.splitByIndices(startIndices.asSequence()).toList(), lazy { df.schema() }) public fun createFrameColumn( name: String, groups: List>, - schema: Lazy? = null + schema: Lazy? = null, ): FrameColumn = FrameColumnImpl(name, groups, schema) - public fun createWithTypeInference(name: String, values: List, nullable: Boolean? = null): DataColumn = guessColumnType(name, values, nullable = nullable) + public fun createWithTypeInference( + name: String, + values: List, + nullable: Boolean? 
= null, + ): DataColumn = guessColumnType(name, values, nullable = nullable) public fun create(name: String, values: List, type: KType, infer: Infer = Infer.None): DataColumn { return when (type.toColumnKind()) { @@ -101,7 +91,8 @@ public interface DataColumn : BaseColumn { } } - public inline fun create(name: String, values: List, infer: Infer = Infer.None): DataColumn = create(name, values, typeOf(), infer) + public inline fun create(name: String, values: List, infer: Infer = Infer.None): DataColumn = + create(name, values, typeOf(), infer) public fun empty(name: String = ""): AnyCol = createValueColumn(name, emptyList(), typeOf()) } @@ -116,7 +107,8 @@ public interface DataColumn : BaseColumn { override fun resolveSingle(context: ColumnResolutionContext): ColumnWithPath? = this.addPath() - override operator fun getValue(thisRef: Any?, property: KProperty<*>): DataColumn = super.getValue(thisRef, property) as DataColumn + override operator fun getValue(thisRef: Any?, property: KProperty<*>): DataColumn = + super.getValue(thisRef, property) as DataColumn public operator fun iterator(): Iterator = values().iterator() @@ -133,6 +125,8 @@ public val AnyCol.indices: IntRange get() = indices() public val AnyCol.type: KType get() = type() public val AnyCol.kind: ColumnKind get() = kind() -public val AnyCol.typeClass: KClass<*> get() = type.classifier as? KClass<*> ?: error("Cannot cast ${type.classifier?.javaClass} to a ${KClass::class}. Column $name: $type") +public val AnyCol.typeClass: KClass<*> + get() = type.classifier as? KClass<*> + ?: error("Cannot cast ${type.classifier?.javaClass} to a ${KClass::class}. 
Column $name: $type") public fun AnyBaseCol.indices(): IntRange = 0 until size() diff --git a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/ColumnsSelectionDsl.kt b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/ColumnsSelectionDsl.kt index f89e2ff51..e49b08d53 100644 --- a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/ColumnsSelectionDsl.kt +++ b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/ColumnsSelectionDsl.kt @@ -9,39 +9,15 @@ import org.jetbrains.kotlinx.dataframe.ColumnsSelector import org.jetbrains.kotlinx.dataframe.DataColumn import org.jetbrains.kotlinx.dataframe.DataFrame import org.jetbrains.kotlinx.dataframe.DataRow -import org.jetbrains.kotlinx.dataframe.Predicate -import org.jetbrains.kotlinx.dataframe.columns.ColumnAccessor -import org.jetbrains.kotlinx.dataframe.columns.ColumnGroup -import org.jetbrains.kotlinx.dataframe.columns.ColumnPath -import org.jetbrains.kotlinx.dataframe.columns.ColumnReference -import org.jetbrains.kotlinx.dataframe.columns.ColumnResolutionContext -import org.jetbrains.kotlinx.dataframe.columns.ColumnSet -import org.jetbrains.kotlinx.dataframe.columns.ColumnWithPath -import org.jetbrains.kotlinx.dataframe.columns.FrameColumn -import org.jetbrains.kotlinx.dataframe.columns.SingleColumn -import org.jetbrains.kotlinx.dataframe.columns.renamedReference -import org.jetbrains.kotlinx.dataframe.columns.toColumnSet +import org.jetbrains.kotlinx.dataframe.columns.* import org.jetbrains.kotlinx.dataframe.documentation.AccessApi import org.jetbrains.kotlinx.dataframe.documentation.ColumnExpression import org.jetbrains.kotlinx.dataframe.documentation.DocumentationUrls import org.jetbrains.kotlinx.dataframe.documentation.LineBreak -import org.jetbrains.kotlinx.dataframe.hasNulls import org.jetbrains.kotlinx.dataframe.impl.aggregation.toColumns import org.jetbrains.kotlinx.dataframe.impl.columnName -import 
org.jetbrains.kotlinx.dataframe.impl.columns.ColumnsList -import org.jetbrains.kotlinx.dataframe.impl.columns.DistinctColumnSet -import org.jetbrains.kotlinx.dataframe.impl.columns.addPath -import org.jetbrains.kotlinx.dataframe.impl.columns.allColumnsExcept -import org.jetbrains.kotlinx.dataframe.impl.columns.changePath -import org.jetbrains.kotlinx.dataframe.impl.columns.createColumnSet -import org.jetbrains.kotlinx.dataframe.impl.columns.getAt -import org.jetbrains.kotlinx.dataframe.impl.columns.getChildrenAt -import org.jetbrains.kotlinx.dataframe.impl.columns.singleImpl -import org.jetbrains.kotlinx.dataframe.impl.columns.top -import org.jetbrains.kotlinx.dataframe.impl.columns.transform -import org.jetbrains.kotlinx.dataframe.impl.columns.transformSingle -import org.jetbrains.kotlinx.dataframe.impl.columns.transformWithContext -import org.jetbrains.kotlinx.dataframe.impl.columns.tree.dfs +import org.jetbrains.kotlinx.dataframe.impl.columns.* +import org.jetbrains.kotlinx.dataframe.impl.columns.tree.flattenRecursively import org.jetbrains.kotlinx.dataframe.impl.headPlusArray import kotlin.reflect.KProperty import kotlin.reflect.KType @@ -290,8 +266,8 @@ public interface ColumnsSelectionDsl : ColumnSelectionDsl, SingleColum * @throws [NoSuchElementException] if no column adheres to the given [condition]. 
* @see [last] */ - public fun ColumnSet.first(condition: ColumnFilter = { true }): SingleColumn = - transform { listOf(it.first(condition)) }.singleImpl() + public fun ColumnSet.first(condition: ColumnFilter = { true }): TransformableSingleColumn = + transform { listOf(it.first(condition)) }.singleWithTransformerImpl() /** * ## First @@ -303,12 +279,14 @@ public interface ColumnsSelectionDsl : ColumnSelectionDsl, SingleColum * * `df.`[select][select]` { myColumnGroup.`[first][first]`() }` * + * `df.`[select][select]` { "pathTo"["myColumnGroup"].`[first][first]` { it.`[name][ColumnReference.name]`().`[startsWith][String.startsWith]`("year") } }` + * * @param [condition] The optional [ColumnFilter][org.jetbrains.kotlinx.dataframe.ColumnFilter] condition that the column must adhere to. * @return A [SingleColumn][org.jetbrains.kotlinx.dataframe.columns.SingleColumn] containing the first column that adheres to the given [condition]. * @throws [NoSuchElementException] if no column adheres to the given [condition]. * @see [last] */ - public fun SingleColumn.first(condition: ColumnFilter<*> = { true }): SingleColumn<*> = + public fun SingleColumn<*>.first(condition: ColumnFilter<*> = { true }): TransformableSingleColumn<*> = all().first(condition) /** @@ -324,23 +302,7 @@ public interface ColumnsSelectionDsl : ColumnSelectionDsl, SingleColum * @throws [NoSuchElementException] if no column adheres to the given [condition]. * @see [last] */ - public fun String.first(condition: ColumnFilter<*> = { true }): SingleColumn<*> = - colGroup(this).first(condition) - - /** - * ## First - * Returns the first column in this [ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet] or [ColumnGroup][org.jetbrains.kotlinx.dataframe.columns.ColumnGroup] that adheres to the given [condition]. 
- * - * #### For example: - * - * `df.`[select][select]` { "pathTo"["myColumnGroup"].`[first][first]` { it.`[name][ColumnReference.name]`().`[startsWith][String.startsWith]`("year") } }` - * - * @param [condition] The optional [ColumnFilter][org.jetbrains.kotlinx.dataframe.ColumnFilter] condition that the column must adhere to. - * @return A [SingleColumn][org.jetbrains.kotlinx.dataframe.columns.SingleColumn] containing the first column that adheres to the given [condition]. - * @throws [NoSuchElementException] if no column adheres to the given [condition]. - * @see [last] - */ - public fun ColumnPath.first(condition: ColumnFilter<*> = { true }): SingleColumn<*> = + public fun String.first(condition: ColumnFilter<*> = { true }): TransformableSingleColumn<*> = colGroup(this).first(condition) /** @@ -356,7 +318,7 @@ public interface ColumnsSelectionDsl : ColumnSelectionDsl, SingleColum * @throws [NoSuchElementException] if no column adheres to the given [condition]. * @see [last] */ - public fun KProperty<*>.first(condition: ColumnFilter<*> = { true }): SingleColumn<*> = + public fun KProperty<*>.first(condition: ColumnFilter<*> = { true }): TransformableSingleColumn<*> = colGroup(this).first(condition) /** @@ -393,8 +355,8 @@ public interface ColumnsSelectionDsl : ColumnSelectionDsl, SingleColum * @throws [NoSuchElementException] if no column adheres to the given [condition]. 
* @see [first] */ - public fun ColumnSet.last(condition: ColumnFilter = { true }): SingleColumn = - transform { listOf(it.last(condition)) }.singleImpl() + public fun ColumnSet.last(condition: ColumnFilter = { true }): TransformableSingleColumn = + transform { listOf(it.last(condition)) }.singleWithTransformerImpl() /** * ## Last @@ -406,12 +368,14 @@ public interface ColumnsSelectionDsl : ColumnSelectionDsl, SingleColum * * `df.`[select][select]` { myColumnGroup.`[last][last]`() }` * + * `df.`[select][select]` { "pathTo"["myColumnGroup"].`[last][last]` { it.`[name][ColumnReference.name]`().`[startsWith][String.startsWith]`("year") } }` + * * @param [condition] The optional [ColumnFilter][org.jetbrains.kotlinx.dataframe.ColumnFilter] condition that the column must adhere to. * @return A [SingleColumn][org.jetbrains.kotlinx.dataframe.columns.SingleColumn] containing the last column that adheres to the given [condition]. * @throws [NoSuchElementException] if no column adheres to the given [condition]. * @see [first] */ - public fun SingleColumn.last(condition: ColumnFilter<*> = { true }): SingleColumn<*> = + public fun SingleColumn<*>.last(condition: ColumnFilter<*> = { true }): TransformableSingleColumn<*> = all().last(condition) /** @@ -427,23 +391,7 @@ public interface ColumnsSelectionDsl : ColumnSelectionDsl, SingleColum * @throws [NoSuchElementException] if no column adheres to the given [condition]. * @see [first] */ - public fun String.last(condition: ColumnFilter<*> = { true }): SingleColumn<*> = - colGroup(this).last(condition) - - /** - * ## Last - * Returns the last column in this [ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet] or [ColumnGroup][org.jetbrains.kotlinx.dataframe.columns.ColumnGroup] that adheres to the given [condition]. 
- * - * #### For example: - * - * `df.`[select][select]` { "pathTo"["myColumnGroup"].`[last][last]` { it.`[name][ColumnReference.name]`().`[startsWith][String.startsWith]`("year") } }` - * - * @param [condition] The optional [ColumnFilter][org.jetbrains.kotlinx.dataframe.ColumnFilter] condition that the column must adhere to. - * @return A [SingleColumn][org.jetbrains.kotlinx.dataframe.columns.SingleColumn] containing the last column that adheres to the given [condition]. - * @throws [NoSuchElementException] if no column adheres to the given [condition]. - * @see [first] - */ - public fun ColumnPath.last(condition: ColumnFilter<*> = { true }): SingleColumn<*> = + public fun String.last(condition: ColumnFilter<*> = { true }): TransformableSingleColumn<*> = colGroup(this).last(condition) /** @@ -459,7 +407,7 @@ public interface ColumnsSelectionDsl : ColumnSelectionDsl, SingleColum * @throws [NoSuchElementException] if no column adheres to the given [condition]. * @see [first] */ - public fun KProperty<*>.last(condition: ColumnFilter<*> = { true }): SingleColumn<*> = + public fun KProperty<*>.last(condition: ColumnFilter<*> = { true }): TransformableSingleColumn<*> = colGroup(this).last(condition) /** @@ -496,8 +444,8 @@ public interface ColumnsSelectionDsl : ColumnSelectionDsl, SingleColum * @throws [NoSuchElementException] if no column adheres to the given [condition]. * @throws [IllegalArgumentException] if more than one column adheres to the given [condition]. 
*/ - public fun ColumnSet.single(condition: ColumnFilter = { true }): SingleColumn = - transform { listOf(it.single(condition)) }.singleImpl() + public fun ColumnSet.single(condition: ColumnFilter = { true }): TransformableSingleColumn = + transform { listOf(it.single(condition)) }.singleWithTransformerImpl() /** * ## Single @@ -509,12 +457,14 @@ public interface ColumnsSelectionDsl : ColumnSelectionDsl, SingleColum * * `df.`[select][select]` { myColumnGroup.`[single][single]`() }` * + * `df.`[select][select]` { "pathTo"["myColumnGroup"].`[single][single]` { it.`[name][ColumnReference.name]`().`[startsWith][String.startsWith]`("year") } }` + * * @param [condition] The optional [ColumnFilter][org.jetbrains.kotlinx.dataframe.ColumnFilter] condition that the column must adhere to. * @return A [SingleColumn][org.jetbrains.kotlinx.dataframe.columns.SingleColumn] containing the single column that adheres to the given [condition]. * @throws [NoSuchElementException] if no column adheres to the given [condition]. * @throws [IllegalArgumentException] if more than one column adheres to the given [condition]. */ - public fun SingleColumn.single(condition: ColumnFilter<*> = { true }): SingleColumn<*> = + public fun SingleColumn<*>.single(condition: ColumnFilter<*> = { true }): TransformableSingleColumn<*> = all().single(condition) /** @@ -530,23 +480,7 @@ public interface ColumnsSelectionDsl : ColumnSelectionDsl, SingleColum * @throws [NoSuchElementException] if no column adheres to the given [condition]. * @throws [IllegalArgumentException] if more than one column adheres to the given [condition]. */ - public fun String.single(condition: ColumnFilter<*> = { true }): SingleColumn<*> = - colGroup(this).single(condition) - - /** - * ## Single - * Returns the single column in this [ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet] or [ColumnGroup][org.jetbrains.kotlinx.dataframe.columns.ColumnGroup] that adheres to the given [condition]. 
- * - * #### For example: - * - * `df.`[select][select]` { "pathTo"["myColumnGroup"].`[single][single]` { it.`[name][ColumnReference.name]`().`[startsWith][String.startsWith]`("year") } }` - * - * @param [condition] The optional [ColumnFilter][org.jetbrains.kotlinx.dataframe.ColumnFilter] condition that the column must adhere to. - * @return A [SingleColumn][org.jetbrains.kotlinx.dataframe.columns.SingleColumn] containing the single column that adheres to the given [condition]. - * @throws [NoSuchElementException] if no column adheres to the given [condition]. - * @throws [IllegalArgumentException] if more than one column adheres to the given [condition]. - */ - public fun ColumnPath.single(condition: ColumnFilter<*> = { true }): SingleColumn<*> = + public fun String.single(condition: ColumnFilter<*> = { true }): TransformableSingleColumn<*> = colGroup(this).single(condition) /** @@ -562,7 +496,7 @@ public interface ColumnsSelectionDsl : ColumnSelectionDsl, SingleColum * @throws [NoSuchElementException] if no column adheres to the given [condition]. * @throws [IllegalArgumentException] if more than one column adheres to the given [condition]. */ - public fun KProperty<*>.single(condition: ColumnFilter<*> = { true }): SingleColumn<*> = + public fun KProperty<*>.single(condition: ColumnFilter<*> = { true }): TransformableSingleColumn<*> = colGroup(this).single(condition) /** @@ -736,8 +670,9 @@ public interface ColumnsSelectionDsl : ColumnSelectionDsl, SingleColum */ public operator fun AnyColumnReference.rangeTo(endInclusive: AnyColumnReference): ColumnSet<*> = object : ColumnSet { - override fun resolve(context: ColumnResolutionContext): List> { - val startPath = this@rangeTo.resolveSingle(context)!!.path + + private fun process(col: AnyColumnReference, context: ColumnResolutionContext): List> { + val startPath = col.resolveSingle(context)!!.path val endPath = endInclusive.resolveSingle(context)!!.path val parentPath = startPath.parent()!! 
require(parentPath == endPath.parent()) { "Start and end columns have different parent column paths" } @@ -750,6 +685,9 @@ public interface ColumnsSelectionDsl : ColumnSelectionDsl, SingleColum } } } + + override fun resolve(context: ColumnResolutionContext): List> = + process(this@rangeTo, context) } /** @@ -1066,7 +1004,7 @@ public interface ColumnsSelectionDsl : ColumnSelectionDsl, SingleColum */ public fun ColumnGroupReference.col(property: KProperty): ColumnAccessor = column(property) - public fun SingleColumn.col(index: Int): SingleColumn = getChildrenAt(index).singleImpl() + public fun SingleColumn<*>.col(index: Int): SingleColumn = getChildrenAt(index).singleImpl() public operator fun ColumnSet.get(index: Int): SingleColumn = getAt(index) @@ -1677,7 +1615,12 @@ public interface ColumnsSelectionDsl : ColumnSelectionDsl, SingleColum /** * ## Cols - * Creates a subset of columns ([ColumnSet]) from a parent [ColumnSet], -[ColumnGroup], or -[DataFrame]. + * Creates a subset of columns ([ColumnSet]) from the current [ColumnSet]. + * + * If the current [ColumnSet] is a [SingleColumn] + * (and thus consists of only one column (or [column group][ColumnGroup])), + * then `cols` will create a subset of its children. + * * You can use either a [ColumnFilter] or any of the `vararg` overloads for all * [APIs][AccessApi] (+ [ColumnPath]). * @@ -1700,7 +1643,12 @@ public interface ColumnsSelectionDsl : ColumnSelectionDsl, SingleColum /** * ## Cols - * Creates a subset of columns ([ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet]) from a parent [ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet], -[ColumnGroup][org.jetbrains.kotlinx.dataframe.columns.ColumnGroup], or -[DataFrame][org.jetbrains.kotlinx.dataframe.DataFrame]. + * Creates a subset of columns ([ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet]) from the current [ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet]. 
+ * + * If the current [ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet] is a [SingleColumn][org.jetbrains.kotlinx.dataframe.columns.SingleColumn] + * (and thus consists of only one column (or [column group][org.jetbrains.kotlinx.dataframe.columns.ColumnGroup])), + * then `cols` will create a subset of its children. + * * You can use either a [ColumnFilter][org.jetbrains.kotlinx.dataframe.ColumnFilter] or any of the `vararg` overloads for all * [APIs][org.jetbrains.kotlinx.dataframe.documentation.AccessApi] (+ [ColumnPath][org.jetbrains.kotlinx.dataframe.columns.ColumnPath]). * @@ -1726,7 +1674,12 @@ public interface ColumnsSelectionDsl : ColumnSelectionDsl, SingleColum /** * ## Cols - * Creates a subset of columns ([ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet]) from a parent [ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet], -[ColumnGroup][org.jetbrains.kotlinx.dataframe.columns.ColumnGroup], or -[DataFrame][org.jetbrains.kotlinx.dataframe.DataFrame]. + * Creates a subset of columns ([ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet]) from the current [ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet]. + * + * If the current [ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet] is a [SingleColumn][org.jetbrains.kotlinx.dataframe.columns.SingleColumn] + * (and thus consists of only one column (or [column group][org.jetbrains.kotlinx.dataframe.columns.ColumnGroup])), + * then `cols` will create a subset of its children. + * * You can use either a [ColumnFilter][org.jetbrains.kotlinx.dataframe.ColumnFilter] or any of the `vararg` overloads for all * [APIs][org.jetbrains.kotlinx.dataframe.documentation.AccessApi] (+ [ColumnPath][org.jetbrains.kotlinx.dataframe.columns.ColumnPath]). 
* @@ -1762,7 +1715,12 @@ public interface ColumnsSelectionDsl : ColumnSelectionDsl, SingleColum /** * ## Cols - * Creates a subset of columns ([ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet]) from a parent [ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet], -[ColumnGroup][org.jetbrains.kotlinx.dataframe.columns.ColumnGroup], or -[DataFrame][org.jetbrains.kotlinx.dataframe.DataFrame]. + * Creates a subset of columns ([ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet]) from the current [ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet]. + * + * If the current [ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet] is a [SingleColumn][org.jetbrains.kotlinx.dataframe.columns.SingleColumn] + * (and thus consists of only one column (or [column group][org.jetbrains.kotlinx.dataframe.columns.ColumnGroup])), + * then `cols` will create a subset of its children. + * * You can use either a [ColumnFilter][org.jetbrains.kotlinx.dataframe.ColumnFilter] or any of the `vararg` overloads for all * [APIs][org.jetbrains.kotlinx.dataframe.documentation.AccessApi] (+ [ColumnPath][org.jetbrains.kotlinx.dataframe.columns.ColumnPath]). * @@ -1797,7 +1755,12 @@ public interface ColumnsSelectionDsl : ColumnSelectionDsl, SingleColum private interface ColumnSetColsPredicateDocs /** ## Cols - * Creates a subset of columns ([ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet]) from a parent [ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet], -[ColumnGroup][org.jetbrains.kotlinx.dataframe.columns.ColumnGroup], or -[DataFrame][org.jetbrains.kotlinx.dataframe.DataFrame]. + * Creates a subset of columns ([ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet]) from the current [ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet]. 
+ * + * If the current [ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet] is a [SingleColumn][org.jetbrains.kotlinx.dataframe.columns.SingleColumn] + * (and thus consists of only one column (or [column group][org.jetbrains.kotlinx.dataframe.columns.ColumnGroup])), + * then `cols` will create a subset of its children. + * * You can use either a [ColumnFilter][org.jetbrains.kotlinx.dataframe.ColumnFilter] or any of the `vararg` overloads for all * [APIs][org.jetbrains.kotlinx.dataframe.documentation.AccessApi] (+ [ColumnPath][org.jetbrains.kotlinx.dataframe.columns.ColumnPath]). * @@ -1832,15 +1795,15 @@ public interface ColumnsSelectionDsl : ColumnSelectionDsl, SingleColum @Suppress("UNCHECKED_CAST") public fun ColumnSet.cols( predicate: ColumnFilter = { true }, - ): ColumnSet = transformWithContext { - dataFrameOf(it) - .asColumnGroup() - .cols(predicate as ColumnFilter<*>) - .resolve(this) - } as ColumnSet + ): TransformableColumnSet = colsInternal(predicate as ColumnFilter<*>) as TransformableColumnSet /** ## Cols - * Creates a subset of columns ([ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet]) from a parent [ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet], -[ColumnGroup][org.jetbrains.kotlinx.dataframe.columns.ColumnGroup], or -[DataFrame][org.jetbrains.kotlinx.dataframe.DataFrame]. + * Creates a subset of columns ([ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet]) from the current [ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet]. + * + * If the current [ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet] is a [SingleColumn][org.jetbrains.kotlinx.dataframe.columns.SingleColumn] + * (and thus consists of only one column (or [column group][org.jetbrains.kotlinx.dataframe.columns.ColumnGroup])), + * then `cols` will create a subset of its children. 
+ * * You can use either a [ColumnFilter][org.jetbrains.kotlinx.dataframe.ColumnFilter] or any of the `vararg` overloads for all * [APIs][org.jetbrains.kotlinx.dataframe.documentation.AccessApi] (+ [ColumnPath][org.jetbrains.kotlinx.dataframe.columns.ColumnPath]). * @@ -1874,11 +1837,16 @@ public interface ColumnsSelectionDsl : ColumnSelectionDsl, SingleColum */ public operator fun ColumnSet.get( predicate: ColumnFilter = { true }, - ): ColumnSet = cols(predicate) + ): TransformableColumnSet = cols(predicate) /** * ## Cols - * Creates a subset of columns ([ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet]) from a parent [ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet], -[ColumnGroup][org.jetbrains.kotlinx.dataframe.columns.ColumnGroup], or -[DataFrame][org.jetbrains.kotlinx.dataframe.DataFrame]. + * Creates a subset of columns ([ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet]) from the current [ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet]. + * + * If the current [ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet] is a [SingleColumn][org.jetbrains.kotlinx.dataframe.columns.SingleColumn] + * (and thus consists of only one column (or [column group][org.jetbrains.kotlinx.dataframe.columns.ColumnGroup])), + * then `cols` will create a subset of its children. + * * You can use either a [ColumnFilter][org.jetbrains.kotlinx.dataframe.ColumnFilter] or any of the `vararg` overloads for all * [APIs][org.jetbrains.kotlinx.dataframe.documentation.AccessApi] (+ [ColumnPath][org.jetbrains.kotlinx.dataframe.columns.ColumnPath]). 
* @@ -1900,17 +1868,18 @@ public interface ColumnsSelectionDsl : ColumnSelectionDsl, SingleColum * * `df.`[select][select]` { myColumnGroup`.[cols][cols]` { "e" `[in][String.contains]` it.`[name][ColumnPath.name]`() } }` * - * `// same as `[all][all] + * `df.`[select][select]` { `[cols][cols]`() } // same as `[all][all] * - * `df.`[select][select]` { `[cols][cols]`() }` + * `df.`[select][select]` { "pathTo"["myGroupCol"].`[cols][cols]` { "e" `[in][String.contains]` it.`[name][ColumnPath.name]`() } }` + * + * `df.`[select][select]` { "pathTo"["myGroupCol"]`[`[`][cols]`{ it.`[any][ColumnWithPath.any]` { it == "Alice" } }`[`]`][cols]` }` + * + * `df.`[select][select]` { "pathTo"["myGroupCol"].`[cols][cols]`() } // identity call, same as `[all][all] * * `// NOTE: there's a `[DataFrame.get]` overload that prevents this:` * * `df.`[select][select]` { myColumnGroup`[`[`][cols]`{ ... }`[`]`][cols]` }` * - * `// use `[cols][cols]` instead` - * `df.`[select][select]` { myColumnGroup`.[cols][cols]` { ... } }` - * * @see [all] * * @@ -1920,7 +1889,12 @@ public interface ColumnsSelectionDsl : ColumnSelectionDsl, SingleColum private interface SingleColumnAnyRowColsPredicateDocs /** ## Cols - * Creates a subset of columns ([ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet]) from a parent [ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet], -[ColumnGroup][org.jetbrains.kotlinx.dataframe.columns.ColumnGroup], or -[DataFrame][org.jetbrains.kotlinx.dataframe.DataFrame]. + * Creates a subset of columns ([ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet]) from the current [ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet]. 
+ * + * If the current [ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet] is a [SingleColumn][org.jetbrains.kotlinx.dataframe.columns.SingleColumn] + * (and thus consists of only one column (or [column group][org.jetbrains.kotlinx.dataframe.columns.ColumnGroup])), + * then `cols` will create a subset of its children. + * * You can use either a [ColumnFilter][org.jetbrains.kotlinx.dataframe.ColumnFilter] or any of the `vararg` overloads for all * [APIs][org.jetbrains.kotlinx.dataframe.documentation.AccessApi] (+ [ColumnPath][org.jetbrains.kotlinx.dataframe.columns.ColumnPath]). * @@ -1942,30 +1916,36 @@ public interface ColumnsSelectionDsl : ColumnSelectionDsl, SingleColum * * `df.`[select][org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl.select]` { myColumnGroup`.[cols][org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl.cols]` { "e" `[in][String.contains]` it.`[name][org.jetbrains.kotlinx.dataframe.columns.ColumnPath.name]`() } }` * - * `// same as `[all][org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl.all] + * `df.`[select][org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl.select]` { `[cols][org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl.cols]`() } // same as `[all][org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl.all] * - * `df.`[select][org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl.select]` { `[cols][org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl.cols]`() }` + * `df.`[select][org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl.select]` { "pathTo"["myGroupCol"].`[cols][org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl.cols]` { "e" `[in][String.contains]` it.`[name][org.jetbrains.kotlinx.dataframe.columns.ColumnPath.name]`() } }` + * + * `df.`[select][org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl.select]` { "pathTo"["myGroupCol"]`[`[`][org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl.cols]`{ it.`[any][ColumnWithPath.any]` { it == "Alice" } 
}`[`]`][org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl.cols]` }` + * + * `df.`[select][org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl.select]` { "pathTo"["myGroupCol"].`[cols][org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl.cols]`() } // identity call, same as `[all][org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl.all] * * `// NOTE: there's a `[DataFrame.get][org.jetbrains.kotlinx.dataframe.DataFrame.get]` overload that prevents this:` * * `df.`[select][org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl.select]` { myColumnGroup`[`[`][org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl.cols]`{ ... }`[`]`][org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl.cols]` }` * - * `// use `[cols][org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl.cols]` instead` - * `df.`[select][org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl.select]` { myColumnGroup`.[cols][org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl.cols]` { ... } }` - * * @see [all] * * * @param [predicate] A [ColumnFilter function][org.jetbrains.kotlinx.dataframe.ColumnFilter] that takes a [ColumnReference][org.jetbrains.kotlinx.dataframe.columns.ColumnReference] and returns a [Boolean]. * @return A [ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet] containing the columns that match the given [predicate]. */ - public fun SingleColumn.cols( + public fun SingleColumn<*>.cols( predicate: ColumnFilter<*> = { true }, - ): ColumnSet<*> = colsInternal(predicate) + ): TransformableColumnSet<*> = colsInternal(predicate) /** * ## Cols - * Creates a subset of columns ([ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet]) from a parent [ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet], -[ColumnGroup][org.jetbrains.kotlinx.dataframe.columns.ColumnGroup], or -[DataFrame][org.jetbrains.kotlinx.dataframe.DataFrame]. 
+ * Creates a subset of columns ([ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet]) from the current [ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet]. + * + * If the current [ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet] is a [SingleColumn][org.jetbrains.kotlinx.dataframe.columns.SingleColumn] + * (and thus consists of only one column (or [column group][org.jetbrains.kotlinx.dataframe.columns.ColumnGroup])), + * then `cols` will create a subset of its children. + * * You can use either a [ColumnFilter][org.jetbrains.kotlinx.dataframe.ColumnFilter] or any of the `vararg` overloads for all * [APIs][org.jetbrains.kotlinx.dataframe.documentation.AccessApi] (+ [ColumnPath][org.jetbrains.kotlinx.dataframe.columns.ColumnPath]). * @@ -1987,17 +1967,18 @@ public interface ColumnsSelectionDsl : ColumnSelectionDsl, SingleColum * * `df.`[select][org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl.select]` { myColumnGroup`.[cols][org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl.cols]` { "e" `[in][String.contains]` it.`[name][org.jetbrains.kotlinx.dataframe.columns.ColumnPath.name]`() } }` * - * `// same as `[all][org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl.all] + * `df.`[select][org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl.select]` { `[cols][org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl.cols]`() } // same as `[all][org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl.all] * - * `df.`[select][org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl.select]` { `[cols][org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl.cols]`() }` + * `df.`[select][org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl.select]` { "pathTo"["myGroupCol"].`[cols][org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl.cols]` { "e" `[in][String.contains]` it.`[name][org.jetbrains.kotlinx.dataframe.columns.ColumnPath.name]`() } }` + * + * 
`df.`[select][org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl.select]` { "pathTo"["myGroupCol"]`[`[`][org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl.cols]`{ it.`[any][ColumnWithPath.any]` { it == "Alice" } }`[`]`][org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl.cols]` }` + * + * `df.`[select][org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl.select]` { "pathTo"["myGroupCol"].`[cols][org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl.cols]`() } // identity call, same as `[all][org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl.all] * * `// NOTE: there's a `[DataFrame.get][org.jetbrains.kotlinx.dataframe.DataFrame.get]` overload that prevents this:` * * `df.`[select][org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl.select]` { myColumnGroup`[`[`][org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl.cols]`{ ... }`[`]`][org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl.cols]` }` * - * `// use `[cols][org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl.cols]` instead` - * `df.`[select][org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl.select]` { myColumnGroup`.[cols][org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl.cols]` { ... } }` - * * @see [all] * * @@ -2005,13 +1986,18 @@ public interface ColumnsSelectionDsl : ColumnSelectionDsl, SingleColum * @param [predicate] A [ColumnFilter function][org.jetbrains.kotlinx.dataframe.ColumnFilter] that takes a [ColumnReference][org.jetbrains.kotlinx.dataframe.columns.ColumnReference] and returns a [Boolean]. * @return A [ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet] containing the columns that match the given [predicate]. 
*/ - public operator fun SingleColumn.get( + public operator fun SingleColumn<*>.get( predicate: ColumnFilter<*> = { true }, - ): ColumnSet = cols(predicate) + ): TransformableColumnSet = cols(predicate) /** * ## Cols - * Creates a subset of columns ([ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet]) from a parent [ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet], -[ColumnGroup][org.jetbrains.kotlinx.dataframe.columns.ColumnGroup], or -[DataFrame][org.jetbrains.kotlinx.dataframe.DataFrame]. + * Creates a subset of columns ([ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet]) from the current [ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet]. + * + * If the current [ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet] is a [SingleColumn][org.jetbrains.kotlinx.dataframe.columns.SingleColumn] + * (and thus consists of only one column (or [column group][org.jetbrains.kotlinx.dataframe.columns.ColumnGroup])), + * then `cols` will create a subset of its children. + * * You can use either a [ColumnFilter][org.jetbrains.kotlinx.dataframe.ColumnFilter] or any of the `vararg` overloads for all * [APIs][org.jetbrains.kotlinx.dataframe.documentation.AccessApi] (+ [ColumnPath][org.jetbrains.kotlinx.dataframe.columns.ColumnPath]). * @@ -2042,7 +2028,12 @@ public interface ColumnsSelectionDsl : ColumnSelectionDsl, SingleColum private interface StringColsPredicateDocs /** ## Cols - * Creates a subset of columns ([ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet]) from a parent [ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet], -[ColumnGroup][org.jetbrains.kotlinx.dataframe.columns.ColumnGroup], or -[DataFrame][org.jetbrains.kotlinx.dataframe.DataFrame]. + * Creates a subset of columns ([ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet]) from the current [ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet]. 
+ * + * If the current [ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet] is a [SingleColumn][org.jetbrains.kotlinx.dataframe.columns.SingleColumn] + * (and thus consists of only one column (or [column group][org.jetbrains.kotlinx.dataframe.columns.ColumnGroup])), + * then `cols` will create a subset of its children. + * * You can use either a [ColumnFilter][org.jetbrains.kotlinx.dataframe.ColumnFilter] or any of the `vararg` overloads for all * [APIs][org.jetbrains.kotlinx.dataframe.documentation.AccessApi] (+ [ColumnPath][org.jetbrains.kotlinx.dataframe.columns.ColumnPath]). * @@ -2072,10 +2063,15 @@ public interface ColumnsSelectionDsl : ColumnSelectionDsl, SingleColum */ public fun String.cols( predicate: ColumnFilter<*> = { true }, - ): ColumnSet<*> = colGroup(this).cols(predicate) + ): TransformableColumnSet<*> = colGroup(this).cols(predicate) /** ## Cols - * Creates a subset of columns ([ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet]) from a parent [ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet], -[ColumnGroup][org.jetbrains.kotlinx.dataframe.columns.ColumnGroup], or -[DataFrame][org.jetbrains.kotlinx.dataframe.DataFrame]. + * Creates a subset of columns ([ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet]) from the current [ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet]. + * + * If the current [ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet] is a [SingleColumn][org.jetbrains.kotlinx.dataframe.columns.SingleColumn] + * (and thus consists of only one column (or [column group][org.jetbrains.kotlinx.dataframe.columns.ColumnGroup])), + * then `cols` will create a subset of its children. + * * You can use either a [ColumnFilter][org.jetbrains.kotlinx.dataframe.ColumnFilter] or any of the `vararg` overloads for all * [APIs][org.jetbrains.kotlinx.dataframe.documentation.AccessApi] (+ [ColumnPath][org.jetbrains.kotlinx.dataframe.columns.ColumnPath]). 
* @@ -2105,115 +2101,16 @@ public interface ColumnsSelectionDsl : ColumnSelectionDsl, SingleColum */ public operator fun String.get( predicate: ColumnFilter<*> = { true }, - ): ColumnSet = cols(predicate) + ): TransformableColumnSet = cols(predicate) /** * ## Cols - * Creates a subset of columns ([ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet]) from a parent [ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet], -[ColumnGroup][org.jetbrains.kotlinx.dataframe.columns.ColumnGroup], or -[DataFrame][org.jetbrains.kotlinx.dataframe.DataFrame]. - * You can use either a [ColumnFilter][org.jetbrains.kotlinx.dataframe.ColumnFilter] or any of the `vararg` overloads for all - * [APIs][org.jetbrains.kotlinx.dataframe.documentation.AccessApi] (+ [ColumnPath][org.jetbrains.kotlinx.dataframe.columns.ColumnPath]). - * - * Aside from calling [cols][org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl.cols] directly, you can also use the [get][org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl.get] operator in most cases. 
- * - * #### For example: - * `df.`[remove][org.jetbrains.kotlinx.dataframe.DataFrame.remove]` { `[cols][org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl.cols]` { it.`[hasNulls][org.jetbrains.kotlinx.dataframe.hasNulls]`() } }` - * - * `df.`[select][org.jetbrains.kotlinx.dataframe.DataFrame.select]` { myGroupCol.`[cols][org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl.cols]`(columnA, columnB) }` - * - * `df.`[select][org.jetbrains.kotlinx.dataframe.DataFrame.select]` { `[colsOf][org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl.colsOf]`<`[String][String]`>()`[`[`][org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl.cols]`1, 3, 5] }` - * - * - * #### Examples for this overload: - * - * `df.`[select][select]` { "pathTo"["myGroupCol"].`[cols][cols]` { "e" `[in][String.contains]` it.`[name][ColumnPath.name]`() } }` - * - * `df.`[select][select]` { "pathTo"["myGroupCol"]`[`[`][cols]`{ it.`[any][ColumnWithPath.any]` { it == "Alice" } }`[`]`][cols]` }` - * - * `// identity call, same as `[all][all] - * - * `df.`[select][select]` { "pathTo"["myGroupCol"].`[cols][cols]`() }` - * - * @see [all] - * - * - * @param [predicate] A [ColumnFilter function][org.jetbrains.kotlinx.dataframe.ColumnFilter] that takes a [ColumnReference][org.jetbrains.kotlinx.dataframe.columns.ColumnReference] and returns a [Boolean]. - * @return A [ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet] containing the columns that match the given [predicate]. - */ - private interface ColumnPathColsPredicateDocs - - /** ## Cols - * Creates a subset of columns ([ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet]) from a parent [ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet], -[ColumnGroup][org.jetbrains.kotlinx.dataframe.columns.ColumnGroup], or -[DataFrame][org.jetbrains.kotlinx.dataframe.DataFrame]. 
- * You can use either a [ColumnFilter][org.jetbrains.kotlinx.dataframe.ColumnFilter] or any of the `vararg` overloads for all - * [APIs][org.jetbrains.kotlinx.dataframe.documentation.AccessApi] (+ [ColumnPath][org.jetbrains.kotlinx.dataframe.columns.ColumnPath]). - * - * Aside from calling [cols][org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl.cols] directly, you can also use the [get][org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl.get] operator in most cases. - * - * #### For example: - * `df.`[remove][org.jetbrains.kotlinx.dataframe.DataFrame.remove]` { `[cols][org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl.cols]` { it.`[hasNulls][org.jetbrains.kotlinx.dataframe.hasNulls]`() } }` - * - * `df.`[select][org.jetbrains.kotlinx.dataframe.DataFrame.select]` { myGroupCol.`[cols][org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl.cols]`(columnA, columnB) }` - * - * `df.`[select][org.jetbrains.kotlinx.dataframe.DataFrame.select]` { `[colsOf][org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl.colsOf]`<`[String][String]`>()`[`[`][org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl.cols]`1, 3, 5] }` - * - * - * #### Examples for this overload: - * - * `df.`[select][org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl.select]` { "pathTo"["myGroupCol"].`[cols][org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl.cols]` { "e" `[in][String.contains]` it.`[name][org.jetbrains.kotlinx.dataframe.columns.ColumnPath.name]`() } }` - * - * `df.`[select][org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl.select]` { "pathTo"["myGroupCol"]`[`[`][org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl.cols]`{ it.`[any][ColumnWithPath.any]` { it == "Alice" } }`[`]`][org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl.cols]` }` - * - * `// identity call, same as `[all][org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl.all] - * - * `df.`[select][org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl.select]` { 
"pathTo"["myGroupCol"].`[cols][org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl.cols]`() }` - * - * @see [all] - * - * - * @param [predicate] A [ColumnFilter function][org.jetbrains.kotlinx.dataframe.ColumnFilter] that takes a [ColumnReference][org.jetbrains.kotlinx.dataframe.columns.ColumnReference] and returns a [Boolean]. - * @return A [ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet] containing the columns that match the given [predicate]. - */ - public fun ColumnPath.cols( - predicate: ColumnFilter<*> = { true }, - ): ColumnSet<*> = colGroup(this).cols(predicate) - - /** ## Cols - * Creates a subset of columns ([ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet]) from a parent [ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet], -[ColumnGroup][org.jetbrains.kotlinx.dataframe.columns.ColumnGroup], or -[DataFrame][org.jetbrains.kotlinx.dataframe.DataFrame]. - * You can use either a [ColumnFilter][org.jetbrains.kotlinx.dataframe.ColumnFilter] or any of the `vararg` overloads for all - * [APIs][org.jetbrains.kotlinx.dataframe.documentation.AccessApi] (+ [ColumnPath][org.jetbrains.kotlinx.dataframe.columns.ColumnPath]). - * - * Aside from calling [cols][org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl.cols] directly, you can also use the [get][org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl.get] operator in most cases. 
- * - * #### For example: - * `df.`[remove][org.jetbrains.kotlinx.dataframe.DataFrame.remove]` { `[cols][org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl.cols]` { it.`[hasNulls][org.jetbrains.kotlinx.dataframe.hasNulls]`() } }` - * - * `df.`[select][org.jetbrains.kotlinx.dataframe.DataFrame.select]` { myGroupCol.`[cols][org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl.cols]`(columnA, columnB) }` - * - * `df.`[select][org.jetbrains.kotlinx.dataframe.DataFrame.select]` { `[colsOf][org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl.colsOf]`<`[String][String]`>()`[`[`][org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl.cols]`1, 3, 5] }` - * - * - * #### Examples for this overload: - * - * `df.`[select][org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl.select]` { "pathTo"["myGroupCol"].`[cols][org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl.cols]` { "e" `[in][String.contains]` it.`[name][org.jetbrains.kotlinx.dataframe.columns.ColumnPath.name]`() } }` + * Creates a subset of columns ([ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet]) from the current [ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet]. 
* - * `df.`[select][org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl.select]` { "pathTo"["myGroupCol"]`[`[`][org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl.cols]`{ it.`[any][ColumnWithPath.any]` { it == "Alice" } }`[`]`][org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl.cols]` }` - * - * `// identity call, same as `[all][org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl.all] - * - * `df.`[select][org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl.select]` { "pathTo"["myGroupCol"].`[cols][org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl.cols]`() }` + * If the current [ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet] is a [SingleColumn][org.jetbrains.kotlinx.dataframe.columns.SingleColumn] + * (and thus consists of only one column (or [column group][org.jetbrains.kotlinx.dataframe.columns.ColumnGroup])), + * then `cols` will create a subset of its children. * - * @see [all] - * - * - * @param [predicate] A [ColumnFilter function][org.jetbrains.kotlinx.dataframe.ColumnFilter] that takes a [ColumnReference][org.jetbrains.kotlinx.dataframe.columns.ColumnReference] and returns a [Boolean]. - * @return A [ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet] containing the columns that match the given [predicate]. - */ - public operator fun ColumnPath.get( - predicate: ColumnFilter<*> = { true }, - ): ColumnSet = cols(predicate) - - /** - * ## Cols - * Creates a subset of columns ([ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet]) from a parent [ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet], -[ColumnGroup][org.jetbrains.kotlinx.dataframe.columns.ColumnGroup], or -[DataFrame][org.jetbrains.kotlinx.dataframe.DataFrame]. * You can use either a [ColumnFilter][org.jetbrains.kotlinx.dataframe.ColumnFilter] or any of the `vararg` overloads for all * [APIs][org.jetbrains.kotlinx.dataframe.documentation.AccessApi] (+ [ColumnPath][org.jetbrains.kotlinx.dataframe.columns.ColumnPath]). 
* @@ -2246,7 +2143,12 @@ public interface ColumnsSelectionDsl : ColumnSelectionDsl, SingleColum private interface KPropertyColsPredicateDocs /** ## Cols - * Creates a subset of columns ([ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet]) from a parent [ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet], -[ColumnGroup][org.jetbrains.kotlinx.dataframe.columns.ColumnGroup], or -[DataFrame][org.jetbrains.kotlinx.dataframe.DataFrame]. + * Creates a subset of columns ([ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet]) from the current [ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet]. + * + * If the current [ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet] is a [SingleColumn][org.jetbrains.kotlinx.dataframe.columns.SingleColumn] + * (and thus consists of only one column (or [column group][org.jetbrains.kotlinx.dataframe.columns.ColumnGroup])), + * then `cols` will create a subset of its children. + * * You can use either a [ColumnFilter][org.jetbrains.kotlinx.dataframe.ColumnFilter] or any of the `vararg` overloads for all * [APIs][org.jetbrains.kotlinx.dataframe.documentation.AccessApi] (+ [ColumnPath][org.jetbrains.kotlinx.dataframe.columns.ColumnPath]). * @@ -2278,10 +2180,15 @@ public interface ColumnsSelectionDsl : ColumnSelectionDsl, SingleColum */ public fun KProperty<*>.cols( predicate: ColumnFilter<*> = { true }, - ): ColumnSet<*> = colGroup(this).cols(predicate) + ): TransformableColumnSet<*> = colGroup(this).cols(predicate) /** ## Cols - * Creates a subset of columns ([ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet]) from a parent [ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet], -[ColumnGroup][org.jetbrains.kotlinx.dataframe.columns.ColumnGroup], or -[DataFrame][org.jetbrains.kotlinx.dataframe.DataFrame]. 
+ * Creates a subset of columns ([ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet]) from the current [ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet]. + * + * If the current [ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet] is a [SingleColumn][org.jetbrains.kotlinx.dataframe.columns.SingleColumn] + * (and thus consists of only one column (or [column group][org.jetbrains.kotlinx.dataframe.columns.ColumnGroup])), + * then `cols` will create a subset of its children. + * * You can use either a [ColumnFilter][org.jetbrains.kotlinx.dataframe.ColumnFilter] or any of the `vararg` overloads for all * [APIs][org.jetbrains.kotlinx.dataframe.documentation.AccessApi] (+ [ColumnPath][org.jetbrains.kotlinx.dataframe.columns.ColumnPath]). * @@ -2313,7 +2220,7 @@ public interface ColumnsSelectionDsl : ColumnSelectionDsl, SingleColum */ public operator fun KProperty<*>.get( predicate: ColumnFilter<*> = { true }, - ): ColumnSet = cols(predicate) + ): TransformableColumnSet = cols(predicate) // endregion @@ -2321,7 +2228,12 @@ public interface ColumnsSelectionDsl : ColumnSelectionDsl, SingleColum /** * ## Cols - * Creates a subset of columns ([ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet]) from a parent [ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet], -[ColumnGroup][org.jetbrains.kotlinx.dataframe.columns.ColumnGroup], or -[DataFrame][org.jetbrains.kotlinx.dataframe.DataFrame]. + * Creates a subset of columns ([ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet]) from the current [ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet]. + * + * If the current [ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet] is a [SingleColumn][org.jetbrains.kotlinx.dataframe.columns.SingleColumn] + * (and thus consists of only one column (or [column group][org.jetbrains.kotlinx.dataframe.columns.ColumnGroup])), + * then `cols` will create a subset of its children. 
+ * * You can use either a [ColumnFilter][org.jetbrains.kotlinx.dataframe.ColumnFilter] or any of the `vararg` overloads for all * [APIs][org.jetbrains.kotlinx.dataframe.documentation.AccessApi] (+ [ColumnPath][org.jetbrains.kotlinx.dataframe.columns.ColumnPath]). * @@ -2351,7 +2263,12 @@ public interface ColumnsSelectionDsl : ColumnSelectionDsl, SingleColum private interface ColumnSetColsVarargColumnReferenceDocs /** ## Cols - * Creates a subset of columns ([ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet]) from a parent [ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet], -[ColumnGroup][org.jetbrains.kotlinx.dataframe.columns.ColumnGroup], or -[DataFrame][org.jetbrains.kotlinx.dataframe.DataFrame]. + * Creates a subset of columns ([ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet]) from the current [ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet]. + * + * If the current [ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet] is a [SingleColumn][org.jetbrains.kotlinx.dataframe.columns.SingleColumn] + * (and thus consists of only one column (or [column group][org.jetbrains.kotlinx.dataframe.columns.ColumnGroup])), + * then `cols` will create a subset of its children. + * * You can use either a [ColumnFilter][org.jetbrains.kotlinx.dataframe.ColumnFilter] or any of the `vararg` overloads for all * [APIs][org.jetbrains.kotlinx.dataframe.documentation.AccessApi] (+ [ColumnPath][org.jetbrains.kotlinx.dataframe.columns.ColumnPath]). * @@ -2389,7 +2306,12 @@ public interface ColumnsSelectionDsl : ColumnSelectionDsl, SingleColum } /** ## Cols - * Creates a subset of columns ([ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet]) from a parent [ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet], -[ColumnGroup][org.jetbrains.kotlinx.dataframe.columns.ColumnGroup], or -[DataFrame][org.jetbrains.kotlinx.dataframe.DataFrame]. 
+ * Creates a subset of columns ([ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet]) from the current [ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet]. + * + * If the current [ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet] is a [SingleColumn][org.jetbrains.kotlinx.dataframe.columns.SingleColumn] + * (and thus consists of only one column (or [column group][org.jetbrains.kotlinx.dataframe.columns.ColumnGroup])), + * then `cols` will create a subset of its children. + * * You can use either a [ColumnFilter][org.jetbrains.kotlinx.dataframe.ColumnFilter] or any of the `vararg` overloads for all * [APIs][org.jetbrains.kotlinx.dataframe.documentation.AccessApi] (+ [ColumnPath][org.jetbrains.kotlinx.dataframe.columns.ColumnPath]). * @@ -2423,7 +2345,12 @@ public interface ColumnsSelectionDsl : ColumnSelectionDsl, SingleColum /** * ## Cols - * Creates a subset of columns ([ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet]) from a parent [ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet], -[ColumnGroup][org.jetbrains.kotlinx.dataframe.columns.ColumnGroup], or -[DataFrame][org.jetbrains.kotlinx.dataframe.DataFrame]. + * Creates a subset of columns ([ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet]) from the current [ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet]. + * + * If the current [ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet] is a [SingleColumn][org.jetbrains.kotlinx.dataframe.columns.SingleColumn] + * (and thus consists of only one column (or [column group][org.jetbrains.kotlinx.dataframe.columns.ColumnGroup])), + * then `cols` will create a subset of its children. + * * You can use either a [ColumnFilter][org.jetbrains.kotlinx.dataframe.ColumnFilter] or any of the `vararg` overloads for all * [APIs][org.jetbrains.kotlinx.dataframe.documentation.AccessApi] (+ [ColumnPath][org.jetbrains.kotlinx.dataframe.columns.ColumnPath]). 
* @@ -2447,6 +2374,12 @@ public interface ColumnsSelectionDsl : ColumnSelectionDsl, SingleColum * * `df.`[select][select]` { myColumnGroup.`[cols][cols]`(columnA, columnB) }` * + * `df.`[select][select]` { "pathTo"["columnGroup"].`[cols][cols]`(columnA, columnB) }` + * + * `df.`[select][select]` { "pathTo"["columnGroup"].`[cols][cols]`("pathTo"["colA"], "pathTo"["colB"]) }` + * + * `df.`[select][select]` { "pathTo"["columnGroup"]`[`[`][cols]`columnA, columnB`[`]`][cols]` }` + * * `// NOTE: there's a `[DataFrame.get]` overload that prevents this:` * * `df.`[select][select]` { myColumnGroup`[`[`][cols]`columnA, columnB`[`]`][cols]` }` @@ -2459,7 +2392,12 @@ public interface ColumnsSelectionDsl : ColumnSelectionDsl, SingleColum private interface SingleColumnColsVarargColumnReferenceDocs /** ## Cols - * Creates a subset of columns ([ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet]) from a parent [ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet], -[ColumnGroup][org.jetbrains.kotlinx.dataframe.columns.ColumnGroup], or -[DataFrame][org.jetbrains.kotlinx.dataframe.DataFrame]. + * Creates a subset of columns ([ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet]) from the current [ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet]. + * + * If the current [ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet] is a [SingleColumn][org.jetbrains.kotlinx.dataframe.columns.SingleColumn] + * (and thus consists of only one column (or [column group][org.jetbrains.kotlinx.dataframe.columns.ColumnGroup])), + * then `cols` will create a subset of its children. + * * You can use either a [ColumnFilter][org.jetbrains.kotlinx.dataframe.ColumnFilter] or any of the `vararg` overloads for all * [APIs][org.jetbrains.kotlinx.dataframe.documentation.AccessApi] (+ [ColumnPath][org.jetbrains.kotlinx.dataframe.columns.ColumnPath]). 
* @@ -2483,6 +2421,12 @@ public interface ColumnsSelectionDsl : ColumnSelectionDsl, SingleColum * * `df.`[select][org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl.select]` { myColumnGroup.`[cols][org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl.cols]`(columnA, columnB) }` * + * `df.`[select][org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl.select]` { "pathTo"["columnGroup"].`[cols][org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl.cols]`(columnA, columnB) }` + * + * `df.`[select][org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl.select]` { "pathTo"["columnGroup"].`[cols][org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl.cols]`("pathTo"["colA"], "pathTo"["colB"]) }` + * + * `df.`[select][org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl.select]` { "pathTo"["columnGroup"]`[`[`][org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl.cols]`columnA, columnB`[`]`][org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl.cols]` }` + * * `// NOTE: there's a `[DataFrame.get][org.jetbrains.kotlinx.dataframe.DataFrame.get]` overload that prevents this:` * * `df.`[select][org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl.select]` { myColumnGroup`[`[`][org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl.cols]`columnA, columnB`[`]`][org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl.cols]` }` @@ -2492,7 +2436,7 @@ public interface ColumnsSelectionDsl : ColumnSelectionDsl, SingleColum * @param [otherCols] Optional additional [ColumnReference][org.jetbrains.kotlinx.dataframe.columns.ColumnReference]s that point to columns. * @return A [ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet] containing the columns that [firstCol] and [otherCols] point to. 
*/ - public fun SingleColumn.cols( + public fun SingleColumn<*>.cols( firstCol: ColumnReference, vararg otherCols: ColumnReference, ): ColumnSet = headPlusArray(firstCol, otherCols).let { refs -> @@ -2503,7 +2447,12 @@ public interface ColumnsSelectionDsl : ColumnSelectionDsl, SingleColum /** * ## Cols - * Creates a subset of columns ([ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet]) from a parent [ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet], -[ColumnGroup][org.jetbrains.kotlinx.dataframe.columns.ColumnGroup], or -[DataFrame][org.jetbrains.kotlinx.dataframe.DataFrame]. + * Creates a subset of columns ([ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet]) from the current [ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet]. + * + * If the current [ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet] is a [SingleColumn][org.jetbrains.kotlinx.dataframe.columns.SingleColumn] + * (and thus consists of only one column (or [column group][org.jetbrains.kotlinx.dataframe.columns.ColumnGroup])), + * then `cols` will create a subset of its children. + * * You can use either a [ColumnFilter][org.jetbrains.kotlinx.dataframe.ColumnFilter] or any of the `vararg` overloads for all * [APIs][org.jetbrains.kotlinx.dataframe.documentation.AccessApi] (+ [ColumnPath][org.jetbrains.kotlinx.dataframe.columns.ColumnPath]). 
* @@ -2527,6 +2476,12 @@ public interface ColumnsSelectionDsl : ColumnSelectionDsl, SingleColum * * `df.`[select][org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl.select]` { myColumnGroup.`[cols][org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl.cols]`(columnA, columnB) }` * + * `df.`[select][org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl.select]` { "pathTo"["columnGroup"].`[cols][org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl.cols]`(columnA, columnB) }` + * + * `df.`[select][org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl.select]` { "pathTo"["columnGroup"].`[cols][org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl.cols]`("pathTo"["colA"], "pathTo"["colB"]) }` + * + * `df.`[select][org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl.select]` { "pathTo"["columnGroup"]`[`[`][org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl.cols]`columnA, columnB`[`]`][org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl.cols]` }` + * * `// NOTE: there's a `[DataFrame.get][org.jetbrains.kotlinx.dataframe.DataFrame.get]` overload that prevents this:` * * `df.`[select][org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl.select]` { myColumnGroup`[`[`][org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl.cols]`columnA, columnB`[`]`][org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl.cols]` }` @@ -2537,14 +2492,19 @@ public interface ColumnsSelectionDsl : ColumnSelectionDsl, SingleColum * @param [otherCols] Optional additional [ColumnReference][org.jetbrains.kotlinx.dataframe.columns.ColumnReference]s that point to columns. * @return A [ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet] containing the columns that [firstCol] and [otherCols] point to. 
*/ - public operator fun SingleColumn.get( + public operator fun SingleColumn<*>.get( firstCol: ColumnReference, vararg otherCols: ColumnReference, ): ColumnSet = cols(firstCol, *otherCols) /** * ## Cols - * Creates a subset of columns ([ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet]) from a parent [ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet], -[ColumnGroup][org.jetbrains.kotlinx.dataframe.columns.ColumnGroup], or -[DataFrame][org.jetbrains.kotlinx.dataframe.DataFrame]. + * Creates a subset of columns ([ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet]) from the current [ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet]. + * + * If the current [ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet] is a [SingleColumn][org.jetbrains.kotlinx.dataframe.columns.SingleColumn] + * (and thus consists of only one column (or [column group][org.jetbrains.kotlinx.dataframe.columns.ColumnGroup])), + * then `cols` will create a subset of its children. + * * You can use either a [ColumnFilter][org.jetbrains.kotlinx.dataframe.ColumnFilter] or any of the `vararg` overloads for all * [APIs][org.jetbrains.kotlinx.dataframe.documentation.AccessApi] (+ [ColumnPath][org.jetbrains.kotlinx.dataframe.columns.ColumnPath]). * @@ -2574,7 +2534,12 @@ public interface ColumnsSelectionDsl : ColumnSelectionDsl, SingleColum private interface StringColsVarargColumnReferenceDocs /** ## Cols - * Creates a subset of columns ([ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet]) from a parent [ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet], -[ColumnGroup][org.jetbrains.kotlinx.dataframe.columns.ColumnGroup], or -[DataFrame][org.jetbrains.kotlinx.dataframe.DataFrame]. + * Creates a subset of columns ([ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet]) from the current [ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet]. 
+ * + * If the current [ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet] is a [SingleColumn][org.jetbrains.kotlinx.dataframe.columns.SingleColumn] + * (and thus consists of only one column (or [column group][org.jetbrains.kotlinx.dataframe.columns.ColumnGroup])), + * then `cols` will create a subset of its children. + * * You can use either a [ColumnFilter][org.jetbrains.kotlinx.dataframe.ColumnFilter] or any of the `vararg` overloads for all * [APIs][org.jetbrains.kotlinx.dataframe.documentation.AccessApi] (+ [ColumnPath][org.jetbrains.kotlinx.dataframe.columns.ColumnPath]). * @@ -2607,7 +2572,12 @@ public interface ColumnsSelectionDsl : ColumnSelectionDsl, SingleColum ): ColumnSet = colGroup(this).cols(firstCol, *otherCols) /** ## Cols - * Creates a subset of columns ([ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet]) from a parent [ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet], -[ColumnGroup][org.jetbrains.kotlinx.dataframe.columns.ColumnGroup], or -[DataFrame][org.jetbrains.kotlinx.dataframe.DataFrame]. + * Creates a subset of columns ([ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet]) from the current [ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet]. + * + * If the current [ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet] is a [SingleColumn][org.jetbrains.kotlinx.dataframe.columns.SingleColumn] + * (and thus consists of only one column (or [column group][org.jetbrains.kotlinx.dataframe.columns.ColumnGroup])), + * then `cols` will create a subset of its children. + * * You can use either a [ColumnFilter][org.jetbrains.kotlinx.dataframe.ColumnFilter] or any of the `vararg` overloads for all * [APIs][org.jetbrains.kotlinx.dataframe.documentation.AccessApi] (+ [ColumnPath][org.jetbrains.kotlinx.dataframe.columns.ColumnPath]). 
* @@ -2641,7 +2611,12 @@ public interface ColumnsSelectionDsl : ColumnSelectionDsl, SingleColum /** * ## Cols - * Creates a subset of columns ([ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet]) from a parent [ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet], -[ColumnGroup][org.jetbrains.kotlinx.dataframe.columns.ColumnGroup], or -[DataFrame][org.jetbrains.kotlinx.dataframe.DataFrame]. + * Creates a subset of columns ([ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet]) from the current [ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet]. + * + * If the current [ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet] is a [SingleColumn][org.jetbrains.kotlinx.dataframe.columns.SingleColumn] + * (and thus consists of only one column (or [column group][org.jetbrains.kotlinx.dataframe.columns.ColumnGroup])), + * then `cols` will create a subset of its children. + * * You can use either a [ColumnFilter][org.jetbrains.kotlinx.dataframe.ColumnFilter] or any of the `vararg` overloads for all * [APIs][org.jetbrains.kotlinx.dataframe.documentation.AccessApi] (+ [ColumnPath][org.jetbrains.kotlinx.dataframe.columns.ColumnPath]). * @@ -2657,21 +2632,26 @@ public interface ColumnsSelectionDsl : ColumnSelectionDsl, SingleColum * * #### Examples for this overload: * - * `df.`[select][select]` { "pathTo"["columnGroup"].`[cols][cols]`(columnA, columnB) }` + * `df.`[select][select]` { Type::myColumnGroup.`[cols][cols]`(columnA, columnB) }` * - * `df.`[select][select]` { "pathTo"["columnGroup"].`[cols][cols]`("pathTo"["colA"], "pathTo"["colB"]) }` + * `df.`[select][select]` { Type::myColumnGroup.`[cols][cols]`("pathTo"["colA"], "pathTo"["colB"]) }` * - * `df.`[select][select]` { "pathTo"["columnGroup"]`[`[`][cols]`columnA, columnB`[`]`][cols]` }` + * `df.`[select][select]` { Type::myColumnGroup`[`[`][cols]`columnA, columnB`[`]`][cols]` }` * * * @param [firstCol] A [ColumnReference] that points to a column. 
* @param [otherCols] Optional additional [ColumnReference]s that point to columns. * @return A [ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet] containing the columns that [firstCol] and [otherCols] point to. */ - private interface ColumnPathColsVarargColumnReferenceDocs + private interface KPropertyColsVarargColumnReferenceDocs /** ## Cols - * Creates a subset of columns ([ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet]) from a parent [ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet], -[ColumnGroup][org.jetbrains.kotlinx.dataframe.columns.ColumnGroup], or -[DataFrame][org.jetbrains.kotlinx.dataframe.DataFrame]. + * Creates a subset of columns ([ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet]) from the current [ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet]. + * + * If the current [ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet] is a [SingleColumn][org.jetbrains.kotlinx.dataframe.columns.SingleColumn] + * (and thus consists of only one column (or [column group][org.jetbrains.kotlinx.dataframe.columns.ColumnGroup])), + * then `cols` will create a subset of its children. + * * You can use either a [ColumnFilter][org.jetbrains.kotlinx.dataframe.ColumnFilter] or any of the `vararg` overloads for all * [APIs][org.jetbrains.kotlinx.dataframe.documentation.AccessApi] (+ [ColumnPath][org.jetbrains.kotlinx.dataframe.columns.ColumnPath]). 
* @@ -2687,24 +2667,29 @@ public interface ColumnsSelectionDsl : ColumnSelectionDsl, SingleColum * * #### Examples for this overload: * - * `df.`[select][org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl.select]` { "pathTo"["columnGroup"].`[cols][org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl.cols]`(columnA, columnB) }` + * `df.`[select][org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl.select]` { Type::myColumnGroup.`[cols][org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl.cols]`(columnA, columnB) }` * - * `df.`[select][org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl.select]` { "pathTo"["columnGroup"].`[cols][org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl.cols]`("pathTo"["colA"], "pathTo"["colB"]) }` + * `df.`[select][org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl.select]` { Type::myColumnGroup.`[cols][org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl.cols]`("pathTo"["colA"], "pathTo"["colB"]) }` * - * `df.`[select][org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl.select]` { "pathTo"["columnGroup"]`[`[`][org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl.cols]`columnA, columnB`[`]`][org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl.cols]` }` + * `df.`[select][org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl.select]` { Type::myColumnGroup`[`[`][org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl.cols]`columnA, columnB`[`]`][org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl.cols]` }` * * * @param [firstCol] A [ColumnReference][org.jetbrains.kotlinx.dataframe.columns.ColumnReference] that points to a column. * @param [otherCols] Optional additional [ColumnReference][org.jetbrains.kotlinx.dataframe.columns.ColumnReference]s that point to columns. * @return A [ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet] containing the columns that [firstCol] and [otherCols] point to. 
*/ - public fun ColumnPath.cols( + public fun KProperty<*>.cols( firstCol: ColumnReference, vararg otherCols: ColumnReference, ): ColumnSet = colGroup(this).cols(firstCol, *otherCols) /** ## Cols - * Creates a subset of columns ([ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet]) from a parent [ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet], -[ColumnGroup][org.jetbrains.kotlinx.dataframe.columns.ColumnGroup], or -[DataFrame][org.jetbrains.kotlinx.dataframe.DataFrame]. + * Creates a subset of columns ([ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet]) from the current [ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet]. + * + * If the current [ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet] is a [SingleColumn][org.jetbrains.kotlinx.dataframe.columns.SingleColumn] + * (and thus consists of only one column (or [column group][org.jetbrains.kotlinx.dataframe.columns.ColumnGroup])), + * then `cols` will create a subset of its children. + * * You can use either a [ColumnFilter][org.jetbrains.kotlinx.dataframe.ColumnFilter] or any of the `vararg` overloads for all * [APIs][org.jetbrains.kotlinx.dataframe.documentation.AccessApi] (+ [ColumnPath][org.jetbrains.kotlinx.dataframe.columns.ColumnPath]). 
* @@ -2720,108 +2705,11 @@ public interface ColumnsSelectionDsl : ColumnSelectionDsl, SingleColum * * #### Examples for this overload: * - * `df.`[select][org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl.select]` { "pathTo"["columnGroup"].`[cols][org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl.cols]`(columnA, columnB) }` + * `df.`[select][org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl.select]` { Type::myColumnGroup.`[cols][org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl.cols]`(columnA, columnB) }` * - * `df.`[select][org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl.select]` { "pathTo"["columnGroup"].`[cols][org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl.cols]`("pathTo"["colA"], "pathTo"["colB"]) }` + * `df.`[select][org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl.select]` { Type::myColumnGroup.`[cols][org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl.cols]`("pathTo"["colA"], "pathTo"["colB"]) }` * - * `df.`[select][org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl.select]` { "pathTo"["columnGroup"]`[`[`][org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl.cols]`columnA, columnB`[`]`][org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl.cols]` }` - * - * - * @param [firstCol] A [ColumnReference][org.jetbrains.kotlinx.dataframe.columns.ColumnReference] that points to a column. - * @param [otherCols] Optional additional [ColumnReference][org.jetbrains.kotlinx.dataframe.columns.ColumnReference]s that point to columns. - * @return A [ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet] containing the columns that [firstCol] and [otherCols] point to. 
- */ - public operator fun ColumnPath.get( - firstCol: ColumnReference, - vararg otherCols: ColumnReference, - ): ColumnSet = cols(firstCol, *otherCols) - - /** - * ## Cols - * Creates a subset of columns ([ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet]) from a parent [ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet], -[ColumnGroup][org.jetbrains.kotlinx.dataframe.columns.ColumnGroup], or -[DataFrame][org.jetbrains.kotlinx.dataframe.DataFrame]. - * You can use either a [ColumnFilter][org.jetbrains.kotlinx.dataframe.ColumnFilter] or any of the `vararg` overloads for all - * [APIs][org.jetbrains.kotlinx.dataframe.documentation.AccessApi] (+ [ColumnPath][org.jetbrains.kotlinx.dataframe.columns.ColumnPath]). - * - * Aside from calling [cols][org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl.cols] directly, you can also use the [get][org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl.get] operator in most cases. - * - * #### For example: - * `df.`[remove][org.jetbrains.kotlinx.dataframe.DataFrame.remove]` { `[cols][org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl.cols]` { it.`[hasNulls][org.jetbrains.kotlinx.dataframe.hasNulls]`() } }` - * - * `df.`[select][org.jetbrains.kotlinx.dataframe.DataFrame.select]` { myGroupCol.`[cols][org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl.cols]`(columnA, columnB) }` - * - * `df.`[select][org.jetbrains.kotlinx.dataframe.DataFrame.select]` { `[colsOf][org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl.colsOf]`<`[String][String]`>()`[`[`][org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl.cols]`1, 3, 5] }` - * - * - * #### Examples for this overload: - * - * `df.`[select][select]` { Type::myColumnGroup.`[cols][cols]`(columnA, columnB) }` - * - * `df.`[select][select]` { Type::myColumnGroup.`[cols][cols]`("pathTo"["colA"], "pathTo"["colB"]) }` - * - * `df.`[select][select]` { Type::myColumnGroup`[`[`][cols]`columnA, columnB`[`]`][cols]` }` - * - * - * @param [firstCol] A 
[ColumnReference] that points to a column. - * @param [otherCols] Optional additional [ColumnReference]s that point to columns. - * @return A [ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet] containing the columns that [firstCol] and [otherCols] point to. - */ - private interface KPropertyColsVarargColumnReferenceDocs - - /** ## Cols - * Creates a subset of columns ([ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet]) from a parent [ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet], -[ColumnGroup][org.jetbrains.kotlinx.dataframe.columns.ColumnGroup], or -[DataFrame][org.jetbrains.kotlinx.dataframe.DataFrame]. - * You can use either a [ColumnFilter][org.jetbrains.kotlinx.dataframe.ColumnFilter] or any of the `vararg` overloads for all - * [APIs][org.jetbrains.kotlinx.dataframe.documentation.AccessApi] (+ [ColumnPath][org.jetbrains.kotlinx.dataframe.columns.ColumnPath]). - * - * Aside from calling [cols][org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl.cols] directly, you can also use the [get][org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl.get] operator in most cases. 
- * - * #### For example: - * `df.`[remove][org.jetbrains.kotlinx.dataframe.DataFrame.remove]` { `[cols][org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl.cols]` { it.`[hasNulls][org.jetbrains.kotlinx.dataframe.hasNulls]`() } }` - * - * `df.`[select][org.jetbrains.kotlinx.dataframe.DataFrame.select]` { myGroupCol.`[cols][org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl.cols]`(columnA, columnB) }` - * - * `df.`[select][org.jetbrains.kotlinx.dataframe.DataFrame.select]` { `[colsOf][org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl.colsOf]`<`[String][String]`>()`[`[`][org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl.cols]`1, 3, 5] }` - * - * - * #### Examples for this overload: - * - * `df.`[select][org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl.select]` { Type::myColumnGroup.`[cols][org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl.cols]`(columnA, columnB) }` - * - * `df.`[select][org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl.select]` { Type::myColumnGroup.`[cols][org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl.cols]`("pathTo"["colA"], "pathTo"["colB"]) }` - * - * `df.`[select][org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl.select]` { Type::myColumnGroup`[`[`][org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl.cols]`columnA, columnB`[`]`][org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl.cols]` }` - * - * - * @param [firstCol] A [ColumnReference][org.jetbrains.kotlinx.dataframe.columns.ColumnReference] that points to a column. - * @param [otherCols] Optional additional [ColumnReference][org.jetbrains.kotlinx.dataframe.columns.ColumnReference]s that point to columns. - * @return A [ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet] containing the columns that [firstCol] and [otherCols] point to. 
- */ - public fun KProperty<*>.cols( - firstCol: ColumnReference, - vararg otherCols: ColumnReference, - ): ColumnSet = colGroup(this).cols(firstCol, *otherCols) - - /** ## Cols - * Creates a subset of columns ([ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet]) from a parent [ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet], -[ColumnGroup][org.jetbrains.kotlinx.dataframe.columns.ColumnGroup], or -[DataFrame][org.jetbrains.kotlinx.dataframe.DataFrame]. - * You can use either a [ColumnFilter][org.jetbrains.kotlinx.dataframe.ColumnFilter] or any of the `vararg` overloads for all - * [APIs][org.jetbrains.kotlinx.dataframe.documentation.AccessApi] (+ [ColumnPath][org.jetbrains.kotlinx.dataframe.columns.ColumnPath]). - * - * Aside from calling [cols][org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl.cols] directly, you can also use the [get][org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl.get] operator in most cases. - * - * #### For example: - * `df.`[remove][org.jetbrains.kotlinx.dataframe.DataFrame.remove]` { `[cols][org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl.cols]` { it.`[hasNulls][org.jetbrains.kotlinx.dataframe.hasNulls]`() } }` - * - * `df.`[select][org.jetbrains.kotlinx.dataframe.DataFrame.select]` { myGroupCol.`[cols][org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl.cols]`(columnA, columnB) }` - * - * `df.`[select][org.jetbrains.kotlinx.dataframe.DataFrame.select]` { `[colsOf][org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl.colsOf]`<`[String][String]`>()`[`[`][org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl.cols]`1, 3, 5] }` - * - * - * #### Examples for this overload: - * - * `df.`[select][org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl.select]` { Type::myColumnGroup.`[cols][org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl.cols]`(columnA, columnB) }` - * - * `df.`[select][org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl.select]` { 
Type::myColumnGroup.`[cols][org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl.cols]`("pathTo"["colA"], "pathTo"["colB"]) }` - * - * `df.`[select][org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl.select]` { Type::myColumnGroup`[`[`][org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl.cols]`columnA, columnB`[`]`][org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl.cols]` }` + * `df.`[select][org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl.select]` { Type::myColumnGroup`[`[`][org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl.cols]`columnA, columnB`[`]`][org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl.cols]` }` * * * @param [firstCol] A [ColumnReference][org.jetbrains.kotlinx.dataframe.columns.ColumnReference] that points to a column. @@ -2839,7 +2727,12 @@ public interface ColumnsSelectionDsl : ColumnSelectionDsl, SingleColum /** * ## Cols - * Creates a subset of columns ([ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet]) from a parent [ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet], -[ColumnGroup][org.jetbrains.kotlinx.dataframe.columns.ColumnGroup], or -[DataFrame][org.jetbrains.kotlinx.dataframe.DataFrame]. + * Creates a subset of columns ([ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet]) from the current [ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet]. + * + * If the current [ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet] is a [SingleColumn][org.jetbrains.kotlinx.dataframe.columns.SingleColumn] + * (and thus consists of only one column (or [column group][org.jetbrains.kotlinx.dataframe.columns.ColumnGroup])), + * then `cols` will create a subset of its children. + * * You can use either a [ColumnFilter][org.jetbrains.kotlinx.dataframe.ColumnFilter] or any of the `vararg` overloads for all * [APIs][org.jetbrains.kotlinx.dataframe.documentation.AccessApi] (+ [ColumnPath][org.jetbrains.kotlinx.dataframe.columns.ColumnPath]). 
* @@ -2867,7 +2760,12 @@ public interface ColumnsSelectionDsl : ColumnSelectionDsl, SingleColum private interface ColumnSetColsVarargStringDocs /** ## Cols - * Creates a subset of columns ([ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet]) from a parent [ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet], -[ColumnGroup][org.jetbrains.kotlinx.dataframe.columns.ColumnGroup], or -[DataFrame][org.jetbrains.kotlinx.dataframe.DataFrame]. + * Creates a subset of columns ([ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet]) from the current [ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet]. + * + * If the current [ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet] is a [SingleColumn][org.jetbrains.kotlinx.dataframe.columns.SingleColumn] + * (and thus consists of only one column (or [column group][org.jetbrains.kotlinx.dataframe.columns.ColumnGroup])), + * then `cols` will create a subset of its children. + * * You can use either a [ColumnFilter][org.jetbrains.kotlinx.dataframe.ColumnFilter] or any of the `vararg` overloads for all * [APIs][org.jetbrains.kotlinx.dataframe.documentation.AccessApi] (+ [ColumnPath][org.jetbrains.kotlinx.dataframe.columns.ColumnPath]). * @@ -2892,20 +2790,21 @@ public interface ColumnsSelectionDsl : ColumnSelectionDsl, SingleColum * @param [otherCols] Optional additional [String]s that point to columns. * @return A [ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet] containing the columns that [firstCol] and [otherCols] point to. 
*/ - @Suppress("UNCHECKED_CAST") public fun ColumnSet.cols( firstCol: String, vararg otherCols: String, - ): ColumnSet = transformWithContext { - dataFrameOf(it) - .asColumnGroup() - .cols(firstCol, *otherCols) - .resolve(this) as List> + ): ColumnSet = headPlusArray(firstCol, otherCols).let { names -> + filter { it.name in names } } /** * ## Cols - * Creates a subset of columns ([ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet]) from a parent [ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet], -[ColumnGroup][org.jetbrains.kotlinx.dataframe.columns.ColumnGroup], or -[DataFrame][org.jetbrains.kotlinx.dataframe.DataFrame]. + * Creates a subset of columns ([ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet]) from the current [ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet]. + * + * If the current [ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet] is a [SingleColumn][org.jetbrains.kotlinx.dataframe.columns.SingleColumn] + * (and thus consists of only one column (or [column group][org.jetbrains.kotlinx.dataframe.columns.ColumnGroup])), + * then `cols` will create a subset of its children. + * * You can use either a [ColumnFilter][org.jetbrains.kotlinx.dataframe.ColumnFilter] or any of the `vararg` overloads for all * [APIs][org.jetbrains.kotlinx.dataframe.documentation.AccessApi] (+ [ColumnPath][org.jetbrains.kotlinx.dataframe.columns.ColumnPath]). * @@ -2937,7 +2836,12 @@ public interface ColumnsSelectionDsl : ColumnSelectionDsl, SingleColum /** * ## Cols - * Creates a subset of columns ([ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet]) from a parent [ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet], -[ColumnGroup][org.jetbrains.kotlinx.dataframe.columns.ColumnGroup], or -[DataFrame][org.jetbrains.kotlinx.dataframe.DataFrame]. 
+ * Creates a subset of columns ([ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet]) from the current [ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet]. + * + * If the current [ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet] is a [SingleColumn][org.jetbrains.kotlinx.dataframe.columns.SingleColumn] + * (and thus consists of only one column (or [column group][org.jetbrains.kotlinx.dataframe.columns.ColumnGroup])), + * then `cols` will create a subset of its children. + * * You can use either a [ColumnFilter][org.jetbrains.kotlinx.dataframe.ColumnFilter] or any of the `vararg` overloads for all * [APIs][org.jetbrains.kotlinx.dataframe.documentation.AccessApi] (+ [ColumnPath][org.jetbrains.kotlinx.dataframe.columns.ColumnPath]). * @@ -2959,6 +2863,10 @@ public interface ColumnsSelectionDsl : ColumnSelectionDsl, SingleColum * * `df.`[select][select]` { myColumnGroup.`[cols][cols]`("columnA", "columnB") }` * + * `df.`[select][select]` { "pathTo"["columnGroup"].`[cols][cols]`("columnA", "columnB") }` + * + * `df.`[select][select]` { "pathTo"["columnGroup"]`[`[`][cols]`"columnA", "columnB"`[`]`][cols]` }` + * * `// NOTE: there's a `[DataFrame.get]` overload that prevents this:` * * `df.`[select][select]` { myColumnGroup`[`[`][cols]`"columnA", "columnB"`[`]`][cols]` }` @@ -2971,7 +2879,12 @@ public interface ColumnsSelectionDsl : ColumnSelectionDsl, SingleColum private interface SingleColumnColsVarargStringDocs /** ## Cols - * Creates a subset of columns ([ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet]) from a parent [ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet], -[ColumnGroup][org.jetbrains.kotlinx.dataframe.columns.ColumnGroup], or -[DataFrame][org.jetbrains.kotlinx.dataframe.DataFrame]. + * Creates a subset of columns ([ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet]) from the current [ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet]. 
+ * + * If the current [ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet] is a [SingleColumn][org.jetbrains.kotlinx.dataframe.columns.SingleColumn] + * (and thus consists of only one column (or [column group][org.jetbrains.kotlinx.dataframe.columns.ColumnGroup])), + * then `cols` will create a subset of its children. + * * You can use either a [ColumnFilter][org.jetbrains.kotlinx.dataframe.ColumnFilter] or any of the `vararg` overloads for all * [APIs][org.jetbrains.kotlinx.dataframe.documentation.AccessApi] (+ [ColumnPath][org.jetbrains.kotlinx.dataframe.columns.ColumnPath]). * @@ -2993,6 +2906,10 @@ public interface ColumnsSelectionDsl : ColumnSelectionDsl, SingleColum * * `df.`[select][org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl.select]` { myColumnGroup.`[cols][org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl.cols]`("columnA", "columnB") }` * + * `df.`[select][org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl.select]` { "pathTo"["columnGroup"].`[cols][org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl.cols]`("columnA", "columnB") }` + * + * `df.`[select][org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl.select]` { "pathTo"["columnGroup"]`[`[`][org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl.cols]`"columnA", "columnB"`[`]`][org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl.cols]` }` + * * `// NOTE: there's a `[DataFrame.get][org.jetbrains.kotlinx.dataframe.DataFrame.get]` overload that prevents this:` * * `df.`[select][org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl.select]` { myColumnGroup`[`[`][org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl.cols]`"columnA", "columnB"`[`]`][org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl.cols]` }` @@ -3002,7 +2919,7 @@ public interface ColumnsSelectionDsl : ColumnSelectionDsl, SingleColum * @param [otherCols] Optional additional [String]s that point to columns. 
* @return A [ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet] containing the columns that [firstCol] and [otherCols] point to. */ - public fun SingleColumn.cols( + public fun SingleColumn<*>.cols( firstCol: String, vararg otherCols: String, ): ColumnSet<*> = headPlusArray(firstCol, otherCols).let { names -> @@ -3011,7 +2928,12 @@ public interface ColumnsSelectionDsl : ColumnSelectionDsl, SingleColum /** * ## Cols - * Creates a subset of columns ([ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet]) from a parent [ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet], -[ColumnGroup][org.jetbrains.kotlinx.dataframe.columns.ColumnGroup], or -[DataFrame][org.jetbrains.kotlinx.dataframe.DataFrame]. + * Creates a subset of columns ([ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet]) from the current [ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet]. + * + * If the current [ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet] is a [SingleColumn][org.jetbrains.kotlinx.dataframe.columns.SingleColumn] + * (and thus consists of only one column (or [column group][org.jetbrains.kotlinx.dataframe.columns.ColumnGroup])), + * then `cols` will create a subset of its children. + * * You can use either a [ColumnFilter][org.jetbrains.kotlinx.dataframe.ColumnFilter] or any of the `vararg` overloads for all * [APIs][org.jetbrains.kotlinx.dataframe.documentation.AccessApi] (+ [ColumnPath][org.jetbrains.kotlinx.dataframe.columns.ColumnPath]). 
* @@ -3033,6 +2955,10 @@ public interface ColumnsSelectionDsl : ColumnSelectionDsl, SingleColum * * `df.`[select][org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl.select]` { myColumnGroup.`[cols][org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl.cols]`("columnA", "columnB") }` * + * `df.`[select][org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl.select]` { "pathTo"["columnGroup"].`[cols][org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl.cols]`("columnA", "columnB") }` + * + * `df.`[select][org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl.select]` { "pathTo"["columnGroup"]`[`[`][org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl.cols]`"columnA", "columnB"`[`]`][org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl.cols]` }` + * * `// NOTE: there's a `[DataFrame.get][org.jetbrains.kotlinx.dataframe.DataFrame.get]` overload that prevents this:` * * `df.`[select][org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl.select]` { myColumnGroup`[`[`][org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl.cols]`"columnA", "columnB"`[`]`][org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl.cols]` }` @@ -3043,14 +2969,19 @@ public interface ColumnsSelectionDsl : ColumnSelectionDsl, SingleColum * @param [otherCols] Optional additional [String]s that point to columns. * @return A [ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet] containing the columns that [firstCol] and [otherCols] point to. */ - public operator fun SingleColumn.get( + public operator fun SingleColumn<*>.get( firstCol: String, vararg otherCols: String, ): ColumnSet<*> = cols(firstCol, *otherCols) /** * ## Cols - * Creates a subset of columns ([ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet]) from a parent [ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet], -[ColumnGroup][org.jetbrains.kotlinx.dataframe.columns.ColumnGroup], or -[DataFrame][org.jetbrains.kotlinx.dataframe.DataFrame]. 
+ * Creates a subset of columns ([ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet]) from the current [ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet]. + * + * If the current [ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet] is a [SingleColumn][org.jetbrains.kotlinx.dataframe.columns.SingleColumn] + * (and thus consists of only one column (or [column group][org.jetbrains.kotlinx.dataframe.columns.ColumnGroup])), + * then `cols` will create a subset of its children. + * * You can use either a [ColumnFilter][org.jetbrains.kotlinx.dataframe.ColumnFilter] or any of the `vararg` overloads for all * [APIs][org.jetbrains.kotlinx.dataframe.documentation.AccessApi] (+ [ColumnPath][org.jetbrains.kotlinx.dataframe.columns.ColumnPath]). * @@ -3078,7 +3009,12 @@ public interface ColumnsSelectionDsl : ColumnSelectionDsl, SingleColum private interface StringColsVarargStringDocs /** ## Cols - * Creates a subset of columns ([ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet]) from a parent [ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet], -[ColumnGroup][org.jetbrains.kotlinx.dataframe.columns.ColumnGroup], or -[DataFrame][org.jetbrains.kotlinx.dataframe.DataFrame]. + * Creates a subset of columns ([ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet]) from the current [ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet]. + * + * If the current [ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet] is a [SingleColumn][org.jetbrains.kotlinx.dataframe.columns.SingleColumn] + * (and thus consists of only one column (or [column group][org.jetbrains.kotlinx.dataframe.columns.ColumnGroup])), + * then `cols` will create a subset of its children. 
+ * * You can use either a [ColumnFilter][org.jetbrains.kotlinx.dataframe.ColumnFilter] or any of the `vararg` overloads for all * [APIs][org.jetbrains.kotlinx.dataframe.documentation.AccessApi] (+ [ColumnPath][org.jetbrains.kotlinx.dataframe.columns.ColumnPath]). * @@ -3109,7 +3045,12 @@ public interface ColumnsSelectionDsl : ColumnSelectionDsl, SingleColum ): ColumnSet<*> = colGroup(this).cols(firstCol, *otherCols) /** ## Cols - * Creates a subset of columns ([ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet]) from a parent [ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet], -[ColumnGroup][org.jetbrains.kotlinx.dataframe.columns.ColumnGroup], or -[DataFrame][org.jetbrains.kotlinx.dataframe.DataFrame]. + * Creates a subset of columns ([ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet]) from the current [ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet]. + * + * If the current [ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet] is a [SingleColumn][org.jetbrains.kotlinx.dataframe.columns.SingleColumn] + * (and thus consists of only one column (or [column group][org.jetbrains.kotlinx.dataframe.columns.ColumnGroup])), + * then `cols` will create a subset of its children. + * * You can use either a [ColumnFilter][org.jetbrains.kotlinx.dataframe.ColumnFilter] or any of the `vararg` overloads for all * [APIs][org.jetbrains.kotlinx.dataframe.documentation.AccessApi] (+ [ColumnPath][org.jetbrains.kotlinx.dataframe.columns.ColumnPath]). * @@ -3141,98 +3082,12 @@ public interface ColumnsSelectionDsl : ColumnSelectionDsl, SingleColum /** * ## Cols - * Creates a subset of columns ([ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet]) from a parent [ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet], -[ColumnGroup][org.jetbrains.kotlinx.dataframe.columns.ColumnGroup], or -[DataFrame][org.jetbrains.kotlinx.dataframe.DataFrame]. 
- * You can use either a [ColumnFilter][org.jetbrains.kotlinx.dataframe.ColumnFilter] or any of the `vararg` overloads for all - * [APIs][org.jetbrains.kotlinx.dataframe.documentation.AccessApi] (+ [ColumnPath][org.jetbrains.kotlinx.dataframe.columns.ColumnPath]). - * - * Aside from calling [cols][org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl.cols] directly, you can also use the [get][org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl.get] operator in most cases. - * - * #### For example: - * `df.`[remove][org.jetbrains.kotlinx.dataframe.DataFrame.remove]` { `[cols][org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl.cols]` { it.`[hasNulls][org.jetbrains.kotlinx.dataframe.hasNulls]`() } }` - * - * `df.`[select][org.jetbrains.kotlinx.dataframe.DataFrame.select]` { myGroupCol.`[cols][org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl.cols]`(columnA, columnB) }` - * - * `df.`[select][org.jetbrains.kotlinx.dataframe.DataFrame.select]` { `[colsOf][org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl.colsOf]`<`[String][String]`>()`[`[`][org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl.cols]`1, 3, 5] }` - * - * - * #### Examples for this overload: - * - * `df.`[select][select]` { "pathTo"["columnGroup"].`[cols][cols]`("columnA", "columnB") }` - * - * `df.`[select][select]` { "pathTo"["columnGroup"]`[`[`][cols]`"columnA", "columnB"`[`]`][cols]` }` - * - * - * @param [firstCol] A [String] that points to a column. - * @param [otherCols] Optional additional [String]s that point to columns. - * @return A [ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet] containing the columns that [firstCol] and [otherCols] point to. 
- */ - private interface ColumnPathColsVarargStringDocs - - /** ## Cols - * Creates a subset of columns ([ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet]) from a parent [ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet], -[ColumnGroup][org.jetbrains.kotlinx.dataframe.columns.ColumnGroup], or -[DataFrame][org.jetbrains.kotlinx.dataframe.DataFrame]. - * You can use either a [ColumnFilter][org.jetbrains.kotlinx.dataframe.ColumnFilter] or any of the `vararg` overloads for all - * [APIs][org.jetbrains.kotlinx.dataframe.documentation.AccessApi] (+ [ColumnPath][org.jetbrains.kotlinx.dataframe.columns.ColumnPath]). - * - * Aside from calling [cols][org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl.cols] directly, you can also use the [get][org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl.get] operator in most cases. - * - * #### For example: - * `df.`[remove][org.jetbrains.kotlinx.dataframe.DataFrame.remove]` { `[cols][org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl.cols]` { it.`[hasNulls][org.jetbrains.kotlinx.dataframe.hasNulls]`() } }` - * - * `df.`[select][org.jetbrains.kotlinx.dataframe.DataFrame.select]` { myGroupCol.`[cols][org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl.cols]`(columnA, columnB) }` - * - * `df.`[select][org.jetbrains.kotlinx.dataframe.DataFrame.select]` { `[colsOf][org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl.colsOf]`<`[String][String]`>()`[`[`][org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl.cols]`1, 3, 5] }` - * - * - * #### Examples for this overload: - * - * `df.`[select][org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl.select]` { "pathTo"["columnGroup"].`[cols][org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl.cols]`("columnA", "columnB") }` - * - * `df.`[select][org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl.select]` { "pathTo"["columnGroup"]`[`[`][org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl.cols]`"columnA", 
"columnB"`[`]`][org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl.cols]` }` - * - * - * @param [firstCol] A [String] that points to a column. - * @param [otherCols] Optional additional [String]s that point to columns. - * @return A [ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet] containing the columns that [firstCol] and [otherCols] point to. - */ - public fun ColumnPath.cols( - firstCol: String, - vararg otherCols: String, - ): ColumnSet<*> = colGroup(this).cols(firstCol, *otherCols) - - /** ## Cols - * Creates a subset of columns ([ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet]) from a parent [ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet], -[ColumnGroup][org.jetbrains.kotlinx.dataframe.columns.ColumnGroup], or -[DataFrame][org.jetbrains.kotlinx.dataframe.DataFrame]. - * You can use either a [ColumnFilter][org.jetbrains.kotlinx.dataframe.ColumnFilter] or any of the `vararg` overloads for all - * [APIs][org.jetbrains.kotlinx.dataframe.documentation.AccessApi] (+ [ColumnPath][org.jetbrains.kotlinx.dataframe.columns.ColumnPath]). - * - * Aside from calling [cols][org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl.cols] directly, you can also use the [get][org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl.get] operator in most cases. - * - * #### For example: - * `df.`[remove][org.jetbrains.kotlinx.dataframe.DataFrame.remove]` { `[cols][org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl.cols]` { it.`[hasNulls][org.jetbrains.kotlinx.dataframe.hasNulls]`() } }` - * - * `df.`[select][org.jetbrains.kotlinx.dataframe.DataFrame.select]` { myGroupCol.`[cols][org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl.cols]`(columnA, columnB) }` + * Creates a subset of columns ([ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet]) from the current [ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet]. 
* - * `df.`[select][org.jetbrains.kotlinx.dataframe.DataFrame.select]` { `[colsOf][org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl.colsOf]`<`[String][String]`>()`[`[`][org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl.cols]`1, 3, 5] }` - * - * - * #### Examples for this overload: - * - * `df.`[select][org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl.select]` { "pathTo"["columnGroup"].`[cols][org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl.cols]`("columnA", "columnB") }` - * - * `df.`[select][org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl.select]` { "pathTo"["columnGroup"]`[`[`][org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl.cols]`"columnA", "columnB"`[`]`][org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl.cols]` }` + * If the current [ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet] is a [SingleColumn][org.jetbrains.kotlinx.dataframe.columns.SingleColumn] + * (and thus consists of only one column (or [column group][org.jetbrains.kotlinx.dataframe.columns.ColumnGroup])), + * then `cols` will create a subset of its children. * - * - * @param [firstCol] A [String] that points to a column. - * @param [otherCols] Optional additional [String]s that point to columns. - * @return A [ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet] containing the columns that [firstCol] and [otherCols] point to. - */ - public operator fun ColumnPath.get( - firstCol: String, - vararg otherCols: String, - ): ColumnSet<*> = cols(firstCol, *otherCols) - - /** - * ## Cols - * Creates a subset of columns ([ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet]) from a parent [ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet], -[ColumnGroup][org.jetbrains.kotlinx.dataframe.columns.ColumnGroup], or -[DataFrame][org.jetbrains.kotlinx.dataframe.DataFrame]. 
* You can use either a [ColumnFilter][org.jetbrains.kotlinx.dataframe.ColumnFilter] or any of the `vararg` overloads for all * [APIs][org.jetbrains.kotlinx.dataframe.documentation.AccessApi] (+ [ColumnPath][org.jetbrains.kotlinx.dataframe.columns.ColumnPath]). * @@ -3260,7 +3115,12 @@ public interface ColumnsSelectionDsl : ColumnSelectionDsl, SingleColum private interface KPropertiesColsVarargStringDocs /** ## Cols - * Creates a subset of columns ([ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet]) from a parent [ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet], -[ColumnGroup][org.jetbrains.kotlinx.dataframe.columns.ColumnGroup], or -[DataFrame][org.jetbrains.kotlinx.dataframe.DataFrame]. + * Creates a subset of columns ([ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet]) from the current [ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet]. + * + * If the current [ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet] is a [SingleColumn][org.jetbrains.kotlinx.dataframe.columns.SingleColumn] + * (and thus consists of only one column (or [column group][org.jetbrains.kotlinx.dataframe.columns.ColumnGroup])), + * then `cols` will create a subset of its children. + * * You can use either a [ColumnFilter][org.jetbrains.kotlinx.dataframe.ColumnFilter] or any of the `vararg` overloads for all * [APIs][org.jetbrains.kotlinx.dataframe.documentation.AccessApi] (+ [ColumnPath][org.jetbrains.kotlinx.dataframe.columns.ColumnPath]). * @@ -3291,7 +3151,12 @@ public interface ColumnsSelectionDsl : ColumnSelectionDsl, SingleColum ): ColumnSet<*> = colGroup(this).cols(firstCol, *otherCols) /** ## Cols - * Creates a subset of columns ([ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet]) from a parent [ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet], -[ColumnGroup][org.jetbrains.kotlinx.dataframe.columns.ColumnGroup], or -[DataFrame][org.jetbrains.kotlinx.dataframe.DataFrame]. 
+ * Creates a subset of columns ([ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet]) from the current [ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet]. + * + * If the current [ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet] is a [SingleColumn][org.jetbrains.kotlinx.dataframe.columns.SingleColumn] + * (and thus consists of only one column (or [column group][org.jetbrains.kotlinx.dataframe.columns.ColumnGroup])), + * then `cols` will create a subset of its children. + * * You can use either a [ColumnFilter][org.jetbrains.kotlinx.dataframe.ColumnFilter] or any of the `vararg` overloads for all * [APIs][org.jetbrains.kotlinx.dataframe.documentation.AccessApi] (+ [ColumnPath][org.jetbrains.kotlinx.dataframe.columns.ColumnPath]). * @@ -3327,7 +3192,12 @@ public interface ColumnsSelectionDsl : ColumnSelectionDsl, SingleColum /** * ## Cols - * Creates a subset of columns ([ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet]) from a parent [ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet], -[ColumnGroup][org.jetbrains.kotlinx.dataframe.columns.ColumnGroup], or -[DataFrame][org.jetbrains.kotlinx.dataframe.DataFrame]. + * Creates a subset of columns ([ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet]) from the current [ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet]. + * + * If the current [ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet] is a [SingleColumn][org.jetbrains.kotlinx.dataframe.columns.SingleColumn] + * (and thus consists of only one column (or [column group][org.jetbrains.kotlinx.dataframe.columns.ColumnGroup])), + * then `cols` will create a subset of its children. + * * You can use either a [ColumnFilter][org.jetbrains.kotlinx.dataframe.ColumnFilter] or any of the `vararg` overloads for all * [APIs][org.jetbrains.kotlinx.dataframe.documentation.AccessApi] (+ [ColumnPath][org.jetbrains.kotlinx.dataframe.columns.ColumnPath]). 
* @@ -3355,7 +3225,12 @@ public interface ColumnsSelectionDsl : ColumnSelectionDsl, SingleColum private interface ColumnSetColsVarargKPropertyDocs /** ## Cols - * Creates a subset of columns ([ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet]) from a parent [ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet], -[ColumnGroup][org.jetbrains.kotlinx.dataframe.columns.ColumnGroup], or -[DataFrame][org.jetbrains.kotlinx.dataframe.DataFrame]. + * Creates a subset of columns ([ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet]) from the current [ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet]. + * + * If the current [ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet] is a [SingleColumn][org.jetbrains.kotlinx.dataframe.columns.SingleColumn] + * (and thus consists of only one column (or [column group][org.jetbrains.kotlinx.dataframe.columns.ColumnGroup])), + * then `cols` will create a subset of its children. + * * You can use either a [ColumnFilter][org.jetbrains.kotlinx.dataframe.ColumnFilter] or any of the `vararg` overloads for all * [APIs][org.jetbrains.kotlinx.dataframe.documentation.AccessApi] (+ [ColumnPath][org.jetbrains.kotlinx.dataframe.columns.ColumnPath]). * @@ -3383,15 +3258,17 @@ public interface ColumnsSelectionDsl : ColumnSelectionDsl, SingleColum public fun ColumnSet.cols( firstCol: KProperty, vararg otherCols: KProperty, - ): ColumnSet = transformWithContext { - dataFrameOf(it) - .asColumnGroup() - .cols(firstCol, *otherCols) - .resolve(this) + ): ColumnSet = headPlusArray(firstCol, otherCols).map { it.name }.let { names -> + filter { it.name in names } } /** ## Cols - * Creates a subset of columns ([ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet]) from a parent [ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet], -[ColumnGroup][org.jetbrains.kotlinx.dataframe.columns.ColumnGroup], or -[DataFrame][org.jetbrains.kotlinx.dataframe.DataFrame]. 
+ * Creates a subset of columns ([ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet]) from the current [ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet]. + * + * If the current [ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet] is a [SingleColumn][org.jetbrains.kotlinx.dataframe.columns.SingleColumn] + * (and thus consists of only one column (or [column group][org.jetbrains.kotlinx.dataframe.columns.ColumnGroup])), + * then `cols` will create a subset of its children. + * * You can use either a [ColumnFilter][org.jetbrains.kotlinx.dataframe.ColumnFilter] or any of the `vararg` overloads for all * [APIs][org.jetbrains.kotlinx.dataframe.documentation.AccessApi] (+ [ColumnPath][org.jetbrains.kotlinx.dataframe.columns.ColumnPath]). * @@ -3423,7 +3300,12 @@ public interface ColumnsSelectionDsl : ColumnSelectionDsl, SingleColum /** * ## Cols - * Creates a subset of columns ([ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet]) from a parent [ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet], -[ColumnGroup][org.jetbrains.kotlinx.dataframe.columns.ColumnGroup], or -[DataFrame][org.jetbrains.kotlinx.dataframe.DataFrame]. + * Creates a subset of columns ([ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet]) from the current [ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet]. + * + * If the current [ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet] is a [SingleColumn][org.jetbrains.kotlinx.dataframe.columns.SingleColumn] + * (and thus consists of only one column (or [column group][org.jetbrains.kotlinx.dataframe.columns.ColumnGroup])), + * then `cols` will create a subset of its children. + * * You can use either a [ColumnFilter][org.jetbrains.kotlinx.dataframe.ColumnFilter] or any of the `vararg` overloads for all * [APIs][org.jetbrains.kotlinx.dataframe.documentation.AccessApi] (+ [ColumnPath][org.jetbrains.kotlinx.dataframe.columns.ColumnPath]). 
* @@ -3447,6 +3329,10 @@ public interface ColumnsSelectionDsl : ColumnSelectionDsl, SingleColum * * `df.`[select][select]` { myColumnGroup`[`[`][cols]`Type::colA, Type::colB`[`]`][cols]` }` * + * `df.`[select][select]` { "pathTo"["columnGroup"].`[cols][cols]`(Type::colA, Type::colB) }` + * + * `df.`[select][select]` { "pathTo"["columnGroup"]`[`[`][cols]`Type::colA, Type::colB`[`]`][cols]` }` + * * * @param [firstCol] A [KProperty] that points to a column. * @param [otherCols] Optional additional [KProperty]s that point to columns. @@ -3455,7 +3341,12 @@ public interface ColumnsSelectionDsl : ColumnSelectionDsl, SingleColum private interface SingleColumnColsVarargKPropertyDocs /** ## Cols - * Creates a subset of columns ([ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet]) from a parent [ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet], -[ColumnGroup][org.jetbrains.kotlinx.dataframe.columns.ColumnGroup], or -[DataFrame][org.jetbrains.kotlinx.dataframe.DataFrame]. + * Creates a subset of columns ([ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet]) from the current [ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet]. + * + * If the current [ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet] is a [SingleColumn][org.jetbrains.kotlinx.dataframe.columns.SingleColumn] + * (and thus consists of only one column (or [column group][org.jetbrains.kotlinx.dataframe.columns.ColumnGroup])), + * then `cols` will create a subset of its children. + * * You can use either a [ColumnFilter][org.jetbrains.kotlinx.dataframe.ColumnFilter] or any of the `vararg` overloads for all * [APIs][org.jetbrains.kotlinx.dataframe.documentation.AccessApi] (+ [ColumnPath][org.jetbrains.kotlinx.dataframe.columns.ColumnPath]). 
* @@ -3479,12 +3370,16 @@ public interface ColumnsSelectionDsl : ColumnSelectionDsl, SingleColum * * `df.`[select][org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl.select]` { myColumnGroup`[`[`][org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl.cols]`Type::colA, Type::colB`[`]`][org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl.cols]` }` * + * `df.`[select][org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl.select]` { "pathTo"["columnGroup"].`[cols][org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl.cols]`(Type::colA, Type::colB) }` + * + * `df.`[select][org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl.select]` { "pathTo"["columnGroup"]`[`[`][org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl.cols]`Type::colA, Type::colB`[`]`][org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl.cols]` }` + * * * @param [firstCol] A [KProperty] that points to a column. * @param [otherCols] Optional additional [KProperty]s that point to columns. * @return A [ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet] containing the columns that [firstCol] and [otherCols] point to. */ - public fun SingleColumn.cols( + public fun SingleColumn<*>.cols( firstCol: KProperty, vararg otherCols: KProperty, ): ColumnSet = headPlusArray(firstCol, otherCols).let { props -> @@ -3492,7 +3387,12 @@ public interface ColumnsSelectionDsl : ColumnSelectionDsl, SingleColum } /** ## Cols - * Creates a subset of columns ([ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet]) from a parent [ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet], -[ColumnGroup][org.jetbrains.kotlinx.dataframe.columns.ColumnGroup], or -[DataFrame][org.jetbrains.kotlinx.dataframe.DataFrame]. + * Creates a subset of columns ([ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet]) from the current [ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet]. 
+ * + * If the current [ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet] is a [SingleColumn][org.jetbrains.kotlinx.dataframe.columns.SingleColumn] + * (and thus consists of only one column (or [column group][org.jetbrains.kotlinx.dataframe.columns.ColumnGroup])), + * then `cols` will create a subset of its children. + * * You can use either a [ColumnFilter][org.jetbrains.kotlinx.dataframe.ColumnFilter] or any of the `vararg` overloads for all * [APIs][org.jetbrains.kotlinx.dataframe.documentation.AccessApi] (+ [ColumnPath][org.jetbrains.kotlinx.dataframe.columns.ColumnPath]). * @@ -3516,19 +3416,28 @@ public interface ColumnsSelectionDsl : ColumnSelectionDsl, SingleColum * * `df.`[select][org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl.select]` { myColumnGroup`[`[`][org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl.cols]`Type::colA, Type::colB`[`]`][org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl.cols]` }` * + * `df.`[select][org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl.select]` { "pathTo"["columnGroup"].`[cols][org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl.cols]`(Type::colA, Type::colB) }` + * + * `df.`[select][org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl.select]` { "pathTo"["columnGroup"]`[`[`][org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl.cols]`Type::colA, Type::colB`[`]`][org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl.cols]` }` + * * * @param [firstCol] A [KProperty] that points to a column. * @param [otherCols] Optional additional [KProperty]s that point to columns. * @return A [ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet] containing the columns that [firstCol] and [otherCols] point to. 
*/ - public operator fun SingleColumn.get( + public operator fun SingleColumn<*>.get( firstCol: KProperty, vararg otherCols: KProperty, ): ColumnSet = cols(firstCol, *otherCols) /** * ## Cols - * Creates a subset of columns ([ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet]) from a parent [ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet], -[ColumnGroup][org.jetbrains.kotlinx.dataframe.columns.ColumnGroup], or -[DataFrame][org.jetbrains.kotlinx.dataframe.DataFrame]. + * Creates a subset of columns ([ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet]) from the current [ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet]. + * + * If the current [ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet] is a [SingleColumn][org.jetbrains.kotlinx.dataframe.columns.SingleColumn] + * (and thus consists of only one column (or [column group][org.jetbrains.kotlinx.dataframe.columns.ColumnGroup])), + * then `cols` will create a subset of its children. + * * You can use either a [ColumnFilter][org.jetbrains.kotlinx.dataframe.ColumnFilter] or any of the `vararg` overloads for all * [APIs][org.jetbrains.kotlinx.dataframe.documentation.AccessApi] (+ [ColumnPath][org.jetbrains.kotlinx.dataframe.columns.ColumnPath]). * @@ -3556,7 +3465,12 @@ public interface ColumnsSelectionDsl : ColumnSelectionDsl, SingleColum private interface StringColsVarargKPropertyDocs /** ## Cols - * Creates a subset of columns ([ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet]) from a parent [ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet], -[ColumnGroup][org.jetbrains.kotlinx.dataframe.columns.ColumnGroup], or -[DataFrame][org.jetbrains.kotlinx.dataframe.DataFrame]. + * Creates a subset of columns ([ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet]) from the current [ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet]. 
+ * + * If the current [ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet] is a [SingleColumn][org.jetbrains.kotlinx.dataframe.columns.SingleColumn] + * (and thus consists of only one column (or [column group][org.jetbrains.kotlinx.dataframe.columns.ColumnGroup])), + * then `cols` will create a subset of its children. + * * You can use either a [ColumnFilter][org.jetbrains.kotlinx.dataframe.ColumnFilter] or any of the `vararg` overloads for all * [APIs][org.jetbrains.kotlinx.dataframe.documentation.AccessApi] (+ [ColumnPath][org.jetbrains.kotlinx.dataframe.columns.ColumnPath]). * @@ -3587,7 +3501,12 @@ public interface ColumnsSelectionDsl : ColumnSelectionDsl, SingleColum ): ColumnSet = colGroup(this).cols(firstCol, *otherCols) /** ## Cols - * Creates a subset of columns ([ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet]) from a parent [ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet], -[ColumnGroup][org.jetbrains.kotlinx.dataframe.columns.ColumnGroup], or -[DataFrame][org.jetbrains.kotlinx.dataframe.DataFrame]. + * Creates a subset of columns ([ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet]) from the current [ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet]. + * + * If the current [ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet] is a [SingleColumn][org.jetbrains.kotlinx.dataframe.columns.SingleColumn] + * (and thus consists of only one column (or [column group][org.jetbrains.kotlinx.dataframe.columns.ColumnGroup])), + * then `cols` will create a subset of its children. + * * You can use either a [ColumnFilter][org.jetbrains.kotlinx.dataframe.ColumnFilter] or any of the `vararg` overloads for all * [APIs][org.jetbrains.kotlinx.dataframe.documentation.AccessApi] (+ [ColumnPath][org.jetbrains.kotlinx.dataframe.columns.ColumnPath]). 
* @@ -3619,7 +3538,12 @@ public interface ColumnsSelectionDsl : ColumnSelectionDsl, SingleColum /** * ## Cols - * Creates a subset of columns ([ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet]) from a parent [ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet], -[ColumnGroup][org.jetbrains.kotlinx.dataframe.columns.ColumnGroup], or -[DataFrame][org.jetbrains.kotlinx.dataframe.DataFrame]. + * Creates a subset of columns ([ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet]) from the current [ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet]. + * + * If the current [ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet] is a [SingleColumn][org.jetbrains.kotlinx.dataframe.columns.SingleColumn] + * (and thus consists of only one column (or [column group][org.jetbrains.kotlinx.dataframe.columns.ColumnGroup])), + * then `cols` will create a subset of its children. + * * You can use either a [ColumnFilter][org.jetbrains.kotlinx.dataframe.ColumnFilter] or any of the `vararg` overloads for all * [APIs][org.jetbrains.kotlinx.dataframe.documentation.AccessApi] (+ [ColumnPath][org.jetbrains.kotlinx.dataframe.columns.ColumnPath]). * @@ -3635,110 +3559,24 @@ public interface ColumnsSelectionDsl : ColumnSelectionDsl, SingleColum * * #### Examples for this overload: * - * `df.`[select][select]` { "pathTo"["columnGroup"].`[cols][cols]`(Type::colA, Type::colB) }` + * `df.`[select][select]` { Type::myColumnGroup.`[cols][cols]`(Type::colA, Type::colB) }` * - * `df.`[select][select]` { "pathTo"["columnGroup"]`[`[`][cols]`Type::colA, Type::colB`[`]`][cols]` }` + * `df.`[select][select]` { Type::myColumnGroup`[`[`][cols]`Type::colA, Type::colB`[`]`][cols]` }` * * * @param [firstCol] A [KProperty] that points to a column. * @param [otherCols] Optional additional [KProperty]s that point to columns. * @return A [ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet] containing the columns that [firstCol] and [otherCols] point to. 
*/ - private interface ColumnPathColsVarargKPropertyDocs - - /** ## Cols - * Creates a subset of columns ([ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet]) from a parent [ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet], -[ColumnGroup][org.jetbrains.kotlinx.dataframe.columns.ColumnGroup], or -[DataFrame][org.jetbrains.kotlinx.dataframe.DataFrame]. - * You can use either a [ColumnFilter][org.jetbrains.kotlinx.dataframe.ColumnFilter] or any of the `vararg` overloads for all - * [APIs][org.jetbrains.kotlinx.dataframe.documentation.AccessApi] (+ [ColumnPath][org.jetbrains.kotlinx.dataframe.columns.ColumnPath]). - * - * Aside from calling [cols][org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl.cols] directly, you can also use the [get][org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl.get] operator in most cases. - * - * #### For example: - * `df.`[remove][org.jetbrains.kotlinx.dataframe.DataFrame.remove]` { `[cols][org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl.cols]` { it.`[hasNulls][org.jetbrains.kotlinx.dataframe.hasNulls]`() } }` - * - * `df.`[select][org.jetbrains.kotlinx.dataframe.DataFrame.select]` { myGroupCol.`[cols][org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl.cols]`(columnA, columnB) }` - * - * `df.`[select][org.jetbrains.kotlinx.dataframe.DataFrame.select]` { `[colsOf][org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl.colsOf]`<`[String][String]`>()`[`[`][org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl.cols]`1, 3, 5] }` - * - * - * #### Examples for this overload: - * - * `df.`[select][org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl.select]` { "pathTo"["columnGroup"].`[cols][org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl.cols]`(Type::colA, Type::colB) }` - * - * `df.`[select][org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl.select]` { "pathTo"["columnGroup"]`[`[`][org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl.cols]`Type::colA, 
Type::colB`[`]`][org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl.cols]` }` - * - * - * @param [firstCol] A [KProperty] that points to a column. - * @param [otherCols] Optional additional [KProperty]s that point to columns. - * @return A [ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet] containing the columns that [firstCol] and [otherCols] point to. - */ - public fun ColumnPath.cols( - firstCol: KProperty, - vararg otherCols: KProperty, - ): ColumnSet = colGroup(this).cols(firstCol, *otherCols) + private interface KPropertyColsVarargKPropertyDocs /** ## Cols - * Creates a subset of columns ([ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet]) from a parent [ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet], -[ColumnGroup][org.jetbrains.kotlinx.dataframe.columns.ColumnGroup], or -[DataFrame][org.jetbrains.kotlinx.dataframe.DataFrame]. - * You can use either a [ColumnFilter][org.jetbrains.kotlinx.dataframe.ColumnFilter] or any of the `vararg` overloads for all - * [APIs][org.jetbrains.kotlinx.dataframe.documentation.AccessApi] (+ [ColumnPath][org.jetbrains.kotlinx.dataframe.columns.ColumnPath]). - * - * Aside from calling [cols][org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl.cols] directly, you can also use the [get][org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl.get] operator in most cases. 
- * - * #### For example: - * `df.`[remove][org.jetbrains.kotlinx.dataframe.DataFrame.remove]` { `[cols][org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl.cols]` { it.`[hasNulls][org.jetbrains.kotlinx.dataframe.hasNulls]`() } }` - * - * `df.`[select][org.jetbrains.kotlinx.dataframe.DataFrame.select]` { myGroupCol.`[cols][org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl.cols]`(columnA, columnB) }` - * - * `df.`[select][org.jetbrains.kotlinx.dataframe.DataFrame.select]` { `[colsOf][org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl.colsOf]`<`[String][String]`>()`[`[`][org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl.cols]`1, 3, 5] }` - * - * - * #### Examples for this overload: - * - * `df.`[select][org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl.select]` { "pathTo"["columnGroup"].`[cols][org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl.cols]`(Type::colA, Type::colB) }` - * - * `df.`[select][org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl.select]` { "pathTo"["columnGroup"]`[`[`][org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl.cols]`Type::colA, Type::colB`[`]`][org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl.cols]` }` + * Creates a subset of columns ([ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet]) from the current [ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet]. * + * If the current [ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet] is a [SingleColumn][org.jetbrains.kotlinx.dataframe.columns.SingleColumn] + * (and thus consists of only one column (or [column group][org.jetbrains.kotlinx.dataframe.columns.ColumnGroup])), + * then `cols` will create a subset of its children. * - * @param [firstCol] A [KProperty] that points to a column. - * @param [otherCols] Optional additional [KProperty]s that point to columns. - * @return A [ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet] containing the columns that [firstCol] and [otherCols] point to. 
- */ - public operator fun ColumnPath.get( - firstCol: KProperty, - vararg otherCols: KProperty, - ): ColumnSet = cols(firstCol, *otherCols) - - /** - * ## Cols - * Creates a subset of columns ([ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet]) from a parent [ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet], -[ColumnGroup][org.jetbrains.kotlinx.dataframe.columns.ColumnGroup], or -[DataFrame][org.jetbrains.kotlinx.dataframe.DataFrame]. - * You can use either a [ColumnFilter][org.jetbrains.kotlinx.dataframe.ColumnFilter] or any of the `vararg` overloads for all - * [APIs][org.jetbrains.kotlinx.dataframe.documentation.AccessApi] (+ [ColumnPath][org.jetbrains.kotlinx.dataframe.columns.ColumnPath]). - * - * Aside from calling [cols][org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl.cols] directly, you can also use the [get][org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl.get] operator in most cases. - * - * #### For example: - * `df.`[remove][org.jetbrains.kotlinx.dataframe.DataFrame.remove]` { `[cols][org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl.cols]` { it.`[hasNulls][org.jetbrains.kotlinx.dataframe.hasNulls]`() } }` - * - * `df.`[select][org.jetbrains.kotlinx.dataframe.DataFrame.select]` { myGroupCol.`[cols][org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl.cols]`(columnA, columnB) }` - * - * `df.`[select][org.jetbrains.kotlinx.dataframe.DataFrame.select]` { `[colsOf][org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl.colsOf]`<`[String][String]`>()`[`[`][org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl.cols]`1, 3, 5] }` - * - * - * #### Examples for this overload: - * - * `df.`[select][select]` { Type::myColumnGroup.`[cols][cols]`(Type::colA, Type::colB) }` - * - * `df.`[select][select]` { Type::myColumnGroup`[`[`][cols]`Type::colA, Type::colB`[`]`][cols]` }` - * - * - * @param [firstCol] A [KProperty] that points to a column. 
- * @param [otherCols] Optional additional [KProperty]s that point to columns. - * @return A [ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet] containing the columns that [firstCol] and [otherCols] point to. - */ - private interface KPropertyColsVarargKPropertyDocs - - /** ## Cols - * Creates a subset of columns ([ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet]) from a parent [ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet], -[ColumnGroup][org.jetbrains.kotlinx.dataframe.columns.ColumnGroup], or -[DataFrame][org.jetbrains.kotlinx.dataframe.DataFrame]. * You can use either a [ColumnFilter][org.jetbrains.kotlinx.dataframe.ColumnFilter] or any of the `vararg` overloads for all * [APIs][org.jetbrains.kotlinx.dataframe.documentation.AccessApi] (+ [ColumnPath][org.jetbrains.kotlinx.dataframe.columns.ColumnPath]). * @@ -3769,7 +3607,12 @@ public interface ColumnsSelectionDsl : ColumnSelectionDsl, SingleColum ): ColumnSet = colGroup(this).cols(firstCol, *otherCols) /** ## Cols - * Creates a subset of columns ([ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet]) from a parent [ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet], -[ColumnGroup][org.jetbrains.kotlinx.dataframe.columns.ColumnGroup], or -[DataFrame][org.jetbrains.kotlinx.dataframe.DataFrame]. + * Creates a subset of columns ([ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet]) from the current [ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet]. + * + * If the current [ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet] is a [SingleColumn][org.jetbrains.kotlinx.dataframe.columns.SingleColumn] + * (and thus consists of only one column (or [column group][org.jetbrains.kotlinx.dataframe.columns.ColumnGroup])), + * then `cols` will create a subset of its children. 
+ * * You can use either a [ColumnFilter][org.jetbrains.kotlinx.dataframe.ColumnFilter] or any of the `vararg` overloads for all * [APIs][org.jetbrains.kotlinx.dataframe.documentation.AccessApi] (+ [ColumnPath][org.jetbrains.kotlinx.dataframe.columns.ColumnPath]). * @@ -3803,33 +3646,25 @@ public interface ColumnsSelectionDsl : ColumnSelectionDsl, SingleColum // region indices - @Suppress("UNCHECKED_CAST") public fun ColumnSet.cols( firstIndex: Int, vararg otherIndices: Int, - ): ColumnSet = transformWithContext { - dataFrameOf(it) - .asColumnGroup() - .cols(firstIndex, *otherIndices) - .resolve(this) as List> - } + ): ColumnSet = colsInternal(headPlusArray(firstIndex, otherIndices)) as ColumnSet public operator fun ColumnSet.get( firstIndex: Int, vararg otherIndices: Int, ): ColumnSet = cols(firstIndex, *otherIndices) - public fun SingleColumn.cols( + public fun SingleColumn<*>.cols( firstIndex: Int, vararg otherIndices: Int, - ): ColumnSet<*> = headPlusArray(firstIndex, otherIndices).let { indices -> - transform { it.flatMap { it.children().let { children -> indices.map { children[it] } } } } - } + ): ColumnSet<*> = colsInternal(headPlusArray(firstIndex, otherIndices)) /** * */ - public operator fun SingleColumn.get( + public operator fun SingleColumn<*>.get( firstIndex: Int, vararg otherIndices: Int, ): ColumnSet<*> = cols(firstIndex, *otherIndices) @@ -3844,16 +3679,6 @@ public interface ColumnsSelectionDsl : ColumnSelectionDsl, SingleColum vararg otherIndices: Int, ): ColumnSet<*> = cols(firstIndex, *otherIndices) - public fun ColumnPath.cols( - firstIndex: Int, - vararg otherIndices: Int, - ): ColumnSet<*> = colGroup(this).cols(firstIndex, *otherIndices) - - public operator fun ColumnPath.get( - firstIndex: Int, - vararg otherIndices: Int, - ): ColumnSet<*> = cols(firstIndex, *otherIndices) - public fun KProperty<*>.cols( firstIndex: Int, vararg otherIndices: Int, @@ -3868,33 +3693,22 @@ public interface ColumnsSelectionDsl : ColumnSelectionDsl, SingleColum 
// region ranges - @Suppress("UNCHECKED_CAST") public fun ColumnSet.cols(range: IntRange): ColumnSet = - transformWithContext { - dataFrameOf(it) - .asColumnGroup() - .cols(range) - .resolve(this) as List> - } + colsInternal(range) as ColumnSet public operator fun ColumnSet.get(range: IntRange): ColumnSet = cols(range) - public fun SingleColumn.cols(range: IntRange): ColumnSet<*> = - transform { it.flatMap { it.children().subList(range.first, range.last + 1) } } + public fun SingleColumn<*>.cols(range: IntRange): ColumnSet<*> = colsInternal(range) /** * */ - public operator fun SingleColumn.get(range: IntRange): ColumnSet<*> = cols(range) + public operator fun SingleColumn<*>.get(range: IntRange): ColumnSet<*> = cols(range) public fun String.cols(range: IntRange): ColumnSet<*> = colGroup(this).cols(range) public operator fun String.get(range: IntRange): ColumnSet<*> = cols(range) - public fun ColumnPath.cols(range: IntRange): ColumnSet<*> = colGroup(this).cols(range) - - public operator fun ColumnPath.get(range: IntRange): ColumnSet<*> = cols(range) - public fun KProperty<*>.cols(range: IntRange): ColumnSet<*> = colGroup(this).cols(range) public operator fun KProperty<*>.get(range: IntRange): ColumnSet<*> = cols(range) @@ -3925,10 +3739,32 @@ public interface ColumnsSelectionDsl : ColumnSelectionDsl, SingleColum // region dfs + @Deprecated( + message = "dfs is deprecated, use recursively instead.", + replaceWith = ReplaceWith("this.cols(predicate).recursively(includeTopLevel = false)"), + level = DeprecationLevel.WARNING, + ) public fun ColumnSet.dfs(predicate: (ColumnWithPath<*>) -> Boolean): ColumnSet = dfsInternal(predicate) + @Deprecated( + message = "dfs is deprecated, use recursively instead.", + replaceWith = ReplaceWith("this.cols(predicate).recursively()"), + level = DeprecationLevel.WARNING, + ) + public fun SingleColumn<*>.dfs(predicate: (ColumnWithPath<*>) -> Boolean): ColumnSet = dfsInternal(predicate) + + @Deprecated( + message = "dfs is deprecated, 
use recursively instead.", + replaceWith = ReplaceWith("this.cols(predicate).recursively()"), + level = DeprecationLevel.WARNING, + ) public fun String.dfs(predicate: (ColumnWithPath<*>) -> Boolean): ColumnSet<*> = toColumnAccessor().dfs(predicate) + @Deprecated( + message = "dfs is deprecated, use recursively instead.", + replaceWith = ReplaceWith("this.cols(predicate).recursively()"), + level = DeprecationLevel.WARNING, + ) public fun KProperty.dfs(predicate: (ColumnWithPath<*>) -> Boolean): ColumnSet<*> = toColumnAccessor().dfs(predicate) @@ -3936,22 +3772,321 @@ public interface ColumnsSelectionDsl : ColumnSelectionDsl, SingleColum // region all - public fun SingleColumn<*>.all(): ColumnSet<*> = transformSingle { it.children() } + /** + * ## All + * Creates a new [ColumnSet] that contains all columns from the current [ColumnSet]. + * + * If the current [ColumnSet] is a [SingleColumn] and consists of only one [column group][ColumnGroup], + * then `all` will create a new [ColumnSet] consisting of its children. + * + * This makes the function equivalent to [cols()][ColumnSet.cols]. + * + * #### For example: + * `df.`[move][DataFrame.move]` { `[all][ColumnSet.all]`().`[recursively][recursively]`() }.`[under][MoveClause.under]`("info")` + * + * `df.`[select][DataFrame.select]` { myGroup.`[all][ColumnSet.all]`() }` + * + * #### Examples for this overload: + * + * {@includeArg [CommonAllDocs.Examples]} + * + * @see [cols\] + */ + private interface CommonAllDocs { - public fun String.all(): ColumnSet<*> = toColumnAccessor().transformSingle { it.children() } + /** Example argument */ + interface Examples + } - public fun KProperty<*>.all(): ColumnSet<*> = toColumnAccessor().transformSingle { it.children() } + /** + * ## All + * Creates a new [ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet] that contains all columns from the current [ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet]. 
+ * + * If the current [ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet] is a [SingleColumn][org.jetbrains.kotlinx.dataframe.columns.SingleColumn] and consists of only one [column group][org.jetbrains.kotlinx.dataframe.columns.ColumnGroup], + * then `all` will create a new [ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet] consisting of its children. + * + * This makes the function equivalent to [cols()][org.jetbrains.kotlinx.dataframe.columns.ColumnSet.cols]. + * + * #### For example: + * `df.`[move][org.jetbrains.kotlinx.dataframe.DataFrame.move]` { `[all][org.jetbrains.kotlinx.dataframe.columns.ColumnSet.all]`().`[recursively][org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl.recursively]`() }.`[under][org.jetbrains.kotlinx.dataframe.api.MoveClause.under]`("info")` + * + * `df.`[select][org.jetbrains.kotlinx.dataframe.DataFrame.select]` { myGroup.`[all][org.jetbrains.kotlinx.dataframe.columns.ColumnSet.all]`() }` + * + * #### Examples for this overload: + * + * `df.`[select][select]` { `[cols][cols]` { "a" in `[name][ColumnWithPath.name]` }.`[all][all]`() }` + * ## ‎ + * NOTE: This is an identity call and can be omitted in most cases. However, it can still prove useful + * for readability or in combination with [recursively]. + * + * @see [cols] + */ + public fun ColumnSet.all(): TransformableColumnSet = allInternal() + + /** + * ## All + * Creates a new [ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet] that contains all columns from the current [ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet]. + * + * If the current [ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet] is a [SingleColumn][org.jetbrains.kotlinx.dataframe.columns.SingleColumn] and consists of only one [column group][org.jetbrains.kotlinx.dataframe.columns.ColumnGroup], + * then `all` will create a new [ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet] consisting of its children. 
+ * + * This makes the function equivalent to [cols()][org.jetbrains.kotlinx.dataframe.columns.ColumnSet.cols]. + * + * #### For example: + * `df.`[move][org.jetbrains.kotlinx.dataframe.DataFrame.move]` { `[all][org.jetbrains.kotlinx.dataframe.columns.ColumnSet.all]`().`[recursively][org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl.recursively]`() }.`[under][org.jetbrains.kotlinx.dataframe.api.MoveClause.under]`("info")` + * + * `df.`[select][org.jetbrains.kotlinx.dataframe.DataFrame.select]` { myGroup.`[all][org.jetbrains.kotlinx.dataframe.columns.ColumnSet.all]`() }` + * + * #### Examples for this overload: + * + * `df.`[select][select]` { `[all][all]`() }` + * + * `df.`[select][select]` { myGroup.`[all][all]`() }` + * + * `df.`[select][select]` { "pathTo"["myGroup"].`[all][all]`() }` + * + * @see [cols] + */ + public fun SingleColumn<*>.all(): TransformableColumnSet<*> = allInternal() + + /** + * ## All + * Creates a new [ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet] that contains all columns from the current [ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet]. + * + * If the current [ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet] is a [SingleColumn][org.jetbrains.kotlinx.dataframe.columns.SingleColumn] and consists of only one [column group][org.jetbrains.kotlinx.dataframe.columns.ColumnGroup], + * then `all` will create a new [ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet] consisting of its children. + * + * This makes the function equivalent to [cols()][org.jetbrains.kotlinx.dataframe.columns.ColumnSet.cols]. 
+ * + * #### For example: + * `df.`[move][org.jetbrains.kotlinx.dataframe.DataFrame.move]` { `[all][org.jetbrains.kotlinx.dataframe.columns.ColumnSet.all]`().`[recursively][org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl.recursively]`() }.`[under][org.jetbrains.kotlinx.dataframe.api.MoveClause.under]`("info")` + * + * `df.`[select][org.jetbrains.kotlinx.dataframe.DataFrame.select]` { myGroup.`[all][org.jetbrains.kotlinx.dataframe.columns.ColumnSet.all]`() }` + * + * #### Examples for this overload: + * + * `df.`[select][select]` { "myGroupCol".`[all][all]`() }` + * + * @see [cols] + */ + public fun String.all(): TransformableColumnSet<*> = toColumnAccessor().all() + + /** + * ## All + * Creates a new [ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet] that contains all columns from the current [ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet]. + * + * If the current [ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet] is a [SingleColumn][org.jetbrains.kotlinx.dataframe.columns.SingleColumn] and consists of only one [column group][org.jetbrains.kotlinx.dataframe.columns.ColumnGroup], + * then `all` will create a new [ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet] consisting of its children. + * + * This makes the function equivalent to [cols()][org.jetbrains.kotlinx.dataframe.columns.ColumnSet.cols]. 
+ * + * #### For example: + * `df.`[move][org.jetbrains.kotlinx.dataframe.DataFrame.move]` { `[all][org.jetbrains.kotlinx.dataframe.columns.ColumnSet.all]`().`[recursively][org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl.recursively]`() }.`[under][org.jetbrains.kotlinx.dataframe.api.MoveClause.under]`("info")` + * + * `df.`[select][org.jetbrains.kotlinx.dataframe.DataFrame.select]` { myGroup.`[all][org.jetbrains.kotlinx.dataframe.columns.ColumnSet.all]`() }` + * + * #### Examples for this overload: + * + * `df.`[select][select]` { Type::columnGroup.`[all][all]`() }` + * + * @see [cols] + */ + public fun KProperty<*>.all(): TransformableColumnSet<*> = toColumnAccessor().all() // region allDfs + @Deprecated( + message = "allDfs is deprecated, use recursively instead.", + replaceWith = ReplaceWith("this.cols { includeGroups || !it.isColumnGroup() }.recursively()"), + level = DeprecationLevel.WARNING, + ) public fun ColumnSet<*>.allDfs(includeGroups: Boolean = false): ColumnSet = if (includeGroups) dfs { true } else dfs { !it.isColumnGroup() } + @Deprecated( + message = "allDfs is deprecated, use recursively instead.", + replaceWith = ReplaceWith("this.cols { includeGroups || !it.isColumnGroup() }.recursively()"), + level = DeprecationLevel.WARNING, + ) + public fun SingleColumn<*>.allDfs(includeGroups: Boolean = false): ColumnSet = + if (includeGroups) dfs { true } else dfs { !it.isColumnGroup() } + + @Deprecated( + message = "allDfs is deprecated, use recursively instead.", + replaceWith = ReplaceWith("this.cols { includeGroups || !it.isColumnGroup() }.recursively()"), + level = DeprecationLevel.WARNING, + ) public fun String.allDfs(includeGroups: Boolean = false): ColumnSet = toColumnAccessor().allDfs(includeGroups) + @Deprecated( + message = "allDfs is deprecated, use recursively instead.", + replaceWith = ReplaceWith("this.cols { includeGroups || !it.isColumnGroup() }.recursively()"), + level = DeprecationLevel.WARNING, + ) public fun 
KProperty<*>.allDfs(includeGroups: Boolean = false): ColumnSet = toColumnAccessor().allDfs(includeGroups) + /** + * ## Recursively / Rec + * + * Modifies the previous call to run not only on the current column set, + * but also on all columns inside [column groups][ColumnGroup]. + * + * `df.`[select][DataFrame.select]` { `[colsOf][ColumnSet.colsOf]`<`[String][String]`>() }` + * + * returns all columns of type [String] in the top-level, as expected. However, what if you want ALL + * columns of type [String] even if they are inside a nested [column group][ColumnGroup]? Then you can use [recursively]: + * + * `df.`[select][DataFrame.select]` { `[colsOf][ColumnSet.colsOf]`<`[String][String]`>().`[recursively][recursively]`() }` + * + * This will return the columns of type [String] in all levels. + * + * More examples: + * + * `df.`[select][DataFrame.select]` { `[first][ColumnSet.first]` { col -> col.`[any][DataColumn.any]` { it == "Alice" } }.`[recursively][recursively]`() }` + * + * `df.`[select][DataFrame.select]` { `[cols][ColumnSet.cols]` { "name" in it.`[name][ColumnReference.name]` }.`[recursively][recursively]`() }` + * + * #### Examples for this overload: + * + * {@includeArg [CommonRecursivelyDocs.Examples]} + * + * @param [includeTopLevel\] Whether to include the top-level columns in the result. `true` by default. + */ + private interface CommonRecursivelyDocs { + + /** Example argument */ + interface Examples + } + + /** + * ## Recursively / Rec + * + * Modifies the previous call to run not only on the current column set, + * but also on all columns inside [column groups][org.jetbrains.kotlinx.dataframe.columns.ColumnGroup]. + * + * `df.`[select][org.jetbrains.kotlinx.dataframe.DataFrame.select]` { `[colsOf][org.jetbrains.kotlinx.dataframe.columns.ColumnSet.colsOf]`<`[String][String]`>() }` + * + * returns all columns of type [String] in the top-level, as expected. 
However, what if you want ALL + * columns of type [String] even if they are inside a nested [column group][org.jetbrains.kotlinx.dataframe.columns.ColumnGroup]? Then you can use [recursively][org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl.recursively]: + * + * `df.`[select][org.jetbrains.kotlinx.dataframe.DataFrame.select]` { `[colsOf][org.jetbrains.kotlinx.dataframe.columns.ColumnSet.colsOf]`<`[String][String]`>().`[recursively][org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl.recursively]`() }` + * + * This will return the columns of type [String] in all levels. + * + * More examples: + * + * `df.`[select][org.jetbrains.kotlinx.dataframe.DataFrame.select]` { `[first][org.jetbrains.kotlinx.dataframe.columns.ColumnSet.first]` { col -> col.`[any][org.jetbrains.kotlinx.dataframe.DataColumn.any]` { it == "Alice" } }.`[recursively][org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl.recursively]`() }` + * + * `df.`[select][org.jetbrains.kotlinx.dataframe.DataFrame.select]` { `[cols][org.jetbrains.kotlinx.dataframe.columns.ColumnSet.cols]` { "name" in it.`[name][org.jetbrains.kotlinx.dataframe.columns.ColumnReference.name]` }.`[recursively][org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl.recursively]`() }` + * + * #### Examples for this overload: + * + * `df.`[select][DataFrame.select]` { `[colsOf][ColumnSet.colsOf]`<`[String][String]`>().`[recursively][recursively]`() }` + * + * `df.`[select][DataFrame.select]` { myColumnGroup.`[all][ColumnSet.all]`().`[rec][rec]`() }` + * + * `df.`[select][DataFrame.select]` { `[groups][ColumnSet.groups]`().`[recursively][recursively]`() }` + * + * @param [includeTopLevel] Whether to include the top-level columns in the result. `true` by default. 
+ */ + public fun TransformableColumnSet.recursively(): ColumnSet = + recursivelyImpl(includeTopLevel = true, includeGroups = true) + + /** ## Recursively / Rec + * + * Modifies the previous call to run not only on the current column set, + * but also on all columns inside [column groups][org.jetbrains.kotlinx.dataframe.columns.ColumnGroup]. + * + * `df.`[select][org.jetbrains.kotlinx.dataframe.DataFrame.select]` { `[colsOf][org.jetbrains.kotlinx.dataframe.columns.ColumnSet.colsOf]`<`[String][String]`>() }` + * + * returns all columns of type [String] in the top-level, as expected. However, what if you want ALL + * columns of type [String] even if they are inside a nested [column group][org.jetbrains.kotlinx.dataframe.columns.ColumnGroup]? Then you can use [recursively][org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl.recursively]: + * + * `df.`[select][org.jetbrains.kotlinx.dataframe.DataFrame.select]` { `[colsOf][org.jetbrains.kotlinx.dataframe.columns.ColumnSet.colsOf]`<`[String][String]`>().`[recursively][org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl.recursively]`() }` + * + * This will return the columns of type [String] in all levels. 
+ * + * More examples: + * + * `df.`[select][org.jetbrains.kotlinx.dataframe.DataFrame.select]` { `[first][org.jetbrains.kotlinx.dataframe.columns.ColumnSet.first]` { col -> col.`[any][org.jetbrains.kotlinx.dataframe.DataColumn.any]` { it == "Alice" } }.`[recursively][org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl.recursively]`() }` + * + * `df.`[select][org.jetbrains.kotlinx.dataframe.DataFrame.select]` { `[cols][org.jetbrains.kotlinx.dataframe.columns.ColumnSet.cols]` { "name" in it.`[name][org.jetbrains.kotlinx.dataframe.columns.ColumnReference.name]` }.`[recursively][org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl.recursively]`() }` + * + * #### Examples for this overload: + * + * `df.`[select][org.jetbrains.kotlinx.dataframe.DataFrame.select]` { `[colsOf][org.jetbrains.kotlinx.dataframe.columns.ColumnSet.colsOf]`<`[String][String]`>().`[recursively][org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl.recursively]`() }` + * + * `df.`[select][org.jetbrains.kotlinx.dataframe.DataFrame.select]` { myColumnGroup.`[all][org.jetbrains.kotlinx.dataframe.columns.ColumnSet.all]`().`[rec][org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl.rec]`() }` + * + * `df.`[select][org.jetbrains.kotlinx.dataframe.DataFrame.select]` { `[groups][org.jetbrains.kotlinx.dataframe.columns.ColumnSet.groups]`().`[recursively][org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl.recursively]`() }` + * + * @param [includeTopLevel] Whether to include the top-level columns in the result. `true` by default. + */ + public fun TransformableColumnSet.rec(): ColumnSet = recursively() + + /** + * ## Recursively / Rec + * + * Modifies the previous call to run not only on the current column set, + * but also on all columns inside [column groups][org.jetbrains.kotlinx.dataframe.columns.ColumnGroup]. 
+ * + * `df.`[select][org.jetbrains.kotlinx.dataframe.DataFrame.select]` { `[colsOf][org.jetbrains.kotlinx.dataframe.columns.ColumnSet.colsOf]`<`[String][String]`>() }` + * + * returns all columns of type [String] in the top-level, as expected. However, what if you want ALL + * columns of type [String] even if they are inside a nested [column group][org.jetbrains.kotlinx.dataframe.columns.ColumnGroup]? Then you can use [recursively][org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl.recursively]: + * + * `df.`[select][org.jetbrains.kotlinx.dataframe.DataFrame.select]` { `[colsOf][org.jetbrains.kotlinx.dataframe.columns.ColumnSet.colsOf]`<`[String][String]`>().`[recursively][org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl.recursively]`() }` + * + * This will return the columns of type [String] in all levels. + * + * More examples: + * + * `df.`[select][org.jetbrains.kotlinx.dataframe.DataFrame.select]` { `[first][org.jetbrains.kotlinx.dataframe.columns.ColumnSet.first]` { col -> col.`[any][org.jetbrains.kotlinx.dataframe.DataColumn.any]` { it == "Alice" } }.`[recursively][org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl.recursively]`() }` + * + * `df.`[select][org.jetbrains.kotlinx.dataframe.DataFrame.select]` { `[cols][org.jetbrains.kotlinx.dataframe.columns.ColumnSet.cols]` { "name" in it.`[name][org.jetbrains.kotlinx.dataframe.columns.ColumnReference.name]` }.`[recursively][org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl.recursively]`() }` + * + * #### Examples for this overload: + * + * `df.`[select][DataFrame.select]` { `[first][ColumnSet.first]` { col -> col.`[any][DataColumn.any]` { it == "Alice" } }.`[recursively][recursively]`() }` + * + * `df.`[select][DataFrame.select]` { `[single][ColumnSet.single]` { it.name == "myCol" }.`[rec][rec]`() }` + * + * @param [includeTopLevel] Whether to include the top-level columns in the result. `true` by default. 
+ */ + public fun TransformableSingleColumn<*>.recursively(): SingleColumn<*> = + recursivelyImpl(includeTopLevel = true, includeGroups = true) + + /** ## Recursively / Rec + * + * Modifies the previous call to run not only on the current column set, + * but also on all columns inside [column groups][org.jetbrains.kotlinx.dataframe.columns.ColumnGroup]. + * + * `df.`[select][org.jetbrains.kotlinx.dataframe.DataFrame.select]` { `[colsOf][org.jetbrains.kotlinx.dataframe.columns.ColumnSet.colsOf]`<`[String][String]`>() }` + * + * returns all columns of type [String] in the top-level, as expected. However, what if you want ALL + * columns of type [String] even if they are inside a nested [column group][org.jetbrains.kotlinx.dataframe.columns.ColumnGroup]? Then you can use [recursively][org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl.recursively]: + * + * `df.`[select][org.jetbrains.kotlinx.dataframe.DataFrame.select]` { `[colsOf][org.jetbrains.kotlinx.dataframe.columns.ColumnSet.colsOf]`<`[String][String]`>().`[recursively][org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl.recursively]`() }` + * + * This will return the columns of type [String] in all levels. 
+ * + * More examples: + * + * `df.`[select][org.jetbrains.kotlinx.dataframe.DataFrame.select]` { `[first][org.jetbrains.kotlinx.dataframe.columns.ColumnSet.first]` { col -> col.`[any][org.jetbrains.kotlinx.dataframe.DataColumn.any]` { it == "Alice" } }.`[recursively][org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl.recursively]`() }` + * + * `df.`[select][org.jetbrains.kotlinx.dataframe.DataFrame.select]` { `[cols][org.jetbrains.kotlinx.dataframe.columns.ColumnSet.cols]` { "name" in it.`[name][org.jetbrains.kotlinx.dataframe.columns.ColumnReference.name]` }.`[recursively][org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl.recursively]`() }` + * + * #### Examples for this overload: + * + * `df.`[select][org.jetbrains.kotlinx.dataframe.DataFrame.select]` { `[first][org.jetbrains.kotlinx.dataframe.columns.ColumnSet.first]` { col -> col.`[any][org.jetbrains.kotlinx.dataframe.DataColumn.any]` { it == "Alice" } }.`[recursively][org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl.recursively]`() }` + * + * `df.`[select][org.jetbrains.kotlinx.dataframe.DataFrame.select]` { `[single][org.jetbrains.kotlinx.dataframe.columns.ColumnSet.single]` { it.name == "myCol" }.`[rec][org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl.rec]`() }` + * + * @param [includeTopLevel] Whether to include the top-level columns in the result. `true` by default. 
+ */ + public fun TransformableSingleColumn<*>.rec(): SingleColumn<*> = recursively() + // endregion // region allAfter @@ -4085,68 +4220,129 @@ public interface ColumnsSelectionDsl : ColumnSelectionDsl, SingleColum // endregion // region groups + public fun ColumnSet<*>.groups(filter: (ColumnGroup<*>) -> Boolean = { true }): TransformableColumnSet = + groupsInternal(filter) - public fun SingleColumn<*>.groups(filter: (ColumnGroup<*>) -> Boolean = { true }): ColumnSet = - children { it.isColumnGroup() && filter(it.asColumnGroup()) } as ColumnSet + public fun SingleColumn<*>.groups(filter: (ColumnGroup<*>) -> Boolean = { true }): TransformableColumnSet = + groupsInternal(filter) - public fun String.groups(filter: (ColumnGroup<*>) -> Boolean = { true }): ColumnSet = + public fun String.groups(filter: (ColumnGroup<*>) -> Boolean = { true }): TransformableColumnSet = toColumnAccessor().groups(filter) - public fun KProperty<*>.groups(filter: (ColumnGroup<*>) -> Boolean = { true }): ColumnSet = + public fun KProperty<*>.groups(filter: (ColumnGroup<*>) -> Boolean = { true }): TransformableColumnSet = toColumnAccessor().groups(filter) // endregion // region children - public fun ColumnSet<*>.children(predicate: (ColumnWithPath) -> Boolean = { true }): ColumnSet = + // takes children of all columns in the column set + public fun ColumnSet<*>.children(predicate: ColumnFilter = { true }): TransformableColumnSet = transform { it.flatMap { it.children().filter { predicate(it) } } } - public fun ColumnGroupReference.children(): ColumnSet = transformSingle { it.children() } + // same as cols + public fun SingleColumn<*>.children(predicate: ColumnFilter = { true }): TransformableColumnSet = + (this as ColumnSet<*>).children(predicate) // endregion public operator fun List>.get(range: IntRange): ColumnSet = ColumnsList(subList(range.first, range.last + 1)) - public fun SingleColumn.take(n: Int): ColumnSet<*> = transformSingle { it.children().take(n) } - public fun 
SingleColumn.takeLast(n: Int): ColumnSet<*> = transformSingle { it.children().takeLast(n) } - public fun SingleColumn.drop(n: Int): ColumnSet<*> = transformSingle { it.children().drop(n) } - public fun SingleColumn.dropLast(n: Int = 1): ColumnSet<*> = transformSingle { it.children().dropLast(n) } + public fun SingleColumn<*>.take(n: Int): ColumnSet<*> = transformSingle { it.children().take(n) } + public fun SingleColumn<*>.takeLast(n: Int): ColumnSet<*> = transformSingle { it.children().takeLast(n) } + public fun SingleColumn<*>.drop(n: Int): ColumnSet<*> = transformSingle { it.children().drop(n) } + public fun SingleColumn<*>.dropLast(n: Int = 1): ColumnSet<*> = transformSingle { it.children().dropLast(n) } public fun ColumnSet.drop(n: Int): ColumnSet = transform { it.drop(n) } public fun ColumnSet.take(n: Int): ColumnSet = transform { it.take(n) } public fun ColumnSet.dropLast(n: Int = 1): ColumnSet = transform { it.dropLast(n) } public fun ColumnSet.takeLast(n: Int): ColumnSet = transform { it.takeLast(n) } - public fun ColumnSet.top(): ColumnSet = transform { it.top() } - public fun ColumnSet.takeWhile(predicate: Predicate>): ColumnSet = + + @Deprecated("Use roots() instead", ReplaceWith("roots()")) + public fun ColumnSet.top(): ColumnSet = roots() + + /** + * ## Roots + * + * Returns a sub-set of columns that are roots of the trees of columns. + * + * In practice, this means that if a column in [this] is a child of another column in [this], + * it will not be included in the result. + * + * If [this] is a [SingleColumn] containing a single [ColumnGroup] it will run on the children of that group, + * else it simply runs on the columns in the [ColumnSet] itself. + */ + public fun ColumnSet.roots(): ColumnSet = rootsInternal() as ColumnSet + + /** + * ## Roots + * + * Returns a sub-set of columns that are roots of the trees of columns. 
+ * + * In practice, this means that if a column in [this] is a child of another column in [this], + * it will not be included in the result. + * + * If [this] is a [SingleColumn][org.jetbrains.kotlinx.dataframe.columns.SingleColumn] containing a single [ColumnGroup][org.jetbrains.kotlinx.dataframe.columns.ColumnGroup] it will run on the children of that group, + * else it simply runs on the columns in the [ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet] itself. + */ + public fun SingleColumn<*>.roots(): ColumnSet<*> = rootsInternal() + + public fun ColumnSet.takeWhile(predicate: ColumnFilter): ColumnSet = transform { it.takeWhile(predicate) } - public fun ColumnSet.takeLastWhile(predicate: Predicate>): ColumnSet = + public fun ColumnSet.takeLastWhile(predicate: ColumnFilter): ColumnSet = transform { it.takeLastWhile(predicate) } - public fun ColumnSet.filter(predicate: Predicate>): ColumnSet = - transform { it.filter(predicate) } + public fun ColumnSet.filter(predicate: ColumnFilter): TransformableColumnSet = + colsInternal(predicate as ColumnFilter<*>) as TransformableColumnSet + + public fun SingleColumn<*>.nameContains(text: CharSequence): TransformableColumnSet<*> = + cols { it.name.contains(text) } + + public fun ColumnSet.nameContains(text: CharSequence): TransformableColumnSet = + cols { it.name.contains(text) } - public fun SingleColumn.nameContains(text: CharSequence): ColumnSet<*> = cols { it.name.contains(text) } - public fun ColumnSet.nameContains(text: CharSequence): ColumnSet = cols { it.name.contains(text) } - public fun SingleColumn.nameContains(regex: Regex): ColumnSet<*> = cols { it.name.contains(regex) } - public fun ColumnSet.nameContains(regex: Regex): ColumnSet = cols { it.name.contains(regex) } - public fun SingleColumn.startsWith(prefix: CharSequence): ColumnSet<*> = cols { it.name.startsWith(prefix) } - public fun ColumnSet.startsWith(prefix: CharSequence): ColumnSet = cols { it.name.startsWith(prefix) } - public fun 
SingleColumn.endsWith(suffix: CharSequence): ColumnSet<*> = cols { it.name.endsWith(suffix) } - public fun ColumnSet.endsWith(suffix: CharSequence): ColumnSet = cols { it.name.endsWith(suffix) } + public fun SingleColumn<*>.nameContains(regex: Regex): TransformableColumnSet<*> = cols { it.name.contains(regex) } - public fun ColumnSet.except(vararg other: ColumnSet<*>): ColumnSet<*> = except(other.toColumnSet()) - public fun ColumnSet.except(vararg other: String): ColumnSet<*> = except(other.toColumnSet()) + public fun ColumnSet.nameContains(regex: Regex): TransformableColumnSet = cols { it.name.contains(regex) } - public fun ColumnSet.withoutNulls(): ColumnSet = transform { it.filter { !it.hasNulls } } as ColumnSet + public fun SingleColumn<*>.startsWith(prefix: CharSequence): TransformableColumnSet<*> = + cols { it.name.startsWith(prefix) } - public infix fun ColumnSet.except(other: ColumnSet<*>): ColumnSet<*> = - createColumnSet { resolve(it).allColumnsExcept(other.resolve(it)) } + public fun ColumnSet.startsWith(prefix: CharSequence): TransformableColumnSet = + cols { it.name.startsWith(prefix) } - public infix fun ColumnSet.except(selector: ColumnsSelector): ColumnSet = - except(selector.toColumns()) as ColumnSet + public fun SingleColumn<*>.endsWith(suffix: CharSequence): TransformableColumnSet<*> = + cols { it.name.endsWith(suffix) } + + public fun ColumnSet.endsWith(suffix: CharSequence): TransformableColumnSet = + cols { it.name.endsWith(suffix) } + + public fun ColumnSet.except(vararg other: ColumnSet<*>): TransformableColumnSet<*> = + except(other.toColumnSet()) + + public fun ColumnSet.except(vararg other: String): TransformableColumnSet<*> = except(other.toColumnSet()) + + public fun ColumnSet.withoutNulls(): TransformableColumnSet = + transform { it.filter { !it.hasNulls() } } as TransformableColumnSet + + public infix fun ColumnSet.except(other: ColumnSet<*>): TransformableColumnSet<*> = + createTransformableColumnSet( + resolver = { context -> + 
this@except + .resolve(context) + .allColumnsExcept(other.resolve(context)) + }, + transformResolve = { context, transformer -> + transformer.transform(this@except) + .resolve(context) + .allColumnsExcept(other.resolve(context)) + }, + ) + + public infix fun ColumnSet.except(selector: ColumnsSelector): TransformableColumnSet = + except(selector.toColumns()) as TransformableColumnSet public operator fun ColumnsSelector.invoke(): ColumnSet = this(this@ColumnsSelectionDsl, this@ColumnsSelectionDsl) @@ -4224,9 +4420,25 @@ public interface ColumnsSelectionDsl : ColumnSelectionDsl, SingleColum public fun ColumnSet.distinct(): ColumnSet = DistinctColumnSet(this) + @Deprecated( + message = "Use recursively() instead", + replaceWith = ReplaceWith( + "this.colsOf(type, predicate).recursively()", + "org.jetbrains.kotlinx.dataframe.columns.recursively", + "org.jetbrains.kotlinx.dataframe.api.colsOf", + ), + ) public fun String.dfsOf(type: KType, predicate: (ColumnWithPath) -> Boolean = { true }): ColumnSet<*> = toColumnAccessor().dfsOf(type, predicate) + @Deprecated( + message = "Use recursively() instead", + replaceWith = ReplaceWith( + "this.colsOf(type, predicate).recursively()", + "org.jetbrains.kotlinx.dataframe.columns.recursively", + "org.jetbrains.kotlinx.dataframe.api.colsOf", + ), + ) public fun KProperty<*>.dfsOf(type: KType, predicate: (ColumnWithPath) -> Boolean = { true }): ColumnSet<*> = toColumnAccessor().dfsOf(type, predicate) @@ -4335,18 +4547,101 @@ public inline fun ColumnsSelectionDsl.expr( internal fun ColumnsSelector.filter(predicate: (ColumnWithPath) -> Boolean): ColumnsSelector = { this@filter(it, it).filter(predicate) } -internal fun ColumnSet<*>.colsInternal(predicate: ColumnFilter<*>) = - transform { it.flatMap { it.children().filter { predicate(it) } } } +/** + * If this [ColumnSet] is a [SingleColumn], it + * returns a new [ColumnSet] containing the children of this [SingleColumn] that + * match the given [predicate]. 
+ * + * Else, it returns a new [ColumnSet] containing all columns in this [ColumnSet] that + * match the given [predicate]. + */ +internal fun ColumnSet<*>.colsInternal(predicate: ColumnFilter<*>): TransformableColumnSet<*> = + allInternal().transform { it.filter(predicate) } -internal fun ColumnSet<*>.dfsInternal(predicate: (ColumnWithPath<*>) -> Boolean) = - transform { it.filter { it.isColumnGroup() }.flatMap { it.children().dfs().filter(predicate) } } +internal fun ColumnSet<*>.colsInternal(indices: IntArray): TransformableColumnSet<*> = + allInternal().transform { cols -> + indices.map { cols[it] } + } +internal fun ColumnSet<*>.colsInternal(range: IntRange): TransformableColumnSet<*> = + allInternal().transform { + it.subList(range.first, range.last + 1) + } + +internal fun ColumnSet<*>.rootsInternal(): ColumnSet<*> = + allInternal().transform { it.roots() } + +internal fun ColumnSet<*>.groupsInternal(filter: (ColumnGroup<*>) -> Boolean): TransformableColumnSet = + colsInternal { it.isColumnGroup() && filter(it.asColumnGroup()) } as TransformableColumnSet + +/** + * If [this] is a [SingleColumn] containing a single [ColumnGroup], it + * returns a [(transformable) ColumnSet][TransformableColumnSet] containing the children of this [ColumnGroup], + * else it simply returns a [(transformable) ColumnSet][TransformableColumnSet] from [this]. 
+ */ +internal fun ColumnSet.allInternal(): TransformableColumnSet = + transform { + if (this.isSingleColumnWithGroup(it)) { + it.single().children() + } else { + it + } + }.cast() + +/** If [this] is a [SingleColumn][org.jetbrains.kotlinx.dataframe.columns.SingleColumn] containing a single [ColumnGroup][org.jetbrains.kotlinx.dataframe.columns.ColumnGroup], it + * returns a [(transformable) ColumnSet][org.jetbrains.kotlinx.dataframe.impl.columns.TransformableColumnSet] containing the children of this [ColumnGroup][org.jetbrains.kotlinx.dataframe.columns.ColumnGroup], + * else it simply returns a [(transformable) ColumnSet][org.jetbrains.kotlinx.dataframe.impl.columns.TransformableColumnSet] from [this]. */ +internal fun SingleColumn<*>.allInternal(): TransformableColumnSet<*> = + (this as ColumnSet<*>).allInternal() + +@Deprecated("Replaced with recursively()") +internal fun ColumnSet<*>.dfsInternal(predicate: (ColumnWithPath<*>) -> Boolean) = + transform { it.filter { it.isColumnGroup() }.flatMap { it.children().flattenRecursively().filter(predicate) } } + +@Deprecated( + message = "Use recursively() instead", + replaceWith = ReplaceWith( + "this.colsOf(type, predicate).recursively()", + "org.jetbrains.kotlinx.dataframe.columns.recursively", + "org.jetbrains.kotlinx.dataframe.api.colsOf", + ), +) public fun ColumnSet<*>.dfsOf(type: KType, predicate: (ColumnWithPath) -> Boolean = { true }): ColumnSet<*> = dfsInternal { it.isSubtypeOf(type) && predicate(it.cast()) } +@Deprecated( + message = "Use recursively() instead", + replaceWith = ReplaceWith( + "this.colsOf(type, predicate).recursively()", + "org.jetbrains.kotlinx.dataframe.columns.recursively", + "org.jetbrains.kotlinx.dataframe.api.colsOf", + ), +) +public fun SingleColumn<*>.dfsOf(type: KType, predicate: (ColumnWithPath) -> Boolean = { true }): ColumnSet<*> = + dfsInternal { it.isSubtypeOf(type) && predicate(it.cast()) } + +@Deprecated( + message = "Use recursively() instead", + replaceWith = ReplaceWith( + 
"this.colsOf(filter).recursively(includeTopLevel = false)", + "org.jetbrains.kotlinx.dataframe.columns.recursively", + "org.jetbrains.kotlinx.dataframe.api.colsOf", + ), +) public inline fun ColumnSet<*>.dfsOf(noinline filter: (ColumnWithPath) -> Boolean = { true }): ColumnSet = dfsOf(typeOf(), filter) as ColumnSet +@Deprecated( + message = "Use recursively() instead", + replaceWith = ReplaceWith( + "this.colsOf(filter).recursively()", + "org.jetbrains.kotlinx.dataframe.columns.recursively", + "org.jetbrains.kotlinx.dataframe.api.colsOf", + ), +) +public inline fun SingleColumn<*>.dfsOf(noinline filter: (ColumnWithPath) -> Boolean = { true }): ColumnSet = + dfsOf(typeOf(), filter) as ColumnSet + /** * ## Cols Of * Get columns by a given type and an optional filter. @@ -4449,8 +4744,96 @@ private interface CommonColsOfDocs { * @param [filter] an optional filter function that takes a column of type [C] and returns `true` if the column should be included. * @return A [ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet] containing the columns of given type that were included by [filter]. */ -public fun ColumnSet<*>.colsOf(type: KType, filter: (DataColumn) -> Boolean = { true }): ColumnSet = - colsInternal { it.isSubtypeOf(type) && filter(it.cast()) } as ColumnSet +public fun ColumnSet<*>.colsOf( + type: KType, + filter: (DataColumn) -> Boolean = { true }, +): TransformableColumnSet = + colsInternal { it.isSubtypeOf(type) && filter(it.cast()) } as TransformableColumnSet + +/** + * ## Cols Of + * Get columns by a given type and an optional filter. 
+ * + * #### For example: + * + * `df.`[select][org.jetbrains.kotlinx.dataframe.DataFrame.select]` { `[colsOf][org.jetbrains.kotlinx.dataframe.api.colsOf]`<`[Int][Int]`>() }` + * + * `df.`[select][org.jetbrains.kotlinx.dataframe.DataFrame.select]` { `[colsOf][org.jetbrains.kotlinx.dataframe.api.colsOf]`<`[Int][Int]`> { it.`[size][org.jetbrains.kotlinx.dataframe.DataColumn.size]` > 10 } }` + * ## ‎ + * Alternatively, [colsOf][org.jetbrains.kotlinx.dataframe.api.colsOf] can also be called on existing columns: + * + * `df.`[select][org.jetbrains.kotlinx.dataframe.DataFrame.select]` { myColumnGroup.`[colsOf][org.jetbrains.kotlinx.dataframe.api.colsOf]`<`[Int][Int]`>() }` + * + * `df.`[select][org.jetbrains.kotlinx.dataframe.DataFrame.select]` { "myColumnGroup"().`[colsOf][org.jetbrains.kotlinx.dataframe.api.colsOf]`<`[Int][Int]`> { it.`[size][org.jetbrains.kotlinx.dataframe.DataColumn.size]` > 10 } }` + * + * `df.`[select][org.jetbrains.kotlinx.dataframe.DataFrame.select]` { (Type::myColumnGroup)().`[colsOf][org.jetbrains.kotlinx.dataframe.api.colsOf]`<`[Double][Double]`>() }` + * ## ‎ + * Finally, [colsOf][org.jetbrains.kotlinx.dataframe.api.colsOf] can also take a [KType] argument instead of a reified type. + * This is useful when the type is not known at compile time or when the API function cannot be inlined. + * + * + * `df.`[select][org.jetbrains.kotlinx.dataframe.DataFrame.select]` { `[colsOf][org.jetbrains.kotlinx.dataframe.api.colsOf]`(`[typeOf][typeOf]`<`[Int][Int]`>()) }` + * + * `df.`[select][org.jetbrains.kotlinx.dataframe.DataFrame.select]` { "myColumnGroup".`[colsOf][org.jetbrains.kotlinx.dataframe.api.colsOf]`(`[typeOf][typeOf]`<`[Int][Int]`>()) { it: `[DataColumn][org.jetbrains.kotlinx.dataframe.DataColumn]`<`[Int][Int]`> -> it.`[size][org.jetbrains.kotlinx.dataframe.DataColumn.size]` > 10 } }` + * ## This Cols Of Overload + * Get (sub-)columns by a given type with or without [filter]. 
+ * #### For example: + * + * `df.`[select][DataFrame.select]` { `[colsOf][colsOf]`<`[Int][Int]`>() }` + * + * `df.`[select][DataFrame.select]` { myColumnGroup.`[colsOf][colsOf]`<`[Int][Int]`> { it.`[size][DataColumn.size]` > 10 } }` + * + * `df.`[select][DataFrame.select]` { myColumnGroup.`[colsOf][colsOf]`<`[Int][Int]`>() }` + * + * @param [filter] an optional filter function that takes a column of type [C] and returns `true` if the column should be included. + * @return A [ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet] containing the columns of given type that were included by [filter]. + */ +public inline fun ColumnSet<*>.colsOf(noinline filter: (DataColumn) -> Boolean = { true }): TransformableColumnSet = + colsOf(typeOf(), filter) + +/** + * ## Cols Of + * Get columns by a given type and an optional filter. + * + * #### For example: + * + * `df.`[select][org.jetbrains.kotlinx.dataframe.DataFrame.select]` { `[colsOf][org.jetbrains.kotlinx.dataframe.api.colsOf]`<`[Int][Int]`>() }` + * + * `df.`[select][org.jetbrains.kotlinx.dataframe.DataFrame.select]` { `[colsOf][org.jetbrains.kotlinx.dataframe.api.colsOf]`<`[Int][Int]`> { it.`[size][org.jetbrains.kotlinx.dataframe.DataColumn.size]` > 10 } }` + * ## ‎ + * Alternatively, [colsOf][org.jetbrains.kotlinx.dataframe.api.colsOf] can also be called on existing columns: + * + * `df.`[select][org.jetbrains.kotlinx.dataframe.DataFrame.select]` { myColumnGroup.`[colsOf][org.jetbrains.kotlinx.dataframe.api.colsOf]`<`[Int][Int]`>() }` + * + * `df.`[select][org.jetbrains.kotlinx.dataframe.DataFrame.select]` { "myColumnGroup"().`[colsOf][org.jetbrains.kotlinx.dataframe.api.colsOf]`<`[Int][Int]`> { it.`[size][org.jetbrains.kotlinx.dataframe.DataColumn.size]` > 10 } }` + * + * `df.`[select][org.jetbrains.kotlinx.dataframe.DataFrame.select]` { (Type::myColumnGroup)().`[colsOf][org.jetbrains.kotlinx.dataframe.api.colsOf]`<`[Double][Double]`>() }` + * ## ‎ + * Finally, 
[colsOf][org.jetbrains.kotlinx.dataframe.api.colsOf] can also take a [KType] argument instead of a reified type. + * This is useful when the type is not known at compile time or when the API function cannot be inlined. + * + * + * `df.`[select][org.jetbrains.kotlinx.dataframe.DataFrame.select]` { `[colsOf][org.jetbrains.kotlinx.dataframe.api.colsOf]`(`[typeOf][typeOf]`<`[Int][Int]`>()) }` + * + * `df.`[select][org.jetbrains.kotlinx.dataframe.DataFrame.select]` { "myColumnGroup".`[colsOf][org.jetbrains.kotlinx.dataframe.api.colsOf]`(`[typeOf][typeOf]`<`[Int][Int]`>()) { it: `[DataColumn][org.jetbrains.kotlinx.dataframe.DataColumn]`<`[Int][Int]`> -> it.`[size][org.jetbrains.kotlinx.dataframe.DataColumn.size]` > 10 } }` + * ## This Cols Of Overload + * Get (sub-)columns by [type] with or without [filter]. + * #### For example: + * + * `df.`[select][DataFrame.select]` { `[colsOf][colsOf]`(`[typeOf][typeOf]`<`[Int][Int]`>()) }` + * + * `df.`[select][DataFrame.select]` { myColumnGroup.`[colsOf][colsOf]`(`[typeOf][typeOf]`<`[Int][Int]`>()) { it: `[DataColumn][DataColumn]`<`[Int][Int]`> -> it.`[size][DataColumn.size]` > 10 } }` + * + * `df.`[select][DataFrame.select]` { myColumnGroup.`[colsOf][colsOf]`(`[typeOf][typeOf]`<`[Int][Int]`>()) }` + * + * @param [filter] an optional filter function that takes a column of type [C] and returns `true` if the column should be included. + * @return A [ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet] containing the columns of given type that were included by [filter]. + */ +public fun SingleColumn<*>.colsOf( + type: KType, + filter: (DataColumn) -> Boolean = { true }, +): TransformableColumnSet = + colsInternal { it.isSubtypeOf(type) && filter(it.cast()) } as TransformableColumnSet /** * ## Cols Of @@ -4490,7 +4873,7 @@ public fun ColumnSet<*>.colsOf(type: KType, filter: (DataColumn) -> Boole * @param [filter] an optional filter function that takes a column of type [C] and returns `true` if the column should be included. 
* @return A [ColumnSet][org.jetbrains.kotlinx.dataframe.columns.ColumnSet] containing the columns of given type that were included by [filter]. */ -public inline fun ColumnSet<*>.colsOf(noinline filter: (DataColumn) -> Boolean = { true }): ColumnSet = +public inline fun SingleColumn<*>.colsOf(noinline filter: (DataColumn) -> Boolean = { true }): TransformableColumnSet = colsOf(typeOf(), filter) /* TODO: [Issue: #325, context receiver support](https://github.com/Kotlin/dataframe/issues/325) diff --git a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/cast.kt b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/cast.kt index 921fdd412..a77aa4c62 100644 --- a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/cast.kt +++ b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/cast.kt @@ -1,3 +1,5 @@ +@file:Suppress("UNCHECKED_CAST") + package org.jetbrains.kotlinx.dataframe.api import org.jetbrains.kotlinx.dataframe.AnyCol @@ -15,6 +17,8 @@ import org.jetbrains.kotlinx.dataframe.columns.FrameColumn import org.jetbrains.kotlinx.dataframe.columns.SingleColumn import org.jetbrains.kotlinx.dataframe.columns.ValueColumn import org.jetbrains.kotlinx.dataframe.impl.api.convertToImpl +import org.jetbrains.kotlinx.dataframe.impl.columns.TransformableColumnSet +import org.jetbrains.kotlinx.dataframe.impl.columns.TransformableSingleColumn import kotlin.reflect.typeOf public fun AnyFrame.cast(): DataFrame = this as DataFrame @@ -44,6 +48,10 @@ public fun ColumnAccessor<*>.cast(): ColumnAccessor = this as ColumnAcces public fun ColumnSet<*>.cast(): ColumnSet = this as ColumnSet +public fun TransformableColumnSet<*>.cast(): TransformableColumnSet = this as TransformableColumnSet + public fun ColumnReference<*>.cast(): ColumnReference = this as ColumnReference public fun SingleColumn<*>.cast(): SingleColumn = this as SingleColumn + +public fun TransformableSingleColumn<*>.cast(): 
TransformableSingleColumn = this as TransformableSingleColumn diff --git a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/corr.kt b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/corr.kt index 0e9cb3198..3212310ad 100644 --- a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/corr.kt +++ b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/corr.kt @@ -18,7 +18,7 @@ public data class Corr( internal val columns: ColumnsSelector, ) -public fun DataFrame.corr(): DataFrame = corr { dfs { it.isSuitableForCorr() } }.withItself() +public fun DataFrame.corr(): DataFrame = corr { cols { it.isSuitableForCorr() }.rec() }.withItself() public fun DataFrame.corr(columns: ColumnsSelector): Corr = Corr(this, columns) public fun DataFrame.corr(vararg columns: String): Corr = corr { columns.toColumnSet() } diff --git a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/cumSum.kt b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/cumSum.kt index ec2d861a7..40d929b4f 100644 --- a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/cumSum.kt +++ b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/cumSum.kt @@ -52,7 +52,9 @@ public fun DataFrame.cumSum( public fun DataFrame.cumSum(vararg columns: KProperty<*>, skipNA: Boolean = defaultCumSumSkipNA): DataFrame = cumSum(skipNA) { columns.toColumnSet() } -public fun DataFrame.cumSum(skipNA: Boolean = defaultCumSumSkipNA): DataFrame = cumSum(skipNA) { allDfs() } +public fun DataFrame.cumSum(skipNA: Boolean = defaultCumSumSkipNA): DataFrame = cumSum(skipNA) { + cols { !it.isColumnGroup() }.recursively() +} // endregion @@ -77,7 +79,8 @@ public fun GroupBy.cumSum( skipNA: Boolean = defaultCumSumSkipNA, ): GroupBy = cumSum(skipNA) { columns.toColumnSet() } -public fun GroupBy.cumSum(skipNA: Boolean = defaultCumSumSkipNA): GroupBy = - 
cumSum(skipNA) { allDfs() } +public fun GroupBy.cumSum(skipNA: Boolean = defaultCumSumSkipNA): GroupBy = cumSum(skipNA) { + cols { !it.isColumnGroup() }.recursively() +} // endregion diff --git a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/describe.kt b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/describe.kt index 793dc2f87..11615b113 100644 --- a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/describe.kt +++ b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/describe.kt @@ -39,7 +39,7 @@ public fun DataColumn.describe(): DataFrame = describe // region DataFrame -public fun DataFrame.describe(): DataFrame = describe { allDfs() } +public fun DataFrame.describe(): DataFrame = describe { cols { !it.isColumnGroup() }.recursively() } public fun DataFrame.describe(columns: ColumnsSelector): DataFrame = describeImpl(getColumnsWithPaths(columns)) diff --git a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/explode.kt b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/explode.kt index a816fde4b..43214008f 100644 --- a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/explode.kt +++ b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/explode.kt @@ -10,7 +10,7 @@ import org.jetbrains.kotlinx.dataframe.columns.toColumnSet import org.jetbrains.kotlinx.dataframe.impl.api.explodeImpl import kotlin.reflect.KProperty -private val defaultExplodeColumns: ColumnsSelector<*, *> = { dfs { it.isList() || it.isFrameColumn() } } +private val defaultExplodeColumns: ColumnsSelector<*, *> = { cols { it.isList() || it.isFrameColumn() }.rec() } // region explode DataFrame diff --git a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/inferType.kt b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/inferType.kt index 
8453af76f..3e28c405c 100644 --- a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/inferType.kt +++ b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/inferType.kt @@ -14,7 +14,7 @@ public fun AnyCol.inferType(): DataColumn<*> = guessColumnType(name, toList(), t // region DataFrame -public fun DataFrame.inferType(): DataFrame = inferType { allDfs() } +public fun DataFrame.inferType(): DataFrame = inferType { cols { !it.isColumnGroup() }.recursively() } public fun DataFrame.inferType(columns: ColumnsSelector): DataFrame = replace(columns).with { it.inferType() } diff --git a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/join.kt b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/join.kt index 25fb9147a..6fd14236b 100644 --- a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/join.kt +++ b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/join.kt @@ -2,11 +2,7 @@ package org.jetbrains.kotlinx.dataframe.api import org.jetbrains.kotlinx.dataframe.ColumnsContainer import org.jetbrains.kotlinx.dataframe.DataFrame -import org.jetbrains.kotlinx.dataframe.columns.ColumnReference -import org.jetbrains.kotlinx.dataframe.columns.ColumnResolutionContext -import org.jetbrains.kotlinx.dataframe.columns.ColumnSet -import org.jetbrains.kotlinx.dataframe.columns.ColumnWithPath -import org.jetbrains.kotlinx.dataframe.columns.toColumnSet +import org.jetbrains.kotlinx.dataframe.columns.* import org.jetbrains.kotlinx.dataframe.impl.api.joinImpl import kotlin.reflect.KProperty diff --git a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/parse.kt b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/parse.kt index e4d086f18..a60747a77 100644 --- a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/parse.kt +++ 
b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/parse.kt @@ -55,7 +55,9 @@ public data class ParserOptions( public fun DataColumn.tryParse(options: ParserOptions? = null): DataColumn<*> = tryParseImpl(options) -public fun DataFrame.parse(options: ParserOptions? = null): DataFrame = parse(options) { allDfs() } +public fun DataFrame.parse(options: ParserOptions? = null): DataFrame = parse(options) { + cols { !it.isColumnGroup() }.recursively() +} public fun DataColumn.parse(options: ParserOptions? = null): DataColumn<*> = tryParse(options).also { if (it.typeClass == String::class) error("Can't guess column type") } diff --git a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/rename.kt b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/rename.kt index 2b8c88cb2..1c06afae9 100644 --- a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/rename.kt +++ b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/rename.kt @@ -42,28 +42,29 @@ public fun DataFrame.rename(cols: Iterable>): Renam public data class RenameClause(val df: DataFrame, val columns: ColumnsSelector) -public fun DataFrame.renameToCamelCase(): DataFrame { +public fun DataFrame.renameToCamelCase(): DataFrame = this // recursively rename all column groups to camel case - return rename { - dfs { it.isColumnGroup() && it.name() matches DELIMITED_STRING_REGEX } + .rename { + groups { it.name() matches DELIMITED_STRING_REGEX }.recursively() }.toCamelCase() - // recursively rename all other columns to camel case - .rename { - dfs { !it.isColumnGroup() && it.name() matches DELIMITED_STRING_REGEX } - }.toCamelCase() - - // take all frame columns recursively and call renameToCamelCase() on all dataframes inside - .update { - dfsOf() - }.with { it.renameToCamelCase() } - - // convert all first chars of all columns to the lowercase - .rename { - allDfs() - }.into { - it.name.replaceFirstChar { 
it.lowercaseChar() } - } -} + + // recursively rename all other columns to camel case + .rename { + cols { !it.isColumnGroup() && it.name() matches DELIMITED_STRING_REGEX }.recursively() + }.toCamelCase() + + // take all frame columns recursively and call renameToCamelCase() on all dataframes inside + .update { + colsOf().recursively() + }.with { it.renameToCamelCase() } + + // convert all first chars of all columns to the lowercase + .rename { + cols { !it.isColumnGroup() }.recursively() + }.into { + it.name.replaceFirstChar { it.lowercaseChar() } + } + public fun RenameClause.into(vararg newColumns: ColumnReference<*>): DataFrame = into(*newColumns.map { it.name() }.toTypedArray()) diff --git a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/reorder.kt b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/reorder.kt index ca32ebd92..0f6672e17 100644 --- a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/reorder.kt +++ b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/reorder.kt @@ -39,12 +39,17 @@ public fun > Reorder.byDesc(expression: ColumnExpr reorderImpl(true, expression) public fun > DataFrame.reorderColumnsBy( - dfs: Boolean = true, + recursively: Boolean = true, desc: Boolean = false, expression: Selector, -): DataFrame = Reorder(this, { if (dfs) allDfs(true) else all() }, dfs).reorderImpl(desc, expression) - -public fun DataFrame.reorderColumnsByName(dfs: Boolean = true, desc: Boolean = false): DataFrame = - reorderColumnsBy(dfs, desc) { name() } +): DataFrame = + Reorder( + df = this, + columns = { if (recursively) all().recursively() else all() }, + inFrameColumns = recursively, + ).reorderImpl(desc, expression) + +public fun DataFrame.reorderColumnsByName(recursively: Boolean = true, desc: Boolean = false): DataFrame = + reorderColumnsBy(recursively, desc) { name() } // endregion diff --git 
a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/replace.kt b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/replace.kt index 0905c0f18..4e2d27e99 100644 --- a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/replace.kt +++ b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/replace.kt @@ -38,7 +38,7 @@ public fun DataFrame.replace(columns: Iterable>): R public fun DataFrame.replaceAll( vararg valuePairs: Pair, - columns: ColumnsSelector = { allDfs() }, + columns: ColumnsSelector = { cols { !it.isColumnGroup() }.recursively() }, ): DataFrame { val map = valuePairs.toMap() return update(columns).with { map[it] ?: it } diff --git a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/toDataFrame.kt b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/toDataFrame.kt index c03c68178..0cd779e58 100644 --- a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/toDataFrame.kt +++ b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/toDataFrame.kt @@ -131,12 +131,12 @@ public fun Iterable>>.toDataFrameFromPairs(): AnyFra public interface TraversePropertiesDsl { /** - * Skip given [classes] during dfs traversal + * Skip given [classes] during recursive (dfs) traversal */ public fun exclude(vararg classes: KClass<*>) /** - * Skip given [properties] during dfs traversal + * Skip given [properties] during recursive (dfs) traversal */ public fun exclude(vararg properties: KProperty<*>) diff --git a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/xs.kt b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/xs.kt index 3706590e7..75c843770 100644 --- a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/xs.kt +++ b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/xs.kt @@ -6,16 +6,22 @@ 
import org.jetbrains.kotlinx.dataframe.impl.api.xsImpl // region DataFrame -public fun DataFrame.xs(vararg keyValues: Any?): DataFrame = xs(*keyValues) { allDfs().take(keyValues.size) } +public fun DataFrame.xs(vararg keyValues: Any?): DataFrame = xs(*keyValues) { + cols { !it.isColumnGroup() }.recursively().take(keyValues.size) +} -public fun DataFrame.xs(vararg keyValues: C, keyColumns: ColumnsSelector): DataFrame = xsImpl(keyColumns, false, *keyValues) +public fun DataFrame.xs(vararg keyValues: C, keyColumns: ColumnsSelector): DataFrame = + xsImpl(keyColumns, false, *keyValues) // endregion // region GroupBy -public fun GroupBy.xs(vararg keyValues: Any?): GroupBy = xs(*keyValues) { allDfs().take(keyValues.size) } +public fun GroupBy.xs(vararg keyValues: Any?): GroupBy = xs(*keyValues) { + cols { !it.isColumnGroup() }.recursively().take(keyValues.size) +} -public fun GroupBy.xs(vararg keyValues: C, keyColumns: ColumnsSelector): GroupBy = xsImpl(*keyValues, keyColumns = keyColumns) +public fun GroupBy.xs(vararg keyValues: C, keyColumns: ColumnsSelector): GroupBy = + xsImpl(*keyValues, keyColumns = keyColumns) // endregion diff --git a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/columns/ColumnReference.kt b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/columns/ColumnReference.kt index e65d38476..8ed0f7840 100644 --- a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/columns/ColumnReference.kt +++ b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/columns/ColumnReference.kt @@ -1,10 +1,6 @@ package org.jetbrains.kotlinx.dataframe.columns -import org.jetbrains.kotlinx.dataframe.AnyFrame -import org.jetbrains.kotlinx.dataframe.AnyRow -import org.jetbrains.kotlinx.dataframe.DataColumn -import org.jetbrains.kotlinx.dataframe.DataFrame -import org.jetbrains.kotlinx.dataframe.DataRow +import org.jetbrains.kotlinx.dataframe.* import org.jetbrains.kotlinx.dataframe.api.name 
import org.jetbrains.kotlinx.dataframe.impl.columnName import org.jetbrains.kotlinx.dataframe.impl.columns.RenamedColumnReference @@ -20,7 +16,8 @@ import kotlin.reflect.KProperty */ public interface ColumnReference : SingleColumn { - public operator fun getValue(thisRef: Any?, property: KProperty<*>): ColumnReference = renamedReference(property.columnName) + public operator fun getValue(thisRef: Any?, property: KProperty<*>): ColumnReference = + renamedReference(property.columnName) public fun name(): String @@ -32,13 +29,16 @@ public interface ColumnReference : SingleColumn { public fun getValueOrNull(row: AnyRow): C? = resolveFor(row.df())?.get(row.index()) - override fun resolveSingle(context: ColumnResolutionContext): ColumnWithPath? { - return context.df.getColumn(path(), context.unresolvedColumnsPolicy)?.addPath(path()) - } + override fun resolveSingle(context: ColumnResolutionContext): ColumnWithPath? = + context.df + .getColumn(path(), context.unresolvedColumnsPolicy) + ?.addPath(path()) } -internal fun ColumnReference.renamedReference(newName: String): ColumnReference = RenamedColumnReference(this, newName) +internal fun ColumnReference.renamedReference(newName: String): ColumnReference = + RenamedColumnReference(this, newName) internal fun ColumnReference<*>.shortPath() = ColumnPath(name) -internal fun ColumnReference.resolveFor(df: AnyFrame): ColumnWithPath? = resolveSingle(ColumnResolutionContext(df, UnresolvedColumnsPolicy.Skip)) +internal fun ColumnReference.resolveFor(df: AnyFrame): ColumnWithPath? 
= + resolveSingle(ColumnResolutionContext(df, UnresolvedColumnsPolicy.Skip)) diff --git a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/columns/ColumnSet.kt b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/columns/ColumnSet.kt index 794f94aab..b534a61c1 100644 --- a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/columns/ColumnSet.kt +++ b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/columns/ColumnSet.kt @@ -3,21 +3,31 @@ package org.jetbrains.kotlinx.dataframe.columns import org.jetbrains.kotlinx.dataframe.ColumnsSelector import org.jetbrains.kotlinx.dataframe.DataColumn import org.jetbrains.kotlinx.dataframe.DataFrame +import org.jetbrains.kotlinx.dataframe.impl.columns.* /** + * ## ColumnSet * Entity that can be resolved into a list of [columns][DataColumn]. * * Used as a return type of [ColumnsSelector]. * @param C common type of resolved columns + * @see [SingleColumn] + * @see [TransformableColumnSet] + * @see [TransformableSingleColumn] */ public interface ColumnSet { + /** + * Resolves this [ColumnSet] as a [List]<[ColumnWithPath]<[C]>>. + * In many cases this function [transforms][ColumnSet.transform] a parent [ColumnSet] to reach + * the current [ColumnSet] result. 
+ */ public fun resolve(context: ColumnResolutionContext): List> } -public class ColumnResolutionContext internal constructor ( +public class ColumnResolutionContext internal constructor( internal val df: DataFrame<*>, - internal val unresolvedColumnsPolicy: UnresolvedColumnsPolicy + internal val unresolvedColumnsPolicy: UnresolvedColumnsPolicy, ) { public val allowMissingColumns: Boolean = unresolvedColumnsPolicy != UnresolvedColumnsPolicy.Fail diff --git a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/columns/ColumnWithPath.kt b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/columns/ColumnWithPath.kt index 4ef6b1f21..45d841dc5 100644 --- a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/columns/ColumnWithPath.kt +++ b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/columns/ColumnWithPath.kt @@ -19,15 +19,24 @@ public interface ColumnWithPath : DataColumn { public fun depth(): Int = path.depth() - public fun getChild(accessor: ColumnReference): ColumnWithPath? = asColumnGroup().getColumnOrNull(accessor)?.addPath(path + accessor.path()) + public fun getChild(accessor: ColumnReference): ColumnWithPath? = + asColumnGroup().getColumnOrNull(accessor)?.addPath(path + accessor.path()) - public fun getChild(name: String): ColumnWithPath? = asColumnGroup().getColumnOrNull(name)?.addParentPath(path) + public fun getChild(name: String): ColumnWithPath? = + asColumnGroup().getColumnOrNull(name)?.addParentPath(path) - public fun getChild(index: Int): ColumnWithPath? = asColumnGroup().getColumnOrNull(index)?.addParentPath(path) + public fun getChild(index: Int): ColumnWithPath? = + asColumnGroup().getColumnOrNull(index)?.addParentPath(path) - public fun getChild(accessor: KProperty): ColumnWithPath? = asColumnGroup().getColumnOrNull(accessor)?.addParentPath(path) + public fun getChild(accessor: KProperty): ColumnWithPath? 
= + asColumnGroup().getColumnOrNull(accessor)?.addParentPath(path) - public fun children(): List> = if (isColumnGroup()) data.asColumnGroup().columns().map { it.addParentPath(path) } else emptyList() + public fun children(): List> = + if (isColumnGroup()) { + data.asColumnGroup().columns().map { it.addParentPath(path) } + } else { + emptyList() + } override fun path(): ColumnPath = path diff --git a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/columns/SingleColumn.kt b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/columns/SingleColumn.kt index f822f9774..68a0615d9 100644 --- a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/columns/SingleColumn.kt +++ b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/columns/SingleColumn.kt @@ -1,15 +1,30 @@ package org.jetbrains.kotlinx.dataframe.columns import org.jetbrains.kotlinx.dataframe.DataColumn +import org.jetbrains.kotlinx.dataframe.api.isColumnGroup +import org.jetbrains.kotlinx.dataframe.impl.columns.* /** * Entity that can be [resolved][resolveSingle] into [DataColumn]. * * @param C Column [type][BaseColumn.type] of resolved column. + * @see [ColumnSet] + * @see [TransformableColumnSet] + * @see [TransformableSingleColumn] */ public interface SingleColumn : ColumnSet { - override fun resolve(context: ColumnResolutionContext): List> = resolveSingle(context)?.let { listOf(it) } ?: emptyList() + override fun resolve( + context: ColumnResolutionContext, + ): List> = resolveSingle(context)?.let { listOf(it) } ?: emptyList() public fun resolveSingle(context: ColumnResolutionContext): ColumnWithPath? } + +public fun ColumnSet<*>.isSingleColumn(): Boolean = this is SingleColumn<*> + +/** + * Returns true if [this] is a [SingleColumn] and [cols] consists of a single column group. 
+ */ +public fun ColumnSet<*>.isSingleColumnWithGroup(cols: List>): Boolean = + isSingleColumn() && cols.singleOrNull()?.isColumnGroup() == true diff --git a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/DataFrameReceiver.kt b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/DataFrameReceiver.kt index 3345f87fe..25e877c02 100644 --- a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/DataFrameReceiver.kt +++ b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/DataFrameReceiver.kt @@ -1,27 +1,13 @@ package org.jetbrains.kotlinx.dataframe.impl -import org.jetbrains.kotlinx.dataframe.ColumnSelector -import org.jetbrains.kotlinx.dataframe.DataColumn -import org.jetbrains.kotlinx.dataframe.DataFrame -import org.jetbrains.kotlinx.dataframe.DataRow -import org.jetbrains.kotlinx.dataframe.api.asColumnGroup -import org.jetbrains.kotlinx.dataframe.api.asDataColumn -import org.jetbrains.kotlinx.dataframe.api.cast -import org.jetbrains.kotlinx.dataframe.api.isColumnGroup -import org.jetbrains.kotlinx.dataframe.api.pathOf -import org.jetbrains.kotlinx.dataframe.columns.ColumnGroup -import org.jetbrains.kotlinx.dataframe.columns.ColumnPath -import org.jetbrains.kotlinx.dataframe.columns.ColumnReference -import org.jetbrains.kotlinx.dataframe.columns.ColumnResolutionContext -import org.jetbrains.kotlinx.dataframe.columns.ColumnWithPath -import org.jetbrains.kotlinx.dataframe.columns.SingleColumn -import org.jetbrains.kotlinx.dataframe.columns.UnresolvedColumnsPolicy +import org.jetbrains.kotlinx.dataframe.* +import org.jetbrains.kotlinx.dataframe.api.* +import org.jetbrains.kotlinx.dataframe.columns.* import org.jetbrains.kotlinx.dataframe.impl.columns.ColumnGroupWithParent import org.jetbrains.kotlinx.dataframe.impl.columns.ColumnGroupWithPathImpl import org.jetbrains.kotlinx.dataframe.impl.columns.addPath import 
org.jetbrains.kotlinx.dataframe.impl.columns.missing.MissingColumnGroup import org.jetbrains.kotlinx.dataframe.impl.columns.missing.MissingDataColumn -import org.jetbrains.kotlinx.dataframe.nrow private fun DataFrame.unbox(): DataFrame = when (this) { is ColumnGroupWithParent -> source.unbox() @@ -29,11 +15,12 @@ private fun DataFrame.unbox(): DataFrame = when (this) { else -> this } -internal abstract class DataFrameReceiverBase(protected val df: DataFrame) : DataFrameImpl(df.columns(), df.nrow) +internal abstract class DataFrameReceiverBase(protected val df: DataFrame) : + DataFrameImpl(df.columns(), df.nrow) internal open class DataFrameReceiver( source: DataFrame, - private val unresolvedColumnsPolicy: UnresolvedColumnsPolicy + private val unresolvedColumnsPolicy: UnresolvedColumnsPolicy, ) : DataFrameReceiverBase(source.unbox()), SingleColumn> { private fun DataColumn?.check(path: ColumnPath): DataColumn = @@ -61,11 +48,14 @@ internal open class DataFrameReceiver( } override fun getColumnOrNull(path: ColumnPath) = super.getColumnOrNull(path).check(path) - override fun getColumnOrNull(column: ColumnSelector) = getColumnsImpl(unresolvedColumnsPolicy, column).singleOrNull() + override fun getColumnOrNull(column: ColumnSelector) = + getColumnsImpl(unresolvedColumnsPolicy, column).singleOrNull() - override fun resolveSingle(context: ColumnResolutionContext): ColumnWithPath>? = DataColumn.createColumnGroup("", df).addPath(emptyPath()) + override fun resolveSingle(context: ColumnResolutionContext): ColumnWithPath>? 
= + DataColumn.createColumnGroup("", df).addPath(emptyPath()) - override fun columns() = df.columns().map { if (it.isColumnGroup()) ColumnGroupWithParent(null, it.asColumnGroup()) else it } + override fun columns() = + df.columns().map { if (it.isColumnGroup()) ColumnGroupWithParent(null, it.asColumnGroup()) else it } override fun columnNames() = df.columnNames() diff --git a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/GroupByImpl.kt b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/GroupByImpl.kt index 6675dcc0c..585dc87fa 100644 --- a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/GroupByImpl.kt +++ b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/GroupByImpl.kt @@ -6,18 +6,7 @@ import org.jetbrains.kotlinx.dataframe.DataFrame import org.jetbrains.kotlinx.dataframe.Selector import org.jetbrains.kotlinx.dataframe.aggregation.AggregateGroupedBody import org.jetbrains.kotlinx.dataframe.aggregation.NamedValue -import org.jetbrains.kotlinx.dataframe.api.GroupBy -import org.jetbrains.kotlinx.dataframe.api.GroupedRowFilter -import org.jetbrains.kotlinx.dataframe.api.asGroupBy -import org.jetbrains.kotlinx.dataframe.api.cast -import org.jetbrains.kotlinx.dataframe.api.concat -import org.jetbrains.kotlinx.dataframe.api.convert -import org.jetbrains.kotlinx.dataframe.api.getColumn -import org.jetbrains.kotlinx.dataframe.api.getColumnsWithPaths -import org.jetbrains.kotlinx.dataframe.api.into -import org.jetbrains.kotlinx.dataframe.api.minus -import org.jetbrains.kotlinx.dataframe.api.pathOf -import org.jetbrains.kotlinx.dataframe.api.rename +import org.jetbrains.kotlinx.dataframe.api.* import org.jetbrains.kotlinx.dataframe.columns.FrameColumn import org.jetbrains.kotlinx.dataframe.impl.aggregation.AggregatableInternal import org.jetbrains.kotlinx.dataframe.impl.aggregation.GroupByReceiverImpl @@ -95,7 +84,7 @@ internal fun aggregateGroupBy( if 
(!removeColumns) removedNode.data.wasRemoved = false - val columnsToInsert = groupedFrame.getColumnsWithPaths { allDfs() }.map { + val columnsToInsert = groupedFrame.getColumnsWithPaths { cols { !it.isColumnGroup() }.rec() }.map { ColumnToInsert(insertPath + it.path, it, removedNode) } val src = if (removeColumns) removed.df else df diff --git a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/aggregation/ConfiguredAggregateColumn.kt b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/aggregation/ConfiguredAggregateColumn.kt index daae75d99..6a0f6ea05 100644 --- a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/aggregation/ConfiguredAggregateColumn.kt +++ b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/aggregation/ConfiguredAggregateColumn.kt @@ -1,11 +1,6 @@ package org.jetbrains.kotlinx.dataframe.impl.aggregation -import org.jetbrains.kotlinx.dataframe.api.name -import org.jetbrains.kotlinx.dataframe.columns.ColumnPath -import org.jetbrains.kotlinx.dataframe.columns.ColumnResolutionContext -import org.jetbrains.kotlinx.dataframe.columns.ColumnSet -import org.jetbrains.kotlinx.dataframe.columns.ColumnWithPath -import org.jetbrains.kotlinx.dataframe.columns.shortPath +import org.jetbrains.kotlinx.dataframe.columns.* internal class ConfiguredAggregateColumn private constructor( val columns: ColumnSet, @@ -22,7 +17,7 @@ internal class ConfiguredAggregateColumn private constructor( else -> AggregateColumnDescriptor(col, default, if (keepName) newPath?.plus(col.name) else newPath) } - override fun resolve(context: ColumnResolutionContext): List> { + private fun resolve(context: ColumnResolutionContext, columns: ColumnSet): List> { val resolved = columns.resolve(context) if (resolved.size == 1) return listOf(resolved[0].toDescriptor(false)) else return resolved.map { @@ -30,6 +25,9 @@ internal class ConfiguredAggregateColumn private constructor( } } + override 
fun resolve(context: ColumnResolutionContext): List> = + resolve(context, columns) + companion object { fun withDefault(src: ColumnSet, default: C?): ColumnSet = when (src) { diff --git a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/corr.kt b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/corr.kt index 2a3143be9..cac43096d 100644 --- a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/corr.kt +++ b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/corr.kt @@ -2,16 +2,7 @@ package org.jetbrains.kotlinx.dataframe.impl.api import org.jetbrains.kotlinx.dataframe.ColumnsSelector import org.jetbrains.kotlinx.dataframe.DataFrame -import org.jetbrains.kotlinx.dataframe.api.Corr -import org.jetbrains.kotlinx.dataframe.api.cast -import org.jetbrains.kotlinx.dataframe.api.castToNotNullable -import org.jetbrains.kotlinx.dataframe.api.convertToDouble -import org.jetbrains.kotlinx.dataframe.api.dataFrameOf -import org.jetbrains.kotlinx.dataframe.api.getColumnsWithPaths -import org.jetbrains.kotlinx.dataframe.api.isColumnGroup -import org.jetbrains.kotlinx.dataframe.api.isSuitableForCorr -import org.jetbrains.kotlinx.dataframe.api.name -import org.jetbrains.kotlinx.dataframe.api.toValueColumn +import org.jetbrains.kotlinx.dataframe.api.* import org.jetbrains.kotlinx.dataframe.columns.ColumnPath import org.jetbrains.kotlinx.dataframe.columns.ColumnWithPath import org.jetbrains.kotlinx.dataframe.math.varianceAndMean @@ -25,7 +16,7 @@ internal fun Corr.corrImpl(otherColumns: ColumnsSelector): // extract nested number columns from ColumnGroups if (it.isColumnGroup()) { val groupPath = it.path - df.getColumnsWithPaths { groupPath.dfs { it.isSuitableForCorr() } }.map { it.cast() } + df.getColumnsWithPaths { groupPath.cols { it.isSuitableForCorr() }.rec() }.map { it.cast() } } else listOf(it) } diff --git 
a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/describe.kt b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/describe.kt index 671480716..74b0567b2 100644 --- a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/describe.kt +++ b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/describe.kt @@ -31,12 +31,25 @@ import org.jetbrains.kotlinx.dataframe.type import kotlin.reflect.jvm.jvmErasure internal fun describeImpl(cols: List): DataFrame { - fun List.collectAll(dfs: Boolean): List = flatMap { col -> + fun List.collectAll(recursively: Boolean): List = flatMap { col -> when (col.kind) { - ColumnKind.Frame -> col.asAnyFrameColumn().concat().columns().map { - it.addPath(col.path() + it.name) - }.collectAll(true) - ColumnKind.Group -> if (dfs) col.asColumnGroup().columns().map { it.addPath(col.path() + it.name) }.collectAll(true) else listOf(col) + ColumnKind.Frame -> + col.asAnyFrameColumn() + .concat() + .columns() + .map { it.addPath(col.path() + it.name) } + .collectAll(true) + + ColumnKind.Group -> + if (recursively) { + col.asColumnGroup() + .columns() + .map { it.addPath(col.path() + it.name) } + .collectAll(true) + } else { + listOf(col) + } + ColumnKind.Value -> listOf(col) } } @@ -55,14 +68,18 @@ internal fun describeImpl(cols: List): DataFrame { ColumnDescription::count from { it.size } ColumnDescription::unique from { it.countDistinct() } ColumnDescription::nulls from { it.values.count { it == null } } - ColumnDescription::top from inferType { it.values.filterNotNull().groupBy { it }.maxByOrNull { it.value.size }?.key } + ColumnDescription::top from inferType { + it.values.filterNotNull().groupBy { it }.maxByOrNull { it.value.size }?.key + } if (hasNumeric) { ColumnDescription::mean from { if (it.isNumber()) it.asNumbers().mean() else null } ColumnDescription::std from { if (it.isNumber()) it.asNumbers().std() else null } } if 
(hasComparable) { ColumnDescription::min from inferType { if (it.isComparable()) it.asComparable().minOrNull() else null } - ColumnDescription::median from inferType { if (it.isComparable()) it.asComparable().medianOrNull() else null } + ColumnDescription::median from inferType { + if (it.isComparable()) it.asComparable().medianOrNull() else null + } ColumnDescription::max from inferType { if (it.isComparable()) it.asComparable().maxOrNull() else null } } } diff --git a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/flatten.kt b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/flatten.kt index 5ed842f65..d410fb525 100644 --- a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/flatten.kt +++ b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/flatten.kt @@ -16,17 +16,19 @@ internal fun DataFrame.flattenImpl( columns: ColumnsSelector, keepParentNameForColumns: Boolean = false ): DataFrame { - val rootColumns = getColumnsWithPaths { columns.toColumnSet().filter { it.isColumnGroup() }.top() } + val rootColumns = getColumnsWithPaths { + columns.toColumnSet().filter { it.isColumnGroup() }.roots() + } val rootPrefixes = rootColumns.map { it.path }.toSet() - val nameGenerators = rootPrefixes.map { it.dropLast() }.distinct().associate { path -> + val nameGenerators = rootPrefixes.map { it.dropLast() }.distinct().associateWith { path -> val usedNames = get(path).asColumnGroup().columns().filter { path + it.name() !in rootPrefixes }.map { it.name() } - path to ColumnNameGenerator(usedNames) + ColumnNameGenerator(usedNames) } fun getRootPrefix(path: ColumnPath) = (1 until path.size).asSequence().map { path.take(it) }.first { rootPrefixes.contains(it) } - val result = move { rootPrefixes.toColumnSet().allDfs() } + val result = move { rootPrefixes.toColumnSet().cols { !it.isColumnGroup() }.recursively() } .into { val targetPath = 
getRootPrefix(it.path).dropLast(1) val nameGen = nameGenerators[targetPath]!! diff --git a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/join.kt b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/join.kt index 4624fa6de..daad1cde2 100644 --- a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/join.kt +++ b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/join.kt @@ -74,8 +74,8 @@ internal fun DataFrame.joinImpl( val leftCol = leftJoinColumns[i] val rightCol = rightJoinColumns[i] if (leftCol.isColumnGroup() && rightCol.isColumnGroup()) { - val leftColumns = getColumnsWithPaths { leftCol.allDfs() } - val rightColumns = other.getColumnsWithPaths { rightCol.allDfs() } + val leftColumns = getColumnsWithPaths { leftCol.cols { !it.isColumnGroup() }.recursively()} + val rightColumns = other.getColumnsWithPaths { rightCol.cols { !it.isColumnGroup() }.recursively() } val leftPrefixLength = leftCol.path.size val rightPrefixLength = rightCol.path.size @@ -147,12 +147,13 @@ internal fun DataFrame.joinImpl( outputRowsCount += rightUnmatchedCount } - val leftColumns = getColumnsWithPaths { allDfs() } + val leftColumns = getColumnsWithPaths { cols { !it.isColumnGroup() }.recursively() } - val rightJoinColumnPaths = allRightJoinColumns.map { it.path to it.data }.toMap() + val rightJoinColumnPaths = allRightJoinColumns.associate { it.path to it.data } val newRightColumns = - if (addNewColumns) other.getColumnsWithPaths { dfs { !it.isColumnGroup() && !rightJoinColumnPaths.contains(it.path) } } else emptyList() + if (addNewColumns) other.getColumnsWithPaths { cols { !it.isColumnGroup() && !rightJoinColumnPaths.contains(it.path) }.rec() } + else emptyList() // for every column index from the left dataframe store matching column from the right dataframe val leftToRightColumns = leftColumns.map { rightJoinColumnPaths[pathMapping[it.path()]] } diff --git 
a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/pivot.kt b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/pivot.kt index a29c3aa41..292320ce1 100644 --- a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/pivot.kt +++ b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/pivot.kt @@ -8,12 +8,8 @@ import org.jetbrains.kotlinx.dataframe.api.PivotColumnsSelector import org.jetbrains.kotlinx.dataframe.api.forEach import org.jetbrains.kotlinx.dataframe.api.groupBy import org.jetbrains.kotlinx.dataframe.api.toPath -import org.jetbrains.kotlinx.dataframe.columns.ColumnPath -import org.jetbrains.kotlinx.dataframe.columns.ColumnResolutionContext -import org.jetbrains.kotlinx.dataframe.columns.ColumnSet -import org.jetbrains.kotlinx.dataframe.columns.ColumnWithPath +import org.jetbrains.kotlinx.dataframe.columns.* import org.jetbrains.kotlinx.dataframe.columns.UnresolvedColumnsPolicy -import org.jetbrains.kotlinx.dataframe.columns.toColumnSet import org.jetbrains.kotlinx.dataframe.impl.aggregation.GroupByReceiverImpl import org.jetbrains.kotlinx.dataframe.impl.aggregation.receivers.AggregateInternalDsl import org.jetbrains.kotlinx.dataframe.impl.aggregation.receivers.AggregatePivotDslImpl diff --git a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/sort.kt b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/sort.kt index fac8e3967..a83d14824 100644 --- a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/sort.kt +++ b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/sort.kt @@ -9,11 +9,8 @@ import org.jetbrains.kotlinx.dataframe.api.castFrameColumn import org.jetbrains.kotlinx.dataframe.api.getFrameColumn import org.jetbrains.kotlinx.dataframe.api.update import org.jetbrains.kotlinx.dataframe.api.with -import 
org.jetbrains.kotlinx.dataframe.columns.ColumnResolutionContext -import org.jetbrains.kotlinx.dataframe.columns.ColumnSet -import org.jetbrains.kotlinx.dataframe.columns.ColumnWithPath +import org.jetbrains.kotlinx.dataframe.columns.* import org.jetbrains.kotlinx.dataframe.columns.UnresolvedColumnsPolicy -import org.jetbrains.kotlinx.dataframe.columns.ValueColumn import org.jetbrains.kotlinx.dataframe.impl.columns.addPath import org.jetbrains.kotlinx.dataframe.impl.columns.assertIsComparable import org.jetbrains.kotlinx.dataframe.impl.columns.missing.MissingColumnGroup @@ -106,7 +103,8 @@ internal fun ColumnWithPath.addFlag(flag: SortFlag): ColumnWithPath { } internal class ColumnsWithSortFlag(val column: ColumnSet, val flag: SortFlag) : ColumnSet { - override fun resolve(context: ColumnResolutionContext) = column.resolve(context).map { it.addFlag(flag) } + override fun resolve(context: ColumnResolutionContext) = + column.resolve(context).map { it.addFlag(flag) } } internal class SortColumnDescriptor( diff --git a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/columns/ColumnAccessorImpl.kt b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/columns/ColumnAccessorImpl.kt index 3f6e14e82..dbc7d0944 100644 --- a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/columns/ColumnAccessorImpl.kt +++ b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/columns/ColumnAccessorImpl.kt @@ -6,11 +6,7 @@ import org.jetbrains.kotlinx.dataframe.api.asColumnGroup import org.jetbrains.kotlinx.dataframe.api.cast import org.jetbrains.kotlinx.dataframe.api.isColumnGroup import org.jetbrains.kotlinx.dataframe.api.toPath -import org.jetbrains.kotlinx.dataframe.columns.ColumnAccessor -import org.jetbrains.kotlinx.dataframe.columns.ColumnPath -import org.jetbrains.kotlinx.dataframe.columns.ColumnReference -import org.jetbrains.kotlinx.dataframe.columns.ColumnResolutionContext -import 
org.jetbrains.kotlinx.dataframe.columns.ColumnWithPath +import org.jetbrains.kotlinx.dataframe.columns.* internal class ColumnAccessorImpl(val path: ColumnPath) : ColumnAccessor { diff --git a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/columns/ColumnsList.kt b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/columns/ColumnsList.kt index 4fe7c9885..35d15fc7c 100644 --- a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/columns/ColumnsList.kt +++ b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/columns/ColumnsList.kt @@ -6,5 +6,6 @@ import org.jetbrains.kotlinx.dataframe.columns.ColumnSet internal class ColumnsList(val columns: List>) : ColumnSet { constructor(vararg columns: ColumnSet) : this(columns.toList()) - override fun resolve(context: ColumnResolutionContext) = columns.flatMap { it.resolve(context) } + override fun resolve(context: ColumnResolutionContext) = + columns.flatMap { it.resolve(context) } } diff --git a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/columns/Recursively.kt b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/columns/Recursively.kt new file mode 100644 index 000000000..5f6b1de32 --- /dev/null +++ b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/columns/Recursively.kt @@ -0,0 +1,92 @@ +package org.jetbrains.kotlinx.dataframe.impl.columns + +import org.jetbrains.kotlinx.dataframe.api.allInternal +import org.jetbrains.kotlinx.dataframe.api.isColumnGroup +import org.jetbrains.kotlinx.dataframe.columns.* +import org.jetbrains.kotlinx.dataframe.impl.columns.tree.flattenRecursively + +/** + * Recursively implementation for [TransformableColumnSet]. 
+ * This converts a [TransformableColumnSet] into a [ColumnSet] by redirecting [ColumnSet.resolve] + * to [TransformableColumnSet.transformResolve] with a correctly configured [RecursivelyTransformer]. + */ +internal fun TransformableColumnSet.recursivelyImpl( + includeGroups: Boolean = true, + includeTopLevel: Boolean = true, +): ColumnSet = object : ColumnSet { + + override fun resolve(context: ColumnResolutionContext): List> = + this@recursivelyImpl.transformResolve( + context = context, + transformer = RecursivelyTransformer( + includeGroups = includeGroups, + includeTopLevel = includeTopLevel, + ), + ) +} + +/** + * Recursively implementation for [TransformableSingleColumn]. + * This converts a [TransformableSingleColumn] into a [SingleColumn] by redirecting [SingleColumn.resolveSingle] + * to [TransformableSingleColumn.transformResolveSingle] with a correctly configured [RecursivelyTransformer]. + */ +internal fun TransformableSingleColumn.recursivelyImpl( + includeGroups: Boolean = true, + includeTopLevel: Boolean = true, +): SingleColumn = object : SingleColumn { + + override fun resolveSingle(context: ColumnResolutionContext): ColumnWithPath? = + this@recursivelyImpl.transformResolveSingle( + context = context, + transformer = RecursivelyTransformer( + includeGroups = includeGroups, + includeTopLevel = includeTopLevel, + ), + ) +} + +/** + * ## Recursively transformer. + * A [ColumnSetTransformer] implementation around the [ColumnSet.flattenRecursively] function. + * Created only using [recursivelyImpl]. 
+ */ +private class RecursivelyTransformer( + val includeGroups: Boolean = true, + val includeTopLevel: Boolean = true, +) : ColumnSetTransformer { + + override fun transform(columnSet: ColumnSet<*>): ColumnSet<*> = + columnSet.flattenRecursively( + includeGroups = includeGroups, + includeTopLevel = includeTopLevel, + ) + + override fun transformSingle(singleColumn: SingleColumn<*>): ColumnSet<*> = + singleColumn.flattenRecursively( + includeGroups = includeGroups, + includeTopLevel = includeTopLevel, + ) +} + +/** + * Flattens a [ColumnSet]/[SingleColumn] recursively. + * + * If [this] is a [SingleColumn] containing a single [ColumnGroup], the "top-level" is + * considered to be the [ColumnGroup]'s children, otherwise, if this is a [ColumnSet], + * the "top-level" is considered to be the columns in the [ColumnSet]. + * + * @param includeGroups Whether to include [ColumnGroup]s in the result. + * @param includeTopLevel Whether to include the "top-level" columns in the result. + */ +internal fun ColumnSet<*>.flattenRecursively( + includeGroups: Boolean = true, + includeTopLevel: Boolean = true, +): ColumnSet<*> = allInternal().transform { cols -> + if (includeTopLevel) { + cols.flattenRecursively() + } else { + cols + .filter { it.isColumnGroup() } + .flatMap { it.children().flattenRecursively() } + }.filter { includeGroups || !it.isColumnGroup() } +} diff --git a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/columns/TransformableColumnSet.kt b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/columns/TransformableColumnSet.kt new file mode 100644 index 000000000..37b6e02e3 --- /dev/null +++ b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/columns/TransformableColumnSet.kt @@ -0,0 +1,58 @@ +package org.jetbrains.kotlinx.dataframe.impl.columns + +import org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl +import org.jetbrains.kotlinx.dataframe.columns.ColumnResolutionContext 
+import org.jetbrains.kotlinx.dataframe.columns.ColumnSet +import org.jetbrains.kotlinx.dataframe.columns.ColumnWithPath +import org.jetbrains.kotlinx.dataframe.columns.SingleColumn + +/** + * ## Transformable ColumnSet + * This type of [ColumnSet] can be [transformed][transformResolve] before being resolved. + * + * This is especially useful for calls like + * [cols { }][ColumnsSelectionDsl.cols].[recursively()][ColumnsSelectionDsl.recursively], + * where [recursively][ColumnsSelectionDsl.recursively] modifies the [ColumnSet][ColumnSet] + * that [cols { }][ColumnsSelectionDsl.cols] operates on before it's evaluated. + * + * @see [ColumnSet] + * @see [TransformableSingleColumn] + * @see [SingleColumn] + */ +public interface TransformableColumnSet : ColumnSet { + public fun transformResolve( + context: ColumnResolutionContext, + transformer: ColumnSetTransformer, + ): List> +} + +/** + * ## Transformable SingleColumn + * This type of [SingleColumn] can be [transformed][transformResolveSingle] before being resolved. + * + * This is especially useful for calls like + * [first { }][ColumnsSelectionDsl.first].[recursively()][ColumnsSelectionDsl.recursively], + * where [recursively][ColumnsSelectionDsl.recursively] modifies the [ColumnSet][ColumnSet] + * that [first { }][ColumnsSelectionDsl.first] operates on before it's evaluated. + * + * @see [SingleColumn] + * @see [TransformableColumnSet] + * @see [ColumnSet] + */ +public interface TransformableSingleColumn : SingleColumn { + public fun transformResolveSingle( + context: ColumnResolutionContext, + transformer: ColumnSetTransformer, + ): ColumnWithPath? +} + +/** + * ## Column set transformer. + * This contains implementations for both [transform][ColumnSet.transform] and + * [transformSingle][SingleColumn.transformSingle] and can be passed around. 
+ */ +public interface ColumnSetTransformer { + public fun transform(columnSet: ColumnSet<*>): ColumnSet<*> + + public fun transformSingle(singleColumn: SingleColumn<*>): ColumnSet<*> +} diff --git a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/columns/Utils.kt b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/columns/Utils.kt index e30b80ac9..197e9092d 100644 --- a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/columns/Utils.kt +++ b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/columns/Utils.kt @@ -9,27 +9,12 @@ import org.jetbrains.kotlinx.dataframe.DataRow import org.jetbrains.kotlinx.dataframe.api.cast import org.jetbrains.kotlinx.dataframe.api.name import org.jetbrains.kotlinx.dataframe.api.pathOf -import org.jetbrains.kotlinx.dataframe.columns.BaseColumn -import org.jetbrains.kotlinx.dataframe.columns.ColumnGroup -import org.jetbrains.kotlinx.dataframe.columns.ColumnKind -import org.jetbrains.kotlinx.dataframe.columns.ColumnPath -import org.jetbrains.kotlinx.dataframe.columns.ColumnResolutionContext -import org.jetbrains.kotlinx.dataframe.columns.ColumnSet -import org.jetbrains.kotlinx.dataframe.columns.ColumnWithPath -import org.jetbrains.kotlinx.dataframe.columns.FrameColumn -import org.jetbrains.kotlinx.dataframe.columns.SingleColumn -import org.jetbrains.kotlinx.dataframe.columns.UnresolvedColumnsPolicy -import org.jetbrains.kotlinx.dataframe.columns.ValueColumn +import org.jetbrains.kotlinx.dataframe.columns.* import org.jetbrains.kotlinx.dataframe.columns.values import org.jetbrains.kotlinx.dataframe.impl.DataFrameImpl import org.jetbrains.kotlinx.dataframe.impl.asNullable import org.jetbrains.kotlinx.dataframe.impl.columns.missing.MissingDataColumn -import org.jetbrains.kotlinx.dataframe.impl.columns.tree.ColumnPosition -import org.jetbrains.kotlinx.dataframe.impl.columns.tree.TreeNode -import 
org.jetbrains.kotlinx.dataframe.impl.columns.tree.collectTree -import org.jetbrains.kotlinx.dataframe.impl.columns.tree.getOrPut -import org.jetbrains.kotlinx.dataframe.impl.columns.tree.put -import org.jetbrains.kotlinx.dataframe.impl.columns.tree.topDfs +import org.jetbrains.kotlinx.dataframe.impl.columns.tree.* import org.jetbrains.kotlinx.dataframe.impl.equalsByElement import org.jetbrains.kotlinx.dataframe.impl.rollingHash import org.jetbrains.kotlinx.dataframe.nrow @@ -93,34 +78,108 @@ internal fun DataColumn.assertIsComparable(): DataColumn { return this } -internal fun SingleColumn.transformSingle(converter: (ColumnWithPath) -> List>): ColumnSet = - object : ColumnSet { - override fun resolve(context: ColumnResolutionContext): List> = - this@transformSingle.resolveSingle(context)?.let { converter(it) } ?: emptyList() - } +/** + * Applies a transformation on [this] [SingleColumn] by converting its + * single [ColumnWithPath]<[A]> to [List]<[ColumnWithPath]<[B]>] using [converter]. + * Since [converter] allows you to return multiple columns, the result is turned into a [ColumnSet]<[B]>. + */ +internal fun SingleColumn.transformSingle( + converter: (ColumnWithPath) -> List>, +): ColumnSet = object : ColumnSet { + override fun resolve(context: ColumnResolutionContext): List> = + this@transformSingle + .resolveSingle(context) + ?.let(converter) + ?: emptyList() +} -internal fun ColumnSet.transform(converter: (List>) -> List>): ColumnSet = - object : ColumnSet { - override fun resolve(context: ColumnResolutionContext) = converter(this@transform.resolve(context)) - } +/** + * Applies a transformation on [this] by converting its [List]<[ColumnWithPath]<[A]>] to [List]<[ColumnWithPath]<[B]>] + * using [converter]. 
+ * + * The result can either be used as a normal [ColumnSet]<[B]>, + * which resolves [this] and then applies [converter] on the result, + * + * or it can be used as a [TransformableColumnSet]<[B]>, where a [ColumnSetTransformer] can be injected before + * the [converter] is applied. + */ +internal fun ColumnSet.transform( + converter: (List>) -> List>, +): TransformableColumnSet = object : TransformableColumnSet { + override fun resolve(context: ColumnResolutionContext) = + this@transform + .resolve(context) + .let(converter) + + override fun transformResolve( + context: ColumnResolutionContext, + transformer: ColumnSetTransformer, + ): List> = + transformer.transform(this@transform) + .resolve(context) + .let { converter(it as List>) } +} +/** + * Applies a transformation on [this] by converting its [List]<[ColumnWithPath]<[A]>] to [List]<[ColumnWithPath]<[B]>] + * using [converter], but also providing the [ColumnResolutionContext] to the converter. + * + * The result can either be used as a normal [ColumnSet]<[B]>, + * which resolves [this] and then applies [converter] on the result, + * + * or it can be used as a [TransformableColumnSet]<[B]>, where a [ColumnSetTransformer] can be injected before + * the [converter] is applied. 
+ */ internal fun ColumnSet.transformWithContext( converter: ColumnResolutionContext.(List>) -> List>, -): ColumnSet = object : ColumnSet { +): TransformableColumnSet = object : TransformableColumnSet { override fun resolve(context: ColumnResolutionContext) = - converter(context, this@transformWithContext.resolve(context)) + this@transformWithContext + .resolve(context) + .let { converter(context, it) } + + override fun transformResolve( + context: ColumnResolutionContext, + transformer: ColumnSetTransformer, + ): List> = + transformer.transform(this@transformWithContext) + .resolve(context) + .let { converter(context, it as List>) } } -internal fun ColumnSet.singleImpl() = object : SingleColumn { - override fun resolveSingle(context: ColumnResolutionContext): ColumnWithPath? { - return this@singleImpl.resolve(context).singleOrNull() - } +/** + * Converts [this] [ColumnSet] to a [SingleColumn]. + * [resolveSingle] will return the single column of [this] if there is only one, else it will return `null`. + * If the result used as a [ColumnSet], `null` will be converted to an empty list. + */ +internal fun ColumnSet.singleImpl(): SingleColumn = object : SingleColumn { + override fun resolveSingle(context: ColumnResolutionContext): ColumnWithPath? = + this@singleImpl.resolve(context).singleOrNull() } -internal fun ColumnSet.getAt(index: Int) = object : SingleColumn { - override fun resolveSingle(context: ColumnResolutionContext): ColumnWithPath? { - return this@getAt.resolve(context).getOrNull(index) +/** + * Same as [singleImpl], however, it passes any [ColumnSetTransformer] back to [this] if it is supplied. + */ +internal fun TransformableColumnSet.singleWithTransformerImpl(): TransformableSingleColumn = + object : TransformableSingleColumn { + override fun resolveSingle(context: ColumnResolutionContext): ColumnWithPath? 
= + this@singleWithTransformerImpl.resolve(context).singleOrNull() + + override fun transformResolveSingle( + context: ColumnResolutionContext, + transformer: ColumnSetTransformer, + ): ColumnWithPath? = + this@singleWithTransformerImpl.transformResolve( + context = context, + transformer = transformer, + ).singleOrNull() } + +internal fun ColumnSet.getAt(index: Int): SingleColumn = object : SingleColumn { + override fun resolveSingle(context: ColumnResolutionContext): ColumnWithPath? = + this@getAt + .resolve(context) + .getOrNull(index) } internal fun ColumnSet.getChildrenAt(index: Int): ColumnSet = @@ -146,10 +205,16 @@ internal fun ColumnsContainer<*>.getColumn(path: ColumnPath, policy: Unresol UnresolvedColumnsPolicy.Create -> DataColumn.empty().cast() } -internal fun List>.top(): List> { - val root = TreeNode.createRoot?>(null) - forEach { root.put(it.path, it) } - return root.topDfs { it.data != null }.map { it.data!! } +/** + * Returns a sub-list of columns that are roots of the trees of columns. + * + * In practice, this means that if a column in [this] is a child of another column in [this], + * it will not be included in the result. + */ +internal fun List>.roots(): List> { + val emptyRoot = TreeNode.createRoot?>(data = null) + this.forEach { emptyRoot.put(it.path, it) } + return emptyRoot.topmostChildren { it.data != null }.map { it.data!! 
} } internal fun List>.allColumnsExcept(columns: Iterable>): List> { @@ -157,14 +222,14 @@ internal fun List>.allColumnsExcept(columns: Iterable ColumnSet.resolve( df: DataFrame<*>, - unresolvedColumnsPolicy: UnresolvedColumnsPolicy = UnresolvedColumnsPolicy.Fail + unresolvedColumnsPolicy: UnresolvedColumnsPolicy = UnresolvedColumnsPolicy.Fail, ) = resolve(ColumnResolutionContext(df, unresolvedColumnsPolicy)) internal fun SingleColumn.resolveSingle( df: DataFrame<*>, - unresolvedColumnsPolicy: UnresolvedColumnsPolicy = UnresolvedColumnsPolicy.Fail + unresolvedColumnsPolicy: UnresolvedColumnsPolicy = UnresolvedColumnsPolicy.Fail, ): ColumnWithPath? = resolveSingle(ColumnResolutionContext(df, unresolvedColumnsPolicy)) diff --git a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/columns/ValueColumnImpl.kt b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/columns/ValueColumnImpl.kt index b21d8f6f6..49253bf0b 100644 --- a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/columns/ValueColumnImpl.kt +++ b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/columns/ValueColumnImpl.kt @@ -2,9 +2,7 @@ package org.jetbrains.kotlinx.dataframe.impl.columns import org.jetbrains.kotlinx.dataframe.AnyRow import org.jetbrains.kotlinx.dataframe.DataColumn -import org.jetbrains.kotlinx.dataframe.columns.ColumnGroup -import org.jetbrains.kotlinx.dataframe.columns.ColumnResolutionContext -import org.jetbrains.kotlinx.dataframe.columns.ValueColumn +import org.jetbrains.kotlinx.dataframe.columns.* import kotlin.reflect.KType import kotlin.reflect.full.withNullability @@ -13,9 +11,8 @@ internal open class ValueColumnImpl( name: String, type: KType, val defaultValue: T? = null, - distinct: Lazy>? = null -) : - DataColumnImpl(values, name, type, distinct), ValueColumn { + distinct: Lazy>? 
= null, +) : DataColumnImpl(values, name, type, distinct), ValueColumn { override fun distinct() = ValueColumnImpl(toSet().toList(), name, type, defaultValue, distinct) @@ -40,7 +37,8 @@ internal open class ValueColumnImpl( return createWithValues(newValues, nullable) } - override fun get(columnName: String) = throw UnsupportedOperationException("Can not get nested column '$columnName' from ValueColumn '$name'") + override fun get(columnName: String) = + throw UnsupportedOperationException("Can not get nested column '$columnName' from ValueColumn '$name'") override operator fun get(range: IntRange): ValueColumn = super.get(range) as ValueColumn @@ -50,12 +48,13 @@ internal open class ValueColumnImpl( } internal class ResolvingValueColumn( - override val source: ValueColumn + override val source: ValueColumn, ) : ValueColumn by source, ForceResolvedColumn { override fun resolve(context: ColumnResolutionContext) = super.resolve(context) - override fun resolveSingle(context: ColumnResolutionContext) = context.df.getColumn(source.name(), context.unresolvedColumnsPolicy)?.addPath() + override fun resolveSingle(context: ColumnResolutionContext) = + context.df.getColumn(source.name(), context.unresolvedColumnsPolicy)?.addPath() override fun getValue(row: AnyRow) = super.getValue(row) diff --git a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/columns/constructors.kt b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/columns/constructors.kt index 01790f3b8..1cf7d097e 100644 --- a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/columns/constructors.kt +++ b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/columns/constructors.kt @@ -144,10 +144,23 @@ internal fun createColumn(values: Iterable, suggestedType: KType, guessTy // region create Columns -internal fun createColumnSet(resolver: (ColumnResolutionContext) -> List>): ColumnSet = - object : ColumnSet { - 
override fun resolve(context: ColumnResolutionContext) = resolver(context) - } +internal fun createColumnSet( + resolver: (context: ColumnResolutionContext) -> List>, +): ColumnSet = object : ColumnSet { + override fun resolve(context: ColumnResolutionContext) = resolver(context) +} + +internal fun createTransformableColumnSet( + resolver: (context: ColumnResolutionContext) -> List>, + transformResolve: (context: ColumnResolutionContext, transformer: ColumnSetTransformer) -> List>, +): TransformableColumnSet = object : TransformableColumnSet { + override fun resolve(context: ColumnResolutionContext) = resolver(context) + + override fun transformResolve( + context: ColumnResolutionContext, + transformer: ColumnSetTransformer, + ): List> = transformResolve(context, transformer) +} // region toColumnSet @@ -155,12 +168,11 @@ internal fun createColumnSet(resolver: (ColumnResolutionContext) -> List, C> Selector>.toColumnSet( createReceiver: (ColumnResolutionContext) -> T, -): ColumnSet = - createColumnSet { - val receiver = createReceiver(it) - val columnSet = this(receiver, receiver) - columnSet.resolve(receiver, it.unresolvedColumnsPolicy) - } +): ColumnSet = createColumnSet { + val receiver = createReceiver(it) + val columnSet = this(receiver, receiver) + columnSet.resolve(receiver, it.unresolvedColumnsPolicy) +} @JvmName("toColumnSetForPivot") internal fun PivotColumnsSelector.toColumnSet(): ColumnSet = toColumnSet { diff --git a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/columns/missing/MissingColumnGroup.kt b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/columns/missing/MissingColumnGroup.kt index 62518ab0c..a3abdcedd 100644 --- a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/columns/missing/MissingColumnGroup.kt +++ b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/columns/missing/MissingColumnGroup.kt @@ -8,10 +8,7 @@ import 
org.jetbrains.kotlinx.dataframe.aggregation.AggregateGroupedBody import org.jetbrains.kotlinx.dataframe.api.asDataColumn import org.jetbrains.kotlinx.dataframe.api.cast import org.jetbrains.kotlinx.dataframe.api.name -import org.jetbrains.kotlinx.dataframe.columns.ColumnPath -import org.jetbrains.kotlinx.dataframe.columns.ColumnReference -import org.jetbrains.kotlinx.dataframe.columns.ColumnResolutionContext -import org.jetbrains.kotlinx.dataframe.columns.ColumnWithPath +import org.jetbrains.kotlinx.dataframe.columns.* import org.jetbrains.kotlinx.dataframe.columns.UnresolvedColumnsPolicy import org.jetbrains.kotlinx.dataframe.impl.columns.DataColumnGroup import org.jetbrains.kotlinx.dataframe.impl.columns.addPath diff --git a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/columns/tree/TreeNode.kt b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/columns/tree/TreeNode.kt index 2f35bca69..bf9d27646 100644 --- a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/columns/tree/TreeNode.kt +++ b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/columns/tree/TreeNode.kt @@ -6,7 +6,7 @@ internal class TreeNode( override val name: String, override val depth: Int, override var data: T, - override val parent: TreeNode? = null + override val parent: TreeNode? = null, ) : ReadonlyTreeNode { companion object { @@ -28,7 +28,7 @@ internal class TreeNode( fun pathFromRoot(): ColumnPath { val path = mutableListOf() var node: TreeNode? 
= this - while (node != null && node.parent != null) { + while (node?.parent != null) { path.add(node.name) node = node.parent } @@ -48,19 +48,34 @@ internal class TreeNode( return addChild(childName, createData()) } - fun dfs(enterCondition: (TreeNode) -> Boolean = { true }, yieldCondition: (TreeNode) -> Boolean = { true }): List> { + @Deprecated("Use allChildren instead", ReplaceWith("allChildren(enterCondition, yieldCondition)")) + fun dfs( + enterCondition: (TreeNode) -> Boolean = { true }, + yieldCondition: (TreeNode) -> Boolean = { true }, + ): List> = allChildren(enterCondition, yieldCondition) + + /** + * Traverses the tree in depth-first order and returns all nodes that satisfy [yieldCondition]. + * If [enterCondition] returns false for a node, its children are not traversed. + * By default, all nodes are traversed and all nodes are returned. + */ + fun allChildren( + enterCondition: (TreeNode) -> Boolean = { true }, + yieldCondition: (TreeNode) -> Boolean = { true }, + ): List> { val result = mutableListOf>() - fun doDfs(node: TreeNode) { + + fun traverse(node: TreeNode) { if (yieldCondition(node)) { result.add(node) } if (enterCondition(node)) { node.children.forEach { - doDfs(it) + traverse(it) } } } - doDfs(this) + traverse(this) return result } } diff --git a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/columns/tree/Utils.kt b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/columns/tree/Utils.kt index e74bc75c8..97f16582b 100644 --- a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/columns/tree/Utils.kt +++ b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/columns/tree/Utils.kt @@ -29,9 +29,22 @@ internal fun TreeNode.getOrPut(path: ColumnPath, createData: (ColumnPath) return node } -internal fun TreeNode.topDfs(yieldCondition: (TreeNode) -> Boolean): List> = dfs(enterCondition = { !yieldCondition(it) }, yieldCondition = yieldCondition) 
+/** + * Traverses all children in the tree in depth-first order and returns the top-most nodes that satisfy + * [yieldCondition]. This means that if a node satisfies [yieldCondition], its children are not traversed, regardless of + * whether they satisfy [yieldCondition] or not. + */ +internal fun TreeNode.topmostChildren(yieldCondition: (TreeNode) -> Boolean): List> = + allChildren( + enterCondition = { !yieldCondition(it) }, + yieldCondition = yieldCondition, + ) -internal fun TreeNode.topDfsExcluding(excludeRoot: TreeNode<*>): List> { +@Deprecated("Use topmostChildren instead", ReplaceWith("topmostChildren(yieldCondition)")) +internal fun TreeNode.topDfs(yieldCondition: (TreeNode) -> Boolean): List> = + topmostChildren(yieldCondition) + +internal fun TreeNode.topmostChildrenExcluding(excludeRoot: TreeNode<*>): List> { val result = mutableListOf>() fun doDfs(node: TreeNode, exclude: TreeNode<*>) { if (exclude.children.isNotEmpty()) { @@ -48,23 +61,35 @@ internal fun TreeNode.topDfsExcluding(excludeRoot: TreeNode<*>): List TreeNode.dfsNotNull() = dfs { it.data != null }.map { it as TreeNode } -internal fun TreeNode.dfsTopNotNull() = dfs(enterCondition = { it.data == null }, yieldCondition = { it.data != null }).map { it as TreeNode } +internal fun TreeNode.allChildrenNotNull(): List> = + allChildren { it.data != null } as List> + +internal fun TreeNode.topmostChildrenNotNull() = + topmostChildren { it.data != null } as List> -internal fun TreeNode.allRemovedColumns() = dfs { it.data.wasRemoved && it.data.column != null } -internal fun TreeNode.allWithColumns() = dfs { it.data.column != null } -internal fun Iterable>.dfs(): List> { +internal fun TreeNode.allRemovedColumns() = + allChildren { it.data.wasRemoved && it.data.column != null } + +internal fun TreeNode.allWithColumns() = + allChildren { it.data.column != null } + +internal fun Iterable>.flattenRecursively(): List> { val result = mutableListOf>() - fun dfs(cols: Iterable>) { + + fun 
flattenRecursively(cols: Iterable>) { cols.forEach { result.add(it) val path = it.path if (it.data.isColumnGroup()) { - dfs(it.data.asColumnGroup().columns().map { it.addPath(path + it.name()) }) + flattenRecursively( + it.data.asColumnGroup() + .columns() + .map { it.addPath(path + it.name()) } + ) } } } - dfs(this) + flattenRecursively(this) return result } diff --git a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/html.kt b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/html.kt index aa69386c4..229c13d0d 100644 --- a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/html.kt +++ b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/html.kt @@ -74,8 +74,8 @@ internal fun tableJs(columns: List, id: Int, rootId: Int, nrow: var index = 0 val data = buildString { append("[") - fun dfs(col: ColumnDataForJs): Int { - val children = col.nested.map { dfs(it) } + fun appendColWithChildren(col: ColumnDataForJs): Int { + val children = col.nested.map { appendColWithChildren(it) } val colIndex = index++ val values = col.values.joinToString(",", prefix = "[", postfix = "]") { when (it) { @@ -100,7 +100,7 @@ internal fun tableJs(columns: List, id: Int, rootId: Int, nrow: return colIndex } - columns.forEach { dfs(it) } + columns.forEach { appendColWithChildren(it) } append("]") } val js = getResourceText( diff --git a/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/api/ColumnsSelectionDsl.kt b/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/api/ColumnsSelectionDsl.kt index dd5d16685..d14a0a5ef 100644 --- a/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/api/ColumnsSelectionDsl.kt +++ b/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/api/ColumnsSelectionDsl.kt @@ -3,181 +3,159 @@ package org.jetbrains.kotlinx.dataframe.api import io.kotest.matchers.shouldBe import 
org.jetbrains.kotlinx.dataframe.DataFrame import org.jetbrains.kotlinx.dataframe.annotations.DataSchema -import org.jetbrains.kotlinx.dataframe.impl.columns.asValueColumn -import org.jetbrains.kotlinx.dataframe.samples.api.TestBase -import org.jetbrains.kotlinx.dataframe.samples.api.age -import org.jetbrains.kotlinx.dataframe.samples.api.firstName -import org.jetbrains.kotlinx.dataframe.samples.api.isHappy -import org.jetbrains.kotlinx.dataframe.samples.api.lastName -import org.jetbrains.kotlinx.dataframe.samples.api.name +import org.jetbrains.kotlinx.dataframe.samples.api.* import org.junit.Test +import kotlin.reflect.typeOf -class ColumnsSelectionDslTests : TestBase() { +open class ColumnsSelectionDslTests : TestBase() { @Test fun first() { - df.select { all().first() } shouldBe df.select { first() } - df.select { all().first() } shouldBe df.select { name } - df.select { first() } shouldBe df.select { name } - df.select { first { it.name().startsWith("a") } } shouldBe df.select { age } - - df.select { - name.first { col -> - col.any { it == "Alice" } - } - } shouldBe df.select { - name.colsOf().first { col -> - col.any { it == "Alice" } - } - } + listOf( + df.select { name }, + df.select { first() }, + df.select { all().first() }, + df.select { first { it.name().startsWith("n") } }, + ).shouldAllBeEqual() - df.select { - "name".first { col -> - col.any { it == "Alice" } - } - } shouldBe df.select { name.firstName } + listOf( + df.select { name.firstName }, - df.select { - Person::name.first { col -> - col.any { it == "Alice" } - } - } shouldBe df.select { name.firstName } + df.select { name.first { col -> col.any { it == "Alice" } } }, + df.select { name.colsOf().first { col -> col.any { it == "Alice" } } }, - df.select { - pathOf("name").first { col -> - col.any { it == "Alice" } - } - } shouldBe df.select { name.firstName } + df.select { "name".first { col -> col.any { it == "Alice" } } }, + df.select { "name".colsOf(typeOf()).first { col -> col.any { it == 
"Alice" } } }, + + df.select { Person::name.first { col -> col.any { it == "Alice" } } }, + df.select { Person::name.colsOf(typeOf()).first { col -> col.any { it == "Alice" } } }, + + df.select { pathOf("name").first { col -> col.any { it == "Alice" } } }, + df.select { pathOf("name").colsOf(typeOf()).first { col -> col.any { it == "Alice" } } }, + + df.select { it["name"].first { col -> col.any { it == "Alice" } } }, + df.select { it["name"].colsOf(typeOf()).first { col -> col.any { it == "Alice" } } }, + ).shouldAllBeEqual() } @Test fun last() { - df.select { all().last() } shouldBe df.select { last() } - df.select { all().last() } shouldBe df.select { isHappy } - df.select { last() } shouldBe df.select { isHappy } - df.select { last { it.name().startsWith("a") } } shouldBe df.select { age } - df.select { - name.last { col -> - col.any { it == "Alice" } - } - } shouldBe df.select { - name.colsOf().last { col -> - col.any { it == "Alice" } - } - } + listOf( + df.select { isHappy }, + df.select { last() }, + df.select { all().last() }, + df.select { last { it.name().startsWith("is") } }, + ).shouldAllBeEqual() - df.select { - "name".last { col -> - col.any { it == "Alice" } - } - } shouldBe df.select { name.firstName } + listOf( + df.select { name.firstName }, - df.select { - Person::name.last { col -> - col.any { it == "Alice" } - } - } shouldBe df.select { name.firstName } + df.select { name.last { col -> col.any { it == "Alice" } } }, + df.select { name.colsOf().last { col -> col.any { it == "Alice" } } }, - df.select { - pathOf("name").last { col -> - col.any { it == "Alice" } - } - } shouldBe df.select { name.firstName } + df.select { "name".last { col -> col.any { it == "Alice" } } }, + df.select { "name".colsOf(typeOf()).last { col -> col.any { it == "Alice" } } }, + + df.select { Person::name.last { col -> col.any { it == "Alice" } } }, + df.select { Person::name.colsOf(typeOf()).last { col -> col.any { it == "Alice" } } }, + + df.select { 
pathOf("name").last { col -> col.any { it == "Alice" } } }, + df.select { pathOf("name").colsOf(typeOf()).last { col -> col.any { it == "Alice" } } }, + + df.select { it["name"].last { col -> col.any { it == "Alice" } } }, + df.select { it["name"].colsOf(typeOf()).last { col -> col.any { it == "Alice" } } }, + ).shouldAllBeEqual() } @Test fun single() { val singleDf = df.select { take(1) } - singleDf.select { all().single() } shouldBe singleDf.select { single() } - singleDf.select { all().single() } shouldBe singleDf.select { name } - singleDf.select { single() } shouldBe singleDf.select { name } - df.select { single { it.name().startsWith("a") } } shouldBe df.select { age } - - df.select { - name.single { col -> - col.any { it == "Alice" } - } - } shouldBe df.select { - name.colsOf().single { col -> - col.any { it == "Alice" } - } - } - df.select { - "name".single { col -> - col.any { it == "Alice" } - } - } shouldBe df.select { name.firstName } + listOf( + df.select { name }, + singleDf.select { name }, + singleDf.select { single() }, + singleDf.select { all().single() }, + df.select { single { it.name().startsWith("n") } }, + ).shouldAllBeEqual() - df.select { - Person::name.single { col -> - col.any { it == "Alice" } - } - } shouldBe df.select { name.firstName } + listOf( + df.select { name.firstName }, - df.select { - pathOf("name").single { col -> - col.any { it == "Alice" } - } - } shouldBe df.select { name.firstName } + df.select { name.single { col -> col.any { it == "Alice" } } }, + df.select { name.colsOf().single { col -> col.any { it == "Alice" } } }, + + df.select { "name".single { col -> col.any { it == "Alice" } } }, + df.select { "name".colsOf(typeOf()).single { col -> col.any { it == "Alice" } } }, + + df.select { Person::name.single { col -> col.any { it == "Alice" } } }, + df.select { Person::name.colsOf(typeOf()).single { col -> col.any { it == "Alice" } } }, + + df.select { pathOf("name").single { col -> col.any { it == "Alice" } } }, + 
df.select { pathOf("name").colsOf(typeOf()).single { col -> col.any { it == "Alice" } } }, + + df.select { it["name"].single { col -> col.any { it == "Alice" } } }, + df.select { it["name"].colsOf(typeOf()).single { col -> col.any { it == "Alice" } } }, + ).shouldAllBeEqual() } @Test fun col() { - df.select { col("age") } shouldBe df.select { age } - df.select { col("age") } shouldBe df.select { age } - df.select { col(pathOf("age")) } shouldBe df.select { age } - df.select { col(pathOf("age")) } shouldBe df.select { age } - df.select { col(Person::age) } shouldBe df.select { age } - - df.select { colGroup("name").col("firstName") } shouldBe df.select { name.firstName } - df.select { colGroup("name").col("firstName") } shouldBe df.select { name.firstName } - df.select { colGroup("name").col(pathOf("firstName")) } shouldBe df.select { name.firstName } - df.select { colGroup("name").col(pathOf("firstName")) } shouldBe df.select { name.firstName } - df.select { colGroup("name").col(Name::firstName) } shouldBe df.select { name.firstName } - } + listOf( + df.select { age }, - @DataSchema - interface FirstNames { - val firstName: String - val secondName: String? - val thirdName: String? 
- } + df.select { col("age") }, + df.select { col("age") }, - @DataSchema - interface MyName : Name { - val firstNames: FirstNames + df.select { col(pathOf("age")) }, + df.select { col(pathOf("age")) }, + + df.select { col(Person::age) }, + ).shouldAllBeEqual() + + listOf( + df.select { name.firstName }, + + df.select { colGroup("name").col("firstName") }, + df.select { colGroup("name").col("firstName") }, + + df.select { colGroup("name").col(pathOf("firstName")) }, + df.select { colGroup("name").col(pathOf("firstName")) }, + + df.select { colGroup("name").col(Name::firstName) }, + ).shouldAllBeEqual() } @Test fun colGroup() { - val firstNames by columnGroup() - val dfGroup = df.convert { name.firstName }.to { - val firstName by it - val secondName by it.map<_, String?> { null }.asValueColumn() - val thirdName by it.map<_, String?> { null }.asValueColumn() - - dataFrameOf(firstName, secondName, thirdName) - .cast(verify = true) - .asColumnGroup(firstNames) - } + listOf( + dfGroup.select { name }, + + dfGroup.select { colGroup("name") }, + dfGroup.select { colGroup("name") }, + + dfGroup.select { colGroup(pathOf("name")) }, + dfGroup.select { colGroup(pathOf("name")) }, + + dfGroup.select { colGroup(Person::name) }, + ).shouldAllBeEqual() - dfGroup.select { colGroup("name") } shouldBe dfGroup.select { name } - dfGroup.select { colGroup("name") } shouldBe dfGroup.select { name } - dfGroup.select { colGroup(pathOf("name")) } shouldBe dfGroup.select { name } - dfGroup.select { colGroup(pathOf("name")) } shouldBe dfGroup.select { name } - dfGroup.select { colGroup(Person::name) } shouldBe dfGroup.select { name } + listOf( + dfGroup.select { name.firstName }, + + dfGroup.select { colGroup("name").colGroup("firstName") }, + dfGroup.select { colGroup("name").colGroup("firstName") }, + + dfGroup.select { colGroup("name").colGroup(pathOf("firstName")) }, + dfGroup.select { colGroup("name").colGroup(pathOf("firstName")) }, - dfGroup.select { 
colGroup("name").colGroup("firstNames") } shouldBe dfGroup.select { name[firstNames] } - dfGroup.select { colGroup("name").colGroup("firstNames") } shouldBe dfGroup.select { name[firstNames] } - dfGroup.select { colGroup("name").colGroup(pathOf("firstNames")) } shouldBe dfGroup.select { name[firstNames] } - dfGroup.select { colGroup("name").colGroup(pathOf("firstNames")) } shouldBe dfGroup.select { name[firstNames] } - dfGroup.select { colGroup("name").colGroup(MyName::firstNames) } shouldBe dfGroup.select { name[firstNames] } + dfGroup.select { colGroup("name").colGroup(Name2::firstName) }, + ).shouldAllBeEqual() dfGroup.select { - "name"["firstNames"]["firstName", "secondName"] + "name"["firstName"]["firstName", "secondName"] } shouldBe dfGroup.select { - name[firstNames]["firstName"] and name[firstNames]["secondName"] + name.firstName["firstName"] and name.firstName["secondName"] } } @@ -204,321 +182,322 @@ class ColumnsSelectionDslTests : TestBase() { dataFrameOf(firstName, lastName, frameCol).asColumnGroup("name") } - dfWithFrames.select { frameCol("frameCol") } shouldBe dfWithFrames.select { frameCol } - dfWithFrames.select { frameCol("frameCol") } shouldBe dfWithFrames.select { frameCol } - dfWithFrames.select { frameCol(pathOf("frameCol")) } shouldBe dfWithFrames.select { frameCol } - dfWithFrames.select { frameCol(pathOf("frameCol")) } shouldBe dfWithFrames.select { frameCol } - dfWithFrames.select { frameCol(PersonWithFrame::frameCol) } shouldBe dfWithFrames.select { frameCol } - - dfWithFrames.select { colGroup("name").frameCol("frameCol") } shouldBe dfWithFrames.select { name[frameCol] } - dfWithFrames.select { colGroup("name").frameCol("frameCol") } shouldBe dfWithFrames.select { name[frameCol] } - dfWithFrames.select { colGroup("name").frameCol(pathOf("frameCol")) } shouldBe dfWithFrames.select { name[frameCol] } - dfWithFrames.select { colGroup("name").frameCol(pathOf("frameCol")) } shouldBe dfWithFrames.select { name[frameCol] } - 
dfWithFrames.select { colGroup("name").frameCol(PersonWithFrame::frameCol) } shouldBe dfWithFrames.select { name[frameCol] } + listOf( + dfWithFrames.select { frameCol }, + + dfWithFrames.select { frameCol("frameCol") }, + dfWithFrames.select { frameCol("frameCol") }, + + dfWithFrames.select { frameCol(pathOf("frameCol")) }, + dfWithFrames.select { frameCol(pathOf("frameCol")) }, + + dfWithFrames.select { frameCol(PersonWithFrame::frameCol) }, + ).shouldAllBeEqual() + + listOf( + dfWithFrames.select { name[frameCol] }, + + dfWithFrames.select { colGroup("name").frameCol("frameCol") }, + dfWithFrames.select { colGroup("name").frameCol("frameCol") }, + + dfWithFrames.select { colGroup("name").frameCol(pathOf("frameCol")) }, + dfWithFrames.select { colGroup("name").frameCol(pathOf("frameCol")) }, + + dfWithFrames.select { colGroup("name").frameCol(PersonWithFrame::frameCol) }, + ).shouldAllBeEqual() } @Test fun `cols and get with predicate`() { - df.select { all().cols() } shouldBe df.select { cols() } - df.select { all().cols { "e" in it.name() } } shouldBe df.select { - cols { "e" in it.name() } - } - df.select { all()[{ "e" in it.name() }] } shouldBe df.select { -// this[{ "e" in it.name() }] - cols { "e" in it.name() } - } + listOf( + df.select { cols(name, age, city, weight, isHappy) }, + df.select { all().cols() }, + df.select { cols() }, + df.select { all() }, + ).shouldAllBeEqual() - df.select { - name.cols { - "Name" in it.name() - } - } shouldBe df.select { - name.colsOf().cols { - "Name" in it.name() - } - } + listOf( + df.select { name }, + df.select { name }.select { all() }, + df.select { name }.select { cols() }, + df.select { name }.select { cols().all() }, + df.select { name }.select { all().cols() }, + ).shouldAllBeEqual() - df.select { -// name[{ "Name" in it.name() }] - name.cols { "Name" in it.name() } - } shouldBe df.select { - name.colsOf()[{ "Name" in it.name() }] - } + listOf( + df.select { cols(name, age, weight) }, - df.select { - 
"name".cols { "Name" in it.name() } - } shouldBe df.select { - Person::name.cols { "Name" in it.name() } - } + df.select { cols { "e" in it.name() } }, +// df.select { this[{ "e" in it.name() }] }, - df.select { - "name"[{ "Name" in it.name() }] - } shouldBe df.select { - Person::name[{ "Name" in it.name() }] - } + df.select { all().cols { "e" in it.name() } }, + df.select { all()[{ "e" in it.name() }] }, + ).shouldAllBeEqual() - df.select { - pathOf("name").cols { "Name" in it.name() } - } shouldBe df.select { - "name"[{ "Name" in it.name() }] - } + listOf( + df.select { name.firstName and name.lastName }, - df.select { - pathOf("name").cols { "Name" in it.name() } - } shouldBe df.select { - pathOf("name")[{ "Name" in it.name() }] - } + df.select { name.cols { "Name" in it.name() } }, +// df.select { name[{ "Name" in it.name() }] }, + + df.select { name.colsOf().cols { "Name" in it.name() } }, + df.select { name.colsOf()[{ "Name" in it.name() }] }, + + df.select { "name".cols { "Name" in it.name() } }, + df.select { "name"[{ "Name" in it.name() }] }, + + df.select { Person::name.cols { "Name" in it.name() } }, + df.select { Person::name[{ "Name" in it.name() }] }, + + df.select { pathOf("name").cols { "Name" in it.name() } }, + df.select { pathOf("name")[{ "Name" in it.name() }] }, + + df.select { it["name"].cols { "Name" in it.name() } }, + df.select { it["name"][{ "Name" in it.name() }] }, + ).shouldAllBeEqual() } @Test fun `cols and get with column references`() { - df.select { all().cols(name, age) } shouldBe df.select { cols(name, age) } - df.select { all()[name, age] } shouldBe df.select { this[name, age] } + listOf( + df.select { name and age }, + + df.select { cols(name, age) }, + df.select { this[name, age] }, + df.select { it[name, age] }, + + df.select { all().cols(name, age) }, + df.select { all()[name, age] }, + ).shouldAllBeEqual() val firstName by column() val lastName by column() - df.select { - name.cols(firstName, lastName) - } shouldBe df.select 
{ - name.colsOf().cols(firstName, lastName) - } - df.select { - name.cols(name.firstName, name.lastName) - } shouldBe df.select { - name.colsOf().cols(name.firstName, name.lastName) - }.also { it.print() } - - df.select { -// name[name.firstName, name.lastName] - name.cols(name.firstName, name.lastName) - } shouldBe df.select { - name.colsOf()[name.firstName, name.lastName] - } + listOf( + df.select { name.firstName and name.lastName }, - df.select { - "name".cols(name.firstName, name.lastName) - } shouldBe df.select { - Person::name.cols(name.firstName, name.lastName) - } + df.select { name.cols(firstName, lastName) }, +// df.select { name[name.firstName, name.lastName] }, - df.select { - "name"[name.firstName, name.lastName] - } shouldBe df.select { - Person::name[name.firstName, name.lastName] - } + df.select { name.colsOf().cols(firstName, lastName) }, + df.select { name.colsOf()[firstName, lastName] }, - df.select { - pathOf("name").cols(name.firstName, name.lastName) - } shouldBe df.select { - pathOf("name")[name.firstName, name.lastName] - } + df.select { name.select { cols(this@select.firstName, this@select.lastName) } }, + + df.select { "name".cols(firstName, lastName) }, + df.select { "name"[firstName, lastName] }, + + df.select { Person::name.cols(firstName, lastName) }, + df.select { Person::name[firstName, lastName] }, + + df.select { pathOf("name").cols(firstName, lastName) }, + df.select { pathOf("name")[firstName, lastName] }, + + df.select { it["name"].cols(firstName, lastName) }, + df.select { it["name"][firstName, lastName] }, + ).shouldAllBeEqual() } @Test fun `cols and get with column names`() { - df.select { all().cols("name", "age") } shouldBe df.select { cols("name", "age") } - df.select { all()["name", "age"] } shouldBe df.select { this["name", "age"] } + listOf( + df.select { name and age }, - df.select { - name.cols("firstName", "lastName") - } shouldBe df.select { - name.colsOf().cols("firstName", "lastName") - } + df.select { 
cols("name", "age") }, + df.select { this["name", "age"] }, + df.select { it["name", "age"] }, - df.select { -// name["firstName", "lastName"] - name.cols("firstName", "lastName") - } shouldBe df.select { - name.colsOf()["firstName", "lastName"] - } + df.select { all().cols("name", "age") }, + df.select { all()["name", "age"] }, + ).shouldAllBeEqual() - df.select { - "name".cols("firstName", "lastName") - } shouldBe df.select { - Person::name.cols("firstName", "lastName") - } + listOf( + df.select { name.firstName and name.lastName }, - df.select { - "name"["firstName", "lastName"] - } shouldBe df.select { - Person::name["firstName", "lastName"] - } + df.select { name.cols("firstName", "lastName") }, +// df.select { name["firstName", "lastName"] }, - df.select { - pathOf("name").cols("firstName", "lastName") - } shouldBe df.select { - pathOf("name")["firstName", "lastName"] - } + df.select { name.colsOf().cols("firstName", "lastName") }, + df.select { name.colsOf()["firstName", "lastName"] }, + + df.select { "name".cols("firstName", "lastName") }, + df.select { "name"["firstName", "lastName"] }, + df.select { "name"["firstName"] and "name"["lastName"] }, + + df.select { Person::name.cols("firstName", "lastName") }, + df.select { Person::name["firstName", "lastName"] }, + + df.select { pathOf("name").cols("firstName", "lastName") }, + df.select { pathOf("name")["firstName", "lastName"] }, + + df.select { it["name"].cols("firstName", "lastName") }, + df.select { it["name"]["firstName", "lastName"] }, + ).shouldAllBeEqual() } @Test fun `cols and get with column paths`() { listOf( - df.select { - all().cols(pathOf("name", "firstName")) - }, - df.select { - cols(pathOf("name", "firstName")) - }, - df.select { - pathOf("name", "firstName") - }, - df.select { - name.firstName - }, - ).reduce { acc, dataFrame -> - acc shouldBe dataFrame - dataFrame - } + df.select { name.firstName }, - df.select { all().cols(pathOf("name"), pathOf("age")) } shouldBe df.select { - cols( - 
pathOf("name"), - pathOf("age") - ) - } - df.select { all()[pathOf("name"), pathOf("age")] } shouldBe df.select { this[pathOf("name"), pathOf("age")] } + df.select { cols(pathOf("name", "firstName")) }, + df.select { this[pathOf("name", "firstName")] }, + df.select { it[pathOf("name", "firstName")] }, - df.select { - name.cols(pathOf("firstName"), pathOf("lastName")) - } shouldBe df.select { - name.colsOf().cols(pathOf("firstName"), pathOf("lastName")) - } + df.select { all().cols(pathOf("name", "firstName")) }, + df.select { all()[pathOf("name", "firstName")] }, - df.select { -// name[pathOf("firstName"), pathOf("lastName")] - name.cols(pathOf("firstName"), pathOf("lastName")) - } shouldBe df.select { - name.colsOf()[pathOf("firstName"), pathOf("lastName")] - } + df.select { pathOf("name", "firstName") }, + ).shouldAllBeEqual() - df.select { - "name".cols(pathOf("firstName"), pathOf("lastName")) - } shouldBe df.select { - Person::name.cols(pathOf("firstName"), pathOf("lastName")) - } + listOf( + df.select { name and age }, - df.select { - "name"[pathOf("firstName"), pathOf("lastName")] - } shouldBe df.select { - Person::name[pathOf("firstName"), pathOf("lastName")] - } + df.select { cols(pathOf("name"), pathOf("age")) }, + df.select { this[pathOf("name"), pathOf("age")] }, + df.select { it[pathOf("name"), pathOf("age")] }, - df.select { - pathOf("name").cols(pathOf("firstName"), pathOf("lastName")) - } shouldBe df.select { - pathOf("name")[pathOf("firstName"), pathOf("lastName")] - } + df.select { all().cols(pathOf("name"), pathOf("age")) }, + df.select { all()[pathOf("name"), pathOf("age")] }, + ).shouldAllBeEqual() + + listOf( + df.select { name.firstName and name.lastName }, + + df.select { name.cols(pathOf("firstName"), pathOf("lastName")) }, +// df.select { name[pathOf("firstName"), pathOf("lastName")] }, + + df.select { name.colsOf().cols(pathOf("firstName"), pathOf("lastName")) }, + df.select { name.colsOf()[pathOf("firstName"), pathOf("lastName")] }, + + 
df.select { "name".cols(pathOf("firstName"), pathOf("lastName")) }, + df.select { "name"[pathOf("firstName"), pathOf("lastName")] }, + + df.select { Person::name.cols(pathOf("firstName"), pathOf("lastName")) }, + df.select { Person::name[pathOf("firstName"), pathOf("lastName")] }, + + df.select { pathOf("name").cols(pathOf("firstName"), pathOf("lastName")) }, + df.select { pathOf("name")[pathOf("firstName"), pathOf("lastName")] }, + + df.select { it["name"].cols(pathOf("firstName"), pathOf("lastName")) }, + df.select { it["name"][pathOf("firstName"), pathOf("lastName")] }, + ).shouldAllBeEqual() } @Test fun `cols and get with KProperties`() { - df.select { all().cols(Person::name, Person::age) } shouldBe df.select { cols(Person::name, Person::age) } - df.select { all()[Person::name, Person::age] } shouldBe df.select { this[Person::name, Person::age] } + listOf( + df.select { name and age }, - df.select { - name.cols(Name::firstName, Name::lastName) - } shouldBe df.select { - name.colsOf().cols(Name::firstName, Name::lastName) - } + df.select { cols(Person::name, Person::age) }, + df.select { this[Person::name, Person::age] }, + df.select { it[Person::name, Person::age] }, - df.select { - name[Name::firstName, Name::lastName] - } shouldBe df.select { - name.colsOf()[Name::firstName, Name::lastName] - } + df.select { all().cols(Person::name, Person::age) }, + df.select { all()[Person::name, Person::age] }, + ).shouldAllBeEqual() - df.select { - "name".cols(Name::firstName, Name::lastName) - } shouldBe df.select { - Person::name.cols(Name::firstName, Name::lastName) - } + listOf( + df.select { name.firstName and name.lastName }, - df.select { - "name"[Name::firstName, Name::lastName] - } shouldBe df.select { - Person::name[Name::firstName, Name::lastName] - } + df.select { name.cols(Name::firstName, Name::lastName) }, + df.select { name[Name::firstName, Name::lastName] }, - df.select { - pathOf("name").cols(Name::firstName, Name::lastName) - } shouldBe df.select { - 
pathOf("name")[Name::firstName, Name::lastName] - } + df.select { name.colsOf().cols(Name::firstName, Name::lastName) }, + df.select { name.colsOf()[Name::firstName, Name::lastName] }, + + df.select { "name".cols(Name::firstName, Name::lastName) }, + df.select { "name"[Name::firstName, Name::lastName] }, + + df.select { Person::name.cols(Name::firstName, Name::lastName) }, + df.select { Person::name[Name::firstName, Name::lastName] }, + + df.select { pathOf("name").cols(Name::firstName, Name::lastName) }, + df.select { pathOf("name")[Name::firstName, Name::lastName] }, + + df.select { it["name"].cols(Name::firstName, Name::lastName) }, + df.select { it["name"][Name::firstName, Name::lastName] }, + ).shouldAllBeEqual() } @Test fun `cols and get with indices`() { - df.select { all().cols(0, 1) } shouldBe df.select { cols(0, 1) } - df.select { all()[0, 1] } shouldBe df.select { this[0, 1] } + listOf( + df.select { name and age }, - df.select { - name.cols(0, 1) - } shouldBe df.select { - name.colsOf().cols(0, 1) - } + df.select { cols(0, 1) }, + df.select { this[0, 1] }, + df.select { it[0, 1] }, - df.select { -// name[0, 1] - name.cols(0, 1) - } shouldBe df.select { - name.colsOf()[0, 1] - } + df.select { all().cols(0, 1) }, + df.select { all()[0, 1] }, + ).shouldAllBeEqual() - df.select { - "name".cols(0, 1) - } shouldBe df.select { - Person::name.cols(0, 1) - } + listOf( + df.select { name.firstName and name.lastName }, - df.select { - "name"[0, 1] - } shouldBe df.select { - Person::name[0, 1] - } + df.select { name.cols(0, 1) }, +// df.select { name[0, 1] }, - df.select { - pathOf("name").cols(0, 1) - } shouldBe df.select { - pathOf("name")[0, 1] - } + df.select { name.colsOf().cols(0, 1) }, + df.select { name.colsOf()[0, 1] }, + + df.select { "name".cols(0, 1) }, + df.select { "name"[0, 1] }, + + df.select { Person::name.cols(0, 1) }, + df.select { Person::name[0, 1] }, + + df.select { pathOf("name").cols(0, 1) }, + df.select { pathOf("name")[0, 1] }, + + 
df.select { it["name"].cols(0, 1) }, +// df.select { it["name"][0, 1] }, + ).shouldAllBeEqual() } @Test fun `cols and get with range`() { - df.select { all().cols(0..1) } shouldBe df.select { cols(0..1) } - df.select { all()[0..1] } shouldBe df.select { this[0..1] } + listOf( + df.select { name and age }, - df.select { - name.cols(0..1) - } shouldBe df.select { - name.colsOf().cols(0..1) - } + df.select { cols(0..1) }, + df.select { this[0..1] }, + df.select { it[0..1] }, - df.select { -// name[0..1] - name.cols(0..1) - } shouldBe df.select { - name.colsOf()[0..1] - } + df.select { all().cols(0..1) }, + df.select { all()[0..1] }, + ).shouldAllBeEqual() - df.select { - "name".cols(0..1) - } shouldBe df.select { - Person::name.cols(0..1) - } + listOf( + df.select { name.firstName and name.lastName }, - df.select { - "name"[0..1] - } shouldBe df.select { - Person::name[0..1] - } + df.select { name.cols(0..1) }, +// df.select { name[0..1] }, - df.select { - pathOf("name").cols(0..1) - } shouldBe df.select { - pathOf("name")[0..1] - } + df.select { name.colsOf().cols(0..1) }, + df.select { name.colsOf()[0..1] }, + + df.select { "name".cols(0..1) }, + df.select { "name"[0..1] }, + + df.select { Person::name.cols(0..1) }, + df.select { Person::name[0..1] }, + + df.select { pathOf("name").cols(0..1) }, + df.select { pathOf("name")[0..1] }, + + df.select { it["name"].cols(0..1) }, +// df.select { it["name"][0..1] }, + ).shouldAllBeEqual() + } + + @Test + fun roots() { + df.select { cols(name.firstName, name.lastName, age).roots() } shouldBe + df.select { cols(name.firstName, name.lastName, age) } + + df.select { cols(name.firstName, name.lastName, age, name).roots() } shouldBe + df.select { cols(name, age) } } } diff --git a/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/api/move.kt b/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/api/move.kt index f585e6b22..5c7357b1c 100644 --- 
a/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/api/move.kt +++ b/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/api/move.kt @@ -27,14 +27,18 @@ class MoveTests { } @Test - fun `select all dfs`() { - val selected = grouped.getColumnsWithPaths { all().allDfs() }.map { it.path.joinToString(".") } + fun `select all allRecursively`() { + val selected = grouped + .getColumnsWithPaths { children { !it.isColumnGroup() }.recursively() } + .map { it.path.joinToString(".") } selected shouldBe listOf("a.b", "a.c.d", "b.c", "b.d", "e.f") } @Test fun `batch ungrouping`() { - val ungrouped = grouped.move { dfs { it.depth() > 0 && !it.isColumnGroup() } }.into { pathOf(it.path.joinToString(".")) } + val ungrouped = grouped.move { + cols { it.depth() > 0 && !it.isColumnGroup() }.rec() + }.into { pathOf(it.path.joinToString(".")) } ungrouped.columnNames() shouldBe listOf("q", "a.b", "a.c.d", "b.c", "b.d", "w", "e.f", "r") } @@ -64,15 +68,19 @@ class MoveTests { } @Test - fun `select Dfs`() { - val selected = grouped.select { it["a"].dfs { !it.isColumnGroup() } } + fun `select recursively`() { + val selected = grouped.select { + it["a"].cols { !it.isColumnGroup() }.recursively() + } selected.columnNames() shouldBe listOf("b", "d") } @Test fun `columnsWithPath in selector`() { val selected = grouped.getColumnsWithPaths { it["a"] } - val actual = grouped.getColumnsWithPaths { selected.map { it.allDfs() }.toColumnSet() } + val actual = grouped.getColumnsWithPaths { + selected.map { it.cols { !it.isColumnGroup() }.recursively() }.toColumnSet() + } actual.map { it.path.joinToString(".") } shouldBe listOf("a.b", "a.c.d") } diff --git a/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/api/pivot.kt b/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/api/pivot.kt index 727165729..36cc299a4 100644 --- a/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/api/pivot.kt +++ 
b/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/api/pivot.kt @@ -41,7 +41,7 @@ class PivotTests { } pivoted.columnsCount() shouldBe 3 pivoted.rowsCount() shouldBe 2 - val cols = pivoted.getColumns { except(a).allDfs() } + val cols = pivoted.getColumns { except(a).cols { !it.isColumnGroup() }.rec() } cols.size shouldBe 4 cols.forEach { it.type() shouldBe typeOf() diff --git a/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/api/recursively.kt b/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/api/recursively.kt new file mode 100644 index 000000000..38e89fce0 --- /dev/null +++ b/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/api/recursively.kt @@ -0,0 +1,108 @@ +package org.jetbrains.kotlinx.dataframe.api + +import io.kotest.matchers.shouldBe +import io.kotest.matchers.shouldNotBe +import org.jetbrains.kotlinx.dataframe.columns.ColumnWithPath +import org.jetbrains.kotlinx.dataframe.samples.api.TestBase +import org.jetbrains.kotlinx.dataframe.samples.api.city +import org.jetbrains.kotlinx.dataframe.samples.api.firstName +import org.jetbrains.kotlinx.dataframe.samples.api.name +import org.junit.Test + +class Recursively : TestBase() { + + fun List>.print() { + forEach { + if (it.isValueColumn()) println("${it.name}: ${it.type()}") + else it.print() + } + println() + } + + infix fun List>.shouldBe(other: List>) { + this.map { it.name to it.path } shouldBe other.map { it.name to it.path } + } + + infix fun List>.shouldNotBe(other: List>) { + this.map { it.name to it.path } shouldNotBe other.map { it.name to it.path } + } + + private val recursivelyGoal = dfGroup.getColumnsWithPaths { dfs { true } } + .sortedBy { it.name } + + private val recursivelyNoGroups = dfGroup.getColumnsWithPaths { allDfs(false) } + .sortedBy { it.name } + + private val recursivelyString = dfGroup.getColumnsWithPaths { dfsOf() } + .sortedBy { it.name } + + @Test + fun `first, last, and single`() { + 
listOf( + dfGroup.select { name.firstName.firstName }, + + dfGroup.select { first { col -> col.any { it == "Alice" } }.recursively() }, + dfGroup.select { last { col -> col.any { it == "Alice" } }.recursively() }, + dfGroup.select { single { col -> col.any { it == "Alice" } }.recursively() }, + ).shouldAllBeEqual() + + listOf( + dfGroup.select { city }, + + dfGroup.select { first { col -> col.any { it == "London" } }.recursively() }, + dfGroup.select { last { col -> col.any { it == "London" } }.recursively() }, + dfGroup.select { single { col -> col.any { it == "London" } }.recursively() }, + ).shouldAllBeEqual() + } + + @Test + fun `children`() { + dfGroup.getColumnsWithPaths { children().recursively() }.print() + dfGroup.getColumnsWithPaths { name.children() }.print() + } + + @Test + fun `groups`() { + listOf( + df.select { name }, + df.select { groups().recursively() }, + df.select { groups() }, + df.select { all().groups() }, + df.select { all().groups().rec() }, + ).shouldAllBeEqual() + + dfGroup.select { groups() } shouldBe dfGroup.select { name } + dfGroup.select { groups().rec() } shouldBe dfGroup.select { name and name.firstName } + } + + @Test + fun `all recursively`() { + dfGroup.getColumnsWithPaths { all().recursively() }.sortedBy { it.name } shouldBe recursivelyGoal + dfGroup.getColumnsWithPaths { all().cols { !it.isColumnGroup() }.rec() } + .sortedBy { it.name } shouldBe recursivelyNoGroups + } + + @Test + fun `cols recursively`() { + dfGroup.getColumnsWithPaths { cols().recursively() }.sortedBy { it.name } shouldBe recursivelyGoal + } + + @Test + fun `colsOf recursively`() { + dfGroup.getColumnsWithPaths { colsOf().recursively() }.sortedBy { it.name } shouldBe recursivelyString + } + + @Test + fun `all allRecursively`() { + dfGroup.getColumnsWithPaths { all().all().recursively() }.sortedBy { it.name } shouldBe recursivelyGoal + dfGroup.getColumnsWithPaths { all().cols { !it.isColumnGroup() }.recursively() } + .sortedBy { it.name } shouldBe 
recursivelyNoGroups + } + + @Test + fun `cols allRecursively`() { + dfGroup.getColumnsWithPaths { cols().all().recursively() }.sortedBy { it.name } shouldBe recursivelyGoal + dfGroup.getColumnsWithPaths { cols().cols { !it.isColumnGroup() }.recursively() } + .sortedBy { it.name } shouldBe recursivelyNoGroups + } +} diff --git a/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/api/reorder.kt b/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/api/reorder.kt index 87e28271c..94e64ed3c 100644 --- a/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/api/reorder.kt +++ b/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/api/reorder.kt @@ -24,7 +24,7 @@ class ReorderTests { sorted1.columnNames() shouldBe listOf("b", "a") sorted1["a"].asColumnGroup().columnNames() shouldBe listOf("a", "c") - val sorted2 = df.reorder { allDfs(true) }.byName() + val sorted2 = df.reorder { all().recursively() }.byName() sorted2.columnNames() shouldBe listOf("a", "b") sorted2["a"].asColumnGroup().columnNames() shouldBe listOf("a", "c") } diff --git a/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/PlaylistJsonTest.kt b/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/PlaylistJsonTest.kt index 02cdc6642..4366dd86a 100644 --- a/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/PlaylistJsonTest.kt +++ b/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/PlaylistJsonTest.kt @@ -136,7 +136,9 @@ class PlaylistJsonTest { @Test fun `deep batch update all`() { - val updated = item.convert { dfs { it.name() == "url" } }.with { (it as? String)?.let { IMG(it) } } + val updated = item + .convert { cols { it.name() == "url" }.rec() } + .with { (it as? 
String)?.let { IMG(it) } } updated.snippet.thumbnails.default.url.type() shouldBe typeOf() updated.snippet.thumbnails.maxres.url.type() shouldBe typeOf() updated.snippet.thumbnails.standard.url.type() shouldBe typeOf() diff --git a/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/samples/api/Access.kt b/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/samples/api/Access.kt index 438c3bbf8..f44ce9fd2 100644 --- a/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/samples/api/Access.kt +++ b/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/samples/api/Access.kt @@ -701,7 +701,7 @@ class Access : TestBase() { fun columnSelectorsUsages() { // SampleStart df.select { age and name } - df.fillNaNs { dfsOf() }.withZero() + df.fillNaNs { colsOf().recursively() }.withZero() df.remove { cols { it.hasNulls() } } df.group { cols { it.data != name } }.into { "nameless" } df.update { city }.notNull { it.lowercase() } @@ -739,8 +739,8 @@ class Access : TestBase() { // all children of ColumnGroup df.select { name.all() } - // depth-first-search traversal of all children columns - df.select { name.allDfs() } + // recursive traversal of all children columns excluding ColumnGroups + df.select { name.cols { !it.isColumnGroup() }.recursively() } // SampleEnd } @@ -777,8 +777,8 @@ class Access : TestBase() { // all children of ColumnGroup df.select { name.all() } - // depth-first-search traversal of all children columns - df.select { name.allDfs() } + // recursive traversal of all children columns excluding ColumnGroups + df.select { name.cols { !it.isColumnGroup() }.recursively() } // SampleEnd } @@ -812,8 +812,8 @@ class Access : TestBase() { // all children of ColumnGroup df.select { Person::name.all() } - // depth-first-search traversal of all children columns - df.select { Person::name.allDfs() } + // recursive traversal of all children columns excluding groups + df.select { Person::name.cols { 
!it.isColumnGroup() }.recursively() } // SampleEnd } @@ -846,8 +846,8 @@ class Access : TestBase() { // all children of ColumnGroup df.select { "name".all() } - // depth-first-search traversal of all children columns - df.select { "name".allDfs() } + // recursive traversal of all children columns excluding groups + df.select { "name".cols { !it.isColumnGroup() }.recursively() } // SampleEnd } @@ -905,17 +905,17 @@ class Access : TestBase() { Person::name.single { it.name().startsWith("first") } } - // depth-first-search traversal of all columns, excluding ColumnGroups from result - df.select { allDfs() } + // recursive traversal of all columns, excluding ColumnGroups from result + df.select { cols { !it.isColumnGroup() }.recursively() } // depth-first-search traversal of all columns, including ColumnGroups in result - df.select { allDfs(includeGroups = true) } + df.select { all().recursively() } - // depth-first-search traversal with condition - df.select { dfs { it.name().contains(":") } } + // recursive traversal with condition + df.select { cols { it.name().contains(":") }.recursively() } - // depth-first-search traversal of columns of given type - df.select { dfsOf() } + // recursive traversal of columns of given type + df.select { colsOf().rec() } // all columns except given column set df.select { except { colsOf() } } @@ -929,19 +929,19 @@ class Access : TestBase() { @TransformDataFrameExpressions fun columnSelectorsModifySet() { // SampleStart - // first/last n columns in column set - df.select { allDfs().take(3) } - df.select { allDfs().takeLast(3) } + // first/last n value- and frame columns in column set + df.select { cols { !it.isColumnGroup() }.recursively().take(3) } + df.select { cols { !it.isColumnGroup() }.recursively().takeLast(3) } - // all except first/last n columns in column set - df.select { allDfs().drop(3) } - df.select { allDfs().dropLast(3) } + // all except first/last n value- and frame columns in column set + df.select { cols { 
!it.isColumnGroup() }.recursively().drop(3) } + df.select { cols { !it.isColumnGroup() }.recursively().dropLast(3) } // filter column set by condition - df.select { allDfs().filter { it.name().startsWith("year") } } + df.select { cols { !it.isColumnGroup() }.rec().filter { it.name().startsWith("year") } } // exclude columns from column set - df.select { allDfs().except { age } } + df.select { cols { !it.isColumnGroup() }.rec().except { age } } // keep only unique columns df.select { (colsOf() and age).distinct() } diff --git a/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/samples/api/Modify.kt b/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/samples/api/Modify.kt index b71f65655..aceb0156e 100644 --- a/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/samples/api/Modify.kt +++ b/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/samples/api/Modify.kt @@ -23,7 +23,6 @@ import org.jetbrains.kotlinx.dataframe.api.convert import org.jetbrains.kotlinx.dataframe.api.convertTo import org.jetbrains.kotlinx.dataframe.api.dataFrameOf import org.jetbrains.kotlinx.dataframe.api.default -import org.jetbrains.kotlinx.dataframe.api.dfsOf import org.jetbrains.kotlinx.dataframe.api.dropNulls import org.jetbrains.kotlinx.dataframe.api.explode import org.jetbrains.kotlinx.dataframe.api.fill @@ -120,7 +119,7 @@ class Modify : TestBase() { fun update() { // SampleStart df.update { age }.with { it * 2 } - df.update { dfsOf() }.with { it.uppercase() } + df.update { colsOf().recursively() }.with { it.uppercase() } df.update { weight }.at(1..4).notNull { it / 2 } df.update { name.lastName and age }.at(1, 3, 4).withNull() // SampleEnd @@ -181,7 +180,7 @@ class Modify : TestBase() { fun convert() { // SampleStart df.convert { age }.with { it.toDouble() } - df.convert { dfsOf() }.with { it.toCharArray().toList() } + df.convert { colsOf().recursively() }.with { it.toCharArray().toList() } // SampleEnd } 
@@ -338,7 +337,7 @@ class Modify : TestBase() { // a.b.e -> be // c.d.e -> de - df.move { dfs { it.name() == "e" } }.toTop { it.parentName + it.name() } + df.move { cols { it.name() == "e" }.recursively() }.toTop { it.parentName + it.name() } // SampleEnd } @@ -779,7 +778,7 @@ class Modify : TestBase() { @Test @TransformDataFrameExpressions - fun concatDfs() { + fun concatDataFrames() { val df1 = df val df2 = df // SampleStart @@ -906,7 +905,7 @@ class Modify : TestBase() { @Test @TransformDataFrameExpressions - fun addDfs() { + fun addDataFrames() { val df1 = df.select { name named "name2" } val df2 = df.select { age named "age2" } // SampleStart diff --git a/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/samples/api/TestBase.kt b/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/samples/api/TestBase.kt index 4f738e1fe..4e4d2ce04 100644 --- a/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/samples/api/TestBase.kt +++ b/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/samples/api/TestBase.kt @@ -1,13 +1,12 @@ package org.jetbrains.kotlinx.dataframe.samples.api +import io.kotest.matchers.should import io.kotest.matchers.shouldBe import org.jetbrains.kotlinx.dataframe.DataRow import org.jetbrains.kotlinx.dataframe.annotations.DataSchema -import org.jetbrains.kotlinx.dataframe.api.cast -import org.jetbrains.kotlinx.dataframe.api.dataFrameOf -import org.jetbrains.kotlinx.dataframe.api.group -import org.jetbrains.kotlinx.dataframe.api.into +import org.jetbrains.kotlinx.dataframe.api.* import org.jetbrains.kotlinx.dataframe.explainer.PluginCallbackProxy +import org.jetbrains.kotlinx.dataframe.impl.columns.asValueColumn import org.junit.After import org.junit.Before @@ -41,6 +40,16 @@ public open class TestBase { "Charlie", "Byrd", 30, "Moscow", 90, true ).group("firstName", "lastName").into("name").cast() + val dfGroup = df.convert { name.firstName }.to { + val firstName by it + val 
secondName by it.map<_, String?> { null }.asValueColumn() + val thirdName by it.map<_, String?> { null }.asValueColumn() + + dataFrameOf(firstName, secondName, thirdName) + .cast(verify = true) + .asColumnGroup("firstName") + }.cast(verify = true) + @DataSchema interface Name { val firstName: String @@ -56,5 +65,34 @@ public open class TestBase { val isHappy: Boolean } + @DataSchema + interface FirstNames { + val firstName: String + val secondName: String? + val thirdName: String? + } + + @DataSchema + interface Name2 { + val firstName: DataRow + val lastName: String + } + + @DataSchema + interface Person2 { + val age: Int + val city: String? + val name: DataRow + val weight: Int? + val isHappy: Boolean + } + infix fun T.willBe(expected: U?) = shouldBe(expected) + + fun Iterable.shouldAllBeEqual(): Iterable { + this should { + it.reduce { a, b -> a shouldBe b; b } + } + return this + } } diff --git a/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/testSets/person/DataFrameTests.kt b/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/testSets/person/DataFrameTests.kt index c33a44cc6..4bd50bad7 100644 --- a/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/testSets/person/DataFrameTests.kt +++ b/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/testSets/person/DataFrameTests.kt @@ -5,181 +5,20 @@ import io.kotest.matchers.doubles.ToleranceMatcher import io.kotest.matchers.should import io.kotest.matchers.shouldBe import io.kotest.matchers.shouldNotBe -import org.jetbrains.kotlinx.dataframe.AnyFrame -import org.jetbrains.kotlinx.dataframe.AnyRow -import org.jetbrains.kotlinx.dataframe.DataFrame -import org.jetbrains.kotlinx.dataframe.DataRow -import org.jetbrains.kotlinx.dataframe.RowExpression +import org.jetbrains.kotlinx.dataframe.* import org.jetbrains.kotlinx.dataframe.annotations.ColumnName import org.jetbrains.kotlinx.dataframe.annotations.DataSchema -import 
org.jetbrains.kotlinx.dataframe.api.ExcessiveColumns -import org.jetbrains.kotlinx.dataframe.api.GroupBy -import org.jetbrains.kotlinx.dataframe.api.ParserOptions -import org.jetbrains.kotlinx.dataframe.api.add -import org.jetbrains.kotlinx.dataframe.api.addAll -import org.jetbrains.kotlinx.dataframe.api.addId -import org.jetbrains.kotlinx.dataframe.api.all -import org.jetbrains.kotlinx.dataframe.api.allNulls -import org.jetbrains.kotlinx.dataframe.api.append -import org.jetbrains.kotlinx.dataframe.api.asColumnGroup -import org.jetbrains.kotlinx.dataframe.api.asDataFrame -import org.jetbrains.kotlinx.dataframe.api.asGroupBy -import org.jetbrains.kotlinx.dataframe.api.asIterable -import org.jetbrains.kotlinx.dataframe.api.at -import org.jetbrains.kotlinx.dataframe.api.between -import org.jetbrains.kotlinx.dataframe.api.by -import org.jetbrains.kotlinx.dataframe.api.cast -import org.jetbrains.kotlinx.dataframe.api.chunked -import org.jetbrains.kotlinx.dataframe.api.colsOf -import org.jetbrains.kotlinx.dataframe.api.column -import org.jetbrains.kotlinx.dataframe.api.columnGroup -import org.jetbrains.kotlinx.dataframe.api.columnOf -import org.jetbrains.kotlinx.dataframe.api.concat -import org.jetbrains.kotlinx.dataframe.api.convert -import org.jetbrains.kotlinx.dataframe.api.convertTo -import org.jetbrains.kotlinx.dataframe.api.corr -import org.jetbrains.kotlinx.dataframe.api.count -import org.jetbrains.kotlinx.dataframe.api.countDistinct -import org.jetbrains.kotlinx.dataframe.api.dataFrameOf -import org.jetbrains.kotlinx.dataframe.api.default -import org.jetbrains.kotlinx.dataframe.api.describe -import org.jetbrains.kotlinx.dataframe.api.dfsOf -import org.jetbrains.kotlinx.dataframe.api.digitize -import org.jetbrains.kotlinx.dataframe.api.distinct -import org.jetbrains.kotlinx.dataframe.api.distinctBy -import org.jetbrains.kotlinx.dataframe.api.div -import org.jetbrains.kotlinx.dataframe.api.drop -import org.jetbrains.kotlinx.dataframe.api.dropLast -import 
org.jetbrains.kotlinx.dataframe.api.dropNA -import org.jetbrains.kotlinx.dataframe.api.dropNulls -import org.jetbrains.kotlinx.dataframe.api.dropWhile -import org.jetbrains.kotlinx.dataframe.api.explode -import org.jetbrains.kotlinx.dataframe.api.expr -import org.jetbrains.kotlinx.dataframe.api.fill -import org.jetbrains.kotlinx.dataframe.api.fillNulls -import org.jetbrains.kotlinx.dataframe.api.filter -import org.jetbrains.kotlinx.dataframe.api.first -import org.jetbrains.kotlinx.dataframe.api.forEach -import org.jetbrains.kotlinx.dataframe.api.forEachIndexed -import org.jetbrains.kotlinx.dataframe.api.frameColumn -import org.jetbrains.kotlinx.dataframe.api.gather -import org.jetbrains.kotlinx.dataframe.api.getColumn -import org.jetbrains.kotlinx.dataframe.api.getColumnGroup -import org.jetbrains.kotlinx.dataframe.api.getColumns -import org.jetbrains.kotlinx.dataframe.api.getFrameColumn -import org.jetbrains.kotlinx.dataframe.api.getValue -import org.jetbrains.kotlinx.dataframe.api.group -import org.jetbrains.kotlinx.dataframe.api.groupBy -import org.jetbrains.kotlinx.dataframe.api.implode -import org.jetbrains.kotlinx.dataframe.api.indices -import org.jetbrains.kotlinx.dataframe.api.inplace -import org.jetbrains.kotlinx.dataframe.api.into -import org.jetbrains.kotlinx.dataframe.api.intoColumns -import org.jetbrains.kotlinx.dataframe.api.intoList -import org.jetbrains.kotlinx.dataframe.api.intoRows -import org.jetbrains.kotlinx.dataframe.api.isColumnGroup -import org.jetbrains.kotlinx.dataframe.api.isFrameColumn -import org.jetbrains.kotlinx.dataframe.api.isNA -import org.jetbrains.kotlinx.dataframe.api.isNumber -import org.jetbrains.kotlinx.dataframe.api.keysInto -import org.jetbrains.kotlinx.dataframe.api.last -import org.jetbrains.kotlinx.dataframe.api.leftJoin -import org.jetbrains.kotlinx.dataframe.api.lowercase -import org.jetbrains.kotlinx.dataframe.api.map -import org.jetbrains.kotlinx.dataframe.api.mapToFrame -import 
org.jetbrains.kotlinx.dataframe.api.match -import org.jetbrains.kotlinx.dataframe.api.matches -import org.jetbrains.kotlinx.dataframe.api.max -import org.jetbrains.kotlinx.dataframe.api.maxBy -import org.jetbrains.kotlinx.dataframe.api.mean -import org.jetbrains.kotlinx.dataframe.api.meanFor -import org.jetbrains.kotlinx.dataframe.api.meanOf -import org.jetbrains.kotlinx.dataframe.api.median -import org.jetbrains.kotlinx.dataframe.api.merge -import org.jetbrains.kotlinx.dataframe.api.min -import org.jetbrains.kotlinx.dataframe.api.minBy -import org.jetbrains.kotlinx.dataframe.api.minOf -import org.jetbrains.kotlinx.dataframe.api.minus -import org.jetbrains.kotlinx.dataframe.api.move -import org.jetbrains.kotlinx.dataframe.api.moveTo -import org.jetbrains.kotlinx.dataframe.api.moveToLeft -import org.jetbrains.kotlinx.dataframe.api.moveToRight +import org.jetbrains.kotlinx.dataframe.api.* import org.jetbrains.kotlinx.dataframe.api.name -import org.jetbrains.kotlinx.dataframe.api.named -import org.jetbrains.kotlinx.dataframe.api.notNull -import org.jetbrains.kotlinx.dataframe.api.nullable -import org.jetbrains.kotlinx.dataframe.api.parse -import org.jetbrains.kotlinx.dataframe.api.pathOf -import org.jetbrains.kotlinx.dataframe.api.pivot -import org.jetbrains.kotlinx.dataframe.api.print -import org.jetbrains.kotlinx.dataframe.api.remove -import org.jetbrains.kotlinx.dataframe.api.rename -import org.jetbrains.kotlinx.dataframe.api.reorderColumnsByName -import org.jetbrains.kotlinx.dataframe.api.replace -import org.jetbrains.kotlinx.dataframe.api.rows -import org.jetbrains.kotlinx.dataframe.api.select -import org.jetbrains.kotlinx.dataframe.api.single -import org.jetbrains.kotlinx.dataframe.api.sortBy -import org.jetbrains.kotlinx.dataframe.api.sortByCount -import org.jetbrains.kotlinx.dataframe.api.sortByDesc -import org.jetbrains.kotlinx.dataframe.api.sortByKey -import org.jetbrains.kotlinx.dataframe.api.sortWith -import org.jetbrains.kotlinx.dataframe.api.split 
-import org.jetbrains.kotlinx.dataframe.api.sum -import org.jetbrains.kotlinx.dataframe.api.sumOf -import org.jetbrains.kotlinx.dataframe.api.take -import org.jetbrains.kotlinx.dataframe.api.takeLast -import org.jetbrains.kotlinx.dataframe.api.takeWhile -import org.jetbrains.kotlinx.dataframe.api.times -import org.jetbrains.kotlinx.dataframe.api.to -import org.jetbrains.kotlinx.dataframe.api.toColumn -import org.jetbrains.kotlinx.dataframe.api.toColumnAccessor -import org.jetbrains.kotlinx.dataframe.api.toColumnOf -import org.jetbrains.kotlinx.dataframe.api.toDataFrame -import org.jetbrains.kotlinx.dataframe.api.toDouble -import org.jetbrains.kotlinx.dataframe.api.toInt -import org.jetbrains.kotlinx.dataframe.api.toList -import org.jetbrains.kotlinx.dataframe.api.toListOf -import org.jetbrains.kotlinx.dataframe.api.toMap -import org.jetbrains.kotlinx.dataframe.api.toRight -import org.jetbrains.kotlinx.dataframe.api.toStr -import org.jetbrains.kotlinx.dataframe.api.toValueColumn -import org.jetbrains.kotlinx.dataframe.api.transpose -import org.jetbrains.kotlinx.dataframe.api.under -import org.jetbrains.kotlinx.dataframe.api.ungroup -import org.jetbrains.kotlinx.dataframe.api.update -import org.jetbrains.kotlinx.dataframe.api.value -import org.jetbrains.kotlinx.dataframe.api.values -import org.jetbrains.kotlinx.dataframe.api.valuesNotNull -import org.jetbrains.kotlinx.dataframe.api.where -import org.jetbrains.kotlinx.dataframe.api.with -import org.jetbrains.kotlinx.dataframe.api.withNull -import org.jetbrains.kotlinx.dataframe.api.withValue -import org.jetbrains.kotlinx.dataframe.api.withValues -import org.jetbrains.kotlinx.dataframe.api.withZero -import org.jetbrains.kotlinx.dataframe.api.xs import org.jetbrains.kotlinx.dataframe.columns.ColumnKind import org.jetbrains.kotlinx.dataframe.columns.UnresolvedColumnsPolicy import org.jetbrains.kotlinx.dataframe.exceptions.ExcessiveColumnsException import org.jetbrains.kotlinx.dataframe.exceptions.TypeConversionException 
-import org.jetbrains.kotlinx.dataframe.hasNulls -import org.jetbrains.kotlinx.dataframe.impl.DataFrameSize +import org.jetbrains.kotlinx.dataframe.impl.* import org.jetbrains.kotlinx.dataframe.impl.api.convertToImpl -import org.jetbrains.kotlinx.dataframe.impl.between import org.jetbrains.kotlinx.dataframe.impl.columns.isMissingColumn -import org.jetbrains.kotlinx.dataframe.impl.emptyPath -import org.jetbrains.kotlinx.dataframe.impl.getColumnsImpl -import org.jetbrains.kotlinx.dataframe.impl.nothingType -import org.jetbrains.kotlinx.dataframe.impl.trackColumnAccess -import org.jetbrains.kotlinx.dataframe.index import org.jetbrains.kotlinx.dataframe.io.renderValueForStdout -import org.jetbrains.kotlinx.dataframe.kind import org.jetbrains.kotlinx.dataframe.math.mean -import org.jetbrains.kotlinx.dataframe.ncol -import org.jetbrains.kotlinx.dataframe.nrow -import org.jetbrains.kotlinx.dataframe.size -import org.jetbrains.kotlinx.dataframe.type -import org.jetbrains.kotlinx.dataframe.typeClass import org.junit.Test import java.math.BigDecimal import java.time.LocalDate @@ -911,7 +750,7 @@ class DataFrameTests : BaseTest() { df["e"].kind() shouldBe ColumnKind.Group df.getColumnGroup("d").columnNames() shouldBe listOf("f") df.getColumnGroup("e").getColumnGroup("g").columnNames() shouldBe listOf("h") - val cols = df.getColumns { allDfs() } + val cols = df.getColumns { cols { !it.isColumnGroup() }.recursively() } cols.size shouldBe 5 cols.forEach { it.toList() shouldBe expected @@ -1165,7 +1004,7 @@ class DataFrameTests : BaseTest() { @Test fun `gather bool`() { val pivoted = typed.pivot { city }.groupBy { name }.matches() - val res = pivoted.gather { dfsOf() }.where { it }.keysInto("city") + val res = pivoted.gather { colsOf().recursively() }.where { it }.keysInto("city") val sorted = res.sortBy { name and city } sorted shouldBe typed.select { name and city.map { it.toString() } }.distinct().sortBy { name and city } } @@ -1553,10 +1392,10 @@ class DataFrameTests : 
BaseTest() { @Test fun `union table columns`() { val grouped = typed.addId("id").groupBy { name }.toDataFrame() - val dfs = (0 until grouped.nrow).map { + val flattened = (0 until grouped.nrow).map { grouped[it..it] } - val dst = dfs.concat().asGroupBy().concat().sortBy("id").remove("id") + val dst = flattened.concat().asGroupBy().concat().sortBy("id").remove("id") dst shouldBe typed } diff --git a/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/testSets/person/DataFrameTreeTests.kt b/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/testSets/person/DataFrameTreeTests.kt index d76ad386c..a96b20382 100644 --- a/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/testSets/person/DataFrameTreeTests.kt +++ b/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/testSets/person/DataFrameTreeTests.kt @@ -9,84 +9,7 @@ import org.jetbrains.dataframe.impl.codeGen.InterfaceGenerationMode import org.jetbrains.dataframe.impl.codeGen.generate import org.jetbrains.kotlinx.dataframe.* import org.jetbrains.kotlinx.dataframe.annotations.DataSchema -import org.jetbrains.kotlinx.dataframe.api.GroupBy -import org.jetbrains.kotlinx.dataframe.api.GroupWithKey -import org.jetbrains.kotlinx.dataframe.api.add -import org.jetbrains.kotlinx.dataframe.api.addId -import org.jetbrains.kotlinx.dataframe.api.after -import org.jetbrains.kotlinx.dataframe.api.append -import org.jetbrains.kotlinx.dataframe.api.asColumnGroup -import org.jetbrains.kotlinx.dataframe.api.asDataFrame -import org.jetbrains.kotlinx.dataframe.api.asGroupBy -import org.jetbrains.kotlinx.dataframe.api.at -import org.jetbrains.kotlinx.dataframe.api.by -import org.jetbrains.kotlinx.dataframe.api.cast -import org.jetbrains.kotlinx.dataframe.api.column -import org.jetbrains.kotlinx.dataframe.api.columnGroup -import org.jetbrains.kotlinx.dataframe.api.columnOf -import org.jetbrains.kotlinx.dataframe.api.columnsCount -import 
org.jetbrains.kotlinx.dataframe.api.concat -import org.jetbrains.kotlinx.dataframe.api.convert -import org.jetbrains.kotlinx.dataframe.api.count -import org.jetbrains.kotlinx.dataframe.api.dataFrameOf -import org.jetbrains.kotlinx.dataframe.api.dfsOf -import org.jetbrains.kotlinx.dataframe.api.distinct -import org.jetbrains.kotlinx.dataframe.api.dropNulls -import org.jetbrains.kotlinx.dataframe.api.duplicate -import org.jetbrains.kotlinx.dataframe.api.duplicateRows -import org.jetbrains.kotlinx.dataframe.api.emptyDataFrame -import org.jetbrains.kotlinx.dataframe.api.explode -import org.jetbrains.kotlinx.dataframe.api.expr -import org.jetbrains.kotlinx.dataframe.api.filter -import org.jetbrains.kotlinx.dataframe.api.forEach -import org.jetbrains.kotlinx.dataframe.api.frameColumn -import org.jetbrains.kotlinx.dataframe.api.getColumnGroup -import org.jetbrains.kotlinx.dataframe.api.getColumnPath -import org.jetbrains.kotlinx.dataframe.api.getColumnWithPath -import org.jetbrains.kotlinx.dataframe.api.getColumns -import org.jetbrains.kotlinx.dataframe.api.getValue -import org.jetbrains.kotlinx.dataframe.api.group -import org.jetbrains.kotlinx.dataframe.api.groupBy -import org.jetbrains.kotlinx.dataframe.api.implode -import org.jetbrains.kotlinx.dataframe.api.indices -import org.jetbrains.kotlinx.dataframe.api.insert -import org.jetbrains.kotlinx.dataframe.api.into -import org.jetbrains.kotlinx.dataframe.api.intoRows -import org.jetbrains.kotlinx.dataframe.api.inward -import org.jetbrains.kotlinx.dataframe.api.isColumnGroup -import org.jetbrains.kotlinx.dataframe.api.isEmpty -import org.jetbrains.kotlinx.dataframe.api.isFrameColumn -import org.jetbrains.kotlinx.dataframe.api.join -import org.jetbrains.kotlinx.dataframe.api.last -import org.jetbrains.kotlinx.dataframe.api.map -import org.jetbrains.kotlinx.dataframe.api.max -import org.jetbrains.kotlinx.dataframe.api.maxBy -import org.jetbrains.kotlinx.dataframe.api.median -import org.jetbrains.kotlinx.dataframe.api.minus 
-import org.jetbrains.kotlinx.dataframe.api.move -import org.jetbrains.kotlinx.dataframe.api.moveTo -import org.jetbrains.kotlinx.dataframe.api.moveToLeft -import org.jetbrains.kotlinx.dataframe.api.moveToRight -import org.jetbrains.kotlinx.dataframe.api.pathOf -import org.jetbrains.kotlinx.dataframe.api.perRowCol -import org.jetbrains.kotlinx.dataframe.api.pivot -import org.jetbrains.kotlinx.dataframe.api.remove -import org.jetbrains.kotlinx.dataframe.api.rename -import org.jetbrains.kotlinx.dataframe.api.rows -import org.jetbrains.kotlinx.dataframe.api.select -import org.jetbrains.kotlinx.dataframe.api.single -import org.jetbrains.kotlinx.dataframe.api.sortBy -import org.jetbrains.kotlinx.dataframe.api.split -import org.jetbrains.kotlinx.dataframe.api.sumOf -import org.jetbrains.kotlinx.dataframe.api.toColumnAccessor -import org.jetbrains.kotlinx.dataframe.api.toTop -import org.jetbrains.kotlinx.dataframe.api.under -import org.jetbrains.kotlinx.dataframe.api.ungroup -import org.jetbrains.kotlinx.dataframe.api.update -import org.jetbrains.kotlinx.dataframe.api.values -import org.jetbrains.kotlinx.dataframe.api.with -import org.jetbrains.kotlinx.dataframe.api.withNull -import org.jetbrains.kotlinx.dataframe.api.xs +import org.jetbrains.kotlinx.dataframe.api.* import org.jetbrains.kotlinx.dataframe.columns.ColumnGroup import org.jetbrains.kotlinx.dataframe.columns.ColumnKind import org.jetbrains.kotlinx.dataframe.columns.FrameColumn @@ -143,9 +66,9 @@ class DataFrameTreeTests : BaseTest() { } @Test - fun `select dfs under group`() { - df2.select { nameAndCity.dfsOf() } shouldBe typed2.select { nameAndCity.name } - df2.select { nameAndCity.dfsOf() } shouldBe typed2.select { nameAndCity.name and nameAndCity.city } + fun `select recursively under group`() { + df2.select { nameAndCity.colsOf().recursively() } shouldBe typed2.select { nameAndCity.name } + df2.select { nameAndCity.colsOf().recursively() } shouldBe typed2.select { nameAndCity.name and nameAndCity.city } } 
@Test @@ -249,8 +172,8 @@ class DataFrameTreeTests : BaseTest() { } @Test - fun selectDfs() { - val cols = typed2.select { dfs { it.hasNulls } } + fun `select recursively`() { + val cols = typed2.select { cols { it.hasNulls }.rec() } cols shouldBe typed2.select { nameAndCity.city and weight } } @@ -457,14 +380,14 @@ class DataFrameTreeTests : BaseTest() { @Test fun parentColumnTest() { - val res = typed2.move { dfs { it.depth > 0 } }.toTop { it.parentName + "-" + it.name } + val res = typed2.move { cols { it.depth > 0 }.rec() }.toTop { it.parentName + "-" + it.name } res.columnsCount() shouldBe 4 res.columnNames() shouldBe listOf("nameAndCity-name", "nameAndCity-city", "age", "weight") } @Test fun `group cols`() { - val joined = typed2.move { allDfs() }.into { pathOf(it.path.joinToString(".")) } + val joined = typed2.move { cols { !it.isColumnGroup() }.rec() }.into { pathOf(it.path.joinToString(".")) } val grouped = joined.group { nameContains(".") }.into { it.name().substringBefore(".") } val expected = typed2.rename { nameAndCity.all() }.into { it.path.joinToString(".") } grouped shouldBe expected diff --git a/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/testSets/person/PivotTests.kt b/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/testSets/person/PivotTests.kt index 5dea9d318..3af47eb35 100644 --- a/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/testSets/person/PivotTests.kt +++ b/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/testSets/person/PivotTests.kt @@ -3,56 +3,7 @@ package org.jetbrains.kotlinx.dataframe.testSets.person import io.kotest.matchers.shouldBe import org.jetbrains.kotlinx.dataframe.DataFrame import org.jetbrains.kotlinx.dataframe.annotations.DataSchema -import org.jetbrains.kotlinx.dataframe.api.Infer -import org.jetbrains.kotlinx.dataframe.api.add -import org.jetbrains.kotlinx.dataframe.api.asColumnGroup -import 
org.jetbrains.kotlinx.dataframe.api.associate -import org.jetbrains.kotlinx.dataframe.api.cast -import org.jetbrains.kotlinx.dataframe.api.column -import org.jetbrains.kotlinx.dataframe.api.columnNames -import org.jetbrains.kotlinx.dataframe.api.columnOf -import org.jetbrains.kotlinx.dataframe.api.columnsCount -import org.jetbrains.kotlinx.dataframe.api.convert -import org.jetbrains.kotlinx.dataframe.api.count -import org.jetbrains.kotlinx.dataframe.api.dataFrameOf -import org.jetbrains.kotlinx.dataframe.api.drop -import org.jetbrains.kotlinx.dataframe.api.dropNulls -import org.jetbrains.kotlinx.dataframe.api.explodeLists -import org.jetbrains.kotlinx.dataframe.api.expr -import org.jetbrains.kotlinx.dataframe.api.filter -import org.jetbrains.kotlinx.dataframe.api.first -import org.jetbrains.kotlinx.dataframe.api.frames -import org.jetbrains.kotlinx.dataframe.api.gather -import org.jetbrains.kotlinx.dataframe.api.getColumnGroup -import org.jetbrains.kotlinx.dataframe.api.getColumns -import org.jetbrains.kotlinx.dataframe.api.getColumnsWithPaths -import org.jetbrains.kotlinx.dataframe.api.group -import org.jetbrains.kotlinx.dataframe.api.groupBy -import org.jetbrains.kotlinx.dataframe.api.groupByOther -import org.jetbrains.kotlinx.dataframe.api.implode -import org.jetbrains.kotlinx.dataframe.api.into -import org.jetbrains.kotlinx.dataframe.api.isList -import org.jetbrains.kotlinx.dataframe.api.join -import org.jetbrains.kotlinx.dataframe.api.last -import org.jetbrains.kotlinx.dataframe.api.map -import org.jetbrains.kotlinx.dataframe.api.mapKeys -import org.jetbrains.kotlinx.dataframe.api.mapValues -import org.jetbrains.kotlinx.dataframe.api.matches -import org.jetbrains.kotlinx.dataframe.api.named -import org.jetbrains.kotlinx.dataframe.api.notNull -import org.jetbrains.kotlinx.dataframe.api.pivot -import org.jetbrains.kotlinx.dataframe.api.print -import org.jetbrains.kotlinx.dataframe.api.remove -import org.jetbrains.kotlinx.dataframe.api.replace -import 
org.jetbrains.kotlinx.dataframe.api.rows -import org.jetbrains.kotlinx.dataframe.api.sortBy -import org.jetbrains.kotlinx.dataframe.api.sumOf -import org.jetbrains.kotlinx.dataframe.api.toInt -import org.jetbrains.kotlinx.dataframe.api.ungroup -import org.jetbrains.kotlinx.dataframe.api.update -import org.jetbrains.kotlinx.dataframe.api.values -import org.jetbrains.kotlinx.dataframe.api.where -import org.jetbrains.kotlinx.dataframe.api.with +import org.jetbrains.kotlinx.dataframe.api.* import org.jetbrains.kotlinx.dataframe.columns.ColumnKind import org.jetbrains.kotlinx.dataframe.impl.asList import org.jetbrains.kotlinx.dataframe.impl.nothingType @@ -229,7 +180,11 @@ class PivotTests { group.columnNames() shouldBe if (it.name() == "Bob") keys - "city" else keys } - val leafColumns = pivoted.getColumnsWithPaths { all().drop(1).allDfs() } + val leafColumns = pivoted.getColumnsWithPaths { + all() + .drop(1) + .cols { !it.isColumnGroup() }.recursively() + } leafColumns.size shouldBe typed.name.countDistinct() * typed.key.countDistinct() - 1 leafColumns.forEach { it.path.size shouldBe 2 } @@ -284,7 +239,7 @@ class PivotTests { // nullGroup.columnTypes() shouldBe listOf(typeOf?>(), typeOf?>()) nullGroup.columnTypes() shouldBe listOf(nothingType(true), nothingType(true)) - val cols = pivotedDf.getColumnsWithPaths { all().allDfs() } + val cols = pivotedDf.getColumnsWithPaths { cols { !it.isColumnGroup() }.recursively() } cols.size shouldBe 2 * typed.name.countDistinct() * typed.key.countDistinct() - 2 cols.forEach { diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/DataColumn.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/DataColumn.kt index 37d387567..76a752d7f 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/DataColumn.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/DataColumn.kt @@ -1,26 +1,8 @@ package org.jetbrains.kotlinx.dataframe -import org.jetbrains.kotlinx.dataframe.api.Infer -import 
org.jetbrains.kotlinx.dataframe.api.asDataColumn -import org.jetbrains.kotlinx.dataframe.api.cast -import org.jetbrains.kotlinx.dataframe.api.concat -import org.jetbrains.kotlinx.dataframe.api.filter -import org.jetbrains.kotlinx.dataframe.api.schema -import org.jetbrains.kotlinx.dataframe.api.take -import org.jetbrains.kotlinx.dataframe.columns.BaseColumn -import org.jetbrains.kotlinx.dataframe.columns.ColumnGroup -import org.jetbrains.kotlinx.dataframe.columns.ColumnKind -import org.jetbrains.kotlinx.dataframe.columns.ColumnPath -import org.jetbrains.kotlinx.dataframe.columns.ColumnResolutionContext -import org.jetbrains.kotlinx.dataframe.columns.ColumnWithPath -import org.jetbrains.kotlinx.dataframe.columns.FrameColumn -import org.jetbrains.kotlinx.dataframe.columns.ValueColumn -import org.jetbrains.kotlinx.dataframe.impl.columns.ColumnGroupImpl -import org.jetbrains.kotlinx.dataframe.impl.columns.FrameColumnImpl -import org.jetbrains.kotlinx.dataframe.impl.columns.ValueColumnImpl -import org.jetbrains.kotlinx.dataframe.impl.columns.addPath -import org.jetbrains.kotlinx.dataframe.impl.columns.guessColumnType -import org.jetbrains.kotlinx.dataframe.impl.columns.toColumnKind +import org.jetbrains.kotlinx.dataframe.api.* +import org.jetbrains.kotlinx.dataframe.columns.* +import org.jetbrains.kotlinx.dataframe.impl.columns.* import org.jetbrains.kotlinx.dataframe.impl.getValuesType import org.jetbrains.kotlinx.dataframe.impl.splitByIndices import org.jetbrains.kotlinx.dataframe.schema.DataFrameSchema @@ -54,7 +36,7 @@ public interface DataColumn : BaseColumn { values: List, type: KType, infer: Infer = Infer.None, - defaultValue: T? = null + defaultValue: T? 
= null, ): ValueColumn = ValueColumnImpl(values, name, getValuesType(values, type, infer), defaultValue) /** @@ -67,7 +49,11 @@ public interface DataColumn : BaseColumn { * @param values list of column values * @param infer column type inference mode */ - public inline fun createValueColumn(name: String, values: List, infer: Infer = Infer.None): ValueColumn = createValueColumn( + public inline fun createValueColumn( + name: String, + values: List, + infer: Infer = Infer.None, + ): ValueColumn = createValueColumn( name, values, getValuesType( values, @@ -81,17 +67,21 @@ public interface DataColumn : BaseColumn { public fun createFrameColumn( name: String, df: DataFrame, - startIndices: Iterable + startIndices: Iterable, ): FrameColumn = FrameColumnImpl(name, df.splitByIndices(startIndices.asSequence()).toList(), lazy { df.schema() }) public fun createFrameColumn( name: String, groups: List>, - schema: Lazy? = null + schema: Lazy? = null, ): FrameColumn = FrameColumnImpl(name, groups, schema) - public fun createWithTypeInference(name: String, values: List, nullable: Boolean? = null): DataColumn = guessColumnType(name, values, nullable = nullable) + public fun createWithTypeInference( + name: String, + values: List, + nullable: Boolean? 
= null, + ): DataColumn = guessColumnType(name, values, nullable = nullable) public fun create(name: String, values: List, type: KType, infer: Infer = Infer.None): DataColumn { return when (type.toColumnKind()) { @@ -101,7 +91,8 @@ public interface DataColumn : BaseColumn { } } - public inline fun create(name: String, values: List, infer: Infer = Infer.None): DataColumn = create(name, values, typeOf(), infer) + public inline fun create(name: String, values: List, infer: Infer = Infer.None): DataColumn = + create(name, values, typeOf(), infer) public fun empty(name: String = ""): AnyCol = createValueColumn(name, emptyList(), typeOf()) } @@ -116,7 +107,8 @@ public interface DataColumn : BaseColumn { override fun resolveSingle(context: ColumnResolutionContext): ColumnWithPath? = this.addPath() - override operator fun getValue(thisRef: Any?, property: KProperty<*>): DataColumn = super.getValue(thisRef, property) as DataColumn + override operator fun getValue(thisRef: Any?, property: KProperty<*>): DataColumn = + super.getValue(thisRef, property) as DataColumn public operator fun iterator(): Iterator = values().iterator() @@ -133,6 +125,8 @@ public val AnyCol.indices: IntRange get() = indices() public val AnyCol.type: KType get() = type() public val AnyCol.kind: ColumnKind get() = kind() -public val AnyCol.typeClass: KClass<*> get() = type.classifier as? KClass<*> ?: error("Cannot cast ${type.classifier?.javaClass} to a ${KClass::class}. Column $name: $type") +public val AnyCol.typeClass: KClass<*> + get() = type.classifier as? KClass<*> + ?: error("Cannot cast ${type.classifier?.javaClass} to a ${KClass::class}. 
Column $name: $type") public fun AnyBaseCol.indices(): IntRange = 0 until size() diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/ColumnsSelectionDsl.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/ColumnsSelectionDsl.kt index 522207fb9..221d249c6 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/ColumnsSelectionDsl.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/ColumnsSelectionDsl.kt @@ -9,39 +9,15 @@ import org.jetbrains.kotlinx.dataframe.ColumnsSelector import org.jetbrains.kotlinx.dataframe.DataColumn import org.jetbrains.kotlinx.dataframe.DataFrame import org.jetbrains.kotlinx.dataframe.DataRow -import org.jetbrains.kotlinx.dataframe.Predicate -import org.jetbrains.kotlinx.dataframe.columns.ColumnAccessor -import org.jetbrains.kotlinx.dataframe.columns.ColumnGroup -import org.jetbrains.kotlinx.dataframe.columns.ColumnPath -import org.jetbrains.kotlinx.dataframe.columns.ColumnReference -import org.jetbrains.kotlinx.dataframe.columns.ColumnResolutionContext -import org.jetbrains.kotlinx.dataframe.columns.ColumnSet -import org.jetbrains.kotlinx.dataframe.columns.ColumnWithPath -import org.jetbrains.kotlinx.dataframe.columns.FrameColumn -import org.jetbrains.kotlinx.dataframe.columns.SingleColumn -import org.jetbrains.kotlinx.dataframe.columns.renamedReference -import org.jetbrains.kotlinx.dataframe.columns.toColumnSet +import org.jetbrains.kotlinx.dataframe.columns.* import org.jetbrains.kotlinx.dataframe.documentation.AccessApi import org.jetbrains.kotlinx.dataframe.documentation.ColumnExpression import org.jetbrains.kotlinx.dataframe.documentation.DocumentationUrls import org.jetbrains.kotlinx.dataframe.documentation.LineBreak -import org.jetbrains.kotlinx.dataframe.hasNulls import org.jetbrains.kotlinx.dataframe.impl.aggregation.toColumns import org.jetbrains.kotlinx.dataframe.impl.columnName -import org.jetbrains.kotlinx.dataframe.impl.columns.ColumnsList -import 
org.jetbrains.kotlinx.dataframe.impl.columns.DistinctColumnSet -import org.jetbrains.kotlinx.dataframe.impl.columns.addPath -import org.jetbrains.kotlinx.dataframe.impl.columns.allColumnsExcept -import org.jetbrains.kotlinx.dataframe.impl.columns.changePath -import org.jetbrains.kotlinx.dataframe.impl.columns.createColumnSet -import org.jetbrains.kotlinx.dataframe.impl.columns.getAt -import org.jetbrains.kotlinx.dataframe.impl.columns.getChildrenAt -import org.jetbrains.kotlinx.dataframe.impl.columns.singleImpl -import org.jetbrains.kotlinx.dataframe.impl.columns.top -import org.jetbrains.kotlinx.dataframe.impl.columns.transform -import org.jetbrains.kotlinx.dataframe.impl.columns.transformSingle -import org.jetbrains.kotlinx.dataframe.impl.columns.transformWithContext -import org.jetbrains.kotlinx.dataframe.impl.columns.tree.dfs +import org.jetbrains.kotlinx.dataframe.impl.columns.* +import org.jetbrains.kotlinx.dataframe.impl.columns.tree.flattenRecursively import org.jetbrains.kotlinx.dataframe.impl.headPlusArray import kotlin.reflect.KProperty import kotlin.reflect.KType @@ -223,8 +199,8 @@ public interface ColumnsSelectionDsl : ColumnSelectionDsl, SingleColum * * `df.`[select][select]` { `[colsOf][colsOf]`<`[Int][Int]`>().`[first][first]`() }` */ - public fun ColumnSet.first(condition: ColumnFilter = { true }): SingleColumn = - transform { listOf(it.first(condition)) }.singleImpl() + public fun ColumnSet.first(condition: ColumnFilter = { true }): TransformableSingleColumn = + transform { listOf(it.first(condition)) }.singleWithTransformerImpl() /** * @include [CommonFirstDocs] @@ -232,8 +208,10 @@ public interface ColumnsSelectionDsl : ColumnSelectionDsl, SingleColum * `df.`[select][select]` { `[first][first]` { it.`[name][ColumnReference.name]`().`[startsWith][String.startsWith]`("year") } }` * * `df.`[select][select]` { myColumnGroup.`[first][first]`() }` + * + * `df.`[select][select]` { "pathTo"["myColumnGroup"].`[first][first]` { 
it.`[name][ColumnReference.name]`().`[startsWith][String.startsWith]`("year") } }` */ - public fun SingleColumn.first(condition: ColumnFilter<*> = { true }): SingleColumn<*> = + public fun SingleColumn<*>.first(condition: ColumnFilter<*> = { true }): TransformableSingleColumn<*> = all().first(condition) /** @@ -241,15 +219,7 @@ public interface ColumnsSelectionDsl : ColumnSelectionDsl, SingleColum * @arg [CommonFirstDocs.Examples] * `df.`[select][select]` { "myColumnGroup".`[first][first]` { it.`[name][ColumnReference.name]`().`[startsWith][String.startsWith]`("year") } }` */ - public fun String.first(condition: ColumnFilter<*> = { true }): SingleColumn<*> = - colGroup(this).first(condition) - - /** - * @include [CommonFirstDocs] - * @arg [CommonFirstDocs.Examples] - * `df.`[select][select]` { "pathTo"["myColumnGroup"].`[first][first]` { it.`[name][ColumnReference.name]`().`[startsWith][String.startsWith]`("year") } }` - */ - public fun ColumnPath.first(condition: ColumnFilter<*> = { true }): SingleColumn<*> = + public fun String.first(condition: ColumnFilter<*> = { true }): TransformableSingleColumn<*> = colGroup(this).first(condition) /** @@ -257,7 +227,7 @@ public interface ColumnsSelectionDsl : ColumnSelectionDsl, SingleColum * @arg [CommonFirstDocs.Examples] * `df.`[select][select]` { Type::myColumnGroup.`[first][first]` { it.`[name][ColumnReference.name]`().`[startsWith][String.startsWith]`("year") } }` */ - public fun KProperty<*>.first(condition: ColumnFilter<*> = { true }): SingleColumn<*> = + public fun KProperty<*>.first(condition: ColumnFilter<*> = { true }): TransformableSingleColumn<*> = colGroup(this).first(condition) /** @@ -286,8 +256,8 @@ public interface ColumnsSelectionDsl : ColumnSelectionDsl, SingleColum * * `df.`[select][select]` { `[colsOf][colsOf]`<`[Int][Int]`>().`[first][last]`() }` */ - public fun ColumnSet.last(condition: ColumnFilter = { true }): SingleColumn = - transform { listOf(it.last(condition)) }.singleImpl() + public fun 
ColumnSet.last(condition: ColumnFilter = { true }): TransformableSingleColumn = + transform { listOf(it.last(condition)) }.singleWithTransformerImpl() /** * @include [CommonLastDocs] @@ -295,8 +265,10 @@ public interface ColumnsSelectionDsl : ColumnSelectionDsl, SingleColum * `df.`[select][select]` { `[last][last]` { it.`[name][ColumnReference.name]`().`[startsWith][String.startsWith]`("year") } }` * * `df.`[select][select]` { myColumnGroup.`[last][last]`() }` + * + * `df.`[select][select]` { "pathTo"["myColumnGroup"].`[last][last]` { it.`[name][ColumnReference.name]`().`[startsWith][String.startsWith]`("year") } }` */ - public fun SingleColumn.last(condition: ColumnFilter<*> = { true }): SingleColumn<*> = + public fun SingleColumn<*>.last(condition: ColumnFilter<*> = { true }): TransformableSingleColumn<*> = all().last(condition) /** @@ -304,15 +276,7 @@ public interface ColumnsSelectionDsl : ColumnSelectionDsl, SingleColum * @arg [CommonLastDocs.Examples] * `df.`[select][select]` { "myColumnGroup".`[last][last]` { it.`[name][ColumnReference.name]`().`[startsWith][String.startsWith]`("year") } }` */ - public fun String.last(condition: ColumnFilter<*> = { true }): SingleColumn<*> = - colGroup(this).last(condition) - - /** - * @include [CommonLastDocs] - * @arg [CommonLastDocs.Examples] - * `df.`[select][select]` { "pathTo"["myColumnGroup"].`[last][last]` { it.`[name][ColumnReference.name]`().`[startsWith][String.startsWith]`("year") } }` - */ - public fun ColumnPath.last(condition: ColumnFilter<*> = { true }): SingleColumn<*> = + public fun String.last(condition: ColumnFilter<*> = { true }): TransformableSingleColumn<*> = colGroup(this).last(condition) /** @@ -320,7 +284,7 @@ public interface ColumnsSelectionDsl : ColumnSelectionDsl, SingleColum * @arg [CommonLastDocs.Examples] * `df.`[select][select]` { Type::myColumnGroup.`[last][last]` { it.`[name][ColumnReference.name]`().`[startsWith][String.startsWith]`("year") } }` */ - public fun 
KProperty<*>.last(condition: ColumnFilter<*> = { true }): SingleColumn<*> = + public fun KProperty<*>.last(condition: ColumnFilter<*> = { true }): TransformableSingleColumn<*> = colGroup(this).last(condition) /** @@ -349,8 +313,8 @@ public interface ColumnsSelectionDsl : ColumnSelectionDsl, SingleColum * * `df.`[select][select]` { `[colsOf][colsOf]`<`[Int][Int]`>().`[single][single]`() }` */ - public fun ColumnSet.single(condition: ColumnFilter = { true }): SingleColumn = - transform { listOf(it.single(condition)) }.singleImpl() + public fun ColumnSet.single(condition: ColumnFilter = { true }): TransformableSingleColumn = + transform { listOf(it.single(condition)) }.singleWithTransformerImpl() /** * @include [CommonSingleDocs] @@ -358,8 +322,10 @@ public interface ColumnsSelectionDsl : ColumnSelectionDsl, SingleColum * `df.`[select][select]` { `[single][single]` { it.`[name][ColumnReference.name]`().`[startsWith][String.startsWith]`("year") } }` * * `df.`[select][select]` { myColumnGroup.`[single][single]`() }` + * + * `df.`[select][select]` { "pathTo"["myColumnGroup"].`[single][single]` { it.`[name][ColumnReference.name]`().`[startsWith][String.startsWith]`("year") } }` */ - public fun SingleColumn.single(condition: ColumnFilter<*> = { true }): SingleColumn<*> = + public fun SingleColumn<*>.single(condition: ColumnFilter<*> = { true }): TransformableSingleColumn<*> = all().single(condition) /** @@ -367,15 +333,7 @@ public interface ColumnsSelectionDsl : ColumnSelectionDsl, SingleColum * @arg [CommonSingleDocs.Examples] * `df.`[select][select]` { "myColumnGroup".`[single][single]` { it.`[name][ColumnReference.name]`().`[startsWith][String.startsWith]`("year") } }` */ - public fun String.single(condition: ColumnFilter<*> = { true }): SingleColumn<*> = - colGroup(this).single(condition) - - /** - * @include [CommonSingleDocs] - * @arg [CommonSingleDocs.Examples] - * `df.`[select][select]` { "pathTo"["myColumnGroup"].`[single][single]` { 
it.`[name][ColumnReference.name]`().`[startsWith][String.startsWith]`("year") } }` - */ - public fun ColumnPath.single(condition: ColumnFilter<*> = { true }): SingleColumn<*> = + public fun String.single(condition: ColumnFilter<*> = { true }): TransformableSingleColumn<*> = colGroup(this).single(condition) /** @@ -383,7 +341,7 @@ public interface ColumnsSelectionDsl : ColumnSelectionDsl, SingleColum * @arg [CommonSingleDocs.Examples] * `df.`[select][select]` { Type::myColumnGroup.`[single][single]` { it.`[name][ColumnReference.name]`().`[startsWith][String.startsWith]`("year") } }` */ - public fun KProperty<*>.single(condition: ColumnFilter<*> = { true }): SingleColumn<*> = + public fun KProperty<*>.single(condition: ColumnFilter<*> = { true }): TransformableSingleColumn<*> = colGroup(this).single(condition) /** @@ -467,8 +425,9 @@ public interface ColumnsSelectionDsl : ColumnSelectionDsl, SingleColum */ public operator fun AnyColumnReference.rangeTo(endInclusive: AnyColumnReference): ColumnSet<*> = object : ColumnSet { - override fun resolve(context: ColumnResolutionContext): List> { - val startPath = this@rangeTo.resolveSingle(context)!!.path + + private fun process(col: AnyColumnReference, context: ColumnResolutionContext): List> { + val startPath = col.resolveSingle(context)!!.path val endPath = endInclusive.resolveSingle(context)!!.path val parentPath = startPath.parent()!! 
require(parentPath == endPath.parent()) { "Start and end columns have different parent column paths" } @@ -481,6 +440,9 @@ public interface ColumnsSelectionDsl : ColumnSelectionDsl, SingleColum } } } + + override fun resolve(context: ColumnResolutionContext): List> = + process(this@rangeTo, context) } /** @@ -608,7 +570,7 @@ public interface ColumnsSelectionDsl : ColumnSelectionDsl, SingleColum public fun ColumnGroupReference.col(property: KProperty): ColumnAccessor = column(property) /** {@comment TODO} */ - public fun SingleColumn.col(index: Int): SingleColumn = getChildrenAt(index).singleImpl() + public fun SingleColumn<*>.col(index: Int): SingleColumn = getChildrenAt(index).singleImpl() /** {@comment TODO} */ public operator fun ColumnSet.get(index: Int): SingleColumn = getAt(index) @@ -872,7 +834,12 @@ public interface ColumnsSelectionDsl : ColumnSelectionDsl, SingleColum /** * ## Cols - * Creates a subset of columns ([ColumnSet]) from a parent [ColumnSet], -[ColumnGroup], or -[DataFrame]. + * Creates a subset of columns ([ColumnSet]) from the current [ColumnSet]. + * + * If the current [ColumnSet] is a [SingleColumn] + * (and thus consists of only one column (or [column group][ColumnGroup])), + * then `cols` will create a subset of its children. + * * You can use either a [ColumnFilter] or any of the `vararg` overloads for all * [APIs][AccessApi] (+ [ColumnPath]). 
* @@ -941,17 +908,12 @@ public interface ColumnsSelectionDsl : ColumnSelectionDsl, SingleColum @Suppress("UNCHECKED_CAST") public fun ColumnSet.cols( predicate: ColumnFilter = { true }, - ): ColumnSet = transformWithContext { - dataFrameOf(it) - .asColumnGroup() - .cols(predicate as ColumnFilter<*>) - .resolve(this) - } as ColumnSet + ): TransformableColumnSet = colsInternal(predicate as ColumnFilter<*>) as TransformableColumnSet /** @include [ColumnSetColsPredicateDocs] */ public operator fun ColumnSet.get( predicate: ColumnFilter = { true }, - ): ColumnSet = cols(predicate) + ): TransformableColumnSet = cols(predicate) /** * @include [CommonColsDocs.Predicate] @@ -963,33 +925,34 @@ public interface ColumnsSelectionDsl : ColumnSelectionDsl, SingleColum * * `df.`[select][select]` { myColumnGroup`.[cols][cols]` { "e" `[in\][String.contains\]` it.`[name][ColumnPath.name]`() } }` * - * `// same as `[all][all] + * `df.`[select][select]` { `[cols][cols]`() } // same as `[all][all] * - * `df.`[select][select]` { `[cols][cols]`() }` + * `df.`[select][select]` { "pathTo"["myGroupCol"].`[cols][cols]` { "e" `[in\][String.contains\]` it.`[name][ColumnPath.name]`() } }` + * + * `df.`[select][select]` { "pathTo"["myGroupCol"]`[`[`][cols]`{ it.`[any\][ColumnWithPath.any\]` { it == "Alice" } }`[`]`][cols]` }` + * + * `df.`[select][select]` { "pathTo"["myGroupCol"].`[cols][cols]`() } // identity call, same as `[all][all] * * `// NOTE: there's a `[DataFrame.get]` overload that prevents this:` * * `df.`[select][select]` { myColumnGroup`[`[`][cols]`{ ... }`[`]`][cols]` }` * - * `// use `[cols][cols]` instead` - * `df.`[select][select]` { myColumnGroup`.[cols][cols]` { ... 
} }` - * * @see [all\] */ private interface SingleColumnAnyRowColsPredicateDocs /** @include [SingleColumnAnyRowColsPredicateDocs] */ - public fun SingleColumn.cols( + public fun SingleColumn<*>.cols( predicate: ColumnFilter<*> = { true }, - ): ColumnSet<*> = colsInternal(predicate) + ): TransformableColumnSet<*> = colsInternal(predicate) /** * @include [SingleColumnAnyRowColsPredicateDocs] * {@comment this function is shadowed by [DataFrame.get]} */ - public operator fun SingleColumn.get( + public operator fun SingleColumn<*>.get( predicate: ColumnFilter<*> = { true }, - ): ColumnSet = cols(predicate) + ): TransformableColumnSet = cols(predicate) /** * @include [CommonColsDocs.Predicate] @@ -1008,38 +971,12 @@ public interface ColumnsSelectionDsl : ColumnSelectionDsl, SingleColum /** @include [StringColsPredicateDocs] */ public fun String.cols( predicate: ColumnFilter<*> = { true }, - ): ColumnSet<*> = colGroup(this).cols(predicate) + ): TransformableColumnSet<*> = colGroup(this).cols(predicate) /** @include [StringColsPredicateDocs] */ public operator fun String.get( predicate: ColumnFilter<*> = { true }, - ): ColumnSet = cols(predicate) - - /** - * @include [CommonColsDocs.Predicate] - * @arg [CommonColsDocs.Examples] - * - * `df.`[select][select]` { "pathTo"["myGroupCol"].`[cols][cols]` { "e" `[in\][String.contains\]` it.`[name][ColumnPath.name]`() } }` - * - * `df.`[select][select]` { "pathTo"["myGroupCol"]`[`[`][cols]`{ it.`[any\][ColumnWithPath.any\]` { it == "Alice" } }`[`]`][cols]` }` - * - * `// identity call, same as `[all][all] - * - * `df.`[select][select]` { "pathTo"["myGroupCol"].`[cols][cols]`() }` - * - * @see [all\] - */ - private interface ColumnPathColsPredicateDocs - - /** @include [ColumnPathColsPredicateDocs] */ - public fun ColumnPath.cols( - predicate: ColumnFilter<*> = { true }, - ): ColumnSet<*> = colGroup(this).cols(predicate) - - /** @include [ColumnPathColsPredicateDocs] */ - public operator fun ColumnPath.get( - predicate: 
ColumnFilter<*> = { true }, - ): ColumnSet = cols(predicate) + ): TransformableColumnSet = cols(predicate) /** * @include [CommonColsDocs.Predicate] @@ -1060,12 +997,12 @@ public interface ColumnsSelectionDsl : ColumnSelectionDsl, SingleColum /** @include [KPropertyColsPredicateDocs] */ public fun KProperty<*>.cols( predicate: ColumnFilter<*> = { true }, - ): ColumnSet<*> = colGroup(this).cols(predicate) + ): TransformableColumnSet<*> = colGroup(this).cols(predicate) /** @include [KPropertyColsPredicateDocs] */ public operator fun KProperty<*>.get( predicate: ColumnFilter<*> = { true }, - ): ColumnSet = cols(predicate) + ): TransformableColumnSet = cols(predicate) // endregion @@ -1112,6 +1049,12 @@ public interface ColumnsSelectionDsl : ColumnSelectionDsl, SingleColum * * `df.`[select][select]` { myColumnGroup.`[cols][cols]`(columnA, columnB) }` * + * `df.`[select][select]` { "pathTo"["columnGroup"].`[cols][cols]`(columnA, columnB) }` + * + * `df.`[select][select]` { "pathTo"["columnGroup"].`[cols][cols]`("pathTo"["colA"], "pathTo"["colB"]) }` + * + * `df.`[select][select]` { "pathTo"["columnGroup"]`[`[`][cols]`columnA, columnB`[`]`][cols]` }` + * * `// NOTE: there's a `[DataFrame.get]` overload that prevents this:` * * `df.`[select][select]` { myColumnGroup`[`[`][cols]`columnA, columnB`[`]`][cols]` }` @@ -1119,7 +1062,7 @@ public interface ColumnsSelectionDsl : ColumnSelectionDsl, SingleColum private interface SingleColumnColsVarargColumnReferenceDocs /** @include [SingleColumnColsVarargColumnReferenceDocs] */ - public fun SingleColumn.cols( + public fun SingleColumn<*>.cols( firstCol: ColumnReference, vararg otherCols: ColumnReference, ): ColumnSet = headPlusArray(firstCol, otherCols).let { refs -> @@ -1132,7 +1075,7 @@ public interface ColumnsSelectionDsl : ColumnSelectionDsl, SingleColum * @include [SingleColumnColsVarargColumnReferenceDocs] * {@comment this function is shadowed by [DataFrame.get] for accessors} */ - public operator fun SingleColumn.get( + 
public operator fun SingleColumn<*>.get( firstCol: ColumnReference, vararg otherCols: ColumnReference, ): ColumnSet = cols(firstCol, *otherCols) @@ -1161,30 +1104,6 @@ public interface ColumnsSelectionDsl : ColumnSelectionDsl, SingleColum vararg otherCols: ColumnReference, ): ColumnSet = cols(firstCol, *otherCols) - /** - * @include [CommonColsDocs.Vararg] {@arg [CommonColsDocs.Vararg.AccessorType] [ColumnReference]} - * @arg [CommonColsDocs.Examples] - * - * `df.`[select][select]` { "pathTo"["columnGroup"].`[cols][cols]`(columnA, columnB) }` - * - * `df.`[select][select]` { "pathTo"["columnGroup"].`[cols][cols]`("pathTo"["colA"], "pathTo"["colB"]) }` - * - * `df.`[select][select]` { "pathTo"["columnGroup"]`[`[`][cols]`columnA, columnB`[`]`][cols]` }` - */ - private interface ColumnPathColsVarargColumnReferenceDocs - - /** @include [ColumnPathColsVarargColumnReferenceDocs] */ - public fun ColumnPath.cols( - firstCol: ColumnReference, - vararg otherCols: ColumnReference, - ): ColumnSet = colGroup(this).cols(firstCol, *otherCols) - - /** @include [ColumnPathColsVarargColumnReferenceDocs] */ - public operator fun ColumnPath.get( - firstCol: ColumnReference, - vararg otherCols: ColumnReference, - ): ColumnSet = cols(firstCol, *otherCols) - /** * @include [CommonColsDocs.Vararg] {@arg [CommonColsDocs.Vararg.AccessorType] [ColumnReference]} * @arg [CommonColsDocs.Examples] @@ -1224,15 +1143,11 @@ public interface ColumnsSelectionDsl : ColumnSelectionDsl, SingleColum private interface ColumnSetColsVarargStringDocs /** @include [ColumnSetColsVarargStringDocs] */ - @Suppress("UNCHECKED_CAST") public fun ColumnSet.cols( firstCol: String, vararg otherCols: String, - ): ColumnSet = transformWithContext { - dataFrameOf(it) - .asColumnGroup() - .cols(firstCol, *otherCols) - .resolve(this) as List> + ): ColumnSet = headPlusArray(firstCol, otherCols).let { names -> + filter { it.name in names } } /** @@ -1253,6 +1168,10 @@ public interface ColumnsSelectionDsl : ColumnSelectionDsl, 
SingleColum * * `df.`[select][select]` { myColumnGroup.`[cols][cols]`("columnA", "columnB") }` * + * `df.`[select][select]` { "pathTo"["columnGroup"].`[cols][cols]`("columnA", "columnB") }` + * + * `df.`[select][select]` { "pathTo"["columnGroup"]`[`[`][cols]`"columnA", "columnB"`[`]`][cols]` }` + * * `// NOTE: there's a `[DataFrame.get]` overload that prevents this:` * * `df.`[select][select]` { myColumnGroup`[`[`][cols]`"columnA", "columnB"`[`]`][cols]` }` @@ -1260,7 +1179,7 @@ public interface ColumnsSelectionDsl : ColumnSelectionDsl, SingleColum private interface SingleColumnColsVarargStringDocs /** @include [SingleColumnColsVarargStringDocs] */ - public fun SingleColumn.cols( + public fun SingleColumn<*>.cols( firstCol: String, vararg otherCols: String, ): ColumnSet<*> = headPlusArray(firstCol, otherCols).let { names -> @@ -1271,7 +1190,7 @@ public interface ColumnsSelectionDsl : ColumnSelectionDsl, SingleColum * @include [SingleColumnColsVarargStringDocs] * {@comment this function is shadowed by [DataFrame.get] for accessors} */ - public operator fun SingleColumn.get( + public operator fun SingleColumn<*>.get( firstCol: String, vararg otherCols: String, ): ColumnSet<*> = cols(firstCol, *otherCols) @@ -1298,28 +1217,6 @@ public interface ColumnsSelectionDsl : ColumnSelectionDsl, SingleColum vararg otherCols: String, ): ColumnSet<*> = cols(firstCol, *otherCols) - /** - * @include [CommonColsDocs.Vararg] {@arg [CommonColsDocs.Vararg.AccessorType] [String]} - * @arg [CommonColsDocs.Examples] - * - * `df.`[select][select]` { "pathTo"["columnGroup"].`[cols][cols]`("columnA", "columnB") }` - * - * `df.`[select][select]` { "pathTo"["columnGroup"]`[`[`][cols]`"columnA", "columnB"`[`]`][cols]` }` - */ - private interface ColumnPathColsVarargStringDocs - - /** @include [ColumnPathColsVarargStringDocs] */ - public fun ColumnPath.cols( - firstCol: String, - vararg otherCols: String, - ): ColumnSet<*> = colGroup(this).cols(firstCol, *otherCols) - - /** @include 
[ColumnPathColsVarargStringDocs] */ - public operator fun ColumnPath.get( - firstCol: String, - vararg otherCols: String, - ): ColumnSet<*> = cols(firstCol, *otherCols) - /** * @include [CommonColsDocs.Vararg] {@arg [CommonColsDocs.Vararg.AccessorType] [String]} * @arg [CommonColsDocs.Examples] @@ -1360,11 +1257,8 @@ public interface ColumnsSelectionDsl : ColumnSelectionDsl, SingleColum public fun ColumnSet.cols( firstCol: KProperty, vararg otherCols: KProperty, - ): ColumnSet = transformWithContext { - dataFrameOf(it) - .asColumnGroup() - .cols(firstCol, *otherCols) - .resolve(this) + ): ColumnSet = headPlusArray(firstCol, otherCols).map { it.name }.let { names -> + filter { it.name in names } } /** @include [ColumnSetColsVarargKPropertyDocs] */ @@ -1384,11 +1278,15 @@ public interface ColumnsSelectionDsl : ColumnSelectionDsl, SingleColum * `df.`[select][select]` { myColumnGroup.`[cols][cols]`(Type::colA, Type::colB) }` * * `df.`[select][select]` { myColumnGroup`[`[`][cols]`Type::colA, Type::colB`[`]`][cols]` }` + * + * `df.`[select][select]` { "pathTo"["columnGroup"].`[cols][cols]`(Type::colA, Type::colB) }` + * + * `df.`[select][select]` { "pathTo"["columnGroup"]`[`[`][cols]`Type::colA, Type::colB`[`]`][cols]` }` */ private interface SingleColumnColsVarargKPropertyDocs /** @include [SingleColumnColsVarargKPropertyDocs] */ - public fun SingleColumn.cols( + public fun SingleColumn<*>.cols( firstCol: KProperty, vararg otherCols: KProperty, ): ColumnSet = headPlusArray(firstCol, otherCols).let { props -> @@ -1396,7 +1294,7 @@ public interface ColumnsSelectionDsl : ColumnSelectionDsl, SingleColum } /** @include [SingleColumnColsVarargKPropertyDocs] */ - public operator fun SingleColumn.get( + public operator fun SingleColumn<*>.get( firstCol: KProperty, vararg otherCols: KProperty, ): ColumnSet = cols(firstCol, *otherCols) @@ -1423,28 +1321,6 @@ public interface ColumnsSelectionDsl : ColumnSelectionDsl, SingleColum vararg otherCols: KProperty, ): ColumnSet = 
cols(firstCol, *otherCols) - /** - * @include [CommonColsDocs.Vararg] {@arg [CommonColsDocs.Vararg.AccessorType] [KProperty]} - * @arg [CommonColsDocs.Examples] - * - * `df.`[select][select]` { "pathTo"["columnGroup"].`[cols][cols]`(Type::colA, Type::colB) }` - * - * `df.`[select][select]` { "pathTo"["columnGroup"]`[`[`][cols]`Type::colA, Type::colB`[`]`][cols]` }` - */ - private interface ColumnPathColsVarargKPropertyDocs - - /** @include [ColumnPathColsVarargKPropertyDocs] */ - public fun ColumnPath.cols( - firstCol: KProperty, - vararg otherCols: KProperty, - ): ColumnSet = colGroup(this).cols(firstCol, *otherCols) - - /** @include [ColumnPathColsVarargKPropertyDocs] */ - public operator fun ColumnPath.get( - firstCol: KProperty, - vararg otherCols: KProperty, - ): ColumnSet = cols(firstCol, *otherCols) - /** * @include [CommonColsDocs.Vararg] {@arg [CommonColsDocs.Vararg.AccessorType] [KProperty]} * @arg [CommonColsDocs.Examples] @@ -1471,33 +1347,25 @@ public interface ColumnsSelectionDsl : ColumnSelectionDsl, SingleColum // region indices - @Suppress("UNCHECKED_CAST") public fun ColumnSet.cols( firstIndex: Int, vararg otherIndices: Int, - ): ColumnSet = transformWithContext { - dataFrameOf(it) - .asColumnGroup() - .cols(firstIndex, *otherIndices) - .resolve(this) as List> - } + ): ColumnSet = colsInternal(headPlusArray(firstIndex, otherIndices)) as ColumnSet public operator fun ColumnSet.get( firstIndex: Int, vararg otherIndices: Int, ): ColumnSet = cols(firstIndex, *otherIndices) - public fun SingleColumn.cols( + public fun SingleColumn<*>.cols( firstIndex: Int, vararg otherIndices: Int, - ): ColumnSet<*> = headPlusArray(firstIndex, otherIndices).let { indices -> - transform { it.flatMap { it.children().let { children -> indices.map { children[it] } } } } - } + ): ColumnSet<*> = colsInternal(headPlusArray(firstIndex, otherIndices)) /** * {@comment this function is shadowed by [ColumnGroup.get] for accessors} */ - public operator fun SingleColumn.get( + 
public operator fun SingleColumn<*>.get( firstIndex: Int, vararg otherIndices: Int, ): ColumnSet<*> = cols(firstIndex, *otherIndices) @@ -1512,16 +1380,6 @@ public interface ColumnsSelectionDsl : ColumnSelectionDsl, SingleColum vararg otherIndices: Int, ): ColumnSet<*> = cols(firstIndex, *otherIndices) - public fun ColumnPath.cols( - firstIndex: Int, - vararg otherIndices: Int, - ): ColumnSet<*> = colGroup(this).cols(firstIndex, *otherIndices) - - public operator fun ColumnPath.get( - firstIndex: Int, - vararg otherIndices: Int, - ): ColumnSet<*> = cols(firstIndex, *otherIndices) - public fun KProperty<*>.cols( firstIndex: Int, vararg otherIndices: Int, @@ -1536,33 +1394,22 @@ public interface ColumnsSelectionDsl : ColumnSelectionDsl, SingleColum // region ranges - @Suppress("UNCHECKED_CAST") public fun ColumnSet.cols(range: IntRange): ColumnSet = - transformWithContext { - dataFrameOf(it) - .asColumnGroup() - .cols(range) - .resolve(this) as List> - } + colsInternal(range) as ColumnSet public operator fun ColumnSet.get(range: IntRange): ColumnSet = cols(range) - public fun SingleColumn.cols(range: IntRange): ColumnSet<*> = - transform { it.flatMap { it.children().subList(range.first, range.last + 1) } } + public fun SingleColumn<*>.cols(range: IntRange): ColumnSet<*> = colsInternal(range) /** * {@comment this function is shadowed by [ColumnGroup.get] for accessors} */ - public operator fun SingleColumn.get(range: IntRange): ColumnSet<*> = cols(range) + public operator fun SingleColumn<*>.get(range: IntRange): ColumnSet<*> = cols(range) public fun String.cols(range: IntRange): ColumnSet<*> = colGroup(this).cols(range) public operator fun String.get(range: IntRange): ColumnSet<*> = cols(range) - public fun ColumnPath.cols(range: IntRange): ColumnSet<*> = colGroup(this).cols(range) - - public operator fun ColumnPath.get(range: IntRange): ColumnSet<*> = cols(range) - public fun KProperty<*>.cols(range: IntRange): ColumnSet<*> = colGroup(this).cols(range) public 
operator fun KProperty<*>.get(range: IntRange): ColumnSet<*> = cols(range) @@ -1593,10 +1440,32 @@ public interface ColumnsSelectionDsl : ColumnSelectionDsl, SingleColum // region dfs + @Deprecated( + message = "dfs is deprecated, use recursively instead.", + replaceWith = ReplaceWith("this.cols(predicate).recursively(includeTopLevel = false)"), + level = DeprecationLevel.WARNING, + ) public fun ColumnSet.dfs(predicate: (ColumnWithPath<*>) -> Boolean): ColumnSet = dfsInternal(predicate) + @Deprecated( + message = "dfs is deprecated, use recursively instead.", + replaceWith = ReplaceWith("this.cols(predicate).recursively()"), + level = DeprecationLevel.WARNING, + ) + public fun SingleColumn<*>.dfs(predicate: (ColumnWithPath<*>) -> Boolean): ColumnSet = dfsInternal(predicate) + + @Deprecated( + message = "dfs is deprecated, use recursively instead.", + replaceWith = ReplaceWith("this.cols(predicate).recursively()"), + level = DeprecationLevel.WARNING, + ) public fun String.dfs(predicate: (ColumnWithPath<*>) -> Boolean): ColumnSet<*> = toColumnAccessor().dfs(predicate) + @Deprecated( + message = "dfs is deprecated, use recursively instead.", + replaceWith = ReplaceWith("this.cols(predicate).recursively()"), + level = DeprecationLevel.WARNING, + ) public fun KProperty.dfs(predicate: (ColumnWithPath<*>) -> Boolean): ColumnSet<*> = toColumnAccessor().dfs(predicate) @@ -1604,22 +1473,167 @@ public interface ColumnsSelectionDsl : ColumnSelectionDsl, SingleColum // region all - public fun SingleColumn<*>.all(): ColumnSet<*> = transformSingle { it.children() } + /** + * ## All + * Creates a new [ColumnSet] that contains all columns from the current [ColumnSet]. + * + * If the current [ColumnSet] is a [SingleColumn] and consists of only one [column group][ColumnGroup], + * then `all` will create a new [ColumnSet] consisting of its children. + * + * This makes the function equivalent to [cols()][ColumnSet.cols]. 
+ * + * #### For example: + * `df.`[move][DataFrame.move]` { `[all][ColumnSet.all]`().`[recursively][recursively]`() }.`[under][MoveClause.under]`("info")` + * + * `df.`[select][DataFrame.select]` { myGroup.`[all][ColumnSet.all]`() }` + * + * #### Examples for this overload: + * + * {@includeArg [CommonAllDocs.Examples]} + * + * @see [cols\] + */ + private interface CommonAllDocs { + + /** Example argument */ + interface Examples + } + + /** + * @include [CommonAllDocs] + * @arg [CommonAllDocs.Examples] + * + * `df.`[select][select]` { `[cols][cols]` { "a" in `[name][ColumnWithPath.name]` }.`[all][all]`() }` + * {@include [LineBreak]} + * NOTE: This is an identity call and can be omitted in most cases. However, it can still prove useful + * for readability or in combination with [recursively]. + */ + public fun ColumnSet.all(): TransformableColumnSet = allInternal() + + /** + * @include [CommonAllDocs] + * @arg [CommonAllDocs.Examples] + * + * `df.`[select][select]` { `[all][all]`() }` + * + * `df.`[select][select]` { myGroup.`[all][all]`() }` + * + * `df.`[select][select]` { "pathTo"["myGroup"].`[all][all]`() }` + */ + public fun SingleColumn<*>.all(): TransformableColumnSet<*> = allInternal() - public fun String.all(): ColumnSet<*> = toColumnAccessor().transformSingle { it.children() } + /** + * @include [CommonAllDocs] + * @arg [CommonAllDocs.Examples] + * + * `df.`[select][select]` { "myGroupCol".`[all][all]`() }` + */ + public fun String.all(): TransformableColumnSet<*> = toColumnAccessor().all() - public fun KProperty<*>.all(): ColumnSet<*> = toColumnAccessor().transformSingle { it.children() } + /** + * @include [CommonAllDocs] + * @arg [CommonAllDocs.Examples] + * + * `df.`[select][select]` { Type::columnGroup.`[all][all]`() }` + */ + public fun KProperty<*>.all(): TransformableColumnSet<*> = toColumnAccessor().all() // region allDfs + @Deprecated( + message = "allDfs is deprecated, use recursively instead.", + replaceWith = ReplaceWith("this.cols { 
includeGroups || !it.isColumnGroup() }.recursively()"), + level = DeprecationLevel.WARNING, + ) public fun ColumnSet<*>.allDfs(includeGroups: Boolean = false): ColumnSet = if (includeGroups) dfs { true } else dfs { !it.isColumnGroup() } + @Deprecated( + message = "allDfs is deprecated, use recursively instead.", + replaceWith = ReplaceWith("this.cols { includeGroups || !it.isColumnGroup() }.recursively()"), + level = DeprecationLevel.WARNING, + ) + public fun SingleColumn<*>.allDfs(includeGroups: Boolean = false): ColumnSet = + if (includeGroups) dfs { true } else dfs { !it.isColumnGroup() } + + @Deprecated( + message = "allDfs is deprecated, use recursively instead.", + replaceWith = ReplaceWith("this.cols { includeGroups || !it.isColumnGroup() }.recursively()"), + level = DeprecationLevel.WARNING, + ) public fun String.allDfs(includeGroups: Boolean = false): ColumnSet = toColumnAccessor().allDfs(includeGroups) + @Deprecated( + message = "allDfs is deprecated, use recursively instead.", + replaceWith = ReplaceWith("this.cols { includeGroups || !it.isColumnGroup() }.recursively()"), + level = DeprecationLevel.WARNING, + ) public fun KProperty<*>.allDfs(includeGroups: Boolean = false): ColumnSet = toColumnAccessor().allDfs(includeGroups) + /** + * ## Recursively / Rec + * + * Modifies the previous call to run not only on the current column set, + * but also on all columns inside [column groups][ColumnGroup]. + * + * `df.`[select][DataFrame.select]` { `[colsOf][ColumnSet.colsOf]`<`[String][String]`>() }` + * + * returns all columns of type [String] in the top-level, as expected. However, what if you want ALL + * columns of type [String] even if they are inside a nested [column group][ColumnGroup]? Then you can use [recursively]: + * + * `df.`[select][DataFrame.select]` { `[colsOf][ColumnSet.colsOf]`<`[String][String]`>().`[recursively][recursively]`() }` + * + * This will return the columns of type [String] in all levels. 
+ * + * More examples: + * + * `df.`[select][DataFrame.select]` { `[first][ColumnSet.first]` { col -> col.`[any][DataColumn.any]` { it == "Alice" } }.`[recursively][recursively]`() }` + * + * `df.`[select][DataFrame.select]` { `[cols][ColumnSet.cols]` { "name" in it.`[name][ColumnReference.name]` }.`[recursively][recursively]`() }` + * + * #### Examples for this overload: + * + * {@includeArg [CommonRecursivelyDocs.Examples]} + * + * @param [includeTopLevel\] Whether to include the top-level columns in the result. `true` by default. + */ + private interface CommonRecursivelyDocs { + + /** Example argument */ + interface Examples + } + + /** + * @include [CommonRecursivelyDocs] + * @arg [CommonRecursivelyDocs.Examples] + * + * `df.`[select][DataFrame.select]` { `[colsOf][ColumnSet.colsOf]`<`[String][String]`>().`[recursively][recursively]`() }` + * + * `df.`[select][DataFrame.select]` { myColumnGroup.`[all][ColumnSet.all]`().`[rec][rec]`() }` + * + * `df.`[select][DataFrame.select]` { `[groups][ColumnSet.groups]`().`[recursively][recursively]`() }` + */ + public fun TransformableColumnSet.recursively(): ColumnSet = + recursivelyImpl(includeTopLevel = true, includeGroups = true) + + /** @include [TransformableColumnSet.recursively] */ + public fun TransformableColumnSet.rec(): ColumnSet = recursively() + + /** + * @include [CommonRecursivelyDocs] + * @arg [CommonRecursivelyDocs.Examples] + * + * `df.`[select][DataFrame.select]` { `[first][ColumnSet.first]` { col -> col.`[any][DataColumn.any]` { it == "Alice" } }.`[recursively][recursively]`() }` + * + * `df.`[select][DataFrame.select]` { `[single][ColumnSet.single]` { it.name == "myCol" }.`[rec][rec]`() }` + */ + public fun TransformableSingleColumn<*>.recursively(): SingleColumn<*> = + recursivelyImpl(includeTopLevel = true, includeGroups = true) + + /** @include [TransformableSingleColumn.recursively] */ + public fun TransformableSingleColumn<*>.rec(): SingleColumn<*> = recursively() + // endregion // region 
allAfter @@ -1753,68 +1767,121 @@ public interface ColumnsSelectionDsl : ColumnSelectionDsl, SingleColum // endregion // region groups + public fun ColumnSet<*>.groups(filter: (ColumnGroup<*>) -> Boolean = { true }): TransformableColumnSet = + groupsInternal(filter) - public fun SingleColumn<*>.groups(filter: (ColumnGroup<*>) -> Boolean = { true }): ColumnSet = - children { it.isColumnGroup() && filter(it.asColumnGroup()) } as ColumnSet + public fun SingleColumn<*>.groups(filter: (ColumnGroup<*>) -> Boolean = { true }): TransformableColumnSet = + groupsInternal(filter) - public fun String.groups(filter: (ColumnGroup<*>) -> Boolean = { true }): ColumnSet = + public fun String.groups(filter: (ColumnGroup<*>) -> Boolean = { true }): TransformableColumnSet = toColumnAccessor().groups(filter) - public fun KProperty<*>.groups(filter: (ColumnGroup<*>) -> Boolean = { true }): ColumnSet = + public fun KProperty<*>.groups(filter: (ColumnGroup<*>) -> Boolean = { true }): TransformableColumnSet = toColumnAccessor().groups(filter) // endregion // region children - public fun ColumnSet<*>.children(predicate: (ColumnWithPath) -> Boolean = { true }): ColumnSet = + // takes children of all columns in the column set + public fun ColumnSet<*>.children(predicate: ColumnFilter = { true }): TransformableColumnSet = transform { it.flatMap { it.children().filter { predicate(it) } } } - public fun ColumnGroupReference.children(): ColumnSet = transformSingle { it.children() } + // same as cols + public fun SingleColumn<*>.children(predicate: ColumnFilter = { true }): TransformableColumnSet = + (this as ColumnSet<*>).children(predicate) // endregion public operator fun List>.get(range: IntRange): ColumnSet = ColumnsList(subList(range.first, range.last + 1)) - public fun SingleColumn.take(n: Int): ColumnSet<*> = transformSingle { it.children().take(n) } - public fun SingleColumn.takeLast(n: Int): ColumnSet<*> = transformSingle { it.children().takeLast(n) } - public fun SingleColumn.drop(n: 
Int): ColumnSet<*> = transformSingle { it.children().drop(n) } - public fun SingleColumn.dropLast(n: Int = 1): ColumnSet<*> = transformSingle { it.children().dropLast(n) } + public fun SingleColumn<*>.take(n: Int): ColumnSet<*> = transformSingle { it.children().take(n) } + public fun SingleColumn<*>.takeLast(n: Int): ColumnSet<*> = transformSingle { it.children().takeLast(n) } + public fun SingleColumn<*>.drop(n: Int): ColumnSet<*> = transformSingle { it.children().drop(n) } + public fun SingleColumn<*>.dropLast(n: Int = 1): ColumnSet<*> = transformSingle { it.children().dropLast(n) } public fun ColumnSet.drop(n: Int): ColumnSet = transform { it.drop(n) } public fun ColumnSet.take(n: Int): ColumnSet = transform { it.take(n) } public fun ColumnSet.dropLast(n: Int = 1): ColumnSet = transform { it.dropLast(n) } public fun ColumnSet.takeLast(n: Int): ColumnSet = transform { it.takeLast(n) } - public fun ColumnSet.top(): ColumnSet = transform { it.top() } - public fun ColumnSet.takeWhile(predicate: Predicate>): ColumnSet = + + @Deprecated("Use roots() instead", ReplaceWith("roots()")) + public fun ColumnSet.top(): ColumnSet = roots() + + /** + * ## Roots + * + * Returns a sub-set of columns that are roots of the trees of columns. + * + * In practice, this means that if a column in [this] is a child of another column in [this], + * it will not be included in the result. + * + * If [this] is a [SingleColumn] containing a single [ColumnGroup] it will run on the children of that group, + * else it simply runs on the columns in the [ColumnSet] itself. 
+ */ + public fun ColumnSet.roots(): ColumnSet = rootsInternal() as ColumnSet + + /** + * @include [roots] + */ + public fun SingleColumn<*>.roots(): ColumnSet<*> = rootsInternal() + + public fun ColumnSet.takeWhile(predicate: ColumnFilter): ColumnSet = transform { it.takeWhile(predicate) } - public fun ColumnSet.takeLastWhile(predicate: Predicate>): ColumnSet = + public fun ColumnSet.takeLastWhile(predicate: ColumnFilter): ColumnSet = transform { it.takeLastWhile(predicate) } - public fun ColumnSet.filter(predicate: Predicate>): ColumnSet = - transform { it.filter(predicate) } + public fun ColumnSet.filter(predicate: ColumnFilter): TransformableColumnSet = + colsInternal(predicate as ColumnFilter<*>) as TransformableColumnSet + + public fun SingleColumn<*>.nameContains(text: CharSequence): TransformableColumnSet<*> = + cols { it.name.contains(text) } + + public fun ColumnSet.nameContains(text: CharSequence): TransformableColumnSet = + cols { it.name.contains(text) } + + public fun SingleColumn<*>.nameContains(regex: Regex): TransformableColumnSet<*> = cols { it.name.contains(regex) } + + public fun ColumnSet.nameContains(regex: Regex): TransformableColumnSet = cols { it.name.contains(regex) } - public fun SingleColumn.nameContains(text: CharSequence): ColumnSet<*> = cols { it.name.contains(text) } - public fun ColumnSet.nameContains(text: CharSequence): ColumnSet = cols { it.name.contains(text) } - public fun SingleColumn.nameContains(regex: Regex): ColumnSet<*> = cols { it.name.contains(regex) } - public fun ColumnSet.nameContains(regex: Regex): ColumnSet = cols { it.name.contains(regex) } - public fun SingleColumn.startsWith(prefix: CharSequence): ColumnSet<*> = cols { it.name.startsWith(prefix) } - public fun ColumnSet.startsWith(prefix: CharSequence): ColumnSet = cols { it.name.startsWith(prefix) } - public fun SingleColumn.endsWith(suffix: CharSequence): ColumnSet<*> = cols { it.name.endsWith(suffix) } - public fun ColumnSet.endsWith(suffix: CharSequence): 
ColumnSet = cols { it.name.endsWith(suffix) } + public fun SingleColumn<*>.startsWith(prefix: CharSequence): TransformableColumnSet<*> = + cols { it.name.startsWith(prefix) } - public fun ColumnSet.except(vararg other: ColumnSet<*>): ColumnSet<*> = except(other.toColumnSet()) - public fun ColumnSet.except(vararg other: String): ColumnSet<*> = except(other.toColumnSet()) + public fun ColumnSet.startsWith(prefix: CharSequence): TransformableColumnSet = + cols { it.name.startsWith(prefix) } - public fun ColumnSet.withoutNulls(): ColumnSet = transform { it.filter { !it.hasNulls } } as ColumnSet + public fun SingleColumn<*>.endsWith(suffix: CharSequence): TransformableColumnSet<*> = + cols { it.name.endsWith(suffix) } - public infix fun ColumnSet.except(other: ColumnSet<*>): ColumnSet<*> = - createColumnSet { resolve(it).allColumnsExcept(other.resolve(it)) } + public fun ColumnSet.endsWith(suffix: CharSequence): TransformableColumnSet = + cols { it.name.endsWith(suffix) } - public infix fun ColumnSet.except(selector: ColumnsSelector): ColumnSet = - except(selector.toColumns()) as ColumnSet + public fun ColumnSet.except(vararg other: ColumnSet<*>): TransformableColumnSet<*> = + except(other.toColumnSet()) + + public fun ColumnSet.except(vararg other: String): TransformableColumnSet<*> = except(other.toColumnSet()) + + public fun ColumnSet.withoutNulls(): TransformableColumnSet = + transform { it.filter { !it.hasNulls() } } as TransformableColumnSet + + public infix fun ColumnSet.except(other: ColumnSet<*>): TransformableColumnSet<*> = + createTransformableColumnSet( + resolver = { context -> + this@except + .resolve(context) + .allColumnsExcept(other.resolve(context)) + }, + transformResolve = { context, transformer -> + transformer.transform(this@except) + .resolve(context) + .allColumnsExcept(other.resolve(context)) + }, + ) + + public infix fun ColumnSet.except(selector: ColumnsSelector): TransformableColumnSet = + except(selector.toColumns()) as 
TransformableColumnSet public operator fun ColumnsSelector.invoke(): ColumnSet = this(this@ColumnsSelectionDsl, this@ColumnsSelectionDsl) @@ -1892,9 +1959,25 @@ public interface ColumnsSelectionDsl : ColumnSelectionDsl, SingleColum public fun ColumnSet.distinct(): ColumnSet = DistinctColumnSet(this) + @Deprecated( + message = "Use recursively() instead", + replaceWith = ReplaceWith( + "this.colsOf(type, predicate).recursively()", + "org.jetbrains.kotlinx.dataframe.columns.recursively", + "org.jetbrains.kotlinx.dataframe.api.colsOf", + ), + ) public fun String.dfsOf(type: KType, predicate: (ColumnWithPath) -> Boolean = { true }): ColumnSet<*> = toColumnAccessor().dfsOf(type, predicate) + @Deprecated( + message = "Use recursively() instead", + replaceWith = ReplaceWith( + "this.colsOf(type, predicate).recursively()", + "org.jetbrains.kotlinx.dataframe.columns.recursively", + "org.jetbrains.kotlinx.dataframe.api.colsOf", + ), + ) public fun KProperty<*>.dfsOf(type: KType, predicate: (ColumnWithPath) -> Boolean = { true }): ColumnSet<*> = toColumnAccessor().dfsOf(type, predicate) @@ -1951,18 +2034,99 @@ public inline fun ColumnsSelectionDsl.expr( internal fun ColumnsSelector.filter(predicate: (ColumnWithPath) -> Boolean): ColumnsSelector = { this@filter(it, it).filter(predicate) } -internal fun ColumnSet<*>.colsInternal(predicate: ColumnFilter<*>) = - transform { it.flatMap { it.children().filter { predicate(it) } } } +/** + * If this [ColumnSet] is a [SingleColumn], it + * returns a new [ColumnSet] containing the children of this [SingleColumn] that + * match the given [predicate]. + * + * Else, it returns a new [ColumnSet] containing all columns in this [ColumnSet] that + * match the given [predicate]. 
+ */ +internal fun ColumnSet<*>.colsInternal(predicate: ColumnFilter<*>): TransformableColumnSet<*> = + allInternal().transform { it.filter(predicate) } -internal fun ColumnSet<*>.dfsInternal(predicate: (ColumnWithPath<*>) -> Boolean) = - transform { it.filter { it.isColumnGroup() }.flatMap { it.children().dfs().filter(predicate) } } +internal fun ColumnSet<*>.colsInternal(indices: IntArray): TransformableColumnSet<*> = + allInternal().transform { cols -> + indices.map { cols[it] } + } + +internal fun ColumnSet<*>.colsInternal(range: IntRange): TransformableColumnSet<*> = + allInternal().transform { + it.subList(range.first, range.last + 1) + } + +internal fun ColumnSet<*>.rootsInternal(): ColumnSet<*> = + allInternal().transform { it.roots() } + +internal fun ColumnSet<*>.groupsInternal(filter: (ColumnGroup<*>) -> Boolean): TransformableColumnSet = + colsInternal { it.isColumnGroup() && filter(it.asColumnGroup()) } as TransformableColumnSet + +/** + * If [this] is a [SingleColumn] containing a single [ColumnGroup], it + * returns a [(transformable) ColumnSet][TransformableColumnSet] containing the children of this [ColumnGroup], + * else it simply returns a [(transformable) ColumnSet][TransformableColumnSet] from [this]. 
+ */ +internal fun ColumnSet.allInternal(): TransformableColumnSet = + transform { + if (this.isSingleColumnWithGroup(it)) { + it.single().children() + } else { + it + } + }.cast() + +/** @include [allInternal] */ +internal fun SingleColumn<*>.allInternal(): TransformableColumnSet<*> = + (this as ColumnSet<*>).allInternal() +@Deprecated("Replaced with recursively()") +internal fun ColumnSet<*>.dfsInternal(predicate: (ColumnWithPath<*>) -> Boolean) = + transform { it.filter { it.isColumnGroup() }.flatMap { it.children().flattenRecursively().filter(predicate) } } + +@Deprecated( + message = "Use recursively() instead", + replaceWith = ReplaceWith( + "this.colsOf(type, predicate).recursively()", + "org.jetbrains.kotlinx.dataframe.columns.recursively", + "org.jetbrains.kotlinx.dataframe.api.colsOf", + ), +) public fun ColumnSet<*>.dfsOf(type: KType, predicate: (ColumnWithPath) -> Boolean = { true }): ColumnSet<*> = dfsInternal { it.isSubtypeOf(type) && predicate(it.cast()) } +@Deprecated( + message = "Use recursively() instead", + replaceWith = ReplaceWith( + "this.colsOf(type, predicate).recursively()", + "org.jetbrains.kotlinx.dataframe.columns.recursively", + "org.jetbrains.kotlinx.dataframe.api.colsOf", + ), +) +public fun SingleColumn<*>.dfsOf(type: KType, predicate: (ColumnWithPath) -> Boolean = { true }): ColumnSet<*> = + dfsInternal { it.isSubtypeOf(type) && predicate(it.cast()) } + +@Deprecated( + message = "Use recursively() instead", + replaceWith = ReplaceWith( + "this.colsOf(filter).recursively(includeTopLevel = false)", + "org.jetbrains.kotlinx.dataframe.columns.recursively", + "org.jetbrains.kotlinx.dataframe.api.colsOf", + ), +) public inline fun ColumnSet<*>.dfsOf(noinline filter: (ColumnWithPath) -> Boolean = { true }): ColumnSet = dfsOf(typeOf(), filter) as ColumnSet +@Deprecated( + message = "Use recursively() instead", + replaceWith = ReplaceWith( + "this.colsOf(filter).recursively()", + "org.jetbrains.kotlinx.dataframe.columns.recursively", + 
"org.jetbrains.kotlinx.dataframe.api.colsOf", + ), +) +public inline fun SingleColumn<*>.dfsOf(noinline filter: (ColumnWithPath) -> Boolean = { true }): ColumnSet = + dfsOf(typeOf(), filter) as ColumnSet + /** * ## Cols Of * Get columns by a given type and an optional filter. @@ -2018,8 +2182,48 @@ private interface CommonColsOfDocs { * @include [CommonColsOfDocs.FilterParam] * @include [CommonColsOfDocs.Return] */ -public fun ColumnSet<*>.colsOf(type: KType, filter: (DataColumn) -> Boolean = { true }): ColumnSet = - colsInternal { it.isSubtypeOf(type) && filter(it.cast()) } as ColumnSet +public fun ColumnSet<*>.colsOf( + type: KType, + filter: (DataColumn) -> Boolean = { true }, +): TransformableColumnSet = + colsInternal { it.isSubtypeOf(type) && filter(it.cast()) } as TransformableColumnSet + +/** + * @include [CommonColsOfDocs] + * Get (sub-)columns by a given type with or without [filter]. + * #### For example: + * + * `df.`[select][DataFrame.select]` { `[colsOf][colsOf]`<`[Int][Int]`>() }` + * + * `df.`[select][DataFrame.select]` { myColumnGroup.`[colsOf][colsOf]`<`[Int][Int]`> { it.`[size][DataColumn.size]` > 10 } }` + * + * `df.`[select][DataFrame.select]` { myColumnGroup.`[colsOf][colsOf]`<`[Int][Int]`>() }` + * + * @include [CommonColsOfDocs.FilterParam] + * @include [CommonColsOfDocs.Return] + */ +public inline fun ColumnSet<*>.colsOf(noinline filter: (DataColumn) -> Boolean = { true }): TransformableColumnSet = + colsOf(typeOf(), filter) + +/** + * @include [CommonColsOfDocs] + * Get (sub-)columns by [type] with or without [filter]. 
+ * #### For example: + * + * `df.`[select][DataFrame.select]` { `[colsOf][colsOf]`(`[typeOf][typeOf]`<`[Int][Int]`>()) }` + * + * `df.`[select][DataFrame.select]` { myColumnGroup.`[colsOf][colsOf]`(`[typeOf][typeOf]`<`[Int][Int]`>()) { it: `[DataColumn][DataColumn]`<`[Int][Int]`> -> it.`[size][DataColumn.size]` > 10 } }` + * + * `df.`[select][DataFrame.select]` { myColumnGroup.`[colsOf][colsOf]`(`[typeOf][typeOf]`<`[Int][Int]`>()) }` + * + * @include [CommonColsOfDocs.FilterParam] + * @include [CommonColsOfDocs.Return] + */ +public fun SingleColumn<*>.colsOf( + type: KType, + filter: (DataColumn) -> Boolean = { true }, +): TransformableColumnSet = + colsInternal { it.isSubtypeOf(type) && filter(it.cast()) } as TransformableColumnSet /** * @include [CommonColsOfDocs] @@ -2035,7 +2239,7 @@ public fun ColumnSet<*>.colsOf(type: KType, filter: (DataColumn) -> Boole * @include [CommonColsOfDocs.FilterParam] * @include [CommonColsOfDocs.Return] */ -public inline fun ColumnSet<*>.colsOf(noinline filter: (DataColumn) -> Boolean = { true }): ColumnSet = +public inline fun SingleColumn<*>.colsOf(noinline filter: (DataColumn) -> Boolean = { true }): TransformableColumnSet = colsOf(typeOf(), filter) /* TODO: [Issue: #325, context receiver support](https://github.com/Kotlin/dataframe/issues/325) diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/cast.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/cast.kt index 921fdd412..a77aa4c62 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/cast.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/cast.kt @@ -1,3 +1,5 @@ +@file:Suppress("UNCHECKED_CAST") + package org.jetbrains.kotlinx.dataframe.api import org.jetbrains.kotlinx.dataframe.AnyCol @@ -15,6 +17,8 @@ import org.jetbrains.kotlinx.dataframe.columns.FrameColumn import org.jetbrains.kotlinx.dataframe.columns.SingleColumn import org.jetbrains.kotlinx.dataframe.columns.ValueColumn import 
org.jetbrains.kotlinx.dataframe.impl.api.convertToImpl +import org.jetbrains.kotlinx.dataframe.impl.columns.TransformableColumnSet +import org.jetbrains.kotlinx.dataframe.impl.columns.TransformableSingleColumn import kotlin.reflect.typeOf public fun AnyFrame.cast(): DataFrame = this as DataFrame @@ -44,6 +48,10 @@ public fun ColumnAccessor<*>.cast(): ColumnAccessor = this as ColumnAcces public fun ColumnSet<*>.cast(): ColumnSet = this as ColumnSet +public fun TransformableColumnSet<*>.cast(): TransformableColumnSet = this as TransformableColumnSet + public fun ColumnReference<*>.cast(): ColumnReference = this as ColumnReference public fun SingleColumn<*>.cast(): SingleColumn = this as SingleColumn + +public fun TransformableSingleColumn<*>.cast(): TransformableSingleColumn = this as TransformableSingleColumn diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/corr.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/corr.kt index 0e9cb3198..3212310ad 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/corr.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/corr.kt @@ -18,7 +18,7 @@ public data class Corr( internal val columns: ColumnsSelector, ) -public fun DataFrame.corr(): DataFrame = corr { dfs { it.isSuitableForCorr() } }.withItself() +public fun DataFrame.corr(): DataFrame = corr { cols { it.isSuitableForCorr() }.rec() }.withItself() public fun DataFrame.corr(columns: ColumnsSelector): Corr = Corr(this, columns) public fun DataFrame.corr(vararg columns: String): Corr = corr { columns.toColumnSet() } diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/cumSum.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/cumSum.kt index ec2d861a7..40d929b4f 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/cumSum.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/cumSum.kt @@ -52,7 +52,9 @@ public fun DataFrame.cumSum( public fun 
DataFrame.cumSum(vararg columns: KProperty<*>, skipNA: Boolean = defaultCumSumSkipNA): DataFrame = cumSum(skipNA) { columns.toColumnSet() } -public fun DataFrame.cumSum(skipNA: Boolean = defaultCumSumSkipNA): DataFrame = cumSum(skipNA) { allDfs() } +public fun DataFrame.cumSum(skipNA: Boolean = defaultCumSumSkipNA): DataFrame = cumSum(skipNA) { + cols { !it.isColumnGroup() }.recursively() +} // endregion @@ -77,7 +79,8 @@ public fun GroupBy.cumSum( skipNA: Boolean = defaultCumSumSkipNA, ): GroupBy = cumSum(skipNA) { columns.toColumnSet() } -public fun GroupBy.cumSum(skipNA: Boolean = defaultCumSumSkipNA): GroupBy = - cumSum(skipNA) { allDfs() } +public fun GroupBy.cumSum(skipNA: Boolean = defaultCumSumSkipNA): GroupBy = cumSum(skipNA) { + cols { !it.isColumnGroup() }.recursively() +} // endregion diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/describe.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/describe.kt index 793dc2f87..11615b113 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/describe.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/describe.kt @@ -39,7 +39,7 @@ public fun DataColumn.describe(): DataFrame = describe // region DataFrame -public fun DataFrame.describe(): DataFrame = describe { allDfs() } +public fun DataFrame.describe(): DataFrame = describe { cols { !it.isColumnGroup() }.recursively() } public fun DataFrame.describe(columns: ColumnsSelector): DataFrame = describeImpl(getColumnsWithPaths(columns)) diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/explode.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/explode.kt index a816fde4b..43214008f 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/explode.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/explode.kt @@ -10,7 +10,7 @@ import org.jetbrains.kotlinx.dataframe.columns.toColumnSet import org.jetbrains.kotlinx.dataframe.impl.api.explodeImpl import 
kotlin.reflect.KProperty -private val defaultExplodeColumns: ColumnsSelector<*, *> = { dfs { it.isList() || it.isFrameColumn() } } +private val defaultExplodeColumns: ColumnsSelector<*, *> = { cols { it.isList() || it.isFrameColumn() }.rec() } // region explode DataFrame diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/inferType.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/inferType.kt index 8453af76f..3e28c405c 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/inferType.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/inferType.kt @@ -14,7 +14,7 @@ public fun AnyCol.inferType(): DataColumn<*> = guessColumnType(name, toList(), t // region DataFrame -public fun DataFrame.inferType(): DataFrame = inferType { allDfs() } +public fun DataFrame.inferType(): DataFrame = inferType { cols { !it.isColumnGroup() }.recursively() } public fun DataFrame.inferType(columns: ColumnsSelector): DataFrame = replace(columns).with { it.inferType() } diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/join.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/join.kt index 25fb9147a..6fd14236b 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/join.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/join.kt @@ -2,11 +2,7 @@ package org.jetbrains.kotlinx.dataframe.api import org.jetbrains.kotlinx.dataframe.ColumnsContainer import org.jetbrains.kotlinx.dataframe.DataFrame -import org.jetbrains.kotlinx.dataframe.columns.ColumnReference -import org.jetbrains.kotlinx.dataframe.columns.ColumnResolutionContext -import org.jetbrains.kotlinx.dataframe.columns.ColumnSet -import org.jetbrains.kotlinx.dataframe.columns.ColumnWithPath -import org.jetbrains.kotlinx.dataframe.columns.toColumnSet +import org.jetbrains.kotlinx.dataframe.columns.* import org.jetbrains.kotlinx.dataframe.impl.api.joinImpl import kotlin.reflect.KProperty diff --git 
a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/parse.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/parse.kt index e4d086f18..a60747a77 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/parse.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/parse.kt @@ -55,7 +55,9 @@ public data class ParserOptions( public fun DataColumn.tryParse(options: ParserOptions? = null): DataColumn<*> = tryParseImpl(options) -public fun DataFrame.parse(options: ParserOptions? = null): DataFrame = parse(options) { allDfs() } +public fun DataFrame.parse(options: ParserOptions? = null): DataFrame = parse(options) { + cols { !it.isColumnGroup() }.recursively() +} public fun DataColumn.parse(options: ParserOptions? = null): DataColumn<*> = tryParse(options).also { if (it.typeClass == String::class) error("Can't guess column type") } diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/rename.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/rename.kt index 2b8c88cb2..1c06afae9 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/rename.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/rename.kt @@ -42,28 +42,29 @@ public fun DataFrame.rename(cols: Iterable>): Renam public data class RenameClause(val df: DataFrame, val columns: ColumnsSelector) -public fun DataFrame.renameToCamelCase(): DataFrame { +public fun DataFrame.renameToCamelCase(): DataFrame = this // recursively rename all column groups to camel case - return rename { - dfs { it.isColumnGroup() && it.name() matches DELIMITED_STRING_REGEX } + .rename { + groups { it.name() matches DELIMITED_STRING_REGEX }.recursively() }.toCamelCase() - // recursively rename all other columns to camel case - .rename { - dfs { !it.isColumnGroup() && it.name() matches DELIMITED_STRING_REGEX } - }.toCamelCase() - - // take all frame columns recursively and call renameToCamelCase() on all dataframes inside - .update { - dfsOf() - 
}.with { it.renameToCamelCase() } - - // convert all first chars of all columns to the lowercase - .rename { - allDfs() - }.into { - it.name.replaceFirstChar { it.lowercaseChar() } - } -} + + // recursively rename all other columns to camel case + .rename { + cols { !it.isColumnGroup() && it.name() matches DELIMITED_STRING_REGEX }.recursively() + }.toCamelCase() + + // take all frame columns recursively and call renameToCamelCase() on all dataframes inside + .update { + colsOf().recursively() + }.with { it.renameToCamelCase() } + + // convert all first chars of all columns to the lowercase + .rename { + cols { !it.isColumnGroup() }.recursively() + }.into { + it.name.replaceFirstChar { it.lowercaseChar() } + } + public fun RenameClause.into(vararg newColumns: ColumnReference<*>): DataFrame = into(*newColumns.map { it.name() }.toTypedArray()) diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/reorder.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/reorder.kt index ca32ebd92..0f6672e17 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/reorder.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/reorder.kt @@ -39,12 +39,17 @@ public fun > Reorder.byDesc(expression: ColumnExpr reorderImpl(true, expression) public fun > DataFrame.reorderColumnsBy( - dfs: Boolean = true, + recursively: Boolean = true, desc: Boolean = false, expression: Selector, -): DataFrame = Reorder(this, { if (dfs) allDfs(true) else all() }, dfs).reorderImpl(desc, expression) - -public fun DataFrame.reorderColumnsByName(dfs: Boolean = true, desc: Boolean = false): DataFrame = - reorderColumnsBy(dfs, desc) { name() } +): DataFrame = + Reorder( + df = this, + columns = { if (recursively) all().recursively() else all() }, + inFrameColumns = recursively, + ).reorderImpl(desc, expression) + +public fun DataFrame.reorderColumnsByName(recursively: Boolean = true, desc: Boolean = false): DataFrame = + reorderColumnsBy(recursively, desc) { 
name() } // endregion diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/replace.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/replace.kt index 0905c0f18..4e2d27e99 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/replace.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/replace.kt @@ -38,7 +38,7 @@ public fun DataFrame.replace(columns: Iterable>): R public fun DataFrame.replaceAll( vararg valuePairs: Pair, - columns: ColumnsSelector = { allDfs() }, + columns: ColumnsSelector = { cols { !it.isColumnGroup() }.recursively() }, ): DataFrame { val map = valuePairs.toMap() return update(columns).with { map[it] ?: it } diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/toDataFrame.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/toDataFrame.kt index c03c68178..0cd779e58 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/toDataFrame.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/toDataFrame.kt @@ -131,12 +131,12 @@ public fun Iterable>>.toDataFrameFromPairs(): AnyFra public interface TraversePropertiesDsl { /** - * Skip given [classes] during dfs traversal + * Skip given [classes] during recursive (dfs) traversal */ public fun exclude(vararg classes: KClass<*>) /** - * Skip given [properties] during dfs traversal + * Skip given [properties] during recursive (dfs) traversal */ public fun exclude(vararg properties: KProperty<*>) diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/xs.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/xs.kt index 3706590e7..75c843770 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/xs.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/xs.kt @@ -6,16 +6,22 @@ import org.jetbrains.kotlinx.dataframe.impl.api.xsImpl // region DataFrame -public fun DataFrame.xs(vararg keyValues: Any?): DataFrame = xs(*keyValues) { allDfs().take(keyValues.size) 
} +public fun DataFrame.xs(vararg keyValues: Any?): DataFrame = xs(*keyValues) { + cols { !it.isColumnGroup() }.recursively().take(keyValues.size) +} -public fun DataFrame.xs(vararg keyValues: C, keyColumns: ColumnsSelector): DataFrame = xsImpl(keyColumns, false, *keyValues) +public fun DataFrame.xs(vararg keyValues: C, keyColumns: ColumnsSelector): DataFrame = + xsImpl(keyColumns, false, *keyValues) // endregion // region GroupBy -public fun GroupBy.xs(vararg keyValues: Any?): GroupBy = xs(*keyValues) { allDfs().take(keyValues.size) } +public fun GroupBy.xs(vararg keyValues: Any?): GroupBy = xs(*keyValues) { + cols { !it.isColumnGroup() }.recursively().take(keyValues.size) +} -public fun GroupBy.xs(vararg keyValues: C, keyColumns: ColumnsSelector): GroupBy = xsImpl(*keyValues, keyColumns = keyColumns) +public fun GroupBy.xs(vararg keyValues: C, keyColumns: ColumnsSelector): GroupBy = + xsImpl(*keyValues, keyColumns = keyColumns) // endregion diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/columns/ColumnReference.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/columns/ColumnReference.kt index e65d38476..8ed0f7840 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/columns/ColumnReference.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/columns/ColumnReference.kt @@ -1,10 +1,6 @@ package org.jetbrains.kotlinx.dataframe.columns -import org.jetbrains.kotlinx.dataframe.AnyFrame -import org.jetbrains.kotlinx.dataframe.AnyRow -import org.jetbrains.kotlinx.dataframe.DataColumn -import org.jetbrains.kotlinx.dataframe.DataFrame -import org.jetbrains.kotlinx.dataframe.DataRow +import org.jetbrains.kotlinx.dataframe.* import org.jetbrains.kotlinx.dataframe.api.name import org.jetbrains.kotlinx.dataframe.impl.columnName import org.jetbrains.kotlinx.dataframe.impl.columns.RenamedColumnReference @@ -20,7 +16,8 @@ import kotlin.reflect.KProperty */ public interface ColumnReference : SingleColumn { - public operator fun 
getValue(thisRef: Any?, property: KProperty<*>): ColumnReference = renamedReference(property.columnName) + public operator fun getValue(thisRef: Any?, property: KProperty<*>): ColumnReference = + renamedReference(property.columnName) public fun name(): String @@ -32,13 +29,16 @@ public interface ColumnReference : SingleColumn { public fun getValueOrNull(row: AnyRow): C? = resolveFor(row.df())?.get(row.index()) - override fun resolveSingle(context: ColumnResolutionContext): ColumnWithPath? { - return context.df.getColumn(path(), context.unresolvedColumnsPolicy)?.addPath(path()) - } + override fun resolveSingle(context: ColumnResolutionContext): ColumnWithPath? = + context.df + .getColumn(path(), context.unresolvedColumnsPolicy) + ?.addPath(path()) } -internal fun ColumnReference.renamedReference(newName: String): ColumnReference = RenamedColumnReference(this, newName) +internal fun ColumnReference.renamedReference(newName: String): ColumnReference = + RenamedColumnReference(this, newName) internal fun ColumnReference<*>.shortPath() = ColumnPath(name) -internal fun ColumnReference.resolveFor(df: AnyFrame): ColumnWithPath? = resolveSingle(ColumnResolutionContext(df, UnresolvedColumnsPolicy.Skip)) +internal fun ColumnReference.resolveFor(df: AnyFrame): ColumnWithPath? 
= + resolveSingle(ColumnResolutionContext(df, UnresolvedColumnsPolicy.Skip)) diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/columns/ColumnSet.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/columns/ColumnSet.kt index 794f94aab..b534a61c1 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/columns/ColumnSet.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/columns/ColumnSet.kt @@ -3,21 +3,31 @@ package org.jetbrains.kotlinx.dataframe.columns import org.jetbrains.kotlinx.dataframe.ColumnsSelector import org.jetbrains.kotlinx.dataframe.DataColumn import org.jetbrains.kotlinx.dataframe.DataFrame +import org.jetbrains.kotlinx.dataframe.impl.columns.* /** + * ## ColumnSet * Entity that can be resolved into a list of [columns][DataColumn]. * * Used as a return type of [ColumnsSelector]. * @param C common type of resolved columns + * @see [SingleColumn] + * @see [TransformableColumnSet] + * @see [TransformableSingleColumn] */ public interface ColumnSet { + /** + * Resolves this [ColumnSet] as a [List]<[ColumnWithPath]<[C]>>. + * In many cases this function [transforms][ColumnSet.transform] a parent [ColumnSet] to reach + * the current [ColumnSet] result. 
+ */ public fun resolve(context: ColumnResolutionContext): List> } -public class ColumnResolutionContext internal constructor ( +public class ColumnResolutionContext internal constructor( internal val df: DataFrame<*>, - internal val unresolvedColumnsPolicy: UnresolvedColumnsPolicy + internal val unresolvedColumnsPolicy: UnresolvedColumnsPolicy, ) { public val allowMissingColumns: Boolean = unresolvedColumnsPolicy != UnresolvedColumnsPolicy.Fail diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/columns/ColumnWithPath.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/columns/ColumnWithPath.kt index 4ef6b1f21..45d841dc5 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/columns/ColumnWithPath.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/columns/ColumnWithPath.kt @@ -19,15 +19,24 @@ public interface ColumnWithPath : DataColumn { public fun depth(): Int = path.depth() - public fun getChild(accessor: ColumnReference): ColumnWithPath? = asColumnGroup().getColumnOrNull(accessor)?.addPath(path + accessor.path()) + public fun getChild(accessor: ColumnReference): ColumnWithPath? = + asColumnGroup().getColumnOrNull(accessor)?.addPath(path + accessor.path()) - public fun getChild(name: String): ColumnWithPath? = asColumnGroup().getColumnOrNull(name)?.addParentPath(path) + public fun getChild(name: String): ColumnWithPath? = + asColumnGroup().getColumnOrNull(name)?.addParentPath(path) - public fun getChild(index: Int): ColumnWithPath? = asColumnGroup().getColumnOrNull(index)?.addParentPath(path) + public fun getChild(index: Int): ColumnWithPath? = + asColumnGroup().getColumnOrNull(index)?.addParentPath(path) - public fun getChild(accessor: KProperty): ColumnWithPath? = asColumnGroup().getColumnOrNull(accessor)?.addParentPath(path) + public fun getChild(accessor: KProperty): ColumnWithPath? 
= + asColumnGroup().getColumnOrNull(accessor)?.addParentPath(path) - public fun children(): List> = if (isColumnGroup()) data.asColumnGroup().columns().map { it.addParentPath(path) } else emptyList() + public fun children(): List> = + if (isColumnGroup()) { + data.asColumnGroup().columns().map { it.addParentPath(path) } + } else { + emptyList() + } override fun path(): ColumnPath = path diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/columns/SingleColumn.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/columns/SingleColumn.kt index f822f9774..68a0615d9 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/columns/SingleColumn.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/columns/SingleColumn.kt @@ -1,15 +1,30 @@ package org.jetbrains.kotlinx.dataframe.columns import org.jetbrains.kotlinx.dataframe.DataColumn +import org.jetbrains.kotlinx.dataframe.api.isColumnGroup +import org.jetbrains.kotlinx.dataframe.impl.columns.* /** * Entity that can be [resolved][resolveSingle] into [DataColumn]. * * @param C Column [type][BaseColumn.type] of resolved column. + * @see [ColumnSet] + * @see [TransformableColumnSet] + * @see [TransformableSingleColumn] */ public interface SingleColumn : ColumnSet { - override fun resolve(context: ColumnResolutionContext): List> = resolveSingle(context)?.let { listOf(it) } ?: emptyList() + override fun resolve( + context: ColumnResolutionContext, + ): List> = resolveSingle(context)?.let { listOf(it) } ?: emptyList() public fun resolveSingle(context: ColumnResolutionContext): ColumnWithPath? } + +public fun ColumnSet<*>.isSingleColumn(): Boolean = this is SingleColumn<*> + +/** + * Returns true if [this] is a [SingleColumn] and [cols] consists of a single column group. 
+ */ +public fun ColumnSet<*>.isSingleColumnWithGroup(cols: List>): Boolean = + isSingleColumn() && cols.singleOrNull()?.isColumnGroup() == true diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/DataFrameReceiver.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/DataFrameReceiver.kt index 3345f87fe..25e877c02 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/DataFrameReceiver.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/DataFrameReceiver.kt @@ -1,27 +1,13 @@ package org.jetbrains.kotlinx.dataframe.impl -import org.jetbrains.kotlinx.dataframe.ColumnSelector -import org.jetbrains.kotlinx.dataframe.DataColumn -import org.jetbrains.kotlinx.dataframe.DataFrame -import org.jetbrains.kotlinx.dataframe.DataRow -import org.jetbrains.kotlinx.dataframe.api.asColumnGroup -import org.jetbrains.kotlinx.dataframe.api.asDataColumn -import org.jetbrains.kotlinx.dataframe.api.cast -import org.jetbrains.kotlinx.dataframe.api.isColumnGroup -import org.jetbrains.kotlinx.dataframe.api.pathOf -import org.jetbrains.kotlinx.dataframe.columns.ColumnGroup -import org.jetbrains.kotlinx.dataframe.columns.ColumnPath -import org.jetbrains.kotlinx.dataframe.columns.ColumnReference -import org.jetbrains.kotlinx.dataframe.columns.ColumnResolutionContext -import org.jetbrains.kotlinx.dataframe.columns.ColumnWithPath -import org.jetbrains.kotlinx.dataframe.columns.SingleColumn -import org.jetbrains.kotlinx.dataframe.columns.UnresolvedColumnsPolicy +import org.jetbrains.kotlinx.dataframe.* +import org.jetbrains.kotlinx.dataframe.api.* +import org.jetbrains.kotlinx.dataframe.columns.* import org.jetbrains.kotlinx.dataframe.impl.columns.ColumnGroupWithParent import org.jetbrains.kotlinx.dataframe.impl.columns.ColumnGroupWithPathImpl import org.jetbrains.kotlinx.dataframe.impl.columns.addPath import org.jetbrains.kotlinx.dataframe.impl.columns.missing.MissingColumnGroup import 
org.jetbrains.kotlinx.dataframe.impl.columns.missing.MissingDataColumn -import org.jetbrains.kotlinx.dataframe.nrow private fun DataFrame.unbox(): DataFrame = when (this) { is ColumnGroupWithParent -> source.unbox() @@ -29,11 +15,12 @@ private fun DataFrame.unbox(): DataFrame = when (this) { else -> this } -internal abstract class DataFrameReceiverBase(protected val df: DataFrame) : DataFrameImpl(df.columns(), df.nrow) +internal abstract class DataFrameReceiverBase(protected val df: DataFrame) : + DataFrameImpl(df.columns(), df.nrow) internal open class DataFrameReceiver( source: DataFrame, - private val unresolvedColumnsPolicy: UnresolvedColumnsPolicy + private val unresolvedColumnsPolicy: UnresolvedColumnsPolicy, ) : DataFrameReceiverBase(source.unbox()), SingleColumn> { private fun DataColumn?.check(path: ColumnPath): DataColumn = @@ -61,11 +48,14 @@ internal open class DataFrameReceiver( } override fun getColumnOrNull(path: ColumnPath) = super.getColumnOrNull(path).check(path) - override fun getColumnOrNull(column: ColumnSelector) = getColumnsImpl(unresolvedColumnsPolicy, column).singleOrNull() + override fun getColumnOrNull(column: ColumnSelector) = + getColumnsImpl(unresolvedColumnsPolicy, column).singleOrNull() - override fun resolveSingle(context: ColumnResolutionContext): ColumnWithPath>? = DataColumn.createColumnGroup("", df).addPath(emptyPath()) + override fun resolveSingle(context: ColumnResolutionContext): ColumnWithPath>? 
= + DataColumn.createColumnGroup("", df).addPath(emptyPath()) - override fun columns() = df.columns().map { if (it.isColumnGroup()) ColumnGroupWithParent(null, it.asColumnGroup()) else it } + override fun columns() = + df.columns().map { if (it.isColumnGroup()) ColumnGroupWithParent(null, it.asColumnGroup()) else it } override fun columnNames() = df.columnNames() diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/GroupByImpl.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/GroupByImpl.kt index 6675dcc0c..585dc87fa 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/GroupByImpl.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/GroupByImpl.kt @@ -6,18 +6,7 @@ import org.jetbrains.kotlinx.dataframe.DataFrame import org.jetbrains.kotlinx.dataframe.Selector import org.jetbrains.kotlinx.dataframe.aggregation.AggregateGroupedBody import org.jetbrains.kotlinx.dataframe.aggregation.NamedValue -import org.jetbrains.kotlinx.dataframe.api.GroupBy -import org.jetbrains.kotlinx.dataframe.api.GroupedRowFilter -import org.jetbrains.kotlinx.dataframe.api.asGroupBy -import org.jetbrains.kotlinx.dataframe.api.cast -import org.jetbrains.kotlinx.dataframe.api.concat -import org.jetbrains.kotlinx.dataframe.api.convert -import org.jetbrains.kotlinx.dataframe.api.getColumn -import org.jetbrains.kotlinx.dataframe.api.getColumnsWithPaths -import org.jetbrains.kotlinx.dataframe.api.into -import org.jetbrains.kotlinx.dataframe.api.minus -import org.jetbrains.kotlinx.dataframe.api.pathOf -import org.jetbrains.kotlinx.dataframe.api.rename +import org.jetbrains.kotlinx.dataframe.api.* import org.jetbrains.kotlinx.dataframe.columns.FrameColumn import org.jetbrains.kotlinx.dataframe.impl.aggregation.AggregatableInternal import org.jetbrains.kotlinx.dataframe.impl.aggregation.GroupByReceiverImpl @@ -95,7 +84,7 @@ internal fun aggregateGroupBy( if (!removeColumns) removedNode.data.wasRemoved = false - val columnsToInsert = 
groupedFrame.getColumnsWithPaths { allDfs() }.map { + val columnsToInsert = groupedFrame.getColumnsWithPaths { cols { !it.isColumnGroup() }.rec() }.map { ColumnToInsert(insertPath + it.path, it, removedNode) } val src = if (removeColumns) removed.df else df diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/aggregation/ConfiguredAggregateColumn.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/aggregation/ConfiguredAggregateColumn.kt index daae75d99..6a0f6ea05 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/aggregation/ConfiguredAggregateColumn.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/aggregation/ConfiguredAggregateColumn.kt @@ -1,11 +1,6 @@ package org.jetbrains.kotlinx.dataframe.impl.aggregation -import org.jetbrains.kotlinx.dataframe.api.name -import org.jetbrains.kotlinx.dataframe.columns.ColumnPath -import org.jetbrains.kotlinx.dataframe.columns.ColumnResolutionContext -import org.jetbrains.kotlinx.dataframe.columns.ColumnSet -import org.jetbrains.kotlinx.dataframe.columns.ColumnWithPath -import org.jetbrains.kotlinx.dataframe.columns.shortPath +import org.jetbrains.kotlinx.dataframe.columns.* internal class ConfiguredAggregateColumn private constructor( val columns: ColumnSet, @@ -22,7 +17,7 @@ internal class ConfiguredAggregateColumn private constructor( else -> AggregateColumnDescriptor(col, default, if (keepName) newPath?.plus(col.name) else newPath) } - override fun resolve(context: ColumnResolutionContext): List> { + private fun resolve(context: ColumnResolutionContext, columns: ColumnSet): List> { val resolved = columns.resolve(context) if (resolved.size == 1) return listOf(resolved[0].toDescriptor(false)) else return resolved.map { @@ -30,6 +25,9 @@ internal class ConfiguredAggregateColumn private constructor( } } + override fun resolve(context: ColumnResolutionContext): List> = + resolve(context, columns) + companion object { fun withDefault(src: ColumnSet, default: C?): 
ColumnSet = when (src) { diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/corr.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/corr.kt index 2a3143be9..cac43096d 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/corr.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/corr.kt @@ -2,16 +2,7 @@ package org.jetbrains.kotlinx.dataframe.impl.api import org.jetbrains.kotlinx.dataframe.ColumnsSelector import org.jetbrains.kotlinx.dataframe.DataFrame -import org.jetbrains.kotlinx.dataframe.api.Corr -import org.jetbrains.kotlinx.dataframe.api.cast -import org.jetbrains.kotlinx.dataframe.api.castToNotNullable -import org.jetbrains.kotlinx.dataframe.api.convertToDouble -import org.jetbrains.kotlinx.dataframe.api.dataFrameOf -import org.jetbrains.kotlinx.dataframe.api.getColumnsWithPaths -import org.jetbrains.kotlinx.dataframe.api.isColumnGroup -import org.jetbrains.kotlinx.dataframe.api.isSuitableForCorr -import org.jetbrains.kotlinx.dataframe.api.name -import org.jetbrains.kotlinx.dataframe.api.toValueColumn +import org.jetbrains.kotlinx.dataframe.api.* import org.jetbrains.kotlinx.dataframe.columns.ColumnPath import org.jetbrains.kotlinx.dataframe.columns.ColumnWithPath import org.jetbrains.kotlinx.dataframe.math.varianceAndMean @@ -25,7 +16,7 @@ internal fun Corr.corrImpl(otherColumns: ColumnsSelector): // extract nested number columns from ColumnGroups if (it.isColumnGroup()) { val groupPath = it.path - df.getColumnsWithPaths { groupPath.dfs { it.isSuitableForCorr() } }.map { it.cast() } + df.getColumnsWithPaths { groupPath.cols { it.isSuitableForCorr() }.rec() }.map { it.cast() } } else listOf(it) } diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/describe.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/describe.kt index 671480716..74b0567b2 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/describe.kt +++ 
b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/describe.kt @@ -31,12 +31,25 @@ import org.jetbrains.kotlinx.dataframe.type import kotlin.reflect.jvm.jvmErasure internal fun describeImpl(cols: List): DataFrame { - fun List.collectAll(dfs: Boolean): List = flatMap { col -> + fun List.collectAll(recursively: Boolean): List = flatMap { col -> when (col.kind) { - ColumnKind.Frame -> col.asAnyFrameColumn().concat().columns().map { - it.addPath(col.path() + it.name) - }.collectAll(true) - ColumnKind.Group -> if (dfs) col.asColumnGroup().columns().map { it.addPath(col.path() + it.name) }.collectAll(true) else listOf(col) + ColumnKind.Frame -> + col.asAnyFrameColumn() + .concat() + .columns() + .map { it.addPath(col.path() + it.name) } + .collectAll(true) + + ColumnKind.Group -> + if (recursively) { + col.asColumnGroup() + .columns() + .map { it.addPath(col.path() + it.name) } + .collectAll(true) + } else { + listOf(col) + } + ColumnKind.Value -> listOf(col) } } @@ -55,14 +68,18 @@ internal fun describeImpl(cols: List): DataFrame { ColumnDescription::count from { it.size } ColumnDescription::unique from { it.countDistinct() } ColumnDescription::nulls from { it.values.count { it == null } } - ColumnDescription::top from inferType { it.values.filterNotNull().groupBy { it }.maxByOrNull { it.value.size }?.key } + ColumnDescription::top from inferType { + it.values.filterNotNull().groupBy { it }.maxByOrNull { it.value.size }?.key + } if (hasNumeric) { ColumnDescription::mean from { if (it.isNumber()) it.asNumbers().mean() else null } ColumnDescription::std from { if (it.isNumber()) it.asNumbers().std() else null } } if (hasComparable) { ColumnDescription::min from inferType { if (it.isComparable()) it.asComparable().minOrNull() else null } - ColumnDescription::median from inferType { if (it.isComparable()) it.asComparable().medianOrNull() else null } + ColumnDescription::median from inferType { + if (it.isComparable()) it.asComparable().medianOrNull() else null 
+ } ColumnDescription::max from inferType { if (it.isComparable()) it.asComparable().maxOrNull() else null } } } diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/flatten.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/flatten.kt index 5ed842f65..d410fb525 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/flatten.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/flatten.kt @@ -16,17 +16,19 @@ internal fun DataFrame.flattenImpl( columns: ColumnsSelector, keepParentNameForColumns: Boolean = false ): DataFrame { - val rootColumns = getColumnsWithPaths { columns.toColumnSet().filter { it.isColumnGroup() }.top() } + val rootColumns = getColumnsWithPaths { + columns.toColumnSet().filter { it.isColumnGroup() }.roots() + } val rootPrefixes = rootColumns.map { it.path }.toSet() - val nameGenerators = rootPrefixes.map { it.dropLast() }.distinct().associate { path -> + val nameGenerators = rootPrefixes.map { it.dropLast() }.distinct().associateWith { path -> val usedNames = get(path).asColumnGroup().columns().filter { path + it.name() !in rootPrefixes }.map { it.name() } - path to ColumnNameGenerator(usedNames) + ColumnNameGenerator(usedNames) } fun getRootPrefix(path: ColumnPath) = (1 until path.size).asSequence().map { path.take(it) }.first { rootPrefixes.contains(it) } - val result = move { rootPrefixes.toColumnSet().allDfs() } + val result = move { rootPrefixes.toColumnSet().cols { !it.isColumnGroup() }.recursively() } .into { val targetPath = getRootPrefix(it.path).dropLast(1) val nameGen = nameGenerators[targetPath]!! 
diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/join.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/join.kt index 4624fa6de..daad1cde2 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/join.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/join.kt @@ -74,8 +74,8 @@ internal fun DataFrame.joinImpl( val leftCol = leftJoinColumns[i] val rightCol = rightJoinColumns[i] if (leftCol.isColumnGroup() && rightCol.isColumnGroup()) { - val leftColumns = getColumnsWithPaths { leftCol.allDfs() } - val rightColumns = other.getColumnsWithPaths { rightCol.allDfs() } + val leftColumns = getColumnsWithPaths { leftCol.cols { !it.isColumnGroup() }.recursively()} + val rightColumns = other.getColumnsWithPaths { rightCol.cols { !it.isColumnGroup() }.recursively() } val leftPrefixLength = leftCol.path.size val rightPrefixLength = rightCol.path.size @@ -147,12 +147,13 @@ internal fun DataFrame.joinImpl( outputRowsCount += rightUnmatchedCount } - val leftColumns = getColumnsWithPaths { allDfs() } + val leftColumns = getColumnsWithPaths { cols { !it.isColumnGroup() }.recursively() } - val rightJoinColumnPaths = allRightJoinColumns.map { it.path to it.data }.toMap() + val rightJoinColumnPaths = allRightJoinColumns.associate { it.path to it.data } val newRightColumns = - if (addNewColumns) other.getColumnsWithPaths { dfs { !it.isColumnGroup() && !rightJoinColumnPaths.contains(it.path) } } else emptyList() + if (addNewColumns) other.getColumnsWithPaths { cols { !it.isColumnGroup() && !rightJoinColumnPaths.contains(it.path) }.rec() } + else emptyList() // for every column index from the left dataframe store matching column from the right dataframe val leftToRightColumns = leftColumns.map { rightJoinColumnPaths[pathMapping[it.path()]] } diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/pivot.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/pivot.kt index 
a29c3aa41..292320ce1 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/pivot.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/pivot.kt @@ -8,12 +8,8 @@ import org.jetbrains.kotlinx.dataframe.api.PivotColumnsSelector import org.jetbrains.kotlinx.dataframe.api.forEach import org.jetbrains.kotlinx.dataframe.api.groupBy import org.jetbrains.kotlinx.dataframe.api.toPath -import org.jetbrains.kotlinx.dataframe.columns.ColumnPath -import org.jetbrains.kotlinx.dataframe.columns.ColumnResolutionContext -import org.jetbrains.kotlinx.dataframe.columns.ColumnSet -import org.jetbrains.kotlinx.dataframe.columns.ColumnWithPath +import org.jetbrains.kotlinx.dataframe.columns.* import org.jetbrains.kotlinx.dataframe.columns.UnresolvedColumnsPolicy -import org.jetbrains.kotlinx.dataframe.columns.toColumnSet import org.jetbrains.kotlinx.dataframe.impl.aggregation.GroupByReceiverImpl import org.jetbrains.kotlinx.dataframe.impl.aggregation.receivers.AggregateInternalDsl import org.jetbrains.kotlinx.dataframe.impl.aggregation.receivers.AggregatePivotDslImpl diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/sort.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/sort.kt index fac8e3967..a83d14824 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/sort.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/api/sort.kt @@ -9,11 +9,8 @@ import org.jetbrains.kotlinx.dataframe.api.castFrameColumn import org.jetbrains.kotlinx.dataframe.api.getFrameColumn import org.jetbrains.kotlinx.dataframe.api.update import org.jetbrains.kotlinx.dataframe.api.with -import org.jetbrains.kotlinx.dataframe.columns.ColumnResolutionContext -import org.jetbrains.kotlinx.dataframe.columns.ColumnSet -import org.jetbrains.kotlinx.dataframe.columns.ColumnWithPath +import org.jetbrains.kotlinx.dataframe.columns.* import org.jetbrains.kotlinx.dataframe.columns.UnresolvedColumnsPolicy -import 
org.jetbrains.kotlinx.dataframe.columns.ValueColumn import org.jetbrains.kotlinx.dataframe.impl.columns.addPath import org.jetbrains.kotlinx.dataframe.impl.columns.assertIsComparable import org.jetbrains.kotlinx.dataframe.impl.columns.missing.MissingColumnGroup @@ -106,7 +103,8 @@ internal fun ColumnWithPath.addFlag(flag: SortFlag): ColumnWithPath { } internal class ColumnsWithSortFlag(val column: ColumnSet, val flag: SortFlag) : ColumnSet { - override fun resolve(context: ColumnResolutionContext) = column.resolve(context).map { it.addFlag(flag) } + override fun resolve(context: ColumnResolutionContext) = + column.resolve(context).map { it.addFlag(flag) } } internal class SortColumnDescriptor( diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/columns/ColumnAccessorImpl.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/columns/ColumnAccessorImpl.kt index 3f6e14e82..dbc7d0944 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/columns/ColumnAccessorImpl.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/columns/ColumnAccessorImpl.kt @@ -6,11 +6,7 @@ import org.jetbrains.kotlinx.dataframe.api.asColumnGroup import org.jetbrains.kotlinx.dataframe.api.cast import org.jetbrains.kotlinx.dataframe.api.isColumnGroup import org.jetbrains.kotlinx.dataframe.api.toPath -import org.jetbrains.kotlinx.dataframe.columns.ColumnAccessor -import org.jetbrains.kotlinx.dataframe.columns.ColumnPath -import org.jetbrains.kotlinx.dataframe.columns.ColumnReference -import org.jetbrains.kotlinx.dataframe.columns.ColumnResolutionContext -import org.jetbrains.kotlinx.dataframe.columns.ColumnWithPath +import org.jetbrains.kotlinx.dataframe.columns.* internal class ColumnAccessorImpl(val path: ColumnPath) : ColumnAccessor { diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/columns/ColumnsList.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/columns/ColumnsList.kt index 4fe7c9885..35d15fc7c 
100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/columns/ColumnsList.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/columns/ColumnsList.kt @@ -6,5 +6,6 @@ import org.jetbrains.kotlinx.dataframe.columns.ColumnSet internal class ColumnsList(val columns: List>) : ColumnSet { constructor(vararg columns: ColumnSet) : this(columns.toList()) - override fun resolve(context: ColumnResolutionContext) = columns.flatMap { it.resolve(context) } + override fun resolve(context: ColumnResolutionContext) = + columns.flatMap { it.resolve(context) } } diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/columns/Recursively.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/columns/Recursively.kt new file mode 100644 index 000000000..5f6b1de32 --- /dev/null +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/columns/Recursively.kt @@ -0,0 +1,92 @@ +package org.jetbrains.kotlinx.dataframe.impl.columns + +import org.jetbrains.kotlinx.dataframe.api.allInternal +import org.jetbrains.kotlinx.dataframe.api.isColumnGroup +import org.jetbrains.kotlinx.dataframe.columns.* +import org.jetbrains.kotlinx.dataframe.impl.columns.tree.flattenRecursively + +/** + * Recursively implementation for [TransformableColumnSet]. + * This converts a [TransformableColumnSet] into a [ColumnSet] by redirecting [ColumnSet.resolve] + * to [TransformableColumnSet.transformResolve] with a correctly configured [RecursivelyTransformer]. + */ +internal fun TransformableColumnSet.recursivelyImpl( + includeGroups: Boolean = true, + includeTopLevel: Boolean = true, +): ColumnSet = object : ColumnSet { + + override fun resolve(context: ColumnResolutionContext): List> = + this@recursivelyImpl.transformResolve( + context = context, + transformer = RecursivelyTransformer( + includeGroups = includeGroups, + includeTopLevel = includeTopLevel, + ), + ) +} + +/** + * Recursively implementation for [TransformableSingleColumn]. 
+ * This converts a [TransformableSingleColumn] into a [SingleColumn] by redirecting [SingleColumn.resolveSingle] + * to [TransformableSingleColumn.transformResolveSingle] with a correctly configured [RecursivelyTransformer]. + */ +internal fun TransformableSingleColumn.recursivelyImpl( + includeGroups: Boolean = true, + includeTopLevel: Boolean = true, +): SingleColumn = object : SingleColumn { + + override fun resolveSingle(context: ColumnResolutionContext): ColumnWithPath? = + this@recursivelyImpl.transformResolveSingle( + context = context, + transformer = RecursivelyTransformer( + includeGroups = includeGroups, + includeTopLevel = includeTopLevel, + ), + ) +} + +/** + * ## Recursively transformer. + * A [ColumnSetTransformer] implementation around the [ColumnSet.flattenRecursively] function. + * Created only using [recursivelyImpl]. + */ +private class RecursivelyTransformer( + val includeGroups: Boolean = true, + val includeTopLevel: Boolean = true, +) : ColumnSetTransformer { + + override fun transform(columnSet: ColumnSet<*>): ColumnSet<*> = + columnSet.flattenRecursively( + includeGroups = includeGroups, + includeTopLevel = includeTopLevel, + ) + + override fun transformSingle(singleColumn: SingleColumn<*>): ColumnSet<*> = + singleColumn.flattenRecursively( + includeGroups = includeGroups, + includeTopLevel = includeTopLevel, + ) +} + +/** + * Flattens a [ColumnSet]/[SingleColumn] recursively. + * + * If [this] is a [SingleColumn] containing a single [ColumnGroup], the "top-level" is + * considered to be the [ColumnGroup]'s children, otherwise, if this is a [ColumnSet], + * the "top-level" is considered to be the columns in the [ColumnSet]. + * + * @param includeGroups Whether to include [ColumnGroup]s in the result. + * @param includeTopLevel Whether to include the "top-level" columns in the result. 
+ */ +internal fun ColumnSet<*>.flattenRecursively( + includeGroups: Boolean = true, + includeTopLevel: Boolean = true, +): ColumnSet<*> = allInternal().transform { cols -> + if (includeTopLevel) { + cols.flattenRecursively() + } else { + cols + .filter { it.isColumnGroup() } + .flatMap { it.children().flattenRecursively() } + }.filter { includeGroups || !it.isColumnGroup() } +} diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/columns/TransformableColumnSet.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/columns/TransformableColumnSet.kt new file mode 100644 index 000000000..37b6e02e3 --- /dev/null +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/columns/TransformableColumnSet.kt @@ -0,0 +1,58 @@ +package org.jetbrains.kotlinx.dataframe.impl.columns + +import org.jetbrains.kotlinx.dataframe.api.ColumnsSelectionDsl +import org.jetbrains.kotlinx.dataframe.columns.ColumnResolutionContext +import org.jetbrains.kotlinx.dataframe.columns.ColumnSet +import org.jetbrains.kotlinx.dataframe.columns.ColumnWithPath +import org.jetbrains.kotlinx.dataframe.columns.SingleColumn + +/** + * ## Transformable ColumnSet + * This type of [ColumnSet] can be [transformed][transformResolve] before being resolved. + * + * This is especially useful for calls like + * [cols { }][ColumnsSelectionDsl.cols].[recursively()][ColumnsSelectionDsl.recursively], + * where [recursively][ColumnsSelectionDsl.recursively] modifies the [ColumnSet][ColumnSet] + * that [cols { }][ColumnsSelectionDsl.cols] operates on before it's evaluated. + * + * @see [ColumnSet] + * @see [TransformableSingleColumn] + * @see [SingleColumn] + */ +public interface TransformableColumnSet : ColumnSet { + public fun transformResolve( + context: ColumnResolutionContext, + transformer: ColumnSetTransformer, + ): List> +} + +/** + * ## Transformable SingleColumn + * This type of [SingleColumn] can be [transformed][transformResolveSingle] before being resolved. 
+ * + * This is especially useful for calls like + * [first { }][ColumnsSelectionDsl.first].[recursively()][ColumnsSelectionDsl.recursively], + * where [recursively][ColumnsSelectionDsl.recursively] modifies the [ColumnSet][ColumnSet] + * that [first { }][ColumnsSelectionDsl.first] operates on before it's evaluated. + * + * @see [SingleColumn] + * @see [TransformableColumnSet] + * @see [ColumnSet] + */ +public interface TransformableSingleColumn : SingleColumn { + public fun transformResolveSingle( + context: ColumnResolutionContext, + transformer: ColumnSetTransformer, + ): ColumnWithPath? +} + +/** + * ## Column set transformer. + * This contains implementations for both [transform][ColumnSet.transform] and + * [transformSingle][SingleColumn.transformSingle] and can be passed around. + */ +public interface ColumnSetTransformer { + public fun transform(columnSet: ColumnSet<*>): ColumnSet<*> + + public fun transformSingle(singleColumn: SingleColumn<*>): ColumnSet<*> +} diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/columns/Utils.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/columns/Utils.kt index e30b80ac9..197e9092d 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/columns/Utils.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/columns/Utils.kt @@ -9,27 +9,12 @@ import org.jetbrains.kotlinx.dataframe.DataRow import org.jetbrains.kotlinx.dataframe.api.cast import org.jetbrains.kotlinx.dataframe.api.name import org.jetbrains.kotlinx.dataframe.api.pathOf -import org.jetbrains.kotlinx.dataframe.columns.BaseColumn -import org.jetbrains.kotlinx.dataframe.columns.ColumnGroup -import org.jetbrains.kotlinx.dataframe.columns.ColumnKind -import org.jetbrains.kotlinx.dataframe.columns.ColumnPath -import org.jetbrains.kotlinx.dataframe.columns.ColumnResolutionContext -import org.jetbrains.kotlinx.dataframe.columns.ColumnSet -import org.jetbrains.kotlinx.dataframe.columns.ColumnWithPath -import 
org.jetbrains.kotlinx.dataframe.columns.FrameColumn -import org.jetbrains.kotlinx.dataframe.columns.SingleColumn -import org.jetbrains.kotlinx.dataframe.columns.UnresolvedColumnsPolicy -import org.jetbrains.kotlinx.dataframe.columns.ValueColumn +import org.jetbrains.kotlinx.dataframe.columns.* import org.jetbrains.kotlinx.dataframe.columns.values import org.jetbrains.kotlinx.dataframe.impl.DataFrameImpl import org.jetbrains.kotlinx.dataframe.impl.asNullable import org.jetbrains.kotlinx.dataframe.impl.columns.missing.MissingDataColumn -import org.jetbrains.kotlinx.dataframe.impl.columns.tree.ColumnPosition -import org.jetbrains.kotlinx.dataframe.impl.columns.tree.TreeNode -import org.jetbrains.kotlinx.dataframe.impl.columns.tree.collectTree -import org.jetbrains.kotlinx.dataframe.impl.columns.tree.getOrPut -import org.jetbrains.kotlinx.dataframe.impl.columns.tree.put -import org.jetbrains.kotlinx.dataframe.impl.columns.tree.topDfs +import org.jetbrains.kotlinx.dataframe.impl.columns.tree.* import org.jetbrains.kotlinx.dataframe.impl.equalsByElement import org.jetbrains.kotlinx.dataframe.impl.rollingHash import org.jetbrains.kotlinx.dataframe.nrow @@ -93,34 +78,108 @@ internal fun DataColumn.assertIsComparable(): DataColumn { return this } -internal fun SingleColumn.transformSingle(converter: (ColumnWithPath) -> List>): ColumnSet = - object : ColumnSet { - override fun resolve(context: ColumnResolutionContext): List> = - this@transformSingle.resolveSingle(context)?.let { converter(it) } ?: emptyList() - } +/** + * Applies a transformation on [this] [SingleColumn] by converting its + * single [ColumnWithPath]<[A]> to [List]<[ColumnWithPath]<[B]>] using [converter]. + * Since [converter] allows you to return multiple columns, the result is turned into a [ColumnSet]<[B]>. 
+ */ +internal fun SingleColumn.transformSingle( + converter: (ColumnWithPath) -> List>, +): ColumnSet = object : ColumnSet { + override fun resolve(context: ColumnResolutionContext): List> = + this@transformSingle + .resolveSingle(context) + ?.let(converter) + ?: emptyList() +} -internal fun ColumnSet.transform(converter: (List>) -> List>): ColumnSet = - object : ColumnSet { - override fun resolve(context: ColumnResolutionContext) = converter(this@transform.resolve(context)) - } +/** + * Applies a transformation on [this] by converting its [List]<[ColumnWithPath]<[A]>] to [List]<[ColumnWithPath]<[B]>] + * using [converter]. + * + * The result can either be used as a normal [ColumnSet]<[B]>, + * which resolves [this] and then applies [converter] on the result, + * + * or it can be used as a [TransformableColumnSet]<[B]>, where a [ColumnSetTransformer] can be injected before + * the [converter] is applied. + */ +internal fun ColumnSet.transform( + converter: (List>) -> List>, +): TransformableColumnSet = object : TransformableColumnSet { + override fun resolve(context: ColumnResolutionContext) = + this@transform + .resolve(context) + .let(converter) + + override fun transformResolve( + context: ColumnResolutionContext, + transformer: ColumnSetTransformer, + ): List> = + transformer.transform(this@transform) + .resolve(context) + .let { converter(it as List>) } +} +/** + * Applies a transformation on [this] by converting its [List]<[ColumnWithPath]<[A]>] to [List]<[ColumnWithPath]<[B]>] + * using [converter], but also providing the [ColumnResolutionContext] to the converter. + * + * The result can either be used as a normal [ColumnSet]<[B]>, + * which resolves [this] and then applies [converter] on the result, + * + * or it can be used as a [TransformableColumnSet]<[B]>, where a [ColumnSetTransformer] can be injected before + * the [converter] is applied. 
+ */ internal fun ColumnSet.transformWithContext( converter: ColumnResolutionContext.(List>) -> List>, -): ColumnSet = object : ColumnSet { +): TransformableColumnSet = object : TransformableColumnSet { override fun resolve(context: ColumnResolutionContext) = - converter(context, this@transformWithContext.resolve(context)) + this@transformWithContext + .resolve(context) + .let { converter(context, it) } + + override fun transformResolve( + context: ColumnResolutionContext, + transformer: ColumnSetTransformer, + ): List> = + transformer.transform(this@transformWithContext) + .resolve(context) + .let { converter(context, it as List>) } } -internal fun ColumnSet.singleImpl() = object : SingleColumn { - override fun resolveSingle(context: ColumnResolutionContext): ColumnWithPath? { - return this@singleImpl.resolve(context).singleOrNull() - } +/** + * Converts [this] [ColumnSet] to a [SingleColumn]. + * [resolveSingle] will return the single column of [this] if there is only one, else it will return `null`. + * If the result used as a [ColumnSet], `null` will be converted to an empty list. + */ +internal fun ColumnSet.singleImpl(): SingleColumn = object : SingleColumn { + override fun resolveSingle(context: ColumnResolutionContext): ColumnWithPath? = + this@singleImpl.resolve(context).singleOrNull() } -internal fun ColumnSet.getAt(index: Int) = object : SingleColumn { - override fun resolveSingle(context: ColumnResolutionContext): ColumnWithPath? { - return this@getAt.resolve(context).getOrNull(index) +/** + * Same as [singleImpl], however, it passes any [ColumnSetTransformer] back to [this] if it is supplied. + */ +internal fun TransformableColumnSet.singleWithTransformerImpl(): TransformableSingleColumn = + object : TransformableSingleColumn { + override fun resolveSingle(context: ColumnResolutionContext): ColumnWithPath? 
= + this@singleWithTransformerImpl.resolve(context).singleOrNull() + + override fun transformResolveSingle( + context: ColumnResolutionContext, + transformer: ColumnSetTransformer, + ): ColumnWithPath? = + this@singleWithTransformerImpl.transformResolve( + context = context, + transformer = transformer, + ).singleOrNull() } + +internal fun ColumnSet.getAt(index: Int): SingleColumn = object : SingleColumn { + override fun resolveSingle(context: ColumnResolutionContext): ColumnWithPath? = + this@getAt + .resolve(context) + .getOrNull(index) } internal fun ColumnSet.getChildrenAt(index: Int): ColumnSet = @@ -146,10 +205,16 @@ internal fun ColumnsContainer<*>.getColumn(path: ColumnPath, policy: Unresol UnresolvedColumnsPolicy.Create -> DataColumn.empty().cast() } -internal fun List>.top(): List> { - val root = TreeNode.createRoot?>(null) - forEach { root.put(it.path, it) } - return root.topDfs { it.data != null }.map { it.data!! } +/** + * Returns a sub-list of columns that are roots of the trees of columns. + * + * In practice, this means that if a column in [this] is a child of another column in [this], + * it will not be included in the result. + */ +internal fun List>.roots(): List> { + val emptyRoot = TreeNode.createRoot?>(data = null) + this.forEach { emptyRoot.put(it.path, it) } + return emptyRoot.topmostChildren { it.data != null }.map { it.data!! 
} } internal fun List>.allColumnsExcept(columns: Iterable>): List> { @@ -157,14 +222,14 @@ internal fun List>.allColumnsExcept(columns: Iterable ColumnSet.resolve( df: DataFrame<*>, - unresolvedColumnsPolicy: UnresolvedColumnsPolicy = UnresolvedColumnsPolicy.Fail + unresolvedColumnsPolicy: UnresolvedColumnsPolicy = UnresolvedColumnsPolicy.Fail, ) = resolve(ColumnResolutionContext(df, unresolvedColumnsPolicy)) internal fun SingleColumn.resolveSingle( df: DataFrame<*>, - unresolvedColumnsPolicy: UnresolvedColumnsPolicy = UnresolvedColumnsPolicy.Fail + unresolvedColumnsPolicy: UnresolvedColumnsPolicy = UnresolvedColumnsPolicy.Fail, ): ColumnWithPath? = resolveSingle(ColumnResolutionContext(df, unresolvedColumnsPolicy)) diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/columns/ValueColumnImpl.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/columns/ValueColumnImpl.kt index b21d8f6f6..49253bf0b 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/columns/ValueColumnImpl.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/columns/ValueColumnImpl.kt @@ -2,9 +2,7 @@ package org.jetbrains.kotlinx.dataframe.impl.columns import org.jetbrains.kotlinx.dataframe.AnyRow import org.jetbrains.kotlinx.dataframe.DataColumn -import org.jetbrains.kotlinx.dataframe.columns.ColumnGroup -import org.jetbrains.kotlinx.dataframe.columns.ColumnResolutionContext -import org.jetbrains.kotlinx.dataframe.columns.ValueColumn +import org.jetbrains.kotlinx.dataframe.columns.* import kotlin.reflect.KType import kotlin.reflect.full.withNullability @@ -13,9 +11,8 @@ internal open class ValueColumnImpl( name: String, type: KType, val defaultValue: T? = null, - distinct: Lazy>? = null -) : - DataColumnImpl(values, name, type, distinct), ValueColumn { + distinct: Lazy>? 
= null, +) : DataColumnImpl(values, name, type, distinct), ValueColumn { override fun distinct() = ValueColumnImpl(toSet().toList(), name, type, defaultValue, distinct) @@ -40,7 +37,8 @@ internal open class ValueColumnImpl( return createWithValues(newValues, nullable) } - override fun get(columnName: String) = throw UnsupportedOperationException("Can not get nested column '$columnName' from ValueColumn '$name'") + override fun get(columnName: String) = + throw UnsupportedOperationException("Can not get nested column '$columnName' from ValueColumn '$name'") override operator fun get(range: IntRange): ValueColumn = super.get(range) as ValueColumn @@ -50,12 +48,13 @@ internal open class ValueColumnImpl( } internal class ResolvingValueColumn( - override val source: ValueColumn + override val source: ValueColumn, ) : ValueColumn by source, ForceResolvedColumn { override fun resolve(context: ColumnResolutionContext) = super.resolve(context) - override fun resolveSingle(context: ColumnResolutionContext) = context.df.getColumn(source.name(), context.unresolvedColumnsPolicy)?.addPath() + override fun resolveSingle(context: ColumnResolutionContext) = + context.df.getColumn(source.name(), context.unresolvedColumnsPolicy)?.addPath() override fun getValue(row: AnyRow) = super.getValue(row) diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/columns/constructors.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/columns/constructors.kt index 01790f3b8..1cf7d097e 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/columns/constructors.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/columns/constructors.kt @@ -144,10 +144,23 @@ internal fun createColumn(values: Iterable, suggestedType: KType, guessTy // region create Columns -internal fun createColumnSet(resolver: (ColumnResolutionContext) -> List>): ColumnSet = - object : ColumnSet { - override fun resolve(context: ColumnResolutionContext) = resolver(context) - 
} +internal fun createColumnSet( + resolver: (context: ColumnResolutionContext) -> List>, +): ColumnSet = object : ColumnSet { + override fun resolve(context: ColumnResolutionContext) = resolver(context) +} + +internal fun createTransformableColumnSet( + resolver: (context: ColumnResolutionContext) -> List>, + transformResolve: (context: ColumnResolutionContext, transformer: ColumnSetTransformer) -> List>, +): TransformableColumnSet = object : TransformableColumnSet { + override fun resolve(context: ColumnResolutionContext) = resolver(context) + + override fun transformResolve( + context: ColumnResolutionContext, + transformer: ColumnSetTransformer, + ): List> = transformResolve(context, transformer) +} // region toColumnSet @@ -155,12 +168,11 @@ internal fun createColumnSet(resolver: (ColumnResolutionContext) -> List, C> Selector>.toColumnSet( createReceiver: (ColumnResolutionContext) -> T, -): ColumnSet = - createColumnSet { - val receiver = createReceiver(it) - val columnSet = this(receiver, receiver) - columnSet.resolve(receiver, it.unresolvedColumnsPolicy) - } +): ColumnSet = createColumnSet { + val receiver = createReceiver(it) + val columnSet = this(receiver, receiver) + columnSet.resolve(receiver, it.unresolvedColumnsPolicy) +} @JvmName("toColumnSetForPivot") internal fun PivotColumnsSelector.toColumnSet(): ColumnSet = toColumnSet { diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/columns/missing/MissingColumnGroup.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/columns/missing/MissingColumnGroup.kt index 62518ab0c..a3abdcedd 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/columns/missing/MissingColumnGroup.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/columns/missing/MissingColumnGroup.kt @@ -8,10 +8,7 @@ import org.jetbrains.kotlinx.dataframe.aggregation.AggregateGroupedBody import org.jetbrains.kotlinx.dataframe.api.asDataColumn import 
org.jetbrains.kotlinx.dataframe.api.cast import org.jetbrains.kotlinx.dataframe.api.name -import org.jetbrains.kotlinx.dataframe.columns.ColumnPath -import org.jetbrains.kotlinx.dataframe.columns.ColumnReference -import org.jetbrains.kotlinx.dataframe.columns.ColumnResolutionContext -import org.jetbrains.kotlinx.dataframe.columns.ColumnWithPath +import org.jetbrains.kotlinx.dataframe.columns.* import org.jetbrains.kotlinx.dataframe.columns.UnresolvedColumnsPolicy import org.jetbrains.kotlinx.dataframe.impl.columns.DataColumnGroup import org.jetbrains.kotlinx.dataframe.impl.columns.addPath diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/columns/tree/TreeNode.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/columns/tree/TreeNode.kt index 2f35bca69..bf9d27646 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/columns/tree/TreeNode.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/columns/tree/TreeNode.kt @@ -6,7 +6,7 @@ internal class TreeNode( override val name: String, override val depth: Int, override var data: T, - override val parent: TreeNode? = null + override val parent: TreeNode? = null, ) : ReadonlyTreeNode { companion object { @@ -28,7 +28,7 @@ internal class TreeNode( fun pathFromRoot(): ColumnPath { val path = mutableListOf() var node: TreeNode? 
= this - while (node != null && node.parent != null) { + while (node?.parent != null) { path.add(node.name) node = node.parent } @@ -48,19 +48,34 @@ internal class TreeNode( return addChild(childName, createData()) } - fun dfs(enterCondition: (TreeNode) -> Boolean = { true }, yieldCondition: (TreeNode) -> Boolean = { true }): List> { + @Deprecated("Use allChildren instead", ReplaceWith("allChildren(enterCondition, yieldCondition)")) + fun dfs( + enterCondition: (TreeNode) -> Boolean = { true }, + yieldCondition: (TreeNode) -> Boolean = { true }, + ): List> = allChildren(enterCondition, yieldCondition) + + /** + * Traverses the tree in depth-first order and returns all nodes that satisfy [yieldCondition]. + * If [enterCondition] returns false for a node, its children are not traversed. + * By default, all nodes are traversed and all nodes are returned. + */ + fun allChildren( + enterCondition: (TreeNode) -> Boolean = { true }, + yieldCondition: (TreeNode) -> Boolean = { true }, + ): List> { val result = mutableListOf>() - fun doDfs(node: TreeNode) { + + fun traverse(node: TreeNode) { if (yieldCondition(node)) { result.add(node) } if (enterCondition(node)) { node.children.forEach { - doDfs(it) + traverse(it) } } } - doDfs(this) + traverse(this) return result } } diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/columns/tree/Utils.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/columns/tree/Utils.kt index e74bc75c8..97f16582b 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/columns/tree/Utils.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/columns/tree/Utils.kt @@ -29,9 +29,22 @@ internal fun TreeNode.getOrPut(path: ColumnPath, createData: (ColumnPath) return node } -internal fun TreeNode.topDfs(yieldCondition: (TreeNode) -> Boolean): List> = dfs(enterCondition = { !yieldCondition(it) }, yieldCondition = yieldCondition) +/** + * Traverses all children in the tree in depth-first order and 
returns the top-most nodes that satisfy + * [yieldCondition]. This means that if a node satisfies [yieldCondition], its children are not traversed, regardless of + * whether they satisfy [yieldCondition] or not. + */ +internal fun TreeNode.topmostChildren(yieldCondition: (TreeNode) -> Boolean): List> = + allChildren( + enterCondition = { !yieldCondition(it) }, + yieldCondition = yieldCondition, + ) -internal fun TreeNode.topDfsExcluding(excludeRoot: TreeNode<*>): List> { +@Deprecated("Use topmostChildren instead", ReplaceWith("topmostChildren(yieldCondition)")) +internal fun TreeNode.topDfs(yieldCondition: (TreeNode) -> Boolean): List> = + topmostChildren(yieldCondition) + +internal fun TreeNode.topmostChildrenExcluding(excludeRoot: TreeNode<*>): List> { val result = mutableListOf>() fun doDfs(node: TreeNode, exclude: TreeNode<*>) { if (exclude.children.isNotEmpty()) { @@ -48,23 +61,35 @@ internal fun TreeNode.topDfsExcluding(excludeRoot: TreeNode<*>): List TreeNode.dfsNotNull() = dfs { it.data != null }.map { it as TreeNode } -internal fun TreeNode.dfsTopNotNull() = dfs(enterCondition = { it.data == null }, yieldCondition = { it.data != null }).map { it as TreeNode } +internal fun TreeNode.allChildrenNotNull(): List> = + allChildren { it.data != null } as List> + +internal fun TreeNode.topmostChildrenNotNull() = + topmostChildren { it.data != null } as List> -internal fun TreeNode.allRemovedColumns() = dfs { it.data.wasRemoved && it.data.column != null } -internal fun TreeNode.allWithColumns() = dfs { it.data.column != null } -internal fun Iterable>.dfs(): List> { +internal fun TreeNode.allRemovedColumns() = + allChildren { it.data.wasRemoved && it.data.column != null } + +internal fun TreeNode.allWithColumns() = + allChildren { it.data.column != null } + +internal fun Iterable>.flattenRecursively(): List> { val result = mutableListOf>() - fun dfs(cols: Iterable>) { + + fun flattenRecursively(cols: Iterable>) { cols.forEach { result.add(it) val path = it.path if 
(it.data.isColumnGroup()) { - dfs(it.data.asColumnGroup().columns().map { it.addPath(path + it.name()) }) + flattenRecursively( + it.data.asColumnGroup() + .columns() + .map { it.addPath(path + it.name()) } + ) } } } - dfs(this) + flattenRecursively(this) return result } diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/html.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/html.kt index aa69386c4..229c13d0d 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/html.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/html.kt @@ -74,8 +74,8 @@ internal fun tableJs(columns: List, id: Int, rootId: Int, nrow: var index = 0 val data = buildString { append("[") - fun dfs(col: ColumnDataForJs): Int { - val children = col.nested.map { dfs(it) } + fun appendColWithChildren(col: ColumnDataForJs): Int { + val children = col.nested.map { appendColWithChildren(it) } val colIndex = index++ val values = col.values.joinToString(",", prefix = "[", postfix = "]") { when (it) { @@ -100,7 +100,7 @@ internal fun tableJs(columns: List, id: Int, rootId: Int, nrow: return colIndex } - columns.forEach { dfs(it) } + columns.forEach { appendColWithChildren(it) } append("]") } val js = getResourceText( diff --git a/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/api/ColumnsSelectionDsl.kt b/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/api/ColumnsSelectionDsl.kt index dd5d16685..d14a0a5ef 100644 --- a/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/api/ColumnsSelectionDsl.kt +++ b/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/api/ColumnsSelectionDsl.kt @@ -3,181 +3,159 @@ package org.jetbrains.kotlinx.dataframe.api import io.kotest.matchers.shouldBe import org.jetbrains.kotlinx.dataframe.DataFrame import org.jetbrains.kotlinx.dataframe.annotations.DataSchema -import org.jetbrains.kotlinx.dataframe.impl.columns.asValueColumn -import org.jetbrains.kotlinx.dataframe.samples.api.TestBase -import 
org.jetbrains.kotlinx.dataframe.samples.api.age -import org.jetbrains.kotlinx.dataframe.samples.api.firstName -import org.jetbrains.kotlinx.dataframe.samples.api.isHappy -import org.jetbrains.kotlinx.dataframe.samples.api.lastName -import org.jetbrains.kotlinx.dataframe.samples.api.name +import org.jetbrains.kotlinx.dataframe.samples.api.* import org.junit.Test +import kotlin.reflect.typeOf -class ColumnsSelectionDslTests : TestBase() { +open class ColumnsSelectionDslTests : TestBase() { @Test fun first() { - df.select { all().first() } shouldBe df.select { first() } - df.select { all().first() } shouldBe df.select { name } - df.select { first() } shouldBe df.select { name } - df.select { first { it.name().startsWith("a") } } shouldBe df.select { age } - - df.select { - name.first { col -> - col.any { it == "Alice" } - } - } shouldBe df.select { - name.colsOf().first { col -> - col.any { it == "Alice" } - } - } + listOf( + df.select { name }, + df.select { first() }, + df.select { all().first() }, + df.select { first { it.name().startsWith("n") } }, + ).shouldAllBeEqual() - df.select { - "name".first { col -> - col.any { it == "Alice" } - } - } shouldBe df.select { name.firstName } + listOf( + df.select { name.firstName }, - df.select { - Person::name.first { col -> - col.any { it == "Alice" } - } - } shouldBe df.select { name.firstName } + df.select { name.first { col -> col.any { it == "Alice" } } }, + df.select { name.colsOf().first { col -> col.any { it == "Alice" } } }, - df.select { - pathOf("name").first { col -> - col.any { it == "Alice" } - } - } shouldBe df.select { name.firstName } + df.select { "name".first { col -> col.any { it == "Alice" } } }, + df.select { "name".colsOf(typeOf()).first { col -> col.any { it == "Alice" } } }, + + df.select { Person::name.first { col -> col.any { it == "Alice" } } }, + df.select { Person::name.colsOf(typeOf()).first { col -> col.any { it == "Alice" } } }, + + df.select { pathOf("name").first { col -> col.any { it == 
"Alice" } } }, + df.select { pathOf("name").colsOf(typeOf()).first { col -> col.any { it == "Alice" } } }, + + df.select { it["name"].first { col -> col.any { it == "Alice" } } }, + df.select { it["name"].colsOf(typeOf()).first { col -> col.any { it == "Alice" } } }, + ).shouldAllBeEqual() } @Test fun last() { - df.select { all().last() } shouldBe df.select { last() } - df.select { all().last() } shouldBe df.select { isHappy } - df.select { last() } shouldBe df.select { isHappy } - df.select { last { it.name().startsWith("a") } } shouldBe df.select { age } - df.select { - name.last { col -> - col.any { it == "Alice" } - } - } shouldBe df.select { - name.colsOf().last { col -> - col.any { it == "Alice" } - } - } + listOf( + df.select { isHappy }, + df.select { last() }, + df.select { all().last() }, + df.select { last { it.name().startsWith("is") } }, + ).shouldAllBeEqual() - df.select { - "name".last { col -> - col.any { it == "Alice" } - } - } shouldBe df.select { name.firstName } + listOf( + df.select { name.firstName }, - df.select { - Person::name.last { col -> - col.any { it == "Alice" } - } - } shouldBe df.select { name.firstName } + df.select { name.last { col -> col.any { it == "Alice" } } }, + df.select { name.colsOf().last { col -> col.any { it == "Alice" } } }, - df.select { - pathOf("name").last { col -> - col.any { it == "Alice" } - } - } shouldBe df.select { name.firstName } + df.select { "name".last { col -> col.any { it == "Alice" } } }, + df.select { "name".colsOf(typeOf()).last { col -> col.any { it == "Alice" } } }, + + df.select { Person::name.last { col -> col.any { it == "Alice" } } }, + df.select { Person::name.colsOf(typeOf()).last { col -> col.any { it == "Alice" } } }, + + df.select { pathOf("name").last { col -> col.any { it == "Alice" } } }, + df.select { pathOf("name").colsOf(typeOf()).last { col -> col.any { it == "Alice" } } }, + + df.select { it["name"].last { col -> col.any { it == "Alice" } } }, + df.select { 
it["name"].colsOf(typeOf()).last { col -> col.any { it == "Alice" } } }, + ).shouldAllBeEqual() } @Test fun single() { val singleDf = df.select { take(1) } - singleDf.select { all().single() } shouldBe singleDf.select { single() } - singleDf.select { all().single() } shouldBe singleDf.select { name } - singleDf.select { single() } shouldBe singleDf.select { name } - df.select { single { it.name().startsWith("a") } } shouldBe df.select { age } - - df.select { - name.single { col -> - col.any { it == "Alice" } - } - } shouldBe df.select { - name.colsOf().single { col -> - col.any { it == "Alice" } - } - } - df.select { - "name".single { col -> - col.any { it == "Alice" } - } - } shouldBe df.select { name.firstName } + listOf( + df.select { name }, + singleDf.select { name }, + singleDf.select { single() }, + singleDf.select { all().single() }, + df.select { single { it.name().startsWith("n") } }, + ).shouldAllBeEqual() - df.select { - Person::name.single { col -> - col.any { it == "Alice" } - } - } shouldBe df.select { name.firstName } + listOf( + df.select { name.firstName }, - df.select { - pathOf("name").single { col -> - col.any { it == "Alice" } - } - } shouldBe df.select { name.firstName } + df.select { name.single { col -> col.any { it == "Alice" } } }, + df.select { name.colsOf().single { col -> col.any { it == "Alice" } } }, + + df.select { "name".single { col -> col.any { it == "Alice" } } }, + df.select { "name".colsOf(typeOf()).single { col -> col.any { it == "Alice" } } }, + + df.select { Person::name.single { col -> col.any { it == "Alice" } } }, + df.select { Person::name.colsOf(typeOf()).single { col -> col.any { it == "Alice" } } }, + + df.select { pathOf("name").single { col -> col.any { it == "Alice" } } }, + df.select { pathOf("name").colsOf(typeOf()).single { col -> col.any { it == "Alice" } } }, + + df.select { it["name"].single { col -> col.any { it == "Alice" } } }, + df.select { it["name"].colsOf(typeOf()).single { col -> col.any { it == 
"Alice" } } }, + ).shouldAllBeEqual() } @Test fun col() { - df.select { col("age") } shouldBe df.select { age } - df.select { col("age") } shouldBe df.select { age } - df.select { col(pathOf("age")) } shouldBe df.select { age } - df.select { col(pathOf("age")) } shouldBe df.select { age } - df.select { col(Person::age) } shouldBe df.select { age } - - df.select { colGroup("name").col("firstName") } shouldBe df.select { name.firstName } - df.select { colGroup("name").col("firstName") } shouldBe df.select { name.firstName } - df.select { colGroup("name").col(pathOf("firstName")) } shouldBe df.select { name.firstName } - df.select { colGroup("name").col(pathOf("firstName")) } shouldBe df.select { name.firstName } - df.select { colGroup("name").col(Name::firstName) } shouldBe df.select { name.firstName } - } + listOf( + df.select { age }, - @DataSchema - interface FirstNames { - val firstName: String - val secondName: String? - val thirdName: String? - } + df.select { col("age") }, + df.select { col("age") }, - @DataSchema - interface MyName : Name { - val firstNames: FirstNames + df.select { col(pathOf("age")) }, + df.select { col(pathOf("age")) }, + + df.select { col(Person::age) }, + ).shouldAllBeEqual() + + listOf( + df.select { name.firstName }, + + df.select { colGroup("name").col("firstName") }, + df.select { colGroup("name").col("firstName") }, + + df.select { colGroup("name").col(pathOf("firstName")) }, + df.select { colGroup("name").col(pathOf("firstName")) }, + + df.select { colGroup("name").col(Name::firstName) }, + ).shouldAllBeEqual() } @Test fun colGroup() { - val firstNames by columnGroup() - val dfGroup = df.convert { name.firstName }.to { - val firstName by it - val secondName by it.map<_, String?> { null }.asValueColumn() - val thirdName by it.map<_, String?> { null }.asValueColumn() - - dataFrameOf(firstName, secondName, thirdName) - .cast(verify = true) - .asColumnGroup(firstNames) - } + listOf( + dfGroup.select { name }, + + dfGroup.select { 
colGroup("name") }, + dfGroup.select { colGroup("name") }, + + dfGroup.select { colGroup(pathOf("name")) }, + dfGroup.select { colGroup(pathOf("name")) }, + + dfGroup.select { colGroup(Person::name) }, + ).shouldAllBeEqual() - dfGroup.select { colGroup("name") } shouldBe dfGroup.select { name } - dfGroup.select { colGroup("name") } shouldBe dfGroup.select { name } - dfGroup.select { colGroup(pathOf("name")) } shouldBe dfGroup.select { name } - dfGroup.select { colGroup(pathOf("name")) } shouldBe dfGroup.select { name } - dfGroup.select { colGroup(Person::name) } shouldBe dfGroup.select { name } + listOf( + dfGroup.select { name.firstName }, + + dfGroup.select { colGroup("name").colGroup("firstName") }, + dfGroup.select { colGroup("name").colGroup("firstName") }, + + dfGroup.select { colGroup("name").colGroup(pathOf("firstName")) }, + dfGroup.select { colGroup("name").colGroup(pathOf("firstName")) }, - dfGroup.select { colGroup("name").colGroup("firstNames") } shouldBe dfGroup.select { name[firstNames] } - dfGroup.select { colGroup("name").colGroup("firstNames") } shouldBe dfGroup.select { name[firstNames] } - dfGroup.select { colGroup("name").colGroup(pathOf("firstNames")) } shouldBe dfGroup.select { name[firstNames] } - dfGroup.select { colGroup("name").colGroup(pathOf("firstNames")) } shouldBe dfGroup.select { name[firstNames] } - dfGroup.select { colGroup("name").colGroup(MyName::firstNames) } shouldBe dfGroup.select { name[firstNames] } + dfGroup.select { colGroup("name").colGroup(Name2::firstName) }, + ).shouldAllBeEqual() dfGroup.select { - "name"["firstNames"]["firstName", "secondName"] + "name"["firstName"]["firstName", "secondName"] } shouldBe dfGroup.select { - name[firstNames]["firstName"] and name[firstNames]["secondName"] + name.firstName["firstName"] and name.firstName["secondName"] } } @@ -204,321 +182,322 @@ class ColumnsSelectionDslTests : TestBase() { dataFrameOf(firstName, lastName, frameCol).asColumnGroup("name") } - dfWithFrames.select { 
frameCol("frameCol") } shouldBe dfWithFrames.select { frameCol } - dfWithFrames.select { frameCol("frameCol") } shouldBe dfWithFrames.select { frameCol } - dfWithFrames.select { frameCol(pathOf("frameCol")) } shouldBe dfWithFrames.select { frameCol } - dfWithFrames.select { frameCol(pathOf("frameCol")) } shouldBe dfWithFrames.select { frameCol } - dfWithFrames.select { frameCol(PersonWithFrame::frameCol) } shouldBe dfWithFrames.select { frameCol } - - dfWithFrames.select { colGroup("name").frameCol("frameCol") } shouldBe dfWithFrames.select { name[frameCol] } - dfWithFrames.select { colGroup("name").frameCol("frameCol") } shouldBe dfWithFrames.select { name[frameCol] } - dfWithFrames.select { colGroup("name").frameCol(pathOf("frameCol")) } shouldBe dfWithFrames.select { name[frameCol] } - dfWithFrames.select { colGroup("name").frameCol(pathOf("frameCol")) } shouldBe dfWithFrames.select { name[frameCol] } - dfWithFrames.select { colGroup("name").frameCol(PersonWithFrame::frameCol) } shouldBe dfWithFrames.select { name[frameCol] } + listOf( + dfWithFrames.select { frameCol }, + + dfWithFrames.select { frameCol("frameCol") }, + dfWithFrames.select { frameCol("frameCol") }, + + dfWithFrames.select { frameCol(pathOf("frameCol")) }, + dfWithFrames.select { frameCol(pathOf("frameCol")) }, + + dfWithFrames.select { frameCol(PersonWithFrame::frameCol) }, + ).shouldAllBeEqual() + + listOf( + dfWithFrames.select { name[frameCol] }, + + dfWithFrames.select { colGroup("name").frameCol("frameCol") }, + dfWithFrames.select { colGroup("name").frameCol("frameCol") }, + + dfWithFrames.select { colGroup("name").frameCol(pathOf("frameCol")) }, + dfWithFrames.select { colGroup("name").frameCol(pathOf("frameCol")) }, + + dfWithFrames.select { colGroup("name").frameCol(PersonWithFrame::frameCol) }, + ).shouldAllBeEqual() } @Test fun `cols and get with predicate`() { - df.select { all().cols() } shouldBe df.select { cols() } - df.select { all().cols { "e" in it.name() } } shouldBe 
df.select { - cols { "e" in it.name() } - } - df.select { all()[{ "e" in it.name() }] } shouldBe df.select { -// this[{ "e" in it.name() }] - cols { "e" in it.name() } - } + listOf( + df.select { cols(name, age, city, weight, isHappy) }, + df.select { all().cols() }, + df.select { cols() }, + df.select { all() }, + ).shouldAllBeEqual() - df.select { - name.cols { - "Name" in it.name() - } - } shouldBe df.select { - name.colsOf().cols { - "Name" in it.name() - } - } + listOf( + df.select { name }, + df.select { name }.select { all() }, + df.select { name }.select { cols() }, + df.select { name }.select { cols().all() }, + df.select { name }.select { all().cols() }, + ).shouldAllBeEqual() - df.select { -// name[{ "Name" in it.name() }] - name.cols { "Name" in it.name() } - } shouldBe df.select { - name.colsOf()[{ "Name" in it.name() }] - } + listOf( + df.select { cols(name, age, weight) }, - df.select { - "name".cols { "Name" in it.name() } - } shouldBe df.select { - Person::name.cols { "Name" in it.name() } - } + df.select { cols { "e" in it.name() } }, +// df.select { this[{ "e" in it.name() }] }, - df.select { - "name"[{ "Name" in it.name() }] - } shouldBe df.select { - Person::name[{ "Name" in it.name() }] - } + df.select { all().cols { "e" in it.name() } }, + df.select { all()[{ "e" in it.name() }] }, + ).shouldAllBeEqual() - df.select { - pathOf("name").cols { "Name" in it.name() } - } shouldBe df.select { - "name"[{ "Name" in it.name() }] - } + listOf( + df.select { name.firstName and name.lastName }, - df.select { - pathOf("name").cols { "Name" in it.name() } - } shouldBe df.select { - pathOf("name")[{ "Name" in it.name() }] - } + df.select { name.cols { "Name" in it.name() } }, +// df.select { name[{ "Name" in it.name() }] }, + + df.select { name.colsOf().cols { "Name" in it.name() } }, + df.select { name.colsOf()[{ "Name" in it.name() }] }, + + df.select { "name".cols { "Name" in it.name() } }, + df.select { "name"[{ "Name" in it.name() }] }, + + df.select 
{ Person::name.cols { "Name" in it.name() } }, + df.select { Person::name[{ "Name" in it.name() }] }, + + df.select { pathOf("name").cols { "Name" in it.name() } }, + df.select { pathOf("name")[{ "Name" in it.name() }] }, + + df.select { it["name"].cols { "Name" in it.name() } }, + df.select { it["name"][{ "Name" in it.name() }] }, + ).shouldAllBeEqual() } @Test fun `cols and get with column references`() { - df.select { all().cols(name, age) } shouldBe df.select { cols(name, age) } - df.select { all()[name, age] } shouldBe df.select { this[name, age] } + listOf( + df.select { name and age }, + + df.select { cols(name, age) }, + df.select { this[name, age] }, + df.select { it[name, age] }, + + df.select { all().cols(name, age) }, + df.select { all()[name, age] }, + ).shouldAllBeEqual() val firstName by column() val lastName by column() - df.select { - name.cols(firstName, lastName) - } shouldBe df.select { - name.colsOf().cols(firstName, lastName) - } - df.select { - name.cols(name.firstName, name.lastName) - } shouldBe df.select { - name.colsOf().cols(name.firstName, name.lastName) - }.also { it.print() } - - df.select { -// name[name.firstName, name.lastName] - name.cols(name.firstName, name.lastName) - } shouldBe df.select { - name.colsOf()[name.firstName, name.lastName] - } + listOf( + df.select { name.firstName and name.lastName }, - df.select { - "name".cols(name.firstName, name.lastName) - } shouldBe df.select { - Person::name.cols(name.firstName, name.lastName) - } + df.select { name.cols(firstName, lastName) }, +// df.select { name[name.firstName, name.lastName] }, - df.select { - "name"[name.firstName, name.lastName] - } shouldBe df.select { - Person::name[name.firstName, name.lastName] - } + df.select { name.colsOf().cols(firstName, lastName) }, + df.select { name.colsOf()[firstName, lastName] }, - df.select { - pathOf("name").cols(name.firstName, name.lastName) - } shouldBe df.select { - pathOf("name")[name.firstName, name.lastName] - } + df.select { 
name.select { cols(this@select.firstName, this@select.lastName) } }, + + df.select { "name".cols(firstName, lastName) }, + df.select { "name"[firstName, lastName] }, + + df.select { Person::name.cols(firstName, lastName) }, + df.select { Person::name[firstName, lastName] }, + + df.select { pathOf("name").cols(firstName, lastName) }, + df.select { pathOf("name")[firstName, lastName] }, + + df.select { it["name"].cols(firstName, lastName) }, + df.select { it["name"][firstName, lastName] }, + ).shouldAllBeEqual() } @Test fun `cols and get with column names`() { - df.select { all().cols("name", "age") } shouldBe df.select { cols("name", "age") } - df.select { all()["name", "age"] } shouldBe df.select { this["name", "age"] } + listOf( + df.select { name and age }, - df.select { - name.cols("firstName", "lastName") - } shouldBe df.select { - name.colsOf().cols("firstName", "lastName") - } + df.select { cols("name", "age") }, + df.select { this["name", "age"] }, + df.select { it["name", "age"] }, - df.select { -// name["firstName", "lastName"] - name.cols("firstName", "lastName") - } shouldBe df.select { - name.colsOf()["firstName", "lastName"] - } + df.select { all().cols("name", "age") }, + df.select { all()["name", "age"] }, + ).shouldAllBeEqual() - df.select { - "name".cols("firstName", "lastName") - } shouldBe df.select { - Person::name.cols("firstName", "lastName") - } + listOf( + df.select { name.firstName and name.lastName }, - df.select { - "name"["firstName", "lastName"] - } shouldBe df.select { - Person::name["firstName", "lastName"] - } + df.select { name.cols("firstName", "lastName") }, +// df.select { name["firstName", "lastName"] }, - df.select { - pathOf("name").cols("firstName", "lastName") - } shouldBe df.select { - pathOf("name")["firstName", "lastName"] - } + df.select { name.colsOf().cols("firstName", "lastName") }, + df.select { name.colsOf()["firstName", "lastName"] }, + + df.select { "name".cols("firstName", "lastName") }, + df.select { 
"name"["firstName", "lastName"] }, + df.select { "name"["firstName"] and "name"["lastName"] }, + + df.select { Person::name.cols("firstName", "lastName") }, + df.select { Person::name["firstName", "lastName"] }, + + df.select { pathOf("name").cols("firstName", "lastName") }, + df.select { pathOf("name")["firstName", "lastName"] }, + + df.select { it["name"].cols("firstName", "lastName") }, + df.select { it["name"]["firstName", "lastName"] }, + ).shouldAllBeEqual() } @Test fun `cols and get with column paths`() { listOf( - df.select { - all().cols(pathOf("name", "firstName")) - }, - df.select { - cols(pathOf("name", "firstName")) - }, - df.select { - pathOf("name", "firstName") - }, - df.select { - name.firstName - }, - ).reduce { acc, dataFrame -> - acc shouldBe dataFrame - dataFrame - } + df.select { name.firstName }, - df.select { all().cols(pathOf("name"), pathOf("age")) } shouldBe df.select { - cols( - pathOf("name"), - pathOf("age") - ) - } - df.select { all()[pathOf("name"), pathOf("age")] } shouldBe df.select { this[pathOf("name"), pathOf("age")] } + df.select { cols(pathOf("name", "firstName")) }, + df.select { this[pathOf("name", "firstName")] }, + df.select { it[pathOf("name", "firstName")] }, - df.select { - name.cols(pathOf("firstName"), pathOf("lastName")) - } shouldBe df.select { - name.colsOf().cols(pathOf("firstName"), pathOf("lastName")) - } + df.select { all().cols(pathOf("name", "firstName")) }, + df.select { all()[pathOf("name", "firstName")] }, - df.select { -// name[pathOf("firstName"), pathOf("lastName")] - name.cols(pathOf("firstName"), pathOf("lastName")) - } shouldBe df.select { - name.colsOf()[pathOf("firstName"), pathOf("lastName")] - } + df.select { pathOf("name", "firstName") }, + ).shouldAllBeEqual() - df.select { - "name".cols(pathOf("firstName"), pathOf("lastName")) - } shouldBe df.select { - Person::name.cols(pathOf("firstName"), pathOf("lastName")) - } + listOf( + df.select { name and age }, - df.select { - 
"name"[pathOf("firstName"), pathOf("lastName")] - } shouldBe df.select { - Person::name[pathOf("firstName"), pathOf("lastName")] - } + df.select { cols(pathOf("name"), pathOf("age")) }, + df.select { this[pathOf("name"), pathOf("age")] }, + df.select { it[pathOf("name"), pathOf("age")] }, - df.select { - pathOf("name").cols(pathOf("firstName"), pathOf("lastName")) - } shouldBe df.select { - pathOf("name")[pathOf("firstName"), pathOf("lastName")] - } + df.select { all().cols(pathOf("name"), pathOf("age")) }, + df.select { all()[pathOf("name"), pathOf("age")] }, + ).shouldAllBeEqual() + + listOf( + df.select { name.firstName and name.lastName }, + + df.select { name.cols(pathOf("firstName"), pathOf("lastName")) }, +// df.select { name[pathOf("firstName"), pathOf("lastName")] }, + + df.select { name.colsOf().cols(pathOf("firstName"), pathOf("lastName")) }, + df.select { name.colsOf()[pathOf("firstName"), pathOf("lastName")] }, + + df.select { "name".cols(pathOf("firstName"), pathOf("lastName")) }, + df.select { "name"[pathOf("firstName"), pathOf("lastName")] }, + + df.select { Person::name.cols(pathOf("firstName"), pathOf("lastName")) }, + df.select { Person::name[pathOf("firstName"), pathOf("lastName")] }, + + df.select { pathOf("name").cols(pathOf("firstName"), pathOf("lastName")) }, + df.select { pathOf("name")[pathOf("firstName"), pathOf("lastName")] }, + + df.select { it["name"].cols(pathOf("firstName"), pathOf("lastName")) }, + df.select { it["name"][pathOf("firstName"), pathOf("lastName")] }, + ).shouldAllBeEqual() } @Test fun `cols and get with KProperties`() { - df.select { all().cols(Person::name, Person::age) } shouldBe df.select { cols(Person::name, Person::age) } - df.select { all()[Person::name, Person::age] } shouldBe df.select { this[Person::name, Person::age] } + listOf( + df.select { name and age }, - df.select { - name.cols(Name::firstName, Name::lastName) - } shouldBe df.select { - name.colsOf().cols(Name::firstName, Name::lastName) - } + df.select 
{ cols(Person::name, Person::age) }, + df.select { this[Person::name, Person::age] }, + df.select { it[Person::name, Person::age] }, - df.select { - name[Name::firstName, Name::lastName] - } shouldBe df.select { - name.colsOf()[Name::firstName, Name::lastName] - } + df.select { all().cols(Person::name, Person::age) }, + df.select { all()[Person::name, Person::age] }, + ).shouldAllBeEqual() - df.select { - "name".cols(Name::firstName, Name::lastName) - } shouldBe df.select { - Person::name.cols(Name::firstName, Name::lastName) - } + listOf( + df.select { name.firstName and name.lastName }, - df.select { - "name"[Name::firstName, Name::lastName] - } shouldBe df.select { - Person::name[Name::firstName, Name::lastName] - } + df.select { name.cols(Name::firstName, Name::lastName) }, + df.select { name[Name::firstName, Name::lastName] }, - df.select { - pathOf("name").cols(Name::firstName, Name::lastName) - } shouldBe df.select { - pathOf("name")[Name::firstName, Name::lastName] - } + df.select { name.colsOf().cols(Name::firstName, Name::lastName) }, + df.select { name.colsOf()[Name::firstName, Name::lastName] }, + + df.select { "name".cols(Name::firstName, Name::lastName) }, + df.select { "name"[Name::firstName, Name::lastName] }, + + df.select { Person::name.cols(Name::firstName, Name::lastName) }, + df.select { Person::name[Name::firstName, Name::lastName] }, + + df.select { pathOf("name").cols(Name::firstName, Name::lastName) }, + df.select { pathOf("name")[Name::firstName, Name::lastName] }, + + df.select { it["name"].cols(Name::firstName, Name::lastName) }, + df.select { it["name"][Name::firstName, Name::lastName] }, + ).shouldAllBeEqual() } @Test fun `cols and get with indices`() { - df.select { all().cols(0, 1) } shouldBe df.select { cols(0, 1) } - df.select { all()[0, 1] } shouldBe df.select { this[0, 1] } + listOf( + df.select { name and age }, - df.select { - name.cols(0, 1) - } shouldBe df.select { - name.colsOf().cols(0, 1) - } + df.select { cols(0, 1) }, + 
df.select { this[0, 1] }, + df.select { it[0, 1] }, - df.select { -// name[0, 1] - name.cols(0, 1) - } shouldBe df.select { - name.colsOf()[0, 1] - } + df.select { all().cols(0, 1) }, + df.select { all()[0, 1] }, + ).shouldAllBeEqual() - df.select { - "name".cols(0, 1) - } shouldBe df.select { - Person::name.cols(0, 1) - } + listOf( + df.select { name.firstName and name.lastName }, - df.select { - "name"[0, 1] - } shouldBe df.select { - Person::name[0, 1] - } + df.select { name.cols(0, 1) }, +// df.select { name[0, 1] }, - df.select { - pathOf("name").cols(0, 1) - } shouldBe df.select { - pathOf("name")[0, 1] - } + df.select { name.colsOf().cols(0, 1) }, + df.select { name.colsOf()[0, 1] }, + + df.select { "name".cols(0, 1) }, + df.select { "name"[0, 1] }, + + df.select { Person::name.cols(0, 1) }, + df.select { Person::name[0, 1] }, + + df.select { pathOf("name").cols(0, 1) }, + df.select { pathOf("name")[0, 1] }, + + df.select { it["name"].cols(0, 1) }, +// df.select { it["name"][0, 1] }, + ).shouldAllBeEqual() } @Test fun `cols and get with range`() { - df.select { all().cols(0..1) } shouldBe df.select { cols(0..1) } - df.select { all()[0..1] } shouldBe df.select { this[0..1] } + listOf( + df.select { name and age }, - df.select { - name.cols(0..1) - } shouldBe df.select { - name.colsOf().cols(0..1) - } + df.select { cols(0..1) }, + df.select { this[0..1] }, + df.select { it[0..1] }, - df.select { -// name[0..1] - name.cols(0..1) - } shouldBe df.select { - name.colsOf()[0..1] - } + df.select { all().cols(0..1) }, + df.select { all()[0..1] }, + ).shouldAllBeEqual() - df.select { - "name".cols(0..1) - } shouldBe df.select { - Person::name.cols(0..1) - } + listOf( + df.select { name.firstName and name.lastName }, - df.select { - "name"[0..1] - } shouldBe df.select { - Person::name[0..1] - } + df.select { name.cols(0..1) }, +// df.select { name[0..1] }, - df.select { - pathOf("name").cols(0..1) - } shouldBe df.select { - pathOf("name")[0..1] - } + df.select { 
name.colsOf().cols(0..1) }, + df.select { name.colsOf()[0..1] }, + + df.select { "name".cols(0..1) }, + df.select { "name"[0..1] }, + + df.select { Person::name.cols(0..1) }, + df.select { Person::name[0..1] }, + + df.select { pathOf("name").cols(0..1) }, + df.select { pathOf("name")[0..1] }, + + df.select { it["name"].cols(0..1) }, +// df.select { it["name"][0..1] }, + ).shouldAllBeEqual() + } + + @Test + fun roots() { + df.select { cols(name.firstName, name.lastName, age).roots() } shouldBe + df.select { cols(name.firstName, name.lastName, age) } + + df.select { cols(name.firstName, name.lastName, age, name).roots() } shouldBe + df.select { cols(name, age) } } } diff --git a/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/api/move.kt b/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/api/move.kt index f585e6b22..5c7357b1c 100644 --- a/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/api/move.kt +++ b/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/api/move.kt @@ -27,14 +27,18 @@ class MoveTests { } @Test - fun `select all dfs`() { - val selected = grouped.getColumnsWithPaths { all().allDfs() }.map { it.path.joinToString(".") } + fun `select all allRecursively`() { + val selected = grouped + .getColumnsWithPaths { children { !it.isColumnGroup() }.recursively() } + .map { it.path.joinToString(".") } selected shouldBe listOf("a.b", "a.c.d", "b.c", "b.d", "e.f") } @Test fun `batch ungrouping`() { - val ungrouped = grouped.move { dfs { it.depth() > 0 && !it.isColumnGroup() } }.into { pathOf(it.path.joinToString(".")) } + val ungrouped = grouped.move { + cols { it.depth() > 0 && !it.isColumnGroup() }.rec() + }.into { pathOf(it.path.joinToString(".")) } ungrouped.columnNames() shouldBe listOf("q", "a.b", "a.c.d", "b.c", "b.d", "w", "e.f", "r") } @@ -64,15 +68,19 @@ class MoveTests { } @Test - fun `select Dfs`() { - val selected = grouped.select { it["a"].dfs { !it.isColumnGroup() } } + fun `select recursively`() { + val selected = grouped.select { + 
it["a"].cols { !it.isColumnGroup() }.recursively() + } selected.columnNames() shouldBe listOf("b", "d") } @Test fun `columnsWithPath in selector`() { val selected = grouped.getColumnsWithPaths { it["a"] } - val actual = grouped.getColumnsWithPaths { selected.map { it.allDfs() }.toColumnSet() } + val actual = grouped.getColumnsWithPaths { + selected.map { it.cols { !it.isColumnGroup() }.recursively() }.toColumnSet() + } actual.map { it.path.joinToString(".") } shouldBe listOf("a.b", "a.c.d") } diff --git a/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/api/pivot.kt b/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/api/pivot.kt index 727165729..36cc299a4 100644 --- a/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/api/pivot.kt +++ b/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/api/pivot.kt @@ -41,7 +41,7 @@ class PivotTests { } pivoted.columnsCount() shouldBe 3 pivoted.rowsCount() shouldBe 2 - val cols = pivoted.getColumns { except(a).allDfs() } + val cols = pivoted.getColumns { except(a).cols { !it.isColumnGroup() }.rec() } cols.size shouldBe 4 cols.forEach { it.type() shouldBe typeOf() diff --git a/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/api/recursively.kt b/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/api/recursively.kt new file mode 100644 index 000000000..38e89fce0 --- /dev/null +++ b/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/api/recursively.kt @@ -0,0 +1,108 @@ +package org.jetbrains.kotlinx.dataframe.api + +import io.kotest.matchers.shouldBe +import io.kotest.matchers.shouldNotBe +import org.jetbrains.kotlinx.dataframe.columns.ColumnWithPath +import org.jetbrains.kotlinx.dataframe.samples.api.TestBase +import org.jetbrains.kotlinx.dataframe.samples.api.city +import org.jetbrains.kotlinx.dataframe.samples.api.firstName +import org.jetbrains.kotlinx.dataframe.samples.api.name +import org.junit.Test + +class Recursively : TestBase() { + + fun List>.print() { + forEach { + if (it.isValueColumn()) 
println("${it.name}: ${it.type()}") + else it.print() + } + println() + } + + infix fun List>.shouldBe(other: List>) { + this.map { it.name to it.path } shouldBe other.map { it.name to it.path } + } + + infix fun List>.shouldNotBe(other: List>) { + this.map { it.name to it.path } shouldNotBe other.map { it.name to it.path } + } + + private val recursivelyGoal = dfGroup.getColumnsWithPaths { dfs { true } } + .sortedBy { it.name } + + private val recursivelyNoGroups = dfGroup.getColumnsWithPaths { allDfs(false) } + .sortedBy { it.name } + + private val recursivelyString = dfGroup.getColumnsWithPaths { dfsOf() } + .sortedBy { it.name } + + @Test + fun `first, last, and single`() { + listOf( + dfGroup.select { name.firstName.firstName }, + + dfGroup.select { first { col -> col.any { it == "Alice" } }.recursively() }, + dfGroup.select { last { col -> col.any { it == "Alice" } }.recursively() }, + dfGroup.select { single { col -> col.any { it == "Alice" } }.recursively() }, + ).shouldAllBeEqual() + + listOf( + dfGroup.select { city }, + + dfGroup.select { first { col -> col.any { it == "London" } }.recursively() }, + dfGroup.select { last { col -> col.any { it == "London" } }.recursively() }, + dfGroup.select { single { col -> col.any { it == "London" } }.recursively() }, + ).shouldAllBeEqual() + } + + @Test + fun `children`() { + dfGroup.getColumnsWithPaths { children().recursively() }.print() + dfGroup.getColumnsWithPaths { name.children() }.print() + } + + @Test + fun `groups`() { + listOf( + df.select { name }, + df.select { groups().recursively() }, + df.select { groups() }, + df.select { all().groups() }, + df.select { all().groups().rec() }, + ).shouldAllBeEqual() + + dfGroup.select { groups() } shouldBe dfGroup.select { name } + dfGroup.select { groups().rec() } shouldBe dfGroup.select { name and name.firstName } + } + + @Test + fun `all recursively`() { + dfGroup.getColumnsWithPaths { all().recursively() }.sortedBy { it.name } shouldBe recursivelyGoal + 
dfGroup.getColumnsWithPaths { all().cols { !it.isColumnGroup() }.rec() } + .sortedBy { it.name } shouldBe recursivelyNoGroups + } + + @Test + fun `cols recursively`() { + dfGroup.getColumnsWithPaths { cols().recursively() }.sortedBy { it.name } shouldBe recursivelyGoal + } + + @Test + fun `colsOf recursively`() { + dfGroup.getColumnsWithPaths { colsOf().recursively() }.sortedBy { it.name } shouldBe recursivelyString + } + + @Test + fun `all allRecursively`() { + dfGroup.getColumnsWithPaths { all().all().recursively() }.sortedBy { it.name } shouldBe recursivelyGoal + dfGroup.getColumnsWithPaths { all().cols { !it.isColumnGroup() }.recursively() } + .sortedBy { it.name } shouldBe recursivelyNoGroups + } + + @Test + fun `cols allRecursively`() { + dfGroup.getColumnsWithPaths { cols().all().recursively() }.sortedBy { it.name } shouldBe recursivelyGoal + dfGroup.getColumnsWithPaths { cols().cols { !it.isColumnGroup() }.recursively() } + .sortedBy { it.name } shouldBe recursivelyNoGroups + } +} diff --git a/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/api/reorder.kt b/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/api/reorder.kt index 87e28271c..94e64ed3c 100644 --- a/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/api/reorder.kt +++ b/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/api/reorder.kt @@ -24,7 +24,7 @@ class ReorderTests { sorted1.columnNames() shouldBe listOf("b", "a") sorted1["a"].asColumnGroup().columnNames() shouldBe listOf("a", "c") - val sorted2 = df.reorder { allDfs(true) }.byName() + val sorted2 = df.reorder { all().recursively() }.byName() sorted2.columnNames() shouldBe listOf("a", "b") sorted2["a"].asColumnGroup().columnNames() shouldBe listOf("a", "c") } diff --git a/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/PlaylistJsonTest.kt b/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/PlaylistJsonTest.kt index 02cdc6642..4366dd86a 100644 --- 
a/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/PlaylistJsonTest.kt +++ b/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/PlaylistJsonTest.kt @@ -136,7 +136,9 @@ class PlaylistJsonTest { @Test fun `deep batch update all`() { - val updated = item.convert { dfs { it.name() == "url" } }.with { (it as? String)?.let { IMG(it) } } + val updated = item + .convert { cols { it.name() == "url" }.rec() } + .with { (it as? String)?.let { IMG(it) } } updated.snippet.thumbnails.default.url.type() shouldBe typeOf() updated.snippet.thumbnails.maxres.url.type() shouldBe typeOf() updated.snippet.thumbnails.standard.url.type() shouldBe typeOf() diff --git a/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/samples/api/Access.kt b/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/samples/api/Access.kt index 438c3bbf8..f44ce9fd2 100644 --- a/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/samples/api/Access.kt +++ b/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/samples/api/Access.kt @@ -701,7 +701,7 @@ class Access : TestBase() { fun columnSelectorsUsages() { // SampleStart df.select { age and name } - df.fillNaNs { dfsOf() }.withZero() + df.fillNaNs { colsOf().recursively() }.withZero() df.remove { cols { it.hasNulls() } } df.group { cols { it.data != name } }.into { "nameless" } df.update { city }.notNull { it.lowercase() } @@ -739,8 +739,8 @@ class Access : TestBase() { // all children of ColumnGroup df.select { name.all() } - // depth-first-search traversal of all children columns - df.select { name.allDfs() } + // recursive traversal of all children columns excluding ColumnGroups + df.select { name.cols { !it.isColumnGroup() }.recursively() } // SampleEnd } @@ -777,8 +777,8 @@ class Access : TestBase() { // all children of ColumnGroup df.select { name.all() } - // depth-first-search traversal of all children columns - df.select { name.allDfs() } + // recursive traversal of all children columns excluding ColumnGroups + df.select { name.cols { 
!it.isColumnGroup() }.recursively() } // SampleEnd } @@ -812,8 +812,8 @@ class Access : TestBase() { // all children of ColumnGroup df.select { Person::name.all() } - // depth-first-search traversal of all children columns - df.select { Person::name.allDfs() } + // recursive traversal of all children columns excluding groups + df.select { Person::name.cols { !it.isColumnGroup() }.recursively() } // SampleEnd } @@ -846,8 +846,8 @@ class Access : TestBase() { // all children of ColumnGroup df.select { "name".all() } - // depth-first-search traversal of all children columns - df.select { "name".allDfs() } + // recursive traversal of all children columns excluding groups + df.select { "name".cols { !it.isColumnGroup() }.recursively() } // SampleEnd } @@ -905,17 +905,17 @@ class Access : TestBase() { Person::name.single { it.name().startsWith("first") } } - // depth-first-search traversal of all columns, excluding ColumnGroups from result - df.select { allDfs() } + // recursive traversal of all columns, excluding ColumnGroups from result + df.select { cols { !it.isColumnGroup() }.recursively() } // depth-first-search traversal of all columns, including ColumnGroups in result - df.select { allDfs(includeGroups = true) } + df.select { all().recursively() } - // depth-first-search traversal with condition - df.select { dfs { it.name().contains(":") } } + // recursive traversal with condition + df.select { cols { it.name().contains(":") }.recursively() } - // depth-first-search traversal of columns of given type - df.select { dfsOf() } + // recursive traversal of columns of given type + df.select { colsOf().rec() } // all columns except given column set df.select { except { colsOf() } } @@ -929,19 +929,19 @@ class Access : TestBase() { @TransformDataFrameExpressions fun columnSelectorsModifySet() { // SampleStart - // first/last n columns in column set - df.select { allDfs().take(3) } - df.select { allDfs().takeLast(3) } + // first/last n value- and frame columns in column 
set + df.select { cols { !it.isColumnGroup() }.recursively().take(3) } + df.select { cols { !it.isColumnGroup() }.recursively().takeLast(3) } - // all except first/last n columns in column set - df.select { allDfs().drop(3) } - df.select { allDfs().dropLast(3) } + // all except first/last n value- and frame columns in column set + df.select { cols { !it.isColumnGroup() }.recursively().drop(3) } + df.select { cols { !it.isColumnGroup() }.recursively().dropLast(3) } // filter column set by condition - df.select { allDfs().filter { it.name().startsWith("year") } } + df.select { cols { !it.isColumnGroup() }.rec().filter { it.name().startsWith("year") } } // exclude columns from column set - df.select { allDfs().except { age } } + df.select { cols { !it.isColumnGroup() }.rec().except { age } } // keep only unique columns df.select { (colsOf() and age).distinct() } diff --git a/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/samples/api/Modify.kt b/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/samples/api/Modify.kt index b71f65655..aceb0156e 100644 --- a/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/samples/api/Modify.kt +++ b/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/samples/api/Modify.kt @@ -23,7 +23,6 @@ import org.jetbrains.kotlinx.dataframe.api.convert import org.jetbrains.kotlinx.dataframe.api.convertTo import org.jetbrains.kotlinx.dataframe.api.dataFrameOf import org.jetbrains.kotlinx.dataframe.api.default -import org.jetbrains.kotlinx.dataframe.api.dfsOf import org.jetbrains.kotlinx.dataframe.api.dropNulls import org.jetbrains.kotlinx.dataframe.api.explode import org.jetbrains.kotlinx.dataframe.api.fill @@ -120,7 +119,7 @@ class Modify : TestBase() { fun update() { // SampleStart df.update { age }.with { it * 2 } - df.update { dfsOf() }.with { it.uppercase() } + df.update { colsOf().recursively() }.with { it.uppercase() } df.update { weight }.at(1..4).notNull { it / 2 } df.update { name.lastName and age }.at(1, 3, 4).withNull() // 
SampleEnd @@ -181,7 +180,7 @@ class Modify : TestBase() { fun convert() { // SampleStart df.convert { age }.with { it.toDouble() } - df.convert { dfsOf() }.with { it.toCharArray().toList() } + df.convert { colsOf().recursively() }.with { it.toCharArray().toList() } // SampleEnd } @@ -338,7 +337,7 @@ class Modify : TestBase() { // a.b.e -> be // c.d.e -> de - df.move { dfs { it.name() == "e" } }.toTop { it.parentName + it.name() } + df.move { cols { it.name() == "e" }.recursively() }.toTop { it.parentName + it.name() } // SampleEnd } @@ -779,7 +778,7 @@ class Modify : TestBase() { @Test @TransformDataFrameExpressions - fun concatDfs() { + fun concatDataFrames() { val df1 = df val df2 = df // SampleStart @@ -906,7 +905,7 @@ class Modify : TestBase() { @Test @TransformDataFrameExpressions - fun addDfs() { + fun addDataFrames() { val df1 = df.select { name named "name2" } val df2 = df.select { age named "age2" } // SampleStart diff --git a/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/samples/api/TestBase.kt b/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/samples/api/TestBase.kt index 4f738e1fe..4e4d2ce04 100644 --- a/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/samples/api/TestBase.kt +++ b/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/samples/api/TestBase.kt @@ -1,13 +1,12 @@ package org.jetbrains.kotlinx.dataframe.samples.api +import io.kotest.matchers.should import io.kotest.matchers.shouldBe import org.jetbrains.kotlinx.dataframe.DataRow import org.jetbrains.kotlinx.dataframe.annotations.DataSchema -import org.jetbrains.kotlinx.dataframe.api.cast -import org.jetbrains.kotlinx.dataframe.api.dataFrameOf -import org.jetbrains.kotlinx.dataframe.api.group -import org.jetbrains.kotlinx.dataframe.api.into +import org.jetbrains.kotlinx.dataframe.api.* import org.jetbrains.kotlinx.dataframe.explainer.PluginCallbackProxy +import org.jetbrains.kotlinx.dataframe.impl.columns.asValueColumn import org.junit.After import org.junit.Before @@ -41,6 
+40,16 @@ public open class TestBase { "Charlie", "Byrd", 30, "Moscow", 90, true ).group("firstName", "lastName").into("name").cast() + val dfGroup = df.convert { name.firstName }.to { + val firstName by it + val secondName by it.map<_, String?> { null }.asValueColumn() + val thirdName by it.map<_, String?> { null }.asValueColumn() + + dataFrameOf(firstName, secondName, thirdName) + .cast(verify = true) + .asColumnGroup("firstName") + }.cast(verify = true) + @DataSchema interface Name { val firstName: String @@ -56,5 +65,34 @@ public open class TestBase { val isHappy: Boolean } + @DataSchema + interface FirstNames { + val firstName: String + val secondName: String? + val thirdName: String? + } + + @DataSchema + interface Name2 { + val firstName: DataRow + val lastName: String + } + + @DataSchema + interface Person2 { + val age: Int + val city: String? + val name: DataRow + val weight: Int? + val isHappy: Boolean + } + infix fun T.willBe(expected: U?) = shouldBe(expected) + + fun Iterable.shouldAllBeEqual(): Iterable { + this should { + it.reduce { a, b -> a shouldBe b; b } + } + return this + } } diff --git a/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/testSets/person/DataFrameTests.kt b/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/testSets/person/DataFrameTests.kt index c33a44cc6..4bd50bad7 100644 --- a/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/testSets/person/DataFrameTests.kt +++ b/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/testSets/person/DataFrameTests.kt @@ -5,181 +5,20 @@ import io.kotest.matchers.doubles.ToleranceMatcher import io.kotest.matchers.should import io.kotest.matchers.shouldBe import io.kotest.matchers.shouldNotBe -import org.jetbrains.kotlinx.dataframe.AnyFrame -import org.jetbrains.kotlinx.dataframe.AnyRow -import org.jetbrains.kotlinx.dataframe.DataFrame -import org.jetbrains.kotlinx.dataframe.DataRow -import org.jetbrains.kotlinx.dataframe.RowExpression +import org.jetbrains.kotlinx.dataframe.* import 
org.jetbrains.kotlinx.dataframe.annotations.ColumnName import org.jetbrains.kotlinx.dataframe.annotations.DataSchema -import org.jetbrains.kotlinx.dataframe.api.ExcessiveColumns -import org.jetbrains.kotlinx.dataframe.api.GroupBy -import org.jetbrains.kotlinx.dataframe.api.ParserOptions -import org.jetbrains.kotlinx.dataframe.api.add -import org.jetbrains.kotlinx.dataframe.api.addAll -import org.jetbrains.kotlinx.dataframe.api.addId -import org.jetbrains.kotlinx.dataframe.api.all -import org.jetbrains.kotlinx.dataframe.api.allNulls -import org.jetbrains.kotlinx.dataframe.api.append -import org.jetbrains.kotlinx.dataframe.api.asColumnGroup -import org.jetbrains.kotlinx.dataframe.api.asDataFrame -import org.jetbrains.kotlinx.dataframe.api.asGroupBy -import org.jetbrains.kotlinx.dataframe.api.asIterable -import org.jetbrains.kotlinx.dataframe.api.at -import org.jetbrains.kotlinx.dataframe.api.between -import org.jetbrains.kotlinx.dataframe.api.by -import org.jetbrains.kotlinx.dataframe.api.cast -import org.jetbrains.kotlinx.dataframe.api.chunked -import org.jetbrains.kotlinx.dataframe.api.colsOf -import org.jetbrains.kotlinx.dataframe.api.column -import org.jetbrains.kotlinx.dataframe.api.columnGroup -import org.jetbrains.kotlinx.dataframe.api.columnOf -import org.jetbrains.kotlinx.dataframe.api.concat -import org.jetbrains.kotlinx.dataframe.api.convert -import org.jetbrains.kotlinx.dataframe.api.convertTo -import org.jetbrains.kotlinx.dataframe.api.corr -import org.jetbrains.kotlinx.dataframe.api.count -import org.jetbrains.kotlinx.dataframe.api.countDistinct -import org.jetbrains.kotlinx.dataframe.api.dataFrameOf -import org.jetbrains.kotlinx.dataframe.api.default -import org.jetbrains.kotlinx.dataframe.api.describe -import org.jetbrains.kotlinx.dataframe.api.dfsOf -import org.jetbrains.kotlinx.dataframe.api.digitize -import org.jetbrains.kotlinx.dataframe.api.distinct -import org.jetbrains.kotlinx.dataframe.api.distinctBy -import 
org.jetbrains.kotlinx.dataframe.api.div -import org.jetbrains.kotlinx.dataframe.api.drop -import org.jetbrains.kotlinx.dataframe.api.dropLast -import org.jetbrains.kotlinx.dataframe.api.dropNA -import org.jetbrains.kotlinx.dataframe.api.dropNulls -import org.jetbrains.kotlinx.dataframe.api.dropWhile -import org.jetbrains.kotlinx.dataframe.api.explode -import org.jetbrains.kotlinx.dataframe.api.expr -import org.jetbrains.kotlinx.dataframe.api.fill -import org.jetbrains.kotlinx.dataframe.api.fillNulls -import org.jetbrains.kotlinx.dataframe.api.filter -import org.jetbrains.kotlinx.dataframe.api.first -import org.jetbrains.kotlinx.dataframe.api.forEach -import org.jetbrains.kotlinx.dataframe.api.forEachIndexed -import org.jetbrains.kotlinx.dataframe.api.frameColumn -import org.jetbrains.kotlinx.dataframe.api.gather -import org.jetbrains.kotlinx.dataframe.api.getColumn -import org.jetbrains.kotlinx.dataframe.api.getColumnGroup -import org.jetbrains.kotlinx.dataframe.api.getColumns -import org.jetbrains.kotlinx.dataframe.api.getFrameColumn -import org.jetbrains.kotlinx.dataframe.api.getValue -import org.jetbrains.kotlinx.dataframe.api.group -import org.jetbrains.kotlinx.dataframe.api.groupBy -import org.jetbrains.kotlinx.dataframe.api.implode -import org.jetbrains.kotlinx.dataframe.api.indices -import org.jetbrains.kotlinx.dataframe.api.inplace -import org.jetbrains.kotlinx.dataframe.api.into -import org.jetbrains.kotlinx.dataframe.api.intoColumns -import org.jetbrains.kotlinx.dataframe.api.intoList -import org.jetbrains.kotlinx.dataframe.api.intoRows -import org.jetbrains.kotlinx.dataframe.api.isColumnGroup -import org.jetbrains.kotlinx.dataframe.api.isFrameColumn -import org.jetbrains.kotlinx.dataframe.api.isNA -import org.jetbrains.kotlinx.dataframe.api.isNumber -import org.jetbrains.kotlinx.dataframe.api.keysInto -import org.jetbrains.kotlinx.dataframe.api.last -import org.jetbrains.kotlinx.dataframe.api.leftJoin -import org.jetbrains.kotlinx.dataframe.api.lowercase 
-import org.jetbrains.kotlinx.dataframe.api.map -import org.jetbrains.kotlinx.dataframe.api.mapToFrame -import org.jetbrains.kotlinx.dataframe.api.match -import org.jetbrains.kotlinx.dataframe.api.matches -import org.jetbrains.kotlinx.dataframe.api.max -import org.jetbrains.kotlinx.dataframe.api.maxBy -import org.jetbrains.kotlinx.dataframe.api.mean -import org.jetbrains.kotlinx.dataframe.api.meanFor -import org.jetbrains.kotlinx.dataframe.api.meanOf -import org.jetbrains.kotlinx.dataframe.api.median -import org.jetbrains.kotlinx.dataframe.api.merge -import org.jetbrains.kotlinx.dataframe.api.min -import org.jetbrains.kotlinx.dataframe.api.minBy -import org.jetbrains.kotlinx.dataframe.api.minOf -import org.jetbrains.kotlinx.dataframe.api.minus -import org.jetbrains.kotlinx.dataframe.api.move -import org.jetbrains.kotlinx.dataframe.api.moveTo -import org.jetbrains.kotlinx.dataframe.api.moveToLeft -import org.jetbrains.kotlinx.dataframe.api.moveToRight +import org.jetbrains.kotlinx.dataframe.api.* import org.jetbrains.kotlinx.dataframe.api.name -import org.jetbrains.kotlinx.dataframe.api.named -import org.jetbrains.kotlinx.dataframe.api.notNull -import org.jetbrains.kotlinx.dataframe.api.nullable -import org.jetbrains.kotlinx.dataframe.api.parse -import org.jetbrains.kotlinx.dataframe.api.pathOf -import org.jetbrains.kotlinx.dataframe.api.pivot -import org.jetbrains.kotlinx.dataframe.api.print -import org.jetbrains.kotlinx.dataframe.api.remove -import org.jetbrains.kotlinx.dataframe.api.rename -import org.jetbrains.kotlinx.dataframe.api.reorderColumnsByName -import org.jetbrains.kotlinx.dataframe.api.replace -import org.jetbrains.kotlinx.dataframe.api.rows -import org.jetbrains.kotlinx.dataframe.api.select -import org.jetbrains.kotlinx.dataframe.api.single -import org.jetbrains.kotlinx.dataframe.api.sortBy -import org.jetbrains.kotlinx.dataframe.api.sortByCount -import org.jetbrains.kotlinx.dataframe.api.sortByDesc -import 
org.jetbrains.kotlinx.dataframe.api.sortByKey -import org.jetbrains.kotlinx.dataframe.api.sortWith -import org.jetbrains.kotlinx.dataframe.api.split -import org.jetbrains.kotlinx.dataframe.api.sum -import org.jetbrains.kotlinx.dataframe.api.sumOf -import org.jetbrains.kotlinx.dataframe.api.take -import org.jetbrains.kotlinx.dataframe.api.takeLast -import org.jetbrains.kotlinx.dataframe.api.takeWhile -import org.jetbrains.kotlinx.dataframe.api.times -import org.jetbrains.kotlinx.dataframe.api.to -import org.jetbrains.kotlinx.dataframe.api.toColumn -import org.jetbrains.kotlinx.dataframe.api.toColumnAccessor -import org.jetbrains.kotlinx.dataframe.api.toColumnOf -import org.jetbrains.kotlinx.dataframe.api.toDataFrame -import org.jetbrains.kotlinx.dataframe.api.toDouble -import org.jetbrains.kotlinx.dataframe.api.toInt -import org.jetbrains.kotlinx.dataframe.api.toList -import org.jetbrains.kotlinx.dataframe.api.toListOf -import org.jetbrains.kotlinx.dataframe.api.toMap -import org.jetbrains.kotlinx.dataframe.api.toRight -import org.jetbrains.kotlinx.dataframe.api.toStr -import org.jetbrains.kotlinx.dataframe.api.toValueColumn -import org.jetbrains.kotlinx.dataframe.api.transpose -import org.jetbrains.kotlinx.dataframe.api.under -import org.jetbrains.kotlinx.dataframe.api.ungroup -import org.jetbrains.kotlinx.dataframe.api.update -import org.jetbrains.kotlinx.dataframe.api.value -import org.jetbrains.kotlinx.dataframe.api.values -import org.jetbrains.kotlinx.dataframe.api.valuesNotNull -import org.jetbrains.kotlinx.dataframe.api.where -import org.jetbrains.kotlinx.dataframe.api.with -import org.jetbrains.kotlinx.dataframe.api.withNull -import org.jetbrains.kotlinx.dataframe.api.withValue -import org.jetbrains.kotlinx.dataframe.api.withValues -import org.jetbrains.kotlinx.dataframe.api.withZero -import org.jetbrains.kotlinx.dataframe.api.xs import org.jetbrains.kotlinx.dataframe.columns.ColumnKind import org.jetbrains.kotlinx.dataframe.columns.UnresolvedColumnsPolicy 
import org.jetbrains.kotlinx.dataframe.exceptions.ExcessiveColumnsException import org.jetbrains.kotlinx.dataframe.exceptions.TypeConversionException -import org.jetbrains.kotlinx.dataframe.hasNulls -import org.jetbrains.kotlinx.dataframe.impl.DataFrameSize +import org.jetbrains.kotlinx.dataframe.impl.* import org.jetbrains.kotlinx.dataframe.impl.api.convertToImpl -import org.jetbrains.kotlinx.dataframe.impl.between import org.jetbrains.kotlinx.dataframe.impl.columns.isMissingColumn -import org.jetbrains.kotlinx.dataframe.impl.emptyPath -import org.jetbrains.kotlinx.dataframe.impl.getColumnsImpl -import org.jetbrains.kotlinx.dataframe.impl.nothingType -import org.jetbrains.kotlinx.dataframe.impl.trackColumnAccess -import org.jetbrains.kotlinx.dataframe.index import org.jetbrains.kotlinx.dataframe.io.renderValueForStdout -import org.jetbrains.kotlinx.dataframe.kind import org.jetbrains.kotlinx.dataframe.math.mean -import org.jetbrains.kotlinx.dataframe.ncol -import org.jetbrains.kotlinx.dataframe.nrow -import org.jetbrains.kotlinx.dataframe.size -import org.jetbrains.kotlinx.dataframe.type -import org.jetbrains.kotlinx.dataframe.typeClass import org.junit.Test import java.math.BigDecimal import java.time.LocalDate @@ -911,7 +750,7 @@ class DataFrameTests : BaseTest() { df["e"].kind() shouldBe ColumnKind.Group df.getColumnGroup("d").columnNames() shouldBe listOf("f") df.getColumnGroup("e").getColumnGroup("g").columnNames() shouldBe listOf("h") - val cols = df.getColumns { allDfs() } + val cols = df.getColumns { cols { !it.isColumnGroup() }.recursively() } cols.size shouldBe 5 cols.forEach { it.toList() shouldBe expected @@ -1165,7 +1004,7 @@ class DataFrameTests : BaseTest() { @Test fun `gather bool`() { val pivoted = typed.pivot { city }.groupBy { name }.matches() - val res = pivoted.gather { dfsOf() }.where { it }.keysInto("city") + val res = pivoted.gather { colsOf().recursively() }.where { it }.keysInto("city") val sorted = res.sortBy { name and city } sorted 
shouldBe typed.select { name and city.map { it.toString() } }.distinct().sortBy { name and city } } @@ -1553,10 +1392,10 @@ class DataFrameTests : BaseTest() { @Test fun `union table columns`() { val grouped = typed.addId("id").groupBy { name }.toDataFrame() - val dfs = (0 until grouped.nrow).map { + val flattened = (0 until grouped.nrow).map { grouped[it..it] } - val dst = dfs.concat().asGroupBy().concat().sortBy("id").remove("id") + val dst = flattened.concat().asGroupBy().concat().sortBy("id").remove("id") dst shouldBe typed } diff --git a/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/testSets/person/DataFrameTreeTests.kt b/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/testSets/person/DataFrameTreeTests.kt index d76ad386c..a96b20382 100644 --- a/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/testSets/person/DataFrameTreeTests.kt +++ b/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/testSets/person/DataFrameTreeTests.kt @@ -9,84 +9,7 @@ import org.jetbrains.dataframe.impl.codeGen.InterfaceGenerationMode import org.jetbrains.dataframe.impl.codeGen.generate import org.jetbrains.kotlinx.dataframe.* import org.jetbrains.kotlinx.dataframe.annotations.DataSchema -import org.jetbrains.kotlinx.dataframe.api.GroupBy -import org.jetbrains.kotlinx.dataframe.api.GroupWithKey -import org.jetbrains.kotlinx.dataframe.api.add -import org.jetbrains.kotlinx.dataframe.api.addId -import org.jetbrains.kotlinx.dataframe.api.after -import org.jetbrains.kotlinx.dataframe.api.append -import org.jetbrains.kotlinx.dataframe.api.asColumnGroup -import org.jetbrains.kotlinx.dataframe.api.asDataFrame -import org.jetbrains.kotlinx.dataframe.api.asGroupBy -import org.jetbrains.kotlinx.dataframe.api.at -import org.jetbrains.kotlinx.dataframe.api.by -import org.jetbrains.kotlinx.dataframe.api.cast -import org.jetbrains.kotlinx.dataframe.api.column -import org.jetbrains.kotlinx.dataframe.api.columnGroup -import org.jetbrains.kotlinx.dataframe.api.columnOf -import 
org.jetbrains.kotlinx.dataframe.api.columnsCount -import org.jetbrains.kotlinx.dataframe.api.concat -import org.jetbrains.kotlinx.dataframe.api.convert -import org.jetbrains.kotlinx.dataframe.api.count -import org.jetbrains.kotlinx.dataframe.api.dataFrameOf -import org.jetbrains.kotlinx.dataframe.api.dfsOf -import org.jetbrains.kotlinx.dataframe.api.distinct -import org.jetbrains.kotlinx.dataframe.api.dropNulls -import org.jetbrains.kotlinx.dataframe.api.duplicate -import org.jetbrains.kotlinx.dataframe.api.duplicateRows -import org.jetbrains.kotlinx.dataframe.api.emptyDataFrame -import org.jetbrains.kotlinx.dataframe.api.explode -import org.jetbrains.kotlinx.dataframe.api.expr -import org.jetbrains.kotlinx.dataframe.api.filter -import org.jetbrains.kotlinx.dataframe.api.forEach -import org.jetbrains.kotlinx.dataframe.api.frameColumn -import org.jetbrains.kotlinx.dataframe.api.getColumnGroup -import org.jetbrains.kotlinx.dataframe.api.getColumnPath -import org.jetbrains.kotlinx.dataframe.api.getColumnWithPath -import org.jetbrains.kotlinx.dataframe.api.getColumns -import org.jetbrains.kotlinx.dataframe.api.getValue -import org.jetbrains.kotlinx.dataframe.api.group -import org.jetbrains.kotlinx.dataframe.api.groupBy -import org.jetbrains.kotlinx.dataframe.api.implode -import org.jetbrains.kotlinx.dataframe.api.indices -import org.jetbrains.kotlinx.dataframe.api.insert -import org.jetbrains.kotlinx.dataframe.api.into -import org.jetbrains.kotlinx.dataframe.api.intoRows -import org.jetbrains.kotlinx.dataframe.api.inward -import org.jetbrains.kotlinx.dataframe.api.isColumnGroup -import org.jetbrains.kotlinx.dataframe.api.isEmpty -import org.jetbrains.kotlinx.dataframe.api.isFrameColumn -import org.jetbrains.kotlinx.dataframe.api.join -import org.jetbrains.kotlinx.dataframe.api.last -import org.jetbrains.kotlinx.dataframe.api.map -import org.jetbrains.kotlinx.dataframe.api.max -import org.jetbrains.kotlinx.dataframe.api.maxBy -import 
org.jetbrains.kotlinx.dataframe.api.median -import org.jetbrains.kotlinx.dataframe.api.minus -import org.jetbrains.kotlinx.dataframe.api.move -import org.jetbrains.kotlinx.dataframe.api.moveTo -import org.jetbrains.kotlinx.dataframe.api.moveToLeft -import org.jetbrains.kotlinx.dataframe.api.moveToRight -import org.jetbrains.kotlinx.dataframe.api.pathOf -import org.jetbrains.kotlinx.dataframe.api.perRowCol -import org.jetbrains.kotlinx.dataframe.api.pivot -import org.jetbrains.kotlinx.dataframe.api.remove -import org.jetbrains.kotlinx.dataframe.api.rename -import org.jetbrains.kotlinx.dataframe.api.rows -import org.jetbrains.kotlinx.dataframe.api.select -import org.jetbrains.kotlinx.dataframe.api.single -import org.jetbrains.kotlinx.dataframe.api.sortBy -import org.jetbrains.kotlinx.dataframe.api.split -import org.jetbrains.kotlinx.dataframe.api.sumOf -import org.jetbrains.kotlinx.dataframe.api.toColumnAccessor -import org.jetbrains.kotlinx.dataframe.api.toTop -import org.jetbrains.kotlinx.dataframe.api.under -import org.jetbrains.kotlinx.dataframe.api.ungroup -import org.jetbrains.kotlinx.dataframe.api.update -import org.jetbrains.kotlinx.dataframe.api.values -import org.jetbrains.kotlinx.dataframe.api.with -import org.jetbrains.kotlinx.dataframe.api.withNull -import org.jetbrains.kotlinx.dataframe.api.xs +import org.jetbrains.kotlinx.dataframe.api.* import org.jetbrains.kotlinx.dataframe.columns.ColumnGroup import org.jetbrains.kotlinx.dataframe.columns.ColumnKind import org.jetbrains.kotlinx.dataframe.columns.FrameColumn @@ -143,9 +66,9 @@ class DataFrameTreeTests : BaseTest() { } @Test - fun `select dfs under group`() { - df2.select { nameAndCity.dfsOf() } shouldBe typed2.select { nameAndCity.name } - df2.select { nameAndCity.dfsOf() } shouldBe typed2.select { nameAndCity.name and nameAndCity.city } + fun `select recursively under group`() { + df2.select { nameAndCity.colsOf().recursively() } shouldBe typed2.select { nameAndCity.name } + df2.select { 
nameAndCity.colsOf().recursively() } shouldBe typed2.select { nameAndCity.name and nameAndCity.city } } @Test @@ -249,8 +172,8 @@ class DataFrameTreeTests : BaseTest() { } @Test - fun selectDfs() { - val cols = typed2.select { dfs { it.hasNulls } } + fun `select recursively`() { + val cols = typed2.select { cols { it.hasNulls }.rec() } cols shouldBe typed2.select { nameAndCity.city and weight } } @@ -457,14 +380,14 @@ class DataFrameTreeTests : BaseTest() { @Test fun parentColumnTest() { - val res = typed2.move { dfs { it.depth > 0 } }.toTop { it.parentName + "-" + it.name } + val res = typed2.move { cols { it.depth > 0 }.rec() }.toTop { it.parentName + "-" + it.name } res.columnsCount() shouldBe 4 res.columnNames() shouldBe listOf("nameAndCity-name", "nameAndCity-city", "age", "weight") } @Test fun `group cols`() { - val joined = typed2.move { allDfs() }.into { pathOf(it.path.joinToString(".")) } + val joined = typed2.move { cols { !it.isColumnGroup() }.rec() }.into { pathOf(it.path.joinToString(".")) } val grouped = joined.group { nameContains(".") }.into { it.name().substringBefore(".") } val expected = typed2.rename { nameAndCity.all() }.into { it.path.joinToString(".") } grouped shouldBe expected diff --git a/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/testSets/person/PivotTests.kt b/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/testSets/person/PivotTests.kt index 5dea9d318..3af47eb35 100644 --- a/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/testSets/person/PivotTests.kt +++ b/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/testSets/person/PivotTests.kt @@ -3,56 +3,7 @@ package org.jetbrains.kotlinx.dataframe.testSets.person import io.kotest.matchers.shouldBe import org.jetbrains.kotlinx.dataframe.DataFrame import org.jetbrains.kotlinx.dataframe.annotations.DataSchema -import org.jetbrains.kotlinx.dataframe.api.Infer -import org.jetbrains.kotlinx.dataframe.api.add -import org.jetbrains.kotlinx.dataframe.api.asColumnGroup -import 
org.jetbrains.kotlinx.dataframe.api.associate -import org.jetbrains.kotlinx.dataframe.api.cast -import org.jetbrains.kotlinx.dataframe.api.column -import org.jetbrains.kotlinx.dataframe.api.columnNames -import org.jetbrains.kotlinx.dataframe.api.columnOf -import org.jetbrains.kotlinx.dataframe.api.columnsCount -import org.jetbrains.kotlinx.dataframe.api.convert -import org.jetbrains.kotlinx.dataframe.api.count -import org.jetbrains.kotlinx.dataframe.api.dataFrameOf -import org.jetbrains.kotlinx.dataframe.api.drop -import org.jetbrains.kotlinx.dataframe.api.dropNulls -import org.jetbrains.kotlinx.dataframe.api.explodeLists -import org.jetbrains.kotlinx.dataframe.api.expr -import org.jetbrains.kotlinx.dataframe.api.filter -import org.jetbrains.kotlinx.dataframe.api.first -import org.jetbrains.kotlinx.dataframe.api.frames -import org.jetbrains.kotlinx.dataframe.api.gather -import org.jetbrains.kotlinx.dataframe.api.getColumnGroup -import org.jetbrains.kotlinx.dataframe.api.getColumns -import org.jetbrains.kotlinx.dataframe.api.getColumnsWithPaths -import org.jetbrains.kotlinx.dataframe.api.group -import org.jetbrains.kotlinx.dataframe.api.groupBy -import org.jetbrains.kotlinx.dataframe.api.groupByOther -import org.jetbrains.kotlinx.dataframe.api.implode -import org.jetbrains.kotlinx.dataframe.api.into -import org.jetbrains.kotlinx.dataframe.api.isList -import org.jetbrains.kotlinx.dataframe.api.join -import org.jetbrains.kotlinx.dataframe.api.last -import org.jetbrains.kotlinx.dataframe.api.map -import org.jetbrains.kotlinx.dataframe.api.mapKeys -import org.jetbrains.kotlinx.dataframe.api.mapValues -import org.jetbrains.kotlinx.dataframe.api.matches -import org.jetbrains.kotlinx.dataframe.api.named -import org.jetbrains.kotlinx.dataframe.api.notNull -import org.jetbrains.kotlinx.dataframe.api.pivot -import org.jetbrains.kotlinx.dataframe.api.print -import org.jetbrains.kotlinx.dataframe.api.remove -import org.jetbrains.kotlinx.dataframe.api.replace -import 
org.jetbrains.kotlinx.dataframe.api.rows -import org.jetbrains.kotlinx.dataframe.api.sortBy -import org.jetbrains.kotlinx.dataframe.api.sumOf -import org.jetbrains.kotlinx.dataframe.api.toInt -import org.jetbrains.kotlinx.dataframe.api.ungroup -import org.jetbrains.kotlinx.dataframe.api.update -import org.jetbrains.kotlinx.dataframe.api.values -import org.jetbrains.kotlinx.dataframe.api.where -import org.jetbrains.kotlinx.dataframe.api.with +import org.jetbrains.kotlinx.dataframe.api.* import org.jetbrains.kotlinx.dataframe.columns.ColumnKind import org.jetbrains.kotlinx.dataframe.impl.asList import org.jetbrains.kotlinx.dataframe.impl.nothingType @@ -229,7 +180,11 @@ class PivotTests { group.columnNames() shouldBe if (it.name() == "Bob") keys - "city" else keys } - val leafColumns = pivoted.getColumnsWithPaths { all().drop(1).allDfs() } + val leafColumns = pivoted.getColumnsWithPaths { + all() + .drop(1) + .cols { !it.isColumnGroup() }.recursively() + } leafColumns.size shouldBe typed.name.countDistinct() * typed.key.countDistinct() - 1 leafColumns.forEach { it.path.size shouldBe 2 } @@ -284,7 +239,7 @@ class PivotTests { // nullGroup.columnTypes() shouldBe listOf(typeOf?>(), typeOf?>()) nullGroup.columnTypes() shouldBe listOf(nothingType(true), nothingType(true)) - val cols = pivotedDf.getColumnsWithPaths { all().allDfs() } + val cols = pivotedDf.getColumnsWithPaths { cols { !it.isColumnGroup() }.recursively() } cols.size shouldBe 2 * typed.name.countDistinct() * typed.key.countDistinct() - 2 cols.forEach { diff --git a/dataframe-arrow/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/arrowReadingImpl.kt b/dataframe-arrow/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/arrowReadingImpl.kt index 860b7c507..a9aacee7e 100644 --- a/dataframe-arrow/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/arrowReadingImpl.kt +++ b/dataframe-arrow/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/arrowReadingImpl.kt @@ -222,7 +222,7 @@ internal fun 
DataFrame.Companion.readArrowIPCImpl( nullability: NullabilityOptions = NullabilityOptions.Infer, ): AnyFrame { ArrowStreamReader(channel, allocator).use { reader -> - val dfs = buildList { + val flattened = buildList { val root = reader.vectorSchemaRoot val schema = root.schema while (reader.loadNextBatch()) { @@ -230,7 +230,7 @@ internal fun DataFrame.Companion.readArrowIPCImpl( add(df) } } - return dfs.concatKeepingSchema() + return flattened.concatKeepingSchema() } } @@ -243,7 +243,7 @@ internal fun DataFrame.Companion.readArrowFeatherImpl( nullability: NullabilityOptions = NullabilityOptions.Infer, ): AnyFrame { ArrowFileReader(channel, allocator).use { reader -> - val dfs = buildList { + val flattened = buildList { reader.recordBlocks.forEach { block -> reader.loadRecordBatch(block) val root = reader.vectorSchemaRoot @@ -252,6 +252,6 @@ internal fun DataFrame.Companion.readArrowFeatherImpl( add(df) } } - return dfs.concatKeepingSchema() + return flattened.concatKeepingSchema() } } diff --git a/docs/StardustDocs/snippets/org.jetbrains.kotlinx.dataframe.samples.api.Access.columnSelectors.html b/docs/StardustDocs/snippets/org.jetbrains.kotlinx.dataframe.samples.api.Access.columnSelectors.html index 1ca191eec..2d5d9a5ee 100644 --- a/docs/StardustDocs/snippets/org.jetbrains.kotlinx.dataframe.samples.api.Access.columnSelectors.html +++ b/docs/StardustDocs/snippets/org.jetbrains.kotlinx.dataframe.samples.api.Access.columnSelectors.html @@ -382,7 +382,7 @@
- df.select { name.allDfs() } + df.select { name.cols { !it.isColumnGroup() }.recursively() }
Input DataFrame: rowsCount = 7, columnsCount = 5 diff --git a/docs/StardustDocs/snippets/org.jetbrains.kotlinx.dataframe.samples.api.Access.columnSelectorsMisc.html b/docs/StardustDocs/snippets/org.jetbrains.kotlinx.dataframe.samples.api.Access.columnSelectorsMisc.html index 489c6317a..3841486ae 100644 --- a/docs/StardustDocs/snippets/org.jetbrains.kotlinx.dataframe.samples.api.Access.columnSelectorsMisc.html +++ b/docs/StardustDocs/snippets/org.jetbrains.kotlinx.dataframe.samples.api.Access.columnSelectorsMisc.html @@ -499,7 +499,7 @@

- df.select { allDfs() } + df.select { cols { !it.isColumnGroup() }.recursively() }
Input DataFrame: rowsCount = 7, columnsCount = 6 @@ -516,7 +516,7 @@

- df.select { allDfs(includeGroups = true) } + df.select { all().recursively() }
Input DataFrame: rowsCount = 7, columnsCount = 6 @@ -533,7 +533,7 @@

- df.select { dfs { it.name().contains(":") } } + df.select { cols { it.name().contains(":") }.recursively() }
Input DataFrame: rowsCount = 7, columnsCount = 6 @@ -550,7 +550,7 @@

- df.select { dfsOf&lt;String&gt;() } + df.select { colsOf&lt;String&gt;().rec() }
Input DataFrame: rowsCount = 7, columnsCount = 6 diff --git a/docs/StardustDocs/snippets/org.jetbrains.kotlinx.dataframe.samples.api.Access.columnSelectorsModifySet.html b/docs/StardustDocs/snippets/org.jetbrains.kotlinx.dataframe.samples.api.Access.columnSelectorsModifySet.html index 54a701263..017e49c2e 100644 --- a/docs/StardustDocs/snippets/org.jetbrains.kotlinx.dataframe.samples.api.Access.columnSelectorsModifySet.html +++ b/docs/StardustDocs/snippets/org.jetbrains.kotlinx.dataframe.samples.api.Access.columnSelectorsModifySet.html @@ -214,7 +214,7 @@
- df.select { allDfs().take(3) } + df.select { cols { !it.isColumnGroup() }.recursively().take(3) }
Input DataFrame: rowsCount = 7, columnsCount = 5 @@ -231,7 +231,7 @@

- df.select { allDfs().takeLast(3) } + df.select { cols { !it.isColumnGroup() }.recursively().takeLast(3) }
Input DataFrame: rowsCount = 7, columnsCount = 5 @@ -248,7 +248,7 @@

- df.select { allDfs().drop(3) } + df.select { cols { !it.isColumnGroup() }.recursively().drop(3) }
Input DataFrame: rowsCount = 7, columnsCount = 5 @@ -265,7 +265,7 @@

- df.select { allDfs().dropLast(3) } + df.select { cols { !it.isColumnGroup() }.recursively().dropLast(3) }
Input DataFrame: rowsCount = 7, columnsCount = 5 @@ -282,7 +282,7 @@

- df.select { allDfs().filter { it.name().startsWith("year") } } + df.select { cols { !it.isColumnGroup() }.rec().filter { it.name().startsWith("year") } }
Input DataFrame: rowsCount = 7, columnsCount = 5 @@ -299,7 +299,7 @@

- df.select { allDfs().except { age } } + df.select { cols { !it.isColumnGroup() }.rec().except { age } }
Input DataFrame: rowsCount = 7, columnsCount = 5 diff --git a/docs/StardustDocs/snippets/org.jetbrains.kotlinx.dataframe.samples.api.Access.columnSelectorsUsages.html b/docs/StardustDocs/snippets/org.jetbrains.kotlinx.dataframe.samples.api.Access.columnSelectorsUsages.html index 8405d62d7..e0eb533cd 100644 --- a/docs/StardustDocs/snippets/org.jetbrains.kotlinx.dataframe.samples.api.Access.columnSelectorsUsages.html +++ b/docs/StardustDocs/snippets/org.jetbrains.kotlinx.dataframe.samples.api.Access.columnSelectorsUsages.html @@ -237,7 +237,7 @@

- df.fillNaNs { dfsOf&lt;Double&gt;() }.withZero() + df.fillNaNs { colsOf&lt;Double&gt;().recursively() }.withZero()
Input DataFrame: rowsCount = 7, columnsCount = 5 diff --git a/docs/StardustDocs/snippets/org.jetbrains.kotlinx.dataframe.samples.api.Modify.addDfs.html b/docs/StardustDocs/snippets/org.jetbrains.kotlinx.dataframe.samples.api.Modify.addDataFrames.html similarity index 100% rename from docs/StardustDocs/snippets/org.jetbrains.kotlinx.dataframe.samples.api.Modify.addDfs.html rename to docs/StardustDocs/snippets/org.jetbrains.kotlinx.dataframe.samples.api.Modify.addDataFrames.html diff --git a/docs/StardustDocs/snippets/org.jetbrains.kotlinx.dataframe.samples.api.Modify.convert.html b/docs/StardustDocs/snippets/org.jetbrains.kotlinx.dataframe.samples.api.Modify.convert.html index 8b3f4bd68..546253124 100644 --- a/docs/StardustDocs/snippets/org.jetbrains.kotlinx.dataframe.samples.api.Modify.convert.html +++ b/docs/StardustDocs/snippets/org.jetbrains.kotlinx.dataframe.samples.api.Modify.convert.html @@ -212,7 +212,7 @@

- df.convert { dfsOf&lt;String&gt;() }.with { it.toCharArray().toList() } + df.convert { colsOf&lt;String&gt;().recursively() }.with { it.toCharArray().toList() }
Input DataFrame: rowsCount = 7, columnsCount = 5 diff --git a/docs/StardustDocs/snippets/org.jetbrains.kotlinx.dataframe.samples.api.Modify.update.html b/docs/StardustDocs/snippets/org.jetbrains.kotlinx.dataframe.samples.api.Modify.update.html index 43309259d..df38de7ad 100644 --- a/docs/StardustDocs/snippets/org.jetbrains.kotlinx.dataframe.samples.api.Modify.update.html +++ b/docs/StardustDocs/snippets/org.jetbrains.kotlinx.dataframe.samples.api.Modify.update.html @@ -234,7 +234,7 @@

- df.update { dfsOf&lt;String&gt;() }.with { it.uppercase() } + df.update { colsOf&lt;String&gt;().recursively() }.with { it.uppercase() }
Input DataFrame: rowsCount = 7, columnsCount = 5 diff --git a/docs/StardustDocs/topics/ColumnSelectors.md b/docs/StardustDocs/topics/ColumnSelectors.md index fb4ae353f..1964d9c7c 100644 --- a/docs/StardustDocs/topics/ColumnSelectors.md +++ b/docs/StardustDocs/topics/ColumnSelectors.md @@ -10,7 +10,7 @@ Column selectors are used in many operations: ```kotlin df.select { age and name } -df.fillNaNs { dfsOf<Double>() }.withZero() +df.fillNaNs { colsOf<Double>().recursively() }.withZero() df.remove { cols { it.hasNulls() } } df.group { cols { it.data != name } }.into { "nameless" } df.update { city }.notNull { it.lowercase() } @@ -53,8 +53,8 @@ df.select { name..age } // all children of ColumnGroup df.select { name.all() } -// depth-first-search traversal of all children columns -df.select { name.allDfs() } +// recursive traversal of all children columns excluding ColumnGroups +df.select { name.cols { !it.isColumnGroup() }.recursively() } ``` @@ -89,8 +89,8 @@ df.select { name..age } // all children of ColumnGroup df.select { name.all() } -// depth-first-search traversal of all children columns -df.select { name.allDfs() } +// recursive traversal of all children columns excluding ColumnGroups +df.select { name.cols { !it.isColumnGroup() }.recursively() } ``` @@ -122,8 +122,8 @@ df.select { "name".."age" } // all children of ColumnGroup df.select { "name".all() } -// depth-first-search traversal of all children columns -df.select { "name".allDfs() } +// recursive traversal of all children columns excluding ColumnGroups +df.select { "name".cols { !it.isColumnGroup() }.recursively() } ``` @@ -187,17 +187,17 @@ df.select { Person::name.single { it.name().startsWith("first") } } -// depth-first-search traversal of all columns, excluding ColumnGroups from result -df.select { allDfs() } +// recursive traversal of all columns, excluding ColumnGroups from result +df.select { cols { !it.isColumnGroup() }.recursively() } // depth-first-search traversal of all columns, including ColumnGroups in result
-df.select { allDfs(includeGroups = true) } +df.select { all().recursively() } -// depth-first-search traversal with condition -df.select { dfs { it.name().contains(":") } } +// recursive traversal with condition +df.select { cols { it.name().contains(":") }.recursively() } -// depth-first-search traversal of columns of given type -df.select { dfsOf() } +// recursive traversal of columns of given type +df.select { colsOf().rec() } // all columns except given column set df.select { except { colsOf() } } @@ -214,19 +214,19 @@ df.select { take(2) and col(3) } ```kotlin -// first/last n columns in column set -df.select { allDfs().take(3) } -df.select { allDfs().takeLast(3) } +// first/last n value- and frame columns in column set +df.select { cols { !it.isColumnGroup() }.recursively().take(3) } +df.select { cols { !it.isColumnGroup() }.recursively().takeLast(3) } -// all except first/last n columns in column set -df.select { allDfs().drop(3) } -df.select { allDfs().dropLast(3) } +// all except first/last n value- and frame columns in column set +df.select { cols { !it.isColumnGroup() }.recursively().drop(3) } +df.select { cols { !it.isColumnGroup() }.recursively().dropLast(3) } // filter column set by condition -df.select { allDfs().filter { it.name().startsWith("year") } } +df.select { cols { !it.isColumnGroup() }.rec().filter { it.name().startsWith("year") } } // exclude columns from column set -df.select { allDfs().except { age } } +df.select { cols { !it.isColumnGroup() }.rec().except { age } } // keep only unique columns df.select { (colsOf() and age).distinct() } diff --git a/docs/StardustDocs/topics/add.md b/docs/StardustDocs/topics/add.md index 047141e95..b9f9bc995 100644 --- a/docs/StardustDocs/topics/add.md +++ b/docs/StardustDocs/topics/add.md @@ -212,13 +212,13 @@ df + score ## Add all columns from another [`DataFrame`](DataFrame.md) - + ```kotlin df.add(df1, df2) ``` - + ## addId diff --git a/docs/StardustDocs/topics/addDf.md 
b/docs/StardustDocs/topics/addDf.md index 31a1a4965..03889c6fc 100644 --- a/docs/StardustDocs/topics/addDf.md +++ b/docs/StardustDocs/topics/addDf.md @@ -4,13 +4,13 @@ Returns [`DataFrame`](DataFrame.md) with union of columns from several given [`DataFrames`](DataFrame.md). - + ```kotlin df.add(df1, df2) ``` - + See [all use cases of 'add' operation](add.md). diff --git a/docs/StardustDocs/topics/concat.md b/docs/StardustDocs/topics/concat.md index 172ec56ef..b826a9bb8 100644 --- a/docs/StardustDocs/topics/concat.md +++ b/docs/StardustDocs/topics/concat.md @@ -8,7 +8,7 @@ Returns [`DataFrame`](DataFrame.md) with the union of rows from several given [` [`DataFrame`](DataFrame.md): - + ```kotlin df.concat(df1, df2) diff --git a/docs/StardustDocs/topics/concatDf.md b/docs/StardustDocs/topics/concatDf.md index 5b30ccd43..7f93e31bb 100644 --- a/docs/StardustDocs/topics/concatDf.md +++ b/docs/StardustDocs/topics/concatDf.md @@ -4,7 +4,7 @@ Returns [`DataFrame`](DataFrame.md) with the union of rows from several given [`DataFrames`](DataFrame.md). 
- + ```kotlin df.concat(df1, df2) diff --git a/docs/StardustDocs/topics/convert.md b/docs/StardustDocs/topics/convert.md index 8adf878aa..15f594404 100644 --- a/docs/StardustDocs/topics/convert.md +++ b/docs/StardustDocs/topics/convert.md @@ -18,7 +18,7 @@ See [column selectors](ColumnSelectors.md) and [row expressions](DataRow.md#row- ```kotlin df.convert { age }.with { it.toDouble() } -df.convert { dfsOf<String>() }.with { it.toCharArray().toList() } +df.convert { colsOf<String>().recursively() }.with { it.toCharArray().toList() } ``` diff --git a/docs/StardustDocs/topics/move.md b/docs/StardustDocs/topics/move.md index 70f9f9100..b5504ef90 100644 --- a/docs/StardustDocs/topics/move.md +++ b/docs/StardustDocs/topics/move.md @@ -43,7 +43,7 @@ df.move { name.cols() }.toTop() // a.b.e -> be // c.d.e -> de -df.move { dfs { it.name() == "e" } }.toTop { it.parentName + it.name() } +df.move { cols { it.name() == "e" }.recursively() }.toTop { it.parentName + it.name() } ``` diff --git a/docs/StardustDocs/topics/update.md b/docs/StardustDocs/topics/update.md index d761790bf..860f15aef 100644 --- a/docs/StardustDocs/topics/update.md +++ b/docs/StardustDocs/topics/update.md @@ -23,7 +23,7 @@ See [column selectors](ColumnSelectors.md) and [row expressions](DataRow.md#row- ```kotlin df.update { age }.with { it * 2 } -df.update { dfsOf<String>() }.with { it.uppercase() } +df.update { colsOf<String>().recursively() }.with { it.uppercase() } df.update { weight }.at(1..4).notNull { it / 2 } df.update { name.lastName and age }.at(1, 3, 4).withNull() ``` diff --git a/examples/idea-examples/titanic/src/main/kotlin/org/jetbrains/kotlinx/dataframe/examples/titanic/ml/titanic.kt b/examples/idea-examples/titanic/src/main/kotlin/org/jetbrains/kotlinx/dataframe/examples/titanic/ml/titanic.kt index 810c6b955..6786230c8 100644 --- a/examples/idea-examples/titanic/src/main/kotlin/org/jetbrains/kotlinx/dataframe/examples/titanic/ml/titanic.kt +++
b/examples/idea-examples/titanic/src/main/kotlin/org/jetbrains/kotlinx/dataframe/examples/titanic/ml/titanic.kt @@ -2,24 +2,7 @@ package org.jetbrains.kotlinx.dataframe.examples.titanic.ml import org.jetbrains.kotlinx.dataframe.ColumnSelector import org.jetbrains.kotlinx.dataframe.DataFrame -import org.jetbrains.kotlinx.dataframe.api.by -import org.jetbrains.kotlinx.dataframe.api.column -import org.jetbrains.kotlinx.dataframe.api.convert -import org.jetbrains.kotlinx.dataframe.api.dfsOf -import org.jetbrains.kotlinx.dataframe.api.fillNulls -import org.jetbrains.kotlinx.dataframe.api.getColumn -import org.jetbrains.kotlinx.dataframe.api.into -import org.jetbrains.kotlinx.dataframe.api.mean -import org.jetbrains.kotlinx.dataframe.api.merge -import org.jetbrains.kotlinx.dataframe.api.perCol -import org.jetbrains.kotlinx.dataframe.api.pivotMatches -import org.jetbrains.kotlinx.dataframe.api.remove -import org.jetbrains.kotlinx.dataframe.api.select -import org.jetbrains.kotlinx.dataframe.api.shuffle -import org.jetbrains.kotlinx.dataframe.api.toFloat -import org.jetbrains.kotlinx.dataframe.api.toFloatArray -import org.jetbrains.kotlinx.dataframe.api.toTypedArray -import org.jetbrains.kotlinx.dataframe.api.withValue +import org.jetbrains.kotlinx.dataframe.api.* import org.jetbrains.kotlinx.dl.api.core.Sequential import org.jetbrains.kotlinx.dl.api.core.activation.Activations import org.jetbrains.kotlinx.dl.api.core.initializer.HeNormal @@ -100,8 +83,8 @@ private fun OnHeapDataset.Companion.create( fun extractX(): Array = dataframe.remove(yColumn) - .convert { allDfs() }.toFloat() - .merge { dfsOf() }.by { it.toFloatArray() }.into(x) + .convert { cols { !it.isColumnGroup() }.rec() }.toFloat() + .merge { colsOf().recursively() }.by { it.toFloatArray() }.into(x) .getColumn(x).toTypedArray() fun extractY(): FloatArray = dataframe[yColumn].toFloatArray() diff --git a/examples/idea-examples/youtube/src/main/kotlin/org/jetbrains/kotlinx/dataframe/examples/youtube/youtube.kt 
b/examples/idea-examples/youtube/src/main/kotlin/org/jetbrains/kotlinx/dataframe/examples/youtube/youtube.kt index 8fb697b2c..911d75216 100644 --- a/examples/idea-examples/youtube/src/main/kotlin/org/jetbrains/kotlinx/dataframe/examples/youtube/youtube.kt +++ b/examples/idea-examples/youtube/src/main/kotlin/org/jetbrains/kotlinx/dataframe/examples/youtube/youtube.kt @@ -53,7 +53,7 @@ fun main() { .select { id.videoId into videoId and snippet } .distinct() .parse() - .convert { dfsOf() }.with { + .convert { colsOf().recursively() }.with { IMG(it, maxHeight = 150) }.add("video") { val id = videoId() diff --git a/examples/notebooks/youtube/Youtube.ipynb b/examples/notebooks/youtube/Youtube.ipynb index ee4512154..557518acd 100644 --- a/examples/notebooks/youtube/Youtube.ipynb +++ b/examples/notebooks/youtube/Youtube.ipynb @@ -607,7 +607,7 @@ "metadata": {}, "outputs": [], "source": [ - "val loaded = parsed.convert { dfsOf() }.with { IMG(it, maxHeight = 150) }\n", + "val loaded = parsed.convert { colsOf().recursively() }.with { IMG(it, maxHeight = 150) }\n", " .add(\"video\") { IFRAME(\"http://www.youtube.com/embed/$id\") }" ] },