diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/distinct.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/distinct.kt index 82015f2fd7..0b0a92b870 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/distinct.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/distinct.kt @@ -4,10 +4,13 @@ import org.jetbrains.kotlinx.dataframe.AnyColumnReference import org.jetbrains.kotlinx.dataframe.ColumnsSelector import org.jetbrains.kotlinx.dataframe.DataFrame import org.jetbrains.kotlinx.dataframe.annotations.AccessApiOverload +import org.jetbrains.kotlinx.dataframe.api.Select.SelectSelectingOptions import org.jetbrains.kotlinx.dataframe.columns.ColumnSet import org.jetbrains.kotlinx.dataframe.columns.SingleColumn import org.jetbrains.kotlinx.dataframe.columns.toColumnSet +import org.jetbrains.kotlinx.dataframe.documentation.DocumentationUrls import org.jetbrains.kotlinx.dataframe.documentation.DslGrammarTemplateColumnsSelectionDsl.DslGrammarTemplate +import org.jetbrains.kotlinx.dataframe.documentation.ExcludeFromSources import org.jetbrains.kotlinx.dataframe.documentation.Indent import org.jetbrains.kotlinx.dataframe.exceptions.DuplicateColumnNamesException import org.jetbrains.kotlinx.dataframe.impl.columns.DistinctColumnSet @@ -16,10 +19,44 @@ import kotlin.reflect.KProperty // region DataFrame + +/** + * ## The Distinct Operation + * + * It removes duplicated rows based on {@get PHRASE_ENDING}. + * + * __NOTE:__ The rows in the resulting [DataFrame] are in the same order as they were in the original [DataFrame]. + * + * {@get [DISTINCT_PARAM] @param [columns] + * The names of the columns to consider for evaluating distinct rows.} + * + * @return A new DataFrame containing only distinct rows. + * + * @see [Selecting Columns][SelectSelectingOptions]. 
+ * @see {@include [DocumentationUrls.Distinct]} + */ +@ExcludeFromSources +private interface DistinctDocs { + interface DISTINCT_PARAM +} + +/** + * {@include [DistinctDocs]} + * {@set PHRASE_ENDING all columns} + * {@set [DistinctDocs.DISTINCT_PARAM]} + */ public fun DataFrame.distinct(): DataFrame = distinctBy { all() } +/** + * {@include [DistinctDocs]} + * {@set PHRASE_ENDING the specified columns} + */ public fun DataFrame.distinct(columns: ColumnsSelector): DataFrame = select(columns).distinct() +/** + * {@include [DistinctDocs]} + * {@set PHRASE_ENDING the specified columns} + */ @AccessApiOverload public fun DataFrame.distinct(vararg columns: KProperty<*>): DataFrame = distinct { @@ -27,22 +64,47 @@ public fun DataFrame.distinct(vararg columns: KProperty<*>): DataFrame set } +/** + * {@include [DistinctDocs]} + * {@set PHRASE_ENDING the specified columns} + */ public fun DataFrame.distinct(vararg columns: String): DataFrame = distinct { columns.toColumnSet() } +/** + * {@include [DistinctDocs]} + * {@set PHRASE_ENDING the specified columns} + */ @AccessApiOverload public fun DataFrame.distinct(vararg columns: AnyColumnReference): DataFrame = distinct { columns.toColumnSet() } +/** + * {@include [DistinctDocs]} + * {@set PHRASE_ENDING the specified columns} + */ @AccessApiOverload public fun DataFrame.distinctBy(vararg columns: KProperty<*>): DataFrame = distinctBy { columns.toColumnSet() } +/** + * {@include [DistinctDocs]} + * {@set PHRASE_ENDING the specified columns} + */ public fun DataFrame.distinctBy(vararg columns: String): DataFrame = distinctBy { columns.toColumnSet() } +/** + * {@include [DistinctDocs]} + * {@set PHRASE_ENDING the specified columns} + */ @AccessApiOverload public fun DataFrame.distinctBy(vararg columns: AnyColumnReference): DataFrame = distinctBy { columns.toColumnSet() } + +/** + * {@include [DistinctDocs]} + * {@set PHRASE_ENDING the specified columns}
+ */ public fun DataFrame.distinctBy(columns: ColumnsSelector): DataFrame { val cols = get(columns) val distinctIndices = indices.distinctBy { i -> cols.map { it[i] } } diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/DocumentationUrls.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/DocumentationUrls.kt index 4fd9836b22..eb3ba32662 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/DocumentationUrls.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/documentation/DocumentationUrls.kt @@ -77,4 +77,7 @@ internal interface DocumentationUrls { /** [See `remove` on the documentation website.]({@include [Url]}/remove.html) */ interface Remove + + /** [See `distinct` on the documentation website.]({@include [Url]}/distinct.html) */ + interface Distinct }