-
Notifications
You must be signed in to change notification settings - Fork 76
Replace Klaxon with kotlinx-serialization #603
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 5 commits
63d6f8b
83ba578
78441dd
b7e4558
d061d13
6d954f8
22bb293
79c294e
cf43401
59c33b1
eda1884
f3770cf
b385a62
c0ba411
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,7 +1,21 @@ | ||
| package org.jetbrains.kotlinx.dataframe.impl.io | ||
|
|
||
| import com.beust.klaxon.JsonArray | ||
| import com.beust.klaxon.JsonObject | ||
| import kotlinx.serialization.json.JsonArray | ||
| import kotlinx.serialization.json.JsonNull | ||
| import kotlinx.serialization.json.JsonObject | ||
| import kotlinx.serialization.json.JsonPrimitive | ||
| import kotlinx.serialization.json.boolean | ||
| import kotlinx.serialization.json.booleanOrNull | ||
| import kotlinx.serialization.json.double | ||
| import kotlinx.serialization.json.doubleOrNull | ||
| import kotlinx.serialization.json.float | ||
| import kotlinx.serialization.json.floatOrNull | ||
| import kotlinx.serialization.json.int | ||
| import kotlinx.serialization.json.intOrNull | ||
| import kotlinx.serialization.json.jsonArray | ||
| import kotlinx.serialization.json.jsonPrimitive | ||
| import kotlinx.serialization.json.long | ||
| import kotlinx.serialization.json.longOrNull | ||
| import org.jetbrains.kotlinx.dataframe.AnyCol | ||
| import org.jetbrains.kotlinx.dataframe.AnyFrame | ||
| import org.jetbrains.kotlinx.dataframe.DataColumn | ||
|
|
@@ -73,8 +87,8 @@ internal fun readJson( | |
| val df: AnyFrame = when (typeClashTactic) { | ||
| ARRAY_AND_VALUE_COLUMNS -> { | ||
| when (parsed) { | ||
| is JsonArray<*> -> fromJsonListArrayAndValueColumns( | ||
| records = parsed.value, | ||
| is JsonArray -> fromJsonListArrayAndValueColumns( | ||
| records = parsed, | ||
| header = header, | ||
| keyValuePaths = keyValuePaths, | ||
| ) | ||
|
|
@@ -88,8 +102,8 @@ internal fun readJson( | |
|
|
||
| ANY_COLUMNS -> { | ||
| when (parsed) { | ||
| is JsonArray<*> -> fromJsonListAnyColumns( | ||
| records = parsed.value, | ||
| is JsonArray -> fromJsonListAnyColumns( | ||
| records = parsed, | ||
| header = header, | ||
| keyValuePaths = keyValuePaths, | ||
| ) | ||
|
|
@@ -126,18 +140,16 @@ internal fun fromJsonListAnyColumns( | |
|
|
||
| // list element type can be JsonObject, JsonArray or primitive | ||
| val nameGenerator = ColumnNameGenerator() | ||
| records.forEach { | ||
| when (it) { | ||
| records.forEach { record -> | ||
| when (record) { | ||
| is JsonObject -> { | ||
| hasObject = true | ||
| it.entries.forEach { | ||
| nameGenerator.addIfAbsent(it.key) | ||
| } | ||
| record.entries.forEach { nameGenerator.addIfAbsent(it.key) } | ||
| } | ||
|
|
||
| is JsonArray<*> -> hasArray = true | ||
| null -> Unit | ||
| else -> hasPrimitive = true | ||
| is JsonArray -> hasArray = true | ||
| is JsonNull, null -> Unit | ||
| is JsonPrimitive -> hasPrimitive = true | ||
| } | ||
| } | ||
|
|
||
|
|
@@ -155,7 +167,7 @@ internal fun fromJsonListAnyColumns( | |
|
|
||
| @Suppress("KotlinConstantConditions") | ||
| val columns: List<AnyCol> = when { | ||
| // Create one column of type Any? (or guessed primitive type) from all the records | ||
| // Create one column of type Any? (or guessed a primitive type) from all the records | ||
| colType == AnyColType.ANY -> { | ||
| val collector: DataCollectorBase<Any?> = | ||
| if (justPrimitives) createDataCollector(records.size) // guess the type | ||
|
|
@@ -177,7 +189,7 @@ internal fun fromJsonListAnyColumns( | |
| ) | ||
| } | ||
|
|
||
| is JsonArray<*> -> { | ||
| is JsonArray -> { | ||
| val parsed = fromJsonListAnyColumns( | ||
| records = v, | ||
| keyValuePaths = keyValuePaths, | ||
|
|
@@ -189,9 +201,21 @@ internal fun fromJsonListAnyColumns( | |
| ) | ||
| } | ||
|
|
||
| "NaN" -> { | ||
| nanIndices.add(i) | ||
| collector.add(null) | ||
| is JsonPrimitive -> { | ||
|
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. While this … There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I might be missing something … There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Sure, let's say we want to parse an array of … There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Hm, indeed, this isn't the right place; that should be fixed when supporting Serializable for DataFrame and DataRow. I also think that part of this might be optimized at runtime, since our value is already wrapped in JsonPrimitive. There are doubts that JDK 8 handles such optimizations well. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I moved the null check higher up. Given the current capabilities of kotlinx-serialization, there are two options: either leave it as it is now, or write a custom serializer. However, the logic in the custom serializer would be quite similar. We can always take a String and then manually attempt to convert it to the desired type. This might be slightly better in terms of performance, and we would need to account for all edge cases for different types. |
||
| when { | ||
| v.content == "NaN" -> { | ||
| nanIndices.add(i) | ||
| collector.add(null) | ||
| } | ||
|
|
||
| v.isString -> collector.add(v.content) | ||
| v.booleanOrNull != null -> collector.add(v.boolean) | ||
| v.intOrNull != null -> collector.add(v.int) | ||
| v.longOrNull != null -> collector.add(v.long) | ||
| v.doubleOrNull != null -> collector.add(v.double) | ||
| v.floatOrNull != null -> collector.add(v.float) | ||
| v.jsonPrimitive is JsonNull -> collector.add(null) | ||
| } | ||
| } | ||
|
|
||
| else -> collector.add(v) | ||
|
|
@@ -227,8 +251,8 @@ internal fun fromJsonListAnyColumns( | |
| records.forEach { | ||
| startIndices.add(values.size) | ||
| when (it) { | ||
| is JsonArray<*> -> values.addAll(it.value) | ||
| null -> Unit | ||
| is JsonArray -> values.addAll(it) | ||
| is JsonNull, null -> Unit | ||
| else -> error("Expected JsonArray, got $it") | ||
| } | ||
| } | ||
|
|
@@ -242,10 +266,10 @@ internal fun fromJsonListAnyColumns( | |
| parsed.isSingleUnnamedColumn() -> { | ||
| val col = (parsed.getColumn(0) as UnnamedColumn).col | ||
| val elementType = col.type | ||
| val values = col.values.asList().splitByIndices(startIndices.asSequence()).toList() | ||
| val columnValues = col.values.asList().splitByIndices(startIndices.asSequence()).toList() | ||
| DataColumn.createValueColumn( | ||
| name = arrayColumnName, | ||
| values = values, | ||
| values = columnValues, | ||
| type = List::class.createType(listOf(KTypeProjection.invariant(elementType))), | ||
| ) | ||
| } | ||
|
|
@@ -263,10 +287,10 @@ internal fun fromJsonListAnyColumns( | |
| colType == AnyColType.OBJECTS && isKeyValue -> { | ||
| // collect the value types to make sure Value columns with lists and other values aren't all turned into lists | ||
| val valueTypes = mutableSetOf<KType>() | ||
| val dataFrames = records.map { | ||
| when (it) { | ||
| val dataFrames = records.map { record -> | ||
| when (record) { | ||
| is JsonObject -> { | ||
| val map = it.map.mapValues { (key, value) -> | ||
| val map = record.mapValues { (key, value) -> | ||
| val parsed = fromJsonListAnyColumns( | ||
| records = listOf(value), | ||
| keyValuePaths = keyValuePaths, | ||
|
|
@@ -288,8 +312,8 @@ internal fun fromJsonListAnyColumns( | |
| ) | ||
| } | ||
|
|
||
| null -> DataFrame.emptyOf<AnyKeyValueProperty>() | ||
| else -> error("Expected JsonObject, got $it") | ||
| is JsonNull, null -> DataFrame.emptyOf<AnyKeyValueProperty>() | ||
| else -> error("Expected JsonObject, got $record") | ||
| } | ||
| } | ||
|
|
||
|
|
@@ -328,7 +352,7 @@ internal fun fromJsonListAnyColumns( | |
| records.forEach { | ||
| when (it) { | ||
| is JsonObject -> values.add(it[colName]) | ||
| null -> values.add(null) | ||
| is JsonNull, null -> values.add(null) | ||
| else -> error("Expected JsonObject, got $it") | ||
| } | ||
| } | ||
|
|
@@ -395,24 +419,24 @@ internal fun fromJsonListArrayAndValueColumns( | |
|
|
||
| // list element type can be JsonObject, JsonArray or primitive | ||
| // So first, we gather all properties of objects to merge including "array" and "value" if needed | ||
| // so the resulting type of a property with instances 123, ["abc"], and { "a": 1, "b": 2 } will be | ||
| // so the resulting type of property with instances 123, ["abc"], and { "a": 1, "b": 2 } will be | ||
devcrocod marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
| // { array: List<String>, value: Int?, a: Int?, b: Int? } | ||
| // and instances will look like | ||
| // { "array": [], "value": 123, "a": null, "b": null } | ||
|
|
||
| val nameGenerator = ColumnNameGenerator() | ||
| records.forEach { | ||
| when (it) { | ||
| is JsonObject -> it.entries.forEach { | ||
| records.forEach { record -> | ||
| when (record) { | ||
| is JsonObject -> record.entries.forEach { | ||
| nameGenerator.addIfAbsent(it.key) | ||
| } | ||
|
|
||
| is JsonArray<*> -> hasArray = true | ||
| null -> Unit | ||
| else -> hasPrimitive = true | ||
| is JsonArray -> hasArray = true | ||
| is JsonNull, null -> Unit | ||
| is JsonPrimitive -> hasPrimitive = true | ||
| } | ||
| } | ||
| if (records.all { it == null }) hasPrimitive = true | ||
| if (records.all { it == null || it is JsonNull }) hasPrimitive = true | ||
|
|
||
| // Add a value column to the collected names if needed | ||
| val valueColumn = if (hasPrimitive || records.isEmpty()) { | ||
|
|
@@ -433,10 +457,10 @@ internal fun fromJsonListArrayAndValueColumns( | |
| val columns: List<AnyCol> = when { | ||
| // instead of using the names, generate a single key/value frame column | ||
| isKeyValue -> { | ||
| val dataFrames = records.map { | ||
| when (it) { | ||
| val dataFrames = records.map { record -> | ||
| when (record) { | ||
| is JsonObject -> { | ||
| val map = it.map.mapValues { (key, value) -> | ||
| val map = record.mapValues { (key, value) -> | ||
| val parsed = fromJsonListArrayAndValueColumns( | ||
| records = listOf(value), | ||
| keyValuePaths = keyValuePaths, | ||
|
|
@@ -459,8 +483,8 @@ internal fun fromJsonListArrayAndValueColumns( | |
| ) | ||
| } | ||
|
|
||
| null -> DataFrame.emptyOf<AnyKeyValueProperty>() | ||
| else -> error("Expected JsonObject, got $it") | ||
| is JsonNull, null -> DataFrame.emptyOf<AnyKeyValueProperty>() | ||
| else -> error("Expected JsonObject, got $record") | ||
| } | ||
| } | ||
|
|
||
|
|
@@ -488,10 +512,23 @@ internal fun fromJsonListArrayAndValueColumns( | |
| records.forEachIndexed { i, v -> | ||
| when (v) { | ||
| is JsonObject -> collector.add(null) | ||
| is JsonArray<*> -> collector.add(null) | ||
| "NaN" -> { | ||
| nanIndices.add(i) | ||
| collector.add(null) | ||
| is JsonArray -> collector.add(null) | ||
| is JsonPrimitive -> { | ||
|
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Same note as before with … |
||
| when { | ||
| v.content == "NaN" -> { | ||
| nanIndices.add(i) | ||
| collector.add(null) | ||
| } | ||
|
|
||
| v.isString -> collector.add(v.content) | ||
| v.booleanOrNull != null -> collector.add(v.boolean) | ||
| v.intOrNull != null -> collector.add(v.int) | ||
| v.longOrNull != null -> collector.add(v.long) | ||
| v.doubleOrNull != null -> collector.add(v.double) | ||
| v.floatOrNull != null -> collector.add(v.float) | ||
| v is JsonNull -> collector.add(null) | ||
| else -> collector.add(v) | ||
| } | ||
| } | ||
|
|
||
| else -> collector.add(v) | ||
|
|
@@ -526,7 +563,7 @@ internal fun fromJsonListArrayAndValueColumns( | |
| val startIndices = ArrayList<Int>() | ||
| records.forEach { | ||
| startIndices.add(values.size) | ||
| if (it is JsonArray<*>) values.addAll(it.value) | ||
| if (it is JsonArray) values.addAll(it.jsonArray) | ||
| } | ||
| val parsed = fromJsonListArrayAndValueColumns( | ||
| records = values, | ||
|
|
@@ -538,10 +575,11 @@ internal fun fromJsonListArrayAndValueColumns( | |
| parsed.isSingleUnnamedColumn() -> { | ||
| val col = (parsed.getColumn(0) as UnnamedColumn).col | ||
| val elementType = col.type | ||
| val values = col.values.asList().splitByIndices(startIndices.asSequence()).toList() | ||
| val columnValues = | ||
| col.values.asList().splitByIndices(startIndices.asSequence()).toList() | ||
| DataColumn.createValueColumn( | ||
| name = colName, | ||
| values = values, | ||
| values = columnValues, | ||
| type = List::class.createType(listOf(KTypeProjection.invariant(elementType))), | ||
| ) | ||
| } | ||
|
|
||
Uh oh!
There was an error while loading. Please reload this page.