Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 9 additions & 5 deletions core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/csv.kt
Original file line number Diff line number Diff line change
Expand Up @@ -57,8 +57,8 @@ public class CSV(private val delimiter: Char = ',') : SupportedDataFrameFormat {
}

/**
 * Preconfigured Apache Commons CSV formats.
 *
 * Each entry wraps a [CSVFormat] assembled with the builder API (the `with*`
 * methods on [CSVFormat] are deprecated in favour of `builder()`).
 *
 * @property format the ready-to-use [CSVFormat] for this type.
 */
public enum class CSVType(public val format: CSVFormat) {
    /** [CSVFormat.DEFAULT] with missing column names allowed and surrounding spaces ignored. */
    DEFAULT(
        CSVFormat.DEFAULT.builder()
            .setAllowMissingColumnNames(true)
            .setIgnoreSurroundingSpaces(true)
            .build()
    ),

    /** [CSVFormat.TDF] with missing column names allowed. */
    TDF(
        CSVFormat.TDF.builder()
            .setAllowMissingColumnNames(true)
            .build()
    )
}

// UTF-8 charset constant. NOTE(review): its usages are outside this view —
// presumably the fallback charset for the read/write functions; confirm.
private val defaultCharset = Charsets.UTF_8
Expand All @@ -73,11 +73,15 @@ internal fun isCompressed(url: URL) = isCompressed(url.path)
/**
 * Parses delimiter-separated [text] held in memory into a [DataFrame].
 *
 * The format starts from [CSVType.DEFAULT], takes the header from the first
 * record (`setHeader()` with no names) and uses [delimiter] as the field
 * separator.
 *
 * @param text the delimited content to parse.
 * @param delimiter field separator character; defaults to a comma.
 * @param colTypes forwarded unchanged to `readDelim`.
 * @param skipLines forwarded unchanged to `readDelim`.
 * @param readLines forwarded unchanged to `readDelim`.
 * @return the data frame produced by `readDelim`.
 */
@Interpretable("ReadDelimStr")
public fun DataFrame.Companion.readDelimStr(
    text: String,
    delimiter: Char = ',',
    colTypes: Map<String, ColType> = mapOf(),
    skipLines: Int = 0,
    readLines: Int? = null,
): DataFrame<*> =
    StringReader(text).use { reader ->
        // Derive the format from the shared default so the caller's delimiter is honoured.
        val format = CSVType.DEFAULT.format.builder()
            .setHeader()
            .setDelimiter(delimiter)
            .build()
        readDelim(reader, format, colTypes, skipLines, readLines)
    }

public fun DataFrame.Companion.read(
fileOrUrl: String,
Expand Down Expand Up @@ -212,7 +216,7 @@ public fun asURL(fileOrUrl: String): URL = (
).toURL()

/**
 * Derives a [CSVFormat] from the base format of [type].
 *
 * @param type supplies the base format to customise.
 * @param delimiter field separator character.
 * @param header explicit column names applied to the format.
 * @param duplicate whether duplicate header names are permitted.
 * @return the customised format.
 */
private fun getFormat(type: CSVType, delimiter: Char, header: List<String>, duplicate: Boolean): CSVFormat =
    type.format.builder()
        .setDelimiter(delimiter)
        .setHeader(*header.toTypedArray())
        // `duplicate` governs duplicate header names. Routing it into
        // setAllowMissingColumnNames would drop that handling and override
        // the missing-column-name setting carried by the base format.
        .setAllowDuplicateHeaderNames(duplicate)
        .build()

public fun DataFrame.Companion.readDelim(
inStream: InputStream,
Expand Down Expand Up @@ -268,7 +272,7 @@ public fun ColType.toType(): KClass<out Any> = when (this) {

public fun DataFrame.Companion.readDelim(
reader: Reader,
format: CSVFormat = CSVFormat.DEFAULT.withHeader(),
format: CSVFormat = CSVFormat.DEFAULT.builder().setHeader().build(),
colTypes: Map<String, ColType> = mapOf(),
skipLines: Int = 0,
readLines: Int? = null,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -242,7 +242,7 @@ class CsvTests {
)
df.writeCSV(
"src/test/resources/without_header.csv",
CSVFormat.DEFAULT.withSkipHeaderRecord(),
CSVFormat.DEFAULT.builder().setSkipHeaderRecord(true).build(),
)
val producedFile = File("src/test/resources/without_header.csv")
producedFile.exists() shouldBe true
Expand All @@ -258,6 +258,16 @@ class CsvTests {
df shouldBe DataFrame.readCSV("../data/jetbrains repositories.csv")
}

@Test
fun `readDelimStr delimiter`() {
    // Tabs are written as explicit escapes: literal tab characters inside a raw
    // string are invisible and easily turned into spaces by editors or formatters,
    // which would leave the input without any tab delimiter.
    val tsv = "a\tb\tc\n1\t2\t3"
    val df = DataFrame.readDelimStr(tsv, '\t')
    df shouldBe dataFrameOf("a", "b", "c")(1, 2, 3)
}

companion object {
private val simpleCsv = testCsv("testCSV")
private val csvWithFrenchLocale = testCsv("testCSVwithFrenchLocale")
Expand Down
17 changes: 11 additions & 6 deletions docs/StardustDocs/topics/write.md
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,8 @@ df.writeCSV(file)
<!---FUN writeCsvStr-->

```kotlin
val csvStr = df.toCsv(CSVFormat.DEFAULT.withDelimiter(';').withRecordSeparator(System.lineSeparator()))
val format = CSVFormat.DEFAULT.builder().setDelimiter(';').setRecordSeparator(System.lineSeparator()).build()
val csvStr = df.toCsv(format)
```

<!---END-->
Expand Down Expand Up @@ -104,8 +105,10 @@ val wb = WorkbookFactory.create(true)

// Create different sheets from different data frames in the workbook
val allPersonsSheet = df.writeExcel(wb, sheetName = "allPersons")
val happyPersonsSheet = df.filter { person -> person.isHappy }.remove("isHappy").writeExcel(wb, sheetName = "happyPersons")
val unhappyPersonsSheet = df.filter { person -> !person.isHappy }.remove("isHappy").writeExcel(wb, sheetName = "unhappyPersons")
val happyPersonsSheet =
df.filter { person -> person.isHappy }.remove("isHappy").writeExcel(wb, sheetName = "happyPersons")
val unhappyPersonsSheet =
df.filter { person -> !person.isHappy }.remove("isHappy").writeExcel(wb, sheetName = "unhappyPersons")

// Do anything you want by POI
listOf(happyPersonsSheet, unhappyPersonsSheet).forEach { setStyles(it) }
Expand All @@ -125,9 +128,11 @@ Add new sheets without using Apache POI directly by using a parameter to keep us
// Create a new Excel workbook with a single sheet called "allPersons", replacing the file if it already exists -> Current sheets: allPersons
df.writeExcel(file, sheetName = "allPersons")
// Add a new sheet to the previous file without replacing it, by setting keepFile = true -> Current sheets: allPersons, happyPersons
df.filter { person -> person.isHappy }.remove("isHappy").writeExcel(file, sheetName = "happyPersons", keepFile = true)
df.filter { person -> person.isHappy }.remove("isHappy")
.writeExcel(file, sheetName = "happyPersons", keepFile = true)
// Add a new sheet to the previous file without replacing it, by setting keepFile = true -> Current sheets: allPersons, happyPersons, unhappyPersons
df.filter { person -> !person.isHappy }.remove("isHappy").writeExcel(file, sheetName = "unhappyPersons", keepFile = true)
df.filter { person -> !person.isHappy }.remove("isHappy")
.writeExcel(file, sheetName = "unhappyPersons", keepFile = true)
```

<!---END-->
Expand Down Expand Up @@ -203,7 +208,7 @@ df.arrowWriter(
// Specify mismatch subscriber
mismatchSubscriber = writeMismatchMessage,

).use { writer: ArrowWriter ->
).use { writer: ArrowWriter ->

// Save to any format and sink, like in the previous example
writer.writeArrowFeather(file)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -90,9 +90,10 @@ private fun resolveFile(resolutionPath: String?, path: String): File? {

/**
 * Interpreter for `readDelimStr` calls: reads the `text` and `delimiter`
 * arguments, parses the text with [DataFrame.readDelimStr] and converts the
 * resulting schema to a [PluginDataFrameSchema].
 */
internal class ReadDelimStr : AbstractInterpreter<PluginDataFrameSchema>() {
    val Arguments.text: String by arg()

    // Falls back to ',' when the call site omits the argument, matching the
    // default declared on readDelimStr itself.
    val Arguments.delimiter: Char by arg(defaultValue = Present(','))

    override fun Arguments.interpret(): PluginDataFrameSchema =
        // Forward the delimiter so the schema reflects the separator the call uses.
        DataFrame.readDelimStr(text, delimiter).schema().toPluginDataFrameSchema()
}

Expand Down
2 changes: 1 addition & 1 deletion plugins/kotlin-dataframe/testData/box/diff.fir.ir.txt
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ FILE fqName:org.jetbrains.kotlinx.dataframe fileName:/diff.kt
FUN name:box visibility:public modality:FINAL <> () returnType:kotlin.String
BLOCK_BODY
VAR name:df type:org.jetbrains.kotlinx.dataframe.DataFrame<*> [val]
CALL 'public final fun readDelimStr (text: kotlin.String, colTypes: kotlin.collections.Map<kotlin.String, org.jetbrains.kotlinx.dataframe.io.ColType>, skipLines: kotlin.Int, readLines: kotlin.Int?): org.jetbrains.kotlinx.dataframe.DataFrame<*> declared in org.jetbrains.kotlinx.dataframe.io' type=org.jetbrains.kotlinx.dataframe.DataFrame<*> origin=null
CALL 'public final fun readDelimStr (text: kotlin.String, delimiter: kotlin.Char, colTypes: kotlin.collections.Map<kotlin.String, org.jetbrains.kotlinx.dataframe.io.ColType>, skipLines: kotlin.Int, readLines: kotlin.Int?): org.jetbrains.kotlinx.dataframe.DataFrame<*> declared in org.jetbrains.kotlinx.dataframe.io' type=org.jetbrains.kotlinx.dataframe.DataFrame<*> origin=null
$receiver: GET_OBJECT 'CLASS OBJECT name:Companion modality:FINAL visibility:public [companion] superTypes:[kotlin.Any]' type=org.jetbrains.kotlinx.dataframe.DataFrame.Companion
text: CALL 'public final fun trimIndent (): kotlin.String declared in kotlin.text' type=kotlin.String origin=null
$receiver: CONST String type=kotlin.String value="\n char,level,race,charclass,zone,guild,timestamp\n 59425,1,Orc,Rogue,Orgrimmar,165,01/01/08 00:02:04\n 65494,9,Orc,Hunter,Durotar,-1,01/01/08 00:02:04\n "
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ FILE fqName:org.jetbrains.kotlinx.dataframe fileName:/flexibleReturnType.kt
FUN name:box visibility:public modality:FINAL <> () returnType:kotlin.String
BLOCK_BODY
VAR name:df type:org.jetbrains.kotlinx.dataframe.DataFrame<*> [val]
CALL 'public final fun readDelimStr (text: kotlin.String, colTypes: kotlin.collections.Map<kotlin.String, org.jetbrains.kotlinx.dataframe.io.ColType>, skipLines: kotlin.Int, readLines: kotlin.Int?): org.jetbrains.kotlinx.dataframe.DataFrame<*> declared in org.jetbrains.kotlinx.dataframe.io' type=org.jetbrains.kotlinx.dataframe.DataFrame<*> origin=null
CALL 'public final fun readDelimStr (text: kotlin.String, delimiter: kotlin.Char, colTypes: kotlin.collections.Map<kotlin.String, org.jetbrains.kotlinx.dataframe.io.ColType>, skipLines: kotlin.Int, readLines: kotlin.Int?): org.jetbrains.kotlinx.dataframe.DataFrame<*> declared in org.jetbrains.kotlinx.dataframe.io' type=org.jetbrains.kotlinx.dataframe.DataFrame<*> origin=null
$receiver: GET_OBJECT 'CLASS OBJECT name:Companion modality:FINAL visibility:public [companion] superTypes:[kotlin.Any]' type=org.jetbrains.kotlinx.dataframe.DataFrame.Companion
text: CALL 'public final fun trimIndent (): kotlin.String declared in kotlin.text' type=kotlin.String origin=null
$receiver: CONST String type=kotlin.String value="\n char,level,race,charclass,zone,guild,timestamp\n 59425,1,Orc,Rogue,Orgrimmar,165,01/01/08 00:02:04\n 65494,9,Orc,Hunter,Durotar,-1,01/01/08 00:02:04\n "
Expand Down
Loading