Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -223,7 +223,15 @@ public fun DataFrame.Companion.readParquet(
vararg paths: Path,
nullability: NullabilityOptions = NullabilityOptions.Infer,
batchSize: Long = ARROW_PARQUET_DEFAULT_BATCH_SIZE,
): AnyFrame = readArrowDatasetImpl(paths.map { "file:$it" }.toTypedArray(), FileFormat.PARQUET, nullability, batchSize)
): AnyFrame =
readArrowDatasetImpl(
paths.map {
it.toUri().toString()
}.toTypedArray(),
FileFormat.PARQUET,
nullability,
batchSize,
)

/**
* Read [Parquet](https://parquet.apache.org/) data from existing [files] by using [Arrow Dataset](https://arrow.apache.org/docs/java/dataset.html)
Expand All @@ -235,7 +243,7 @@ public fun DataFrame.Companion.readParquet(
): AnyFrame =
readArrowDatasetImpl(
files.map {
"file:${it.toPath()}"
it.toURI().toString()
}.toTypedArray(),
FileFormat.PARQUET,
nullability,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -432,12 +432,12 @@ private fun resolveArrowDatasetUris(fileUris: Array<String>): Array<String> =
tempFile.deleteOnExit()
url.openStream().use { input ->
Files.copy(input, tempFile.toPath())
"file:${tempFile.toPath()}"
tempFile.toURI().toString()
}
}

!it.startsWith("file:", true) && File(it).exists() -> {
"file:$it"
File(it).toURI().toString()
}

else -> it
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -48,12 +48,11 @@ import org.junit.Test
import java.io.ByteArrayInputStream
import java.io.ByteArrayOutputStream
import java.io.File
import java.net.URI
import java.net.URL
import java.nio.channels.Channels
import java.nio.file.FileSystems
import java.sql.DriverManager
import java.util.Locale
import kotlin.io.path.toPath
import kotlin.reflect.typeOf

internal class ArrowKtTest {
Expand Down Expand Up @@ -658,9 +657,11 @@ internal class ArrowKtTest {

@Test
fun testReadParquetPath() {
val resourceLocation = testResource("test.arrow.parquet").path
val resourcePath = FileSystems.getDefault().getPath(resourceLocation)
val resourceUrl = testResource("test.arrow.parquet")
val resourcePath = resourceUrl.toURI().toPath()

val dataFrame = DataFrame.readParquet(resourcePath)

dataFrame.rowsCount() shouldBe 300
assertEstimations(
exampleFrame = dataFrame,
Expand All @@ -672,9 +673,11 @@ internal class ArrowKtTest {

@Test
fun testReadParquetFile() {
val resourceLocation = testResource("test.arrow.parquet").path
val resourcePath = FileSystems.getDefault().getPath(resourceLocation)
val resourceUrl = testResource("test.arrow.parquet")
val resourcePath = resourceUrl.toURI().toPath()

val dataFrame = DataFrame.readParquet(resourcePath.toFile())

dataFrame.rowsCount() shouldBe 300
assertEstimations(
exampleFrame = dataFrame,
Expand All @@ -686,9 +689,11 @@ internal class ArrowKtTest {

@Test
fun testReadParquetStringPath() {
val resourceLocation = testResource("test.arrow.parquet").path
val resourcePath = FileSystems.getDefault().getPath(resourceLocation)
val resourceUrl = testResource("test.arrow.parquet")
val resourcePath = resourceUrl.toURI().toPath()

val dataFrame = DataFrame.readParquet("$resourcePath")

dataFrame.rowsCount() shouldBe 300
assertEstimations(
exampleFrame = dataFrame,
Expand All @@ -700,10 +705,12 @@ internal class ArrowKtTest {

@Test
fun testReadParquetUrl() {
val resourceLocation = testResource("test.arrow.parquet").path
val resourcePath = FileSystems.getDefault().getPath(resourceLocation)
val fileUrl = URI.create("file:$resourcePath").toURL()
val resourceUrl = testResource("test.arrow.parquet")
val resourcePath = resourceUrl.toURI().toPath()
val fileUrl = resourcePath.toUri().toURL()
Copy link

Copilot AI Aug 14, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The method toUri() is called on resourcePath - please verify this is the correct method name for the Path type. java.nio.file.Path declares toUri() (lowercase "ri"), whereas java.io.File and java.net.URL declare toURI() (uppercase "URI"), so the two spellings are easy to confuse; resourcePath here is a Path obtained via toURI().toPath().

Copilot uses AI. Check for mistakes.

val dataFrame = DataFrame.readParquet(fileUrl)

dataFrame.rowsCount() shouldBe 300
assertEstimations(
exampleFrame = dataFrame,
Expand All @@ -715,9 +722,11 @@ internal class ArrowKtTest {

@Test
fun testReadMultipleParquetFiles() {
val resourceLocation = testResource("test.arrow.parquet").path
val resourcePath = FileSystems.getDefault().getPath(resourceLocation)
val resourceUrl = testResource("test.arrow.parquet")
val resourcePath = resourceUrl.toURI().toPath()

val dataFrame = DataFrame.readParquet(resourcePath, resourcePath, resourcePath)

dataFrame.rowsCount() shouldBe 900
}
}