diff --git a/dataframe-jdbc/api/dataframe-jdbc.api b/dataframe-jdbc/api/dataframe-jdbc.api index 552c069316..bbda40153d 100644 --- a/dataframe-jdbc/api/dataframe-jdbc.api +++ b/dataframe-jdbc/api/dataframe-jdbc.api @@ -1,13 +1,15 @@ public final class org/jetbrains/kotlinx/dataframe/io/DbConnectionConfig { - public fun <init> (Ljava/lang/String;Ljava/lang/String;Ljava/lang/String;)V - public synthetic fun <init> (Ljava/lang/String;Ljava/lang/String;Ljava/lang/String;ILkotlin/jvm/internal/DefaultConstructorMarker;)V + public fun <init> (Ljava/lang/String;Ljava/lang/String;Ljava/lang/String;Z)V + public synthetic fun <init> (Ljava/lang/String;Ljava/lang/String;Ljava/lang/String;ZILkotlin/jvm/internal/DefaultConstructorMarker;)V public final fun component1 ()Ljava/lang/String; public final fun component2 ()Ljava/lang/String; public final fun component3 ()Ljava/lang/String; - public final fun copy (Ljava/lang/String;Ljava/lang/String;Ljava/lang/String;)Lorg/jetbrains/kotlinx/dataframe/io/DbConnectionConfig; - public static synthetic fun copy$default (Lorg/jetbrains/kotlinx/dataframe/io/DbConnectionConfig;Ljava/lang/String;Ljava/lang/String;Ljava/lang/String;ILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/io/DbConnectionConfig; + public final fun component4 ()Z + public final fun copy (Ljava/lang/String;Ljava/lang/String;Ljava/lang/String;Z)Lorg/jetbrains/kotlinx/dataframe/io/DbConnectionConfig; + public static synthetic fun copy$default (Lorg/jetbrains/kotlinx/dataframe/io/DbConnectionConfig;Ljava/lang/String;Ljava/lang/String;Ljava/lang/String;ZILjava/lang/Object;)Lorg/jetbrains/kotlinx/dataframe/io/DbConnectionConfig; public fun equals (Ljava/lang/Object;)Z public final fun getPassword ()Ljava/lang/String; + public final fun getReadOnly ()Z public final fun getUrl ()Ljava/lang/String; public final fun getUser ()Ljava/lang/String; public fun hashCode ()I diff --git a/dataframe-jdbc/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/readJdbc.kt 
b/dataframe-jdbc/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/readJdbc.kt index 90dfef9f5e..e4d6d8edf3 100644 --- a/dataframe-jdbc/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/readJdbc.kt +++ b/dataframe-jdbc/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/readJdbc.kt @@ -22,6 +22,7 @@ import java.sql.Ref import java.sql.ResultSet import java.sql.ResultSetMetaData import java.sql.RowId +import java.sql.SQLException import java.sql.SQLXML import java.sql.Time import java.sql.Timestamp @@ -104,13 +105,105 @@ public data class TableColumnMetadata( public data class TableMetadata(val name: String, val schemaName: String?, val catalogue: String?) /** - * Represents the configuration for a database connection. + * Represents the configuration for an internally managed JDBC database connection. * - * @property [url] the URL of the database. Keep it in the following form jdbc:subprotocol:subnam - * @property [user] the username used for authentication (optional, default is empty string). - * @property [password] the password used for authentication (optional, default is empty string). + * This class defines connection parameters used by the library to create a `Connection` + * when the user does not provide one explicitly. It is designed for safe, read-only access by default. + * + * @property url The JDBC URL of the database, e.g., `"jdbc:postgresql://localhost:5432/mydb"`. + * Must follow the standard format: `jdbc:subprotocol:subname`. + * + * @property user The username used for authentication. + * Optional, default is an empty string. + * + * @property password The password used for authentication. + * Optional, default is an empty string. + * + * @property readOnly If `true` (default), the library will create the connection in read-only mode. 
+ * This enables the following behavior: + * - `Connection.setReadOnly(true)` + * - `Connection.setAutoCommit(false)` + * - automatic `rollback()` at the end of execution + * + * If `false`, the connection will be created with JDBC defaults (usually read-write), + * but the library will still reject any queries that appear to modify data + * (e.g. contain `INSERT`, `UPDATE`, `DELETE`, etc.). + * + * Note: Connections created using this configuration are managed entirely by the library. + * Users do not have access to the underlying `Connection` instance and cannot commit or close it manually. + * + * ### Examples: + * + * ```kotlin + * // Safe read-only connection (default) + * val config = DbConnectionConfig("jdbc:sqlite::memory:") + * val df = DataFrame.readSqlQuery(config, "SELECT * FROM books") + * + * // Use default JDBC connection settings (still protected against mutations) + * val config = DbConnectionConfig( + * url = "jdbc:sqlite::memory:", + * readOnly = false + * ) + * ``` */ -public data class DbConnectionConfig(val url: String, val user: String = "", val password: String = "") +public data class DbConnectionConfig( + val url: String, + val user: String = "", + val password: String = "", + val readOnly: Boolean = true, +) + +/** + * Executes the given block with a managed JDBC connection created from [DbConnectionConfig]. + * + * If [DbConnectionConfig.readOnly] is `true` (default), the connection will be: + * - explicitly marked as read-only + * - used with auto-commit disabled + * - rolled back after execution to prevent unintended modifications + * + * This utility guarantees proper closing of the connection and safe rollback in read-only mode. + * It should be used when the user does not manually manage JDBC connections. + * + * @param [dbConfig] The configuration used to create the connection. + * @param [dbType] Optional database type (not used here but can be passed through for logging or future extensions). 
+ * @param [block] A lambda that is invoked with the open, managed [Connection] as its argument.
+ * @return The result of the [block] execution.
+ */
+internal inline fun <T> withReadOnlyConnection(
+    dbConfig: DbConnectionConfig,
+    dbType: DbType? = null,
+    block: (Connection) -> T,
+): T {
+    val connection = DriverManager.getConnection(dbConfig.url, dbConfig.user, dbConfig.password)
+
+    return connection.use { conn ->
+        // JDBC defaults were requested: nothing is changed, so nothing has to be undone.
+        if (!dbConfig.readOnly) return@use block(conn)
+
+        // Read the original state inside `use`, so a failure here still closes the connection.
+        val originalAutoCommit = conn.autoCommit
+        val originalReadOnly = conn.isReadOnly
+
+        try {
+            conn.autoCommit = false
+            conn.isReadOnly = true
+            block(conn)
+        } finally {
+            try {
+                conn.rollback()
+                // Restore only after a successful rollback: re-enabling auto-commit while the
+                // transaction is still open would commit it.
+                conn.autoCommit = originalAutoCommit
+                conn.isReadOnly = originalReadOnly
+            } catch (e: SQLException) {
+                // Cleanup is best-effort and must not mask an exception thrown by [block].
+                logger.warn(e) {
+                    "Failed to rollback read-only transaction or restore settings (url=${dbConfig.url})"
+                }
+            }
+        }
+    }
+}
 
 /**
  * Reads data from an SQL table and converts it into a DataFrame.
@@ -124,6 +217,15 @@ public data class DbConnectionConfig(val url: String, val user: String = "", val
  * @param [strictValidation] if `true`, the method validates that the provided table name is in a valid format.
  * Default is `true` for strict validation.
  * @return the DataFrame containing the data from the SQL table.
+ *
+ * ### Default Behavior:
+ * If [DbConnectionConfig.readOnly] is `true` (which is the default), the connection will be:
+ * - explicitly set as read-only via `Connection.setReadOnly(true)`
+ * - used with `autoCommit = false`
+ * - automatically rolled back after reading, ensuring no changes to the database
+ *
+ * Even if [DbConnectionConfig.readOnly] is set to `false`, the library still prevents data-modifying queries
+ * and only permits safe `SELECT` operations internally.
*/ public fun DataFrame.Companion.readSqlTable( dbConfig: DbConnectionConfig, @@ -132,11 +234,10 @@ public fun DataFrame.Companion.readSqlTable( inferNullability: Boolean = true, dbType: DbType? = null, strictValidation: Boolean = true, -): AnyFrame { - DriverManager.getConnection(dbConfig.url, dbConfig.user, dbConfig.password).use { connection -> - return readSqlTable(connection, tableName, limit, inferNullability, dbType, strictValidation) +): AnyFrame = + withReadOnlyConnection(dbConfig, dbType) { conn -> + readSqlTable(conn, tableName, limit, inferNullability, dbType, strictValidation) } -} /** * Reads data from an SQL table and converts it into a DataFrame. @@ -203,6 +304,15 @@ public fun DataFrame.Companion.readSqlTable( * @param [strictValidation] if `true`, the method validates that the provided query is in a valid format. * Default is `true` for strict validation. * @return the DataFrame containing the result of the SQL query. + * + * ### Default Behavior: + * If [DbConnectionConfig.readOnly] is `true` (which is the default), the connection will be: + * - explicitly set as read-only via `Connection.setReadOnly(true)` + * - used with `autoCommit = false` + * - automatically rolled back after reading, ensuring no changes to the database + * + * Even if [DbConnectionConfig.readOnly] is set to `false`, the library still prevents data-modifying queries + * and only permits safe `SELECT` operations internally. */ public fun DataFrame.Companion.readSqlQuery( @@ -212,11 +322,10 @@ public fun DataFrame.Companion.readSqlQuery( inferNullability: Boolean = true, dbType: DbType? 
= null, strictValidation: Boolean = true, -): AnyFrame { - DriverManager.getConnection(dbConfig.url, dbConfig.user, dbConfig.password).use { connection -> - return readSqlQuery(connection, sqlQuery, limit, inferNullability, dbType, strictValidation) +): AnyFrame = + withReadOnlyConnection(dbConfig, dbType) { conn -> + readSqlQuery(conn, sqlQuery, limit, inferNullability, dbType, strictValidation) } -} /** * Converts the result of an SQL query to the DataFrame. @@ -281,6 +390,15 @@ public fun DataFrame.Companion.readSqlQuery( * @param [strictValidation] if `true`, the method validates that the provided query or table name is in a valid format. * Default is `true` for strict validation. * @return the DataFrame containing the result of the SQL query. + * + * ### Default Behavior: + * If [DbConnectionConfig.readOnly] is `true` (which is the default), the connection will be: + * - explicitly set as read-only via `Connection.setReadOnly(true)` + * - used with `autoCommit = false` + * - automatically rolled back after reading, ensuring no changes to the database + * + * Even if [DbConnectionConfig.readOnly] is set to `false`, the library still prevents data-modifying queries + * and only permits safe `SELECT` operations internally. */ public fun DbConnectionConfig.readDataFrame( sqlQueryOrTableName: String, @@ -638,6 +756,15 @@ public fun ResultSet.readDataFrame( * @param [dbType] the type of database, could be a custom object, provided by user, optional, default is `null`, * in that case the [dbType] will be recognized from the [dbConfig]. * @return a map of [String] to [AnyFrame] objects representing the non-system tables from the database. 
+ * + * ### Default Behavior: + * If [DbConnectionConfig.readOnly] is `true` (which is the default), the connection will be: + * - explicitly set as read-only via `Connection.setReadOnly(true)` + * - used with `autoCommit = false` + * - automatically rolled back after reading, ensuring no changes to the database + * + * Even if [DbConnectionConfig.readOnly] is set to `false`, the library still prevents data-modifying queries + * and only permits safe `SELECT` operations internally. */ public fun DataFrame.Companion.readAllSqlTables( dbConfig: DbConnectionConfig, @@ -645,11 +772,10 @@ public fun DataFrame.Companion.readAllSqlTables( limit: Int = DEFAULT_LIMIT, inferNullability: Boolean = true, dbType: DbType? = null, -): Map<String, AnyFrame> { - DriverManager.getConnection(dbConfig.url, dbConfig.user, dbConfig.password).use { connection -> - return readAllSqlTables(connection, catalogue, limit, inferNullability, dbType) +): Map<String, AnyFrame> = + withReadOnlyConnection(dbConfig, dbType) { connection -> + readAllSqlTables(connection, catalogue, limit, inferNullability, dbType) } -} /** * Reads all non-system tables from a database and returns them @@ -712,16 +838,24 @@ public fun DataFrame.Companion.readAllSqlTables( * @param [dbType] the type of database, could be a custom object, provided by user, optional, default is `null`, * in that case the [dbType] will be recognized from the [dbConfig]. * @return the [DataFrameSchema] object representing the schema of the SQL table + * + * ### Default Behavior: + * If [DbConnectionConfig.readOnly] is `true` (which is the default), the connection will be: + * - explicitly set as read-only via `Connection.setReadOnly(true)` + * - used with `autoCommit = false` + * - automatically rolled back after reading, ensuring no changes to the database + * + * Even if [DbConnectionConfig.readOnly] is set to `false`, the library still prevents data-modifying queries + * and only permits safe `SELECT` operations internally. 
*/ public fun DataFrame.Companion.getSchemaForSqlTable( dbConfig: DbConnectionConfig, tableName: String, dbType: DbType? = null, -): DataFrameSchema { - DriverManager.getConnection(dbConfig.url, dbConfig.user, dbConfig.password).use { connection -> - return getSchemaForSqlTable(connection, tableName, dbType) +): DataFrameSchema = + withReadOnlyConnection(dbConfig, dbType) { connection -> + getSchemaForSqlTable(connection, tableName, dbType) } -} /** * Retrieves the schema for an SQL table using the provided database connection. @@ -760,16 +894,24 @@ public fun DataFrame.Companion.getSchemaForSqlTable( * @param [dbType] the type of database, could be a custom object, provided by user, optional, default is `null`, * in that case the [dbType] will be recognized from the [dbConfig]. * @return the schema of the SQL query as a [DataFrameSchema] object. + * + * ### Default Behavior: + * If [DbConnectionConfig.readOnly] is `true` (which is the default), the connection will be: + * - explicitly set as read-only via `Connection.setReadOnly(true)` + * - used with `autoCommit = false` + * - automatically rolled back after reading, ensuring no changes to the database + * + * Even if [DbConnectionConfig.readOnly] is set to `false`, the library still prevents data-modifying queries + * and only permits safe `SELECT` operations internally. */ public fun DataFrame.Companion.getSchemaForSqlQuery( dbConfig: DbConnectionConfig, sqlQuery: String, dbType: DbType? = null, -): DataFrameSchema { - DriverManager.getConnection(dbConfig.url, dbConfig.user, dbConfig.password).use { connection -> - return getSchemaForSqlQuery(connection, sqlQuery, dbType) +): DataFrameSchema = + withReadOnlyConnection(dbConfig, dbType) { connection -> + getSchemaForSqlQuery(connection, sqlQuery, dbType) } -} /** * Retrieves the schema of an SQL query result using the provided database connection. 
@@ -804,6 +946,15 @@ public fun DataFrame.Companion.getSchemaForSqlQuery( * @param [dbType] the type of database, could be a custom object, provided by user, optional, default is `null`, * in that case the [dbType] will be recognized from the [DbConnectionConfig]. * @return the schema of the SQL query as a [DataFrameSchema] object. + * + * ### Default Behavior: + * If [DbConnectionConfig.readOnly] is `true` (which is the default), the connection will be: + * - explicitly set as read-only via `Connection.setReadOnly(true)` + * - used with `autoCommit = false` + * - automatically rolled back after reading, ensuring no changes to the database + * + * Even if [DbConnectionConfig.readOnly] is set to `false`, the library still prevents data-modifying queries + * and only permits safe `SELECT` operations internally. */ public fun DbConnectionConfig.getDataFrameSchema( sqlQueryOrTableName: String, @@ -869,15 +1020,23 @@ public fun ResultSet.getDataFrameSchema(dbType: DbType): DataFrameSchema = DataF * @param [dbType] the type of database, could be a custom object, provided by user, optional, default is `null`, * in that case the [dbType] will be recognized from the [dbConfig]. * @return a map of [String, DataFrameSchema] objects representing the table name and its schema for each non-system table. + * + * ### Default Behavior: + * If [DbConnectionConfig.readOnly] is `true` (which is the default), the connection will be: + * - explicitly set as read-only via `Connection.setReadOnly(true)` + * - used with `autoCommit = false` + * - automatically rolled back after reading, ensuring no changes to the database + * + * Even if [DbConnectionConfig.readOnly] is set to `false`, the library still prevents data-modifying queries + * and only permits safe `SELECT` operations internally. */ public fun DataFrame.Companion.getSchemaForAllSqlTables( dbConfig: DbConnectionConfig, dbType: DbType? 
= null, -): Map<String, DataFrameSchema> { - DriverManager.getConnection(dbConfig.url, dbConfig.user, dbConfig.password).use { connection -> - return getSchemaForAllSqlTables(connection, dbType) +): Map<String, DataFrameSchema> = + withReadOnlyConnection(dbConfig, dbType) { connection -> + getSchemaForAllSqlTables(connection, dbType) } -} /** * Retrieves the schemas of all non-system tables in the database using the provided database connection. diff --git a/dataframe-jdbc/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/h2/h2Test.kt b/dataframe-jdbc/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/h2/h2Test.kt index 0a990730d3..eb075ac473 100644 --- a/dataframe-jdbc/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/h2/h2Test.kt +++ b/dataframe-jdbc/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/h2/h2Test.kt @@ -25,6 +25,7 @@ import org.jetbrains.kotlinx.dataframe.io.readDataFrame import org.jetbrains.kotlinx.dataframe.io.readResultSet import org.jetbrains.kotlinx.dataframe.io.readSqlQuery import org.jetbrains.kotlinx.dataframe.io.readSqlTable +import org.jetbrains.kotlinx.dataframe.io.withReadOnlyConnection import org.junit.AfterClass import org.junit.BeforeClass import org.junit.Test @@ -1169,4 +1170,19 @@ class JdbcTest { saleDataSchema1.columns.size shouldBe 3 saleDataSchema1.columns["amount"]!!.type shouldBe typeOf() } + + @Test + fun `withReadOnlyConnection sets readOnly and rolls back after execution`() { + val config = DbConnectionConfig("jdbc:h2:mem:test;DB_CLOSE_DELAY=-1", readOnly = true) + + var wasExecuted = false + val result = withReadOnlyConnection(config) { conn -> + wasExecuted = true + conn.autoCommit shouldBe false + 42 + } + + wasExecuted shouldBe true + result shouldBe 42 + } }