Skip to content

Commit 19b71ff

Browse files
committed
Refactor column metadata retrieval to use database-specific handling and enhance JDBC ResultSet processing
1 parent ef50adf commit 19b71ff

File tree

3 files changed

+102
-69
lines changed

3 files changed

+102
-69
lines changed

dataframe-jdbc/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/db/DbType.kt

Lines changed: 97 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ import java.sql.NClob
1515
import java.sql.PreparedStatement
1616
import java.sql.Ref
1717
import java.sql.ResultSet
18+
import java.sql.ResultSetMetaData
1819
import java.sql.RowId
1920
import java.sql.SQLXML
2021
import java.sql.Time
@@ -397,4 +398,100 @@ public abstract class DbType(public val dbTypeInJdbcUrl: String) {
397398
val kType = createArrayTypeIfNeeded(kClass, tableColumnMetadata.isNullable)
398399
return kType
399400
}
401+
402+
/**
 * Retrieves column metadata from a JDBC ResultSet.
 *
 * Column name, type name, JDBC type, display size and Java class name are read from
 * [ResultSetMetaData] for every column of [resultSet].
 *
 * Nullability is resolved as follows:
 * - `columnNoNulls` is reported as not nullable;
 * - `columnNullable`, `columnNullableUnknown` and any other value are reported as nullable
 *   (no additional lookup is performed for them);
 * - only when [ResultSetMetaData.isNullable] throws, the `IS_NULLABLE` value returned by
 *   [java.sql.DatabaseMetaData.getColumns] is used instead; if that lookup finds no row,
 *   the column is assumed to be nullable.
 *
 * Duplicated column names are made unique with [manageColumnNameDuplication].
 *
 * Override this method in subclasses to provide database-specific behavior
 * (for example, to disable fallback for databases like Teradata or Oracle
 * where `DatabaseMetaData.getColumns` is known to be slow).
 *
 * @param resultSet The [ResultSet] containing query results.
 * @return A mutable list of [TableColumnMetadata] objects, one per column, in column order.
 */
public open fun getTableColumnsMetadata(resultSet: ResultSet): MutableList<TableColumnMetadata> {
    val rsMetaData = resultSet.metaData

    // Connection-level objects are needed only by the exceptional fallback branch,
    // so they are resolved lazily (at most once) instead of on every call.
    val connection by lazy { resultSet.statement.connection }
    val dbMetaData by lazy { connection.metaData }
    val catalog by lazy { connection.catalog.takeUnless { it.isNullOrBlank() } }
    val schema by lazy { connection.schema.takeUnless { it.isNullOrBlank() } }

    val columnCount = rsMetaData.columnCount
    val columns = mutableListOf<TableColumnMetadata>()
    val nameCounter = mutableMapOf<String, Int>()

    for (index in 1..columnCount) {
        val columnName = rsMetaData.getColumnName(index)
        val tableName = rsMetaData.getTableName(index)

        val isNullable = try {
            // Only an explicit "no nulls" answer makes the column non-nullable;
            // nullable, unknown and unexpected values are all treated as nullable.
            rsMetaData.isNullable(index) != ResultSetMetaData.columnNoNulls
        } catch (_: Exception) {
            // Some drivers may throw for unsupported features.
            // In that case, fall back to DatabaseMetaData and close its ResultSet afterwards.
            dbMetaData.getColumns(catalog, schema, tableName, columnName).use { cols ->
                if (cols.next()) cols.getString("IS_NULLABLE") == "YES" else true
            }
        }

        val columnType = rsMetaData.getColumnTypeName(index)
        val jdbcType = rsMetaData.getColumnType(index)
        val displaySize = rsMetaData.getColumnDisplaySize(index)
        val javaClassName = rsMetaData.getColumnClassName(index)

        val uniqueName = manageColumnNameDuplication(nameCounter, columnName)

        columns += TableColumnMetadata(
            uniqueName,
            columnType,
            jdbcType,
            displaySize,
            javaClassName,
            isNullable,
        )
    }

    return columns
}
472+
473+
/**
 * Manages the duplication of column names by appending a unique identifier to the original name if necessary.
 *
 * The first occurrence of a name is returned unchanged. Every further occurrence is returned as
 * `<originalName>_<n>`, where `n` is the smallest number above the last used one for which the
 * resulting name is not yet present in [columnNameCounter].
 *
 * Every returned name (original or generated) is recorded in [columnNameCounter], so a later column
 * whose real name equals an already generated one (e.g. `a`, `a`, `a_1`) is detected as a duplicate as well.
 *
 * @param columnNameCounter a mutable map that keeps track of the count for each column name;
 *   it is updated by this call.
 * @param originalName the original name of the column to be managed.
 * @return the modified column name that is free from duplication.
 */
internal fun manageColumnNameDuplication(columnNameCounter: MutableMap<String, Int>, originalName: String): String {
    val count = columnNameCounter[originalName]

    if (count == null) {
        // First time this name is seen: keep it as is.
        columnNameCounter[originalName] = 0
        return originalName
    }

    // Skip suffixes that would collide with a name that is already taken.
    var incrementedCount = count + 1
    while (columnNameCounter.containsKey("${originalName}_$incrementedCount")) {
        incrementedCount++
    }

    val uniqueName = "${originalName}_$incrementedCount"
    columnNameCounter[originalName] = incrementedCount
    // Register the generated name too; otherwise a later column literally named like it
    // would be returned unchanged and clash with this one.
    columnNameCounter[uniqueName] = 0

    return uniqueName
}
400497
}

dataframe-jdbc/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/readDataFrameSchema.kt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -321,7 +321,7 @@ public fun Connection.readDataFrameSchema(sqlQueryOrTableName: String, dbType: D
321321
* @return the schema of the [ResultSet] as a [DataFrameSchema] object.
322322
*/
323323
public fun DataFrameSchema.Companion.readResultSet(resultSet: ResultSet, dbType: DbType): DataFrameSchema {
324-
val tableColumns = getTableColumnsMetadata(resultSet)
324+
val tableColumns = getTableColumnsMetadata(resultSet, dbType)
325325
return buildSchemaByTableColumns(tableColumns, dbType)
326326
}
327327

dataframe-jdbc/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/readJdbc.kt

Lines changed: 4 additions & 68 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,6 @@ import java.sql.DatabaseMetaData
1212
import java.sql.DriverManager
1313
import java.sql.PreparedStatement
1414
import java.sql.ResultSet
15-
import java.sql.ResultSetMetaData
1615
import javax.sql.DataSource
1716
import kotlin.reflect.KType
1817

@@ -181,7 +180,7 @@ private fun executeQueryAndBuildDataFrame(
181180
configureStatement(statement)
182181
logger.debug { "Executing query: $sqlQuery" }
183182
statement.executeQuery().use { rs ->
184-
val tableColumns = getTableColumnsMetadata(rs)
183+
val tableColumns = getTableColumnsMetadata(rs, determinedDbType)
185184
fetchAndConvertDataFromResultSet(tableColumns, rs, determinedDbType, limit, inferNullability)
186185
}
187186
}
@@ -562,7 +561,7 @@ public fun DataFrame.Companion.readResultSet(
562561
inferNullability: Boolean = true,
563562
): AnyFrame {
564563
validateLimit(limit)
565-
val tableColumns = getTableColumnsMetadata(resultSet)
564+
val tableColumns = getTableColumnsMetadata(resultSet, dbType)
566565
return fetchAndConvertDataFromResultSet(tableColumns, resultSet, dbType, limit, inferNullability)
567566
}
568567

@@ -852,71 +851,8 @@ private fun readTableAsDataFrame(
852851
return dataFrame
853852
}
854853

855-
/**
 * Retrieves the metadata of the columns in the result set.
 *
 * Name, type name, JDBC type, display size and Java class name come from the metadata of [rs];
 * nullability is looked up per column through `DatabaseMetaData.getColumns` (`IS_NULLABLE == "YES"`),
 * and a column is assumed to be nullable when that lookup returns no row.
 * Duplicated column names are made unique with [manageColumnNameDuplication].
 *
 * @param rs the result set
 * @return a mutable list of [TableColumnMetadata] objects,
 * where each TableColumnMetadata object contains information such as the column type,
 * JDBC type, size, and name.
 */
internal fun getTableColumnsMetadata(rs: ResultSet): MutableList<TableColumnMetadata> {
    val metaData = rs.metaData
    val numberOfColumns: Int = metaData.columnCount
    val tableColumns = mutableListOf<TableColumnMetadata>()
    val columnNameCounter = mutableMapOf<String, Int>()

    // Resolve the connection once instead of walking rs.statement.connection for every property.
    val connection = rs.statement.connection
    val databaseMetaData = connection.metaData
    val catalog: String? = connection.catalog.takeUnless { it.isNullOrBlank() }
    val schema: String? = connection.schema.takeUnless { it.isNullOrBlank() }

    for (i in 1..numberOfColumns) {
        val tableName = metaData.getTableName(i)
        val columnName = metaData.getColumnName(i)

        // this algorithm works correctly only for SQL Table and ResultSet opened on one SQL table
        // `use` closes the per-column metadata ResultSet, which was previously left open.
        val isNullable = databaseMetaData.getColumns(catalog, schema, tableName, columnName)
            .use { columnResultSet ->
                if (columnResultSet.next()) {
                    columnResultSet.getString("IS_NULLABLE") == "YES"
                } else {
                    true // we assume that it's nullable by default
                }
            }

        val name = manageColumnNameDuplication(columnNameCounter, columnName)
        val size = metaData.getColumnDisplaySize(i)
        val type = metaData.getColumnTypeName(i)
        val jdbcType = metaData.getColumnType(i)
        val javaClassName = metaData.getColumnClassName(i)

        tableColumns += TableColumnMetadata(name, type, jdbcType, size, javaClassName, isNullable)
    }
    return tableColumns
}
895-
896-
/**
 * Manages the duplication of column names by appending a unique identifier to the original name if necessary.
 *
 * A name seen for the first time is returned as is; repeated names get a `_<n>` suffix, where `n`
 * is the smallest number above the last used one whose suffixed name is not yet in [columnNameCounter].
 * Generated names are stored in [columnNameCounter] as well, so a later column that is really
 * called e.g. `a_1` cannot collide with a previously generated `a_1`.
 *
 * @param columnNameCounter a mutable map that keeps track of the count for each column name;
 *   it is updated by this call.
 * @param originalName the original name of the column to be managed.
 * @return the modified column name that is free from duplication.
 */
internal fun manageColumnNameDuplication(columnNameCounter: MutableMap<String, Int>, originalName: String): String {
    val previousCount = columnNameCounter[originalName]
        ?: run {
            // First occurrence: remember it and keep the name untouched.
            columnNameCounter[originalName] = 0
            return originalName
        }

    // Find the next free suffix, skipping names that are already taken.
    var suffix = previousCount + 1
    while (columnNameCounter.containsKey("${originalName}_$suffix")) {
        suffix++
    }

    val name = "${originalName}_$suffix"
    columnNameCounter[originalName] = suffix
    // Record the generated name so that an identical real column name is treated as a duplicate.
    columnNameCounter[name] = 0

    return name
}
854+
/**
 * Retrieves the metadata of the columns in [resultSet] by delegating to
 * [DbType.getTableColumnsMetadata] of the given [dbType].
 *
 * @param resultSet the result set whose columns are described.
 * @param dbType the database type whose metadata retrieval is used.
 * @return the mutable list of [TableColumnMetadata] produced by [dbType].
 */
internal fun getTableColumnsMetadata(resultSet: ResultSet, dbType: DbType): MutableList<TableColumnMetadata> {
    return dbType.getTableColumnsMetadata(resultSet)
}
920856

921857
/**
922858
* Fetches and converts data from a ResultSet into a mutable map.

0 commit comments

Comments
 (0)