From 1a25b587fc334baf43932e6948041ad1c5941345 Mon Sep 17 00:00:00 2001 From: catsona Date: Thu, 25 Sep 2025 16:29:28 +0300 Subject: [PATCH 1/7] feat(connectors): BI-5971 Upgrade YDB SDK dependencies --- lib/dl_connector_ydb/pyproject.toml | 1 + lib/dl_type_transformer/dl_type_transformer/type_transformer.py | 2 ++ 2 files changed, 3 insertions(+) diff --git a/lib/dl_connector_ydb/pyproject.toml b/lib/dl_connector_ydb/pyproject.toml index 2aee44c9ef..475d110b32 100644 --- a/lib/dl_connector_ydb/pyproject.toml +++ b/lib/dl_connector_ydb/pyproject.toml @@ -41,6 +41,7 @@ dl-core-testing = {path = "../dl_core_testing"} dl-formula-testing = {path = "../dl_formula_testing"} dl-testing = {path = "../dl_testing"} frozendict = "*" +dl-sqlalchemy-ydb = {path = "../../lib/dl_sqlalchemy_ydb"} pytest = "*" requests = "*" diff --git a/lib/dl_type_transformer/dl_type_transformer/type_transformer.py b/lib/dl_type_transformer/dl_type_transformer/type_transformer.py index c23118f90e..dcf9701dde 100644 --- a/lib/dl_type_transformer/dl_type_transformer/type_transformer.py +++ b/lib/dl_type_transformer/dl_type_transformer/type_transformer.py @@ -72,6 +72,8 @@ def make_datetime(value: Any) -> Optional[datetime.datetime]: def make_int(value: Any) -> Optional[int]: if value is None: return None + # if isinstance(value, datetime.timedelta): + # return int(value.total_seconds()) if isinstance(value, float) and (math.isinf(value) or math.isnan(value)): return None return int(value) From ba01d2785c1d3b6cd8d57e6b68a2faf0fa45c1d8 Mon Sep 17 00:00:00 2001 From: catsona Date: Fri, 26 Sep 2025 11:55:34 +0300 Subject: [PATCH 2/7] cleanup --- lib/dl_type_transformer/dl_type_transformer/type_transformer.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/lib/dl_type_transformer/dl_type_transformer/type_transformer.py b/lib/dl_type_transformer/dl_type_transformer/type_transformer.py index dcf9701dde..c23118f90e 100644 --- a/lib/dl_type_transformer/dl_type_transformer/type_transformer.py +++ b/lib/dl_type_transformer/dl_type_transformer/type_transformer.py @@ -72,8 +72,6 @@ def make_datetime(value: Any) -> Optional[datetime.datetime]: def make_int(value: Any) -> Optional[int]: if value is None: return None - # if isinstance(value, datetime.timedelta): - # return int(value.total_seconds()) if isinstance(value, float) and (math.isinf(value) or math.isnan(value)): return None return int(value) From cd9f0516b7c788ad001ec81230a92aea4a4aabb9 Mon Sep 17 00:00:00 2001 From: catsona Date: Fri, 26 Sep 2025 13:09:16 +0300 Subject: [PATCH 3/7] feat(connectors): BI-6585 Add column tables support for YDB --- .../dl_connector_ydb/core/ydb/adapter.py | 2 +- .../dl_connector_ydb_tests/db/api/base.py | 37 +++++++++++++++++++ .../db/api/test_dataset.py | 17 ++++++++- .../dl_connector_ydb_tests/db/config.py | 35 ++++++++++++++++++ lib/dl_connector_ydb/docker-compose.yml | 2 + .../docker-compose/Dockerfile.db-ydb | 2 +- .../dl_type_transformer/type_transformer.py | 2 + 7 files changed, 93 insertions(+), 4 deletions(-) diff --git a/lib/dl_connector_ydb/dl_connector_ydb/core/ydb/adapter.py b/lib/dl_connector_ydb/dl_connector_ydb/core/ydb/adapter.py index df450d16ca..f4d88a7539 100644 --- a/lib/dl_connector_ydb/dl_connector_ydb/core/ydb/adapter.py +++ b/lib/dl_connector_ydb/dl_connector_ydb/core/ydb/adapter.py @@ -122,7 +122,7 @@ def _list_table_names_i(self, db_name: str, show_dot: bool = False) -> Iterable[ ] children.sort() for full_path, child in children: - if child.is_any_table(): + if child.is_any_table() or child.is_view() or child.is_column_table(): yield full_path.removeprefix(unprefix) elif child.is_directory(): queue.append(full_path) diff --git a/lib/dl_connector_ydb/dl_connector_ydb_tests/db/api/base.py b/lib/dl_connector_ydb/dl_connector_ydb_tests/db/api/base.py index 6c22a974b9..ce78e11b8f 100644 --- a/lib/dl_connector_ydb/dl_connector_ydb_tests/db/api/base.py +++ b/lib/dl_connector_ydb/dl_connector_ydb_tests/db/api/base.py @@ -24,7 +24,9 @@ ) from dl_connector_ydb_tests.db.config import ( API_TEST_CONFIG, + COLUMN_TABLE_SCHEMA, DB_CORE_URL, + SA_TYPE_TO_YDB_TYPE_NAME, TABLE_DATA, TABLE_NAME, TABLE_SCHEMA, @@ -102,6 +104,41 @@ def dataset_params(self, sample_table: DbTable) -> dict: ) +class YDBColumnDatasetTestBase(YDBDatasetTestBase): + @pytest.fixture(scope="class") + def sample_table(self, db: Db) -> DbTable: + table_name = TABLE_NAME + "_column" + + db_table = make_table( + db=db, + name=table_name, + columns=[ + C(name=name, user_type=user_type, sa_type=sa_type) for name, user_type, sa_type in COLUMN_TABLE_SCHEMA + ], + data=[], # to avoid producing a sample data + create_in_db=False, + ) + + column_definitions_list = [] + for name, _, sa_type in COLUMN_TABLE_SCHEMA: + target_type = SA_TYPE_TO_YDB_TYPE_NAME[sa_type] + if name == "id": + target_type += " NOT NULL" + + column_definitions_list.append(f"{name} {target_type}") + column_definitions = ", ".join(column_definitions_list) + + query = f"CREATE TABLE `{table_name}` ({column_definitions}, PRIMARY KEY (id)) WITH (STORE = COLUMN)" + db.get_current_connection().connection.cursor().execute_scheme(query) + + db.create_table(db_table.table) + db.insert_into_table(db_table.table, TABLE_DATA) + + yield db_table + + db.get_current_connection().connection.cursor().execute_scheme(f"DROP TABLE `{table_name}`;") + + class YDBDataApiTestBase(YDBDatasetTestBase, StandardizedDataApiTestBase): mutation_caches_enabled = False diff --git a/lib/dl_connector_ydb/dl_connector_ydb_tests/db/api/test_dataset.py b/lib/dl_connector_ydb/dl_connector_ydb_tests/db/api/test_dataset.py index 0d3d580de6..b914c85b84 100644 --- a/lib/dl_connector_ydb/dl_connector_ydb_tests/db/api/test_dataset.py +++ b/lib/dl_connector_ydb/dl_connector_ydb_tests/db/api/test_dataset.py @@ -1,8 +1,14 @@ from dl_api_client.dsmaker.primitives import Dataset from dl_api_lib_testing.connector.dataset_suite import DefaultConnectorDatasetTestSuite -from dl_connector_ydb_tests.db.api.base import YDBDatasetTestBase -from dl_connector_ydb_tests.db.config import TABLE_SCHEMA +from dl_connector_ydb_tests.db.api.base import ( + YDBColumnDatasetTestBase, + YDBDatasetTestBase, +) +from dl_connector_ydb_tests.db.config import ( + COLUMN_TABLE_SCHEMA, + TABLE_SCHEMA, +) class TestYDBDataset(YDBDatasetTestBase, DefaultConnectorDatasetTestSuite): @@ -12,3 +18,10 @@ def check_basic_dataset(self, ds: Dataset, annotation: dict) -> None: assert field_names == {column[0] for column in TABLE_SCHEMA} assert ds.annotation == annotation + + +class TestYDBColumnDataset(YDBColumnDatasetTestBase, DefaultConnectorDatasetTestSuite): + def check_basic_dataset(self, ds: Dataset) -> None: + assert ds.id + field_names = {field.title for field in ds.result_schema} + assert field_names == {column[0] for column in COLUMN_TABLE_SCHEMA} diff --git a/lib/dl_connector_ydb/dl_connector_ydb_tests/db/config.py b/lib/dl_connector_ydb/dl_connector_ydb_tests/db/config.py index d468bf9e6c..49e8b22a56 100644 --- a/lib/dl_connector_ydb/dl_connector_ydb_tests/db/config.py +++ b/lib/dl_connector_ydb/dl_connector_ydb_tests/db/config.py @@ -79,6 +79,36 @@ class CoreSslConnectionSettings: ("some_timestamp", UserDataType.genericdatetime, sa.TIMESTAMP), ("some_interval", UserDataType.integer, dl_sqlalchemy_ydb.dialect.YqlInterval), ) + +COLUMN_TABLE_SCHEMA = ( + ("id", UserDataType.integer, sa.Integer), + ("distinct_string", UserDataType.string, sa.String), + ("some_int32", UserDataType.integer, sa.Integer), + ("some_int64", UserDataType.integer, sa.BigInteger), + ("some_uint8", UserDataType.integer, sa.SmallInteger), + ("some_double", UserDataType.float, sa.Float), + ("some_string", UserDataType.string, sa.String), + ("some_utf8", UserDataType.string, sa.Unicode), + ("some_date", UserDataType.date, sa.Date), + ("some_datetime", UserDataType.genericdatetime, sa.DATETIME), + ("some_timestamp", UserDataType.genericdatetime, sa.TIMESTAMP), + ("some_interval", UserDataType.integer, dl_sqlalchemy_ydb.dialect.YqlInterval), +) + +SA_TYPE_TO_YDB_TYPE_NAME = { + sa.Integer: "Int32", + sa.String: "String", + sa.BigInteger: "Int64", + sa.SmallInteger: "Int8", + sa.Boolean: "Bool", + sa.Float: "Double", + sa.Unicode: "Utf8", + sa.Date: "Date", + sa.DATETIME: "Datetime", + sa.TIMESTAMP: "Timestamp", + dl_sqlalchemy_ydb.dialect.YqlInterval: "Interval", +} + TABLE_DATA = [ { "id": 1, @@ -246,8 +276,13 @@ class CoreSslConnectionSettings: "some_interval": 1234, }, ] + +# Leave only values in COLUMN_TABLE_SCHEMA +COLUMN_TABLE_DATA = [{key: value for key, value in row.items() if key in COLUMN_TABLE_SCHEMA} for row in TABLE_DATA] + TABLE_NAME = "test_table_h" + DASHSQL_QUERY = r""" select id, diff --git a/lib/dl_connector_ydb/docker-compose.yml b/lib/dl_connector_ydb/docker-compose.yml index 84878d3b67..9acc9b298b 100644 --- a/lib/dl_connector_ydb/docker-compose.yml +++ b/lib/dl_connector_ydb/docker-compose.yml @@ -9,6 +9,8 @@ services: YDB_GRPC_ENABLE_TLS: 1 GRPC_TLS_PORT: "51902" YDB_GRPC_TLS_DATA_PATH: "/ydb_certs" + YDB_FEATURE_FLAGS: "enable_views,enable_olap_schema_operations" + YDB_ENABLE_COLUMN_TABLES: "true" hostname: "db-ydb" ports: - "51900:51900" diff --git a/lib/dl_connector_ydb/docker-compose/Dockerfile.db-ydb b/lib/dl_connector_ydb/docker-compose/Dockerfile.db-ydb index 6e5191f894..3bdf2a9399 100644 --- a/lib/dl_connector_ydb/docker-compose/Dockerfile.db-ydb +++ b/lib/dl_connector_ydb/docker-compose/Dockerfile.db-ydb @@ -1,4 +1,4 @@ -FROM cr.yandex/yc/yandex-docker-local-ydb:latest +FROM ydbplatform/local-ydb:latest@sha256:1252f37e5f3fd6c490a8e6c34f927cb8b3dd8323db2aa14ffa21dd09dd6ebcdc RUN apt update && apt install -y python3 diff --git a/lib/dl_type_transformer/dl_type_transformer/type_transformer.py b/lib/dl_type_transformer/dl_type_transformer/type_transformer.py index c23118f90e..dcf9701dde 100644 --- a/lib/dl_type_transformer/dl_type_transformer/type_transformer.py +++ b/lib/dl_type_transformer/dl_type_transformer/type_transformer.py @@ -72,6 +72,8 @@ def make_datetime(value: Any) -> Optional[datetime.datetime]: def make_int(value: Any) -> Optional[int]: if value is None: return None + # if isinstance(value, datetime.timedelta): + # return int(value.total_seconds()) if isinstance(value, float) and (math.isinf(value) or math.isnan(value)): return None return int(value) From 115d207a161cdc52f86de4b9718e2b0628bc9c56 Mon Sep 17 00:00:00 2001 From: catsona Date: Thu, 27 Nov 2025 15:48:49 +0300 Subject: [PATCH 4/7] lint-fix --- lib/dl_connector_ydb/pyproject.toml | 3 +-- metapkg/poetry.lock | 2 +- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/lib/dl_connector_ydb/pyproject.toml b/lib/dl_connector_ydb/pyproject.toml index 475d110b32..afea6497e6 100644 --- a/lib/dl_connector_ydb/pyproject.toml +++ b/lib/dl_connector_ydb/pyproject.toml @@ -19,7 +19,7 @@ dl-formula = {path = "../dl_formula"} dl-formula-ref = {path = "../dl_formula_ref"} dl-i18n = {path = "../dl_i18n"} dl-query-processing = {path = "../dl_query_processing"} -dl-sqlalchemy-ydb = {path = "../../lib/dl_sqlalchemy_ydb"} +dl-sqlalchemy-ydb = {path = "../dl_sqlalchemy_ydb"} dl-type-transformer = {path = "../dl_type_transformer"} dl-utils = {path = "../dl_utils"} grpcio = "*" @@ -41,7 +41,6 @@ dl-core-testing = {path = "../dl_core_testing"} dl-formula-testing = {path = "../dl_formula_testing"} dl-testing = {path = "../dl_testing"} frozendict = "*" -dl-sqlalchemy-ydb = {path = "../../lib/dl_sqlalchemy_ydb"} pytest = "*" requests = "*" diff --git a/metapkg/poetry.lock b/metapkg/poetry.lock index e88ee0a605..fd1f8cd17b 100644 --- a/metapkg/poetry.lock +++ b/metapkg/poetry.lock @@ -2569,7 +2569,7 @@ dl-formula = {path = "../dl_formula"} dl-formula-ref = {path = "../dl_formula_ref"} dl-i18n = {path = "../dl_i18n"} dl-query-processing = {path = "../dl_query_processing"} -dl-sqlalchemy-ydb = {path = "../../lib/dl_sqlalchemy_ydb"} +dl-sqlalchemy-ydb = {path = "../dl_sqlalchemy_ydb"} dl-type-transformer = {path = "../dl_type_transformer"} dl-utils = {path = "../dl_utils"} grpcio = "*" From 7b111e6ec81ca730f441e668b87380859e8acadd Mon Sep 17 00:00:00 2001 From: catsona Date: Thu, 27 Nov 2025 16:54:02 +0300 Subject: [PATCH 5/7] remove interval column from test --- lib/dl_connector_ydb/dl_connector_ydb_tests/db/config.py | 1 - 1 file changed, 1 deletion(-) diff --git a/lib/dl_connector_ydb/dl_connector_ydb_tests/db/config.py b/lib/dl_connector_ydb/dl_connector_ydb_tests/db/config.py index 49e8b22a56..0f275d4160 100644 --- a/lib/dl_connector_ydb/dl_connector_ydb_tests/db/config.py +++ b/lib/dl_connector_ydb/dl_connector_ydb_tests/db/config.py @@ -92,7 +92,6 @@ class CoreSslConnectionSettings: ("some_date", UserDataType.date, sa.Date), ("some_datetime", UserDataType.genericdatetime, sa.DATETIME), ("some_timestamp", UserDataType.genericdatetime, sa.TIMESTAMP), - ("some_interval", UserDataType.integer, dl_sqlalchemy_ydb.dialect.YqlInterval), ) SA_TYPE_TO_YDB_TYPE_NAME = { From ab3ff176f74c4959a3db7588af04fa5e938630da Mon Sep 17 00:00:00 2001 From: catsona Date: Thu, 27 Nov 2025 16:55:55 +0300 Subject: [PATCH 6/7] add annotation for test function --- .../dl_connector_ydb_tests/db/api/test_dataset.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/dl_connector_ydb/dl_connector_ydb_tests/db/api/test_dataset.py b/lib/dl_connector_ydb/dl_connector_ydb_tests/db/api/test_dataset.py index b914c85b84..afa3a08f9e 100644 --- a/lib/dl_connector_ydb/dl_connector_ydb_tests/db/api/test_dataset.py +++ b/lib/dl_connector_ydb/dl_connector_ydb_tests/db/api/test_dataset.py @@ -21,7 +21,7 @@ def check_basic_dataset(self, ds: Dataset, annotation: dict) -> None: class TestYDBColumnDataset(YDBColumnDatasetTestBase, DefaultConnectorDatasetTestSuite): - def check_basic_dataset(self, ds: Dataset) -> None: + def check_basic_dataset(self, ds: Dataset, annotation: dict) -> None: assert ds.id field_names = {field.title for field in ds.result_schema} assert field_names == {column[0] for column in COLUMN_TABLE_SCHEMA} From a381827378aa2ee776e152aba7f5512cadfea75e Mon Sep 17 00:00:00 2001 From: catsona Date: Fri, 28 Nov 2025 14:07:32 +0300 Subject: [PATCH 7/7] cleanup --- lib/dl_type_transformer/dl_type_transformer/type_transformer.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/lib/dl_type_transformer/dl_type_transformer/type_transformer.py b/lib/dl_type_transformer/dl_type_transformer/type_transformer.py index dcf9701dde..c23118f90e 100644 --- a/lib/dl_type_transformer/dl_type_transformer/type_transformer.py +++ b/lib/dl_type_transformer/dl_type_transformer/type_transformer.py @@ -72,8 +72,6 @@ def make_datetime(value: Any) -> Optional[datetime.datetime]: def make_int(value: Any) -> Optional[int]: if value is None: return None - # if isinstance(value, datetime.timedelta): - # return int(value.total_seconds()) if isinstance(value, float) and (math.isinf(value) or math.isnan(value)): return None return int(value)