From e9d8b417414d994c401f69516a011dbcad195f92 Mon Sep 17 00:00:00 2001 From: Nico Loesch Date: Wed, 27 May 2026 05:10:48 +0000 Subject: [PATCH 01/11] Initial commit to allow pagination --- src/orm_loader/backends/base.py | 6 + src/orm_loader/backends/postgres.py | 137 ++++++++++++++++------ src/orm_loader/backends/sqlite.py | 6 + src/orm_loader/loaders/loading_helpers.py | 13 +- src/orm_loader/tables/loadable_table.py | 47 +++++--- tests/backends/test_postgres_backend.py | 21 +++- tests/loaders/test_pg_loader.py | 27 ++++- 7 files changed, 193 insertions(+), 64 deletions(-) diff --git a/src/orm_loader/backends/base.py b/src/orm_loader/backends/base.py index b5fc9a1..db3cd8f 100644 --- a/src/orm_loader/backends/base.py +++ b/src/orm_loader/backends/base.py @@ -181,6 +181,8 @@ def merge_replace( target_name: str, staging_name: str, pk_cols: list[str], + *, + merge_batch_size: int = 1_000_000, ) -> None: """Merge staging rows by replacing matching target rows first.""" @@ -192,6 +194,8 @@ def merge_upsert( target_name: str, staging_name: str, pk_cols: list[str], + *, + merge_batch_size: int = 1_000_000, ) -> None: """Merge staging rows using backend-specific upsert semantics.""" @@ -202,6 +206,8 @@ def merge_insert( session: so.Session, target_name: str, staging_name: str, + *, + merge_batch_size: int = 1_000_000, ) -> None: """Insert all staging rows into the target table.""" diff --git a/src/orm_loader/backends/postgres.py b/src/orm_loader/backends/postgres.py index 7b57c8b..4813d04 100644 --- a/src/orm_loader/backends/postgres.py +++ b/src/orm_loader/backends/postgres.py @@ -1,13 +1,14 @@ from __future__ import annotations -from contextlib import contextmanager, AbstractContextManager +from contextlib import AbstractContextManager, contextmanager from typing import TYPE_CHECKING, Any + import sqlalchemy as sa -import sqlalchemy.orm as so import sqlalchemy.event as sae +import sqlalchemy.orm as so -from .base import BackendCapabilities, DatabaseBackend, Dialect from 
..loaders.loading_helpers import quick_load_pg +from .base import BackendCapabilities, DatabaseBackend, Dialect if TYPE_CHECKING: from sqlalchemy.engine import Connection, Engine @@ -57,6 +58,14 @@ def create_staging_table( for col in computed_cols: session.execute(sa.text(f'ALTER TABLE "{staging_name}" DROP COLUMN "{col}";')) + # allows pagination in O(N log N) time for large tables in merge_insert without needing to add an index on every staging table + session.execute( + sa.text( + f'ALTER TABLE "{staging_name}" ADD COLUMN _rownum BIGINT' + f" GENERATED ALWAYS AS IDENTITY (CACHE 1000);" + ) + ) + session.commit() def drop_staging_table( @@ -124,19 +133,33 @@ def merge_replace( target_name: str, staging_name: str, pk_cols: list[str], + *, + merge_batch_size: int = 1_000_000, ) -> None: - pk_join = " AND ".join( - f't."{c}" = s."{c}"' for c in pk_cols - ) - session.execute( - sa.text( - f""" - DELETE FROM "{target_name}" t - USING "{staging_name}" s - WHERE {pk_join}; - """ + pk_join = " AND ".join(f't."{c}" = s."{c}"' for c in pk_cols) + total = session.execute(sa.text(f'SELECT COUNT(*) FROM "{staging_name}"')).scalar_one() + + if total <= merge_batch_size: + session.execute(sa.text( + f'DELETE FROM "{target_name}" t USING "{staging_name}" s WHERE {pk_join}' + )) + return + + session.execute(sa.text(f'CREATE INDEX ON "{staging_name}" (_rownum)')) + session.commit() + + start = 0 + while start < total: + end = start + merge_batch_size + session.execute( + sa.text( + f'DELETE FROM "{target_name}" t USING "{staging_name}" s' + f' WHERE {pk_join} AND s._rownum > :start AND s._rownum <= :end' + ), + {"start": start, "end": end}, ) - ) + session.commit() + start = end def merge_upsert( self, @@ -145,19 +168,39 @@ def merge_upsert( target_name: str, staging_name: str, pk_cols: list[str], + *, + merge_batch_size: int = 1_000_000, ) -> None: insertable_cols = self._insertable_column_names(table_cls) cols_str = ", ".join(f'"{c}"' for c in insertable_cols) conflict_cols = 
", ".join(f'"{c}"' for c in pk_cols) - session.execute( - sa.text( - f""" - INSERT INTO "{target_name}" ({cols_str}) - SELECT {cols_str} FROM "{staging_name}" - ON CONFLICT ({conflict_cols}) DO NOTHING; - """ + total = session.execute(sa.text(f'SELECT COUNT(*) FROM "{staging_name}"')).scalar_one() + + if total <= merge_batch_size: + session.execute(sa.text( + f'INSERT INTO "{target_name}" ({cols_str})' + f' SELECT {cols_str} FROM "{staging_name}"' + f' ON CONFLICT ({conflict_cols}) DO NOTHING' + )) + return + + session.execute(sa.text(f'CREATE INDEX ON "{staging_name}" (_rownum)')) + session.commit() + + start = 0 + while start < total: + end = start + merge_batch_size + session.execute( + sa.text( + f'INSERT INTO "{target_name}" ({cols_str})' + f' SELECT {cols_str} FROM "{staging_name}"' + f' WHERE _rownum > :start AND _rownum <= :end' + f' ON CONFLICT ({conflict_cols}) DO NOTHING' + ), + {"start": start, "end": end}, ) - ) + session.commit() + start = end def merge_insert( self, @@ -165,17 +208,41 @@ def merge_insert( session: so.Session, target_name: str, staging_name: str, + *, + merge_batch_size: int = 1_000_000, ) -> None: insertable_cols = self._insertable_column_names(table_cls) cols_str = ", ".join(f'"{c}"' for c in insertable_cols) - session.execute( - sa.text( - f""" - INSERT INTO "{target_name}" ({cols_str}) - SELECT {cols_str} FROM "{staging_name}"; - """ + + total = session.execute(sa.text(f'SELECT COUNT(*) FROM "{staging_name}"')).scalar_one() + + if total <= merge_batch_size: + session.execute(sa.text( + f'INSERT INTO "{target_name}" ({cols_str})' + f' SELECT {cols_str} FROM "{staging_name}"' + )) + return + + # Large table: index _rownum for O(N log N) range pagination then + # INSERT in batch-sized transactions to bound WAL per commit. + # session_replication_role='replica' is session-level and persists + # across commits, so FK checks stay disabled for all batches. 
+ session.execute(sa.text(f'CREATE INDEX ON "{staging_name}" (_rownum)')) + session.commit() + + start = 0 + while start < total: + end = start + merge_batch_size + session.execute( + sa.text( + f'INSERT INTO "{target_name}" ({cols_str})' + f' SELECT {cols_str} FROM "{staging_name}"' + f' WHERE _rownum > :start AND _rownum <= :end' + ), + {"start": start, "end": end}, ) - ) + session.commit() + start = end def merge_context( self, @@ -184,8 +251,6 @@ def merge_context( ) -> AbstractContextManager[None]: return self.bulk_load_context(session, disable_fk=True, no_autoflush=False) - - def create_materialized_view( self, bind: Engine | Connection, @@ -196,7 +261,7 @@ def create_materialized_view( with self._as_connection(bind) as conn: conn.execute(CreateMaterializedView(name, selectable)) - + def refresh_materialized_view( self, bind: Engine | Connection, @@ -207,9 +272,7 @@ def refresh_materialized_view( dialect = getattr(conn, "dialect", None) if dialect is not None: safe_name = dialect.identifier_preparer.quote(name) - conn.execute( - sa.text(f"REFRESH MATERIALIZED VIEW {safe_name};") - ) + conn.execute(sa.text(f"REFRESH MATERIALIZED VIEW {safe_name};")) @contextmanager def engine_with_replica_role(self, engine: "Engine"): @@ -230,8 +293,6 @@ def _set_replica_role( with engine.connect() as conn: conn = conn.execution_options(isolation_level="AUTOCOMMIT") conn.execute(sa.text("SET session_replication_role = DEFAULT")) - role = conn.execute( - sa.text("SHOW session_replication_role") - ).scalar() + role = conn.execute(sa.text("SHOW session_replication_role")).scalar() if role != "origin": raise RuntimeError("Failed to restore session_replication_role") diff --git a/src/orm_loader/backends/sqlite.py b/src/orm_loader/backends/sqlite.py index 753abd4..0a1ec63 100644 --- a/src/orm_loader/backends/sqlite.py +++ b/src/orm_loader/backends/sqlite.py @@ -139,6 +139,8 @@ def merge_replace( target_name: str, staging_name: str, pk_cols: list[str], + *, + merge_batch_size: int = 
1_000_000, ) -> None: if len(pk_cols) == 1: pk = pk_cols[0] @@ -176,6 +178,8 @@ def merge_upsert( target_name: str, staging_name: str, pk_cols: list[str], + *, + merge_batch_size: int = 1_000_000, ) -> None: insertable_cols = self._insertable_column_names(table_cls) cols_str = ", ".join(f'"{c}"' for c in insertable_cols) @@ -194,6 +198,8 @@ def merge_insert( session: so.Session, target_name: str, staging_name: str, + *, + merge_batch_size: int = 1_000_000, ) -> None: insertable_cols = self._insertable_column_names(table_cls) cols_str = ", ".join(f'"{c}"' for c in insertable_cols) diff --git a/src/orm_loader/loaders/loading_helpers.py b/src/orm_loader/loaders/loading_helpers.py index cbc5ef7..4deb13d 100644 --- a/src/orm_loader/loaders/loading_helpers.py +++ b/src/orm_loader/loaders/loading_helpers.py @@ -266,15 +266,24 @@ def quick_load_pg( else: raise ValueError(f"Unknown quote_mode: {quote_mode}") + # Peek at the CSV header to build an explicit column list for COPY. + # Without this, PostgreSQL expects ALL table columns including internal staging + # columns like _rownum (GENERATED ALWAYS AS IDENTITY), which the CSV doesn't have. 
+ with open(path, "rb") as _f_peek: + _raw_hdr = _f_peek.readline().decode(encoding) + _nl = check_line_ending(_raw_hdr) + _csv_cols = [c.strip().lower().replace('_hash', '') for c in _raw_hdr.rstrip(_nl).split(delimiter)] + _cols_sql = ", ".join(f'"{c}"' for c in _csv_cols) + logger.info(f"Bulk loading {tablename} via COPY (encoding={encoding}, delimiter={delimiter})") - + cur = raw_conn.cursor() try: with open(path, "rb") as f: stream = NormalisedCSVStream(f, encoding=encoding, delimiter=delimiter) with cur.copy( f''' - COPY "{tablename}" + COPY "{tablename}" ({_cols_sql}) FROM STDIN WITH ( {copy_options} diff --git a/src/orm_loader/tables/loadable_table.py b/src/orm_loader/tables/loadable_table.py index acec0f9..3b73e63 100644 --- a/src/orm_loader/tables/loadable_table.py +++ b/src/orm_loader/tables/loadable_table.py @@ -358,6 +358,7 @@ def load_csv( merge_strategy: str = "replace", quote_mode: str = "auto", index_strategy: str = "auto", + merge_batch_size: int = 1_000_000, ) -> int: """ @@ -446,7 +447,7 @@ def load_csv( # Merge staging to target (Wrapped in our index dropper!) logger.info(f"Table `{cls.__tablename__}`: Merging staging data into target table") with cls.manage_indices(session, index_strategy=index_strategy): - cls.merge_from_staging(session, merge_strategy=merge_strategy) + cls.merge_from_staging(session, merge_strategy=merge_strategy, merge_batch_size=merge_batch_size) cls.drop_staging_table(session) @@ -457,10 +458,12 @@ def load_csv( @classmethod def _merge_replace( cls: Type[CSVTableProtocol], - session: so.Session, - target: str, - staging: str, - pk_cols: list[str] + session: so.Session, + target: str, + staging: str, + pk_cols: list[str], + *, + merge_batch_size: int = 1_000_000, ): """ Merge staging data by replacing existing rows. @@ -469,20 +472,22 @@ def _merge_replace( deleted prior to insertion. 
""" backend = resolve_backend(session) - backend.merge_replace(cls, session, target, staging, pk_cols) + backend.merge_replace(cls, session, target, staging, pk_cols, merge_batch_size=merge_batch_size) @classmethod def _merge_insert( cls: Type[CSVTableProtocol], session: so.Session, target: str, - staging: str + staging: str, + *, + merge_batch_size: int = 1_000_000, ): """ Insert all rows from the staging table into the target table. """ backend = resolve_backend(session) - backend.merge_insert(cls, session, target, staging) + backend.merge_insert(cls, session, target, staging, merge_batch_size=merge_batch_size) @classmethod def _target_has_rows( @@ -508,23 +513,27 @@ def _target_has_rows( @classmethod def _merge_upsert( - cls: Type[CSVTableProtocol], - session: so.Session, - target: str, - staging: str, - pk_cols: list[str] + cls: Type[CSVTableProtocol], + session: so.Session, + target: str, + staging: str, + pk_cols: list[str], + *, + merge_batch_size: int = 1_000_000, ): """ Merge staging data using an upsert strategy. """ backend = resolve_backend(session) - backend.merge_upsert(cls, session, target, staging, pk_cols) + backend.merge_upsert(cls, session, target, staging, pk_cols, merge_batch_size=merge_batch_size) @classmethod def merge_from_staging( - cls: Type[CSVTableProtocol], - session: so.Session, - merge_strategy: str = "replace" + cls: Type[CSVTableProtocol], + session: so.Session, + merge_strategy: str = "replace", + *, + merge_batch_size: int = 1_000_000, ): """ Merge data from the staging table into the target table. 
@@ -572,6 +581,7 @@ def merge_from_staging( target=target, staging=staging, pk_cols=pk_cols, + merge_batch_size=merge_batch_size, ) logger.info( f"Table `{target}`: Merge replace delete phase completed in " @@ -583,6 +593,7 @@ def merge_from_staging( session=session, target=target, staging=staging, + merge_batch_size=merge_batch_size, ) logger.info( f"Table `{target}`: Merge insert phase completed in " @@ -596,6 +607,7 @@ def merge_from_staging( target=target, staging=staging, pk_cols=pk_cols, + merge_batch_size=merge_batch_size, ) logger.info( f"Table `{target}`: Merge upsert phase completed in " @@ -626,6 +638,7 @@ def merge_from_staging( session=session, target=target, staging=staging, + merge_batch_size=merge_batch_size, ) logger.info( f"Table `{target}`: Merge insert-if-empty phase completed in " diff --git a/tests/backends/test_postgres_backend.py b/tests/backends/test_postgres_backend.py index 6ac4467..50e8202 100644 --- a/tests/backends/test_postgres_backend.py +++ b/tests/backends/test_postgres_backend.py @@ -44,6 +44,9 @@ def __init__(self, value): def scalar(self): return self._value + def scalar_one(self): + return self._value + return _Result(self.scalar_result) def commit(self) -> None: @@ -115,11 +118,13 @@ def test_postgres_backend_fk_methods_emit_expected_sql(): def test_postgres_backend_merge_replace_uses_using_delete(): backend = PostgresBackend() - session = _FakeSession() + # scalar_result=0 → COUNT returns 0 → small-table path → single DELETE statement + session = _FakeSession(scalar_result=0) backend.merge_replace(_ComputedTableCls, _sess(session), "target_table", "_staging_target_table", ["id", "name"]) - sql = session.statements[0] + # statements[0] is the COUNT query; statements[1] is the DELETE + sql = session.statements[1] assert 'DELETE FROM "target_table" t' in sql assert 'USING "_staging_target_table" s' in sql assert 't."id" = s."id" AND t."name" = s."name"' in sql @@ -127,22 +132,26 @@ def 
test_postgres_backend_merge_replace_uses_using_delete(): def test_postgres_backend_merge_insert_excludes_computed_columns(): backend = PostgresBackend() - session = _FakeSession() + # scalar_result=0 → COUNT returns 0 → small-table path → single INSERT statement + session = _FakeSession(scalar_result=0) backend.merge_insert(_ComputedTableCls, _sess(session), "target_table", "_staging_target_table") - sql = session.statements[0] + # statements[0] is the COUNT query; statements[1] is the INSERT + sql = session.statements[1] assert 'INSERT INTO "target_table" ("id", "name")' in sql assert 'SELECT "id", "name" FROM "_staging_target_table"' in sql def test_postgres_backend_merge_upsert_excludes_computed_columns(): backend = PostgresBackend() - session = _FakeSession() + # scalar_result=0 → COUNT returns 0 → small-table path → single INSERT statement + session = _FakeSession(scalar_result=0) backend.merge_upsert(_ComputedTableCls, _sess(session), "target_table", "_staging_target_table", ["id"]) - sql = session.statements[0] + # statements[0] is the COUNT query; statements[1] is the INSERT + sql = session.statements[1] assert 'INSERT INTO "target_table" ("id", "name")' in sql assert 'ON CONFLICT ("id") DO NOTHING' in sql diff --git a/tests/loaders/test_pg_loader.py b/tests/loaders/test_pg_loader.py index f556354..208ea01 100644 --- a/tests/loaders/test_pg_loader.py +++ b/tests/loaders/test_pg_loader.py @@ -6,6 +6,29 @@ from tests.models import SimpleTable +@pytest.mark.postgres +def test_copy_into_staging_with_extra_identity_column(pg_session, tmp_path): + """COPY must succeed when the staging table has a _rownum identity column.""" + csv = tmp_path / "test_table.csv" + pd.DataFrame([{"id": 1, "name": "alpha"}, {"id": 2, "name": "beta"}]).to_csv(csv, index=False) + + SimpleTable.create_staging_table(pg_session) + staging_name = SimpleTable.staging_tablename() + + cols = pg_session.execute(sa.text( + "SELECT column_name FROM information_schema.columns WHERE table_name = 
:t" + ), {"t": staging_name}).scalars().all() + assert "_rownum" in cols, "staging table should have _rownum before COPY" + + total = quick_load_pg(path=csv, session=pg_session, tablename=staging_name) + assert total == 2 + + rownums = pg_session.execute( + sa.text(f'SELECT _rownum FROM "{staging_name}" ORDER BY _rownum') + ).scalars().all() + assert rownums == [1, 2], "_rownum must be auto-populated by IDENTITY sequence" + + @pytest.mark.postgres def test_copy_and_orm_path_equivalence(pg_session, tmp_path): csv = tmp_path / "test_table.csv" @@ -171,10 +194,12 @@ def test_staging_schema_matches_target(pg_session, tmp_path): ORDER BY ordinal_position """), {"table": SimpleTable.staging_tablename()}).all() - assert cols == [ + data_cols = [(name, dtype) for name, dtype in cols if name != "_rownum"] + assert data_cols == [ ("id", "integer"), ("name", "character varying"), ] + assert any(name == "_rownum" and dtype == "bigint" for name, dtype in cols) def test_infer_encoding_ascii_promoted_to_utf8(tmp_path): From deae608b5dac9b8c7248a9e04643f211c9a6d9b1 Mon Sep 17 00:00:00 2001 From: Nico Loesch Date: Wed, 27 May 2026 05:12:57 +0000 Subject: [PATCH 02/11] Remove redundant clsmethods and call correct implementation --- src/orm_loader/tables/loadable_table.py | 81 ++----------------------- src/orm_loader/tables/typing.py | 20 +++--- 2 files changed, 14 insertions(+), 87 deletions(-) diff --git a/src/orm_loader/tables/loadable_table.py b/src/orm_loader/tables/loadable_table.py index 3b73e63..ac8211d 100644 --- a/src/orm_loader/tables/loadable_table.py +++ b/src/orm_loader/tables/loadable_table.py @@ -455,40 +455,6 @@ def load_csv( return total - @classmethod - def _merge_replace( - cls: Type[CSVTableProtocol], - session: so.Session, - target: str, - staging: str, - pk_cols: list[str], - *, - merge_batch_size: int = 1_000_000, - ): - """ - Merge staging data by replacing existing rows. 
- - Existing target rows matching the staging primary keys are - deleted prior to insertion. - """ - backend = resolve_backend(session) - backend.merge_replace(cls, session, target, staging, pk_cols, merge_batch_size=merge_batch_size) - - @classmethod - def _merge_insert( - cls: Type[CSVTableProtocol], - session: so.Session, - target: str, - staging: str, - *, - merge_batch_size: int = 1_000_000, - ): - """ - Insert all rows from the staging table into the target table. - """ - backend = resolve_backend(session) - backend.merge_insert(cls, session, target, staging, merge_batch_size=merge_batch_size) - @classmethod def _target_has_rows( cls: Type[CSVTableProtocol], @@ -511,22 +477,6 @@ def _target_has_rows( return row is not None - @classmethod - def _merge_upsert( - cls: Type[CSVTableProtocol], - session: so.Session, - target: str, - staging: str, - pk_cols: list[str], - *, - merge_batch_size: int = 1_000_000, - ): - """ - Merge staging data using an upsert strategy. - """ - backend = resolve_backend(session) - backend.merge_upsert(cls, session, target, staging, pk_cols, merge_batch_size=merge_batch_size) - @classmethod def merge_from_staging( cls: Type[CSVTableProtocol], @@ -551,6 +501,7 @@ def merge_from_staging( pk_cols = cls.pk_names() _require_bind(session) + backend = resolve_backend(session) target_empty_confirmed = False if merge_strategy in {"replace", "upsert"}: logger.info( @@ -576,25 +527,14 @@ def merge_from_staging( if merge_strategy == "replace": logger.info(f"Table `{target}`: Merge replace delete phase starting.") delete_started = perf_counter() - cls._merge_replace( - session=session, - target=target, - staging=staging, - pk_cols=pk_cols, - merge_batch_size=merge_batch_size, - ) + backend.merge_replace(cls, session, target, staging, pk_cols, merge_batch_size=merge_batch_size) logger.info( f"Table `{target}`: Merge replace delete phase completed in " f"{_format_elapsed(perf_counter() - delete_started)}." 
) logger.info(f"Table `{target}`: Merge insert phase starting.") insert_started = perf_counter() - cls._merge_insert( - session=session, - target=target, - staging=staging, - merge_batch_size=merge_batch_size, - ) + backend.merge_insert(cls, session, target, staging, merge_batch_size=merge_batch_size) logger.info( f"Table `{target}`: Merge insert phase completed in " f"{_format_elapsed(perf_counter() - insert_started)}." @@ -602,13 +542,7 @@ def merge_from_staging( elif merge_strategy == "upsert": logger.info(f"Table `{target}`: Merge upsert phase starting.") upsert_started = perf_counter() - cls._merge_upsert( - session=session, - target=target, - staging=staging, - pk_cols=pk_cols, - merge_batch_size=merge_batch_size, - ) + backend.merge_upsert(cls, session, target, staging, pk_cols, merge_batch_size=merge_batch_size) logger.info( f"Table `{target}`: Merge upsert phase completed in " f"{_format_elapsed(perf_counter() - upsert_started)}." @@ -634,12 +568,7 @@ def merge_from_staging( logger.info(f"Table `{target}`: Merge insert-if-empty phase starting.") insert_started = perf_counter() - cls._merge_insert( - session=session, - target=target, - staging=staging, - merge_batch_size=merge_batch_size, - ) + backend.merge_insert(cls, session, target, staging, merge_batch_size=merge_batch_size) logger.info( f"Table `{target}`: Merge insert-if-empty phase completed in " f"{_format_elapsed(perf_counter() - insert_started)}." diff --git a/src/orm_loader/tables/typing.py b/src/orm_loader/tables/typing.py index 38322be..fe291f5 100644 --- a/src/orm_loader/tables/typing.py +++ b/src/orm_loader/tables/typing.py @@ -87,32 +87,30 @@ def load_csv( merge_strategy: str = "replace", quote_mode: str = "csv", index_strategy: str = "auto", + merge_batch_size: int = 1_000_000, ) -> int: ... @classmethod - def orm_staging_load(cls, loader: "LoaderInterface",loader_context: "LoaderContext") -> int: ... 
+ def orm_staging_load(cls, loader: "LoaderInterface", loader_context: "LoaderContext") -> int: ... @classmethod def get_staging_table(cls, session: so.Session) -> sa.Table: ... @classmethod - def merge_from_staging(cls, session: so.Session, merge_strategy: str) -> None: ... + def merge_from_staging( + cls, + session: so.Session, + merge_strategy: str = "replace", + *, + merge_batch_size: int = 1_000_000, + ) -> None: ... @classmethod def drop_staging_table(cls, session: so.Session) -> None: ... - @classmethod - def _merge_insert(cls, session: so.Session, target: str, staging: str) -> None: ... - @classmethod def _target_has_rows(cls, session: so.Session, target: str) -> bool: ... - @classmethod - def _merge_replace(cls, session: so.Session, target: str, staging: str, pk_cols: list[str]) -> None: ... - - @classmethod - def _merge_upsert(cls, session: so.Session, target: str, staging: str, pk_cols: list[str]) -> None: ... - @classmethod def manage_indices(cls, session: so.Session, index_strategy: str = "auto") -> AbstractContextManager[None]: ... From d2847120d718f5549ff660e0aed5b3e1846af940 Mon Sep 17 00:00:00 2001 From: Nico Loesch Date: Fri, 5 Jun 2026 05:29:54 +0000 Subject: [PATCH 03/11] More efficient backend resolving --- src/orm_loader/backends/resolve.py | 23 ++++++++++------------- 1 file changed, 10 insertions(+), 13 deletions(-) diff --git a/src/orm_loader/backends/resolve.py b/src/orm_loader/backends/resolve.py index e3919c6..94ee0d2 100644 --- a/src/orm_loader/backends/resolve.py +++ b/src/orm_loader/backends/resolve.py @@ -11,10 +11,10 @@ from sqlalchemy.engine import Connection, Engine -_BACKEND_TYPES: tuple[type[DatabaseBackend], ...] 
= ( - PostgresBackend, - SQLiteBackend, -) +_BACKEND_TYPES: dict[Dialect, type[DatabaseBackend]] = { + Dialect.POSTGRESQL: PostgresBackend, + Dialect.SQLITE: SQLiteBackend, +} def _dialect(bindable: "so.Session | Engine | Connection") -> Dialect: @@ -34,13 +34,10 @@ def _dialect(bindable: "so.Session | Engine | Connection") -> Dialect: ) from exc -def resolve_backend(bindable: "so.Session | Engine | Connection") -> DatabaseBackend: - """ - Resolve a concrete backend from a SQLAlchemy session, engine, or connection. - """ +def resolve_backend(bindable: "so.Session | Engine | Connection", **kwargs) -> DatabaseBackend: + """Resolve a concrete backend from a SQLAlchemy session, engine, or connection.""" dialect = _dialect(bindable) - for backend_type in _BACKEND_TYPES: - backend = backend_type() - if backend.supports_dialect(dialect): - return backend - raise NotImplementedError(f"No backend registered for dialect '{dialect.value}'") + try: + return _BACKEND_TYPES[dialect](**kwargs) + except KeyError: + raise NotImplementedError(f"No backend registered for dialect '{dialect.value}'") From ed0400b50d0ff1b08ac7b3aef90ed5b818c8f4db Mon Sep 17 00:00:00 2001 From: Nico Loesch Date: Wed, 10 Jun 2026 01:02:52 +0000 Subject: [PATCH 04/11] Update the logging namespace to reflect the package name --- src/orm_loader/helpers/logging.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/orm_loader/helpers/logging.py b/src/orm_loader/helpers/logging.py index bce30f9..93afe0a 100644 --- a/src/orm_loader/helpers/logging.py +++ b/src/orm_loader/helpers/logging.py @@ -14,7 +14,7 @@ "uri", "url", } -LOGGING_NAMESPACE = "sql_loader" +LOGGING_NAMESPACE = "orm_loader" def _coerce_log_level(level: int | str) -> int: @@ -39,8 +39,8 @@ def get_logger(name: Optional[str] = None) -> logging.Logger: Return a namespaced logger. 
Examples: - get_logger() -> sql_loader - get_logger("loadable_table") -> sql_loader.loadable_table + get_logger() -> orm_loader + get_logger("loadable_table") -> orm_loader.loadable_table """ full_name = LOGGING_NAMESPACE if name is None else f"{LOGGING_NAMESPACE}.{name}" return logging.getLogger(full_name) From 391c8db2b912579980a8a9c884d123da56c71bd9 Mon Sep 17 00:00:00 2001 From: Nico Loesch Date: Wed, 10 Jun 2026 01:13:47 +0000 Subject: [PATCH 05/11] Make the merge-batch-size optional --- src/orm_loader/backends/base.py | 6 +-- src/orm_loader/backends/postgres.py | 55 ++++++++++++++++--------- src/orm_loader/backends/sqlite.py | 6 +-- src/orm_loader/tables/loadable_table.py | 4 +- src/orm_loader/tables/typing.py | 4 +- 5 files changed, 46 insertions(+), 29 deletions(-) diff --git a/src/orm_loader/backends/base.py b/src/orm_loader/backends/base.py index db3cd8f..07028cd 100644 --- a/src/orm_loader/backends/base.py +++ b/src/orm_loader/backends/base.py @@ -182,7 +182,7 @@ def merge_replace( staging_name: str, pk_cols: list[str], *, - merge_batch_size: int = 1_000_000, + merge_batch_size: int | None = None, ) -> None: """Merge staging rows by replacing matching target rows first.""" @@ -195,7 +195,7 @@ def merge_upsert( staging_name: str, pk_cols: list[str], *, - merge_batch_size: int = 1_000_000, + merge_batch_size: int | None = None, ) -> None: """Merge staging rows using backend-specific upsert semantics.""" @@ -207,7 +207,7 @@ def merge_insert( target_name: str, staging_name: str, *, - merge_batch_size: int = 1_000_000, + merge_batch_size: int | None = None, ) -> None: """Insert all staging rows into the target table.""" diff --git a/src/orm_loader/backends/postgres.py b/src/orm_loader/backends/postgres.py index 4813d04..5511747 100644 --- a/src/orm_loader/backends/postgres.py +++ b/src/orm_loader/backends/postgres.py @@ -134,15 +134,21 @@ def merge_replace( staging_name: str, pk_cols: list[str], *, - merge_batch_size: int = 1_000_000, + merge_batch_size: int 
| None = None, ) -> None: pk_join = " AND ".join(f't."{c}" = s."{c}"' for c in pk_cols) - total = session.execute(sa.text(f'SELECT COUNT(*) FROM "{staging_name}"')).scalar_one() + non_paginated_replace = sa.text( + f'DELETE FROM "{target_name}" t USING "{staging_name}" s WHERE {pk_join}' + ) + + if merge_batch_size is None: + session.execute(non_paginated_replace) + return + + total = session.execute(sa.text(f'SELECT COUNT(*) FROM "{staging_name}"')).scalar_one() if total <= merge_batch_size: - session.execute(sa.text( - f'DELETE FROM "{target_name}" t USING "{staging_name}" s WHERE {pk_join}' - )) + session.execute(non_paginated_replace) return session.execute(sa.text(f'CREATE INDEX ON "{staging_name}" (_rownum)')) @@ -169,19 +175,25 @@ def merge_upsert( staging_name: str, pk_cols: list[str], *, - merge_batch_size: int = 1_000_000, + merge_batch_size: int | None = None, ) -> None: insertable_cols = self._insertable_column_names(table_cls) cols_str = ", ".join(f'"{c}"' for c in insertable_cols) conflict_cols = ", ".join(f'"{c}"' for c in pk_cols) - total = session.execute(sa.text(f'SELECT COUNT(*) FROM "{staging_name}"')).scalar_one() + non_paginated_upsert = sa.text( + f'INSERT INTO "{target_name}" ({cols_str})' + f' SELECT {cols_str} FROM "{staging_name}"' + f' ON CONFLICT ({conflict_cols}) DO NOTHING' + ) + + if merge_batch_size is None: + session.execute(non_paginated_upsert) + return + + total = session.execute(sa.text(f'SELECT COUNT(*) FROM "{staging_name}"')).scalar_one() if total <= merge_batch_size: - session.execute(sa.text( - f'INSERT INTO "{target_name}" ({cols_str})' - f' SELECT {cols_str} FROM "{staging_name}"' - f' ON CONFLICT ({conflict_cols}) DO NOTHING' - )) + session.execute(non_paginated_upsert) return session.execute(sa.text(f'CREATE INDEX ON "{staging_name}" (_rownum)')) @@ -209,21 +221,26 @@ def merge_insert( target_name: str, staging_name: str, *, - merge_batch_size: int = 1_000_000, + merge_batch_size: int | None = None, ) -> None: 
insertable_cols = self._insertable_column_names(table_cls) cols_str = ", ".join(f'"{c}"' for c in insertable_cols) - total = session.execute(sa.text(f'SELECT COUNT(*) FROM "{staging_name}"')).scalar_one() + non_paginated_insert = sa.text( + f'INSERT INTO "{target_name}" ({cols_str})' + f' SELECT {cols_str} FROM "{staging_name}"' + ) + + if merge_batch_size is None: + session.execute(non_paginated_insert) + return + total = session.execute(sa.text(f'SELECT COUNT(*) FROM "{staging_name}"')).scalar_one() if total <= merge_batch_size: - session.execute(sa.text( - f'INSERT INTO "{target_name}" ({cols_str})' - f' SELECT {cols_str} FROM "{staging_name}"' - )) + session.execute(non_paginated_insert) return - # Large table: index _rownum for O(N log N) range pagination then + # Paginated path: index _rownum for O(N log N) range scans then # INSERT in batch-sized transactions to bound WAL per commit. # session_replication_role='replica' is session-level and persists # across commits, so FK checks stay disabled for all batches. 
diff --git a/src/orm_loader/backends/sqlite.py b/src/orm_loader/backends/sqlite.py index 0a1ec63..06b58a9 100644 --- a/src/orm_loader/backends/sqlite.py +++ b/src/orm_loader/backends/sqlite.py @@ -140,7 +140,7 @@ def merge_replace( staging_name: str, pk_cols: list[str], *, - merge_batch_size: int = 1_000_000, + merge_batch_size: int | None = None, ) -> None: if len(pk_cols) == 1: pk = pk_cols[0] @@ -179,7 +179,7 @@ def merge_upsert( staging_name: str, pk_cols: list[str], *, - merge_batch_size: int = 1_000_000, + merge_batch_size: int | None = None, ) -> None: insertable_cols = self._insertable_column_names(table_cls) cols_str = ", ".join(f'"{c}"' for c in insertable_cols) @@ -199,7 +199,7 @@ def merge_insert( target_name: str, staging_name: str, *, - merge_batch_size: int = 1_000_000, + merge_batch_size: int | None = None, ) -> None: insertable_cols = self._insertable_column_names(table_cls) cols_str = ", ".join(f'"{c}"' for c in insertable_cols) diff --git a/src/orm_loader/tables/loadable_table.py b/src/orm_loader/tables/loadable_table.py index ac8211d..f00dbbf 100644 --- a/src/orm_loader/tables/loadable_table.py +++ b/src/orm_loader/tables/loadable_table.py @@ -358,7 +358,7 @@ def load_csv( merge_strategy: str = "replace", quote_mode: str = "auto", index_strategy: str = "auto", - merge_batch_size: int = 1_000_000, + merge_batch_size: int | None = None, ) -> int: """ @@ -483,7 +483,7 @@ def merge_from_staging( session: so.Session, merge_strategy: str = "replace", *, - merge_batch_size: int = 1_000_000, + merge_batch_size: int | None = None, ): """ Merge data from the staging table into the target table. 
diff --git a/src/orm_loader/tables/typing.py b/src/orm_loader/tables/typing.py index fe291f5..6b88e2b 100644 --- a/src/orm_loader/tables/typing.py +++ b/src/orm_loader/tables/typing.py @@ -87,7 +87,7 @@ def load_csv( merge_strategy: str = "replace", quote_mode: str = "csv", index_strategy: str = "auto", - merge_batch_size: int = 1_000_000, + merge_batch_size: int | None = None, ) -> int: ... @classmethod @@ -102,7 +102,7 @@ def merge_from_staging( session: so.Session, merge_strategy: str = "replace", *, - merge_batch_size: int = 1_000_000, + merge_batch_size: int | None = None, ) -> None: ... @classmethod From cd18565021e2d06e839a9480f252b886da0f1c69 Mon Sep 17 00:00:00 2001 From: Nico Loesch Date: Wed, 10 Jun 2026 03:01:21 +0000 Subject: [PATCH 06/11] Include oa-configurator --- pyproject.toml | 4 ++ src/orm_loader/config.py | 22 +++++++ src/orm_loader/helpers/__init__.py | 3 - src/orm_loader/helpers/bulk.py | 4 +- src/orm_loader/helpers/logging.py | 92 ------------------------------ 5 files changed, 28 insertions(+), 97 deletions(-) create mode 100644 src/orm_loader/config.py delete mode 100644 src/orm_loader/helpers/logging.py diff --git a/pyproject.toml b/pyproject.toml index 419d7e2..236faf7 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -9,6 +9,7 @@ authors = [ requires-python = ">=3.12" dependencies = [ "chardet>=5.2.0", + #"oa-configurator", "pandas>=2.3.3", "pyarrow>=23.0.0", "sqlalchemy>=2.0.45", @@ -53,6 +54,9 @@ dev = [ "python-dotenv" ] +[project.entry-points."omop.config"] +orm_loader = "orm_loader.config:OrmLoaderConfig" + [tool.setuptools] packages = ["orm_loader"] diff --git a/src/orm_loader/config.py b/src/orm_loader/config.py new file mode 100644 index 0000000..efb8ba3 --- /dev/null +++ b/src/orm_loader/config.py @@ -0,0 +1,22 @@ +"""Configuration for orm-loader via oa-configurator.""" + +from __future__ import annotations + +from typing import ClassVar, Final + +from oa_configurator import PackageConfigBase + +TOOL_NAME: Final[str] = 
"orm_loader" + + +class OrmLoaderConfig(PackageConfigBase): + """oa-configurator config class for orm-loader. + + orm-loader is connection-agnostic — it accepts SQLAlchemy sessions/engines + as parameters and owns no database resources. This class exists solely to + register orm-loader in the oa-configurator ecosystem and to provide a + canonical ``configure_logging()`` entry point. + """ + + tool_name: ClassVar[str] = TOOL_NAME + extra_logging_namespaces: ClassVar[tuple[str, ...]] = () diff --git a/src/orm_loader/helpers/__init__.py b/src/orm_loader/helpers/__init__.py index f2c49a1..651ab2f 100644 --- a/src/orm_loader/helpers/__init__.py +++ b/src/orm_loader/helpers/__init__.py @@ -1,5 +1,4 @@ from .errors import IngestError, ValidationError -from .logging import get_logger, configure_logging from .bootstrap import bootstrap, create_db from .sqlite import ( attach_sqlite_bulk_load_pragmas, @@ -14,8 +13,6 @@ __all__ = [ "IngestError", "ValidationError", - "get_logger", - "configure_logging", "bootstrap", "create_db", "attach_sqlite_bulk_load_pragmas", diff --git a/src/orm_loader/helpers/bulk.py b/src/orm_loader/helpers/bulk.py index 4be22b4..e0c44e6 100644 --- a/src/orm_loader/helpers/bulk.py +++ b/src/orm_loader/helpers/bulk.py @@ -1,11 +1,11 @@ +import logging from contextlib import contextmanager from sqlalchemy import Engine from sqlalchemy.orm import Session from typing import Iterator from ..backends.resolve import resolve_backend -from .logging import get_logger -logger = get_logger(__name__) +logger = logging.getLogger(__name__) def disable_fk_check(session: Session) -> str | int: """Disable foreign-key checks for the current session and return the previous state.""" diff --git a/src/orm_loader/helpers/logging.py b/src/orm_loader/helpers/logging.py deleted file mode 100644 index 93afe0a..0000000 --- a/src/orm_loader/helpers/logging.py +++ /dev/null @@ -1,92 +0,0 @@ -from __future__ import annotations - -import logging -import re -from typing import Any, 
Optional - -SENSITIVE_KEYS = { - "password", - "passwd", - "secret", - "token", - "key", - "dsn", - "uri", - "url", -} -LOGGING_NAMESPACE = "orm_loader" - - -def _coerce_log_level(level: int | str) -> int: - if isinstance(level, int): - return level - - if not isinstance(level, str): - raise TypeError(f"log level must be an int or str, got {type(level).__name__}") - s = level.strip().upper() - if s.isdigit(): - return int(s) - - mapping = logging.getLevelNamesMapping() - if s in mapping: - return mapping[s] - - raise ValueError(f"Invalid log level: {level!r}") - - -def get_logger(name: Optional[str] = None) -> logging.Logger: - """ - Return a namespaced logger. - - Examples: - get_logger() -> orm_loader - get_logger("loadable_table") -> orm_loader.loadable_table - """ - full_name = LOGGING_NAMESPACE if name is None else f"{LOGGING_NAMESPACE}.{name}" - return logging.getLogger(full_name) - - -class RedactingFormatter(logging.Formatter): - def __init__(self, *args: Any, **kwargs: Any): - super().__init__(*args, **kwargs) - self._pattern = re.compile( - r"(?i)\\b(" + "|".join(SENSITIVE_KEYS) + r")\\b\\s*[:=]\\s*[^\\s,;]+" - ) - - def format(self, record: logging.LogRecord) -> str: - msg = super().format(record) - return self._pattern.sub(r"\\1=", msg) - - -def configure_logging( - *, - level: int | str = logging.INFO, - handler: Optional[logging.Handler] = None, - format: Optional[str] = None, - propagate: bool = True, - redact: bool = True, -) -> None: - """ - Enable logging output for omop_alchemy. - - Safe to call multiple times. 
- """ - logger = get_logger() - logger.setLevel(_coerce_log_level(level)) - - if handler is None: - handler = logging.StreamHandler() - - if format is None: - format = "%(asctime)s | %(levelname)-8s | %(name)s | %(message)s" - - formatter_cls = RedactingFormatter if redact else logging.Formatter - handler.setFormatter(formatter_cls(format)) - - if not any(isinstance(h, type(handler)) for h in logger.handlers): - logger.addHandler(handler) - - logger.propagate = propagate - - -logging.getLogger(LOGGING_NAMESPACE).addHandler(logging.NullHandler()) From 99dcee530082e97ca6e7e50d0d06f74107da26f4 Mon Sep 17 00:00:00 2001 From: Nico Loesch Date: Thu, 11 Jun 2026 01:46:51 +0000 Subject: [PATCH 07/11] Include oa-configurator with new test coverage --- pyproject.toml | 3 - src/orm_loader/config.py | 34 ++++++++--- tests/backends/test_postgres_backend.py | 15 ++--- tests/conftest.py | 79 +++++-------------------- tests/loaders/test_pg_loader.py | 35 +++++------ 5 files changed, 66 insertions(+), 100 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 236faf7..46b87f9 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -74,9 +74,6 @@ python_files = ["test_*.py"] python_classes = ["Test*"] python_functions = ["test_*"] addopts = "-ra" -markers = [ - "postgres: requires a running Postgres instance (set TEST_POSTGRES_URL)", -] [tool.pyright] reportMissingTypeStubs = false \ No newline at end of file diff --git a/src/orm_loader/config.py b/src/orm_loader/config.py index efb8ba3..f4c562f 100644 --- a/src/orm_loader/config.py +++ b/src/orm_loader/config.py @@ -2,21 +2,39 @@ from __future__ import annotations -from typing import ClassVar, Final +from typing import ClassVar -from oa_configurator import PackageConfigBase - -TOOL_NAME: Final[str] = "orm_loader" +from oa_configurator import PackageConfigBase, ResourceSpec class OrmLoaderConfig(PackageConfigBase): """oa-configurator config class for orm-loader. 
orm-loader is connection-agnostic — it accepts SQLAlchemy sessions/engines - as parameters and owns no database resources. This class exists solely to - register orm-loader in the oa-configurator ecosystem and to provide a - canonical ``configure_logging()`` entry point. + as parameters and owns no database resources. This class exists to register + orm-loader in the oa-configurator ecosystem, provide a canonical + ``configure_logging()`` entry point, and declare the test database resource + used by the integration test suite. """ - tool_name: ClassVar[str] = TOOL_NAME + TEST_DB: ClassVar[ResourceSpec] = ResourceSpec( + semantic_name="test_orm_db", + display_name="ORM Loader Test Database", + description="PostgreSQL database for running orm-loader integration tests.", + connection_name_hint="pg_test_orm", + is_cdm_database=False, + cdm_schema_default="public", + defaults={ + "dialect": "postgresql+psycopg", + "host": "localhost", + "port": "55432", + "user": "test", + "password": "test", + "database_name": "test", + "cdm_schema": "public", + }, + ) + + tool_name: ClassVar[str] = "orm_loader" extra_logging_namespaces: ClassVar[tuple[str, ...]] = () + test_resources: ClassVar[tuple[ResourceSpec, ...]] = (TEST_DB,) diff --git a/tests/backends/test_postgres_backend.py b/tests/backends/test_postgres_backend.py index 50e8202..936da8b 100644 --- a/tests/backends/test_postgres_backend.py +++ b/tests/backends/test_postgres_backend.py @@ -118,13 +118,12 @@ def test_postgres_backend_fk_methods_emit_expected_sql(): def test_postgres_backend_merge_replace_uses_using_delete(): backend = PostgresBackend() - # scalar_result=0 → COUNT returns 0 → small-table path → single DELETE statement session = _FakeSession(scalar_result=0) backend.merge_replace(_ComputedTableCls, _sess(session), "target_table", "_staging_target_table", ["id", "name"]) - # statements[0] is the COUNT query; statements[1] is the DELETE - sql = session.statements[1] + # No merge_batch_size → non-paginated path → 
single DELETE statement at index 0 + sql = session.statements[0] assert 'DELETE FROM "target_table" t' in sql assert 'USING "_staging_target_table" s' in sql assert 't."id" = s."id" AND t."name" = s."name"' in sql @@ -132,26 +131,24 @@ def test_postgres_backend_merge_replace_uses_using_delete(): def test_postgres_backend_merge_insert_excludes_computed_columns(): backend = PostgresBackend() - # scalar_result=0 → COUNT returns 0 → small-table path → single INSERT statement session = _FakeSession(scalar_result=0) backend.merge_insert(_ComputedTableCls, _sess(session), "target_table", "_staging_target_table") - # statements[0] is the COUNT query; statements[1] is the INSERT - sql = session.statements[1] + # No merge_batch_size → non-paginated path → single INSERT statement at index 0 + sql = session.statements[0] assert 'INSERT INTO "target_table" ("id", "name")' in sql assert 'SELECT "id", "name" FROM "_staging_target_table"' in sql def test_postgres_backend_merge_upsert_excludes_computed_columns(): backend = PostgresBackend() - # scalar_result=0 → COUNT returns 0 → small-table path → single INSERT statement session = _FakeSession(scalar_result=0) backend.merge_upsert(_ComputedTableCls, _sess(session), "target_table", "_staging_target_table", ["id"]) - # statements[0] is the COUNT query; statements[1] is the INSERT - sql = session.statements[1] + # No merge_batch_size → non-paginated path → single INSERT statement at index 0 + sql = session.statements[0] assert 'INSERT INTO "target_table" ("id", "name")' in sql assert 'ON CONFLICT ("id") DO NOTHING' in sql diff --git a/tests/conftest.py b/tests/conftest.py index 64a2531..a5a9abe 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1,7 +1,5 @@ -import os import time from pathlib import Path -from urllib.parse import urlparse, urlunparse import pytest import sqlalchemy as sa @@ -30,87 +28,42 @@ def session(engine): # Postgres fixtures # --------------------------------------------------------------------------- 
-POSTGRES_URL = os.getenv( - "TEST_POSTGRES_URL", - "postgresql+psycopg://test:test@localhost:55432/test", -) - -# Shown whenever Postgres is unreachable — centralised so every skip carries -# the same actionable instructions. -_PG_SKIP_MSG = ( - "Postgres tests skipped — could not connect to {url}.\n" - " Set TEST_POSTGRES_URL to a writable test database and re-run, e.g.:\n" - " export TEST_POSTGRES_URL='postgresql+psycopg://user:pass@host:5432/orm_loader_test'\n" - " Or add it to orm-loader/.env.\n" - " Last error: {{last_err}}" -).format(url=POSTGRES_URL) - -# Module-level sentinel: None = not yet attempted, str = skip reason. -# Prevents the 20-retry loop from running once per postgres test when -# the server is not reachable. -_pg_unavailable: str | None = None - - -def _ensure_db_exists(url: str) -> None: - """Create the target database if it doesn't already exist. - - Connects to the 'postgres' maintenance database (same host/user/pass) - so the target database can be created without touching anything else. - """ - parsed = urlparse(url) - db_name = parsed.path.lstrip("/") - admin_url = urlunparse(parsed._replace(path="/postgres")) - - admin_engine = sa.create_engine(admin_url, isolation_level="AUTOCOMMIT") - try: - with admin_engine.connect() as conn: - exists = conn.execute( - sa.text("SELECT 1 FROM pg_database WHERE datname = :name"), - {"name": db_name}, - ).scalar() - if not exists: - conn.execute(sa.text(f'CREATE DATABASE "{db_name}"')) - print(f"Created test database: {db_name!r}") - finally: - admin_engine.dispose() - - @pytest.fixture(scope="session") def pg_engine(): - global _pg_unavailable + from oa_configurator.pytest_plugin import ensure_test_db_exists, resolve_test_resource + from orm_loader.config import OrmLoaderConfig - # Fast path: already know Postgres is not reachable — skip immediately - # without re-running the retry loop. 
- if _pg_unavailable is not None: - pytest.skip(_pg_unavailable) + url = resolve_test_resource(OrmLoaderConfig.TEST_DB) try: - _ensure_db_exists(POSTGRES_URL) - except Exception as e: - print(f"Could not ensure test DB exists (will try connecting anyway): {e}") + ensure_test_db_exists(url) + except Exception as exc: + print(f"Could not ensure test DB exists, will try anyway: {exc}") last_err = None for i in range(20): + engine: sa.Engine | None = None try: - engine = sa.create_engine(POSTGRES_URL, future=True) + engine = sa.create_engine(url, future=True) with engine.connect() as conn: - conn.execute(sa.text("select 1")) + conn.execute(sa.text("SELECT 1")) print("Postgres connection established") yield engine engine.dispose() return - except Exception as e: - last_err = e - print(f"[{i}] Postgres not ready:", repr(e)) + except Exception as exc: + if engine is not None: + engine.dispose() + last_err = exc + print(f"[{i}] Postgres not ready:", repr(exc)) time.sleep(1) - _pg_unavailable = _PG_SKIP_MSG.format(last_err=last_err) - pytest.skip(_pg_unavailable) + pytest.skip(f"PostgreSQL never became available: {last_err}") @pytest.fixture def pg_session(pg_engine): - Session = so.sessionmaker(bind=pg_engine, future=True) + Session = so.sessionmaker(pg_engine, future=True) with pg_engine.begin() as conn: Base.metadata.drop_all(conn) Base.metadata.create_all(conn) diff --git a/tests/loaders/test_pg_loader.py b/tests/loaders/test_pg_loader.py index 208ea01..9877e5e 100644 --- a/tests/loaders/test_pg_loader.py +++ b/tests/loaders/test_pg_loader.py @@ -2,11 +2,12 @@ import pandas as pd import pytest from orm_loader.loaders.loading_helpers import infer_encoding, infer_delim, check_line_ending, quick_load_pg +from orm_loader.config import OrmLoaderConfig from tests.models import SimpleTable -@pytest.mark.postgres +@pytest.mark.requires_resource(OrmLoaderConfig.TEST_DB) def test_copy_into_staging_with_extra_identity_column(pg_session, tmp_path): """COPY must succeed when the 
staging table has a _rownum identity column.""" csv = tmp_path / "test_table.csv" @@ -29,7 +30,7 @@ def test_copy_into_staging_with_extra_identity_column(pg_session, tmp_path): assert rownums == [1, 2], "_rownum must be auto-populated by IDENTITY sequence" -@pytest.mark.postgres +@pytest.mark.requires_resource(OrmLoaderConfig.TEST_DB) def test_copy_and_orm_path_equivalence(pg_session, tmp_path): csv = tmp_path / "test_table.csv" @@ -51,7 +52,7 @@ def test_copy_and_orm_path_equivalence(pg_session, tmp_path): -@pytest.mark.postgres +@pytest.mark.requires_resource(OrmLoaderConfig.TEST_DB) def test_postgres_copy_fast_path(pg_session, tmp_path): csv = tmp_path / "test_table.csv" pd.DataFrame([{"id": 1, "name": "alpha"}]).to_csv(csv, index=False) @@ -61,7 +62,7 @@ def test_postgres_copy_fast_path(pg_session, tmp_path): assert inserted == 1 -@pytest.mark.postgres +@pytest.mark.requires_resource(OrmLoaderConfig.TEST_DB) def test_postgres_copy_fast_path_is_used(pg_session, tmp_path, monkeypatch): csv = tmp_path / "test_table.csv" pd.DataFrame([{"id": 1, "name": "alpha"}]).to_csv(csv, index=False) @@ -81,7 +82,7 @@ def fake_quick_load_pg(*args, **kwargs): assert called["copy"] is True assert inserted == 1 -@pytest.mark.postgres +@pytest.mark.requires_resource(OrmLoaderConfig.TEST_DB) def test_copy_failure_falls_back_to_orm(pg_session, tmp_path, monkeypatch): csv = tmp_path / "test_table.csv" pd.DataFrame([{"id": 1, "name": "alpha"}]).to_csv(csv, index=False) @@ -101,7 +102,7 @@ def broken_copy(*args, **kwargs): assert [(r.id, r.name) for r in rows] == [(1, "alpha")] -@pytest.mark.postgres +@pytest.mark.requires_resource(OrmLoaderConfig.TEST_DB) def test_postgres_upsert_does_not_update(pg_session, tmp_path): csv = tmp_path / "test_table.csv" @@ -118,7 +119,7 @@ def test_postgres_upsert_does_not_update(pg_session, tmp_path): assert [(r.id, r.name) for r in rows] == [(1, "alpha")] -@pytest.mark.postgres +@pytest.mark.requires_resource(OrmLoaderConfig.TEST_DB) def 
test_postgres_insert_if_empty(pg_session, tmp_path): csv = tmp_path / "test_table.csv" @@ -145,7 +146,7 @@ def test_postgres_insert_if_empty(pg_session, tmp_path): ] -@pytest.mark.postgres +@pytest.mark.requires_resource(OrmLoaderConfig.TEST_DB) def test_postgres_insert_if_empty_raises_on_non_empty_target(pg_session, tmp_path): csv = tmp_path / "test_table.csv" @@ -163,7 +164,7 @@ def test_postgres_insert_if_empty_raises_on_non_empty_target(pg_session, tmp_pat ) -@pytest.mark.postgres +@pytest.mark.requires_resource(OrmLoaderConfig.TEST_DB) def test_postgres_copy_large_batch(pg_session, tmp_path): csv = tmp_path / "test_table.csv" @@ -180,7 +181,7 @@ def test_postgres_copy_large_batch(pg_session, tmp_path): assert inserted == 9999 -@pytest.mark.postgres +@pytest.mark.requires_resource(OrmLoaderConfig.TEST_DB) def test_staging_schema_matches_target(pg_session, tmp_path): csv = tmp_path / "test_table.csv" pd.DataFrame([{"id": 1, "name": "alpha"}]).to_csv(csv, index=False) @@ -250,7 +251,7 @@ def test_check_line_ending_unknown(caplog): assert "Unable to detect line ending" in caplog.text -@pytest.mark.postgres +@pytest.mark.requires_resource(OrmLoaderConfig.TEST_DB) def test_quick_load_pg_basic(pg_session, tmp_path): csv = tmp_path / "test_table.csv" csv.write_text("id,name\n1,alpha\n2,beta\n") @@ -266,7 +267,7 @@ def test_quick_load_pg_basic(pg_session, tmp_path): assert rows == [(1, "alpha"), (2, "beta")] -@pytest.mark.postgres +@pytest.mark.requires_resource(OrmLoaderConfig.TEST_DB) def test_quick_load_pg_lowercases_header(pg_session, tmp_path): csv = tmp_path / "test_table.csv" csv.write_text("ID,NAME\n1,alpha\n") @@ -278,7 +279,7 @@ def test_quick_load_pg_lowercases_header(pg_session, tmp_path): assert row == (1, "alpha") -@pytest.mark.postgres +@pytest.mark.requires_resource(OrmLoaderConfig.TEST_DB) def test_quick_load_pg_tab_delimiter(pg_session, tmp_path): csv = tmp_path / "test_table.csv" csv.write_text("id\tname\n1\talpha\n2\tbeta\n") @@ -290,7 +291,7 @@ def 
test_quick_load_pg_tab_delimiter(pg_session, tmp_path): assert rows == [(1, "alpha"), (2, "beta")] -@pytest.mark.postgres +@pytest.mark.requires_resource(OrmLoaderConfig.TEST_DB) def test_quick_load_pg_rollback_on_error(pg_session, tmp_path): csv = tmp_path / "test_table.csv" csv.write_text("id,name\n1,alpha\n2,\n") # violates NOT NULL @@ -302,7 +303,7 @@ def test_quick_load_pg_rollback_on_error(pg_session, tmp_path): assert rows == 0 -@pytest.mark.postgres +@pytest.mark.requires_resource(OrmLoaderConfig.TEST_DB) def test_quick_load_pg_equivalence_with_orm(pg_session, tmp_path): csv = tmp_path / "test_table.csv" csv.write_text("id,name\n1,alpha\n2,beta\n") @@ -326,7 +327,7 @@ def test_quick_load_pg_equivalence_with_orm(pg_session, tmp_path): assert rows_pg == rows_orm -@pytest.mark.postgres +@pytest.mark.requires_resource(OrmLoaderConfig.TEST_DB) def test_quick_load_pg_trailing_blank_lines(pg_session, tmp_path): csv = tmp_path / "test_table.csv" @@ -345,7 +346,7 @@ def test_quick_load_pg_trailing_blank_lines(pg_session, tmp_path): assert total == 2 assert rows == [(1, "alpha"), (2, "beta")] -@pytest.mark.postgres +@pytest.mark.requires_resource(OrmLoaderConfig.TEST_DB) def test_copy_fails_with_raw_carriage_returns_but_succeeds_after_normalisation(pg_session, tmp_path): csv = tmp_path / "test_table.csv" From fe81cf3233f3535979de363355c0eec63f660939 Mon Sep 17 00:00:00 2001 From: Nico Loesch Date: Thu, 11 Jun 2026 06:15:44 +0000 Subject: [PATCH 08/11] Check ruff, pylance, mkdocs and pytest --- src/orm_loader/loaders/loader_interface.py | 2 +- tests/backends/test_base_backend.py | 6 ++++++ 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/src/orm_loader/loaders/loader_interface.py b/src/orm_loader/loaders/loader_interface.py index 825b816..8b500f1 100644 --- a/src/orm_loader/loaders/loader_interface.py +++ b/src/orm_loader/loaders/loader_interface.py @@ -98,7 +98,7 @@ def _on_cast_error(value, *, _col=col_name): if required_cols: null_mask = 
df[required_cols].isna() for col in required_cols: - null_count = int(null_mask[col].sum()) + null_count = int(null_mask[col].sum()) # type: ignore[arg-type] if null_count > 0: logger.warning( "Found %d rows with unexpected nulls in %s.%s", diff --git a/tests/backends/test_base_backend.py b/tests/backends/test_base_backend.py index e1d2b44..212f1f6 100644 --- a/tests/backends/test_base_backend.py +++ b/tests/backends/test_base_backend.py @@ -70,6 +70,8 @@ def merge_replace( target_name: str, staging_name: str, pk_cols: list[str], + *, + merge_batch_size: int | None = None, ) -> None: return None @@ -80,6 +82,8 @@ def merge_upsert( target_name: str, staging_name: str, pk_cols: list[str], + *, + merge_batch_size: int | None = None, ) -> None: return None @@ -89,6 +93,8 @@ def merge_insert( session: so.Session, target_name: str, staging_name: str, + *, + merge_batch_size: int | None = None, ) -> None: return None From 29fad697121e40824a8a968d2c2418a8b0406431 Mon Sep 17 00:00:00 2001 From: Nico Loesch Date: Mon, 15 Jun 2026 01:45:34 +0000 Subject: [PATCH 09/11] Update oa-configurator version --- pyproject.toml | 3 +- uv.lock | 217 ++++++++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 215 insertions(+), 5 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 46b87f9..3f2a5f4 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -9,7 +9,7 @@ authors = [ requires-python = ">=3.12" dependencies = [ "chardet>=5.2.0", - #"oa-configurator", + "oa-configurator>=0.1.0", "pandas>=2.3.3", "pyarrow>=23.0.0", "sqlalchemy>=2.0.45", @@ -42,6 +42,7 @@ postgres = [ "psycopg[binary]>=3.2", ] dev = [ + "oa-configurator[postgres]>=0.1.0", "pytest>=9.0.3", "mypy>=1.19.1", "ruff>=0.14.11", diff --git a/uv.lock b/uv.lock index a2ef818..02e8aff 100644 --- a/uv.lock +++ b/uv.lock @@ -2,6 +2,24 @@ version = 1 revision = 3 requires-python = ">=3.12" +[[package]] +name = "annotated-doc" +version = "0.0.4" +source = { registry = "https://pypi.org/simple" } +sdist = { url = 
"https://files.pythonhosted.org/packages/57/ba/046ceea27344560984e26a590f90bc7f4a75b06701f653222458922b558c/annotated_doc-0.0.4.tar.gz", hash = "sha256:fbcda96e87e9c92ad167c2e53839e57503ecfda18804ea28102353485033faa4", size = 7288, upload-time = "2025-11-10T22:07:42.062Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/1e/d3/26bf1008eb3d2daa8ef4cacc7f3bfdc11818d111f7e2d0201bc6e3b49d45/annotated_doc-0.0.4-py3-none-any.whl", hash = "sha256:571ac1dc6991c450b25a9c2d84a3705e2ae7a53467b5d111c24fa8baabbed320", size = 5303, upload-time = "2025-11-10T22:07:40.673Z" }, +] + +[[package]] +name = "annotated-types" +version = "0.7.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/ee/67/531ea369ba64dcff5ec9c3402f9f51bf748cec26dde048a2f973a4eea7f5/annotated_types-0.7.0.tar.gz", hash = "sha256:aff07c09a53a08bc8cfccb9c85b05f1aa9a2a6f23728d790723543408344ce89", size = 16081, upload-time = "2024-05-20T21:33:25.928Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/78/b6/6307fbef88d9b5ee7421e68d78a9f162e0da4900bc5f5793f6d3d0e34fb8/annotated_types-0.7.0-py3-none-any.whl", hash = "sha256:1f02e8b43a8fbbc3f3e0d4f0f4bfc8131bcb4eebe8849b8e5c773f3a1c582a53", size = 13643, upload-time = "2024-05-20T21:33:24.1Z" }, +] + [[package]] name = "babel" version = "2.17.0" @@ -164,7 +182,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/f8/0a/a3871375c7b9727edaeeea994bfff7c63ff7804c9829c19309ba2e058807/greenlet-3.3.0-cp312-cp312-macosx_11_0_universal2.whl", hash = "sha256:b01548f6e0b9e9784a2c99c5651e5dc89ffcbe870bc5fb2e5ef864e9cc6b5dcb", size = 276379, upload-time = "2025-12-04T14:23:30.498Z" }, { url = "https://files.pythonhosted.org/packages/43/ab/7ebfe34dce8b87be0d11dae91acbf76f7b8246bf9d6b319c741f99fa59c6/greenlet-3.3.0-cp312-cp312-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:349345b770dc88f81506c6861d22a6ccd422207829d2c854ae2af8025af303e3", size = 597294, upload-time 
= "2025-12-04T14:50:06.847Z" }, { url = "https://files.pythonhosted.org/packages/a4/39/f1c8da50024feecd0793dbd5e08f526809b8ab5609224a2da40aad3a7641/greenlet-3.3.0-cp312-cp312-manylinux_2_24_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:e8e18ed6995e9e2c0b4ed264d2cf89260ab3ac7e13555b8032b25a74c6d18655", size = 607742, upload-time = "2025-12-04T14:57:42.349Z" }, - { url = "https://files.pythonhosted.org/packages/77/cb/43692bcd5f7a0da6ec0ec6d58ee7cddb606d055ce94a62ac9b1aa481e969/greenlet-3.3.0-cp312-cp312-manylinux_2_24_s390x.manylinux_2_28_s390x.whl", hash = "sha256:c024b1e5696626890038e34f76140ed1daf858e37496d33f2af57f06189e70d7", size = 622297, upload-time = "2025-12-04T15:07:13.552Z" }, { url = "https://files.pythonhosted.org/packages/75/b0/6bde0b1011a60782108c01de5913c588cf51a839174538d266de15e4bf4d/greenlet-3.3.0-cp312-cp312-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:047ab3df20ede6a57c35c14bf5200fcf04039d50f908270d3f9a7a82064f543b", size = 609885, upload-time = "2025-12-04T14:26:02.368Z" }, { url = "https://files.pythonhosted.org/packages/49/0e/49b46ac39f931f59f987b7cd9f34bfec8ef81d2a1e6e00682f55be5de9f4/greenlet-3.3.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:2d9ad37fc657b1102ec880e637cccf20191581f75c64087a549e66c57e1ceb53", size = 1567424, upload-time = "2025-12-04T15:04:23.757Z" }, { url = "https://files.pythonhosted.org/packages/05/f5/49a9ac2dff7f10091935def9165c90236d8f175afb27cbed38fb1d61ab6b/greenlet-3.3.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:83cd0e36932e0e7f36a64b732a6f60c2fc2df28c351bae79fbaf4f8092fe7614", size = 1636017, upload-time = "2025-12-04T14:27:29.688Z" }, @@ -172,7 +189,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/02/2f/28592176381b9ab2cafa12829ba7b472d177f3acc35d8fbcf3673d966fff/greenlet-3.3.0-cp313-cp313-macosx_11_0_universal2.whl", hash = "sha256:a1e41a81c7e2825822f4e068c48cb2196002362619e2d70b148f20a831c00739", size = 275140, upload-time = 
"2025-12-04T14:23:01.282Z" }, { url = "https://files.pythonhosted.org/packages/2c/80/fbe937bf81e9fca98c981fe499e59a3f45df2a04da0baa5c2be0dca0d329/greenlet-3.3.0-cp313-cp313-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:9f515a47d02da4d30caaa85b69474cec77b7929b2e936ff7fb853d42f4bf8808", size = 599219, upload-time = "2025-12-04T14:50:08.309Z" }, { url = "https://files.pythonhosted.org/packages/c2/ff/7c985128f0514271b8268476af89aee6866df5eec04ac17dcfbc676213df/greenlet-3.3.0-cp313-cp313-manylinux_2_24_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:7d2d9fd66bfadf230b385fdc90426fcd6eb64db54b40c495b72ac0feb5766c54", size = 610211, upload-time = "2025-12-04T14:57:43.968Z" }, - { url = "https://files.pythonhosted.org/packages/79/07/c47a82d881319ec18a4510bb30463ed6891f2ad2c1901ed5ec23d3de351f/greenlet-3.3.0-cp313-cp313-manylinux_2_24_s390x.manylinux_2_28_s390x.whl", hash = "sha256:30a6e28487a790417d036088b3bcb3f3ac7d8babaa7d0139edbaddebf3af9492", size = 624311, upload-time = "2025-12-04T15:07:14.697Z" }, { url = "https://files.pythonhosted.org/packages/fd/8e/424b8c6e78bd9837d14ff7df01a9829fc883ba2ab4ea787d4f848435f23f/greenlet-3.3.0-cp313-cp313-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:087ea5e004437321508a8d6f20efc4cfec5e3c30118e1417ea96ed1d93950527", size = 612833, upload-time = "2025-12-04T14:26:03.669Z" }, { url = "https://files.pythonhosted.org/packages/b5/ba/56699ff9b7c76ca12f1cdc27a886d0f81f2189c3455ff9f65246780f713d/greenlet-3.3.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:ab97cf74045343f6c60a39913fa59710e4bd26a536ce7ab2397adf8b27e67c39", size = 1567256, upload-time = "2025-12-04T15:04:25.276Z" }, { url = "https://files.pythonhosted.org/packages/1e/37/f31136132967982d698c71a281a8901daf1a8fbab935dce7c0cf15f942cc/greenlet-3.3.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:5375d2e23184629112ca1ea89a53389dddbffcf417dad40125713d88eb5f96e8", size = 1636483, upload-time = "2025-12-04T14:27:30.804Z" }, @@ 
-180,7 +196,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/d7/7c/f0a6d0ede2c7bf092d00bc83ad5bafb7e6ec9b4aab2fbdfa6f134dc73327/greenlet-3.3.0-cp314-cp314-macosx_11_0_universal2.whl", hash = "sha256:60c2ef0f578afb3c8d92ea07ad327f9a062547137afe91f38408f08aacab667f", size = 275671, upload-time = "2025-12-04T14:23:05.267Z" }, { url = "https://files.pythonhosted.org/packages/44/06/dac639ae1a50f5969d82d2e3dd9767d30d6dbdbab0e1a54010c8fe90263c/greenlet-3.3.0-cp314-cp314-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:0a5d554d0712ba1de0a6c94c640f7aeba3f85b3a6e1f2899c11c2c0428da9365", size = 646360, upload-time = "2025-12-04T14:50:10.026Z" }, { url = "https://files.pythonhosted.org/packages/e0/94/0fb76fe6c5369fba9bf98529ada6f4c3a1adf19e406a47332245ef0eb357/greenlet-3.3.0-cp314-cp314-manylinux_2_24_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:3a898b1e9c5f7307ebbde4102908e6cbfcb9ea16284a3abe15cab996bee8b9b3", size = 658160, upload-time = "2025-12-04T14:57:45.41Z" }, - { url = "https://files.pythonhosted.org/packages/93/79/d2c70cae6e823fac36c3bbc9077962105052b7ef81db2f01ec3b9bf17e2b/greenlet-3.3.0-cp314-cp314-manylinux_2_24_s390x.manylinux_2_28_s390x.whl", hash = "sha256:dcd2bdbd444ff340e8d6bdf54d2f206ccddbb3ccfdcd3c25bf4afaa7b8f0cf45", size = 671388, upload-time = "2025-12-04T15:07:15.789Z" }, { url = "https://files.pythonhosted.org/packages/b8/14/bab308fc2c1b5228c3224ec2bf928ce2e4d21d8046c161e44a2012b5203e/greenlet-3.3.0-cp314-cp314-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:5773edda4dc00e173820722711d043799d3adb4f01731f40619e07ea2750b955", size = 660166, upload-time = "2025-12-04T14:26:05.099Z" }, { url = "https://files.pythonhosted.org/packages/4b/d2/91465d39164eaa0085177f61983d80ffe746c5a1860f009811d498e7259c/greenlet-3.3.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:ac0549373982b36d5fd5d30beb8a7a33ee541ff98d2b502714a09f1169f31b55", size = 1615193, upload-time = "2025-12-04T15:04:27.041Z" }, { url 
= "https://files.pythonhosted.org/packages/42/1b/83d110a37044b92423084d52d5d5a3b3a73cafb51b547e6d7366ff62eff1/greenlet-3.3.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:d198d2d977460358c3b3a4dc844f875d1adb33817f0613f663a656f463764ccc", size = 1683653, upload-time = "2025-12-04T14:27:32.366Z" }, @@ -188,7 +203,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/a0/66/bd6317bc5932accf351fc19f177ffba53712a202f9df10587da8df257c7e/greenlet-3.3.0-cp314-cp314t-macosx_11_0_universal2.whl", hash = "sha256:d6ed6f85fae6cdfdb9ce04c9bf7a08d666cfcfb914e7d006f44f840b46741931", size = 282638, upload-time = "2025-12-04T14:25:20.941Z" }, { url = "https://files.pythonhosted.org/packages/30/cf/cc81cb030b40e738d6e69502ccbd0dd1bced0588e958f9e757945de24404/greenlet-3.3.0-cp314-cp314t-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:d9125050fcf24554e69c4cacb086b87b3b55dc395a8b3ebe6487b045b2614388", size = 651145, upload-time = "2025-12-04T14:50:11.039Z" }, { url = "https://files.pythonhosted.org/packages/9c/ea/1020037b5ecfe95ca7df8d8549959baceb8186031da83d5ecceff8b08cd2/greenlet-3.3.0-cp314-cp314t-manylinux_2_24_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:87e63ccfa13c0a0f6234ed0add552af24cc67dd886731f2261e46e241608bee3", size = 654236, upload-time = "2025-12-04T14:57:47.007Z" }, - { url = "https://files.pythonhosted.org/packages/69/cc/1e4bae2e45ca2fa55299f4e85854606a78ecc37fead20d69322f96000504/greenlet-3.3.0-cp314-cp314t-manylinux_2_24_s390x.manylinux_2_28_s390x.whl", hash = "sha256:2662433acbca297c9153a4023fe2161c8dcfdcc91f10433171cf7e7d94ba2221", size = 662506, upload-time = "2025-12-04T15:07:16.906Z" }, { url = "https://files.pythonhosted.org/packages/57/b9/f8025d71a6085c441a7eaff0fd928bbb275a6633773667023d19179fe815/greenlet-3.3.0-cp314-cp314t-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:3c6e9b9c1527a78520357de498b0e709fb9e2f49c3a513afd5a249007261911b", size = 653783, upload-time = "2025-12-04T14:26:06.225Z" }, { 
url = "https://files.pythonhosted.org/packages/f6/c7/876a8c7a7485d5d6b5c6821201d542ef28be645aa024cfe1145b35c120c1/greenlet-3.3.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:286d093f95ec98fdd92fcb955003b8a3d054b4e2cab3e2707a5039e7b50520fd", size = 1614857, upload-time = "2025-12-04T15:04:28.484Z" }, { url = "https://files.pythonhosted.org/packages/4f/dc/041be1dff9f23dac5f48a43323cd0789cb798342011c19a248d9c9335536/greenlet-3.3.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:6c10513330af5b8ae16f023e8ddbfb486ab355d04467c4679c5cfe4659975dd9", size = 1676034, upload-time = "2025-12-04T14:27:33.531Z" }, @@ -310,6 +324,18 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/59/1b/6ef961f543593969d25b2afe57a3564200280528caa9bd1082eecdd7b3bc/markdown-3.10.1-py3-none-any.whl", hash = "sha256:867d788939fe33e4b736426f5b9f651ad0c0ae0ecf89df0ca5d1176c70812fe3", size = 107684, upload-time = "2026-01-21T18:09:27.203Z" }, ] +[[package]] +name = "markdown-it-py" +version = "4.2.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "mdurl" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/06/ff/7841249c247aa650a76b9ee4bbaeae59370dc8bfd2f6c01f3630c35eb134/markdown_it_py-4.2.0.tar.gz", hash = "sha256:04a21681d6fbb623de53f6f364d352309d4094dd4194040a10fd51833e418d49", size = 82454, upload-time = "2026-05-07T12:08:28.36Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/b3/81/4da04ced5a082363ecfa159c010d200ecbd959ae410c10c0264a38cac0f5/markdown_it_py-4.2.0-py3-none-any.whl", hash = "sha256:9f7ebbcd14fe59494226453aed97c1070d83f8d24b6fc3a3bcf9a38092641c4a", size = 91687, upload-time = "2026-05-07T12:08:27.182Z" }, +] + [[package]] name = "markupsafe" version = "3.0.3" @@ -373,6 +399,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/70/bc/6f1c2f612465f5fa89b95bead1f44dcb607670fd42891d8fdcd5d039f4f4/markupsafe-3.0.3-cp314-cp314t-win_arm64.whl", hash = 
"sha256:32001d6a8fc98c8cb5c947787c5d08b0a50663d139f1305bac5885d98d9b40fa", size = 14146, upload-time = "2025-09-27T18:37:28.327Z" }, ] +[[package]] +name = "mdurl" +version = "0.1.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/d6/54/cfe61301667036ec958cb99bd3efefba235e65cdeb9c84d24a8293ba1d90/mdurl-0.1.2.tar.gz", hash = "sha256:bb413d29f5eea38f31dd4754dd7377d4465116fb207585f97bf925588687c1ba", size = 8729, upload-time = "2022-08-14T12:40:10.846Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/b3/38/89ba8ad64ae25be8de66a6d463314cf1eb366222074cfda9ee839c56a4b4/mdurl-0.1.2-py3-none-any.whl", hash = "sha256:84008a41e51615a49fc9966191ff91509e3c40b939176e643fd50a5c2196b8f8", size = 9979, upload-time = "2022-08-14T12:40:09.779Z" }, +] + [[package]] name = "mergedeep" version = "1.3.4" @@ -616,12 +651,35 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/ad/0d/eca3d962f9eef265f01a8e0d20085c6dd1f443cbffc11b6dede81fd82356/numpy-2.4.1-cp314-cp314t-win_arm64.whl", hash = "sha256:6436cffb4f2bf26c974344439439c95e152c9a527013f26b3577be6c2ca64295", size = 10667121, upload-time = "2026-01-10T06:44:41.644Z" }, ] +[[package]] +name = "oa-configurator" +version = "0.1.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "click" }, + { name = "pydantic" }, + { name = "rich" }, + { name = "sqlalchemy" }, + { name = "tomli-w" }, + { name = "typer" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/04/bf/abe1fd37a4e717d10b53ce1f73f91ddc8baef39d0c1bda0c6123bf702a6e/oa_configurator-0.1.0.tar.gz", hash = "sha256:2bd8e8416abc4dc99ff78a7961b445c499e7fd25f649a967be037fec5f2d2d08", size = 84105, upload-time = "2026-06-15T01:18:50.128Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/4a/ab/18695a718a53526823d7bf32194d6d78f4ea6dc0062ee17b43c6058e03f7/oa_configurator-0.1.0-py3-none-any.whl", hash = 
"sha256:7165984090711adff857a76d0468bb789f9b4756081a5688f609f487391a8dba", size = 30092, upload-time = "2026-06-15T01:18:49.163Z" }, +] + +[package.optional-dependencies] +postgres = [ + { name = "psycopg", extra = ["binary"] }, +] + [[package]] name = "orm-loader" version = "0.4.1" source = { editable = "." } dependencies = [ { name = "chardet" }, + { name = "oa-configurator" }, { name = "pandas" }, { name = "pyarrow" }, { name = "sqlalchemy" }, @@ -634,6 +692,7 @@ dev = [ { name = "mkdocs-mermaid2-plugin" }, { name = "mkdocstrings-python" }, { name = "mypy" }, + { name = "oa-configurator", extra = ["postgres"] }, { name = "pygments" }, { name = "pytest" }, { name = "python-dotenv" }, @@ -652,6 +711,8 @@ requires-dist = [ { name = "mkdocs-mermaid2-plugin", marker = "extra == 'dev'" }, { name = "mkdocstrings-python", marker = "extra == 'dev'", specifier = ">=2.0.1" }, { name = "mypy", marker = "extra == 'dev'", specifier = ">=1.19.1" }, + { name = "oa-configurator", specifier = ">=0.1.0" }, + { name = "oa-configurator", extras = ["postgres"], marker = "extra == 'dev'", specifier = ">=0.1.0" }, { name = "pandas", specifier = ">=2.3.3" }, { name = "psycopg", extras = ["binary"], marker = "extra == 'postgres'", specifier = ">=3.2" }, { name = "pyarrow", specifier = ">=23.0.0" }, @@ -857,6 +918,96 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/72/9c/47693463894b610f8439b2e970b82ef81e9599c757bf2049365e40ff963c/pyarrow-23.0.0-cp314-cp314t-win_amd64.whl", hash = "sha256:427deac1f535830a744a4f04a6ac183a64fcac4341b3f618e693c41b7b98d2b0", size = 28338905, upload-time = "2026-01-18T16:19:32.93Z" }, ] +[[package]] +name = "pydantic" +version = "2.13.4" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "annotated-types" }, + { name = "pydantic-core" }, + { name = "typing-extensions" }, + { name = "typing-inspection" }, +] +sdist = { url = 
"https://files.pythonhosted.org/packages/18/a5/b60d21ac674192f8ab0ba4e9fd860690f9b4a6e51ca5df118733b487d8d6/pydantic-2.13.4.tar.gz", hash = "sha256:c40756b57adaa8b1efeeced5c196f3f3b7c435f90e84ea7f443901bec8099ef6", size = 844775, upload-time = "2026-05-06T13:43:05.343Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/fd/7b/122376b1fd3c62c1ed9dc80c931ace4844b3c55407b6fb2d199377c9736f/pydantic-2.13.4-py3-none-any.whl", hash = "sha256:45a282cde31d808236fd7ea9d919b128653c8b38b393d1c4ab335c62924d9aba", size = 472262, upload-time = "2026-05-06T13:43:02.641Z" }, +] + +[[package]] +name = "pydantic-core" +version = "2.46.4" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/9d/56/921726b776ace8d8f5db44c4ef961006580d91dc52b803c489fafd1aa249/pydantic_core-2.46.4.tar.gz", hash = "sha256:62f875393d7f270851f20523dd2e29f082bcc82292d66db2b64ea71f64b6e1c1", size = 471464, upload-time = "2026-05-06T13:37:06.98Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/ce/8c/af022f0af448d7747c5154288d46b5f2bc5f17366eaa0e23e9aa04d59f3b/pydantic_core-2.46.4-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:3245406455a5d98187ec35530fd772b1d799b26667980872c8d4614991e2c4a2", size = 2106158, upload-time = "2026-05-06T13:38:57.215Z" }, + { url = "https://files.pythonhosted.org/packages/19/95/6195171e385007300f0f5574592e467c568becce2d937a0b6804f218bc49/pydantic_core-2.46.4-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:962ccbab7b642487b1d8b7df90ef677e03134cf1fd8880bf698649b22a69371f", size = 1951724, upload-time = "2026-05-06T13:37:02.697Z" }, + { url = "https://files.pythonhosted.org/packages/8e/bc/f47d1ff9cbb1620e1b5b697eef06010035735f07820180e74178226b27b3/pydantic_core-2.46.4-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8233f2947cf85404441fd7e0085f53b10c93e0ee78611099b5c7237e36aacbf7", size = 1975742, 
upload-time = "2026-05-06T13:37:09.448Z" }, + { url = "https://files.pythonhosted.org/packages/5b/11/9b9a5b0306345664a2da6410877af6e8082481b5884b3ddd78d47c6013ce/pydantic_core-2.46.4-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:3a233125ac121aa3ffba9a2b59edfc4a985a76092dc8279586ab4b71390875e7", size = 2052418, upload-time = "2026-05-06T13:37:38.234Z" }, + { url = "https://files.pythonhosted.org/packages/f1/b7/a65fec226f5d78fc39f4a13c4cc0c768c22b113438f60c14adc9d2865038/pydantic_core-2.46.4-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:5b712b53160b79a5850310b912a5ef8e57e56947c8ad690c227f5c9d7e561712", size = 2232274, upload-time = "2026-05-06T13:38:27.753Z" }, + { url = "https://files.pythonhosted.org/packages/68/f0/92039db98b907ef49269a8271f67db9cb78ae2fc68062ef7e4e77adb5f61/pydantic_core-2.46.4-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:9401557acd873c3a7f3eb9383edef8ac4968f9510e340f4808d427e75667e7b4", size = 2309940, upload-time = "2026-05-06T13:38:05.353Z" }, + { url = "https://files.pythonhosted.org/packages/5f/97/2aab507d3d00ca626e8e57c1eac6a79e4e5fbcc63eb99733ff55d1717f65/pydantic_core-2.46.4-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:926c9541b14b12b1681dca8a0b75feb510b06c6341b70a8e500c2fdcff837cce", size = 2094516, upload-time = "2026-05-06T13:39:10.577Z" }, + { url = "https://files.pythonhosted.org/packages/22/37/a8aca44d40d737dde2bc05b3c6c07dff0de07ce6f82e9f3167aeaf4d5dea/pydantic_core-2.46.4-cp312-cp312-manylinux_2_31_riscv64.whl", hash = "sha256:56cb4851bcaf3d117eddcef4fe66afd750a50274b0da8e22be256d10e5611987", size = 2136854, upload-time = "2026-05-06T13:40:22.59Z" }, + { url = "https://files.pythonhosted.org/packages/24/99/fcef1b79238c06a8cbec70819ac722ba76e02bc8ada9b0fd66eba40da01b/pydantic_core-2.46.4-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = 
"sha256:c68fcd102d71ea85c5b2dfac3f4f8476eff42a9e078fd5faefff6d145063536b", size = 2180306, upload-time = "2026-05-06T13:40:10.666Z" }, + { url = "https://files.pythonhosted.org/packages/ae/6c/fc44000918855b42779d007ae63b0532794739027b2f417321cddbc44f6a/pydantic_core-2.46.4-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:b2f69dec1725e79a012d920df1707de5caf7ed5e08f3be4435e25803efc47458", size = 2190044, upload-time = "2026-05-06T13:40:43.231Z" }, + { url = "https://files.pythonhosted.org/packages/6b/65/d9cadc9f1920d7a127ad2edba16c1db7916e59719285cd6c94600b0080ba/pydantic_core-2.46.4-cp312-cp312-musllinux_1_1_armv7l.whl", hash = "sha256:8d0820e8192167f80d88d64038e609c31452eeca865b4e1d9950a27a4609b00b", size = 2329133, upload-time = "2026-05-06T13:39:57.365Z" }, + { url = "https://files.pythonhosted.org/packages/d0/cf/c873d91679f3a30bcf5e7ac280ce5573483e72295307685120d0d5ad3416/pydantic_core-2.46.4-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:fbdb89b3e1c94a30cc5edfce477c6e6a5dc4d8f84665b455c27582f211a1c72c", size = 2374464, upload-time = "2026-05-06T13:38:06.976Z" }, + { url = "https://files.pythonhosted.org/packages/47/bd/6f2fc8188f31bf10590f1e98e7b306336161fac930a8c514cd7bd828c7dc/pydantic_core-2.46.4-cp312-cp312-win32.whl", hash = "sha256:9aa768456404a8bf48a4406685ac2bec8e72b62c69313734fa3b73cf33b3a894", size = 1974823, upload-time = "2026-05-06T13:40:47.985Z" }, + { url = "https://files.pythonhosted.org/packages/40/8c/985c1d41ea1107c2534abd9870e4ed5c8e7669b5c308297835c001e7a1c4/pydantic_core-2.46.4-cp312-cp312-win_amd64.whl", hash = "sha256:e9c26f834c65f5752f3f06cb08cb86a913ceb7274d0db6e267808a708b46bc89", size = 2072919, upload-time = "2026-05-06T13:39:21.153Z" }, + { url = "https://files.pythonhosted.org/packages/c4/ba/f463d006e0c47373ca7ec5e1a261c59dc01ef4d62b2657af925fb0deee3a/pydantic_core-2.46.4-cp312-cp312-win_arm64.whl", hash = "sha256:4fc73cb559bdb54b1134a706a2802a4cddd27a0633f5abb7e53056268751ac6a", size = 2027604, upload-time = 
"2026-05-06T13:39:03.753Z" }, + { url = "https://files.pythonhosted.org/packages/51/a2/5d30b469c5267a17b39dec53208222f76a8d351dfac4af661888c5aee77d/pydantic_core-2.46.4-cp313-cp313-macosx_10_12_x86_64.whl", hash = "sha256:5d5902252db0d3cedf8d4a1bc68f70eeb430f7e4c7104c8c476753519b423008", size = 2106306, upload-time = "2026-05-06T13:37:48.029Z" }, + { url = "https://files.pythonhosted.org/packages/c1/81/4fa520eaffa8bd7d1525e644cd6d39e7d60b1592bc5b516693c7340b50f1/pydantic_core-2.46.4-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:c94f0688e7b8d0a67abf40e57a7eaaecd17cc9586706a31b76c031f63df052b4", size = 1951906, upload-time = "2026-05-06T13:37:17.012Z" }, + { url = "https://files.pythonhosted.org/packages/03/d5/fd02da45b659668b05923b17ba3a0100a0a3d5541e3bd8fcc4ecb711309e/pydantic_core-2.46.4-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f027324c56cd5406ca49c124b0db10e56c69064fec039acc571c29020cc87c76", size = 1976802, upload-time = "2026-05-06T13:37:35.113Z" }, + { url = "https://files.pythonhosted.org/packages/21/f2/95727e1368be3d3ed485eaab7adbd7dda408f33f7a36e8b48e0144002b91/pydantic_core-2.46.4-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:e739fee756ba1010f8bcccb534252e85a35fe45ae92c295a06059ce58b74ccd3", size = 2052446, upload-time = "2026-05-06T13:37:12.313Z" }, + { url = "https://files.pythonhosted.org/packages/9c/86/5d99feea3f77c7234b8718075b23db11532773c1a0dbd9b9490215dc2eeb/pydantic_core-2.46.4-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:9d56801be94b86a9da183e5f3766e6310752b99ff647e38b09a9500d88e46e76", size = 2232757, upload-time = "2026-05-06T13:39:01.149Z" }, + { url = "https://files.pythonhosted.org/packages/d2/3a/508ac615935ef7588cf6d9e9b91309fdc2da751af865e02a9098de88258c/pydantic_core-2.46.4-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2412e734dcb48da14d4e4006b82b46b74f2518b8a26ee7e58c6844a6cd6d03c4", size = 2309275, 
upload-time = "2026-05-06T13:37:41.406Z" }, + { url = "https://files.pythonhosted.org/packages/07/f8/41db9de19d7987d6b04715a02b3b40aea467000275d9d758ffaa31af7d50/pydantic_core-2.46.4-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9551187363ffc0de2a00b2e47c25aeaeb1020b69b668762966df15fc5659dd5a", size = 2094467, upload-time = "2026-05-06T13:39:18.847Z" }, + { url = "https://files.pythonhosted.org/packages/2c/e2/f35033184cb11d0052daf4416e8e10a502ea2ac006fc4f459aee872727d1/pydantic_core-2.46.4-cp313-cp313-manylinux_2_31_riscv64.whl", hash = "sha256:0186750b482eefa11d7f435892b09c5c606193ef3375bcf94aa00ae6bfb66262", size = 2134417, upload-time = "2026-05-06T13:40:17.944Z" }, + { url = "https://files.pythonhosted.org/packages/7e/7b/6ceeb1cc90e193862f444ebe373d8fdf613f0a82572dde03fb10734c6c71/pydantic_core-2.46.4-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:5855698a4856556d86e8e6cd8434bc3ac0314ee8e12089ae0e143f64c6256e4e", size = 2179782, upload-time = "2026-05-06T13:40:32.618Z" }, + { url = "https://files.pythonhosted.org/packages/5a/f2/c8d7773ede6af08036423a00ae0ceffce266c3c52a096c435d68c896083f/pydantic_core-2.46.4-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:cbaf13819775b7f769bf4a1f066cb6df7a28d4480081a589828ef190226881cd", size = 2188782, upload-time = "2026-05-06T13:36:51.018Z" }, + { url = "https://files.pythonhosted.org/packages/59/31/0c864784e31f09f05cdd87606f08923b9c9e7f6e51dd27f20f62f975ce9f/pydantic_core-2.46.4-cp313-cp313-musllinux_1_1_armv7l.whl", hash = "sha256:633147d34cf4550417f12e2b1a0383973bdf5cdfde212cb09e9a581cf10820be", size = 2328334, upload-time = "2026-05-06T13:40:37.764Z" }, + { url = "https://files.pythonhosted.org/packages/c2/eb/4f6c8a41efa30baa755590f4141abf3a8c370fab610915733e74134a7270/pydantic_core-2.46.4-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:82cf5301172168103724d49a1444d3378cb20cdee30b116a1bd6031236298a5d", size = 2372986, upload-time = "2026-05-06T13:39:34.152Z" 
}, + { url = "https://files.pythonhosted.org/packages/5b/24/b375a480d53113860c299764bfe9f349a3dc9108b3adc0d7f0d786492ebf/pydantic_core-2.46.4-cp313-cp313-win32.whl", hash = "sha256:9fa8ae11da9e2b3126c6426f147e0fba88d96d65921799bb30c6abd1cb2c97fb", size = 1973693, upload-time = "2026-05-06T13:37:55.072Z" }, + { url = "https://files.pythonhosted.org/packages/7e/e8/cff247591966f2d22ec8c003cd7587e27b7ba7b81ab2fb888e3ab75dc285/pydantic_core-2.46.4-cp313-cp313-win_amd64.whl", hash = "sha256:6b3ace8194b0e5204818c92802dcdca7fc6d88aabbb799d7c795540d9cd6d292", size = 2071819, upload-time = "2026-05-06T13:38:49.139Z" }, + { url = "https://files.pythonhosted.org/packages/c6/1a/f4aee670d5670e9e148e0c82c7db98d780be566c6e6a97ee8035528ca0b3/pydantic_core-2.46.4-cp313-cp313-win_arm64.whl", hash = "sha256:184c081504d17f1c1066e430e117142b2c77d9448a97f7b65c6ac9fd9aee238d", size = 2027411, upload-time = "2026-05-06T13:40:45.796Z" }, + { url = "https://files.pythonhosted.org/packages/8d/74/228a26ddad29c6672b805d9fd78e8d251cd04004fa7eed0e622096cd0250/pydantic_core-2.46.4-cp314-cp314-macosx_10_12_x86_64.whl", hash = "sha256:428e04521a40150c85216fc8b85e8d39fece235a9cf5e383761238c7fa9b96fb", size = 2102079, upload-time = "2026-05-06T13:38:41.019Z" }, + { url = "https://files.pythonhosted.org/packages/ad/1f/8970b150a4b4365623ae00fc88603491f763c627311ae8031e3111356d6e/pydantic_core-2.46.4-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:23ace664830ee0bfe014a0c7bc248b1f7f25ed7ad103852c317624a1083af462", size = 1952179, upload-time = "2026-05-06T13:36:59.812Z" }, + { url = "https://files.pythonhosted.org/packages/95/30/5211a831ae054928054b2f79731661087a2bc5c01e825c672b3a4a8f1b3e/pydantic_core-2.46.4-cp314-cp314-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ce5c1d2a8b27468f433ca974829c44060b8097eedc39933e3c206a90ee49c4a9", size = 1978926, upload-time = "2026-05-06T13:37:39.933Z" }, + { url = 
"https://files.pythonhosted.org/packages/57/e9/689668733b1eb67adeef047db3c2e8788fcf65a7fd9c9e2b46b7744fe245/pydantic_core-2.46.4-cp314-cp314-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:7283d57845ecf5a163403eb0702dfc220cc4fbdd18919cb5ccea4f95ee1cdab4", size = 2046785, upload-time = "2026-05-06T13:38:01.995Z" }, + { url = "https://files.pythonhosted.org/packages/60/d9/6715260422ff50a2109878fd24d948a6c3446bb2664f34ee78cd972b3acd/pydantic_core-2.46.4-cp314-cp314-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:8daafc69c93ee8a0204506a3b6b30f586ef54028f52aeeeb5c4cfc5184fd5914", size = 2228733, upload-time = "2026-05-06T13:40:50.371Z" }, + { url = "https://files.pythonhosted.org/packages/18/ae/fdb2f64316afca925640f8e70bb1a564b0ec2721c1389e25b8eb4bf9a299/pydantic_core-2.46.4-cp314-cp314-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:cd2213145bcc2ba85884d0ac63d222fece9209678f77b9b4d76f054c561adb28", size = 2307534, upload-time = "2026-05-06T13:37:21.531Z" }, + { url = "https://files.pythonhosted.org/packages/89/1d/8eff589b45bb8190a9d12c49cfad0f176a5cbd1534908a6b5125e2886239/pydantic_core-2.46.4-cp314-cp314-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7a5f930472650a82629163023e630d160863fce524c616f4e5186e5de9d9a49b", size = 2099732, upload-time = "2026-05-06T13:39:31.942Z" }, + { url = "https://files.pythonhosted.org/packages/06/d5/ee5a3366637fee41dee51a1fc91562dcf12ddbc68fda34e6b253da2324bb/pydantic_core-2.46.4-cp314-cp314-manylinux_2_31_riscv64.whl", hash = "sha256:c1b3f518abeca3aa13c712fd202306e145abf59a18b094a6bafb2d2bbf59192c", size = 2129627, upload-time = "2026-05-06T13:37:25.033Z" }, + { url = "https://files.pythonhosted.org/packages/94/33/2414be571d2c6a6c4d08be21f9292b6d3fdb08949a97b6dfe985017821db/pydantic_core-2.46.4-cp314-cp314-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:1a7dd0b3ee80d90150e3495a3a13ac34dbcbfd4f012996a6a1d8900e91b5c0fb", size = 2179141, upload-time = 
"2026-05-06T13:37:14.046Z" }, + { url = "https://files.pythonhosted.org/packages/7b/79/7daa95be995be0eecc4cf75064cb33f9bbbfe3fe0158caf2f0d4a996a5c7/pydantic_core-2.46.4-cp314-cp314-musllinux_1_1_aarch64.whl", hash = "sha256:3fb702cd90b0446a3a1c5e470bfa0dd23c0233b676a9099ddcc964fa6ca13898", size = 2184325, upload-time = "2026-05-06T13:36:53.615Z" }, + { url = "https://files.pythonhosted.org/packages/9f/cb/d0a382f5c0de8a222dc61c65348e0ce831b1f68e0a018450d31c2cace3a5/pydantic_core-2.46.4-cp314-cp314-musllinux_1_1_armv7l.whl", hash = "sha256:b8458003118a712e66286df6a707db01c52c0f52f7db8e4a38f0da1d3b94fc4e", size = 2323990, upload-time = "2026-05-06T13:40:29.971Z" }, + { url = "https://files.pythonhosted.org/packages/05/db/d9ba624cc4a5aced1598e88c04fdbd8310c8a69b9d38b9a3d39ce3a61ed7/pydantic_core-2.46.4-cp314-cp314-musllinux_1_1_x86_64.whl", hash = "sha256:372429a130e469c9cd698925ce5fc50940b7a1336b0d82038e63d5bbc4edc519", size = 2369978, upload-time = "2026-05-06T13:37:23.027Z" }, + { url = "https://files.pythonhosted.org/packages/f2/20/d15df15ba918c423461905802bfd2981c3af0bfa0e40d05e13edbfa48bc3/pydantic_core-2.46.4-cp314-cp314-win32.whl", hash = "sha256:85bb3611ff1802f3ee7fdd7dbff26b56f343fb432d57a4728fdd49b6ef35e2f4", size = 1966354, upload-time = "2026-05-06T13:38:03.499Z" }, + { url = "https://files.pythonhosted.org/packages/fc/b6/6b8de4c0a7d7ab3004c439c80c5c1e0a3e8d78bbae19379b01960383d9e5/pydantic_core-2.46.4-cp314-cp314-win_amd64.whl", hash = "sha256:811ff8e9c313ab425368bcbb36e5c4ebd7108c2bbf4e4089cfbb0b01eff63fac", size = 2072238, upload-time = "2026-05-06T13:39:40.807Z" }, + { url = "https://files.pythonhosted.org/packages/32/36/51eb763beec1f4cf59b1db243a7dcc39cbb41230f050a09b9d69faaf0a48/pydantic_core-2.46.4-cp314-cp314-win_arm64.whl", hash = "sha256:bfec22eab3c8cc2ceec0248aec886624116dc079afa027ecc8ad4a7e62010f8a", size = 2018251, upload-time = "2026-05-06T13:37:26.72Z" }, + { url = 
"https://files.pythonhosted.org/packages/e8/91/855af51d625b23aa987116a19e231d2aaef9c4a415273ddc189b79a45fee/pydantic_core-2.46.4-cp314-cp314t-macosx_10_12_x86_64.whl", hash = "sha256:af8244b2bef6aaad6d92cda81372de7f8c8d36c9f0c3ea36e827c60e7d9467a0", size = 2099593, upload-time = "2026-05-06T13:39:47.682Z" }, + { url = "https://files.pythonhosted.org/packages/fb/1b/8784a54c65edb5f49f0a14d6977cf1b209bba85a4c77445b255c2de58ab3/pydantic_core-2.46.4-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:5a4330cdbc57162e4b3aa303f588ba752257694c9c9be3e7ebb11b4aca659b5d", size = 1935226, upload-time = "2026-05-06T13:40:40.428Z" }, + { url = "https://files.pythonhosted.org/packages/e8/e7/1955d28d1afc56dd4b3ad7cc0cf39df1b9852964cf16e5d13912756d6d6b/pydantic_core-2.46.4-cp314-cp314t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:29c61fc04a3d840155ff08e475a04809278972fe6aef51e2720554e96367e34b", size = 1974605, upload-time = "2026-05-06T13:37:32.029Z" }, + { url = "https://files.pythonhosted.org/packages/93/e2/3fedbf0ba7a22850e6e9fd78117f1c0f10f950182344d8a6c535d468fdd8/pydantic_core-2.46.4-cp314-cp314t-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:c50f2528cf200c5eed56faf3f4e22fcd5f38c157a8b78576e6ba3168ec35f000", size = 2030777, upload-time = "2026-05-06T13:38:55.239Z" }, + { url = "https://files.pythonhosted.org/packages/f8/61/46be275fcaaba0b4f5b9669dd852267ce1ff616592dccf7a7845588df091/pydantic_core-2.46.4-cp314-cp314t-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:0cbe8b01f948de4286c74cdd6c667aceb38f5c1e26f0693b3983d9d74887c65e", size = 2236641, upload-time = "2026-05-06T13:37:08.096Z" }, + { url = "https://files.pythonhosted.org/packages/60/db/12e93e46a8bac9988be3c016860f83293daea8c716c029c9ace279036f2f/pydantic_core-2.46.4-cp314-cp314t-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:617d7e2ca7dcb8c5cf6bcb8c59b8832c94b36196bbf1cbd1bfb56ed341905edd", size = 2286404, upload-time = "2026-05-06T13:40:20.221Z" 
}, + { url = "https://files.pythonhosted.org/packages/e2/4a/4d8b19008f38d31c53b8219cfedc2e3d5de5fe99d90076b7e767de29274f/pydantic_core-2.46.4-cp314-cp314t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7027560ee92211647d0d34e3f7cd6f50da56399d26a9c8ad0da286d3869a53f3", size = 2109219, upload-time = "2026-05-06T13:38:12.153Z" }, + { url = "https://files.pythonhosted.org/packages/88/70/3cbc40978fefb7bb09c6708d40d4ad1a5d70fd7213c3d17f971de868ec1f/pydantic_core-2.46.4-cp314-cp314t-manylinux_2_31_riscv64.whl", hash = "sha256:f99626688942fb746e545232e7726926f3be91b5975f8b55327665fafda991c7", size = 2110594, upload-time = "2026-05-06T13:40:02.971Z" }, + { url = "https://files.pythonhosted.org/packages/9d/20/b8d36736216e29491125531685b2f9e61aa5b4b2599893f8268551da3338/pydantic_core-2.46.4-cp314-cp314t-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:fc3e9034a63de20e15e8ade85358bc6efc614008cab72898b4b4952bea0509ff", size = 2159542, upload-time = "2026-05-06T13:39:27.506Z" }, + { url = "https://files.pythonhosted.org/packages/1d/a2/367df868eb584dacf6bf82a389272406d7178e301c4ac82545ab98bc2dd9/pydantic_core-2.46.4-cp314-cp314t-musllinux_1_1_aarch64.whl", hash = "sha256:97e7cf2be5c77b7d1a9713a05605d49460d02c6078d38d8bef3cbe323c548424", size = 2168146, upload-time = "2026-05-06T13:38:31.93Z" }, + { url = "https://files.pythonhosted.org/packages/c1/b8/4460f77f7e201893f649a29ab355dddd3beee8a97bcb1a320db414f9a06e/pydantic_core-2.46.4-cp314-cp314t-musllinux_1_1_armv7l.whl", hash = "sha256:3bf92c5d0e00fefaab325a4d27828fe6b6e2a21848686b5b60d2d9eeb09d76c6", size = 2306309, upload-time = "2026-05-06T13:37:44.717Z" }, + { url = "https://files.pythonhosted.org/packages/64/c4/be2639293acd87dc8ddbcec41a73cee9b2ebf996fe6d892a1a74e88ad3f7/pydantic_core-2.46.4-cp314-cp314t-musllinux_1_1_x86_64.whl", hash = "sha256:3ecbc122d18468d06ca279dc26a8c2e2d5acb10943bb35e36ae92096dc3b5565", size = 2369736, upload-time = "2026-05-06T13:37:05.645Z" }, + { url = 
"https://files.pythonhosted.org/packages/30/a6/9f9f380dbb301f67023bf8f707aaa75daadf84f7152d95c410fd7e81d994/pydantic_core-2.46.4-cp314-cp314t-win32.whl", hash = "sha256:e846ae7835bf0703ae43f534ab79a867146dadd59dc9ca5c8b53d5c8f7c9ef02", size = 1955575, upload-time = "2026-05-06T13:38:51.116Z" }, + { url = "https://files.pythonhosted.org/packages/40/1f/f1eb9eb350e795d1af8586289746f5c5677d16043040d63710e22abc43c9/pydantic_core-2.46.4-cp314-cp314t-win_amd64.whl", hash = "sha256:2108ba5c1c1eca18030634489dc544844144ee36357f2f9f780b93e7ddbb44b5", size = 2051624, upload-time = "2026-05-06T13:38:21.672Z" }, + { url = "https://files.pythonhosted.org/packages/f6/d2/42dd53d0a85c27606f316d3aa5d2869c4e8470a5ed6dec30e4a1abe19192/pydantic_core-2.46.4-cp314-cp314t-win_arm64.whl", hash = "sha256:4fcbe087dbc2068af7eda3aa87634eba216dbda64d1ae73c8684b621d33f6596", size = 2017325, upload-time = "2026-05-06T13:40:52.723Z" }, + { url = "https://files.pythonhosted.org/packages/9d/1d/8987ad40f65ae1432753072f214fb5c74fe47ffbd0698bb9cbbb585664f8/pydantic_core-2.46.4-graalpy312-graalpy250_312_native-macosx_10_12_x86_64.whl", hash = "sha256:1d8ba486450b14f3b1d63bc521d410ec7565e52f887b9fb671791886436a42f7", size = 2095527, upload-time = "2026-05-06T13:39:52.283Z" }, + { url = "https://files.pythonhosted.org/packages/64/d3/84c282a7eee1d3ac4c0377546ef5a1ea436ce26840d9ac3b7ed54a377507/pydantic_core-2.46.4-graalpy312-graalpy250_312_native-macosx_11_0_arm64.whl", hash = "sha256:3009f12e4e90b7f88b4f9adb1b0c4a3d58fe7820f3238c190047209d148026df", size = 1936024, upload-time = "2026-05-06T13:40:15.671Z" }, + { url = "https://files.pythonhosted.org/packages/d7/ca/eac61596cdeb4d7e174d3dc0bd8a6238f14f75f97a24e7b7db4c7e7340a0/pydantic_core-2.46.4-graalpy312-graalpy250_312_native-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ad785e92e6dc634c21555edc8bd6b64957ab844541bcb96a1366c202951ae526", size = 1990696, upload-time = "2026-05-06T13:38:34.717Z" }, + { url = 
"https://files.pythonhosted.org/packages/fa/c3/7c8b240552251faf6b3a957db200fcfbbcec36763c050428b601e0c9b83b/pydantic_core-2.46.4-graalpy312-graalpy250_312_native-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:00c603d540afdd6b80eb39f078f33ebd46211f02f33e34a32d9f053bba711de0", size = 2147590, upload-time = "2026-05-06T13:39:29.883Z" }, +] + [[package]] name = "pygments" version = "2.20.0" @@ -998,6 +1149,19 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/d7/8e/7540e8a2036f79a125c1d2ebadf69ed7901608859186c856fa0388ef4197/requests-2.33.1-py3-none-any.whl", hash = "sha256:4e6d1ef462f3626a1f0a0a9c42dd93c63bad33f9f1c1937509b8c5c8718ab56a", size = 64947, upload-time = "2026-03-30T16:09:13.83Z" }, ] +[[package]] +name = "rich" +version = "14.3.4" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "markdown-it-py" }, + { name = "pygments" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/e9/67/cae617f1351490c25a4b8ac3b8b63a4dda609295d8222bad12242dfdc629/rich-14.3.4.tar.gz", hash = "sha256:817e02727f2b25b40ef56f5aa2217f400c8489f79ca8f46ea2b70dd5e14558a9", size = 230524, upload-time = "2026-04-11T02:57:45.419Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/b3/76/6d163cfac87b632216f71879e6b2cf17163f773ff59c00b5ff4900a80fa3/rich-14.3.4-py3-none-any.whl", hash = "sha256:07e7adb4690f68864777b1450859253bed81a99a31ac321ac1817b2313558952", size = 310480, upload-time = "2026-04-11T02:57:47.484Z" }, +] + [[package]] name = "ruff" version = "0.14.11" @@ -1033,6 +1197,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/e0/76/f963c61683a39084aa575f98089253e1e852a4417cb8a3a8a422923a5246/setuptools-80.10.1-py3-none-any.whl", hash = "sha256:fc30c51cbcb8199a219c12cc9c281b5925a4978d212f84229c909636d9f6984e", size = 1099859, upload-time = "2026-01-21T09:42:00.688Z" }, ] +[[package]] +name = "shellingham" +version = "1.5.4" +source = { registry = "https://pypi.org/simple" } +sdist = { url 
= "https://files.pythonhosted.org/packages/58/15/8b3609fd3830ef7b27b655beb4b4e9c62313a4e8da8c676e142cc210d58e/shellingham-1.5.4.tar.gz", hash = "sha256:8dbca0739d487e5bd35ab3ca4b36e11c4078f3a234bfce294b0a0291363404de", size = 10310, upload-time = "2023-10-24T04:13:40.426Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/e0/f9/0595336914c5619e5f28a1fb793285925a8cd4b432c9da0a987836c7f822/shellingham-1.5.4-py2.py3-none-any.whl", hash = "sha256:7ecfff8f2fd72616f7481040475a65b2bf8af90a56c89140852d1120324e8686", size = 9755, upload-time = "2023-10-24T04:13:38.866Z" }, +] + [[package]] name = "six" version = "1.17.0" @@ -1086,6 +1259,30 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/bf/e1/3ccb13c643399d22289c6a9786c1a91e3dcbb68bce4beb44926ac2c557bf/sqlalchemy-2.0.45-py3-none-any.whl", hash = "sha256:5225a288e4c8cc2308dbdd874edad6e7d0fd38eac1e9e5f23503425c8eee20d0", size = 1936672, upload-time = "2025-12-09T21:54:52.608Z" }, ] +[[package]] +name = "tomli-w" +version = "1.2.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/19/75/241269d1da26b624c0d5e110e8149093c759b7a286138f4efd61a60e75fe/tomli_w-1.2.0.tar.gz", hash = "sha256:2dd14fac5a47c27be9cd4c976af5a12d87fb1f0b4512f81d69cce3b35ae25021", size = 7184, upload-time = "2025-01-15T12:07:24.262Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/c7/18/c86eb8e0202e32dd3df50d43d7ff9854f8e0603945ff398974c1d91ac1ef/tomli_w-1.2.0-py3-none-any.whl", hash = "sha256:188306098d013b691fcadc011abd66727d3c414c571bb01b1a174ba8c983cf90", size = 6675, upload-time = "2025-01-15T12:07:22.074Z" }, +] + +[[package]] +name = "typer" +version = "0.26.7" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "annotated-doc" }, + { name = "colorama", marker = "sys_platform == 'win32'" }, + { name = "rich" }, + { name = "shellingham" }, +] +sdist = { url = 
"https://files.pythonhosted.org/packages/5e/ed/ef06584ccdd5c410df0837951ecd7e15d9a6144ea1bd4c73cecab1a89891/typer-0.26.7.tar.gz", hash = "sha256:e314a34c617e419c091b2830dda3ea1f257134ff593061a8f5b9717ab8dddb3a", size = 201709, upload-time = "2026-06-03T07:18:06.843Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/24/25/2201973529af2c954de0bb725323c3aaed6d7f0ceee8f550dec9185df013/typer-0.26.7-py3-none-any.whl", hash = "sha256:5c87cfbc5d34491c5346ebf49c23e18d56ccb863268d3a8d592b26087c2f5e58", size = 122456, upload-time = "2026-06-03T07:18:05.732Z" }, +] + [[package]] name = "typing-extensions" version = "4.15.0" @@ -1095,6 +1292,18 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/18/67/36e9267722cc04a6b9f15c7f3441c2363321a3ea07da7ae0c0707beb2a9c/typing_extensions-4.15.0-py3-none-any.whl", hash = "sha256:f0fa19c6845758ab08074a0cfa8b7aecb71c999ca73d62883bc25cc018c4e548", size = 44614, upload-time = "2025-08-25T13:49:24.86Z" }, ] +[[package]] +name = "typing-inspection" +version = "0.4.2" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/55/e3/70399cb7dd41c10ac53367ae42139cf4b1ca5f36bb3dc6c9d33acdb43655/typing_inspection-0.4.2.tar.gz", hash = "sha256:ba561c48a67c5958007083d386c3295464928b01faa735ab8547c5692e87f464", size = 75949, upload-time = "2025-10-01T02:14:41.687Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/dc/9b/47798a6c91d8bdb567fe2698fe81e0c6b7cb7ef4d13da4114b41d239f65d/typing_inspection-0.4.2-py3-none-any.whl", hash = "sha256:4ed1cacbdc298c220f1bd249ed5287caa16f34d44ef4e9c3d0cbad5b521545e7", size = 14611, upload-time = "2025-10-01T02:14:40.154Z" }, +] + [[package]] name = "tzdata" version = "2025.3" From 10769644e0b97c76ba8ae929976da02572f8c9c0 Mon Sep 17 00:00:00 2001 From: Nico Loesch Date: Mon, 15 Jun 2026 06:07:22 +0000 Subject: [PATCH 10/11] Bump oa-configurator version to support python 
3.13 --- pyproject.toml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 3f2a5f4..e9687a1 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -9,7 +9,7 @@ authors = [ requires-python = ">=3.12" dependencies = [ "chardet>=5.2.0", - "oa-configurator>=0.1.0", + "oa-configurator>=0.1.1", "pandas>=2.3.3", "pyarrow>=23.0.0", "sqlalchemy>=2.0.45", @@ -42,7 +42,7 @@ postgres = [ "psycopg[binary]>=3.2", ] dev = [ - "oa-configurator[postgres]>=0.1.0", + "oa-configurator[postgres]>=0.1.1", "pytest>=9.0.3", "mypy>=1.19.1", "ruff>=0.14.11", From c575a1335b1dd629a7283eec1e7d93dd763ccf4b Mon Sep 17 00:00:00 2001 From: Nico Loesch Date: Mon, 15 Jun 2026 22:06:23 +0000 Subject: [PATCH 11/11] Address PR comments --- CHANGELOG.md | 6 +++ pyproject.toml | 2 +- src/orm_loader/backends/postgres.py | 6 +-- src/orm_loader/loaders/loading_helpers.py | 2 + tests/backends/test_postgres_backend.py | 47 ++++++++++++++++++++++- uv.lock | 12 +++--- 6 files changed, 64 insertions(+), 11 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 78358e9..fc1524f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,9 @@ +## 0.5.0 +- paginated merge support via optional `merge_batch_size` parameter to bound memory per transaction on large datasets +- more efficient backend resolving +- logging namespace updated to reflect package name +- Python 3.13 support (updated oa-configurator dependency) + ## 0.1.0 - initial commit - stripped out generalisable functionality from omop-alchemy so that it could be reused in multiple clinical data models diff --git a/pyproject.toml b/pyproject.toml index e9687a1..627307d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "orm-loader" -version = "0.4.1" +version = "0.5.0" description = "Generic base classes to handle ORM functionality for multiple downstream datamodels" readme = "README.md" authors = [ diff --git a/src/orm_loader/backends/postgres.py 
b/src/orm_loader/backends/postgres.py index 5511747..6118ed6 100644 --- a/src/orm_loader/backends/postgres.py +++ b/src/orm_loader/backends/postgres.py @@ -151,7 +151,7 @@ def merge_replace( session.execute(non_paginated_replace) return - session.execute(sa.text(f'CREATE INDEX ON "{staging_name}" (_rownum)')) + session.execute(sa.text(f'CREATE INDEX IF NOT EXISTS "{staging_name}_rownum_idx" ON "{staging_name}" (_rownum)')) session.commit() start = 0 @@ -196,7 +196,7 @@ def merge_upsert( session.execute(non_paginated_upsert) return - session.execute(sa.text(f'CREATE INDEX ON "{staging_name}" (_rownum)')) + session.execute(sa.text(f'CREATE INDEX IF NOT EXISTS "{staging_name}_rownum_idx" ON "{staging_name}" (_rownum)')) session.commit() start = 0 @@ -244,7 +244,7 @@ def merge_insert( # INSERT in batch-sized transactions to bound WAL per commit. # session_replication_role='replica' is session-level and persists # across commits, so FK checks stay disabled for all batches. - session.execute(sa.text(f'CREATE INDEX ON "{staging_name}" (_rownum)')) + session.execute(sa.text(f'CREATE INDEX IF NOT EXISTS "{staging_name}_rownum_idx" ON "{staging_name}" (_rownum)')) session.commit() start = 0 diff --git a/src/orm_loader/loaders/loading_helpers.py b/src/orm_loader/loaders/loading_helpers.py index 4deb13d..25fc949 100644 --- a/src/orm_loader/loaders/loading_helpers.py +++ b/src/orm_loader/loaders/loading_helpers.py @@ -272,6 +272,8 @@ def quick_load_pg( with open(path, "rb") as _f_peek: _raw_hdr = _f_peek.readline().decode(encoding) _nl = check_line_ending(_raw_hdr) + # _hash is an internal convention for encrypted/hashed columns; strip it so + # CSV headers map to the base column names that PostgreSQL COPY expects. 
_csv_cols = [c.strip().lower().replace('_hash', '') for c in _raw_hdr.rstrip(_nl).split(delimiter)] _cols_sql = ", ".join(f'"{c}"' for c in _csv_cols) diff --git a/tests/backends/test_postgres_backend.py b/tests/backends/test_postgres_backend.py index 936da8b..360b331 100644 --- a/tests/backends/test_postgres_backend.py +++ b/tests/backends/test_postgres_backend.py @@ -30,7 +30,7 @@ def __init__(self, scalar_result: str | int = "origin") -> None: self.scalar_result = scalar_result self.commits = 0 - def execute(self, statement): + def execute(self, statement, parameters=None): if hasattr(statement, "compile"): sql = str(statement.compile(dialect=postgresql.dialect())) else: @@ -153,6 +153,51 @@ def test_postgres_backend_merge_upsert_excludes_computed_columns(): assert 'ON CONFLICT ("id") DO NOTHING' in sql +def test_postgres_backend_merge_replace_paginated_path(): + backend = PostgresBackend() + session = _FakeSession(scalar_result=10) + + backend.merge_replace( + _ComputedTableCls, _sess(session), "target_table", "_staging_target_table", + ["id", "name"], merge_batch_size=3, + ) + + sqls = session.statements + assert any("CREATE INDEX IF NOT EXISTS" in s and "_rownum" in s for s in sqls) + assert any("_rownum >" in s and "DELETE" in s for s in sqls) + assert session.commits >= 4 # 1 for index + 4 batches (ceil(10/3)) + + +def test_postgres_backend_merge_insert_paginated_path(): + backend = PostgresBackend() + session = _FakeSession(scalar_result=10) + + backend.merge_insert( + _ComputedTableCls, _sess(session), "target_table", "_staging_target_table", + merge_batch_size=3, + ) + + sqls = session.statements + assert any("CREATE INDEX IF NOT EXISTS" in s and "_rownum" in s for s in sqls) + assert any("_rownum >" in s and "INSERT" in s for s in sqls) + assert session.commits >= 4 + + +def test_postgres_backend_merge_upsert_paginated_path(): + backend = PostgresBackend() + session = _FakeSession(scalar_result=10) + + backend.merge_upsert( + _ComputedTableCls, 
_sess(session), "target_table", "_staging_target_table", + ["id"], merge_batch_size=3, + ) + + sqls = session.statements + assert any("CREATE INDEX IF NOT EXISTS" in s and "_rownum" in s for s in sqls) + assert any("_rownum >" in s and "INSERT" in s for s in sqls) + assert session.commits >= 4 + + def test_postgres_backend_materialized_view_methods_emit_expected_sql(): backend = PostgresBackend() session = _FakeSession() diff --git a/uv.lock b/uv.lock index 02e8aff..d97edf7 100644 --- a/uv.lock +++ b/uv.lock @@ -653,7 +653,7 @@ wheels = [ [[package]] name = "oa-configurator" -version = "0.1.0" +version = "0.1.1" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "click" }, @@ -663,9 +663,9 @@ dependencies = [ { name = "tomli-w" }, { name = "typer" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/04/bf/abe1fd37a4e717d10b53ce1f73f91ddc8baef39d0c1bda0c6123bf702a6e/oa_configurator-0.1.0.tar.gz", hash = "sha256:2bd8e8416abc4dc99ff78a7961b445c499e7fd25f649a967be037fec5f2d2d08", size = 84105, upload-time = "2026-06-15T01:18:50.128Z" } +sdist = { url = "https://files.pythonhosted.org/packages/0d/7b/1c3aa611b6a4b0441314e3f8fbbf66e4a6dd3377ff039b434d6e69d6a52f/oa_configurator-0.1.1.tar.gz", hash = "sha256:888608c265799e670097bbb5d967460449c51e03dabe97176ad8443d992ce3e6", size = 123250, upload-time = "2026-06-15T06:05:57.8Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/4a/ab/18695a718a53526823d7bf32194d6d78f4ea6dc0062ee17b43c6058e03f7/oa_configurator-0.1.0-py3-none-any.whl", hash = "sha256:7165984090711adff857a76d0468bb789f9b4756081a5688f609f487391a8dba", size = 30092, upload-time = "2026-06-15T01:18:49.163Z" }, + { url = "https://files.pythonhosted.org/packages/c9/87/7437b6dce109f7c84267ec1b87bd1897bb983ba0dcc4d5fba82b28d2bc20/oa_configurator-0.1.1-py3-none-any.whl", hash = "sha256:2b63a87c6d242b6e49b219d61bbacf664454beb1755e8747cdff910611633747", size = 30088, upload-time = "2026-06-15T06:05:58.632Z" }, ] 
[package.optional-dependencies] @@ -675,7 +675,7 @@ postgres = [ [[package]] name = "orm-loader" -version = "0.4.1" +version = "0.5.0" source = { editable = "." } dependencies = [ { name = "chardet" }, @@ -711,8 +711,8 @@ requires-dist = [ { name = "mkdocs-mermaid2-plugin", marker = "extra == 'dev'" }, { name = "mkdocstrings-python", marker = "extra == 'dev'", specifier = ">=2.0.1" }, { name = "mypy", marker = "extra == 'dev'", specifier = ">=1.19.1" }, - { name = "oa-configurator", specifier = ">=0.1.0" }, - { name = "oa-configurator", extras = ["postgres"], marker = "extra == 'dev'", specifier = ">=0.1.0" }, + { name = "oa-configurator", specifier = ">=0.1.1" }, + { name = "oa-configurator", extras = ["postgres"], marker = "extra == 'dev'", specifier = ">=0.1.1" }, { name = "pandas", specifier = ">=2.3.3" }, { name = "psycopg", extras = ["binary"], marker = "extra == 'postgres'", specifier = ">=3.2" }, { name = "pyarrow", specifier = ">=23.0.0" },