Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 12 additions & 3 deletions dataframely/columns/array.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) QuantCo 2025-2025
# Copyright (c) QuantCo 2025-2026
# SPDX-License-Identifier: BSD-3-Clause

from __future__ import annotations
Expand Down Expand Up @@ -97,8 +97,17 @@ def validation_rules(self, expr: pl.Expr) -> dict[str, pl.Expr]:
}

def sqlalchemy_dtype(self, dialect: sa.Dialect) -> sa_TypeEngine:
# NOTE: We might want to add support for PostgreSQL's ARRAY type or use JSON in the future.
raise NotImplementedError("SQL column cannot have 'Array' type.")
match dialect.name:
case "postgresql":
# Note that the length of the array in each dimension is not supported in SQLAlchemy
# That is because PostgreSQL does not enforce the length anyway
return sa.ARRAY(
self.inner.sqlalchemy_dtype(dialect), dimensions=len(self.shape)
)
case _:
raise NotImplementedError(
f"SQL column cannot have 'Array' type for dialect '{dialect}'."
)

def _pyarrow_field_of_shape(self, shape: Sequence[int]) -> pa.Field:
if shape:
Expand Down
10 changes: 6 additions & 4 deletions dataframely/columns/binary.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) QuantCo 2025-2025
# Copyright (c) QuantCo 2025-2026
# SPDX-License-Identifier: BSD-3-Clause

from __future__ import annotations
Expand All @@ -21,9 +21,11 @@ def dtype(self) -> pl.DataType:
return pl.Binary()

def sqlalchemy_dtype(self, dialect: sa.Dialect) -> sa_TypeEngine:
if dialect.name == "mssql":
return sa.VARBINARY()
return sa.LargeBinary()
match dialect.name:
case "mssql":
return sa.VARBINARY()
case _:
return sa.LargeBinary()

@property
def pyarrow_dtype(self) -> pa.DataType:
Expand Down
11 changes: 8 additions & 3 deletions dataframely/columns/list.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) QuantCo 2025-2025
# Copyright (c) QuantCo 2025-2026
# SPDX-License-Identifier: BSD-3-Clause

from __future__ import annotations
Expand Down Expand Up @@ -120,8 +120,13 @@ def validation_rules(self, expr: pl.Expr) -> dict[str, pl.Expr]:
}

def sqlalchemy_dtype(self, dialect: sa.Dialect) -> sa_TypeEngine:
# NOTE: We might want to add support for PostgreSQL's ARRAY type or use JSON in the future.
raise NotImplementedError("SQL column cannot have 'List' type.")
match dialect.name:
case "postgresql":
return sa.ARRAY(self.inner.sqlalchemy_dtype(dialect))
case _:
raise NotImplementedError(
f"SQL column cannot have 'List' type for dialect '{dialect}'."
)

@property
def pyarrow_dtype(self) -> pa.DataType:
Expand Down
8 changes: 6 additions & 2 deletions tests/columns/test_sqlalchemy_columns.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,10 @@ def test_mssql_datatype(column: Column, datatype: str) -> None:
(dy.String(regex="^[abc]{1,3}d$"), "VARCHAR(4)"),
(dy.Enum(["foo", "bar"]), "CHAR(3)"),
(dy.Enum(["a", "abc"]), "VARCHAR(3)"),
(dy.List(dy.Integer()), "INTEGER[]"),
(dy.List(dy.String(max_length=5)), "VARCHAR(5)[]"),
(dy.Array(dy.Integer(), shape=5), "INTEGER[]"),
(dy.Array(dy.String(max_length=5), shape=(2, 1)), "VARCHAR(5)[][]"),
(dy.Struct({"a": dy.String(nullable=True)}), "JSONB"),
],
)
Expand Down Expand Up @@ -137,15 +141,15 @@ def test_sql_multiple_columns(dialect: Dialect) -> None:
assert len(schema.to_sqlalchemy_columns(dialect)) == 2


# Parametrised once, over the dialect that is still expected to raise here;
# stacking a second ``parametrize`` for the same argument is rejected by pytest.
@pytest.mark.parametrize("dialect", [MSDialect_pyodbc()])
def test_raise_for_list_column(dialect: Dialect) -> None:
    """A ``List`` column must raise ``NotImplementedError`` for the given dialect."""
    # ``match`` is a regular-expression search, so only the literal prefix of
    # the message is given (no trailing ``.``, which would act as a wildcard).
    with pytest.raises(
        NotImplementedError, match="SQL column cannot have 'List' type"
    ):
        dy.List(dy.String()).sqlalchemy_dtype(dialect)


@pytest.mark.parametrize("dialect", [MSDialect_pyodbc(), PGDialect_psycopg2()])
@pytest.mark.parametrize("dialect", [MSDialect_pyodbc()])
def test_raise_for_array_column(dialect: Dialect) -> None:
with pytest.raises(
NotImplementedError, match="SQL column cannot have 'Array' type."
Expand Down
Loading