Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ jobs:
- name: Install dependencies
uses: astral-sh/setup-uv@v7
with:
enable-cache: true
enable-cache: true

# Download the Linux wheel built in the build workflow
- name: Download pre-built Linux wheel
Expand Down
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ repos:
- id: actionlint-docker
- repo: https://github.com/astral-sh/ruff-pre-commit
# Ruff version.
rev: v0.9.10
rev: v0.15.1
hooks:
# Run the linter.
- id: ruff
Expand Down
4 changes: 2 additions & 2 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

12 changes: 6 additions & 6 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ protoc = ["datafusion-substrait/protoc"]
substrait = ["dep:datafusion-substrait"]

[dependencies]
tokio = { version = "1.47", features = [
tokio = { version = "1.49", features = [
"macros",
"rt",
"rt-multi-thread",
Expand All @@ -54,16 +54,16 @@ pyo3 = { version = "0.26", features = [
"abi3-py310",
] }
pyo3-async-runtimes = { version = "0.26", features = ["tokio-runtime"] }
pyo3-log = "0.13.2"
pyo3-log = "0.13.3"
arrow = { version = "57", features = ["pyarrow"] }
arrow-select = { version = "57" }
datafusion = { version = "52", features = ["avro", "unicode_expressions"] }
datafusion-substrait = { version = "52", optional = true }
datafusion-proto = { version = "52" }
datafusion-ffi = { version = "52" }
prost = "0.14.1" # keep in line with `datafusion-substrait`
prost = "0.14.3" # keep in line with `datafusion-substrait`
serde_json = "1"
uuid = { version = "1.18", features = ["v4"] }
uuid = { version = "1.21", features = ["v4"] }
mimalloc = { version = "0.1", optional = true, default-features = false, features = [
"local_dynamic_tls",
] }
Expand All @@ -77,11 +77,11 @@ object_store = { version = "0.12.4", features = [
"http",
] }
url = "2"
log = "0.4.27"
log = "0.4.29"
parking_lot = "0.12"

[build-dependencies]
prost-types = "0.14.1" # keep in line with `datafusion-substrait`
prost-types = "0.14.3" # keep in line with `datafusion-substrait`
pyo3-build-config = "0.26"

[lib]
Expand Down
81 changes: 41 additions & 40 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ description = "Build and run queries against data"
readme = "README.md"
license = { file = "LICENSE.txt" }
requires-python = ">=3.10"
keywords = ["datafusion", "dataframe", "rust", "query-engine"]
keywords = ["dataframe", "datafusion", "query-engine", "rust"]
classifiers = [
"Development Status :: 2 - Pre-Alpha",
"Intended Audience :: Developers",
Expand Down Expand Up @@ -62,7 +62,7 @@ profile = "black"
python-source = "python"
module-name = "datafusion._internal"
include = [{ path = "Cargo.lock", format = "sdist" }]
exclude = [".github/**", "ci/**", ".asf.yaml"]
exclude = [".asf.yaml", ".github/**", "ci/**"]
# Require Cargo.lock is up to date
locked = true
features = ["substrait"]
Expand All @@ -77,19 +77,19 @@ select = ["ALL"]
ignore = [
"A001", # Allow using words like min as variable names
"A002", # Allow using words like filter as variable names
"A005", # Allow module named io
"ANN401", # Allow Any for wrapper classes
"COM812", # Recommended to ignore these rules when using with ruff-format
"FIX002", # Allow TODO lines - consider removing at some point
"FBT001", # Allow boolean positional args
"FBT002", # Allow boolean positional args
"FIX002", # Allow TODO lines - consider removing at some point
"ISC001", # Recommended to ignore these rules when using with ruff-format
"N812", # Allow importing functions as `F`
"PD901", # Allow variable name df
"PLR0913", # Allow many arguments in function definition
"SLF001", # Allow accessing private members
"TD002", # Do not require author names in TODO statements
"TD003", # Allow TODO lines
"PLR0913", # Allow many arguments in function definition
"PD901", # Allow variable name df
"N812", # Allow importing functions as `F`
"A005", # Allow module named io
]

[tool.ruff.lint.pydocstyle]
Expand All @@ -99,7 +99,7 @@ convention = "google"
max-doc-length = 88

[tool.ruff.lint.flake8-boolean-trap]
extend-allowed-calls = ["lit", "datafusion.lit"]
extend-allowed-calls = ["datafusion.lit", "lit"]

# Disable docstring checking for these directories
[tool.ruff.lint.per-file-ignores]
Expand All @@ -108,68 +108,69 @@ extend-allowed-calls = ["lit", "datafusion.lit"]
"ARG",
"BLE001",
"D",
"S101",
"SLF",
"PD",
"PLC0415",
"PLR0913",
"PLR2004",
"PT004",
"PT011",
"RUF015",
"S101",
"S608",
"PLR0913",
"PT004",
"SLF",
]
"examples/*" = [
"D",
"W505",
"E501",
"T201",
"S101",
"PLR2004",
"ANN001",
"ANN202",
"INP001",
"D",
"DTZ007",
"E501",
"INP001",
"PLR2004",
"RUF015",
"S101",
"T201",
"W505",
]
"dev/*" = [
"ANN001",
"C",
"D",
"E",
"T",
"S",
"ERA001",
"EXE",
"N817",
"PLR",
"C",
"S",
"SIM",
"T",
"UP",
"EXE",
"N817",
"ERA001",
"ANN001",
]
"benchmarks/*" = [
"ANN001",
"BLE",
"D",
"E",
"ERA001",
"EXE",
"F",
"T",
"BLE",
"FURB",
"INP001",
"PLR",
"E",
"TD",
"TRY",
"S",
"SIM",
"EXE",
"T",
"TD",
"TRY",
"UP",
"ERA001",
"ANN001",
"INP001",
]
"docs/*" = ["D"]
"docs/source/conf.py" = ["ERA001", "ANN001", "INP001"]
"docs/source/conf.py" = ["ANN001", "ERA001", "INP001"]

[tool.codespell]
skip = ["./target", "uv.lock", "./python/tests/test_functions.py"]
skip = ["./python/tests/test_functions.py", "./target", "uv.lock"]
count = true
ignore-words-list = ["ans", "IST"]
ignore-words-list = ["IST", "ans"]

[dependency-groups]
dev = [
Expand All @@ -182,8 +183,8 @@ dev = [
"pre-commit>=4.3.0",
"pyarrow>=19.0.0",
"pygithub==2.5.0",
"pytest>=7.4.4",
"pytest-asyncio>=0.23.3",
"pytest>=7.4.4",
"pyyaml>=6.0.3",
"ruff>=0.9.1",
"toml>=0.10.2",
Expand All @@ -196,6 +197,6 @@ docs = [
"pickleshare>=0.7.5",
"pydata-sphinx-theme==0.8.0",
"setuptools>=75.3.0",
"sphinx>=7.1.2",
"sphinx-autoapi>=3.4.0",
"sphinx>=7.1.2",
]
2 changes: 2 additions & 0 deletions python/datafusion/expr.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@
See :ref:`Expressions` in the online documentation for more details.
"""

# ruff: noqa: PLC0415

from __future__ import annotations

from collections.abc import Iterable, Sequence
Expand Down
4 changes: 2 additions & 2 deletions python/datafusion/user_defined.py
Original file line number Diff line number Diff line change
Expand Up @@ -583,11 +583,11 @@ def from_pycapsule(func: AggregateUDFExportable | _PyCapsule) -> AggregateUDF:
AggregateUDF that is exported via the FFI bindings.
"""
if _is_pycapsule(func):
aggregate = cast(AggregateUDF, object.__new__(AggregateUDF))
aggregate = cast("AggregateUDF", object.__new__(AggregateUDF))
aggregate._udaf = df_internal.AggregateUDF.from_pycapsule(func)
return aggregate

capsule = cast(AggregateUDFExportable, func)
capsule = cast("AggregateUDFExportable", func)
name = str(capsule.__class__)
return AggregateUDF(
name=name,
Expand Down
2 changes: 1 addition & 1 deletion python/tests/test_catalog.py
Original file line number Diff line number Diff line change
Expand Up @@ -248,7 +248,7 @@ def test_exception_not_mangled(ctx: SessionContext):

schema.register_table("test_table", create_dataset())

with pytest.raises(ValueError, match="^test_table is not an acceptable name$"):
with pytest.raises(ValueError, match=r"^test_table is not an acceptable name$"):
ctx.sql(f"select * from {catalog_name}.{schema_name}.test_table")


Expand Down
2 changes: 1 addition & 1 deletion python/tests/test_dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -2790,7 +2790,7 @@ def test_write_parquet_with_options_encoding(tmp_path, encoding, data_types, res
def test_write_parquet_with_options_unsupported_encoding(df, tmp_path, encoding):
"""Test that unsupported Parquet encodings do not work."""
# BaseException is used since this throws a Rust panic: https://github.com/PyO3/pyo3/issues/3519
with pytest.raises(BaseException, match="Encoding .*? is not supported"):
with pytest.raises(BaseException, match=r"Encoding .*? is not supported"):
df.write_parquet_with_options(tmp_path, ParquetWriterOptions(encoding=encoding))


Expand Down
24 changes: 12 additions & 12 deletions python/tests/test_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -303,31 +303,31 @@ def py_flatten(arr):
lambda data: [np.concatenate([arr, arr]) for arr in data],
),
(
lambda col: f.array_dims(col),
f.array_dims,
lambda data: [[len(r)] for r in data],
),
(
lambda col: f.array_distinct(col),
f.array_distinct,
lambda data: [list(set(r)) for r in data],
),
(
lambda col: f.list_distinct(col),
f.list_distinct,
lambda data: [list(set(r)) for r in data],
),
(
lambda col: f.list_dims(col),
f.list_dims,
lambda data: [[len(r)] for r in data],
),
(
lambda col: f.array_element(col, literal(1)),
lambda data: [r[0] for r in data],
),
(
lambda col: f.array_empty(col),
f.array_empty,
lambda data: [len(r) == 0 for r in data],
),
(
lambda col: f.empty(col),
f.empty,
lambda data: [len(r) == 0 for r in data],
),
(
Expand All @@ -343,11 +343,11 @@ def py_flatten(arr):
lambda data: [r[0] for r in data],
),
(
lambda col: f.array_length(col),
f.array_length,
lambda data: [len(r) for r in data],
),
(
lambda col: f.list_length(col),
f.list_length,
lambda data: [len(r) for r in data],
),
(
Expand Down Expand Up @@ -391,11 +391,11 @@ def py_flatten(arr):
lambda data: [[i + 1 for i, _v in enumerate(r) if _v == 1.0] for r in data],
),
(
lambda col: f.array_ndims(col),
f.array_ndims,
lambda data: [np.array(r).ndim for r in data],
),
(
lambda col: f.list_ndims(col),
f.list_ndims,
lambda data: [np.array(r).ndim for r in data],
),
(
Expand All @@ -415,11 +415,11 @@ def py_flatten(arr):
lambda data: [np.insert(arr, 0, 99.0) for arr in data],
),
(
lambda col: f.array_pop_back(col),
f.array_pop_back,
lambda data: [arr[:-1] for arr in data],
),
(
lambda col: f.array_pop_front(col),
f.array_pop_front,
lambda data: [arr[1:] for arr in data],
),
(
Expand Down
2 changes: 1 addition & 1 deletion python/tests/test_sql.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@
def test_no_table(ctx):
with pytest.raises(
ValueError,
match="^Error during planning: table 'datafusion.public.b' not found$",
match=r"^Error during planning: table 'datafusion.public.b' not found$",
):
ctx.sql("SELECT a FROM b").collect()

Expand Down
Loading