Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
78 changes: 68 additions & 10 deletions python/datafusion/functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@

from __future__ import annotations

from typing import TYPE_CHECKING, Any
from typing import TYPE_CHECKING, Any, Sequence

Check failure on line 21 in python/datafusion/functions.py

View workflow job for this annotation

GitHub Actions / build / lint-python

Ruff (F401)

python/datafusion/functions.py:21:40: F401 `typing.Sequence` imported but unused

Check failure on line 21 in python/datafusion/functions.py

View workflow job for this annotation

GitHub Actions / build / lint-python

Ruff (UP035)

python/datafusion/functions.py:21:1: UP035 Import from `collections.abc` instead: `Sequence`

import pyarrow as pa

Expand All @@ -42,7 +42,6 @@

if TYPE_CHECKING:
from datafusion.context import SessionContext

__all__ = [
"abs",
"acos",
Expand Down Expand Up @@ -268,7 +267,11 @@
"sum",
"tan",
"tanh",
"to_char",
"to_date",
"to_hex",
"to_local_time",
"to_time",
"to_timestamp",
"to_timestamp_micros",
"to_timestamp_millis",
Expand All @@ -290,6 +293,7 @@
]



def isnan(expr: Expr) -> Expr:
    """Tests whether a number is NaN.

    Yields true when the given number is +NaN or -NaN and false otherwise.
    """
    inner = expr.expr
    return Expr(f.isnan(inner))
Expand Down Expand Up @@ -1009,6 +1013,57 @@
"""
return Expr(f.now())

def to_char(arg: Expr, format: Expr) -> Expr:
    """Returns a string representation of a date, time, timestamp or duration.

    The value is rendered according to ``format``.

    For usage of ``format`` see the rust chrono package ``strftime`` package.

    [Documentation here.](https://docs.rs/chrono/latest/chrono/format/strftime/index.html)

    Args:
        arg: Expression producing the date, time, timestamp or duration to render.
        format: Expression producing the ``strftime``-style format string.

    Returns:
        An expression wrapping the underlying ``to_char`` call.
    """
    # Unwrap both Python-level expressions before handing them to the
    # internal binding, then wrap the result back into an ``Expr``.
    return Expr(f.to_char(arg.expr, format.expr))

def to_date(arg: Expr, *formatters: Expr) -> Expr:
    """Converts a value to a date (YYYY-MM-DD).

    Strings, numeric and timestamp types are accepted as input. Integers and
    doubles are interpreted as days since the unix epoch. When no
    ``formatters`` are supplied, strings are parsed as YYYY-MM-DD
    (e.g. '2023-07-20').

    For usage of ``formatters`` see the rust chrono package ``strftime`` package.

    [Documentation here.](https://docs.rs/chrono/latest/chrono/format/strftime/index.html)
    """
    format_exprs = [fmt.expr for fmt in formatters]
    if format_exprs:
        return Expr(f.to_date(arg.expr, *format_exprs))
    # No explicit formats: use the single-argument form of the binding.
    return Expr(f.to_date(arg.expr))


def to_local_time(arg: Expr) -> Expr:
    """Converts a timestamp with a timezone to a timestamp without a timezone.

    Daylight saving time changes are handled by this function.
    """
    raw_arg = arg.expr
    return Expr(f.to_local_time(raw_arg))


def to_time(arg: Expr, *formatters: Expr) -> Expr:
    """Converts a value to a time. Supports strings and timestamps as input.

    When ``formatters`` is not provided, strings are parsed as HH:MM:SS, HH:MM
    or HH:MM:SS.nnnnnnnnn.

    For usage of ``formatters`` see the rust chrono package ``strftime`` package.

    [Documentation here.](https://docs.rs/chrono/latest/chrono/format/strftime/index.html)
    """
    if formatters:
        format_exprs = [fmt.expr for fmt in formatters]
        return Expr(f.to_time(arg.expr, *format_exprs))
    # No explicit formats: use the single-argument form of the binding.
    return Expr(f.to_time(arg.expr))


def to_timestamp(arg: Expr, *formatters: Expr) -> Expr:
"""Converts a string and optional formats to a ``Timestamp`` in nanoseconds.
Expand All @@ -1017,10 +1072,10 @@

[Documentation here.](https://docs.rs/chrono/latest/chrono/format/strftime/index.html)
"""
if formatters is None:
return f.to_timestamp(arg.expr)
if not formatters:
return Expr(f.to_timestamp(arg.expr))

formatters = [f.expr for f in formatters]
formatters = [fmt.expr for fmt in formatters]
return Expr(f.to_timestamp(arg.expr, *formatters))


Expand All @@ -1029,7 +1084,7 @@

See :py:func:`to_timestamp` for a description on how to use formatters.
"""
formatters = [f.expr for f in formatters]
formatters = [fmt.expr for fmt in formatters]
return Expr(f.to_timestamp_millis(arg.expr, *formatters))


Expand All @@ -1038,7 +1093,7 @@

See :py:func:`to_timestamp` for a description on how to use formatters.
"""
formatters = [f.expr for f in formatters]
formatters = [fmt.expr for fmt in formatters]
return Expr(f.to_timestamp_micros(arg.expr, *formatters))


Expand All @@ -1047,7 +1102,7 @@

See :py:func:`to_timestamp` for a description on how to use formatters.
"""
formatters = [f.expr for f in formatters]
formatters = [fmt.expr for fmt in formatters]
return Expr(f.to_timestamp_nanos(arg.expr, *formatters))


Expand All @@ -1056,13 +1111,13 @@

See :py:func:`to_timestamp` for a description on how to use formatters.
"""
formatters = [f.expr for f in formatters]
formatters = [fmt.expr for fmt in formatters]
return Expr(f.to_timestamp_seconds(arg.expr, *formatters))


def to_unixtime(string: Expr, *format_arguments: Expr) -> Expr:
"""Converts a string and optional formats to a Unixtime."""
args = [f.expr for f in format_arguments]
args = [fmt.expr for fmt in format_arguments]
return Expr(f.to_unixtime(string.expr, *args))


Expand All @@ -1071,6 +1126,9 @@
return Expr(f.current_date())


# ``today`` is an alias bound to the same function object as ``current_date``.
today = current_date


def current_time() -> Expr:
    """Returns the current UTC time as a Time64 value."""
    raw_time = f.current_time()
    return Expr(raw_time)
Expand Down
41 changes: 40 additions & 1 deletion python/tests/test_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
# specific language governing permissions and limitations
# under the License.
import math
from datetime import datetime, timezone
from datetime import date, datetime, time, timezone

import numpy as np
import pyarrow as pa
Expand Down Expand Up @@ -952,6 +952,12 @@
f.to_timestamp_nanos(
literal("2023-09-07 05:06:14.523952000"), literal("%Y-%m-%d %H:%M:%S.%f")
),
f.to_time(literal("12:30:45")),
f.to_time(literal("12-30-45"), literal("%H-%M-%S")),
f.to_date(literal("2017-05-31")),
f.to_date(literal("2017-05-31"), literal("%Y-%m-%d")),
f.to_local_time(column("d")),
f.to_char(column("d"), literal('%d-%m-%Y'))

Check failure on line 960 in python/tests/test_functions.py

View workflow job for this annotation

GitHub Actions / build / lint-python

Ruff (Q000)

python/tests/test_functions.py:960:40: Q000 Single quotes found but double quotes preferred
)
result = df.collect()
assert len(result) == 1
Expand Down Expand Up @@ -1026,6 +1032,39 @@
[datetime(2023, 9, 7, 5, 6, 14, 523952, tzinfo=DEFAULT_TZ)] * 3,
type=pa.timestamp("ns"),
)
assert result.column(17) == pa.array(
[time(12, 30, 45)] * 3,
type=pa.time64("ns"),
)
assert result.column(18) == pa.array(
[time(12, 30, 45)] * 3,
type=pa.time64("ns"),
)
assert result.column(19) == pa.array(
[date(2017, 5, 31)] * 3,
type=pa.date32(),
)
assert result.column(20) == pa.array(
[date(2017, 5, 31)] * 3,
type=pa.date32(),
)
assert result.column(21) == pa.array(
[
datetime(2022, 12, 31, tzinfo=DEFAULT_TZ),
datetime(2027, 6, 26, tzinfo=DEFAULT_TZ),
datetime(2020, 7, 2, tzinfo=DEFAULT_TZ),
],
type=pa.timestamp("us"),
)

assert result.column(22) == pa.array(
[
"31-12-2022",
"26-06-2027",
"02-07-2020",
],
type=pa.string(),
)


def test_arrow_cast(df):
Expand Down
8 changes: 8 additions & 0 deletions src/functions.rs
Original file line number Diff line number Diff line change
Expand Up @@ -601,6 +601,9 @@ expr_fn!(
"Converts the number to its equivalent hexadecimal representation."
);
expr_fn!(now);
expr_fn_vec!(to_date);
expr_fn_vec!(to_local_time);
expr_fn_vec!(to_time);
expr_fn_vec!(to_timestamp);
expr_fn_vec!(to_timestamp_millis);
expr_fn_vec!(to_timestamp_nanos);
Expand All @@ -613,6 +616,7 @@ expr_fn!(date_part, part date);
expr_fn!(date_trunc, part date);
expr_fn!(date_bin, stride source origin);
expr_fn!(make_date, year month day);
expr_fn!(to_char, datetime format);

expr_fn!(translate, string from to, "Replaces each character in string that matches a character in the from set with the corresponding character in the to set. If from is longer than to, occurrences of the extra characters in from are deleted.");
expr_fn_vec!(
Expand Down Expand Up @@ -1045,6 +1049,10 @@ pub(crate) fn init_module(m: &Bound<'_, PyModule>) -> PyResult<()> {
m.add_wrapped(wrap_pyfunction!(tan))?;
m.add_wrapped(wrap_pyfunction!(tanh))?;
m.add_wrapped(wrap_pyfunction!(to_hex))?;
m.add_wrapped(wrap_pyfunction!(to_char))?;
m.add_wrapped(wrap_pyfunction!(to_date))?;
m.add_wrapped(wrap_pyfunction!(to_local_time))?;
m.add_wrapped(wrap_pyfunction!(to_time))?;
m.add_wrapped(wrap_pyfunction!(to_timestamp))?;
m.add_wrapped(wrap_pyfunction!(to_timestamp_millis))?;
m.add_wrapped(wrap_pyfunction!(to_timestamp_nanos))?;
Expand Down
Loading