Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 8 additions & 2 deletions bigframes/core/array_value.py
Original file line number Diff line number Diff line change
Expand Up @@ -212,11 +212,17 @@ def filter(self, predicate: ex.Expression):
return arr.drop_columns(filter_ids)

def order_by(
self, by: Sequence[OrderingExpression], is_total_order: bool = False
self,
by: Sequence[OrderingExpression],
is_total_order: bool = False,
stable: bool = True,
) -> ArrayValue:
return ArrayValue(
nodes.OrderByNode(
child=self.node, by=tuple(by), is_total_order=is_total_order
child=self.node,
by=tuple(by),
is_total_order=is_total_order,
stable=stable,
)
)

Expand Down
3 changes: 2 additions & 1 deletion bigframes/core/blocks.py
Original file line number Diff line number Diff line change
Expand Up @@ -395,9 +395,10 @@ def cols_matching_label(self, partial_label: Label) -> typing.Sequence[str]:
def order_by(
self,
by: typing.Sequence[ordering.OrderingExpression],
stable: bool = True,
) -> Block:
return Block(
self._expr.order_by(by),
self._expr.order_by(by, stable=stable),
index_columns=self.index_columns,
column_labels=self.column_labels,
index_labels=self.index.names,
Expand Down
4 changes: 3 additions & 1 deletion bigframes/core/indexes/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -436,6 +436,7 @@ def sort_values(
*,
inplace: bool = False,
ascending: bool = True,
kind: __builtins__.str | None = None,
na_position: __builtins__.str = "last",
) -> Index:
if na_position not in ["first", "last"]:
Expand All @@ -448,7 +449,8 @@ def sort_values(
else order.descending_over(column, na_last)
for column in index_columns
]
return Index(self._block.order_by(ordering))
is_stable = (kind or constants.DEFAULT_SORT_KIND) in ["stable", "mergesort"]
return Index(self._block.order_by(ordering, stable=is_stable))

def astype(
self,
Expand Down
3 changes: 2 additions & 1 deletion bigframes/core/nodes.py
Original file line number Diff line number Diff line change
Expand Up @@ -992,7 +992,8 @@ def remap_refs(
@dataclasses.dataclass(frozen=True, eq=False)
class OrderByNode(UnaryNode):
by: Tuple[OrderingExpression, ...]
# This is an optimization, if true, can discard previous orderings.
stable: bool = True
# This is an optimization, if true, can discard previous orderings, even if doing a stable sort
# might be a total ordering even if false
is_total_order: bool = False

Expand Down
7 changes: 6 additions & 1 deletion bigframes/core/rewrite/order.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,8 @@ def pull_up_order_inner(
child_result, child_order = pull_up_order_inner(node.child)
return child_result, child_order.with_reverse()
elif isinstance(node, bigframes.core.nodes.OrderByNode):
if node.is_total_order:
# unstable sorts don't care about previous order, total orders override previous order
if (not node.stable) or node.is_total_order:
new_node = remove_order(node.child)
else:
new_node, child_order = pull_up_order_inner(node.child)
Expand Down Expand Up @@ -106,6 +107,10 @@ def pull_up_order_inner(
),
)
)
elif not node.stable:
new_order = bigframes.core.ordering.RowOrdering(
ordering_value_columns=tuple(new_by),
)
else:
assert child_order
new_order = child_order.with_ordering_columns(new_by)
Expand Down
11 changes: 8 additions & 3 deletions bigframes/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -2390,6 +2390,7 @@ def sort_index(
*,
ascending: bool = ...,
inplace: Literal[False] = ...,
kind: str = ...,
na_position: Literal["first", "last"] = ...,
) -> DataFrame:
...
Expand All @@ -2400,6 +2401,7 @@ def sort_index(
*,
ascending: bool = ...,
inplace: Literal[True] = ...,
kind: str = ...,
na_position: Literal["first", "last"] = ...,
) -> None:
...
Expand All @@ -2410,6 +2412,7 @@ def sort_index(
axis: Union[int, str] = 0,
ascending: bool = True,
inplace: bool = False,
kind: str | None = None,
na_position: Literal["first", "last"] = "last",
) -> Optional[DataFrame]:
if utils.get_axis_number(axis) == 0:
Expand All @@ -2423,7 +2426,8 @@ def sort_index(
else order.descending_over(column, na_last)
for column in index_columns
]
block = self._block.order_by(ordering)
is_stable = (kind or constants.DEFAULT_SORT_KIND) in ["stable", "mergesort"]
block = self._block.order_by(ordering, stable=is_stable)
else: # axis=1
_, indexer = self.columns.sort_values(
return_indexer=True, ascending=ascending, na_position=na_position # type: ignore
Expand Down Expand Up @@ -2467,7 +2471,7 @@ def sort_values(
*,
inplace: bool = False,
ascending: bool | typing.Sequence[bool] = True,
kind: str = "quicksort",
kind: str | None = None,
na_position: typing.Literal["first", "last"] = "last",
) -> Optional[DataFrame]:
if isinstance(by, (bigframes.series.Series, indexes.Index, DataFrame)):
Expand Down Expand Up @@ -2499,7 +2503,8 @@ def sort_values(
if is_ascending
else order.descending_over(column_id, na_last)
)
block = self._block.order_by(ordering)
is_stable = (kind or constants.DEFAULT_SORT_KIND) in ["stable", "mergesort"]
block = self._block.order_by(ordering, stable=is_stable)
if inplace:
self._set_block(block)
return None
Expand Down
31 changes: 26 additions & 5 deletions bigframes/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -1801,19 +1801,21 @@ def sort_values(
axis=0,
inplace: bool = False,
ascending=True,
kind: str = "quicksort",
kind: str | None = None,
na_position: typing.Literal["first", "last"] = "last",
) -> Optional[Series]:
if axis != 0 and axis != "index":
raise ValueError(f"No axis named {axis} for object type Series")
if na_position not in ["first", "last"]:
raise ValueError("Param na_position must be one of 'first' or 'last'")
is_stable = (kind or constants.DEFAULT_SORT_KIND) in ["stable", "mergesort"]
block = self._block.order_by(
[
order.ascending_over(self._value_column, (na_position == "last"))
if ascending
else order.descending_over(self._value_column, (na_position == "last"))
],
stable=is_stable,
)
if inplace:
self._set_block(block)
Expand All @@ -1823,19 +1825,37 @@ def sort_values(

@typing.overload # type: ignore[override]
def sort_index(
self, *, axis=..., inplace: Literal[False] = ..., ascending=..., na_position=...
self,
*,
axis=...,
inplace: Literal[False] = ...,
ascending=...,
kind: str = ...,
na_position=...,
) -> Series:
...

@typing.overload
def sort_index(
self, *, axis=0, inplace: Literal[True] = ..., ascending=..., na_position=...
self,
*,
axis=0,
inplace: Literal[True] = ...,
ascending=...,
kind: str = ...,
na_position=...,
) -> None:
...

@validations.requires_index
def sort_index(
self, *, axis=0, inplace: bool = False, ascending=True, na_position="last"
self,
*,
axis=0,
inplace: bool = False,
ascending=True,
kind: str | None = None,
na_position="last",
) -> Optional[Series]:
# TODO(tbergeron): Support level parameter once multi-index introduced.
if axis != 0 and axis != "index":
Expand All @@ -1850,7 +1870,8 @@ def sort_index(
else order.descending_over(column, na_last)
for column in block.index_columns
]
block = block.order_by(ordering)
is_stable = (kind or constants.DEFAULT_SORT_KIND) in ["stable", "mergesort"]
block = block.order_by(ordering, stable=is_stable)
if inplace:
self._set_block(block)
return None
Expand Down
2 changes: 2 additions & 0 deletions third_party/bigframes_vendored/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,3 +55,5 @@
"_deferred",
]
VALID_WRITE_ENGINES = typing.get_args(WriteEngineType)

DEFAULT_SORT_KIND = "stable"
9 changes: 7 additions & 2 deletions third_party/bigframes_vendored/pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -2210,7 +2210,7 @@ def sort_values(
*,
inplace: bool = False,
ascending: bool | Sequence[bool] = True,
kind: str = "quicksort",
kind: str | None = None,
na_position: Literal["first", "last"] = "last",
):
"""Sort by the values along row axis.
Expand Down Expand Up @@ -2296,7 +2296,7 @@ def sort_values(
the by.
inplace (bool, default False):
If True, perform operation in-place.
kind (str, default 'quicksort'):
kind (str, default None):
Choice of sorting algorithm. Accepts 'quicksort', 'mergesort',
'heapsort', 'stable'. Ignored except when determining whether to
sort stably. 'mergesort' or 'stable' will result in stable reorder.
Expand All @@ -2320,6 +2320,7 @@ def sort_index(
axis: str | int = 0,
ascending: bool = True,
inplace: bool = False,
kind: str | None = None,
na_position: Literal["first", "last"] = "last",
):
"""Sort object by labels (along an axis).
Expand All @@ -2332,6 +2333,10 @@ def sort_index(
Sort ascending vs. descending.
inplace (bool, default False):
Whether to modify the DataFrame rather than creating a new one.
kind (str, default None):
Choice of sorting algorithm. Accepts 'quicksort', 'mergesort',
'heapsort', 'stable'. Ignored except when determining whether to
sort stably. 'mergesort' or 'stable' will result in stable reorder.
na_position ({'first', 'last'}, default 'last'):
Puts NaNs at the beginning if `first`; `last` puts NaNs at the end.
Not implemented for MultiIndex.
Expand Down
10 changes: 9 additions & 1 deletion third_party/bigframes_vendored/pandas/core/indexes/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -828,7 +828,11 @@ def nunique(self) -> int:
raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)

def sort_values(
self, *, ascending: bool = True, na_position: str = "last"
self,
*,
ascending: bool = True,
kind: str | None = None,
na_position: str = "last",
) -> Index:
"""
Return a sorted copy of the index.
Expand All @@ -851,6 +855,10 @@ def sort_values(
Args:
ascending (bool, default True):
Should the index values be sorted in an ascending order.
kind (str, default None):
Choice of sorting algorithm. Accepts 'quicksort', 'mergesort',
'heapsort', 'stable'. Ignored except when determining whether to
sort stably. 'mergesort' or 'stable' will result in stable reorder.
na_position ({'first' or 'last'}, default 'last'):
Argument 'first' puts NaNs at the beginning, 'last' puts NaNs at
the end.
Expand Down
9 changes: 7 additions & 2 deletions third_party/bigframes_vendored/pandas/core/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -1458,7 +1458,7 @@ def sort_values(
axis: Axis = 0,
inplace: bool = False,
ascending: bool | int | Sequence[bool] | Sequence[int] = True,
kind: str = "quicksort",
kind: str | None = None,
na_position: str = "last",
):
"""
Expand Down Expand Up @@ -1535,7 +1535,7 @@ def sort_values(
Whether to modify the Series rather than creating a new one.
ascending (bool or list of bools, default True):
If True, sort values in ascending order, otherwise descending.
kind (str, default to 'quicksort'):
kind (str, default to None):
Choice of sorting algorithm. Accepts quicksort', 'mergesort',
'heapsort', 'stable'. Ignored except when determining whether to
sort stably. 'mergesort' or 'stable' will result in stable reorder
Expand All @@ -1555,6 +1555,7 @@ def sort_index(
axis: Axis = 0,
inplace: bool = False,
ascending: bool | Sequence[bool] = True,
kind: str | None = None,
na_position: NaPosition = "last",
):
"""
Expand Down Expand Up @@ -1602,6 +1603,10 @@ def sort_index(
ascending (bool or list-like of bools, default True):
Sort ascending vs. descending. When the index is a MultiIndex the
sort direction can be controlled for each level individually.
kind (str, default None):
Choice of sorting algorithm. Accepts 'quicksort', 'mergesort',
'heapsort', 'stable'. Ignored except when determining whether to
sort stably. 'mergesort' or 'stable' will result in stable reorder.
na_position ({'first', 'last'}, default 'last'):
If 'first' puts NaNs at the beginning, 'last' puts NaNs at the end.
Not implemented for MultiIndex.
Expand Down
Loading