githubnext · github-actions · May 18, 2026 · May 18, 2026 · May 26, 2026 · Jun 4, 2026
diff --git a/benchmarks/pandas/bench_at_iat.py b/benchmarks/pandas/bench_at_iat.py
@@ -0,0 +1,48 @@
+"""Benchmark: scalar access via Series.at, Series.iat, DataFrame.at and DataFrame.iat.
+
+Fetches the middle element of a 100k-row, string-labelled Series and DataFrame
+by label and by position, then prints a one-line JSON timing summary.
+"""
+import json
+import time
+import pandas as pd
+
+N = 100_000
+WARMUP = 3
+ITERATIONS = 10
+
+row_labels = [f"r{i}" for i in range(N)]
+col_a = [1.5 * i for i in range(N)]
+col_b = [2 * v for v in col_a]
+
+series = pd.Series(col_a, index=row_labels)
+frame = pd.DataFrame({"a": col_a, "b": col_b}, index=row_labels)
+
+mid_label = f"r{N // 2}"
+
+
+def run_once():
+    """Do one label-based and one positional lookup on each container."""
+    series.at[mid_label]
+    series.iat[N // 2]
+    frame.at[mid_label, "a"]
+    frame.iat[N // 2, 0]
+
+
+# Untimed warm-up passes.
+for _ in range(WARMUP):
+    run_once()
+
+# Timed passes; elapsed wall-clock time is converted from seconds to ms.
+t0 = time.perf_counter()
+for _ in range(ITERATIONS):
+    run_once()
+elapsed_ms = 1000 * (time.perf_counter() - t0)
+
+report = {
+    "function": "at_iat",
+    "mean_ms": elapsed_ms / ITERATIONS,
+    "iterations": ITERATIONS,
+    "total_ms": elapsed_ms,
+}
+print(json.dumps(report))
diff --git a/benchmarks/pandas/bench_cross_join.py b/benchmarks/pandas/bench_cross_join.py
@@ -0,0 +1,39 @@
+"""Benchmark: cross_join — Cartesian product of two 300-row DataFrames (90k result rows).
+
+Times pd.merge(..., how="cross") and prints a one-line JSON timing summary.
+"""
+import json
+import time
+import pandas as pd
+
+N = 300
+WARMUP = 3
+ITERATIONS = 10
+
+row_ids = list(range(N))
+left = pd.DataFrame({"id_a": row_ids, "val_a": [1.5 * i for i in row_ids]})
+right = pd.DataFrame({"id_b": row_ids, "val_b": [2.5 * i for i in row_ids]})
+
+
+def run_once():
+    """Build the full N x N cross product of the two frames."""
+    pd.merge(left, right, how="cross")
+
+
+# Untimed warm-up passes.
+for _ in range(WARMUP):
+    run_once()
+
+# Timed passes; elapsed wall-clock time is converted from seconds to ms.
+t0 = time.perf_counter()
+for _ in range(ITERATIONS):
+    run_once()
+elapsed_ms = 1000 * (time.perf_counter() - t0)
+
+report = {
+    "function": "cross_join",
+    "mean_ms": elapsed_ms / ITERATIONS,
+    "iterations": ITERATIONS,
+    "total_ms": elapsed_ms,
+}
+print(json.dumps(report))
diff --git a/benchmarks/pandas/bench_cut_bins_to_frame.py b/benchmarks/pandas/bench_cut_bins_to_frame.py
@@ -0,0 +1,56 @@
+"""Benchmark: cut_bins_to_frame — pd.cut with value_counts and bin summary on 100k rows."""
+import json
+import time
+import numpy as np
+import pandas as pd
+
+SIZE = 100_000
+NUM_BINS = 20
+WARMUP = 5
+ITERATIONS = 50
+
+data = np.array([(i % 1000) * 0.1 for i in range(SIZE)])
+
+
+def run_once():
+    """Run the pandas equivalents of cutBinsToFrame, cutBinCounts and binEdges once.
+
+    Warm-up and timed loops both call this, so they cannot drift apart.
+    """
+    # pd.cut on an ndarray returns a Categorical. Categorical.value_counts()
+    # takes no ``sort`` argument (passing one raises TypeError); it already
+    # reports every bin, in bin order, including empty ones.
+    cut_result = pd.cut(data, NUM_BINS)
+    counts = cut_result.value_counts()
+    # cutBinsToFrame equivalent: one summary row per bin.
+    summary = pd.DataFrame({
+        "bin": counts.index.astype(str),
+        "left": [iv.left for iv in counts.index],
+        "right": [iv.right for iv in counts.index],
+        "count": counts.values,
+        "frequency": counts.values / len(data),
+    })
+    # cutBinCounts equivalent: bin label -> count.
+    count_dict = dict(zip(counts.index.astype(str), counts.values))
+    # binEdges equivalent: DataFrame of interval edges.
+    edges = pd.DataFrame({
+        "left": [iv.left for iv in counts.index],
+        "right": [iv.right for iv in counts.index],
+    })
+    return summary, count_dict, edges
+
+
+for _ in range(WARMUP):
+    run_once()
+
+start = time.perf_counter()
+for _ in range(ITERATIONS):
+    run_once()
+total = (time.perf_counter() - start) * 1000
+
+print(json.dumps({
+    "function": "cut_bins_to_frame",
+    "mean_ms": total / ITERATIONS,
+    "iterations": ITERATIONS,
+    "total_ms": total,
+}))
diff --git a/benchmarks/pandas/bench_filter_series.py b/benchmarks/pandas/bench_filter_series.py
@@ -0,0 +1,43 @@
+"""Benchmark: Series.filter — select Series entries by index label.
+
+Times an ``items=`` filter (1,000 exact labels) and a ``like=`` substring
+filter on a 100k-row Series, then prints a one-line JSON timing summary.
+"""
+import json
+import time
+import pandas as pd
+
+N = 100_000
+WARMUP = 3
+ITERATIONS = 10
+
+index_labels = [f"label_{i}" for i in range(N)]
+series = pd.Series([0.5 * i for i in range(N)], index=index_labels)
+
+# Every 100th label: label_0, label_100, ..., label_99900.
+keep_items = [f"label_{i}" for i in range(0, 100 * 1_000, 100)]
+
+
+def run_once():
+    """Apply the exact-label filter and the substring filter once each."""
+    series.filter(items=keep_items)
+    series.filter(like="label_5")
+
+
+# Untimed warm-up passes.
+for _ in range(WARMUP):
+    run_once()
+
+# Timed passes; elapsed wall-clock time is converted from seconds to ms.
+t0 = time.perf_counter()
+for _ in range(ITERATIONS):
+    run_once()
+elapsed_ms = 1000 * (time.perf_counter() - t0)
+
+report = {
+    "function": "filter_series",
+    "mean_ms": elapsed_ms / ITERATIONS,
+    "iterations": ITERATIONS,
+    "total_ms": elapsed_ms,
+}
+print(json.dumps(report))
diff --git a/benchmarks/pandas/bench_join_all.py b/benchmarks/pandas/bench_join_all.py
@@ -0,0 +1,49 @@
+"""Benchmark: join_all — join three 5k-row DataFrames onto a 5k-row base frame.
+
+All four frames share the same string index. Times ``base.join([...])`` with
+its default join type and prints a one-line JSON timing summary.
+"""
+import json
+import time
+import pandas as pd
+
+N = 5_000
+WARMUP = 3
+ITERATIONS = 10
+
+idx = [str(i) for i in range(N)]
+
+
+def make_frame(column, factor):
+    """Return an N-row, single-column frame holding ``i * factor`` at row ``i``."""
+    return pd.DataFrame({column: [i * factor for i in range(N)]}, index=idx)
+
+
+base = make_frame("a", 1)
+right_b = make_frame("b", 2)
+right_c = make_frame("c", 3)
+right_d = make_frame("d", 4)
+
+
+def run_once():
+    """Join the three right-hand frames onto the base frame in one call."""
+    base.join([right_b, right_c, right_d])
+
+
+# Untimed warm-up passes.
+for _ in range(WARMUP):
+    run_once()
+
+# Timed passes; elapsed wall-clock time is converted from seconds to ms.
+t0 = time.perf_counter()
+for _ in range(ITERATIONS):
+    run_once()
+elapsed_ms = 1000 * (time.perf_counter() - t0)
+
+report = {
+    "function": "join_all",
+    "mean_ms": elapsed_ms / ITERATIONS,
+    "iterations": ITERATIONS,
+    "total_ms": elapsed_ms,
+}
+print(json.dumps(report))
diff --git a/benchmarks/pandas/bench_math_ops.py b/benchmarks/pandas/bench_math_ops.py
@@ -0,0 +1,49 @@
+"""Benchmark: math_ops — abs / round on a Series and a DataFrame of 100k rows.
+
+Times ``abs()`` and ``round(1)`` on float data containing negative values and
+prints a one-line JSON timing summary.
+"""
+import json
+import time
+import numpy as np
+import pandas as pd
+
+SIZE = 100_000
+WARMUP = 5
+ITERATIONS = 50
+
+positions = np.arange(SIZE)
+magnitudes = positions + 0.567
+# Even positions hold the negated value, odd positions the positive one.
+series = pd.Series(np.where(positions % 2 == 0, -magnitudes, magnitudes))
+frame = pd.DataFrame({
+    "a": -(positions + 0.123),
+    "b": positions + 0.456,
+})
+
+
+def run_once():
+    """Apply abs() and round(1) to both the Series and the DataFrame."""
+    series.abs()
+    frame.abs()
+    series.round(1)
+    frame.round(1)
+
+
+# Untimed warm-up passes.
+for _ in range(WARMUP):
+    run_once()
+
+# Timed passes; elapsed wall-clock time is converted from seconds to ms.
+t0 = time.perf_counter()
+for _ in range(ITERATIONS):
+    run_once()
+elapsed_ms = 1000 * (time.perf_counter() - t0)
+
+report = {
+    "function": "math_ops",
+    "mean_ms": elapsed_ms / ITERATIONS,
+    "iterations": ITERATIONS,
+    "total_ms": elapsed_ms,
+}
+print(json.dumps(report))
diff --git a/benchmarks/pandas/bench_merge_asof.py b/benchmarks/pandas/bench_merge_asof.py
@@ -0,0 +1,48 @@
+"""Benchmark: merge_asof — backward asof join of two 10k-row sorted DataFrames.
+
+Times ``pd.merge_asof`` on an integer ``time`` key and prints a one-line JSON
+timing summary.
+"""
+import json
+import time
+import pandas as pd
+
+N = 10_000
+WARMUP = 3
+ITERATIONS = 10
+
+# Trades: one row every 2 time units (0, 2, 4, ...).
+trades = pd.DataFrame({
+    "time": [2 * i for i in range(N)],
+    "price": [100.0 + 0.5 * i for i in range(N)],
+})
+
+# Quotes: one row every 3 time units (0, 3, 6, ...), so they are sparser.
+quotes = pd.DataFrame({
+    "time": [3 * i for i in range(N)],
+    "bid": [99.0 + 0.5 * i for i in range(N)],
+})
+
+
+def run_once():
+    """Match each trade with a quote via merge_asof on the time column."""
+    pd.merge_asof(trades, quotes, on="time")
+
+
+# Untimed warm-up passes.
+for _ in range(WARMUP):
+    run_once()
+
+# Timed passes; elapsed wall-clock time is converted from seconds to ms.
+t0 = time.perf_counter()
+for _ in range(ITERATIONS):
+    run_once()
+elapsed_ms = 1000 * (time.perf_counter() - t0)
+
+report = {
+    "function": "merge_asof",
+    "mean_ms": elapsed_ms / ITERATIONS,
+    "iterations": ITERATIONS,
+    "total_ms": elapsed_ms,
+}
+print(json.dumps(report))
diff --git a/benchmarks/pandas/bench_na_ops.py b/benchmarks/pandas/bench_na_ops.py
@@ -0,0 +1,47 @@
+"""Benchmark: na_ops — isna / notna / ffill / bfill on 100k rows."""
+import json
+import time
+import numpy as np
+import pandas as pd
+
+SIZE = 100_000
+WARMUP = 5
+ITERATIONS = 50
+
+# float64 Series holding 0..SIZE-1 with every 5th value (0, 5, 10, ...) set to
+# NaN, built directly as floats.
+values = np.arange(SIZE, dtype="float64")
+values[::5] = np.nan
+s = pd.Series(values)
+
+# Column "a" reuses that Series; column "b" holds 2*i with every 7th value NaN.
+df = pd.DataFrame({
+    "a": s,
+    "b": pd.Series([float(i * 2) if i % 7 != 0 else np.nan for i in range(SIZE)]),
+})
+
+
+def run_once():
+    """Run each missing-value operation once on the Series and the DataFrame."""
+    pd.isna(s)
+    pd.notna(s)
+    s.ffill()
+    s.bfill()
+    df.ffill()
+    df.bfill()
+
+
+for _ in range(WARMUP):
+    run_once()
+
+start = time.perf_counter()
+for _ in range(ITERATIONS):
+    run_once()
+total = (time.perf_counter() - start) * 1000
+
+print(json.dumps({
+    "function": "na_ops",
+    "mean_ms": total / ITERATIONS,
+    "iterations": ITERATIONS,
+    "total_ms": total,
+}))
diff --git a/benchmarks/pandas/bench_notna_boolean.py b/benchmarks/pandas/bench_notna_boolean.py
@@ -0,0 +1,51 @@
+"""Benchmark: notna_boolean — boolean-mask indexing on 100k rows.
+
+Times Series selection with a boolean Series mask (and its inverse) plus
+DataFrame row selection with a NumPy boolean array, then prints a one-line
+JSON timing summary.
+"""
+import json
+import time
+import numpy as np
+import pandas as pd
+
+SIZE = 100_000
+WARMUP = 5
+ITERATIONS = 50
+
+positions = np.arange(SIZE)
+
+series = pd.Series(positions)
+even_mask = pd.Series(positions % 2 == 0)
+not_multiple_of_3 = positions % 3 != 0
+
+frame = pd.DataFrame({
+    "a": positions,
+    "b": positions * 2,
+})
+
+
+def run_once():
+    """Index the Series with the mask and its inverse, and the frame with the array."""
+    series[even_mask]
+    series[~even_mask]
+    frame[not_multiple_of_3]
+
+
+# Untimed warm-up passes.
+for _ in range(WARMUP):
+    run_once()
+
+# Timed passes; elapsed wall-clock time is converted from seconds to ms.
+t0 = time.perf_counter()
+for _ in range(ITERATIONS):
+    run_once()
+elapsed_ms = 1000 * (time.perf_counter() - t0)
+
+report = {
+    "function": "notna_boolean",
+    "mean_ms": elapsed_ms / ITERATIONS,
+    "iterations": ITERATIONS,
+    "total_ms": elapsed_ms,
+}
+print(json.dumps(report))
diff --git a/benchmarks/pandas/bench_pow_mod.py b/benchmarks/pandas/bench_pow_mod.py
@@ -0,0 +1,47 @@
+"""Benchmark: Series.pow, Series.mod, DataFrame.pow on 100k rows.
+
+Times squaring and modulo-7 on float data and prints a one-line JSON timing
+summary.
+"""
+import json
+import time
+import numpy as np
+import pandas as pd
+
+ROWS = 100_000
+WARMUP = 3
+ITERATIONS = 10
+
+positions = np.arange(ROWS)
+# Repeating 1..100 and 1..50 ramps, stored as floats.
+cycle_100 = (positions % 100 + 1).astype(float)
+cycle_50 = (positions % 50 + 1).astype(float)
+
+series = pd.Series(cycle_100)
+frame = pd.DataFrame({"a": cycle_100, "b": cycle_50})
+
+
+def run_once():
+    """Square the Series and the DataFrame and take the Series modulo 7."""
+    series.pow(2)
+    series.mod(7)
+    frame.pow(2)
+
+
+# Untimed warm-up passes.
+for _ in range(WARMUP):
+    run_once()
+
+# Timed passes; elapsed wall-clock time is converted from seconds to ms.
+t0 = time.perf_counter()
+for _ in range(ITERATIONS):
+    run_once()
+elapsed_ms = 1000 * (time.perf_counter() - t0)
+
+report = {
+    "function": "pow_mod",
+    "mean_ms": elapsed_ms / ITERATIONS,
+    "iterations": ITERATIONS,
+    "total_ms": elapsed_ms,
+}
+print(json.dumps(report))