From 19b020baf90399726dddcd3abec87fe56b9d0287 Mon Sep 17 00:00:00 2001
From: jernejfrank <jernejfrank@gmail.com>
Date: Sat, 14 Jun 2025 23:59:36 +0100
Subject: [PATCH 01/12] Migrate CircleCI to GitHub Actions

Limit to Linux OS for now
---
 .ci/setup.sh                        |  49 --------
 .ci/test.sh                         |  80 ------------
 .circleci/config.yml                | 181 ----------------------------
 .github/workflows/hamilton-main.yml | 149 +++++++++++++++++++++++
 4 files changed, 149 insertions(+), 310 deletions(-)
 delete mode 100755 .ci/setup.sh
 delete mode 100755 .ci/test.sh
 delete mode 100644 .circleci/config.yml
 create mode 100644 .github/workflows/hamilton-main.yml

diff --git a/.ci/setup.sh b/.ci/setup.sh
deleted file mode 100755
index 3feca8331..000000000
--- a/.ci/setup.sh
+++ /dev/null
@@ -1,49 +0,0 @@
-#!/bin/bash
-
-set -e -u -o pipefail
-
-OPERATING_SYSTEM=$(uname -s)
-
-if [[ ${OPERATING_SYSTEM} == "Linux" ]]; then
-    sudo apt-get update -y
-    sudo apt-get install \
-        --no-install-recommends \
-        --yes \
-            graphviz
-fi
-
-# setting up a virtualenv isn't necessary for the "pre-commit" task
-if [[ ${TASK} != "pre-commit" ]]; then
-    mkdir -p "${HOME}/venvs/hamilton-venv"
-    python -m venv "${HOME}/venvs/hamilton-venv" # TODO: add --upgrade-deps after dropping support for py3.8
-    source "${HOME}/venvs/hamilton-venv/bin/activate"
-    pip install ".[test]"
-fi
-
-if [[ ${TASK} == "pyspark" ]]; then
-    if [[ ${OPERATING_SYSTEM} == "Linux" ]]; then
-        sudo apt-get install \
-            --no-install-recommends \
-            --yes \
-                default-jre
-    fi
-fi
-
-if [[ ${TASK} == "vaex" ]]; then
-    if [[ ${OPERATING_SYSTEM} == "Linux" ]]; then
-        sudo apt-get install \
-            --no-install-recommends \
-            --yes \
-                libpcre3-dev cargo
-    fi
-fi
-
-echo "----- python version -----"
-python --version
-
-echo "----- pip version -----"
-pip --version
-echo "-----------------------"
-
-# disable telemetry!
-export HAMILTON_TELEMETRY_ENABLED=false
diff --git a/.ci/test.sh b/.ci/test.sh
deleted file mode 100755
index 1d2542ec7..000000000
--- a/.ci/test.sh
+++ /dev/null
@@ -1,80 +0,0 @@
-#!/bin/bash
-
-set -e -u -o pipefail
-
-echo "running CI task '${TASK}'"
-
-if [[ ${TASK} == "pre-commit" ]]; then
-    pip install pre-commit
-    pre-commit run --all-files
-    exit 0
-fi
-
-echo "using venv at '${HOME}/venvs/hamilton-venv/bin/activate'"
-source "${HOME}/venvs/hamilton-venv/bin/activate"
-
-if [[ ${TASK} == "async" ]]; then
-    pip install .
-    pytest plugin_tests/h_async
-    exit 0
-fi
-
-if [[ ${TASK} == "dask" ]]; then
-    pip install -e '.[dask]'
-    pytest plugin_tests/h_dask
-    exit 0
-fi
-
-if [[ ${TASK} == "integrations" ]]; then
-    pip install -e '.[pandera, test]'
-    pip install -r tests/integrations/pandera/requirements.txt
-    if python -c 'import sys; exit(0) if sys.version_info[:2] == (3, 9) else exit(1)'; then
-      echo "Python version is 3.9"
-      pip install dask-expr
-    else
-      echo "Python version is not 3.9"
-    fi
-    pytest tests/integrations
-    exit 0
-fi
-
-if [[ ${TASK} == "ray" ]]; then
-    pip install -e '.[ray]'
-    pytest plugin_tests/h_ray
-    exit 0
-fi
-
-if [[ ${TASK} == "pyspark" ]]; then
-    pip install -e '.[pyspark]'
-    pip install 'numpy<2' 'pyspark[connect]' # downgrade until spark fixes their bug
-    pytest plugin_tests/h_spark
-    exit 0
-fi
-
-if [[ ${TASK} == "vaex" ]]; then
-    pip install "numpy<2.0.0"  # numpy2.0 breaks vaex
-    pip install -e '.[vaex]'
-    pytest plugin_tests/h_vaex
-    exit 0
-fi
-
-if [[ ${TASK} == "narwhals" ]]; then
-    pip install -e .
-    pip install polars pandas narwhals
-    pytest plugin_tests/h_narwhals
-    exit 0
-fi
-
-if [[ ${TASK} == "tests" ]]; then
-    pip install .
-    # https://github.com/plotly/Kaleido/issues/226
-    pip install "kaleido<0.4.0" # kaleido 0.4.0 breaks plotly; TODO: remove this
-    pytest \
-        --cov=hamilton \
-        --ignore tests/integrations \
-        tests/
-    exit 0
-fi
-
-echo "ERROR: did not recognize TASK '${TASK}'"
-exit 1
diff --git a/.circleci/config.yml b/.circleci/config.yml
deleted file mode 100644
index 6c1018337..000000000
--- a/.circleci/config.yml
+++ /dev/null
@@ -1,181 +0,0 @@
-version: 2.1
-jobs:
-  check_for_changes:
-    docker:
-      - image: circleci/python:3.10
-    steps:
-      - checkout
-      - run:
-          name: Check for changes in specific paths
-          command: |
-            set +e
-            git diff --name-only origin/main...HEAD | grep '^.ci\|^.circleci\|^graph_adapter_tests\|^hamilton\|^plugin_tests\|^tests\|^requirements\|setup' > /dev/null
-            if [ $? -eq 0 ]; then
-              echo "Changes found in target paths."
-              echo 'true' > /tmp/changes_detected
-            else
-              echo "No changes found in target paths."
-              echo 'false' > /tmp/changes_detected
-            fi
-      - persist_to_workspace:
-          root: /tmp
-          paths:
-            - changes_detected
-  test:
-    parameters:
-      python-version:
-        type: string
-      task:
-        type: string
-    docker:
-      - image: cimg/python:<< parameters.python-version >>
-    environment:
-      TASK: << parameters.task >>
-      CI: true
-    steps:
-      - checkout
-      - attach_workspace:
-          at: /tmp
-      - run:
-          name: Check if changes were detected
-          command: |
-            if grep -q 'false' /tmp/changes_detected; then
-              echo "No changes detected, skipping job..."
-              circleci-agent step halt
-            fi
-      - run:
-          name: install dependencies
-          command: .ci/setup.sh
-      - run:
-          name: run tests
-          command: .ci/test.sh
-workflows:
-  unit-test-workflow:
-    jobs:
-      - check_for_changes
-      - test:
-          requires:
-            - check_for_changes
-          name: build-py38
-          python-version: '3.8'
-          task: tests
-      - test:
-          requires:
-            - check_for_changes
-          name: build-py39
-          python-version: '3.9'
-          task: tests
-      - test:
-          requires:
-            - check_for_changes
-          name: build-py310
-          python-version: '3.10'
-          task: tests
-      - test:
-          name: build-py311
-          python-version: '3.11'
-          task: tests
-      - test:
-          name: build-py312
-          python-version: '3.12'
-          task: tests
-      - test:
-          name: pre-commit
-          python-version: '3.11'
-          task: pre-commit
-      - test:
-          requires:
-            - check_for_changes
-          name: dask-py39
-          python-version: '3.9'
-          task: dask
-      - test:
-          requires:
-            - check_for_changes
-          name: dask-py311
-          python-version: '3.11'
-          task: dask
-      - test:
-          requires:
-            - check_for_changes
-          name: ray-py11
-          python-version: '3.11'
-          task: ray
-      - test:
-          requires:
-            - check_for_changes
-          name: vaex-py310
-          python-version: '3.10'
-          task: vaex
-      - test:
-          requires:
-            - check_for_changes
-          name: spark-py39
-          python-version: '3.9'
-          task: pyspark
-      - test:
-          requires:
-            - check_for_changes
-          name: spark-py310
-          python-version: '3.10'
-          task: pyspark
-      - test:
-          requires:
-            - check_for_changes
-          name: spark-py311
-          python-version: '3.11'
-          task: pyspark
-      - test:
-          requires:
-            - check_for_changes
-          name: spark-py312
-          python-version: '3.12'
-          task: pyspark
-      - test:
-          requires:
-            - check_for_changes
-          name: integrations-py38
-          python-version: '3.8'
-          task: integrations
-      - test:
-          requires:
-            - check_for_changes
-          name: integrations-py39
-          python-version: '3.9'
-          task: integrations
-      - test:
-          requires:
-            - check_for_changes
-          name: integrations-py310
-          python-version: '3.10'
-          task: integrations
-      - test:
-          requires:
-            - check_for_changes
-          name: integrations-py311
-          python-version: '3.11'
-          task: integrations
-      - test:
-          requires:
-            - check_for_changes
-          name: integrations-py312
-          python-version: '3.12'
-          task: integrations
-      - test:
-          requires:
-            - check_for_changes
-          name: narwhals-py39
-          python-version: '3.9'
-          task: narwhals
-      - test:
-          requires:
-            - check_for_changes
-          name: narwhals-py310
-          python-version: '3.10'
-          task: narwhals
-      - test:
-          requires:
-            - check_for_changes
-          name: narwhals-py311
-          python-version: '3.11'
-          task: narwhals
diff --git a/.github/workflows/hamilton-main.yml b/.github/workflows/hamilton-main.yml
new file mode 100644
index 000000000..c72c19414
--- /dev/null
+++ b/.github/workflows/hamilton-main.yml
@@ -0,0 +1,149 @@
+name: Unit Tests
+
+on:
+  workflow_dispatch:
+
+  pull_request:
+    branches:
+        - main
+    paths:
+        - '.github/**'
+        - 'hamilton/**'
+        - 'plugin_tests/**'
+        - 'tests/**'
+        - 'pyproject.toml'
+
+jobs:
+  test:
+    name: "Unit Tests"
+    runs-on: ${{ matrix.os }}
+    strategy:
+    #   fail-fast: true
+      matrix:
+        os:
+          - ubuntu-latest
+        python-version:
+          - 3.8
+          - 3.9
+          - 3.10
+          - 3.11
+          - 3.12
+    env:
+      UV_PRERELEASE: "allow"
+      HAMILTON_TELEMETRY_ENABLED: false
+
+    steps:
+        - name: Install Graphviz on Linux
+          if: runner.os == 'Linux'
+          run: sudo apt-get update && sudo apt-get install --yes --no-install-recommends graphviz
+
+        - name: Install Graphviz on Windows
+          if: runner.os == 'Windows'
+          run: choco install graphviz
+          shell: powershell
+
+        - name: Install Graphviz on macOS
+          if: runner.os == 'macOS'
+          run: brew install graphviz
+
+        - name: Checkout repository
+          uses: actions/checkout@v4
+
+        - name: Install uv and set the python version
+          uses: astral-sh/setup-uv@v6
+          with:
+            python-version: ${{ matrix.python-version }}
+            enable-cache: true
+            cache-dependency-glob: "uv.lock"
+            activate-environment: true
+
+        # It's enough to do it on single OS
+        - name: Check linting with pre-commit
+          if: ${{ runner.os == 'Linux' }}
+          run: |
+            uv sync --extra dev
+            uv run pre-commit install
+            uv run pre-commit run --all-files
+
+        - name: Test hamilton main package
+          run: |
+            uv sync --extra test
+            uv pip install "kaleido<0.4.0"
+            uv run pytest tests/ --cov=hamilton --ignore tests/integrations
+
+        - name: Test integrations
+          if: ${{ matrix.python-version == '3.9' }}
+          run: |
+            uv sync --extra test --extra pandera
+            uv pip install -r tests/integrations/pandera/requirements.txt
+            uv pip install dask-expr
+            uv run pytest tests/integrations
+
+        - name: Test integrations
+          if: ${{ matrix.python-version != '3.9' }}
+          run: |
+            uv sync --extra test --extra pandera
+            uv pip install -r tests/integrations/pandera/requirements.txt
+            uv run pytest tests/integrations
+
+        - name: Test pandas
+          run: |
+            uv sync --extra test
+            uv run pytest plugin_tests/h_pandas
+
+        - name: Test polars
+          run: |
+            uv sync --extra test
+            uv pip install polars
+            uv run pytest plugin_tests/h_polars
+        - name: Test narwhals
+          run: |
+            uv sync --extra test
+            uv pip install polars pandas narwhals
+            uv run pytest plugin_tests/h_narwhals
+
+        - name: Test dask
+          # Dask supports >= py3.9
+          if: ${{ matrix.python-version != '3.8' }}
+          run: |
+            uv sync --extra test --extra dask
+            uv run pytest plugin_tests/h_dask
+
+        - name: Test ray
+          # Ray supports >= py3.9
+          if: ${{ matrix.python-version != '3.8' }}
+          run: |
+            uv sync --extra test --extra ray
+            uv run pytest plugin_tests/h_ray
+
+        - name: Test pyspark
+          # Spark supports >= py3.9
+          if: ${{ matrix.python-version != '3.8' && runner.os == 'Linux' }}
+          run: |
+            sudo apt-get install --no-install-recommends --yes default-jre
+            uv sync --extra test --extra pyspark
+            uv pip install 'numpy<2' 'pyspark[connect]' # downgrade until spark fixes their bug; uv pip, same as the other steps
+            uv run pytest plugin_tests/h_spark
+
+        - name: Test pyspark
+          # Spark supports >= py3.9
+          if: ${{ matrix.python-version != '3.8' && runner.os != 'Linux' }}
+          run: |
+            uv sync --extra test --extra pyspark
+            uv pip install 'numpy<2' 'pyspark[connect]'
+            uv run pytest plugin_tests/h_spark
+
+        - name: Test vaex
+          # Vaex supports <= py3.9
+          if: ${{ runner.os == 'Linux' && (matrix.python-version == '3.8' || matrix.python-version == '3.9' || matrix.python-version == '3.10') }}
+          run: |
+            sudo apt-get install --no-install-recommends --yes libpcre3-dev cargo
+            uv sync --extra test --extra vaex
+            uv run pytest plugin_tests/h_vaex
+
+        - name: Test vaex
+          # Vaex supports <= py3.9
+          if: ${{ runner.os != 'Linux' && (matrix.python-version == '3.8' || matrix.python-version == '3.9' || matrix.python-version == '3.10') }}
+          run: |
+            uv sync --extra test --extra vaex
+            uv run pytest plugin_tests/h_vaex

From d58351ac3604af6cb3812f3563a95cce687fa8db Mon Sep 17 00:00:00 2001
From: jernejfrank <jernejfrank@gmail.com>
Date: Sun, 15 Jun 2025 00:00:44 +0100
Subject: [PATCH 02/12] Fix linting issues

---
 examples/validate_examples.py              |  2 ++
 scripts/test_memory.py                     |  4 ++--
 ui/sdk/src/hamilton_sdk/adapters.py        | 12 ++++++------
 writeups/garbage_collection/memory_test.py |  2 +-
 4 files changed, 11 insertions(+), 9 deletions(-)

diff --git a/examples/validate_examples.py b/examples/validate_examples.py
index d85dcbbf7..fb361158e 100644
--- a/examples/validate_examples.py
+++ b/examples/validate_examples.py
@@ -1,3 +1,5 @@
+from __future__ import annotations
+
 import argparse
 import logging
 import pathlib
diff --git a/scripts/test_memory.py b/scripts/test_memory.py
index a3358842a..8567fca75 100644
--- a/scripts/test_memory.py
+++ b/scripts/test_memory.py
@@ -49,7 +49,7 @@ def foo_0(memory_size: int = 100_000_000) -> pd.DataFrame:
 
 
 @parameterize(
-    **{f"foo_{i}": {"foo_i_minus_one": source(f"foo_{i-1}")} for i in range(1, NUM_ITERS)}
+    **{f"foo_{i}": {"foo_i_minus_one": source(f"foo_{i - 1}")} for i in range(1, NUM_ITERS)}
 )
 def foo_i(foo_i_minus_one: pd.DataFrame) -> pd.DataFrame:
     global count
@@ -61,4 +61,4 @@ def foo_i(foo_i_minus_one: pd.DataFrame) -> pd.DataFrame:
 if __name__ == "__main__":
     mod = create_temporary_module(foo_i, foo_0)
     dr = driver.Builder().with_modules(mod).build()
-    output = dr.execute([f"foo_{NUM_ITERS-1}"], inputs=dict(memory_size=100_000_000))
+    output = dr.execute([f"foo_{NUM_ITERS - 1}"], inputs=dict(memory_size=100_000_000))
diff --git a/ui/sdk/src/hamilton_sdk/adapters.py b/ui/sdk/src/hamilton_sdk/adapters.py
index 23cbb3a2c..ddb3ee45d 100644
--- a/ui/sdk/src/hamilton_sdk/adapters.py
+++ b/ui/sdk/src/hamilton_sdk/adapters.py
@@ -8,17 +8,17 @@
 from types import ModuleType
 from typing import Any, Callable, Dict, List, Optional, Union
 
+from hamilton import graph as h_graph
+from hamilton import node
+from hamilton.data_quality import base as dq_base
+from hamilton.lifecycle import base
+
 from hamilton_sdk import driver
 from hamilton_sdk.api import clients, constants
 from hamilton_sdk.tracking import runs
 from hamilton_sdk.tracking.runs import Status, TrackingState
 from hamilton_sdk.tracking.trackingtypes import TaskRun
 
-from hamilton import graph as h_graph
-from hamilton import node
-from hamilton.data_quality import base as dq_base
-from hamilton.lifecycle import base
-
 logger = logging.getLogger(__name__)
 
 
@@ -314,7 +314,7 @@ def post_node_execute(
         for i, other_result in enumerate(other_results):
             other_attr = dict(
                 node_name=get_node_name(node_, task_id),
-                name=other_result.get("name", f"Attribute {i+1}"),  # retrieve name if specified
+                name=other_result.get("name", f"Attribute {i + 1}"),  # retrieve name if specified
                 type=other_result["observability_type"],
                 # 0.0.3 -> 3
                 schema_version=int(other_result["observability_schema_version"].split(".")[-1]),
diff --git a/writeups/garbage_collection/memory_test.py b/writeups/garbage_collection/memory_test.py
index 5912fba58..572517bbb 100644
--- a/writeups/garbage_collection/memory_test.py
+++ b/writeups/garbage_collection/memory_test.py
@@ -46,7 +46,7 @@ def foo_0(memory_size: int = 100_000_000) -> pd.DataFrame:
 
 
 @parameterize(
-    **{f"foo_{i}": {"foo_i_minus_one": source(f"foo_{i-1}")} for i in range(1, NUM_ITERS)}
+    **{f"foo_{i}": {"foo_i_minus_one": source(f"foo_{i - 1}")} for i in range(1, NUM_ITERS)}
 )
 def foo_i(foo_i_minus_one: pd.DataFrame) -> pd.DataFrame:
     global count

From 0a58b9c8956ee46ff42d8d28602b56633a0e0445 Mon Sep 17 00:00:00 2001
From: jernejfrank <jernejfrank@gmail.com>
Date: Sun, 15 Jun 2025 00:00:12 +0100
Subject: [PATCH 03/12] Fix tests and minor bugs

---
 .github/workflows/hamilton-main.yml | 24 +++++++++++++++---------
 pyproject.toml                      | 16 ++++++++++------
 tests/test_base.py                  |  7 ++++---
 tests/test_telemetry.py             |  7 +++----
 4 files changed, 32 insertions(+), 22 deletions(-)

diff --git a/.github/workflows/hamilton-main.yml b/.github/workflows/hamilton-main.yml
index c72c19414..89775e643 100644
--- a/.github/workflows/hamilton-main.yml
+++ b/.github/workflows/hamilton-main.yml
@@ -18,16 +18,16 @@ jobs:
     name: "Unit Tests"
     runs-on: ${{ matrix.os }}
     strategy:
-    #   fail-fast: true
+      fail-fast: false # will change this to true at the end, but want to see tests failing on all use cases
       matrix:
         os:
           - ubuntu-latest
         python-version:
-          - 3.8
-          - 3.9
-          - 3.10
-          - 3.11
-          - 3.12
+          - '3.8'
+          - '3.9'
+          - '3.10'
+          - '3.11'
+          - '3.12'
     env:
       UV_PRERELEASE: "allow"
       HAMILTON_TELEMETRY_ENABLED: false
@@ -44,7 +44,9 @@ jobs:
 
         - name: Install Graphviz on macOS
           if: runner.os == 'macOS'
-          run: brew install graphviz
+          run: |
+            brew install graphviz
+            brew install libomp
 
         - name: Checkout repository
           uses: actions/checkout@v4
@@ -71,6 +73,7 @@ jobs:
             uv pip install "kaleido<0.4.0"
             uv run pytest tests/ --cov=hamilton --ignore tests/integrations
 
+
         - name: Test integrations
           if: ${{ matrix.python-version == '3.9' }}
           run: |
@@ -96,6 +99,7 @@ jobs:
             uv sync --extra test
             uv pip install polars
             uv run pytest plugin_tests/h_polars
+
         - name: Test narwhals
           run: |
             uv sync --extra test
@@ -134,16 +138,18 @@ jobs:
             uv run pytest plugin_tests/h_spark
 
         - name: Test vaex
-          # Vaex supports <= py3.9
+          # Vaex supports <= py3.10 and numpy<2
           if: ${{ runner.os == 'Linux' && (matrix.python-version == '3.8' || matrix.python-version == '3.9' || matrix.python-version == '3.10') }}
           run: |
             sudo apt-get install --no-install-recommends --yes libpcre3-dev cargo
             uv sync --extra test --extra vaex
+            uv pip install "numpy<2"
             uv run pytest plugin_tests/h_vaex
 
         - name: Test vaex
-          # Vaex supports <= py3.9
+          # Vaex supports <= py3.10 and numpy<2
           if: ${{ runner.os != 'Linux' && (matrix.python-version == '3.8' || matrix.python-version == '3.9' || matrix.python-version == '3.10') }}
           run: |
             uv sync --extra test --extra vaex
+            uv pip install "numpy<2"
             uv run pytest plugin_tests/h_vaex
diff --git a/pyproject.toml b/pyproject.toml
index 68130ffd7..89189ea42 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -37,7 +37,6 @@ dependencies = [
 cli = ["typer"]
 dask = ["dask[complete]"]  # commonly you'll want everything.
 dask-array = ["dask[array]"]
-dask-core = ["dask-core"]
 dask-dataframe = ["dask[dataframe]"]
 dask-diagnostics = ["dask[diagnostics]"]
 dask-distributed = ["dask[distributed]"]
@@ -57,8 +56,7 @@ docs = [
   "diskcache",
   # required for all the plugins
   "dlt",
-  # furo -- install from main for now until the next release is out:
-  "furo @ git+https://github.com/pradyunsg/furo@main",
+  "furo",
   "gitpython", # Required for parsing git info for generation of data-adapter docs
   "grpcio-status",
   "lightgbm",
@@ -69,6 +67,7 @@ docs = [
   "myst-nb",
   "narwhals",
   "numpy < 2.0.0",
+  "packaging",
   "pandera",
   "pillow",
   "polars",
@@ -111,10 +110,11 @@ rich = ["rich"]
 sdk = ["sf-hamilton-sdk"]
 slack = ["slack-sdk"]
 test = [
-  "connectorx",
+  "connectorx<=0.3.2; python_version=='3.8'",
+  "connectorx; python_version!='3.8'",
   "dask[complete]",
   "dask-expr; python_version == '3.9'",
-  "datasets", # huggingface datasets
+  "datasets>=2.18.0", # huggingface datasets -- https://github.com/huggingface/datasets/issues/6737#issuecomment-2107336816
   "diskcache",
   "dlt",
   "fsspec",
@@ -149,7 +149,11 @@ test = [
 ]
 tqdm = ["tqdm"]
 ui = ["sf-hamilton-ui"]
-vaex = ["vaex"]
+
+# vaex -- on >=py3.11 only core part available https://github.com/vaexio/vaex/pull/2331#issuecomment-2437198176
+vaex = [
+  "vaex; python_version <= '3.10'"
+  ]
 visualization = ["graphviz", "networkx"]
 
 [project.entry-points.console_scripts]
diff --git a/tests/test_base.py b/tests/test_base.py
index 447d506db..e8a44c9a6 100644
--- a/tests/test_base.py
+++ b/tests/test_base.py
@@ -5,6 +5,7 @@
 import pandas as pd
 import pytest
 from numpy import testing
+from packaging import version
 
 from hamilton import base
 
@@ -279,7 +280,7 @@ def test_PandasDataFrameResult_build_dataframe_with_dataframes(outputs, expected
 # Still supporting old pandas version, although we should phase off...
 int_64_index = "Index:::int64" if pd.__version__ >= "2.0.0" else "RangeIndex:::int64"
 
-PD_VERSION = tuple(int(item) for item in pd.__version__.split("."))
+PD_VERSION = version.parse(pd.__version__)
 
 
 @pytest.mark.parametrize(
@@ -326,7 +327,7 @@ def test_PandasDataFrameResult_build_dataframe_with_dataframes(outputs, expected
             {"a": pd.Series([1, 2, 3]).index},
             ({"Index:::int64": ["a"]}, {}, {}),
             marks=pytest.mark.skipif(
-                PD_VERSION < (2, 0, 0),
+                PD_VERSION < version.parse("2.0.0"),
                 reason="Pandas 2.0 changed default indices but we still " "support pandas <2.0",
             ),
         ),
@@ -334,7 +335,7 @@ def test_PandasDataFrameResult_build_dataframe_with_dataframes(outputs, expected
             {"a": pd.Series([1, 2, 3]).index},
             ({"Int64Index:::int64": ["a"]}, {}, {}),
             marks=pytest.mark.skipif(
-                PD_VERSION >= (2, 0, 0),
+                PD_VERSION >= version.parse("2.0.0"),
                 reason="Pandas 2.0 changed default indices but we still " "support pandas <2.0",
             ),
         ),
diff --git a/tests/test_telemetry.py b/tests/test_telemetry.py
index cc378a562..a1b30045e 100644
--- a/tests/test_telemetry.py
+++ b/tests/test_telemetry.py
@@ -126,10 +126,9 @@ def test_sanitize_error_general():
         import re
 
         actual = re.sub(r"line \d\d\d", "line XXX", actual)
-        expected = (
-            """...<USER_CODE>...\n...hamilton/telemetry.py, line XXX, in get_adapter_name\n"""
-        )
-        # if this fails -- run it how circleci runs it
+        expected = """...hamilton/tests/test_telemetry.py, line XXX, in test_sanitize_error_general\n...hamilton/hamilton/telemetry.py, line XXX, in get_adapter_name\n"""
+
+        # if this fails -- run it how github actions run it
         assert actual == expected
 
 

From a21f8007d4709ac5721078796b2402a1533a37b1 Mon Sep 17 00:00:00 2001
From: jernejfrank <jernejfrank@gmail.com>
Date: Tue, 17 Jun 2025 19:56:04 +0100
Subject: [PATCH 04/12] Fix SDK

Fix Polars histogram lower-bound tests
Fix PySpark stats test by changing sql.dataframe.DataFrame to sql.classic.dataframe.DataFrame
Fix sanitize-error test to work on CI
---
 tests/test_telemetry.py                        | 2 +-
 ui/sdk/tests/tracking/test_polars_col_stats.py | 2 +-
 ui/sdk/tests/tracking/test_polars_stats.py     | 4 ++--
 ui/sdk/tests/tracking/test_pyspark_stats.py    | 2 +-
 4 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/tests/test_telemetry.py b/tests/test_telemetry.py
index a1b30045e..337cd17f7 100644
--- a/tests/test_telemetry.py
+++ b/tests/test_telemetry.py
@@ -126,7 +126,7 @@ def test_sanitize_error_general():
         import re
 
         actual = re.sub(r"line \d\d\d", "line XXX", actual)
-        expected = """...hamilton/tests/test_telemetry.py, line XXX, in test_sanitize_error_general\n...hamilton/hamilton/telemetry.py, line XXX, in get_adapter_name\n"""
+        expected = """...hamilton/hamilton/tests/test_telemetry.py, line XXX, in test_sanitize_error_general\n...hamilton/hamilton/hamilton/telemetry.py, line XXX, in get_adapter_name\n"""
 
         # if this fails -- run it how github actions run it
         assert actual == expected
diff --git a/ui/sdk/tests/tracking/test_polars_col_stats.py b/ui/sdk/tests/tracking/test_polars_col_stats.py
index a20e79385..3f3a976a4 100644
--- a/ui/sdk/tests/tracking/test_polars_col_stats.py
+++ b/ui/sdk/tests/tracking/test_polars_col_stats.py
@@ -66,7 +66,7 @@ def test_quantiles(example_df):
 
 def test_histogram(example_df):
     assert pcs.histogram(example_df["a"], num_hist_bins=3) == {
-        "(0.996, 2.333333]": 2,
+        "[1.0, 2.333333]": 2,
         "(2.333333, 3.666667]": 1,
         "(3.666667, 5.0]": 2,
     }
diff --git a/ui/sdk/tests/tracking/test_polars_stats.py b/ui/sdk/tests/tracking/test_polars_stats.py
index 76333c7c8..a8a9d43a9 100644
--- a/ui/sdk/tests/tracking/test_polars_stats.py
+++ b/ui/sdk/tests/tracking/test_polars_stats.py
@@ -30,7 +30,7 @@ def test_compute_stats_df():
                 "count": 5,
                 "data_type": "Int64",
                 "histogram": {
-                    "(0.996, 1.4]": 1,
+                    "[1.0, 1.4]": 1,
                     "(1.4, 1.8]": 0,
                     "(1.8, 2.2]": 1,
                     "(2.2, 2.6]": 0,
@@ -76,7 +76,7 @@ def test_compute_stats_df():
                 "count": 5,
                 "data_type": "Float64",
                 "histogram": {
-                    "(0.996, 1.4]": 1,
+                    "[1.0, 1.4]": 1,
                     "(1.4, 1.8]": 0,
                     "(1.8, 2.2]": 1,
                     "(2.2, 2.6]": 0,
diff --git a/ui/sdk/tests/tracking/test_pyspark_stats.py b/ui/sdk/tests/tracking/test_pyspark_stats.py
index 5d239f839..694afc34f 100644
--- a/ui/sdk/tests/tracking/test_pyspark_stats.py
+++ b/ui/sdk/tests/tracking/test_pyspark_stats.py
@@ -19,7 +19,7 @@ def test_compute_stats_pyspark():
         "observability_schema_version": "0.0.2",
         "observability_type": "dict",
         "observability_value": {
-            "type": "<class 'pyspark.sql.dataframe.DataFrame'>",
+            "type": "<class 'pyspark.sql.classic.dataframe.DataFrame'>",
             "value": {
                 "columns": [
                     {

From 862ba870771c7c39dde83448be849a901e4c3990 Mon Sep 17 00:00:00 2001
From: jernejfrank <jernejfrank@gmail.com>
Date: Wed, 18 Jun 2025 23:25:49 +0100
Subject: [PATCH 05/12] Fix pandas/polars plugin tests

---
 plugin_tests/h_pandas/test_with_columns.py | 6 +++---
 plugin_tests/h_polars/test_with_columns.py | 6 +++---
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/plugin_tests/h_pandas/test_with_columns.py b/plugin_tests/h_pandas/test_with_columns.py
index f9012e718..85076c4f0 100644
--- a/plugin_tests/h_pandas/test_with_columns.py
+++ b/plugin_tests/h_pandas/test_with_columns.py
@@ -246,7 +246,7 @@ def target_fn(upstream_df: pd.DataFrame) -> pd.DataFrame:
         upstream_df=dummy_df(),
         dummy_fn_with_columns=dummy_fn_with_columns(col_1=pd.Series([1, 2, 3, 4])),
     )
-    assert merge_node.name == "__append"
+    assert merge_node.name == "_append"
     assert merge_node.type == pd.DataFrame
 
     pd.testing.assert_series_equal(output_df["col_1"], pd.Series([1, 2, 3, 4]), check_names=False)
@@ -275,7 +275,7 @@ def col_1() -> pd.Series:
     merge_node = output_nodes[-1]
 
     output_df = merge_node.callable(upstream_df=dummy_df(), col_1=col_1())
-    assert merge_node.name == "__append"
+    assert merge_node.name == "_append"
     assert merge_node.type == pd.DataFrame
 
     pd.testing.assert_series_equal(output_df["col_1"], pd.Series([0, 3, 5, 7]), check_names=False)
@@ -303,7 +303,7 @@ def target_fn(upstream_df: pd.DataFrame) -> pd.DataFrame:
     assert nodes_[0].name == "target_fn"
     assert nodes_[1].name == "dummy_namespace.dummy_fn_with_columns"
     assert nodes_[2].name == "dummy_namespace.col_1"
-    assert nodes_[3].name == "dummy_namespace.__append"
+    assert nodes_[3].name == "dummy_namespace._append"
 
 
 def test_end_to_end_with_columns_automatic_extract():
diff --git a/plugin_tests/h_polars/test_with_columns.py b/plugin_tests/h_polars/test_with_columns.py
index 151347fb7..892fb4cee 100644
--- a/plugin_tests/h_polars/test_with_columns.py
+++ b/plugin_tests/h_polars/test_with_columns.py
@@ -144,7 +144,7 @@ def target_fn(upstream_df: pl.DataFrame) -> pl.DataFrame:
         upstream_df=dummy_df(),
         dummy_fn_with_columns=dummy_fn_with_columns(col_1=pl.Series([1, 2, 3, 4])),
     )
-    assert merge_node.name == "__append"
+    assert merge_node.name == "_append"
     assert merge_node.type == pl.DataFrame
 
     pl.testing.assert_series_equal(output_df["col_1"], pl.Series([1, 2, 3, 4]), check_names=False)
@@ -174,7 +174,7 @@ def col_1() -> pl.Series:
     merge_node = output_nodes[-1]
 
     output_df = merge_node.callable(upstream_df=dummy_df(), col_1=col_1())
-    assert merge_node.name == "__append"
+    assert merge_node.name == "_append"
     assert merge_node.type == pl.DataFrame
 
     pl.testing.assert_series_equal(
@@ -204,7 +204,7 @@ def target_fn(upstream_df: pl.DataFrame) -> pl.DataFrame:
     assert nodes_[0].name == "target_fn"
     assert nodes_[1].name == "dummy_namespace.dummy_fn_with_columns"
     assert nodes_[2].name == "dummy_namespace.col_1"
-    assert nodes_[3].name == "dummy_namespace.__append"
+    assert nodes_[3].name == "dummy_namespace._append"
 
 
 def test_end_to_end_with_columns_automatic_extract():

From 10fc45d77260efc2df033f751dd1a4cf343e3733 Mon Sep 17 00:00:00 2001
From: jernejfrank <jernejfrank@gmail.com>
Date: Wed, 18 Jun 2025 23:26:35 +0100
Subject: [PATCH 06/12] Pin minimum dask-expr version that resolves the bug

---
 pyproject.toml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index 89189ea42..f87cdab0b 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -50,7 +50,7 @@ docs = [
   "sf-hamilton[dev]",
   "alabaster>=0.7,<0.8,!=0.7.5", # read the docs pins
   "commonmark==0.9.1", # read the docs pins
-  "dask-expr; python_version == '3.9'",
+  "dask-expr>=1.1.14; python_version >= '3.10'", # Bugfix only available for py3.10 and newer: https://github.com/dask/dask-expr/pull/1150
   "dask[distributed]",
   "ddtrace<3.0",
   "diskcache",
@@ -113,7 +113,7 @@ test = [
   "connectorx<=0.3.2; python_version=='3.8'",
   "connectorx; python_version!='3.8'",
   "dask[complete]",
-  "dask-expr; python_version == '3.9'",
+  "dask-expr>=1.1.14; python_version >= '3.10'", # Bugfix only available for py3.10 and newer: https://github.com/dask/dask-expr/pull/1150
   "datasets>=2.18.0", # huggingface datasets -- https://github.com/huggingface/datasets/issues/6737#issuecomment-2107336816
   "diskcache",
   "dlt",

From e27433c8db457b3a65f89a57cb1c2c126dfc5572 Mon Sep 17 00:00:00 2001
From: jernejfrank <jernejfrank@gmail.com>
Date: Thu, 19 Jun 2025 20:57:08 +0100
Subject: [PATCH 07/12] Fix ray venv bug

---
 .github/workflows/hamilton-main.yml | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/.github/workflows/hamilton-main.yml b/.github/workflows/hamilton-main.yml
index 89775e643..a34551a7d 100644
--- a/.github/workflows/hamilton-main.yml
+++ b/.github/workflows/hamilton-main.yml
@@ -116,6 +116,8 @@ jobs:
         - name: Test ray
           # Ray supports >= py3.9
           if: ${{ matrix.python-version != '3.8' }}
+          env:
+            RAY_ENABLE_UV_RUN_RUNTIME_ENV: 0 # https://github.com/ray-project/ray/issues/53848
           run: |
             uv sync --extra test --extra ray
             uv run pytest plugin_tests/h_ray

From ab72c56b5194de7b62b4c64623e0caf7a9baec72 Mon Sep 17 00:00:00 2001
From: jernejfrank <jernejfrank@gmail.com>
Date: Thu, 19 Jun 2025 21:36:52 +0100
Subject: [PATCH 08/12] Add grpcio dependency for spark

---
 pyproject.toml | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/pyproject.toml b/pyproject.toml
index f87cdab0b..117689d86 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -103,7 +103,8 @@ pandera = ["pandera"]
 pydantic = ["pydantic>=2.0"]
 pyspark = [
   # we have to run these dependencies because Spark does not check to ensure the right target was called
-  "pyspark[pandas_on_spark,sql]"
+  "pyspark[pandas_on_spark,sql]",
+  "grpcio"
 ]
 ray = ["ray>=2.0.0", "pyarrow"]
 rich = ["rich"]

From c16ed7d831fc8d2814867d88f13d89ab4c939a9e Mon Sep 17 00:00:00 2001
From: jernejfrank <jernejfrank@gmail.com>
Date: Thu, 19 Jun 2025 22:04:00 +0100
Subject: [PATCH 09/12] Try force-reinstalling grpcio packages

---
 .github/workflows/hamilton-main.yml | 6 ++++--
 pyproject.toml                      | 1 -
 2 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/.github/workflows/hamilton-main.yml b/.github/workflows/hamilton-main.yml
index a34551a7d..5b87bdb74 100644
--- a/.github/workflows/hamilton-main.yml
+++ b/.github/workflows/hamilton-main.yml
@@ -128,7 +128,8 @@ jobs:
           run: |
             sudo apt-get install --no-install-recommends --yes default-jre
             uv sync --extra test --extra pyspark
-            pip install 'numpy<2' 'pyspark[connect]'
+            uv pip install 'numpy<2' 'pyspark[connect]' 'grpcio'
+            uv pip install --no-cache --reinstall --strict 'grpcio-status >= 1.48.1'
             uv run pytest plugin_tests/h_spark
 
         - name: Test pyspark
@@ -136,7 +137,8 @@ jobs:
           if: ${{ matrix.python-version != '3.8' && runner.os != 'Linux' }}
           run: |
             uv sync --extra test --extra pyspark
-            uv pip install 'numpy<2' 'pyspark[connect]'
+            uv pip install 'numpy<2' 'pyspark[connect]' 'grpcio'
+            uv pip install --no-cache --reinstall --strict 'grpcio-status >= 1.48.1'
             uv run pytest plugin_tests/h_spark
 
         - name: Test vaex
diff --git a/pyproject.toml b/pyproject.toml
index 117689d86..164803819 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -104,7 +104,6 @@ pydantic = ["pydantic>=2.0"]
 pyspark = [
   # we have to run these dependencies because Spark does not check to ensure the right target was called
   "pyspark[pandas_on_spark,sql]",
-  "grpcio"
 ]
 ray = ["ray>=2.0.0", "pyarrow"]
 rich = ["rich"]

From 1a467b04dceac48e659bfc6e535942bbc3e2a578 Mon Sep 17 00:00:00 2001
From: jernejfrank <jernejfrank@gmail.com>
Date: Thu, 19 Jun 2025 22:21:23 +0100
Subject: [PATCH 10/12] Disable pyspark ANSI

---
 .github/workflows/hamilton-main.yml | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/.github/workflows/hamilton-main.yml b/.github/workflows/hamilton-main.yml
index 5b87bdb74..345ce42cc 100644
--- a/.github/workflows/hamilton-main.yml
+++ b/.github/workflows/hamilton-main.yml
@@ -125,6 +125,8 @@ jobs:
         - name: Test pyspark
           # Spark supports >= py3.9
           if: ${{ matrix.python-version != '3.8' && runner.os == 'Linux' }}
+          env:
+            PYSPARK_SUBMIT_ARGS: "--conf spark.sql.ansi.enabled=false pyspark-shell"
           run: |
             sudo apt-get install --no-install-recommends --yes default-jre
             uv sync --extra test --extra pyspark
@@ -135,6 +137,8 @@ jobs:
         - name: Test pyspark
           # Spark supports >= py3.9
           if: ${{ matrix.python-version != '3.8' && runner.os != 'Linux' }}
+          env:
+            PYSPARK_SUBMIT_ARGS: "--conf spark.sql.ansi.enabled=false pyspark-shell"
           run: |
             uv sync --extra test --extra pyspark
             uv pip install 'numpy<2' 'pyspark[connect]' 'grpcio'

From 605497d81a3d55c1756c4150564ced766da93e38 Mon Sep 17 00:00:00 2001
From: jernejfrank <jernejfrank@gmail.com>
Date: Thu, 19 Jun 2025 22:45:13 +0100
Subject: [PATCH 11/12] Try to fix SparkInputValidator test

---
 plugin_tests/h_spark/test_h_spark.py | 16 +++-------------
 1 file changed, 3 insertions(+), 13 deletions(-)

diff --git a/plugin_tests/h_spark/test_h_spark.py b/plugin_tests/h_spark/test_h_spark.py
index 36bc295a4..2532c35c9 100644
--- a/plugin_tests/h_spark/test_h_spark.py
+++ b/plugin_tests/h_spark/test_h_spark.py
@@ -6,7 +6,6 @@
 import pytest
 from pyspark import Row
 from pyspark.sql import Column, DataFrame, SparkSession, types
-from pyspark.sql.connect.dataframe import DataFrame as CDataFrame
 from pyspark.sql.connect.session import SparkSession as CSparkSession
 from pyspark.sql.functions import column
 
@@ -889,19 +888,10 @@ def test_create_selector_node(spark_session):
     )
 
 
-def test_spark_input_adapter_dataframe():
+def test_spark_input_adapter_dataframe(spark_session):
     # We have to do these at is is very difficult to mock out connect.x objects
-
-    class ConnectDataFrame(CDataFrame):
-        def __init__(self):
-            pass
-
-        def __repr__(self):
-            return "df"
-
-    assert SparkInputValidator().do_validate_input(
-        node_type=DataFrame, input_value=ConnectDataFrame()
-    )
+    df = spark_session.range(1)
+    assert SparkInputValidator().do_validate_input(node_type=DataFrame, input_value=df)
 
 
 def test_spark_input_adapter_connector():

From 63832025d1aea4ed12c2d18ad2c1aef7d650c631 Mon Sep 17 00:00:00 2001
From: jernejfrank <jernejfrank@gmail.com>
Date: Fri, 20 Jun 2025 08:22:49 +0100
Subject: [PATCH 12/12] Add reason for fail-fast false

---
 .github/workflows/hamilton-main.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/hamilton-main.yml b/.github/workflows/hamilton-main.yml
index 345ce42cc..d7ef1daed 100644
--- a/.github/workflows/hamilton-main.yml
+++ b/.github/workflows/hamilton-main.yml
@@ -18,7 +18,7 @@ jobs:
     name: "Unit Tests"
     runs-on: ${{ matrix.os }}
     strategy:
-      fail-fast: false # will change this to true at the end, but want to see tests failing on all use cases
+      fail-fast: false # let every matrix job finish so we can see whether failures differ per version
       matrix:
         os:
           - ubuntu-latest