diff --git a/NEXT_CHANGELOG.md b/NEXT_CHANGELOG.md index 49340c6c82..186a187268 100644 --- a/NEXT_CHANGELOG.md +++ b/NEXT_CHANGELOG.md @@ -10,5 +10,7 @@ ### Bundles * Fix reading dashboard contents when the sync root is different than the bundle root ([#3006](https://github.com/databricks/cli/pull/3006)) +* When glob for wheels is used, like "\*.whl", it will filter out different version of the same package and will only take the most recent version. ([#2982](https://github.com/databricks/cli/pull/2982)) +* When building Python artifacts as part of "bundle deploy" we no longer delete `dist`, `build`, `*egg-info` and `__pycache__` directories. ([#2982](https://github.com/databricks/cli/pull/2982)) ### API Changes diff --git a/acceptance/bundle/artifacts/whl_change_version/.gitignore b/acceptance/bundle/artifacts/whl_change_version/.gitignore new file mode 100644 index 0000000000..f03e23bc26 --- /dev/null +++ b/acceptance/bundle/artifacts/whl_change_version/.gitignore @@ -0,0 +1,3 @@ +build/ +*.egg-info +.databricks diff --git a/acceptance/bundle/artifacts/whl_change_version/databricks.yml b/acceptance/bundle/artifacts/whl_change_version/databricks.yml new file mode 100644 index 0000000000..5a8d55ded0 --- /dev/null +++ b/acceptance/bundle/artifacts/whl_change_version/databricks.yml @@ -0,0 +1,12 @@ +resources: + jobs: + test_job: + name: "[${bundle.target}] My Wheel Job" + tasks: + - task_key: TestTask + existing_cluster_id: "0717-aaaaa-bbbbbb" + python_wheel_task: + package_name: "my_test_code" + entry_point: "run" + libraries: + - whl: ./dist/*.whl diff --git a/acceptance/bundle/artifacts/whl_change_version/my_test_code/__init__.py b/acceptance/bundle/artifacts/whl_change_version/my_test_code/__init__.py new file mode 100644 index 0000000000..e96953330e --- /dev/null +++ b/acceptance/bundle/artifacts/whl_change_version/my_test_code/__init__.py @@ -0,0 +1,2 @@ +__version__ = "0.1.0" +__author__ = "Databricks" diff --git 
a/acceptance/bundle/artifacts/whl_change_version/my_test_code/__main__.py b/acceptance/bundle/artifacts/whl_change_version/my_test_code/__main__.py new file mode 100644 index 0000000000..ea918ce2d5 --- /dev/null +++ b/acceptance/bundle/artifacts/whl_change_version/my_test_code/__main__.py @@ -0,0 +1,16 @@ +""" +The entry point of the Python Wheel +""" + +import sys + + +def main(): + # This method will print the provided arguments + print("Hello from my func") + print("Got arguments:") + print(sys.argv) + + +if __name__ == "__main__": + main() diff --git a/acceptance/bundle/artifacts/whl_change_version/output.txt b/acceptance/bundle/artifacts/whl_change_version/output.txt new file mode 100644 index 0000000000..a2bfb79758 --- /dev/null +++ b/acceptance/bundle/artifacts/whl_change_version/output.txt @@ -0,0 +1,138 @@ + +>>> [CLI] bundle deploy +Building python_artifact... +Uploading dist/my_test_code-0.1.0-py3-none-any.whl... +Uploading bundle files to /Workspace/Users/[USERNAME]/.bundle/test-bundle/default/files... +Deploying resources... +Updating deployment state... +Deployment complete! 
+ +>>> find.py --expect 1 whl +dist/my_test_code-0.1.0-py3-none-any.whl + +=== Expecting 1 wheel in libraries section in /jobs/create +>>> jq -s .[] | select(.path=="/api/2.2/jobs/create") | .body.tasks out.requests.txt +[ + { + "existing_cluster_id": "0717-aaaaa-bbbbbb", + "libraries": [ + { + "whl": "/Workspace/Users/[USERNAME]/.bundle/test-bundle/default/artifacts/.internal/my_test_code-0.1.0-py3-none-any.whl" + } + ], + "python_wheel_task": { + "entry_point": "run", + "package_name": "my_test_code" + }, + "task_key": "TestTask" + } +] + +=== Expecting 1 wheel to be uploaded +>>> jq .path out.requests.txt +"/api/2.0/workspace-files/import-file/Workspace/Users/[USERNAME]/.bundle/test-bundle/default/artifacts/.internal/my_test_code-0.1.0-py3-none-any.whl" +"/api/2.0/workspace-files/import-file/Workspace/Users/[USERNAME]/.bundle/test-bundle/default/files/.gitignore" +"/api/2.0/workspace-files/import-file/Workspace/Users/[USERNAME]/.bundle/test-bundle/default/files/databricks.yml" +"/api/2.0/workspace-files/import-file/Workspace/Users/[USERNAME]/.bundle/test-bundle/default/files/dist/my_test_code-0.1.0-py3-none-any.whl" +"/api/2.0/workspace-files/import-file/Workspace/Users/[USERNAME]/.bundle/test-bundle/default/files/my_test_code/__init__.py" +"/api/2.0/workspace-files/import-file/Workspace/Users/[USERNAME]/.bundle/test-bundle/default/files/my_test_code/__main__.py" +"/api/2.0/workspace-files/import-file/Workspace/Users/[USERNAME]/.bundle/test-bundle/default/files/out.requests.txt" +"/api/2.0/workspace-files/import-file/Workspace/Users/[USERNAME]/.bundle/test-bundle/default/files/output.txt" +"/api/2.0/workspace-files/import-file/Workspace/Users/[USERNAME]/.bundle/test-bundle/default/files/repls.json" +"/api/2.0/workspace-files/import-file/Workspace/Users/[USERNAME]/.bundle/test-bundle/default/files/script" +"/api/2.0/workspace-files/import-file/Workspace/Users/[USERNAME]/.bundle/test-bundle/default/files/setup.py" 
+"/api/2.0/workspace-files/import-file/Workspace/Users/[USERNAME]/.bundle/test-bundle/default/state/deploy.lock" +"/api/2.0/workspace-files/import-file/Workspace/Users/[USERNAME]/.bundle/test-bundle/default/state/deployment.json" +"/api/2.0/workspace-files/import-file/Workspace/Users/[USERNAME]/.bundle/test-bundle/default/state/metadata.json" +"/api/2.0/workspace-files/import-file/Workspace/Users/[USERNAME]/.bundle/test-bundle/default/state/terraform.tfstate" + +>>> update_file.py my_test_code/__init__.py 0.1.0 0.2.0 + +>>> [CLI] bundle deploy +Building python_artifact... +Uploading dist/my_test_code-0.2.0-py3-none-any.whl... +Uploading bundle files to /Workspace/Users/[USERNAME]/.bundle/test-bundle/default/files... +Deploying resources... +Updating deployment state... +Deployment complete! + +>>> find.py --expect 2 whl +dist/my_test_code-0.1.0-py3-none-any.whl +dist/my_test_code-0.2.0-py3-none-any.whl + +=== Expecting 1 wheel in libraries section in /jobs/reset +>>> jq -s .[] | select(.path=="/api/2.2/jobs/reset") | .body.new_settings.tasks out.requests.txt +[ + { + "existing_cluster_id": "0717-aaaaa-bbbbbb", + "libraries": [ + { + "whl": "/Workspace/Users/[USERNAME]/.bundle/test-bundle/default/artifacts/.internal/my_test_code-0.2.0-py3-none-any.whl" + } + ], + "python_wheel_task": { + "entry_point": "run", + "package_name": "my_test_code" + }, + "task_key": "TestTask" + } +] + +=== Expecting 1 wheel to be uploaded +>>> jq .path out.requests.txt +"/api/2.0/workspace-files/import-file/Workspace/Users/[USERNAME]/.bundle/test-bundle/default/artifacts/.internal/my_test_code-0.2.0-py3-none-any.whl" +"/api/2.0/workspace-files/import-file/Workspace/Users/[USERNAME]/.bundle/test-bundle/default/files/dist/my_test_code-0.2.0-py3-none-any.whl" +"/api/2.0/workspace-files/import-file/Workspace/Users/[USERNAME]/.bundle/test-bundle/default/files/my_test_code/__init__.py" 
+"/api/2.0/workspace-files/import-file/Workspace/Users/[USERNAME]/.bundle/test-bundle/default/files/out.requests.txt" +"/api/2.0/workspace-files/import-file/Workspace/Users/[USERNAME]/.bundle/test-bundle/default/files/output.txt" +"/api/2.0/workspace-files/import-file/Workspace/Users/[USERNAME]/.bundle/test-bundle/default/state/deploy.lock" +"/api/2.0/workspace-files/import-file/Workspace/Users/[USERNAME]/.bundle/test-bundle/default/state/deployment.json" +"/api/2.0/workspace-files/import-file/Workspace/Users/[USERNAME]/.bundle/test-bundle/default/state/metadata.json" +"/api/2.0/workspace-files/import-file/Workspace/Users/[USERNAME]/.bundle/test-bundle/default/state/terraform.tfstate" + +=== Restore config to target old wheel +>>> update_file.py databricks.yml ./dist/*.whl ./dist/my*0.1.0*.whl + +>>> [CLI] bundle deploy +Building python_artifact... +Uploading dist/my_test_code-0.1.0-py3-none-any.whl... +Uploading dist/my_test_code-0.2.0-py3-none-any.whl... +Uploading bundle files to /Workspace/Users/[USERNAME]/.bundle/test-bundle/default/files... +Deploying resources... +Updating deployment state... +Deployment complete! 
+ +>>> find.py --expect 2 whl +dist/my_test_code-0.1.0-py3-none-any.whl +dist/my_test_code-0.2.0-py3-none-any.whl + +=== Expecting 1 wheel in libraries section in /jobs/reset +>>> jq -s .[] | select(.path=="/api/2.2/jobs/reset") | .body.new_settings.tasks out.requests.txt +[ + { + "existing_cluster_id": "0717-aaaaa-bbbbbb", + "libraries": [ + { + "whl": "/Workspace/Users/[USERNAME]/.bundle/test-bundle/default/artifacts/.internal/my_test_code-0.1.0-py3-none-any.whl" + } + ], + "python_wheel_task": { + "entry_point": "run", + "package_name": "my_test_code" + }, + "task_key": "TestTask" + } +] + +=== Expecting 1 wheel to be uploaded +>>> jq .path out.requests.txt +"/api/2.0/workspace-files/import-file/Workspace/Users/[USERNAME]/.bundle/test-bundle/default/artifacts/.internal/my_test_code-0.1.0-py3-none-any.whl" +"/api/2.0/workspace-files/import-file/Workspace/Users/[USERNAME]/.bundle/test-bundle/default/artifacts/.internal/my_test_code-0.2.0-py3-none-any.whl" +"/api/2.0/workspace-files/import-file/Workspace/Users/[USERNAME]/.bundle/test-bundle/default/files/databricks.yml" +"/api/2.0/workspace-files/import-file/Workspace/Users/[USERNAME]/.bundle/test-bundle/default/files/dist/my_test_code-0.2.0-py3-none-any.whl" +"/api/2.0/workspace-files/import-file/Workspace/Users/[USERNAME]/.bundle/test-bundle/default/files/out.requests.txt" +"/api/2.0/workspace-files/import-file/Workspace/Users/[USERNAME]/.bundle/test-bundle/default/files/output.txt" +"/api/2.0/workspace-files/import-file/Workspace/Users/[USERNAME]/.bundle/test-bundle/default/state/deploy.lock" +"/api/2.0/workspace-files/import-file/Workspace/Users/[USERNAME]/.bundle/test-bundle/default/state/deployment.json" +"/api/2.0/workspace-files/import-file/Workspace/Users/[USERNAME]/.bundle/test-bundle/default/state/metadata.json" +"/api/2.0/workspace-files/import-file/Workspace/Users/[USERNAME]/.bundle/test-bundle/default/state/terraform.tfstate" diff --git a/acceptance/bundle/artifacts/whl_change_version/script 
b/acceptance/bundle/artifacts/whl_change_version/script new file mode 100644 index 0000000000..2ccc104307 --- /dev/null +++ b/acceptance/bundle/artifacts/whl_change_version/script @@ -0,0 +1,39 @@ +trace $CLI bundle deploy + +trace find.py --expect 1 whl + +title "Expecting 1 wheel in libraries section in /jobs/create" +trace jq -s '.[] | select(.path=="/api/2.2/jobs/create") | .body.tasks' out.requests.txt + +title "Expecting 1 wheel to be uploaded" +trace jq .path out.requests.txt | grep import | sort + +rm out.requests.txt + + +trace update_file.py my_test_code/__init__.py 0.1.0 0.2.0 +trace $CLI bundle deploy + +trace find.py --expect 2 whl # there are now 2 wheels on disk + +title "Expecting 1 wheel in libraries section in /jobs/reset" +trace jq -s '.[] | select(.path=="/api/2.2/jobs/reset") | .body.new_settings.tasks' out.requests.txt + +title "Expecting 1 wheel to be uploaded" +trace jq .path out.requests.txt | grep import | sort + +rm out.requests.txt + + +title 'Restore config to target old wheel' +trace update_file.py databricks.yml './dist/*.whl' './dist/my*0.1.0*.whl' +trace $CLI bundle deploy +trace find.py --expect 2 whl + +title "Expecting 1 wheel in libraries section in /jobs/reset" +trace jq -s '.[] | select(.path=="/api/2.2/jobs/reset") | .body.new_settings.tasks' out.requests.txt + +title "Expecting 1 wheel to be uploaded" +trace jq .path out.requests.txt | grep import | sort + +rm out.requests.txt diff --git a/acceptance/bundle/artifacts/whl_change_version/setup.py b/acceptance/bundle/artifacts/whl_change_version/setup.py new file mode 100644 index 0000000000..7a1317b2f5 --- /dev/null +++ b/acceptance/bundle/artifacts/whl_change_version/setup.py @@ -0,0 +1,15 @@ +from setuptools import setup, find_packages + +import my_test_code + +setup( + name="my_test_code", + version=my_test_code.__version__, + author=my_test_code.__author__, + url="https://databricks.com", + author_email="john.doe@databricks.com", + description="my test wheel", + 
packages=find_packages(include=["my_test_code"]), + entry_points={"group_1": "run=my_test_code.__main__:main"}, + install_requires=["setuptools"], +) diff --git a/bundle/artifacts/build.go b/bundle/artifacts/build.go index ff30e10fbd..e062ee0b17 100644 --- a/bundle/artifacts/build.go +++ b/bundle/artifacts/build.go @@ -38,8 +38,6 @@ func (m *build) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagnostics { }) } - cleanupPythonDistBuild(ctx, b) - for _, artifactName := range utils.SortedKeys(b.Config.Artifacts) { a := b.Config.Artifacts[artifactName] diff --git a/bundle/artifacts/cleanup.go b/bundle/artifacts/cleanup.go deleted file mode 100644 index 9937178206..0000000000 --- a/bundle/artifacts/cleanup.go +++ /dev/null @@ -1,38 +0,0 @@ -package artifacts - -import ( - "context" - "os" - "path/filepath" - - "github.com/databricks/cli/bundle" - "github.com/databricks/cli/libs/log" - "github.com/databricks/cli/libs/python" - "github.com/databricks/cli/libs/utils" -) - -func cleanupPythonDistBuild(ctx context.Context, b *bundle.Bundle) { - removeFolders := make(map[string]bool, len(b.Config.Artifacts)) - cleanupWheelFolders := make(map[string]bool, len(b.Config.Artifacts)) - - for _, artifactName := range utils.SortedKeys(b.Config.Artifacts) { - artifact := b.Config.Artifacts[artifactName] - if artifact.Type == "whl" && artifact.BuildCommand != "" { - dir := artifact.Path - removeFolders[filepath.Join(dir, "dist")] = true - cleanupWheelFolders[dir] = true - } - } - - for _, dir := range utils.SortedKeys(removeFolders) { - err := os.RemoveAll(dir) - if err != nil { - log.Infof(ctx, "Failed to remove %s: %s", dir, err) - } - } - - for _, dir := range utils.SortedKeys(cleanupWheelFolders) { - log.Infof(ctx, "Cleaning up Python build artifacts in %s", dir) - python.CleanupWheelFolder(dir) - } -} diff --git a/bundle/artifacts/expand_globs.go b/bundle/artifacts/expand_globs.go index 1d9e188a0c..af358d3dda 100644 --- a/bundle/artifacts/expand_globs.go +++ 
b/bundle/artifacts/expand_globs.go @@ -9,6 +9,7 @@ import ( "github.com/databricks/cli/libs/diag" "github.com/databricks/cli/libs/dyn" "github.com/databricks/cli/libs/log" + "github.com/databricks/cli/libs/patchwheel" ) func createGlobError(v dyn.Value, p dyn.Path, message string) diag.Diagnostic { @@ -72,6 +73,12 @@ func (e expandGlobs) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagnosti return v, nil } + // Note, we're applying this for all artifact types, not just "whl". + // Rationale: + // 1. type is optional + // 2. if you have wheels in other artifact type, maybe you still want the filter logic? impossible to say. + matches = patchwheel.FilterLatestWheels(ctx, matches) + if len(matches) == 1 && matches[0] == source { // No glob expansion was performed. // Keep node unchanged. We need to ensure that "patched" field remains and not wiped out by code below. diff --git a/bundle/config/mutator/resourcemutator/expand_pipeline_glob_paths.go b/bundle/config/mutator/resourcemutator/expand_pipeline_glob_paths.go index a89de3c0cc..38f835793e 100644 --- a/bundle/config/mutator/resourcemutator/expand_pipeline_glob_paths.go +++ b/bundle/config/mutator/resourcemutator/expand_pipeline_glob_paths.go @@ -9,6 +9,7 @@ import ( "github.com/databricks/cli/bundle/libraries" "github.com/databricks/cli/libs/diag" "github.com/databricks/cli/libs/dyn" + "github.com/databricks/cli/libs/patchwheel" ) type expandPipelineGlobPaths struct{} @@ -17,7 +18,7 @@ func ExpandPipelineGlobPaths() bundle.Mutator { return &expandPipelineGlobPaths{} } -func (m *expandPipelineGlobPaths) expandLibrary(dir string, v dyn.Value) ([]dyn.Value, error) { +func (m *expandPipelineGlobPaths) expandLibrary(ctx context.Context, dir string, v dyn.Value) ([]dyn.Value, error) { // Probe for the path field in the library. for _, p := range []dyn.Path{ dyn.NewPath(dyn.Key("notebook"), dyn.Key("path")), @@ -47,6 +48,8 @@ func (m *expandPipelineGlobPaths) expandLibrary(dir string, v dyn.Value) ([]dyn. 
return []dyn.Value{v}, nil } + matches = patchwheel.FilterLatestWheels(ctx, matches) + // Emit a new value for each match. var ev []dyn.Value for _, match := range matches { @@ -69,7 +72,7 @@ func (m *expandPipelineGlobPaths) expandLibrary(dir string, v dyn.Value) ([]dyn. return []dyn.Value{v}, nil } -func (m *expandPipelineGlobPaths) expandSequence(dir string, p dyn.Path, v dyn.Value) (dyn.Value, error) { +func (m *expandPipelineGlobPaths) expandSequence(ctx context.Context, dir string, p dyn.Path, v dyn.Value) (dyn.Value, error) { s, ok := v.AsSequence() if !ok { return dyn.InvalidValue, fmt.Errorf("expected sequence, got %s", v.Kind()) @@ -77,7 +80,7 @@ func (m *expandPipelineGlobPaths) expandSequence(dir string, p dyn.Path, v dyn.V var vs []dyn.Value for _, sv := range s { - v, err := m.expandLibrary(dir, sv) + v, err := m.expandLibrary(ctx, dir, sv) if err != nil { return dyn.InvalidValue, err } @@ -88,7 +91,7 @@ func (m *expandPipelineGlobPaths) expandSequence(dir string, p dyn.Path, v dyn.V return dyn.NewValue(vs, v.Locations()), nil } -func (m *expandPipelineGlobPaths) Apply(_ context.Context, b *bundle.Bundle) diag.Diagnostics { +func (m *expandPipelineGlobPaths) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagnostics { err := b.Config.Mutate(func(v dyn.Value) (dyn.Value, error) { p := dyn.NewPattern( dyn.Key("resources"), @@ -99,7 +102,7 @@ func (m *expandPipelineGlobPaths) Apply(_ context.Context, b *bundle.Bundle) dia // Visit each pipeline's "libraries" field and expand any glob patterns. 
return dyn.MapByPattern(v, p, func(path dyn.Path, value dyn.Value) (dyn.Value, error) { - return m.expandSequence(b.BundleRootPath, path, value) + return m.expandSequence(ctx, b.BundleRootPath, path, value) }) }) diff --git a/bundle/libraries/expand_glob_references.go b/bundle/libraries/expand_glob_references.go index 7a808f6270..039157b086 100644 --- a/bundle/libraries/expand_glob_references.go +++ b/bundle/libraries/expand_glob_references.go @@ -9,6 +9,7 @@ import ( "github.com/databricks/cli/bundle" "github.com/databricks/cli/libs/diag" "github.com/databricks/cli/libs/dyn" + "github.com/databricks/cli/libs/patchwheel" ) type expand struct{} @@ -37,7 +38,7 @@ func getLibDetails(v dyn.Value) (string, string, bool) { return "", "", false } -func findMatches(b *bundle.Bundle, path string) ([]string, error) { +func findMatches(ctx context.Context, b *bundle.Bundle, path string) ([]string, error) { matches, err := filepath.Glob(filepath.Join(b.SyncRootPath, path)) if err != nil { return nil, err @@ -51,6 +52,8 @@ func findMatches(b *bundle.Bundle, path string) ([]string, error) { } } + matches = patchwheel.FilterLatestWheels(ctx, matches) + // We make the matched path relative to the sync root path before storing it // to allow upload mutator to distinguish between local and remote paths for i, match := range matches { @@ -69,7 +72,7 @@ func isGlobPattern(path string) bool { return strings.ContainsAny(path, "*?[") } -func expandLibraries(b *bundle.Bundle, p dyn.Path, v dyn.Value) (diag.Diagnostics, []dyn.Value) { +func expandLibraries(ctx context.Context, b *bundle.Bundle, p dyn.Path, v dyn.Value) (diag.Diagnostics, []dyn.Value) { var output []dyn.Value var diags diag.Diagnostics @@ -84,7 +87,7 @@ func expandLibraries(b *bundle.Bundle, p dyn.Path, v dyn.Value) (diag.Diagnostic lp = lp.Append(dyn.Key(libType)) - matches, err := findMatches(b, path) + matches, err := findMatches(ctx, b, path) if err != nil { diags = diags.Append(matchError(lp, lib.Locations(), 
err.Error())) continue @@ -100,7 +103,7 @@ func expandLibraries(b *bundle.Bundle, p dyn.Path, v dyn.Value) (diag.Diagnostic return diags, output } -func expandEnvironmentDeps(b *bundle.Bundle, p dyn.Path, v dyn.Value) (diag.Diagnostics, []dyn.Value) { +func expandEnvironmentDeps(ctx context.Context, b *bundle.Bundle, p dyn.Path, v dyn.Value) (diag.Diagnostics, []dyn.Value) { var output []dyn.Value var diags diag.Diagnostics @@ -113,7 +116,7 @@ func expandEnvironmentDeps(b *bundle.Bundle, p dyn.Path, v dyn.Value) (diag.Diag continue } - matches, err := findMatches(b, path) + matches, err := findMatches(ctx, b, path) if err != nil { diags = diags.Append(matchError(lp, dep.Locations(), err.Error())) continue @@ -129,7 +132,7 @@ func expandEnvironmentDeps(b *bundle.Bundle, p dyn.Path, v dyn.Value) (diag.Diag type expandPattern struct { pattern dyn.Pattern - fn func(b *bundle.Bundle, p dyn.Path, v dyn.Value) (diag.Diagnostics, []dyn.Value) + fn func(ctx context.Context, b *bundle.Bundle, p dyn.Path, v dyn.Value) (diag.Diagnostics, []dyn.Value) } var taskLibrariesPattern = dyn.NewPattern( @@ -184,7 +187,7 @@ func (e *expand) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagnostics { var err error for _, expander := range expanders { v, err = dyn.MapByPattern(v, expander.pattern, func(p dyn.Path, lv dyn.Value) (dyn.Value, error) { - d, output := expander.fn(b, p, lv) + d, output := expander.fn(ctx, b, p, lv) diags = diags.Extend(d) return dyn.V(output), nil }) diff --git a/libs/patchwheel/filter.go b/libs/patchwheel/filter.go new file mode 100644 index 0000000000..f792b14ef3 --- /dev/null +++ b/libs/patchwheel/filter.go @@ -0,0 +1,57 @@ +package patchwheel + +import ( + "context" + + "github.com/databricks/cli/libs/log" +) + +// FilterLatestWheels iterates over provided wheel file paths, groups them by distribution name +// and, for every group, keeps only the wheel that has the latest version according to a best-effort +// comparison of the version strings. 
Returned slice preserves the order of the input slice – the +// first occurrence of the chosen wheel for every distribution is retained. +// +// The comparison is *heuristic*: the algorithm tokenises version strings into alternating numeric +// and non-numeric chunks. Numeric chunks are compared as integers, while non-numeric chunks are +// compared lexicographically. This covers common cases such as "1.2.10" > "1.2.3" and timestamps +// added via calculateNewVersion (e.g. "1.2.3+1741091696…" > "1.2.3"). It does not attempt to +// implement the full PEP 440 specification, which is unnecessary for the dynamic versions +// produced by this package. +func FilterLatestWheels(ctx context.Context, paths []string) []string { + // Build output incrementally, preserving the order of the *chosen* wheels. + out := make([]string, 0, len(paths)) + + // distribution -> index in out slice + bestIdx := make(map[string]int) + + for _, p := range paths { + info, err := ParseWheelFilename(p) + if err != nil { + // Unparsable: always keep. + out = append(out, p) + continue + } + + if idx, seen := bestIdx[info.Distribution]; !seen { + // First wheel for this distribution. + bestIdx[info.Distribution] = len(out) + out = append(out, p) + continue + } else { + // Compare against the current winner. + winnerPath := out[idx] + winnerInfo, _ := ParseWheelFilename(winnerPath) // guaranteed parseable + + if compareVersion(info.Version, winnerInfo.Version) > 0 { + // Current wheel wins: replace earlier entry in-place. + log.Debugf(ctx, "Skipping wheel %s (older than %s)", winnerPath, p) + out[idx] = p + } else { + // Current wheel loses. 
+ log.Debugf(ctx, "Skipping wheel %s (older than %s)", p, winnerPath) + } + } + } + + return out +} diff --git a/libs/patchwheel/filter_test.go b/libs/patchwheel/filter_test.go new file mode 100644 index 0000000000..2ed2af831b --- /dev/null +++ b/libs/patchwheel/filter_test.go @@ -0,0 +1,33 @@ +package patchwheel + +import ( + "context" + "testing" + + "github.com/stretchr/testify/require" +) + +func TestFilterLatestWheels(t *testing.T) { + paths := []string{ + "project_name_bvs7tide6bhhpjy4dmcsb2qg44-0.0.1+20250604.74809-py3-none-any.whl", + "not-a-wheel.txt", + "mypkg-0.1.0-py3-none-any.whl", + "mypkg-0.2.0-py3-none-any.whl", + "other-1.0.0-py3-none-any.whl", + "other-0.9.0-py3-none-any.whl", + "project_name_bvs7tide6bhhpjy4dmcsb2qg44-0.0.1+20250604.74804-py3-none-any.whl", + "not-a-wheel.whl", + "hello-1.2.3-py3-none-any.whl", + "hello-1.2.3+1741091696780123321-py3-none-any.whl", + } + + filtered := FilterLatestWheels(context.Background(), paths) + require.ElementsMatch(t, []string{ + "project_name_bvs7tide6bhhpjy4dmcsb2qg44-0.0.1+20250604.74809-py3-none-any.whl", + "not-a-wheel.txt", + "mypkg-0.2.0-py3-none-any.whl", + "other-1.0.0-py3-none-any.whl", + "not-a-wheel.whl", + "hello-1.2.3+1741091696780123321-py3-none-any.whl", + }, filtered) +} diff --git a/libs/patchwheel/version.go b/libs/patchwheel/version.go new file mode 100644 index 0000000000..af5b49bf58 --- /dev/null +++ b/libs/patchwheel/version.go @@ -0,0 +1,79 @@ +package patchwheel + +import ( + "strconv" + "unicode" +) + +// compareVersion compares version strings a and b. +// Returns: +// +// 1 if a > b +// -1 if a < b +// 0 if equal. +// +// The algorithm splits each string into consecutive numeric and non-numeric tokens and compares them +// pair-wise: +// - Numeric tokens are compared as integers. +// - Non-numeric tokens are compared lexicographically. +// - Numeric tokens are considered greater than non-numeric tokens when types differ. 
// compareVersion compares version strings a and b.
// Returns:
//
//	 1 if a > b
//	-1 if a < b
//	 0 if equal.
//
// The algorithm splits each string into consecutive numeric and non-numeric tokens
// (see tokenizeVersion) and compares them pair-wise:
//   - Two numeric tokens are compared as integers. Leading zeros are ignored and
//     numbers of arbitrary length are handled exactly (no int64 overflow).
//   - Any other pair of tokens is compared lexicographically as strings.
//
// When all shared tokens are equal, the version with extra trailing tokens is
// considered greater (e.g. "1.2.3.0" > "1.2.3" and "1.2.3+2" > "1.2.3").
//
// This is a heuristic, not a full PEP 440 implementation; it covers common cases
// such as "1.2.10" > "1.2.3" and timestamp suffixes like "1.2.3+1741091696…" > "1.2.3".
func compareVersion(a, b string) int {
	ta := tokenizeVersion(a)
	tb := tokenizeVersion(b)

	minLen := min(len(ta), len(tb))

	for i := 0; i < minLen; i++ {
		tokA, tokB := ta[i], tb[i]

		var c int
		if tokA.numeric && tokB.numeric {
			c = compareNumeric(tokA.value, tokB.value)
		} else {
			c = strings.Compare(tokA.value, tokB.value)
		}
		if c != 0 {
			return c
		}
	}

	// All shared tokens are equal; the longer version wins if it has extra tokens.
	switch {
	case len(ta) > len(tb):
		return 1
	case len(tb) > len(ta):
		return -1
	default:
		return 0
	}
}

// compareNumeric compares two strings of ASCII digits as integers without
// converting them, so values of any magnitude compare correctly
// (strconv.Atoi would silently saturate on overflow and report huge
// distinct numbers as equal).
func compareNumeric(a, b string) int {
	a = strings.TrimLeft(a, "0")
	b = strings.TrimLeft(b, "0")
	// With leading zeros removed, a longer digit string is always the larger
	// number; equal-length digit strings compare correctly byte-wise.
	if len(a) != len(b) {
		if len(a) > len(b) {
			return 1
		}
		return -1
	}
	return strings.Compare(a, b)
}

// versionToken is a maximal run of either digits or non-digits within a version string.
type versionToken struct {
	numeric bool   // true if value consists solely of ASCII digits
	value   string // the literal substring of the version
}

// tokenizeVersion splits v into alternating numeric and non-numeric tokens,
// e.g. "1.2.3+abc" -> ["1", ".", "2", ".", "3", "+abc"].
func tokenizeVersion(v string) []versionToken {
	var tokens []versionToken
	start := 0
	for start < len(v) {
		isDigit := unicode.IsDigit(rune(v[start]))
		end := start
		for end < len(v) && unicode.IsDigit(rune(v[end])) == isDigit {
			end++
		}
		tokens = append(tokens, versionToken{numeric: isDigit, value: v[start:end]})
		start = end
	}
	return tokens
}
matches) -}