From f01c8cd5e3c18dadae236eaab583caf78797dbc2 Mon Sep 17 00:00:00 2001 From: Pieter Noordhuis Date: Thu, 15 Aug 2024 10:26:02 +0200 Subject: [PATCH 01/18] Take optional list of paths to include in a fileset --- bundle/config/sync.go | 3 + .../config/validate/validate_sync_patterns.go | 2 +- bundle/deploy/state_test.go | 4 +- bundle/deploy/state_update_test.go | 2 +- libs/fileset/fileset.go | 58 ++++++++++++++++--- libs/fileset/glob_test.go | 12 ++-- libs/git/fileset.go | 10 ++-- libs/git/fileset_test.go | 4 +- libs/sync/snapshot_state_test.go | 2 +- libs/sync/snapshot_test.go | 28 ++++----- libs/sync/sync.go | 6 +- 11 files changed, 88 insertions(+), 43 deletions(-) diff --git a/bundle/config/sync.go b/bundle/config/sync.go index 0580e4c4ff..8005969f60 100644 --- a/bundle/config/sync.go +++ b/bundle/config/sync.go @@ -1,6 +1,9 @@ package config type Sync struct { + // Paths contains a list of globs evaluated relative to the bundle root path + Paths []string `json:"paths,omitempty"` + // Include contains a list of globs evaluated relative to the bundle root path // to explicitly include files that were excluded by the user's gitignore. Include []string `json:"include,omitempty"` diff --git a/bundle/config/validate/validate_sync_patterns.go b/bundle/config/validate/validate_sync_patterns.go index fd011bf780..52f06835c4 100644 --- a/bundle/config/validate/validate_sync_patterns.go +++ b/bundle/config/validate/validate_sync_patterns.go @@ -63,7 +63,7 @@ func checkPatterns(patterns []string, path string, rb bundle.ReadOnlyBundle) (di return err } - all, err := fs.All() + all, err := fs.Files() if err != nil { return err } diff --git a/bundle/deploy/state_test.go b/bundle/deploy/state_test.go index 5e1e542300..d149b0efaf 100644 --- a/bundle/deploy/state_test.go +++ b/bundle/deploy/state_test.go @@ -18,7 +18,7 @@ func TestFromSlice(t *testing.T) { testutil.Touch(t, tmpDir, "test2.py") testutil.Touch(t, tmpDir, "test3.py") - files, err := fileset.All() + files, err := fileset.Files() require.NoError(t, err) f, err := FromSlice(files) @@ -38,7 +38,7 @@ func TestToSlice(t *testing.T) { testutil.Touch(t, tmpDir, "test2.py") testutil.Touch(t, tmpDir, "test3.py") - files, err := fileset.All() + files, err := fileset.Files() require.NoError(t, err) f, err := FromSlice(files) diff --git a/bundle/deploy/state_update_test.go b/bundle/deploy/state_update_test.go index 2982546d5d..72096d1429 100644 --- a/bundle/deploy/state_update_test.go +++ b/bundle/deploy/state_update_test.go @@ -23,7 +23,7 @@ func setupBundleForStateUpdate(t *testing.T) *bundle.Bundle { testutil.Touch(t, tmpDir, "test1.py") testutil.TouchNotebook(t, tmpDir, "test2.py") - files, err := fileset.New(vfs.MustNew(tmpDir)).All() + files, err := fileset.New(vfs.MustNew(tmpDir)).Files() require.NoError(t, err) return &bundle.Bundle{ diff --git a/libs/fileset/fileset.go b/libs/fileset/fileset.go index d0f00f97ae..69fd4eb3b0 100644 --- a/libs/fileset/fileset.go +++ b/libs/fileset/fileset.go @@ -4,24 +4,49 @@ import ( "fmt" "io/fs" "os" + pathlib "path" + "path/filepath" "github.com/databricks/cli/libs/vfs" ) -// FileSet facilitates fast recursive file listing of a path. +// FileSet facilitates recursive file listing for paths rooted at a given directory. // It optionally takes into account ignore rules through the [Ignorer] interface. type FileSet struct { // Root path of the fileset. root vfs.Path + // Paths to include in the fileset. + // Files are included as-is (if not ignored) and directories are traversed recursively. + // Can be equal to [fileset.All] to recursively traverse the root. + paths []string + // Ignorer interface to check if a file or directory should be ignored. ignore Ignorer } // New returns a [FileSet] for the given root path. -func New(root vfs.Path) *FileSet { +// It optionally accepts a list of paths relative to the root to include in the fileset. +// If not specified, it defaults to including all files in the root path. +func New(root vfs.Path, args ...[]string) *FileSet { + // Default to including all files in the root path. + if len(args) == 0 { + args = [][]string{{"."}} + } + + // Collect list of normalized and cleaned paths. + var paths []string + for _, arg := range args { + for _, path := range arg { + path = filepath.ToSlash(path) + path = pathlib.Clean(path) + paths = append(paths, path) + } + } + return &FileSet{ root: root, + paths: paths, ignore: nopIgnorer{}, } } @@ -36,16 +61,27 @@ func (w *FileSet) SetIgnorer(ignore Ignorer) { w.ignore = ignore } -// Return all tracked files for Repo -func (w *FileSet) All() ([]File, error) { - return w.recursiveListFiles() +// Files returns performs recursive listing on all configured paths and returns +// the collection of files it finds (and are not ignored). +// The returned slice does not contain duplicates. +// The order of files in the slice is stable. +func (w *FileSet) Files() (out []File, err error) { + seen := make(map[string]struct{}) + for _, p := range w.paths { + files, err := w.recursiveListFiles(p, seen) + if err != nil { + return nil, err + } + out = append(out, files...) + } + return out, nil } // Recursively traverses dir in a depth first manner and returns a list of all files // that are being tracked in the FileSet (ie not being ignored for matching one of the // patterns in w.ignore) -func (w *FileSet) recursiveListFiles() (fileList []File, err error) { - err = fs.WalkDir(w.root, ".", func(name string, d fs.DirEntry, err error) error { +func (w *FileSet) recursiveListFiles(path string, seen map[string]struct{}) (out []File, err error) { + err = fs.WalkDir(w.root, path, func(name string, d fs.DirEntry, err error) error { if err != nil { return err } @@ -78,7 +114,13 @@ func (w *FileSet) recursiveListFiles() (fileList []File, err error) { return nil } - fileList = append(fileList, NewFile(w.root, d, name)) + // Skip duplicates + if _, ok := seen[name]; ok { + return nil + } + + seen[name] = struct{}{} + out = append(out, NewFile(w.root, d, name)) return nil }) return diff --git a/libs/fileset/glob_test.go b/libs/fileset/glob_test.go index 8418df73a2..793f35f260 100644 --- a/libs/fileset/glob_test.go +++ b/libs/fileset/glob_test.go @@ -29,7 +29,7 @@ func TestGlobFileset(t *testing.T) { }) require.NoError(t, err) - files, err := g.All() + files, err := g.Files() require.NoError(t, err) // +1 as there's one folder in ../filer @@ -46,7 +46,7 @@ func TestGlobFileset(t *testing.T) { }) require.NoError(t, err) - files, err = g.All() + files, err = g.Files() require.NoError(t, err) require.Equal(t, len(files), 0) } @@ -61,7 +61,7 @@ func TestGlobFilesetWithRelativeRoot(t *testing.T) { }) require.NoError(t, err) - files, err := g.All() + files, err := g.Files() require.NoError(t, err) require.Equal(t, len(files), len(entries)) } @@ -82,7 +82,7 @@ func TestGlobFilesetRecursively(t *testing.T) { }) require.NoError(t, err) - files, err := g.All() + files, err := g.Files() require.NoError(t, err) require.ElementsMatch(t, entries, collectRelativePaths(files)) } @@ -103,7 +103,7 @@ func TestGlobFilesetDir(t *testing.T) { }) require.NoError(t, err) - files, err := g.All() + files, err := g.Files() require.NoError(t, err) require.ElementsMatch(t, entries, collectRelativePaths(files)) } @@ -124,7 +124,7 @@ func TestGlobFilesetDoubleQuotesWithFilePatterns(t *testing.T) { }) require.NoError(t, err) - files, err := g.All() + files, err := g.Files() require.NoError(t, err) require.ElementsMatch(t, entries, collectRelativePaths(files)) } diff --git a/libs/git/fileset.go b/libs/git/fileset.go index f1986aa201..bb1cd4692f 100644 --- a/libs/git/fileset.go +++ b/libs/git/fileset.go @@ -7,15 +7,15 @@ import ( // FileSet is Git repository aware implementation of [fileset.FileSet]. // It forces checking if gitignore files have been modified every -// time a call to [FileSet.All] is made. +// time a call to [FileSet.Files] is made. type FileSet struct { fileset *fileset.FileSet view *View } // NewFileSet returns [FileSet] for the Git repository located at `root`. -func NewFileSet(root vfs.Path) (*FileSet, error) { - fs := fileset.New(root) +func NewFileSet(root vfs.Path, paths ...[]string) (*FileSet, error) { + fs := fileset.New(root, paths...) v, err := NewView(root) if err != nil { return nil, err @@ -35,9 +35,9 @@ func (f *FileSet) IgnoreDirectory(dir string) (bool, error) { return f.view.IgnoreDirectory(dir) } -func (f *FileSet) All() ([]fileset.File, error) { +func (f *FileSet) Files() ([]fileset.File, error) { f.view.repo.taintIgnoreRules() - return f.fileset.All() + return f.fileset.Files() } func (f *FileSet) EnsureValidGitIgnoreExists() error { diff --git a/libs/git/fileset_test.go b/libs/git/fileset_test.go index 4e6172bfd0..37f3611d1f 100644 --- a/libs/git/fileset_test.go +++ b/libs/git/fileset_test.go @@ -15,7 +15,7 @@ import ( func testFileSetAll(t *testing.T, root string) { fileSet, err := NewFileSet(vfs.MustNew(root)) require.NoError(t, err) - files, err := fileSet.All() + files, err := fileSet.Files() require.NoError(t, err) require.Len(t, files, 3) assert.Equal(t, path.Join("a", "b", "world.txt"), files[0].Relative) @@ -37,7 +37,7 @@ func TestFileSetNonCleanRoot(t *testing.T) { // This should yield the same result as above test. fileSet, err := NewFileSet(vfs.MustNew("./testdata/../testdata")) require.NoError(t, err) - files, err := fileSet.All() + files, err := fileSet.Files() require.NoError(t, err) assert.Len(t, files, 3) } diff --git a/libs/sync/snapshot_state_test.go b/libs/sync/snapshot_state_test.go index 92c14e8e06..248e5832ca 100644 --- a/libs/sync/snapshot_state_test.go +++ b/libs/sync/snapshot_state_test.go @@ -13,7 +13,7 @@ import ( func TestSnapshotState(t *testing.T) { fileSet := fileset.New(vfs.MustNew("./testdata/sync-fileset")) - files, err := fileSet.All() + files, err := fileSet.Files() require.NoError(t, err) // Assert initial contents of the fileset diff --git a/libs/sync/snapshot_test.go b/libs/sync/snapshot_test.go index 050b5d9653..b7830406de 100644 --- a/libs/sync/snapshot_test.go +++ b/libs/sync/snapshot_test.go @@ -47,7 +47,7 @@ func TestDiff(t *testing.T) { defer f2.Close(t) // New files are put - files, err := fileSet.All() + files, err := fileSet.Files() assert.NoError(t, err) change, err := state.diff(ctx, files) assert.NoError(t, err) @@ -62,7 +62,7 @@ func TestDiff(t *testing.T) { // world.txt is editted f2.Overwrite(t, "bunnies are cute.") assert.NoError(t, err) - files, err = fileSet.All() + files, err = fileSet.Files() assert.NoError(t, err) change, err = state.diff(ctx, files) assert.NoError(t, err) @@ -77,7 +77,7 @@ func TestDiff(t *testing.T) { // hello.txt is deleted f1.Remove(t) assert.NoError(t, err) - files, err = fileSet.All() + files, err = fileSet.Files() assert.NoError(t, err) change, err = state.diff(ctx, files) assert.NoError(t, err) @@ -113,7 +113,7 @@ func TestSymlinkDiff(t *testing.T) { err = os.Symlink(filepath.Join(projectDir, "foo"), filepath.Join(projectDir, "bar")) assert.NoError(t, err) - files, err := fileSet.All() + files, err := fileSet.Files() assert.NoError(t, err) change, err := state.diff(ctx, files) assert.NoError(t, err) @@ -141,7 +141,7 @@ func TestFolderDiff(t *testing.T) { defer f1.Close(t) f1.Overwrite(t, "# Databricks notebook source\nprint(\"abc\")") - files, err := fileSet.All() + files, err := fileSet.Files() assert.NoError(t, err) change, err := state.diff(ctx, files) assert.NoError(t, err) @@ -153,7 +153,7 @@ func TestFolderDiff(t *testing.T) { assert.Contains(t, change.put, "foo/bar.py") f1.Remove(t) - files, err = fileSet.All() + files, err = fileSet.Files() assert.NoError(t, err) change, err = state.diff(ctx, files) assert.NoError(t, err) @@ -184,7 +184,7 @@ func TestPythonNotebookDiff(t *testing.T) { defer foo.Close(t) // Case 1: notebook foo.py is uploaded - files, err := fileSet.All() + files, err := fileSet.Files() assert.NoError(t, err) foo.Overwrite(t, "# Databricks notebook source\nprint(\"abc\")") change, err := state.diff(ctx, files) @@ -199,7 +199,7 @@ func TestPythonNotebookDiff(t *testing.T) { // Case 2: notebook foo.py is converted to python script by removing // magic keyword foo.Overwrite(t, "print(\"abc\")") - files, err = fileSet.All() + files, err = fileSet.Files() assert.NoError(t, err) change, err = state.diff(ctx, files) assert.NoError(t, err) @@ -213,7 +213,7 @@ func TestPythonNotebookDiff(t *testing.T) { // Case 3: Python script foo.py is converted to a databricks notebook foo.Overwrite(t, "# Databricks notebook source\nprint(\"def\")") - files, err = fileSet.All() + files, err = fileSet.Files() assert.NoError(t, err) change, err = state.diff(ctx, files) assert.NoError(t, err) @@ -228,7 +228,7 @@ func TestPythonNotebookDiff(t *testing.T) { // Case 4: Python notebook foo.py is deleted, and its remote name is used in change.delete foo.Remove(t) assert.NoError(t, err) - files, err = fileSet.All() + files, err = fileSet.Files() assert.NoError(t, err) change, err = state.diff(ctx, files) assert.NoError(t, err) @@ -260,7 +260,7 @@ func TestErrorWhenIdenticalRemoteName(t *testing.T) { defer pythonFoo.Close(t) vanillaFoo := testfile.CreateFile(t, filepath.Join(projectDir, "foo")) defer vanillaFoo.Close(t) - files, err := fileSet.All() + files, err := fileSet.Files() assert.NoError(t, err) change, err := state.diff(ctx, files) assert.NoError(t, err) @@ -271,7 +271,7 @@ func TestErrorWhenIdenticalRemoteName(t *testing.T) { // errors out because they point to the same destination pythonFoo.Overwrite(t, "# Databricks notebook source\nprint(\"def\")") - files, err = fileSet.All() + files, err = fileSet.Files() assert.NoError(t, err) change, err = state.diff(ctx, files) assert.ErrorContains(t, err, "both foo and foo.py point to the same remote file location foo. Please remove one of them from your local project") @@ -296,7 +296,7 @@ func TestNoErrorRenameWithIdenticalRemoteName(t *testing.T) { pythonFoo := testfile.CreateFile(t, filepath.Join(projectDir, "foo.py")) defer pythonFoo.Close(t) pythonFoo.Overwrite(t, "# Databricks notebook source\n") - files, err := fileSet.All() + files, err := fileSet.Files() assert.NoError(t, err) change, err := state.diff(ctx, files) assert.NoError(t, err) @@ -308,7 +308,7 @@ func TestNoErrorRenameWithIdenticalRemoteName(t *testing.T) { sqlFoo := testfile.CreateFile(t, filepath.Join(projectDir, "foo.sql")) defer sqlFoo.Close(t) sqlFoo.Overwrite(t, "-- Databricks notebook source\n") - files, err = fileSet.All() + files, err = fileSet.Files() assert.NoError(t, err) change, err = state.diff(ctx, files) assert.NoError(t, err) diff --git a/libs/sync/sync.go b/libs/sync/sync.go index 3d5bc61ec4..ffcf3878ee 100644 --- a/libs/sync/sync.go +++ b/libs/sync/sync.go @@ -195,14 +195,14 @@ func (s *Sync) GetFileList(ctx context.Context) ([]fileset.File, error) { all := set.NewSetF(func(f fileset.File) string { return f.Relative }) - gitFiles, err := s.fileSet.All() + gitFiles, err := s.fileSet.Files() if err != nil { log.Errorf(ctx, "cannot list files: %s", err) return nil, err } all.Add(gitFiles...) - include, err := s.includeFileSet.All() + include, err := s.includeFileSet.Files() if err != nil { log.Errorf(ctx, "cannot list include files: %s", err) return nil, err @@ -210,7 +210,7 @@ func (s *Sync) GetFileList(ctx context.Context) ([]fileset.File, error) { all.Add(include...) - exclude, err := s.excludeFileSet.All() + exclude, err := s.excludeFileSet.Files() if err != nil { log.Errorf(ctx, "cannot list exclude files: %s", err) return nil, err From 020d447e85c04dd636d2ef20d268e0d3a49d95fb Mon Sep 17 00:00:00 2001 From: Pieter Noordhuis Date: Thu, 15 Aug 2024 16:45:26 +0200 Subject: [PATCH 02/18] Add fileset test coverage --- libs/fileset/fileset.go | 47 ++++++------ libs/fileset/fileset_test.go | 136 +++++++++++++++++++++++++++++++++++ libs/fileset/testdata/dir1/a | 0 libs/fileset/testdata/dir1/b | 0 libs/fileset/testdata/dir2/a | 0 libs/fileset/testdata/dir2/b | 0 libs/fileset/testdata/dir3/a | 1 + 7 files changed, 164 insertions(+), 20 deletions(-) create mode 100644 libs/fileset/fileset_test.go create mode 100644 libs/fileset/testdata/dir1/a create mode 100644 libs/fileset/testdata/dir1/b create mode 100644 libs/fileset/testdata/dir2/a create mode 100644 libs/fileset/testdata/dir2/b create mode 120000 libs/fileset/testdata/dir3/a diff --git a/libs/fileset/fileset.go b/libs/fileset/fileset.go index 69fd4eb3b0..88f6ab8574 100644 --- a/libs/fileset/fileset.go +++ b/libs/fileset/fileset.go @@ -3,9 +3,9 @@ package fileset import ( "fmt" "io/fs" - "os" pathlib "path" "path/filepath" + "slices" "github.com/databricks/cli/libs/vfs" ) @@ -40,6 +40,12 @@ func New(root vfs.Path, args ...[]string) *FileSet { for _, path := range arg { path = filepath.ToSlash(path) path = pathlib.Clean(path) + + // Skip path if it's already in the list. + if slices.Contains(paths, path) { + continue + } + paths = append(paths, path) } } @@ -86,16 +92,13 @@ func (w *FileSet) recursiveListFiles(path string, seen map[string]struct{}) (out return err } - // skip symlinks info, err := d.Info() if err != nil { return err } - if info.Mode()&os.ModeSymlink != 0 { - return nil - } - if d.IsDir() { + switch { + case info.Mode().IsDir(): ign, err := w.ignore.IgnoreDirectory(name) if err != nil { return fmt.Errorf("cannot check if %s should be ignored: %w", name, err) @@ -103,24 +106,28 @@ func (w *FileSet) recursiveListFiles(path string, seen map[string]struct{}) (out if ign { return fs.SkipDir } - return nil - } - ign, err := w.ignore.IgnoreFile(name) - if err != nil { - return fmt.Errorf("cannot check if %s should be ignored: %w", name, err) - } - if ign { - return nil - } + case info.Mode().IsRegular(): + ign, err := w.ignore.IgnoreFile(name) + if err != nil { + return fmt.Errorf("cannot check if %s should be ignored: %w", name, err) + } + if ign { + return nil + } + + // Skip duplicates + if _, ok := seen[name]; ok { + return nil + } + + seen[name] = struct{}{} + out = append(out, NewFile(w.root, d, name)) - // Skip duplicates - if _, ok := seen[name]; ok { - return nil + default: + // Skip non-regular files (e.g. symlinks). } - seen[name] = struct{}{} - out = append(out, NewFile(w.root, d, name)) return nil }) return diff --git a/libs/fileset/fileset_test.go b/libs/fileset/fileset_test.go new file mode 100644 index 0000000000..a0d01b011f --- /dev/null +++ b/libs/fileset/fileset_test.go @@ -0,0 +1,136 @@ +package fileset + +import ( + "errors" + "testing" + + "github.com/databricks/cli/libs/vfs" + "github.com/stretchr/testify/assert" +) + +func TestFileSet_NoPaths(t *testing.T) { + fs := New(vfs.MustNew("testdata")) + files, err := fs.Files() + if !assert.NoError(t, err) { + return + } + + assert.Len(t, files, 4) + assert.Equal(t, "dir1/a", files[0].Relative) + assert.Equal(t, "dir1/b", files[1].Relative) + assert.Equal(t, "dir2/a", files[2].Relative) + assert.Equal(t, "dir2/b", files[3].Relative) +} + +func TestFileSet_DuplicatePaths(t *testing.T) { + fs := New(vfs.MustNew("testdata"), []string{"dir1", "dir1"}) + files, err := fs.Files() + if !assert.NoError(t, err) { + return + } + + assert.Len(t, files, 2) + assert.Equal(t, "dir1/a", files[0].Relative) + assert.Equal(t, "dir1/b", files[1].Relative) +} + +func TestFileSet_OverlappingPaths(t *testing.T) { + fs := New(vfs.MustNew("testdata"), []string{"dir1", "dir1/a"}) + files, err := fs.Files() + if !assert.NoError(t, err) { + return + } + + assert.Len(t, files, 2) + assert.Equal(t, "dir1/a", files[0].Relative) + assert.Equal(t, "dir1/b", files[1].Relative) +} + +func TestFileSet_IgnoreDirError(t *testing.T) { + testError := errors.New("test error") + fs := New(vfs.MustNew("testdata")) + fs.SetIgnorer(testIgnorer{dirErr: testError}) + _, err := fs.Files() + assert.ErrorIs(t, err, testError) +} + +func TestFileSet_IgnoreDir(t *testing.T) { + fs := New(vfs.MustNew("testdata")) + fs.SetIgnorer(testIgnorer{dir: []string{"dir1"}}) + files, err := fs.Files() + if !assert.NoError(t, err) { + return + } + + assert.Len(t, files, 2) + assert.Equal(t, "dir2/a", files[0].Relative) + assert.Equal(t, "dir2/b", files[1].Relative) +} + +func TestFileSet_IgnoreFileError(t *testing.T) { + testError := errors.New("test error") + fs := New(vfs.MustNew("testdata")) + fs.SetIgnorer(testIgnorer{fileErr: testError}) + _, err := fs.Files() + assert.ErrorIs(t, err, testError) +} + +func TestFileSet_IgnoreFile(t *testing.T) { + fs := New(vfs.MustNew("testdata")) + fs.SetIgnorer(testIgnorer{file: []string{"dir1/a"}}) + files, err := fs.Files() + if !assert.NoError(t, err) { + return + } + + assert.Len(t, files, 3) + assert.Equal(t, "dir1/b", files[0].Relative) + assert.Equal(t, "dir2/a", files[1].Relative) + assert.Equal(t, "dir2/b", files[2].Relative) +} + +type testIgnorer struct { + // dir is a list of directories to ignore. Strings are compared verbatim. + dir []string + + // dirErr is an error to return when IgnoreDirectory is called. + dirErr error + + // file is a list of files to ignore. Strings are compared verbatim. + file []string + + // fileErr is an error to return when IgnoreFile is called. + fileErr error +} + +// IgnoreDirectory returns true if the path is in the dir list. +// If dirErr is set, it returns dirErr. +func (t testIgnorer) IgnoreDirectory(path string) (bool, error) { + if t.dirErr != nil { + return false, t.dirErr + } + + for _, d := range t.dir { + if d == path { + return true, nil + } + } + + return false, nil +} + +// IgnoreFile returns true if the path is in the file list. +// If fileErr is set, it returns fileErr. +func (t testIgnorer) IgnoreFile(path string) (bool, error) { + if t.fileErr != nil { + return false, t.fileErr + } + + for _, f := range t.file { + if f == path { + return true, nil + } + } + + return false, nil +} diff --git a/libs/fileset/testdata/dir1/a b/libs/fileset/testdata/dir1/a new file mode 100644 index 0000000000..e69de29bb2 diff --git a/libs/fileset/testdata/dir1/b b/libs/fileset/testdata/dir1/b new file mode 100644 index 0000000000..e69de29bb2 diff --git a/libs/fileset/testdata/dir2/a b/libs/fileset/testdata/dir2/a new file mode 100644 index 0000000000..e69de29bb2 diff --git a/libs/fileset/testdata/dir2/b b/libs/fileset/testdata/dir2/b new file mode 100644 index 0000000000..e69de29bb2 diff --git a/libs/fileset/testdata/dir3/a b/libs/fileset/testdata/dir3/a new file mode 120000 index 0000000000..5ac5651e90 --- /dev/null +++ b/libs/fileset/testdata/dir3/a @@ -0,0 +1 @@ +../dir1/a \ No newline at end of file From bee14af98f8990acb204aee1851cca41b7aa014d Mon Sep 17 00:00:00 2001 From: Pieter Noordhuis Date: Thu, 15 Aug 2024 16:52:31 +0200 Subject: [PATCH 03/18] Test it is not possible to escape the fileset root --- libs/fileset/fileset_test.go | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/libs/fileset/fileset_test.go b/libs/fileset/fileset_test.go index a0d01b011f..be27b6b6f5 100644 --- a/libs/fileset/fileset_test.go +++ b/libs/fileset/fileset_test.go @@ -22,6 +22,14 @@ func TestFileSet_NoPaths(t *testing.T) { assert.Equal(t, "dir2/b", files[3].Relative) } +func TestFileSet_ParentPath(t *testing.T) { + fs := New(vfs.MustNew("testdata"), []string{".."}) + _, err := fs.Files() + + // It is impossible to escape the root directory. + assert.Error(t, err) +} + func TestFileSet_DuplicatePaths(t *testing.T) { fs := New(vfs.MustNew("testdata"), []string{"dir1", "dir1"}) files, err := fs.Files() From 13e22a4d2476246be1695eef069cbba0e6cc2efd Mon Sep 17 00:00:00 2001 From: Pieter Noordhuis Date: Thu, 15 Aug 2024 17:03:45 +0200 Subject: [PATCH 04/18] Revert bundle config change --- bundle/config/sync.go | 3 --- 1 file changed, 3 deletions(-) diff --git a/bundle/config/sync.go b/bundle/config/sync.go index 8005969f60..0580e4c4ff 100644 --- a/bundle/config/sync.go +++ b/bundle/config/sync.go @@ -1,9 +1,6 @@ package config type Sync struct { - // Paths contains a list of globs evaluated relative to the bundle root path - Paths []string `json:"paths,omitempty"` - // Include contains a list of globs evaluated relative to the bundle root path // to explicitly include files that were excluded by the user's gitignore. Include []string `json:"include,omitempty"` From 3ad57ff921416e2e53a57906b79bb3cc0ba46977 Mon Sep 17 00:00:00 2001 From: Pieter Noordhuis Date: Thu, 15 Aug 2024 17:04:42 +0200 Subject: [PATCH 05/18] Comment --- libs/fileset/fileset.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libs/fileset/fileset.go b/libs/fileset/fileset.go index 88f6ab8574..00c6dcfa4c 100644 --- a/libs/fileset/fileset.go +++ b/libs/fileset/fileset.go @@ -18,7 +18,7 @@ type FileSet struct { // Paths to include in the fileset. // Files are included as-is (if not ignored) and directories are traversed recursively. - // Can be equal to [fileset.All] to recursively traverse the root. + // Defaults to []string{"."} if not specified. paths []string // Ignorer interface to check if a file or directory should be ignored. From 811a08d9265cdb7fa14b4afb9738cfcb7677525a Mon Sep 17 00:00:00 2001 From: Pieter Noordhuis Date: Thu, 15 Aug 2024 17:15:00 +0200 Subject: [PATCH 06/18] Fix glob test --- libs/fileset/glob_test.go | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/libs/fileset/glob_test.go b/libs/fileset/glob_test.go index 793f35f260..9eb786db99 100644 --- a/libs/fileset/glob_test.go +++ b/libs/fileset/glob_test.go @@ -24,6 +24,11 @@ func TestGlobFileset(t *testing.T) { entries, err := root.ReadDir(".") require.NoError(t, err) + // Remove testdata folder from entries + entries = slices.DeleteFunc(entries, func(de fs.DirEntry) bool { + return de.Name() == "testdata" + }) + g, err := NewGlobSet(root, []string{ "./*.go", }) @@ -32,7 +37,6 @@ func TestGlobFileset(t *testing.T) { files, err := g.Files() require.NoError(t, err) - // +1 as there's one folder in ../filer require.Equal(t, len(files), len(entries)) for _, f := range files { exists := slices.ContainsFunc(entries, func(de fs.DirEntry) bool { From 6f5679ddba869776c5451c41c3eb87683635a73a Mon Sep 17 00:00:00 2001 From: Pieter Noordhuis Date: Fri, 16 Aug 2024 13:56:44 +0200 Subject: [PATCH 07/18] Pass through paths argument to libs/sync --- bundle/deploy/files/sync.go | 8 +++++--- cmd/sync/sync.go | 6 +++++- cmd/sync/sync_test.go | 4 ++-- libs/sync/sync.go | 14 ++++++++------ libs/sync/watchdog.go | 2 +- 5 files changed, 21 insertions(+), 13 deletions(-) diff --git a/bundle/deploy/files/sync.go b/bundle/deploy/files/sync.go index a308668d3b..dc45053f9c 100644 --- a/bundle/deploy/files/sync.go +++ b/bundle/deploy/files/sync.go @@ -28,10 +28,12 @@ func GetSyncOptions(ctx context.Context, rb bundle.ReadOnlyBundle) (*sync.SyncOp } opts := &sync.SyncOptions{ - LocalPath: rb.BundleRoot(), + LocalRoot: rb.BundleRoot(), + Paths: []string{"."}, + Include: includes, + Exclude: rb.Config().Sync.Exclude, + RemotePath: rb.Config().Workspace.FilePath, - Include: includes, - Exclude: rb.Config().Sync.Exclude, Host: rb.WorkspaceClient().Config.Host, Full: false, diff --git a/cmd/sync/sync.go b/cmd/sync/sync.go index bab4515937..23a4c018f1 100644 --- a/cmd/sync/sync.go +++ b/cmd/sync/sync.go @@ -47,7 +47,11 @@ func (f *syncFlags) syncOptionsFromArgs(cmd *cobra.Command, args []string) (*syn } opts := sync.SyncOptions{ - LocalPath: vfs.MustNew(args[0]), + LocalRoot: vfs.MustNew(args[0]), + Paths: []string{"."}, + Include: nil, + Exclude: nil, + RemotePath: args[1], Full: f.full, PollInterval: f.interval, diff --git a/cmd/sync/sync_test.go b/cmd/sync/sync_test.go index 564aeae56e..0d0c573852 100644 --- a/cmd/sync/sync_test.go +++ b/cmd/sync/sync_test.go @@ -33,7 +33,7 @@ func TestSyncOptionsFromBundle(t *testing.T) { f := syncFlags{} opts, err := f.syncOptionsFromBundle(New(), []string{}, b) require.NoError(t, err) - assert.Equal(t, tempDir, opts.LocalPath.Native()) + assert.Equal(t, tempDir, opts.LocalRoot.Native()) assert.Equal(t, "/Users/jane@doe.com/path", opts.RemotePath) assert.Equal(t, filepath.Join(tempDir, ".databricks", "bundle", "default"), opts.SnapshotBasePath) assert.NotNil(t, opts.WorkspaceClient) @@ -59,6 +59,6 @@ func TestSyncOptionsFromArgs(t *testing.T) { cmd.SetContext(root.SetWorkspaceClient(context.Background(), nil)) opts, err := f.syncOptionsFromArgs(cmd, []string{local, remote}) require.NoError(t, err) - assert.Equal(t, local, opts.LocalPath.Native()) + assert.Equal(t, local, opts.LocalRoot.Native()) assert.Equal(t, remote, opts.RemotePath) } diff --git a/libs/sync/sync.go b/libs/sync/sync.go index ffcf3878ee..9eaebf2ade 100644 --- a/libs/sync/sync.go +++ b/libs/sync/sync.go @@ -16,10 +16,12 @@ import ( ) type SyncOptions struct { - LocalPath vfs.Path + LocalRoot vfs.Path + Paths []string + Include []string + Exclude []string + RemotePath string - Include []string - Exclude []string Full bool @@ -51,7 +53,7 @@ type Sync struct { // New initializes and returns a new [Sync] instance. func New(ctx context.Context, opts SyncOptions) (*Sync, error) { - fileSet, err := git.NewFileSet(opts.LocalPath) + fileSet, err := git.NewFileSet(opts.LocalRoot, opts.Paths) if err != nil { return nil, err } @@ -61,12 +63,12 @@ func New(ctx context.Context, opts SyncOptions) (*Sync, error) { return nil, err } - includeFileSet, err := fileset.NewGlobSet(opts.LocalPath, opts.Include) + includeFileSet, err := fileset.NewGlobSet(opts.LocalRoot, opts.Include) if err != nil { return nil, err } - excludeFileSet, err := fileset.NewGlobSet(opts.LocalPath, opts.Exclude) + excludeFileSet, err := fileset.NewGlobSet(opts.LocalRoot, opts.Exclude) if err != nil { return nil, err } diff --git a/libs/sync/watchdog.go b/libs/sync/watchdog.go index ca7ec46e9b..cc2ca83c56 100644 --- a/libs/sync/watchdog.go +++ b/libs/sync/watchdog.go @@ -57,7 +57,7 @@ func (s *Sync) applyMkdir(ctx context.Context, localName string) error { func (s *Sync) applyPut(ctx context.Context, localName string) error { s.notifyProgress(ctx, EventActionPut, localName, 0.0) - localFile, err := s.LocalPath.Open(localName) + localFile, err := s.LocalRoot.Open(localName) if err != nil { return err } From 8a3f93ba38d4d8dbf1b960dff5887a9bfee3132f Mon Sep 17 00:00:00 2001 From: Pieter Noordhuis Date: Fri, 16 Aug 2024 13:49:49 +0200 Subject: [PATCH 08/18] Add paths field to bundle sync configuration MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This field allows a user to configure paths to synchronize to the workspace. Allowed values are relative paths to files and directories, anchored at the directory where the field is set. If one or more values traverse up the directory tree (to an ancestor of the bundle root directory), the CLI will dynamically figure out the root path to use to ensure that the file tree structure remains intact. For example, given a `databricks.yml` in `my_bundle` that includes: ```yaml sync: paths: - ../common - . ``` Then upon synchronization, the workspace will look like: ``` . ├── common │ └── lib.py └── my_bundle ├── databricks.yml └── notebook.py ``` If not set behavior remains identical. --- bundle/bundle.go | 6 + bundle/bundle_read_only.go | 4 + bundle/config/mutator/configure_wsfs.go | 4 +- bundle/config/mutator/rewrite_sync_paths.go | 4 + .../config/mutator/rewrite_sync_paths_test.go | 16 ++ bundle/config/mutator/sync_default_path.go | 48 +++++ .../config/mutator/sync_default_path_test.go | 82 ++++++++ bundle/config/mutator/sync_infer_root.go | 120 +++++++++++ .../mutator/sync_infer_root_internal_test.go | 53 +++++ bundle/config/mutator/sync_infer_root_test.go | 191 ++++++++++++++++++ bundle/config/mutator/translate_paths.go | 12 +- bundle/config/mutator/translate_paths_test.go | 60 +++--- bundle/config/sync.go | 4 + bundle/deploy/files/sync.go | 4 +- bundle/deploy/state_pull.go | 2 +- bundle/deploy/state_pull_test.go | 8 +- bundle/phases/initialize.go | 11 + bundle/tests/loader.go | 2 + bundle/tests/sync/paths/databricks.yml | 20 ++ .../tests/sync/paths_no_root/databricks.yml | 26 +++ .../sync/shared_code/bundle/databricks.yml | 10 + .../tests/sync/shared_code/common/library.txt | 1 + bundle/tests/sync_test.go | 38 ++++ cmd/sync/sync_test.go | 6 +- 24 files changed, 687 insertions(+), 45 deletions(-) create mode 100644 bundle/config/mutator/sync_default_path.go create mode 100644 bundle/config/mutator/sync_default_path_test.go create mode 100644 bundle/config/mutator/sync_infer_root.go create mode 100644 bundle/config/mutator/sync_infer_root_internal_test.go create mode 100644 bundle/config/mutator/sync_infer_root_test.go create mode 100644 bundle/tests/sync/paths/databricks.yml create mode 100644 bundle/tests/sync/paths_no_root/databricks.yml create mode 100644 bundle/tests/sync/shared_code/bundle/databricks.yml create mode 100644 bundle/tests/sync/shared_code/common/library.txt diff --git a/bundle/bundle.go b/bundle/bundle.go index 032d98abc8..d163ea38ae 100644 --- a/bundle/bundle.go +++ b/bundle/bundle.go @@ -39,6 +39,12 @@ type Bundle struct { // Exclusively use this field for filesystem operations. BundleRoot vfs.Path + // SyncRoot is a virtual filesystem path to the root of the files that + // are synchronized to the workspace. It can be an ancestor to [BundleRoot], + // but not a descendant; that is, [SyncRoot] must contain [BundleRoot]. + SyncRoot vfs.Path + SyncRootPath string + Config config.Root // Metadata about the bundle deployment. This is the interface Databricks services diff --git a/bundle/bundle_read_only.go b/bundle/bundle_read_only.go index 59084f2ace..74b9d94dea 100644 --- a/bundle/bundle_read_only.go +++ b/bundle/bundle_read_only.go @@ -28,6 +28,10 @@ func (r ReadOnlyBundle) BundleRoot() vfs.Path { return r.b.BundleRoot } +func (r ReadOnlyBundle) SyncRoot() vfs.Path { + return r.b.SyncRoot +} + func (r ReadOnlyBundle) WorkspaceClient() *databricks.WorkspaceClient { return r.b.WorkspaceClient() } diff --git a/bundle/config/mutator/configure_wsfs.go b/bundle/config/mutator/configure_wsfs.go index c7b764f000..1d1bec5824 100644 --- a/bundle/config/mutator/configure_wsfs.go +++ b/bundle/config/mutator/configure_wsfs.go @@ -24,7 +24,7 @@ func (m *configureWSFS) Name() string { } func (m *configureWSFS) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagnostics { - root := b.BundleRoot.Native() + root := b.SyncRoot.Native() // The bundle root must be located in /Workspace/ if !strings.HasPrefix(root, "/Workspace/") { @@ -45,6 +45,6 @@ func (m *configureWSFS) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagno return diag.FromErr(err) } - b.BundleRoot = p + b.SyncRoot = p return nil } diff --git a/bundle/config/mutator/rewrite_sync_paths.go b/bundle/config/mutator/rewrite_sync_paths.go index cfdc55f367..888714abeb 100644 --- a/bundle/config/mutator/rewrite_sync_paths.go +++ b/bundle/config/mutator/rewrite_sync_paths.go @@ -45,6 +45,10 @@ func (m *rewriteSyncPaths) makeRelativeTo(root string) dyn.MapFunc { func (m *rewriteSyncPaths) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagnostics { err := b.Config.Mutate(func(v dyn.Value) (dyn.Value, error) { return dyn.Map(v, "sync", func(_ dyn.Path, v dyn.Value) (nv dyn.Value, err error) { + v, err = dyn.Map(v, "paths", dyn.Foreach(m.makeRelativeTo(b.RootPath))) + if err != nil { + return dyn.InvalidValue, err + } v, err = dyn.Map(v, "include", dyn.Foreach(m.makeRelativeTo(b.RootPath))) if err != nil { return dyn.InvalidValue, err diff --git a/bundle/config/mutator/rewrite_sync_paths_test.go b/bundle/config/mutator/rewrite_sync_paths_test.go index 56ada19e67..fa7f124b79 100644 --- a/bundle/config/mutator/rewrite_sync_paths_test.go +++ b/bundle/config/mutator/rewrite_sync_paths_test.go @@ -17,6 +17,10 @@ func TestRewriteSyncPathsRelative(t *testing.T) { RootPath: ".", Config: config.Root{ Sync: config.Sync{ + Paths: []string{ + ".", + "../common", + }, Include: []string{ "foo", "bar", @@ -29,6 +33,8 @@ func TestRewriteSyncPathsRelative(t *testing.T) { }, } + bundletest.SetLocation(b, "sync.paths[0]", "./databricks.yml") + bundletest.SetLocation(b, "sync.paths[1]", "./databricks.yml") bundletest.SetLocation(b, "sync.include[0]", "./file.yml") bundletest.SetLocation(b, "sync.include[1]", "./a/file.yml") bundletest.SetLocation(b, "sync.exclude[0]", "./a/b/file.yml") @@ -37,6 +43,8 @@ func TestRewriteSyncPathsRelative(t *testing.T) { diags := bundle.Apply(context.Background(), b, mutator.RewriteSyncPaths()) assert.NoError(t, diags.Error()) + assert.Equal(t, filepath.Clean("."), b.Config.Sync.Paths[0]) + assert.Equal(t, filepath.Clean("../common"), b.Config.Sync.Paths[1]) assert.Equal(t, filepath.Clean("foo"), b.Config.Sync.Include[0]) assert.Equal(t, filepath.Clean("a/bar"), b.Config.Sync.Include[1]) assert.Equal(t, filepath.Clean("a/b/baz"), b.Config.Sync.Exclude[0]) @@ -48,6 +56,10 @@ func TestRewriteSyncPathsAbsolute(t *testing.T) { RootPath: "/tmp/dir", Config: config.Root{ Sync: config.Sync{ + Paths: []string{ + ".", + "../common", + }, Include: []string{ "foo", "bar", @@ -60,6 +72,8 @@ func TestRewriteSyncPathsAbsolute(t *testing.T) { }, } + bundletest.SetLocation(b, "sync.paths[0]", "/tmp/dir/databricks.yml") + bundletest.SetLocation(b, "sync.paths[1]", "/tmp/dir/databricks.yml") bundletest.SetLocation(b, "sync.include[0]", "/tmp/dir/file.yml") bundletest.SetLocation(b, "sync.include[1]", "/tmp/dir/a/file.yml") bundletest.SetLocation(b, "sync.exclude[0]", "/tmp/dir/a/b/file.yml") @@ -68,6 +82,8 @@ func TestRewriteSyncPathsAbsolute(t *testing.T) { diags := bundle.Apply(context.Background(), b, mutator.RewriteSyncPaths()) assert.NoError(t, diags.Error()) + assert.Equal(t, filepath.Clean("."), b.Config.Sync.Paths[0]) + assert.Equal(t, filepath.Clean("../common"), b.Config.Sync.Paths[1]) assert.Equal(t, filepath.Clean("foo"), b.Config.Sync.Include[0]) assert.Equal(t, filepath.Clean("a/bar"), b.Config.Sync.Include[1]) assert.Equal(t, filepath.Clean("a/b/baz"), b.Config.Sync.Exclude[0]) diff --git a/bundle/config/mutator/sync_default_path.go b/bundle/config/mutator/sync_default_path.go new file mode 100644 index 0000000000..8e14ce202c --- /dev/null +++ b/bundle/config/mutator/sync_default_path.go @@ -0,0 +1,48 @@ +package mutator + +import ( + "context" + + "github.com/databricks/cli/bundle" + "github.com/databricks/cli/libs/diag" + "github.com/databricks/cli/libs/dyn" +) + +type syncDefaultPath struct{} + +// SyncDefaultPath configures the default sync path to be equal to the bundle root. +func SyncDefaultPath() bundle.Mutator { + return &syncDefaultPath{} +} + +func (m *syncDefaultPath) Name() string { + return "SyncDefaultPath" +} + +func (m *syncDefaultPath) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagnostics { + isset := false + err := b.Config.Mutate(func(v dyn.Value) (dyn.Value, error) { + pv, _ := dyn.Get(v, "sync.paths") + + // If the sync paths field is already set, do nothing. + // We know it is set if its value is either a nil or a sequence (empty or not). + switch pv.Kind() { + case dyn.KindNil, dyn.KindSequence: + isset = true + } + + return v, nil + }) + if err != nil { + return diag.FromErr(err) + } + + // If the sync paths field is already set, do nothing. + if isset { + return nil + } + + // Set the sync paths to the default value. + b.Config.Sync.Paths = []string{"."} + return nil +} diff --git a/bundle/config/mutator/sync_default_path_test.go b/bundle/config/mutator/sync_default_path_test.go new file mode 100644 index 0000000000..a37e913d23 --- /dev/null +++ b/bundle/config/mutator/sync_default_path_test.go @@ -0,0 +1,82 @@ +package mutator_test + +import ( + "context" + "testing" + + "github.com/databricks/cli/bundle" + "github.com/databricks/cli/bundle/config" + "github.com/databricks/cli/bundle/config/mutator" + "github.com/databricks/cli/libs/diag" + "github.com/databricks/cli/libs/dyn" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestSyncDefaultPath_DefaultIfUnset(t *testing.T) { + b := &bundle.Bundle{ + RootPath: "/tmp/some/dir", + Config: config.Root{}, + } + + ctx := context.Background() + diags := bundle.Apply(ctx, b, mutator.SyncDefaultPath()) + require.NoError(t, diags.Error()) + assert.Equal(t, []string{"."}, b.Config.Sync.Paths) +} + +func TestSyncDefaultPath_SkipIfSet(t *testing.T) { + tcases := []struct { + name string + paths dyn.Value + expect []string + }{ + { + name: "nil", + paths: dyn.V(nil), + expect: nil, + }, + { + name: "empty sequence", + paths: dyn.V([]dyn.Value{}), + expect: []string{}, + }, + { + name: "non-empty sequence", + paths: dyn.V([]dyn.Value{dyn.V("something")}), + expect: []string{"something"}, + }, + } + + for _, tcase := range tcases { + t.Run(tcase.name, func(t *testing.T) { + b := &bundle.Bundle{ + RootPath: "/tmp/some/dir", + Config: config.Root{}, + } + + diags := bundle.ApplyFunc(context.Background(), b, func(ctx context.Context, b *bundle.Bundle) diag.Diagnostics { + err := b.Config.Mutate(func(v dyn.Value) (dyn.Value, error) { + v, err := dyn.Set(v, "sync", dyn.V(dyn.NewMapping())) + if err != nil { + return dyn.InvalidValue, err + } + v, err = dyn.Set(v, "sync.paths", tcase.paths) + if err != nil { + return dyn.InvalidValue, err + } + return v, nil + }) + return diag.FromErr(err) + }) + require.NoError(t, diags.Error()) + + ctx := context.Background() + diags = bundle.Apply(ctx, b, mutator.SyncDefaultPath()) + require.NoError(t, diags.Error()) + + // If the sync paths field is already set, do nothing. + assert.Equal(t, tcase.expect, b.Config.Sync.Paths) + }) + } +} diff --git a/bundle/config/mutator/sync_infer_root.go b/bundle/config/mutator/sync_infer_root.go new file mode 100644 index 0000000000..a30cbfb25e --- /dev/null +++ b/bundle/config/mutator/sync_infer_root.go @@ -0,0 +1,120 @@ +package mutator + +import ( + "context" + "fmt" + "path/filepath" + + "github.com/databricks/cli/bundle" + "github.com/databricks/cli/libs/diag" + "github.com/databricks/cli/libs/dyn" + "github.com/databricks/cli/libs/vfs" +) + +type syncInferRoot struct{} + +// SyncInferRoot is a mutator that infers the root path of all files to synchronize by looking at the +// paths in the sync configuration. The sync root may be different from the bundle root +// when the user intends to synchronize files outside the bundle root. +// +// The sync root can be equivalent to or an ancestor of the bundle root, but not a descendant. +// That is, the sync root must contain the bundle root. +// +// This mutator requires all sync-related paths and patterns to be relative to the bundle root path. +// This is done by the [RewriteSyncPaths] mutator, which must run before this mutator. +func SyncInferRoot() bundle.Mutator { + return &syncInferRoot{} +} + +func (m *syncInferRoot) Name() string { + return "SyncInferRoot" +} + +// computeRoot finds the innermost path that contains the specified path. +// It traverses up the root path until it finds the innermost path. +// If the path does not exist, it returns an empty string. +// +// See "sync_infer_root_internal_test.go" for examples. +func (m *syncInferRoot) computeRoot(path string, root string) string { + for !filepath.IsLocal(path) { + // Break if we have reached the root of the filesystem. + dir := filepath.Dir(root) + if dir == root { + return "" + } + + // Update the sync path as we navigate up the directory tree. + path = filepath.Join(filepath.Base(root), path) + + // Move up the directory tree. + root = dir + } + + return root +} + +func (m *syncInferRoot) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagnostics { + var diags diag.Diagnostics + + // Use the bundle root path as the starting point for inferring the sync root path. + bundleRootPath := b.RootPath + + // Infer the sync root path by looking at each one of the sync paths. + // Every sync path must be a descendant of the final sync root path. + syncRootPath := bundleRootPath + for _, path := range b.Config.Sync.Paths { + computedPath := m.computeRoot(path, bundleRootPath) + if computedPath == "" { + continue + } + + // Update sync root path if the computed root path is an ancestor of the current sync root path. + if len(computedPath) < len(syncRootPath) { + syncRootPath = computedPath + } + } + + // The new sync root path can only be an ancestor of the previous root path. + // Compute the relative path from the sync root to the bundle root. + rel, err := filepath.Rel(syncRootPath, bundleRootPath) + if err != nil { + return diag.FromErr(err) + } + + // If during computation of the sync root path we hit the root of the filesystem, + // then one or more of the sync paths are outside the filesystem. + // Check if this happened by verifying that none of the paths escape the root + // when joined with the sync root path. + for i, path := range b.Config.Sync.Paths { + if filepath.IsLocal(filepath.Join(rel, path)) { + continue + } + + diags = append(diags, diag.Diagnostic{ + Severity: diag.Error, + Summary: fmt.Sprintf("invalid sync path %q", path), + Locations: b.Config.GetLocations(fmt.Sprintf("sync.paths[%d]", i)), + Paths: []dyn.Path{dyn.NewPath(dyn.Key("sync"), dyn.Key("paths"), dyn.Index(i))}, + }) + } + + if diags.HasError() { + return diags + } + + // Update all paths in the sync configuration to be relative to the sync root. + for i, p := range b.Config.Sync.Paths { + b.Config.Sync.Paths[i] = filepath.Join(rel, p) + } + for i, p := range b.Config.Sync.Include { + b.Config.Sync.Include[i] = filepath.Join(rel, p) + } + for i, p := range b.Config.Sync.Exclude { + b.Config.Sync.Exclude[i] = filepath.Join(rel, p) + } + + // Configure the sync root path. + b.SyncRoot = vfs.MustNew(syncRootPath) + b.SyncRootPath = syncRootPath + return nil +} diff --git a/bundle/config/mutator/sync_infer_root_internal_test.go b/bundle/config/mutator/sync_infer_root_internal_test.go new file mode 100644 index 0000000000..010907ae7d --- /dev/null +++ b/bundle/config/mutator/sync_infer_root_internal_test.go @@ -0,0 +1,53 @@ +package mutator + +import ( + "testing" + + "github.com/stretchr/testify/assert" +) + +func TestSyncInferRootInternal_ComputeRoot(t *testing.T) { + s := syncInferRoot{} + + tcases := []struct { + path string + root string + out string + }{ + { + path: ".", + root: "/tmp/some/dir", + out: "/tmp/some/dir", + }, + { + path: "sub", + root: "/tmp/some/dir", + out: "/tmp/some/dir", + }, + { + path: "../common", + root: "/tmp/some/dir", + out: "/tmp/some", + }, + { + path: "../../../../../../common", + root: "/tmp/some/dir/that/is/very/deeply/nested", + out: "/tmp/some", + }, + { + path: "../common", + root: "/tmp", + out: "/", + }, + { + path: "../common", + root: "/", + out: "", + }, + } + + for _, tc := range tcases { + out := s.computeRoot(tc.path, tc.root) + assert.Equal(t, tc.out, out) + } +} diff --git a/bundle/config/mutator/sync_infer_root_test.go b/bundle/config/mutator/sync_infer_root_test.go new file mode 100644 index 0000000000..ad113a6be9 --- /dev/null +++ b/bundle/config/mutator/sync_infer_root_test.go @@ -0,0 +1,191 @@ +package mutator_test + +import ( + "context" + "testing" + + "github.com/databricks/cli/bundle" + "github.com/databricks/cli/bundle/config" + "github.com/databricks/cli/bundle/config/mutator" + "github.com/databricks/cli/bundle/internal/bundletest" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestSyncInferRoot_NominalAbsolute(t *testing.T) { + b := &bundle.Bundle{ + RootPath: "/tmp/some/dir", + Config: config.Root{ + Sync: config.Sync{ + Paths: []string{ + ".", + }, + Include: []string{ + "foo", + "bar", + }, + Exclude: []string{ + "baz", + "qux", + }, + }, + }, + } + + ctx := context.Background() + diags := bundle.Apply(ctx, b, mutator.SyncInferRoot()) + assert.NoError(t, diags.Error()) + assert.Equal(t, b.RootPath, b.SyncRootPath) + + // Check that the paths are unchanged. + assert.Equal(t, []string{"."}, b.Config.Sync.Paths) + assert.Equal(t, []string{"foo", "bar"}, b.Config.Sync.Include) + assert.Equal(t, []string{"baz", "qux"}, b.Config.Sync.Exclude) +} + +func TestSyncInferRoot_NominalRelative(t *testing.T) { + b := &bundle.Bundle{ + RootPath: "./some/dir", + Config: config.Root{ + Sync: config.Sync{ + Paths: []string{ + ".", + }, + Include: []string{ + "foo", + "bar", + }, + Exclude: []string{ + "baz", + "qux", + }, + }, + }, + } + + ctx := context.Background() + diags := bundle.Apply(ctx, b, mutator.SyncInferRoot()) + assert.NoError(t, diags.Error()) + assert.Equal(t, b.RootPath, b.SyncRootPath) + + // Check that the paths are unchanged. + assert.Equal(t, []string{"."}, b.Config.Sync.Paths) + assert.Equal(t, []string{"foo", "bar"}, b.Config.Sync.Include) + assert.Equal(t, []string{"baz", "qux"}, b.Config.Sync.Exclude) +} + +func TestSyncInferRoot_ParentDirectory(t *testing.T) { + b := &bundle.Bundle{ + RootPath: "/tmp/some/dir", + Config: config.Root{ + Sync: config.Sync{ + Paths: []string{ + "../common", + }, + Include: []string{ + "foo", + "bar", + }, + Exclude: []string{ + "baz", + "qux", + }, + }, + }, + } + + ctx := context.Background() + diags := bundle.Apply(ctx, b, mutator.SyncInferRoot()) + assert.NoError(t, diags.Error()) + assert.Equal(t, "/tmp/some", b.SyncRootPath) + + // Check that the paths are updated. + assert.Equal(t, []string{"common"}, b.Config.Sync.Paths) + assert.Equal(t, []string{"dir/foo", "dir/bar"}, b.Config.Sync.Include) + assert.Equal(t, []string{"dir/baz", "dir/qux"}, b.Config.Sync.Exclude) +} + +func TestSyncInferRoot_ManyParentDirectories(t *testing.T) { + b := &bundle.Bundle{ + RootPath: "/tmp/some/dir/that/is/very/deeply/nested", + Config: config.Root{ + Sync: config.Sync{ + Paths: []string{ + "../../../../../../common", + }, + Include: []string{ + "foo", + "bar", + }, + Exclude: []string{ + "baz", + "qux", + }, + }, + }, + } + + ctx := context.Background() + diags := bundle.Apply(ctx, b, mutator.SyncInferRoot()) + assert.NoError(t, diags.Error()) + assert.Equal(t, "/tmp/some", b.SyncRootPath) + + // Check that the paths are updated. + assert.Equal(t, []string{"common"}, b.Config.Sync.Paths) + assert.Equal(t, []string{"dir/that/is/very/deeply/nested/foo", "dir/that/is/very/deeply/nested/bar"}, b.Config.Sync.Include) + assert.Equal(t, []string{"dir/that/is/very/deeply/nested/baz", "dir/that/is/very/deeply/nested/qux"}, b.Config.Sync.Exclude) +} + +func TestSyncInferRoot_MultiplePaths(t *testing.T) { + b := &bundle.Bundle{ + RootPath: "/tmp/some/bundle/root", + Config: config.Root{ + Sync: config.Sync{ + Paths: []string{ + "./foo", + "../common", + "./bar", + "../../baz", + }, + }, + }, + } + + ctx := context.Background() + diags := bundle.Apply(ctx, b, mutator.SyncInferRoot()) + assert.NoError(t, diags.Error()) + assert.Equal(t, "/tmp/some", b.SyncRootPath) + + // Check that the paths are updated. + assert.Equal(t, "bundle/root/foo", b.Config.Sync.Paths[0]) + assert.Equal(t, "bundle/common", b.Config.Sync.Paths[1]) + assert.Equal(t, "bundle/root/bar", b.Config.Sync.Paths[2]) + assert.Equal(t, "baz", b.Config.Sync.Paths[3]) +} + +func TestSyncInferRoot_Error(t *testing.T) { + b := &bundle.Bundle{ + RootPath: "/tmp/some/dir", + Config: config.Root{ + Sync: config.Sync{ + Paths: []string{ + "../../../../error", + "../../../thisworks", + "../../../../../error", + }, + }, + }, + } + + bundletest.SetLocation(b, "sync.paths", "databricks.yml") + + ctx := context.Background() + diags := bundle.Apply(ctx, b, mutator.SyncInferRoot()) + require.Len(t, diags, 2) + assert.Equal(t, `invalid sync path "../../../../error"`, diags[0].Summary) + assert.Equal(t, "databricks.yml:0:0", diags[0].Locations[0].String()) + assert.Equal(t, "sync.paths[0]", diags[0].Paths[0].String()) + assert.Equal(t, `invalid sync path "../../../../../error"`, diags[1].Summary) + assert.Equal(t, "databricks.yml:0:0", diags[1].Locations[0].String()) + assert.Equal(t, "sync.paths[2]", diags[1].Paths[0].String()) +} diff --git a/bundle/config/mutator/translate_paths.go b/bundle/config/mutator/translate_paths.go index 28f7d3d30b..5f22570e7f 100644 --- a/bundle/config/mutator/translate_paths.go +++ b/bundle/config/mutator/translate_paths.go @@ -93,14 +93,14 @@ func (t *translateContext) rewritePath( return nil } - // Local path must be contained in the bundle root. + // Local path must be contained in the sync root. // If it isn't, it won't be synchronized into the workspace. - localRelPath, err := filepath.Rel(t.b.RootPath, localPath) + localRelPath, err := filepath.Rel(t.b.SyncRootPath, localPath) if err != nil { return err } if strings.HasPrefix(localRelPath, "..") { - return fmt.Errorf("path %s is not contained in bundle root path", localPath) + return fmt.Errorf("path %s is not contained in sync root path", localPath) } // Prefix remote path with its remote root path. @@ -118,7 +118,7 @@ func (t *translateContext) rewritePath( } func (t *translateContext) translateNotebookPath(literal, localFullPath, localRelPath, remotePath string) (string, error) { - nb, _, err := notebook.DetectWithFS(t.b.BundleRoot, filepath.ToSlash(localRelPath)) + nb, _, err := notebook.DetectWithFS(t.b.SyncRoot, filepath.ToSlash(localRelPath)) if errors.Is(err, fs.ErrNotExist) { return "", fmt.Errorf("notebook %s not found", literal) } @@ -134,7 +134,7 @@ func (t *translateContext) translateNotebookPath(literal, localFullPath, localRe } func (t *translateContext) translateFilePath(literal, localFullPath, localRelPath, remotePath string) (string, error) { - nb, _, err := notebook.DetectWithFS(t.b.BundleRoot, filepath.ToSlash(localRelPath)) + nb, _, err := notebook.DetectWithFS(t.b.SyncRoot, filepath.ToSlash(localRelPath)) if errors.Is(err, fs.ErrNotExist) { return "", fmt.Errorf("file %s not found", literal) } @@ -148,7 +148,7 @@ func (t *translateContext) translateFilePath(literal, localFullPath, localRelPat } func (t *translateContext) translateDirectoryPath(literal, localFullPath, localRelPath, remotePath string) (string, error) { - info, err := t.b.BundleRoot.Stat(filepath.ToSlash(localRelPath)) + info, err := t.b.SyncRoot.Stat(filepath.ToSlash(localRelPath)) if err != nil { return "", err } diff --git a/bundle/config/mutator/translate_paths_test.go b/bundle/config/mutator/translate_paths_test.go index 780a540df0..92ae396efa 100644 --- a/bundle/config/mutator/translate_paths_test.go +++ b/bundle/config/mutator/translate_paths_test.go @@ -41,8 +41,8 @@ func touchEmptyFile(t *testing.T, path string) { func TestTranslatePathsSkippedWithGitSource(t *testing.T) { dir := t.TempDir() b := &bundle.Bundle{ - RootPath: dir, - BundleRoot: vfs.MustNew(dir), + SyncRootPath: dir, + SyncRoot: vfs.MustNew(dir), Config: config.Root{ Workspace: config.Workspace{ FilePath: "/bundle", @@ -112,8 +112,8 @@ func TestTranslatePaths(t *testing.T) { touchEmptyFile(t, filepath.Join(dir, "dist", "task.jar")) b := &bundle.Bundle{ - RootPath: dir, - BundleRoot: vfs.MustNew(dir), + SyncRootPath: dir, + SyncRoot: vfs.MustNew(dir), Config: config.Root{ Workspace: config.Workspace{ FilePath: "/bundle", @@ -280,8 +280,8 @@ func TestTranslatePathsInSubdirectories(t *testing.T) { touchEmptyFile(t, filepath.Join(dir, "job", "my_dbt_project", "dbt_project.yml")) b := &bundle.Bundle{ - RootPath: dir, - BundleRoot: vfs.MustNew(dir), + SyncRootPath: dir, + SyncRoot: vfs.MustNew(dir), Config: config.Root{ Workspace: config.Workspace{ FilePath: "/bundle", @@ -371,12 +371,12 @@ func TestTranslatePathsInSubdirectories(t *testing.T) { ) } -func TestTranslatePathsOutsideBundleRoot(t *testing.T) { +func TestTranslatePathsOutsideSyncRoot(t *testing.T) { dir := t.TempDir() b := &bundle.Bundle{ - RootPath: dir, - BundleRoot: vfs.MustNew(dir), + SyncRootPath: dir, + SyncRoot: vfs.MustNew(dir), Config: config.Root{ Workspace: config.Workspace{ FilePath: "/bundle", @@ -402,15 +402,15 @@ func TestTranslatePathsOutsideBundleRoot(t *testing.T) { bundletest.SetLocation(b, ".", filepath.Join(dir, "../resource.yml")) diags := bundle.Apply(context.Background(), b, mutator.TranslatePaths()) - assert.ErrorContains(t, diags.Error(), "is not contained in bundle root") + assert.ErrorContains(t, diags.Error(), "is not contained in sync root path") } func TestJobNotebookDoesNotExistError(t *testing.T) { dir := t.TempDir() b := &bundle.Bundle{ - RootPath: dir, - BundleRoot: vfs.MustNew(dir), + SyncRootPath: dir, + SyncRoot: vfs.MustNew(dir), Config: config.Root{ Resources: config.Resources{ Jobs: map[string]*resources.Job{ @@ -440,8 +440,8 @@ func TestJobFileDoesNotExistError(t *testing.T) { dir := t.TempDir() b := &bundle.Bundle{ - RootPath: dir, - BundleRoot: vfs.MustNew(dir), + SyncRootPath: dir, + SyncRoot: vfs.MustNew(dir), Config: config.Root{ Resources: config.Resources{ Jobs: map[string]*resources.Job{ @@ -471,8 +471,8 @@ func TestPipelineNotebookDoesNotExistError(t *testing.T) { dir := t.TempDir() b := &bundle.Bundle{ - RootPath: dir, - BundleRoot: vfs.MustNew(dir), + SyncRootPath: dir, + SyncRoot: vfs.MustNew(dir), Config: config.Root{ Resources: config.Resources{ Pipelines: map[string]*resources.Pipeline{ @@ -502,8 +502,8 @@ func TestPipelineFileDoesNotExistError(t *testing.T) { dir := t.TempDir() b := &bundle.Bundle{ - RootPath: dir, - BundleRoot: vfs.MustNew(dir), + SyncRootPath: dir, + SyncRoot: vfs.MustNew(dir), Config: config.Root{ Resources: config.Resources{ Pipelines: map[string]*resources.Pipeline{ @@ -534,8 +534,8 @@ func TestJobSparkPythonTaskWithNotebookSourceError(t *testing.T) { touchNotebookFile(t, filepath.Join(dir, "my_notebook.py")) b := &bundle.Bundle{ - RootPath: dir, - BundleRoot: vfs.MustNew(dir), + SyncRootPath: dir, + SyncRoot: vfs.MustNew(dir), Config: config.Root{ Workspace: config.Workspace{ FilePath: "/bundle", @@ -569,8 +569,8 @@ func TestJobNotebookTaskWithFileSourceError(t *testing.T) { touchEmptyFile(t, filepath.Join(dir, "my_file.py")) b := &bundle.Bundle{ - RootPath: dir, - BundleRoot: vfs.MustNew(dir), + SyncRootPath: dir, + SyncRoot: vfs.MustNew(dir), Config: config.Root{ Workspace: config.Workspace{ FilePath: "/bundle", @@ -604,8 +604,8 @@ func TestPipelineNotebookLibraryWithFileSourceError(t *testing.T) { touchEmptyFile(t, filepath.Join(dir, "my_file.py")) b := &bundle.Bundle{ - RootPath: dir, - BundleRoot: vfs.MustNew(dir), + SyncRootPath: dir, + SyncRoot: vfs.MustNew(dir), Config: config.Root{ Workspace: config.Workspace{ FilePath: "/bundle", @@ -639,8 +639,8 @@ func TestPipelineFileLibraryWithNotebookSourceError(t *testing.T) { touchNotebookFile(t, filepath.Join(dir, "my_notebook.py")) b := &bundle.Bundle{ - RootPath: dir, - BundleRoot: vfs.MustNew(dir), + SyncRootPath: dir, + SyncRoot: vfs.MustNew(dir), Config: config.Root{ Workspace: config.Workspace{ FilePath: "/bundle", @@ -675,8 +675,8 @@ func TestTranslatePathJobEnvironments(t *testing.T) { touchEmptyFile(t, filepath.Join(dir, "env2.py")) b := &bundle.Bundle{ - RootPath: dir, - BundleRoot: vfs.MustNew(dir), + SyncRootPath: dir, + SyncRoot: vfs.MustNew(dir), Config: config.Root{ Resources: config.Resources{ Jobs: map[string]*resources.Job{ @@ -715,8 +715,8 @@ func TestTranslatePathJobEnvironments(t *testing.T) { func TestTranslatePathWithComplexVariables(t *testing.T) { dir := t.TempDir() b := &bundle.Bundle{ - RootPath: dir, - BundleRoot: vfs.MustNew(dir), + SyncRootPath: dir, + SyncRoot: vfs.MustNew(dir), Config: config.Root{ Variables: map[string]*variable.Variable{ "cluster_libraries": { diff --git a/bundle/config/sync.go b/bundle/config/sync.go index 0580e4c4ff..377b1333ed 100644 --- a/bundle/config/sync.go +++ b/bundle/config/sync.go @@ -1,6 +1,10 @@ package config type Sync struct { + // Paths contains a list of paths to synchronize relative to the bundle root path. + // If not configured, this defaults to synchronizing everything in the bundle root path (i.e. `.`). + Paths []string `json:"paths,omitempty"` + // Include contains a list of globs evaluated relative to the bundle root path // to explicitly include files that were excluded by the user's gitignore. Include []string `json:"include,omitempty"` diff --git a/bundle/deploy/files/sync.go b/bundle/deploy/files/sync.go index dc45053f9c..347ed30793 100644 --- a/bundle/deploy/files/sync.go +++ b/bundle/deploy/files/sync.go @@ -28,8 +28,8 @@ func GetSyncOptions(ctx context.Context, rb bundle.ReadOnlyBundle) (*sync.SyncOp } opts := &sync.SyncOptions{ - LocalRoot: rb.BundleRoot(), - Paths: []string{"."}, + LocalRoot: rb.SyncRoot(), + Paths: rb.Config().Sync.Paths, Include: includes, Exclude: rb.Config().Sync.Exclude, diff --git a/bundle/deploy/state_pull.go b/bundle/deploy/state_pull.go index 24ed9d360e..5e301a6f35 100644 --- a/bundle/deploy/state_pull.go +++ b/bundle/deploy/state_pull.go @@ -85,7 +85,7 @@ func (s *statePull) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagnostic } log.Infof(ctx, "Creating new snapshot") - snapshot, err := sync.NewSnapshot(state.Files.ToSlice(b.BundleRoot), opts) + snapshot, err := sync.NewSnapshot(state.Files.ToSlice(b.SyncRoot), opts) if err != nil { return diag.FromErr(err) } diff --git a/bundle/deploy/state_pull_test.go b/bundle/deploy/state_pull_test.go index 38f0b40213..f75193065a 100644 --- a/bundle/deploy/state_pull_test.go +++ b/bundle/deploy/state_pull_test.go @@ -64,6 +64,10 @@ func testStatePull(t *testing.T, opts statePullOpts) { b := &bundle.Bundle{ RootPath: tmpDir, BundleRoot: vfs.MustNew(tmpDir), + + SyncRootPath: tmpDir, + SyncRoot: vfs.MustNew(tmpDir), + Config: config.Root{ Bundle: config.Bundle{ Target: "default", @@ -81,11 +85,11 @@ func testStatePull(t *testing.T, opts statePullOpts) { ctx := context.Background() for _, file := range opts.localFiles { - testutil.Touch(t, b.RootPath, "bar", file) + testutil.Touch(t, b.SyncRootPath, "bar", file) } for _, file := range opts.localNotebooks { - testutil.TouchNotebook(t, b.RootPath, "bar", file) + testutil.TouchNotebook(t, b.SyncRootPath, "bar", file) } if opts.withExistingSnapshot { diff --git a/bundle/phases/initialize.go b/bundle/phases/initialize.go index fac3066dcb..25591a0a8e 100644 --- a/bundle/phases/initialize.go +++ b/bundle/phases/initialize.go @@ -21,7 +21,18 @@ func Initialize() bundle.Mutator { "initialize", []bundle.Mutator{ validate.AllResourcesHaveValues(), + + // Update all path fields in the sync block to be relative to the bundle root path. mutator.RewriteSyncPaths(), + + // Configure the default sync path to equal the bundle root if not explicitly configured. + // By default, this means all files in the bundle root directory are synchronized. + mutator.SyncDefaultPath(), + + // Figure out if the sync root path is identical or an ancestor of the bundle root path. + // If it is an ancestor, this updates all paths to be relative to the sync root path. + mutator.SyncInferRoot(), + mutator.MergeJobClusters(), mutator.MergeJobParameters(), mutator.MergeJobTasks(), diff --git a/bundle/tests/loader.go b/bundle/tests/loader.go index 069f09358c..456049b609 100644 --- a/bundle/tests/loader.go +++ b/bundle/tests/loader.go @@ -36,6 +36,8 @@ func loadTargetWithDiags(path, env string) (*bundle.Bundle, diag.Diagnostics) { diags := bundle.Apply(ctx, b, bundle.Seq( phases.LoadNamedTarget(env), mutator.RewriteSyncPaths(), + mutator.SyncDefaultPath(), + mutator.SyncInferRoot(), mutator.MergeJobClusters(), mutator.MergeJobParameters(), mutator.MergeJobTasks(), diff --git a/bundle/tests/sync/paths/databricks.yml b/bundle/tests/sync/paths/databricks.yml new file mode 100644 index 0000000000..9ef6fa032a --- /dev/null +++ b/bundle/tests/sync/paths/databricks.yml @@ -0,0 +1,20 @@ +bundle: + name: sync_paths + +workspace: + host: https://acme.cloud.databricks.com/ + +sync: + paths: + - src + +targets: + development: + sync: + paths: + - development + + staging: + sync: + paths: + - staging diff --git a/bundle/tests/sync/paths_no_root/databricks.yml b/bundle/tests/sync/paths_no_root/databricks.yml new file mode 100644 index 0000000000..7841cf1aef --- /dev/null +++ b/bundle/tests/sync/paths_no_root/databricks.yml @@ -0,0 +1,26 @@ +bundle: + name: sync_paths + +workspace: + host: https://acme.cloud.databricks.com/ + +targets: + development: + sync: + paths: + - development + + staging: + sync: + paths: + - staging + + nothing: ~ + + nil: + sync: + paths: ~ + + empty: + sync: + paths: [] diff --git a/bundle/tests/sync/shared_code/bundle/databricks.yml b/bundle/tests/sync/shared_code/bundle/databricks.yml new file mode 100644 index 0000000000..738b6170cb --- /dev/null +++ b/bundle/tests/sync/shared_code/bundle/databricks.yml @@ -0,0 +1,10 @@ +bundle: + name: shared_code + +workspace: + host: https://acme.cloud.databricks.com/ + +sync: + paths: + - "../common" + - "." diff --git a/bundle/tests/sync/shared_code/common/library.txt b/bundle/tests/sync/shared_code/common/library.txt new file mode 100644 index 0000000000..83b3238433 --- /dev/null +++ b/bundle/tests/sync/shared_code/common/library.txt @@ -0,0 +1 @@ +Placeholder for files to be deployed as part of multiple bundles. diff --git a/bundle/tests/sync_test.go b/bundle/tests/sync_test.go index d08e889c34..17d0ff553d 100644 --- a/bundle/tests/sync_test.go +++ b/bundle/tests/sync_test.go @@ -63,3 +63,41 @@ func TestSyncNilRoot(t *testing.T) { assert.ElementsMatch(t, []string{filepath.FromSlash("tests/*")}, b.Config.Sync.Include) assert.ElementsMatch(t, []string{filepath.FromSlash("dist")}, b.Config.Sync.Exclude) } + +func TestSyncPaths(t *testing.T) { + var b *bundle.Bundle + + b = loadTarget(t, "./sync/paths", "development") + assert.ElementsMatch(t, []string{"src", "development"}, b.Config.Sync.Paths) + + b = loadTarget(t, "./sync/paths", "staging") + assert.ElementsMatch(t, []string{"src", "staging"}, b.Config.Sync.Paths) +} + +func TestSyncPathsNoRoot(t *testing.T) { + var b *bundle.Bundle + + b = loadTarget(t, "./sync/paths_no_root", "development") + assert.ElementsMatch(t, []string{"development"}, b.Config.Sync.Paths) + + b = loadTarget(t, "./sync/paths_no_root", "staging") + assert.ElementsMatch(t, []string{"staging"}, b.Config.Sync.Paths) + + // If not set at all, it defaults to "." + b = loadTarget(t, "./sync/paths_no_root", "nothing") + assert.Equal(t, []string{"."}, b.Config.Sync.Paths) + + // If set to nil, it won't sync anything. + b = loadTarget(t, "./sync/paths_no_root", "nil") + assert.Len(t, b.Config.Sync.Paths, 0) + + // If set to an empty sequence, it won't sync anything. + b = loadTarget(t, "./sync/paths_no_root", "empty") + assert.Len(t, b.Config.Sync.Paths, 0) +} + +func TestSyncSharedCode(t *testing.T) { + b := loadTarget(t, "./sync/shared_code/bundle", "default") + assert.Equal(t, filepath.FromSlash("sync/shared_code"), b.SyncRootPath) + assert.ElementsMatch(t, []string{"common", "bundle"}, b.Config.Sync.Paths) +} diff --git a/cmd/sync/sync_test.go b/cmd/sync/sync_test.go index 0d0c573852..bd03eec910 100644 --- a/cmd/sync/sync_test.go +++ b/cmd/sync/sync_test.go @@ -17,8 +17,10 @@ import ( func TestSyncOptionsFromBundle(t *testing.T) { tempDir := t.TempDir() b := &bundle.Bundle{ - RootPath: tempDir, - BundleRoot: vfs.MustNew(tempDir), + RootPath: tempDir, + BundleRoot: vfs.MustNew(tempDir), + SyncRootPath: tempDir, + SyncRoot: vfs.MustNew(tempDir), Config: config.Root{ Bundle: config.Bundle{ Target: "default", From 8e568cb1b00887d103893f47e94fd77af6f36c7d Mon Sep 17 00:00:00 2001 From: Pieter Noordhuis Date: Mon, 19 Aug 2024 16:54:17 +0200 Subject: [PATCH 09/18] Update tests --- bundle/tests/loader.go | 29 +++++++++++++++ bundle/tests/pipeline_glob_paths_test.go | 37 +------------------ .../tests/relative_path_translation_test.go | 29 +-------------- .../tests/sync/paths_no_root/databricks.yml | 2 +- bundle/tests/sync_test.go | 33 +++++++++++++++-- 5 files changed, 64 insertions(+), 66 deletions(-) diff --git a/bundle/tests/loader.go b/bundle/tests/loader.go index 456049b609..5c48d81cb9 100644 --- a/bundle/tests/loader.go +++ b/bundle/tests/loader.go @@ -8,6 +8,10 @@ import ( "github.com/databricks/cli/bundle/config/mutator" "github.com/databricks/cli/bundle/phases" "github.com/databricks/cli/libs/diag" + "github.com/databricks/databricks-sdk-go/config" + "github.com/databricks/databricks-sdk-go/experimental/mocks" + "github.com/databricks/databricks-sdk-go/service/iam" + "github.com/stretchr/testify/mock" "github.com/stretchr/testify/require" ) @@ -45,3 +49,28 @@ func loadTargetWithDiags(path, env string) (*bundle.Bundle, diag.Diagnostics) { )) return b, diags } + +func configureMock(t *testing.T, b *bundle.Bundle) { + // Configure mock workspace client + m := mocks.NewMockWorkspaceClient(t) + m.WorkspaceClient.Config = &config.Config{ + Host: "https://mock.databricks.workspace.com", + } + m.GetMockCurrentUserAPI().EXPECT().Me(mock.Anything).Return(&iam.User{ + UserName: "user@domain.com", + }, nil) + b.SetWorkpaceClient(m.WorkspaceClient) +} + +func initializeTarget(t *testing.T, path, env string) (*bundle.Bundle, diag.Diagnostics) { + b := load(t, path) + configureMock(t, b) + + ctx := context.Background() + diags := bundle.Apply(ctx, b, bundle.Seq( + mutator.SelectTarget(env), + phases.Initialize(), + )) + + return b, diags +} diff --git a/bundle/tests/pipeline_glob_paths_test.go b/bundle/tests/pipeline_glob_paths_test.go index bf5039b5ff..c1c62cfb6d 100644 --- a/bundle/tests/pipeline_glob_paths_test.go +++ b/bundle/tests/pipeline_glob_paths_test.go @@ -1,33 +1,13 @@ package config_tests import ( - "context" "testing" - "github.com/databricks/cli/bundle" - "github.com/databricks/cli/bundle/phases" - "github.com/databricks/databricks-sdk-go/config" - "github.com/databricks/databricks-sdk-go/experimental/mocks" - "github.com/databricks/databricks-sdk-go/service/iam" - "github.com/stretchr/testify/mock" "github.com/stretchr/testify/require" ) func TestExpandPipelineGlobPaths(t *testing.T) { - b := loadTarget(t, "./pipeline_glob_paths", "default") - - // Configure mock workspace client - m := mocks.NewMockWorkspaceClient(t) - m.WorkspaceClient.Config = &config.Config{ - Host: "https://mock.databricks.workspace.com", - } - m.GetMockCurrentUserAPI().EXPECT().Me(mock.Anything).Return(&iam.User{ - UserName: "user@domain.com", - }, nil) - b.SetWorkpaceClient(m.WorkspaceClient) - - ctx := context.Background() - diags := bundle.Apply(ctx, b, phases.Initialize()) + b, diags := initializeTarget(t, "./pipeline_glob_paths", "default") require.NoError(t, diags.Error()) require.Equal( t, @@ -37,19 +17,6 @@ func TestExpandPipelineGlobPaths(t *testing.T) { } func TestExpandPipelineGlobPathsWithNonExistent(t *testing.T) { - b := loadTarget(t, "./pipeline_glob_paths", "error") - - // Configure mock workspace client - m := mocks.NewMockWorkspaceClient(t) - m.WorkspaceClient.Config = &config.Config{ - Host: "https://mock.databricks.workspace.com", - } - m.GetMockCurrentUserAPI().EXPECT().Me(mock.Anything).Return(&iam.User{ - UserName: "user@domain.com", - }, nil) - b.SetWorkpaceClient(m.WorkspaceClient) - - ctx := context.Background() - diags := bundle.Apply(ctx, b, phases.Initialize()) + _, diags := initializeTarget(t, "./pipeline_glob_paths", "error") require.ErrorContains(t, diags.Error(), "notebook ./non-existent not found") } diff --git a/bundle/tests/relative_path_translation_test.go b/bundle/tests/relative_path_translation_test.go index d5b80bea5f..199871d23c 100644 --- a/bundle/tests/relative_path_translation_test.go +++ b/bundle/tests/relative_path_translation_test.go @@ -1,36 +1,14 @@ package config_tests import ( - "context" "testing" - "github.com/databricks/cli/bundle" - "github.com/databricks/cli/bundle/phases" - "github.com/databricks/databricks-sdk-go/config" - "github.com/databricks/databricks-sdk-go/experimental/mocks" - "github.com/databricks/databricks-sdk-go/service/iam" "github.com/stretchr/testify/assert" - "github.com/stretchr/testify/mock" "github.com/stretchr/testify/require" ) -func configureMock(t *testing.T, b *bundle.Bundle) { - // Configure mock workspace client - m := mocks.NewMockWorkspaceClient(t) - m.WorkspaceClient.Config = &config.Config{ - Host: "https://mock.databricks.workspace.com", - } - m.GetMockCurrentUserAPI().EXPECT().Me(mock.Anything).Return(&iam.User{ - UserName: "user@domain.com", - }, nil) - b.SetWorkpaceClient(m.WorkspaceClient) -} - func TestRelativePathTranslationDefault(t *testing.T) { - b := loadTarget(t, "./relative_path_translation", "default") - configureMock(t, b) - - diags := bundle.Apply(context.Background(), b, phases.Initialize()) + b, diags := initializeTarget(t, "./relative_path_translation", "default") require.NoError(t, diags.Error()) t0 := b.Config.Resources.Jobs["job"].Tasks[0] @@ -40,10 +18,7 @@ func TestRelativePathTranslationDefault(t *testing.T) { } func TestRelativePathTranslationOverride(t *testing.T) { - b := loadTarget(t, "./relative_path_translation", "override") - configureMock(t, b) - - diags := bundle.Apply(context.Background(), b, phases.Initialize()) + b, diags := initializeTarget(t, "./relative_path_translation", "override") require.NoError(t, diags.Error()) t0 := b.Config.Resources.Jobs["job"].Tasks[0] diff --git a/bundle/tests/sync/paths_no_root/databricks.yml b/bundle/tests/sync/paths_no_root/databricks.yml index 7841cf1aef..df15b12b65 100644 --- a/bundle/tests/sync/paths_no_root/databricks.yml +++ b/bundle/tests/sync/paths_no_root/databricks.yml @@ -15,7 +15,7 @@ targets: paths: - staging - nothing: ~ + undefined: ~ nil: sync: diff --git a/bundle/tests/sync_test.go b/bundle/tests/sync_test.go index 17d0ff553d..15644b67e6 100644 --- a/bundle/tests/sync_test.go +++ b/bundle/tests/sync_test.go @@ -12,14 +12,20 @@ func TestSyncOverride(t *testing.T) { var b *bundle.Bundle b = loadTarget(t, "./sync/override", "development") + assert.Equal(t, filepath.FromSlash("sync/override"), b.SyncRootPath) + assert.Equal(t, []string{"."}, b.Config.Sync.Paths) assert.ElementsMatch(t, []string{filepath.FromSlash("src/*"), filepath.FromSlash("tests/*")}, b.Config.Sync.Include) assert.ElementsMatch(t, []string{filepath.FromSlash("dist")}, b.Config.Sync.Exclude) b = loadTarget(t, "./sync/override", "staging") + assert.Equal(t, filepath.FromSlash("sync/override"), b.SyncRootPath) + assert.Equal(t, []string{"."}, b.Config.Sync.Paths) assert.ElementsMatch(t, []string{filepath.FromSlash("src/*"), filepath.FromSlash("fixtures/*")}, b.Config.Sync.Include) assert.ElementsMatch(t, []string{}, b.Config.Sync.Exclude) b = loadTarget(t, "./sync/override", "prod") + assert.Equal(t, filepath.FromSlash("sync/override"), b.SyncRootPath) + assert.Equal(t, []string{"."}, b.Config.Sync.Paths) assert.ElementsMatch(t, []string{filepath.FromSlash("src/*")}, b.Config.Sync.Include) assert.ElementsMatch(t, []string{}, b.Config.Sync.Exclude) } @@ -28,14 +34,20 @@ func TestSyncOverrideNoRootSync(t *testing.T) { var b *bundle.Bundle b = loadTarget(t, "./sync/override_no_root", "development") + assert.Equal(t, filepath.FromSlash("sync/override_no_root"), b.SyncRootPath) + assert.Equal(t, []string{"."}, b.Config.Sync.Paths) assert.ElementsMatch(t, []string{filepath.FromSlash("tests/*")}, b.Config.Sync.Include) assert.ElementsMatch(t, []string{filepath.FromSlash("dist")}, b.Config.Sync.Exclude) b = loadTarget(t, "./sync/override_no_root", "staging") + assert.Equal(t, filepath.FromSlash("sync/override_no_root"), b.SyncRootPath) + assert.Equal(t, []string{"."}, b.Config.Sync.Paths) assert.ElementsMatch(t, []string{filepath.FromSlash("fixtures/*")}, b.Config.Sync.Include) assert.ElementsMatch(t, []string{}, b.Config.Sync.Exclude) b = loadTarget(t, "./sync/override_no_root", "prod") + assert.Equal(t, filepath.FromSlash("sync/override_no_root"), b.SyncRootPath) + assert.Equal(t, []string{"."}, b.Config.Sync.Paths) assert.ElementsMatch(t, []string{}, b.Config.Sync.Include) assert.ElementsMatch(t, []string{}, b.Config.Sync.Exclude) } @@ -44,10 +56,14 @@ func TestSyncNil(t *testing.T) { var b *bundle.Bundle b = loadTarget(t, "./sync/nil", "development") + assert.Equal(t, filepath.FromSlash("sync/nil"), b.SyncRootPath) + assert.Equal(t, []string{"."}, b.Config.Sync.Paths) assert.Nil(t, b.Config.Sync.Include) assert.Nil(t, b.Config.Sync.Exclude) b = loadTarget(t, "./sync/nil", "staging") + assert.Equal(t, filepath.FromSlash("sync/nil"), b.SyncRootPath) + assert.Equal(t, []string{"."}, b.Config.Sync.Paths) assert.ElementsMatch(t, []string{filepath.FromSlash("tests/*")}, b.Config.Sync.Include) assert.ElementsMatch(t, []string{filepath.FromSlash("dist")}, b.Config.Sync.Exclude) } @@ -56,10 +72,14 @@ func TestSyncNilRoot(t *testing.T) { var b *bundle.Bundle b = loadTarget(t, "./sync/nil_root", "development") + assert.Equal(t, filepath.FromSlash("sync/nil_root"), b.SyncRootPath) + assert.Equal(t, []string{"."}, b.Config.Sync.Paths) assert.Nil(t, b.Config.Sync.Include) assert.Nil(t, b.Config.Sync.Exclude) b = loadTarget(t, "./sync/nil_root", "staging") + assert.Equal(t, filepath.FromSlash("sync/nil_root"), b.SyncRootPath) + assert.Equal(t, []string{"."}, b.Config.Sync.Paths) assert.ElementsMatch(t, []string{filepath.FromSlash("tests/*")}, b.Config.Sync.Include) assert.ElementsMatch(t, []string{filepath.FromSlash("dist")}, b.Config.Sync.Exclude) } @@ -68,31 +88,38 @@ func TestSyncPaths(t *testing.T) { var b *bundle.Bundle b = loadTarget(t, "./sync/paths", "development") - assert.ElementsMatch(t, []string{"src", "development"}, b.Config.Sync.Paths) + assert.Equal(t, filepath.FromSlash("sync/paths"), b.SyncRootPath) + assert.Equal(t, []string{"src", "development"}, b.Config.Sync.Paths) b = loadTarget(t, "./sync/paths", "staging") - assert.ElementsMatch(t, []string{"src", "staging"}, b.Config.Sync.Paths) + assert.Equal(t, filepath.FromSlash("sync/paths"), b.SyncRootPath) + assert.Equal(t, []string{"src", "staging"}, b.Config.Sync.Paths) } func TestSyncPathsNoRoot(t *testing.T) { var b *bundle.Bundle b = loadTarget(t, "./sync/paths_no_root", "development") + assert.Equal(t, filepath.FromSlash("sync/paths_no_root"), b.SyncRootPath) assert.ElementsMatch(t, []string{"development"}, b.Config.Sync.Paths) b = loadTarget(t, "./sync/paths_no_root", "staging") + assert.Equal(t, filepath.FromSlash("sync/paths_no_root"), b.SyncRootPath) assert.ElementsMatch(t, []string{"staging"}, b.Config.Sync.Paths) // If not set at all, it defaults to "." - b = loadTarget(t, "./sync/paths_no_root", "nothing") + b = loadTarget(t, "./sync/paths_no_root", "undefined") + assert.Equal(t, filepath.FromSlash("sync/paths_no_root"), b.SyncRootPath) assert.Equal(t, []string{"."}, b.Config.Sync.Paths) // If set to nil, it won't sync anything. b = loadTarget(t, "./sync/paths_no_root", "nil") + assert.Equal(t, filepath.FromSlash("sync/paths_no_root"), b.SyncRootPath) assert.Len(t, b.Config.Sync.Paths, 0) // If set to an empty sequence, it won't sync anything. b = loadTarget(t, "./sync/paths_no_root", "empty") + assert.Equal(t, filepath.FromSlash("sync/paths_no_root"), b.SyncRootPath) assert.Len(t, b.Config.Sync.Paths, 0) } From d0d2501895e194ffb11d8e8d8a0168eb4d7c5348 Mon Sep 17 00:00:00 2001 From: Pieter Noordhuis Date: Mon, 19 Aug 2024 17:31:20 +0200 Subject: [PATCH 10/18] Windows tests --- bundle/config/mutator/sync_infer_root.go | 2 +- .../mutator/sync_infer_root_internal_test.go | 15 +++++++++- bundle/config/mutator/sync_infer_root_test.go | 29 ++++++++++++------- 3 files changed, 33 insertions(+), 13 deletions(-) diff --git a/bundle/config/mutator/sync_infer_root.go b/bundle/config/mutator/sync_infer_root.go index a30cbfb25e..65ca2abca5 100644 --- a/bundle/config/mutator/sync_infer_root.go +++ b/bundle/config/mutator/sync_infer_root.go @@ -50,7 +50,7 @@ func (m *syncInferRoot) computeRoot(path string, root string) string { root = dir } - return root + return filepath.Clean(root) } func (m *syncInferRoot) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagnostics { diff --git a/bundle/config/mutator/sync_infer_root_internal_test.go b/bundle/config/mutator/sync_infer_root_internal_test.go index 010907ae7d..a08007326c 100644 --- a/bundle/config/mutator/sync_infer_root_internal_test.go +++ b/bundle/config/mutator/sync_infer_root_internal_test.go @@ -1,6 +1,7 @@ package mutator import ( + "path/filepath" "testing" "github.com/stretchr/testify/assert" @@ -15,39 +16,51 @@ func TestSyncInferRootInternal_ComputeRoot(t *testing.T) { out string }{ { + // Test that "." doesn't change the root. path: ".", root: "/tmp/some/dir", out: "/tmp/some/dir", }, { + // Test that a subdirectory doesn't change the root. path: "sub", root: "/tmp/some/dir", out: "/tmp/some/dir", }, { + // Test that a parent directory changes the root. path: "../common", root: "/tmp/some/dir", out: "/tmp/some", }, { + // Test that a deeply nested parent directory changes the root. path: "../../../../../../common", root: "/tmp/some/dir/that/is/very/deeply/nested", out: "/tmp/some", }, { + // Test that a parent directory changes the root at the filesystem root boundary. path: "../common", root: "/tmp", out: "/", }, { + // Test that an invalid parent directory doesn't change the root and returns an empty string. path: "../common", root: "/", out: "", }, + { + // Test that the returned path is cleaned even if the root doesn't change. + path: "sub", + root: "/tmp/some/../dir", + out: "/tmp/dir", + }, } for _, tc := range tcases { out := s.computeRoot(tc.path, tc.root) - assert.Equal(t, tc.out, out) + assert.Equal(t, tc.out, filepath.ToSlash(out)) } } diff --git a/bundle/config/mutator/sync_infer_root_test.go b/bundle/config/mutator/sync_infer_root_test.go index ad113a6be9..ef108af809 100644 --- a/bundle/config/mutator/sync_infer_root_test.go +++ b/bundle/config/mutator/sync_infer_root_test.go @@ -2,6 +2,7 @@ package mutator_test import ( "context" + "path/filepath" "testing" "github.com/databricks/cli/bundle" @@ -97,12 +98,12 @@ func TestSyncInferRoot_ParentDirectory(t *testing.T) { ctx := context.Background() diags := bundle.Apply(ctx, b, mutator.SyncInferRoot()) assert.NoError(t, diags.Error()) - assert.Equal(t, "/tmp/some", b.SyncRootPath) + assert.Equal(t, filepath.FromSlash("/tmp/some"), b.SyncRootPath) // Check that the paths are updated. assert.Equal(t, []string{"common"}, b.Config.Sync.Paths) - assert.Equal(t, []string{"dir/foo", "dir/bar"}, b.Config.Sync.Include) - assert.Equal(t, []string{"dir/baz", "dir/qux"}, b.Config.Sync.Exclude) + assert.Equal(t, []string{filepath.FromSlash("dir/foo"), filepath.FromSlash("dir/bar")}, b.Config.Sync.Include) + assert.Equal(t, []string{filepath.FromSlash("dir/baz"), filepath.FromSlash("dir/qux")}, b.Config.Sync.Exclude) } func TestSyncInferRoot_ManyParentDirectories(t *testing.T) { @@ -128,12 +129,18 @@ func TestSyncInferRoot_ManyParentDirectories(t *testing.T) { ctx := context.Background() diags := bundle.Apply(ctx, b, mutator.SyncInferRoot()) assert.NoError(t, diags.Error()) - assert.Equal(t, "/tmp/some", b.SyncRootPath) + assert.Equal(t, filepath.FromSlash("/tmp/some"), b.SyncRootPath) // Check that the paths are updated. assert.Equal(t, []string{"common"}, b.Config.Sync.Paths) - assert.Equal(t, []string{"dir/that/is/very/deeply/nested/foo", "dir/that/is/very/deeply/nested/bar"}, b.Config.Sync.Include) - assert.Equal(t, []string{"dir/that/is/very/deeply/nested/baz", "dir/that/is/very/deeply/nested/qux"}, b.Config.Sync.Exclude) + assert.Equal(t, []string{ + filepath.FromSlash("dir/that/is/very/deeply/nested/foo"), + filepath.FromSlash("dir/that/is/very/deeply/nested/bar"), + }, b.Config.Sync.Include) + assert.Equal(t, []string{ + filepath.FromSlash("dir/that/is/very/deeply/nested/baz"), + filepath.FromSlash("dir/that/is/very/deeply/nested/qux"), + }, b.Config.Sync.Exclude) } func TestSyncInferRoot_MultiplePaths(t *testing.T) { @@ -154,13 +161,13 @@ func TestSyncInferRoot_MultiplePaths(t *testing.T) { ctx := context.Background() diags := bundle.Apply(ctx, b, mutator.SyncInferRoot()) assert.NoError(t, diags.Error()) - assert.Equal(t, "/tmp/some", b.SyncRootPath) + assert.Equal(t, "/tmp/some", filepath.FromSlash(b.SyncRootPath)) // Check that the paths are updated. - assert.Equal(t, "bundle/root/foo", b.Config.Sync.Paths[0]) - assert.Equal(t, "bundle/common", b.Config.Sync.Paths[1]) - assert.Equal(t, "bundle/root/bar", b.Config.Sync.Paths[2]) - assert.Equal(t, "baz", b.Config.Sync.Paths[3]) + assert.Equal(t, "bundle/root/foo", filepath.FromSlash(b.Config.Sync.Paths[0])) + assert.Equal(t, "bundle/common", filepath.FromSlash(b.Config.Sync.Paths[1])) + assert.Equal(t, "bundle/root/bar", filepath.FromSlash(b.Config.Sync.Paths[2])) + assert.Equal(t, "baz", filepath.FromSlash(b.Config.Sync.Paths[3])) } func TestSyncInferRoot_Error(t *testing.T) { From a968928edbccf3b9fd6e2017388e4b1ead539d87 Mon Sep 17 00:00:00 2001 From: Pieter Noordhuis Date: Mon, 19 Aug 2024 17:45:21 +0200 Subject: [PATCH 11/18] More Windows tests --- bundle/config/mutator/sync_infer_root_test.go | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/bundle/config/mutator/sync_infer_root_test.go b/bundle/config/mutator/sync_infer_root_test.go index ef108af809..383e56769e 100644 --- a/bundle/config/mutator/sync_infer_root_test.go +++ b/bundle/config/mutator/sync_infer_root_test.go @@ -36,7 +36,7 @@ func TestSyncInferRoot_NominalAbsolute(t *testing.T) { ctx := context.Background() diags := bundle.Apply(ctx, b, mutator.SyncInferRoot()) assert.NoError(t, diags.Error()) - assert.Equal(t, b.RootPath, b.SyncRootPath) + assert.Equal(t, filepath.FromSlash("/tmp/some/dir"), b.SyncRootPath) // Check that the paths are unchanged. assert.Equal(t, []string{"."}, b.Config.Sync.Paths) @@ -67,7 +67,7 @@ func TestSyncInferRoot_NominalRelative(t *testing.T) { ctx := context.Background() diags := bundle.Apply(ctx, b, mutator.SyncInferRoot()) assert.NoError(t, diags.Error()) - assert.Equal(t, b.RootPath, b.SyncRootPath) + assert.Equal(t, filepath.FromSlash("some/dir"), b.SyncRootPath) // Check that the paths are unchanged. assert.Equal(t, []string{"."}, b.Config.Sync.Paths) @@ -161,13 +161,13 @@ func TestSyncInferRoot_MultiplePaths(t *testing.T) { ctx := context.Background() diags := bundle.Apply(ctx, b, mutator.SyncInferRoot()) assert.NoError(t, diags.Error()) - assert.Equal(t, "/tmp/some", filepath.FromSlash(b.SyncRootPath)) + assert.Equal(t, filepath.FromSlash("/tmp/some"), b.SyncRootPath) // Check that the paths are updated. - assert.Equal(t, "bundle/root/foo", filepath.FromSlash(b.Config.Sync.Paths[0])) - assert.Equal(t, "bundle/common", filepath.FromSlash(b.Config.Sync.Paths[1])) - assert.Equal(t, "bundle/root/bar", filepath.FromSlash(b.Config.Sync.Paths[2])) - assert.Equal(t, "baz", filepath.FromSlash(b.Config.Sync.Paths[3])) + assert.Equal(t, filepath.FromSlash("bundle/root/foo"), b.Config.Sync.Paths[0]) + assert.Equal(t, filepath.FromSlash("bundle/common"), b.Config.Sync.Paths[1]) + assert.Equal(t, filepath.FromSlash("bundle/root/bar"), b.Config.Sync.Paths[2]) + assert.Equal(t, filepath.FromSlash("baz"), b.Config.Sync.Paths[3]) } func TestSyncInferRoot_Error(t *testing.T) { From 56d1f2ff1cdd2c13fc68d19ede191070d08a5e4a Mon Sep 17 00:00:00 2001 From: Pieter Noordhuis Date: Mon, 19 Aug 2024 17:56:51 +0200 Subject: [PATCH 12/18] Final test fix --- bundle/config/mutator/sync_infer_root.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bundle/config/mutator/sync_infer_root.go b/bundle/config/mutator/sync_infer_root.go index 65ca2abca5..012acf8001 100644 --- a/bundle/config/mutator/sync_infer_root.go +++ b/bundle/config/mutator/sync_infer_root.go @@ -57,7 +57,7 @@ func (m *syncInferRoot) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagno var diags diag.Diagnostics // Use the bundle root path as the starting point for inferring the sync root path. - bundleRootPath := b.RootPath + bundleRootPath := filepath.Clean(b.RootPath) // Infer the sync root path by looking at each one of the sync paths. // Every sync path must be a descendant of the final sync root path. From 64de68be65aefbe651556c69a9e03eb9351e67d2 Mon Sep 17 00:00:00 2001 From: Pieter Noordhuis Date: Wed, 21 Aug 2024 13:57:11 +0200 Subject: [PATCH 13/18] Test relative path case --- bundle/config/mutator/sync_infer_root_internal_test.go | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/bundle/config/mutator/sync_infer_root_internal_test.go b/bundle/config/mutator/sync_infer_root_internal_test.go index a08007326c..9ab9c88f46 100644 --- a/bundle/config/mutator/sync_infer_root_internal_test.go +++ b/bundle/config/mutator/sync_infer_root_internal_test.go @@ -57,6 +57,12 @@ func TestSyncInferRootInternal_ComputeRoot(t *testing.T) { root: "/tmp/some/../dir", out: "/tmp/dir", }, + { + // Test that a relative root path also works. + path: "../common", + root: "foo/bar", + out: "foo", + }, } for _, tc := range tcases { From f468f282e81b5fa9df6908491b9d65ee66c86230 Mon Sep 17 00:00:00 2001 From: Pieter Noordhuis Date: Wed, 21 Aug 2024 13:57:19 +0200 Subject: [PATCH 14/18] Comments --- bundle/bundle.go | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/bundle/bundle.go b/bundle/bundle.go index d163ea38ae..8b5ff976d0 100644 --- a/bundle/bundle.go +++ b/bundle/bundle.go @@ -39,10 +39,12 @@ type Bundle struct { // Exclusively use this field for filesystem operations. BundleRoot vfs.Path - // SyncRoot is a virtual filesystem path to the root of the files that - // are synchronized to the workspace. It can be an ancestor to [BundleRoot], - // but not a descendant; that is, [SyncRoot] must contain [BundleRoot]. - SyncRoot vfs.Path + // SyncRoot is a virtual filesystem path to the root directory of the files that are synchronized to the workspace. + // It can be an ancestor to [BundleRoot], but not a descendant; that is, [SyncRoot] must contain [BundleRoot]. + SyncRoot vfs.Path + + // SyncRootPath is the local path to the root directory of files that are synchronized to the workspace. + // It is equal to `SyncRoot.Native()` and included as dedicated field for convenient access. SyncRootPath string Config config.Root From 98ef3f4092326e58305738abef1ac79c51bcb945 Mon Sep 17 00:00:00 2001 From: Pieter Noordhuis Date: Wed, 21 Aug 2024 14:39:44 +0200 Subject: [PATCH 15/18] Update trampoline code to use sync root path to compute relative path --- bundle/config/mutator/trampoline.go | 2 +- bundle/config/mutator/trampoline_test.go | 8 ++++++-- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/bundle/config/mutator/trampoline.go b/bundle/config/mutator/trampoline.go index dde9a299eb..dcca501493 100644 --- a/bundle/config/mutator/trampoline.go +++ b/bundle/config/mutator/trampoline.go @@ -82,7 +82,7 @@ func (m *trampoline) generateNotebookWrapper(ctx context.Context, b *bundle.Bund return err } - internalDirRel, err := filepath.Rel(b.RootPath, internalDir) + internalDirRel, err := filepath.Rel(b.SyncRootPath, internalDir) if err != nil { return err } diff --git a/bundle/config/mutator/trampoline_test.go b/bundle/config/mutator/trampoline_test.go index de395c1659..0c5e775470 100644 --- a/bundle/config/mutator/trampoline_test.go +++ b/bundle/config/mutator/trampoline_test.go @@ -56,8 +56,12 @@ func TestGenerateTrampoline(t *testing.T) { } b := &bundle.Bundle{ - RootPath: tmpDir, + RootPath: filepath.Join(tmpDir, "parent", "my_bundle"), + SyncRootPath: filepath.Join(tmpDir, "parent"), Config: config.Root{ + Workspace: config.Workspace{ + FilePath: "/Workspace/files", + }, Bundle: config.Bundle{ Target: "development", }, @@ -89,6 +93,6 @@ func TestGenerateTrampoline(t *testing.T) { require.Equal(t, "Hello from Trampoline", string(bytes)) task := b.Config.Resources.Jobs["test"].Tasks[0] - require.Equal(t, task.NotebookTask.NotebookPath, ".databricks/bundle/development/.internal/notebook_test_to_trampoline") + require.Equal(t, task.NotebookTask.NotebookPath, "/Workspace/files/my_bundle/.databricks/bundle/development/.internal/notebook_test_to_trampoline") require.Nil(t, task.PythonWheelTask) } From 7a6a7eb2a7e919f9d75a5b64fee6be9733f1da88 Mon Sep 17 00:00:00 2001 From: Pieter Noordhuis Date: Wed, 21 Aug 2024 14:50:50 +0200 Subject: [PATCH 16/18] Fix other trampoline test --- bundle/config/mutator/trampoline_test.go | 2 +- bundle/python/conditional_transform_test.go | 22 ++++++++++----------- 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/bundle/config/mutator/trampoline_test.go b/bundle/config/mutator/trampoline_test.go index 0c5e775470..08d3c82208 100644 --- a/bundle/config/mutator/trampoline_test.go +++ b/bundle/config/mutator/trampoline_test.go @@ -93,6 +93,6 @@ func TestGenerateTrampoline(t *testing.T) { require.Equal(t, "Hello from Trampoline", string(bytes)) task := b.Config.Resources.Jobs["test"].Tasks[0] - require.Equal(t, task.NotebookTask.NotebookPath, "/Workspace/files/my_bundle/.databricks/bundle/development/.internal/notebook_test_to_trampoline") + require.Equal(t, "/Workspace/files/my_bundle/.databricks/bundle/development/.internal/notebook_test_to_trampoline", task.NotebookTask.NotebookPath) require.Nil(t, task.PythonWheelTask) } diff --git a/bundle/python/conditional_transform_test.go b/bundle/python/conditional_transform_test.go index 677970d70a..1d397f7a7f 100644 --- a/bundle/python/conditional_transform_test.go +++ b/bundle/python/conditional_transform_test.go @@ -2,7 +2,6 @@ package python import ( "context" - "path" "path/filepath" "testing" @@ -18,11 +17,15 @@ func TestNoTransformByDefault(t *testing.T) { tmpDir := t.TempDir() b := &bundle.Bundle{ - RootPath: tmpDir, + RootPath: filepath.Join(tmpDir, "parent", "my_bundle"), + SyncRootPath: filepath.Join(tmpDir, "parent"), Config: config.Root{ Bundle: config.Bundle{ Target: "development", }, + Workspace: config.Workspace{ + FilePath: "/Workspace/files", + }, Resources: config.Resources{ Jobs: map[string]*resources.Job{ "job1": { @@ -63,11 +66,15 @@ func TestTransformWithExperimentalSettingSetToTrue(t *testing.T) { tmpDir := t.TempDir() b := &bundle.Bundle{ - RootPath: tmpDir, + RootPath: filepath.Join(tmpDir, "parent", "my_bundle"), + SyncRootPath: filepath.Join(tmpDir, "parent"), Config: config.Root{ Bundle: config.Bundle{ Target: "development", }, + Workspace: config.Workspace{ + FilePath: "/Workspace/files", + }, Resources: config.Resources{ Jobs: map[string]*resources.Job{ "job1": { @@ -102,14 +109,7 @@ func TestTransformWithExperimentalSettingSetToTrue(t *testing.T) { task := b.Config.Resources.Jobs["job1"].Tasks[0] require.Nil(t, task.PythonWheelTask) require.NotNil(t, task.NotebookTask) - - dir, err := b.InternalDir(context.Background()) - require.NoError(t, err) - - internalDirRel, err := filepath.Rel(b.RootPath, dir) - require.NoError(t, err) - - require.Equal(t, path.Join(filepath.ToSlash(internalDirRel), "notebook_job1_key1"), task.NotebookTask.NotebookPath) + require.Equal(t, "/Workspace/files/my_bundle/.databricks/bundle/development/.internal/notebook_job1_key1", task.NotebookTask.NotebookPath) require.Len(t, task.Libraries, 1) require.Equal(t, "/Workspace/Users/test@test.com/bundle/dist/test.jar", task.Libraries[0].Jar) From 7ea37a7ad8d8dcd3efb30acd7ee6616c9f27ccfa Mon Sep 17 00:00:00 2001 From: Pieter Noordhuis Date: Wed, 21 Aug 2024 15:15:32 +0200 Subject: [PATCH 17/18] Use sync root path in metadata computation --- bundle/deploy/metadata/compute.go | 7 ++++--- bundle/deploy/metadata/compute_test.go | 12 +++++++----- 2 files changed, 11 insertions(+), 8 deletions(-) diff --git a/bundle/deploy/metadata/compute.go b/bundle/deploy/metadata/compute.go index 6ab997e27a..984e0b05ec 100644 --- a/bundle/deploy/metadata/compute.go +++ b/bundle/deploy/metadata/compute.go @@ -37,10 +37,11 @@ func (m *compute) Apply(_ context.Context, b *bundle.Bundle) diag.Diagnostics { // Set job config paths in metadata jobsMetadata := make(map[string]*metadata.Job) for name, job := range b.Config.Resources.Jobs { - // Compute config file path the job is defined in, relative to the bundle - // root + // Compute config file path the job is defined in, relative to the sync root. + // You can join the file path (below) with this relative path to find the + // workspace path containing the YAML file that defines the job. l := b.Config.GetLocation("resources.jobs." + name) - relativePath, err := filepath.Rel(b.RootPath, l.File) + relativePath, err := filepath.Rel(b.SyncRootPath, l.File) if err != nil { return diag.Errorf("failed to compute relative path for job %s: %v", name, err) } diff --git a/bundle/deploy/metadata/compute_test.go b/bundle/deploy/metadata/compute_test.go index 6d43f845b1..14acd4bdbb 100644 --- a/bundle/deploy/metadata/compute_test.go +++ b/bundle/deploy/metadata/compute_test.go @@ -16,6 +16,8 @@ import ( func TestComputeMetadataMutator(t *testing.T) { b := &bundle.Bundle{ + RootPath: "parent/my_bundle", + SyncRootPath: "parent", Config: config.Root{ Workspace: config.Workspace{ RootPath: "/Users/shreyas.goenka@databricks.com", @@ -55,9 +57,9 @@ func TestComputeMetadataMutator(t *testing.T) { }, } - bundletest.SetLocation(b, "resources.jobs.my-job-1", "a/b/c") - bundletest.SetLocation(b, "resources.jobs.my-job-2", "d/e/f") - bundletest.SetLocation(b, "resources.pipelines.my-pipeline", "abc") + bundletest.SetLocation(b, "resources.jobs.my-job-1", "parent/my_bundle/a/b/c") + bundletest.SetLocation(b, "resources.jobs.my-job-2", "parent/my_bundle/d/e/f") + bundletest.SetLocation(b, "resources.pipelines.my-pipeline", "parent/my_bundle/abc") expectedMetadata := metadata.Metadata{ Version: metadata.Version, @@ -79,11 +81,11 @@ func TestComputeMetadataMutator(t *testing.T) { Resources: metadata.Resources{ Jobs: map[string]*metadata.Job{ "my-job-1": { - RelativePath: "a/b/c", + RelativePath: "my_bundle/a/b/c", ID: "1111", }, "my-job-2": { - RelativePath: "d/e/f", + RelativePath: "my_bundle/d/e/f", ID: "2222", }, }, From f18c453c5984d04c8a884556d5e7cacf25bbad7b Mon Sep 17 00:00:00 2001 From: Pieter Noordhuis Date: Wed, 21 Aug 2024 16:10:00 +0200 Subject: [PATCH 18/18] Revert "Use sync root path in metadata computation" This reverts commit 7ea37a7ad8d8dcd3efb30acd7ee6616c9f27ccfa. --- bundle/deploy/metadata/compute.go | 7 +++---- bundle/deploy/metadata/compute_test.go | 12 +++++------- 2 files changed, 8 insertions(+), 11 deletions(-) diff --git a/bundle/deploy/metadata/compute.go b/bundle/deploy/metadata/compute.go index 984e0b05ec..6ab997e27a 100644 --- a/bundle/deploy/metadata/compute.go +++ b/bundle/deploy/metadata/compute.go @@ -37,11 +37,10 @@ func (m *compute) Apply(_ context.Context, b *bundle.Bundle) diag.Diagnostics { // Set job config paths in metadata jobsMetadata := make(map[string]*metadata.Job) for name, job := range b.Config.Resources.Jobs { - // Compute config file path the job is defined in, relative to the sync root. - // You can join the file path (below) with this relative path to find the - // workspace path containing the YAML file that defines the job. + // Compute config file path the job is defined in, relative to the bundle + // root l := b.Config.GetLocation("resources.jobs." + name) - relativePath, err := filepath.Rel(b.SyncRootPath, l.File) + relativePath, err := filepath.Rel(b.RootPath, l.File) if err != nil { return diag.Errorf("failed to compute relative path for job %s: %v", name, err) } diff --git a/bundle/deploy/metadata/compute_test.go b/bundle/deploy/metadata/compute_test.go index 14acd4bdbb..6d43f845b1 100644 --- a/bundle/deploy/metadata/compute_test.go +++ b/bundle/deploy/metadata/compute_test.go @@ -16,8 +16,6 @@ import ( func TestComputeMetadataMutator(t *testing.T) { b := &bundle.Bundle{ - RootPath: "parent/my_bundle", - SyncRootPath: "parent", Config: config.Root{ Workspace: config.Workspace{ RootPath: "/Users/shreyas.goenka@databricks.com", @@ -57,9 +55,9 @@ func TestComputeMetadataMutator(t *testing.T) { }, } - bundletest.SetLocation(b, "resources.jobs.my-job-1", "parent/my_bundle/a/b/c") - bundletest.SetLocation(b, "resources.jobs.my-job-2", "parent/my_bundle/d/e/f") - bundletest.SetLocation(b, "resources.pipelines.my-pipeline", "parent/my_bundle/abc") + bundletest.SetLocation(b, "resources.jobs.my-job-1", "a/b/c") + bundletest.SetLocation(b, "resources.jobs.my-job-2", "d/e/f") + bundletest.SetLocation(b, "resources.pipelines.my-pipeline", "abc") expectedMetadata := metadata.Metadata{ Version: metadata.Version, @@ -81,11 +79,11 @@ func TestComputeMetadataMutator(t *testing.T) { Resources: metadata.Resources{ Jobs: map[string]*metadata.Job{ "my-job-1": { - RelativePath: "my_bundle/a/b/c", + RelativePath: "a/b/c", ID: "1111", }, "my-job-2": { - RelativePath: "my_bundle/d/e/f", + RelativePath: "d/e/f", ID: "2222", }, },