From 6a2c4119ab58cd8dcda4f42a0534e6147c6781cc Mon Sep 17 00:00:00 2001 From: Shreyas Goenka Date: Fri, 2 Jun 2023 00:14:11 +0200 Subject: [PATCH 01/10] Add check for path is a directory in filer.ReadDir --- libs/filer/dbfs_client.go | 42 +++++++--------------------- libs/filer/filer.go | 18 ++++++++++-- libs/filer/workspace_files_client.go | 29 ++++--------------- 3 files changed, 31 insertions(+), 58 deletions(-) diff --git a/libs/filer/dbfs_client.go b/libs/filer/dbfs_client.go index c86a80b1e15..0a9d5b086a1 100644 --- a/libs/filer/dbfs_client.go +++ b/libs/filer/dbfs_client.go @@ -22,7 +22,11 @@ type dbfsDirEntry struct { } func (entry dbfsDirEntry) Type() fs.FileMode { - return entry.Mode() + typ := fs.ModePerm + if entry.fi.IsDir { + typ |= fs.ModeDir + } + return typ } func (entry dbfsDirEntry) Info() (fs.FileInfo, error) { @@ -43,11 +47,7 @@ func (info dbfsFileInfo) Size() int64 { } func (info dbfsFileInfo) Mode() fs.FileMode { - mode := fs.ModePerm - if info.fi.IsDir { - mode |= fs.ModeDir - } - return mode + return fs.ModePerm } func (info dbfsFileInfo) ModTime() time.Time { @@ -222,6 +222,10 @@ func (w *DbfsClient) ReadDir(ctx context.Context, name string) ([]fs.DirEntry, e return nil, err } + if len(res.Files) == 1 && !res.Files[0].IsDir && res.Files[0].Path == absPath { + return nil, NotADirectory{absPath} + } + info := make([]fs.DirEntry, len(res.Files)) for i, v := range res.Files { info[i] = dbfsDirEntry{dbfsFileInfo: dbfsFileInfo{fi: v}} @@ -240,29 +244,3 @@ func (w *DbfsClient) Mkdir(ctx context.Context, name string) error { return w.workspaceClient.Dbfs.MkdirsByPath(ctx, dirPath) } - -func (w *DbfsClient) Stat(ctx context.Context, name string) (fs.FileInfo, error) { - absPath, err := w.root.Join(name) - if err != nil { - return nil, err - } - - info, err := w.workspaceClient.Dbfs.GetStatusByPath(ctx, absPath) - if err != nil { - var aerr *apierr.APIError - if !errors.As(err, &aerr) { - return nil, err - } - - // This API returns a 404 if the file doesn't exist. - if aerr.StatusCode == http.StatusNotFound { - if aerr.ErrorCode == "RESOURCE_DOES_NOT_EXIST" { - return nil, FileDoesNotExistError{absPath} - } - } - - return nil, err - } - - return dbfsFileInfo{*info}, nil -} diff --git a/libs/filer/filer.go b/libs/filer/filer.go index ff01ea79816..9d505399511 100644 --- a/libs/filer/filer.go +++ b/libs/filer/filer.go @@ -2,6 +2,7 @@ package filer import ( "context" + "errors" "fmt" "io" "io/fs" @@ -50,6 +51,20 @@ func (err NoSuchDirectoryError) Is(other error) bool { return other == fs.ErrNotExist } +var ErrNotADirectory = errors.New("not a directory") + +type NotADirectory struct { + path string +} + +func (err NotADirectory) Error() string { + return fmt.Sprintf("%s is not a directory", err.path) +} + +func (err NotADirectory) Is(other error) bool { + return other == ErrNotADirectory +} + // Filer is used to access files in a workspace. // It has implementations for accessing files in WSFS and in DBFS. type Filer interface { @@ -68,7 +83,4 @@ type Filer interface { // Creates directory at `path`, creating any intermediate directories as required. Mkdir(ctx context.Context, path string) error - - // Stat returns information about the file at `path`. - Stat(ctx context.Context, name string) (fs.FileInfo, error) } diff --git a/libs/filer/workspace_files_client.go b/libs/filer/workspace_files_client.go index 967f9a1de5e..594e1dbc80d 100644 --- a/libs/filer/workspace_files_client.go +++ b/libs/filer/workspace_files_client.go @@ -222,6 +222,12 @@ func (w *WorkspaceFilesClient) ReadDir(ctx context.Context, name string) ([]fs.D objects, err := w.workspaceClient.Workspace.ListAll(ctx, workspace.ListWorkspaceRequest{ Path: absPath, }) + + // TODO: add integration test for this + if len(objects) == 1 && objects[0].Path == absPath { + return nil, NotADirectory{absPath} + } + if err != nil { // If we got an API error we deal with it below. var aerr *apierr.APIError @@ -256,26 +262,3 @@ func (w *WorkspaceFilesClient) Mkdir(ctx context.Context, name string) error { Path: dirPath, }) } - -func (w *WorkspaceFilesClient) Stat(ctx context.Context, name string) (fs.FileInfo, error) { - absPath, err := w.root.Join(name) - if err != nil { - return nil, err - } - - info, err := w.workspaceClient.Workspace.GetStatusByPath(ctx, absPath) - if err != nil { - // If we got an API error we deal with it below. - var aerr *apierr.APIError - if !errors.As(err, &aerr) { - return nil, err - } - - // This API returns a 404 if the specified path does not exist. - if aerr.StatusCode == http.StatusNotFound { - return nil, FileDoesNotExistError{absPath} - } - } - - return wsfsFileInfo{*info}, nil -} From 610bb3ccb9d0343351f61d37ca4347d1c8982f9b Mon Sep 17 00:00:00 2001 From: Shreyas Goenka Date: Fri, 2 Jun 2023 00:15:57 +0200 Subject: [PATCH 02/10] - --- internal/filer_test.go | 53 +++--------------------------------------- 1 file changed, 3 insertions(+), 50 deletions(-) diff --git a/internal/filer_test.go b/internal/filer_test.go index 0dda1d1bfe2..223f5d847bf 100644 --- a/internal/filer_test.go +++ b/internal/filer_test.go @@ -15,7 +15,6 @@ import ( "github.com/databricks/databricks-sdk-go" "github.com/databricks/databricks-sdk-go/apierr" "github.com/databricks/databricks-sdk-go/service/files" - "github.com/databricks/databricks-sdk-go/service/workspace" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" ) @@ -68,32 +67,11 @@ func runFilerReadWriteTest(t *testing.T, ctx context.Context, f filer.Filer) { assert.NoError(t, err) filerTest{t, f}.assertContents(ctx, "/foo/bar", `hello universe`) - // Stat on a directory should succeed. - // Note: size and modification time behave differently between WSFS and DBFS. - info, err := f.Stat(ctx, "/foo") - require.NoError(t, err) - assert.Equal(t, "foo", info.Name()) - assert.True(t, info.Mode().IsDir()) - assert.Equal(t, true, info.IsDir()) - - // Stat on a file should succeed. - // Note: size and modification time behave differently between WSFS and DBFS. - info, err = f.Stat(ctx, "/foo/bar") - require.NoError(t, err) - assert.Equal(t, "bar", info.Name()) - assert.True(t, info.Mode().IsRegular()) - assert.Equal(t, false, info.IsDir()) - // Delete should fail if the file doesn't exist. err = f.Delete(ctx, "/doesnt_exist") assert.True(t, errors.As(err, &filer.FileDoesNotExistError{})) assert.True(t, errors.Is(err, fs.ErrNotExist)) - // Stat should fail if the file doesn't exist. - _, err = f.Stat(ctx, "/doesnt_exist") - assert.True(t, errors.As(err, &filer.FileDoesNotExistError{})) - assert.True(t, errors.Is(err, fs.ErrNotExist)) - // Delete should succeed for file that does exist. err = f.Delete(ctx, "/foo/bar") assert.NoError(t, err) @@ -159,35 +137,10 @@ func runFilerReadDirTest(t *testing.T, ctx context.Context, f filer.Filer) { assert.Len(t, entries, 1) assert.Equal(t, "c", entries[0].Name()) assert.True(t, entries[0].IsDir()) -} - -func temporaryWorkspaceDir(t *testing.T, w *databricks.WorkspaceClient) string { - ctx := context.Background() - me, err := w.CurrentUser.Me(ctx) - require.NoError(t, err) - - path := fmt.Sprintf("/Users/%s/%s", me.UserName, RandomName("integration-test-filer-wsfs-")) - // Ensure directory exists, but doesn't exist YET! - // Otherwise we could inadvertently remove a directory that already exists on cleanup. - t.Logf("mkdir %s", path) - err = w.Workspace.MkdirsByPath(ctx, path) - require.NoError(t, err) - - // Remove test directory on test completion. - t.Cleanup(func() { - t.Logf("rm -rf %s", path) - err := w.Workspace.Delete(ctx, workspace.Delete{ - Path: path, - Recursive: true, - }) - if err == nil || apierr.IsMissing(err) { - return - } - t.Logf("unable to remove temporary workspace directory %s: %#v", path, err) - }) - - return path + // TODO: split into a separate PR + _, err = f.ReadDir(ctx, "/hello.txt") + assert.ErrorIs(t, err, filer.ErrNotADirectory) } func setupWorkspaceFilesTest(t *testing.T) (context.Context, filer.Filer) { From 772b2eecbd1c97e30de175e1916232841b2eb2e7 Mon Sep 17 00:00:00 2001 From: Shreyas Goenka Date: Fri, 2 Jun 2023 00:18:49 +0200 Subject: [PATCH 03/10] - --- internal/filer_test.go | 21 +++++++++++++++ libs/filer/dbfs_client.go | 38 +++++++++++++++++++++++----- libs/filer/filer.go | 3 +++ libs/filer/workspace_files_client.go | 23 +++++++++++++++++ 4 files changed, 79 insertions(+), 6 deletions(-) diff --git a/internal/filer_test.go b/internal/filer_test.go index 223f5d847bf..02a83339852 100644 --- a/internal/filer_test.go +++ b/internal/filer_test.go @@ -67,11 +67,32 @@ func runFilerReadWriteTest(t *testing.T, ctx context.Context, f filer.Filer) { assert.NoError(t, err) filerTest{t, f}.assertContents(ctx, "/foo/bar", `hello universe`) + // Stat on a directory should succeed. + // Note: size and modification time behave differently between WSFS and DBFS. + info, err := f.Stat(ctx, "/foo") + require.NoError(t, err) + assert.Equal(t, "foo", info.Name()) + assert.True(t, info.Mode().IsDir()) + assert.Equal(t, true, info.IsDir()) + + // Stat on a file should succeed. + // Note: size and modification time behave differently between WSFS and DBFS. + info, err = f.Stat(ctx, "/foo/bar") + require.NoError(t, err) + assert.Equal(t, "bar", info.Name()) + assert.True(t, info.Mode().IsRegular()) + assert.Equal(t, false, info.IsDir()) + // Delete should fail if the file doesn't exist. err = f.Delete(ctx, "/doesnt_exist") assert.True(t, errors.As(err, &filer.FileDoesNotExistError{})) assert.True(t, errors.Is(err, fs.ErrNotExist)) + // Stat should fail if the file doesn't exist. + _, err = f.Stat(ctx, "/doesnt_exist") + assert.True(t, errors.As(err, &filer.FileDoesNotExistError{})) + assert.True(t, errors.Is(err, fs.ErrNotExist)) + // Delete should succeed for file that does exist. err = f.Delete(ctx, "/foo/bar") assert.NoError(t, err) diff --git a/libs/filer/dbfs_client.go b/libs/filer/dbfs_client.go index 0a9d5b086a1..8229e97b110 100644 --- a/libs/filer/dbfs_client.go +++ b/libs/filer/dbfs_client.go @@ -22,11 +22,7 @@ type dbfsDirEntry struct { } func (entry dbfsDirEntry) Type() fs.FileMode { - typ := fs.ModePerm - if entry.fi.IsDir { - typ |= fs.ModeDir - } - return typ + return entry.Mode() } func (entry dbfsDirEntry) Info() (fs.FileInfo, error) { @@ -47,7 +43,11 @@ func (info dbfsFileInfo) Size() int64 { } func (info dbfsFileInfo) Mode() fs.FileMode { - return fs.ModePerm + mode := fs.ModePerm + if info.fi.IsDir { + mode |= fs.ModeDir + } + return mode } func (info dbfsFileInfo) ModTime() time.Time { @@ -244,3 +244,29 @@ func (w *DbfsClient) Mkdir(ctx context.Context, name string) error { return w.workspaceClient.Dbfs.MkdirsByPath(ctx, dirPath) } + +func (w *DbfsClient) Stat(ctx context.Context, name string) (fs.FileInfo, error) { + absPath, err := w.root.Join(name) + if err != nil { + return nil, err + } + + info, err := w.workspaceClient.Dbfs.GetStatusByPath(ctx, absPath) + if err != nil { + var aerr *apierr.APIError + if !errors.As(err, &aerr) { + return nil, err + } + + // This API returns a 404 if the file doesn't exist. + if aerr.StatusCode == http.StatusNotFound { + if aerr.ErrorCode == "RESOURCE_DOES_NOT_EXIST" { + return nil, FileDoesNotExistError{absPath} + } + } + + return nil, err + } + + return dbfsFileInfo{*info}, nil +} diff --git a/libs/filer/filer.go b/libs/filer/filer.go index 9d505399511..1525aba3a0e 100644 --- a/libs/filer/filer.go +++ b/libs/filer/filer.go @@ -83,4 +83,7 @@ type Filer interface { // Creates directory at `path`, creating any intermediate directories as required. Mkdir(ctx context.Context, path string) error + + // Stat returns information about the file at `path`. + Stat(ctx context.Context, name string) (fs.FileInfo, error) } diff --git a/libs/filer/workspace_files_client.go b/libs/filer/workspace_files_client.go index 594e1dbc80d..7111d2678c8 100644 --- a/libs/filer/workspace_files_client.go +++ b/libs/filer/workspace_files_client.go @@ -262,3 +262,26 @@ func (w *WorkspaceFilesClient) Mkdir(ctx context.Context, name string) error { Path: dirPath, }) } + +func (w *WorkspaceFilesClient) Stat(ctx context.Context, name string) (fs.FileInfo, error) { + absPath, err := w.root.Join(name) + if err != nil { + return nil, err + } + + info, err := w.workspaceClient.Workspace.GetStatusByPath(ctx, absPath) + if err != nil { + // If we got an API error we deal with it below. + var aerr *apierr.APIError + if !errors.As(err, &aerr) { + return nil, err + } + + // This API returns a 404 if the specified path does not exist. + if aerr.StatusCode == http.StatusNotFound { + return nil, FileDoesNotExistError{absPath} + } + } + + return wsfsFileInfo{*info}, nil +} From bce7df8f19086c01b15708d93b2e5b7c673c7eb5 Mon Sep 17 00:00:00 2001 From: Shreyas Goenka Date: Fri, 2 Jun 2023 00:19:15 +0200 Subject: [PATCH 04/10] - --- internal/helpers.go | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/internal/helpers.go b/internal/helpers.go index b51d005b27e..f1901f14e1c 100644 --- a/internal/helpers.go +++ b/internal/helpers.go @@ -14,6 +14,9 @@ import ( "github.com/databricks/cli/cmd/root" _ "github.com/databricks/cli/cmd/version" + "github.com/databricks/databricks-sdk-go" + "github.com/databricks/databricks-sdk-go/apierr" + "github.com/databricks/databricks-sdk-go/service/workspace" "github.com/stretchr/testify/require" _ "github.com/databricks/cli/cmd/workspace" @@ -176,3 +179,32 @@ func writeFile(t *testing.T, name string, body string) string { f.Close() return f.Name() } + +func temporaryWorkspaceDir(t *testing.T, w *databricks.WorkspaceClient) string { + ctx := context.Background() + me, err := w.CurrentUser.Me(ctx) + require.NoError(t, err) + + path := fmt.Sprintf("/Users/%s/%s", me.UserName, RandomName("integration-test-wsfs-")) + + // Ensure directory exists, but doesn't exist YET! + // Otherwise we could inadvertently remove a directory that already exists on cleanup. + t.Logf("mkdir %s", path) + err = w.Workspace.MkdirsByPath(ctx, path) + require.NoError(t, err) + + // Remove test directory on test completion. + t.Cleanup(func() { + t.Logf("rm -rf %s", path) + err := w.Workspace.Delete(ctx, workspace.Delete{ + Path: path, + Recursive: true, + }) + if err == nil || apierr.IsMissing(err) { + return + } + t.Logf("unable to remove temporary workspace directory %s: %#v", path, err) + }) + + return path +} From b402500535ce219af67f90bc85b9135cf32b4173 Mon Sep 17 00:00:00 2001 From: Shreyas Goenka Date: Fri, 2 Jun 2023 00:20:23 +0200 Subject: [PATCH 05/10] cleanup todos --- internal/filer_test.go | 1 - libs/filer/workspace_files_client.go | 1 - 2 files changed, 2 deletions(-) diff --git a/internal/filer_test.go b/internal/filer_test.go index 02a83339852..1f4e8df968a 100644 --- a/internal/filer_test.go +++ b/internal/filer_test.go @@ -159,7 +159,6 @@ func runFilerReadDirTest(t *testing.T, ctx context.Context, f filer.Filer) { assert.Equal(t, "c", entries[0].Name()) assert.True(t, entries[0].IsDir()) - // TODO: split into a separate PR _, err = f.ReadDir(ctx, "/hello.txt") assert.ErrorIs(t, err, filer.ErrNotADirectory) } diff --git a/libs/filer/workspace_files_client.go b/libs/filer/workspace_files_client.go index 7111d2678c8..b06a25146b4 100644 --- a/libs/filer/workspace_files_client.go +++ b/libs/filer/workspace_files_client.go @@ -223,7 +223,6 @@ func (w *WorkspaceFilesClient) ReadDir(ctx context.Context, name string) ([]fs.D Path: absPath, }) - // TODO: add integration test for this if len(objects) == 1 && objects[0].Path == absPath { return nil, NotADirectory{absPath} } From dd7e8947c058718a2632dea51e7ed51f55557cc7 Mon Sep 17 00:00:00 2001 From: Shreyas Goenka Date: Fri, 2 Jun 2023 00:28:32 +0200 Subject: [PATCH 06/10] add test for empty dir case --- internal/filer_test.go | 8 ++++++++ libs/filer/dbfs_client.go | 2 +- 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/internal/filer_test.go b/internal/filer_test.go index 1f4e8df968a..69598cb866b 100644 --- a/internal/filer_test.go +++ b/internal/filer_test.go @@ -159,8 +159,16 @@ func runFilerReadDirTest(t *testing.T, ctx context.Context, f filer.Filer) { assert.Equal(t, "c", entries[0].Name()) assert.True(t, entries[0].IsDir()) + // Expect an error trying to call ReadDir on a file _, err = f.ReadDir(ctx, "/hello.txt") assert.ErrorIs(t, err, filer.ErrNotADirectory) + + // Expect 0 entries for an empty directory + err = f.Mkdir(ctx, "empty-dir") + require.NoError(t, err) + entries, err = f.ReadDir(ctx, "empty-dir") + assert.NoError(t, err) + assert.Len(t, entries, 0) } func setupWorkspaceFilesTest(t *testing.T) (context.Context, filer.Filer) { diff --git a/libs/filer/dbfs_client.go b/libs/filer/dbfs_client.go index 8229e97b110..67878136b79 100644 --- a/libs/filer/dbfs_client.go +++ b/libs/filer/dbfs_client.go @@ -222,7 +222,7 @@ func (w *DbfsClient) ReadDir(ctx context.Context, name string) ([]fs.DirEntry, e return nil, err } - if len(res.Files) == 1 && !res.Files[0].IsDir && res.Files[0].Path == absPath { + if len(res.Files) == 1 && res.Files[0].Path == absPath { return nil, NotADirectory{absPath} } From e21c4345a7e3674bc88e6b4c502ad48111d9a5aa Mon Sep 17 00:00:00 2001 From: Shreyas Goenka Date: Fri, 2 Jun 2023 00:33:39 +0200 Subject: [PATCH 07/10] Use filer in repofiles struct --- internal/repofiles_test.go | 155 ++++++++++++++++++++++++++ libs/sync/repofiles/repofiles.go | 96 +++++++--------- libs/sync/repofiles/repofiles_test.go | 45 ++++---- 3 files changed, 219 insertions(+), 77 deletions(-) create mode 100644 internal/repofiles_test.go diff --git a/internal/repofiles_test.go b/internal/repofiles_test.go new file mode 100644 index 00000000000..07c488e0435 --- /dev/null +++ b/internal/repofiles_test.go @@ -0,0 +1,155 @@ +package internal + +import ( + "context" + "os" + "path" + "path/filepath" + "strings" + "testing" + + "github.com/databricks/cli/libs/filer" + "github.com/databricks/cli/libs/sync/repofiles" + "github.com/databricks/databricks-sdk-go" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +// TODO: skip if not cloud env, these are integration tests +// TODO: split into a separate PR + +func TestRepoFilesPutFile(t *testing.T) { + w, err := databricks.NewWorkspaceClient() + require.NoError(t, err) + ctx := context.Background() + + // initialize client + wsfsTmpDir := temporaryWorkspaceDir(t, w) + localTmpDir := t.TempDir() + r, err := repofiles.Create(wsfsTmpDir, localTmpDir, w, &repofiles.RepoFileOptions{ + OverwriteIfExists: true, + }) + require.NoError(t, err) + f, err := filer.NewWorkspaceFilesClient(w, wsfsTmpDir) + require.NoError(t, err) + + // create local file + err = os.WriteFile(filepath.Join(localTmpDir, "foo.txt"), []byte(`hello, world`), os.ModePerm) + require.NoError(t, err) + err = r.PutFile(ctx, "foo.txt") + require.NoError(t, err) + + require.NoError(t, f.Mkdir(ctx, "bar")) + + entries, err := f.ReadDir(ctx, "bar") + require.NoError(t, err) + require.Len(t, entries, 1) + + assertFileContains(t, ctx, f, "foo.txt", "hello, world") +} + +func TestRepoFilesFileOverwritesNotebook(t *testing.T) { + w, err := databricks.NewWorkspaceClient() + require.NoError(t, err) + ctx := context.Background() + + // initialize client + wsfsTmpDir := temporaryWorkspaceDir(t, w) + localTmpDir := t.TempDir() + r, err := repofiles.Create(wsfsTmpDir, localTmpDir, w, &repofiles.RepoFileOptions{ + OverwriteIfExists: true, + }) + require.NoError(t, err) + f, err := filer.NewWorkspaceFilesClient(w, wsfsTmpDir) + require.NoError(t, err) + + // create local notebook + err = os.WriteFile(filepath.Join(localTmpDir, "foo.py"), []byte("#Databricks notebook source\nprint(1)"), os.ModePerm) + require.NoError(t, err) + + // upload notebook + err = r.PutFile(ctx, "foo.py") + require.NoError(t, err) + assertNotebookExists(t, ctx, w, path.Join(wsfsTmpDir, "foo")) + + // upload file, and assert that it overwrites the notebook + err = os.WriteFile(filepath.Join(localTmpDir, "foo"), []byte("I am going to overwrite the notebook"), os.ModePerm) + require.NoError(t, err) + err = r.PutFile(ctx, "foo") + require.NoError(t, err) + assertFileContains(t, ctx, f, "foo", "I am going to overwrite the notebook") +} + +func TestRepoFilesFileOverwritesEmptyDirectoryTree(t *testing.T) { + w, err := databricks.NewWorkspaceClient() + require.NoError(t, err) + ctx := context.Background() + + // initialize client + wsfsTmpDir := temporaryWorkspaceDir(t, w) + localTmpDir := t.TempDir() + r, err := repofiles.Create(wsfsTmpDir, localTmpDir, w, &repofiles.RepoFileOptions{ + OverwriteIfExists: true, + }) + require.NoError(t, err) + f, err := filer.NewWorkspaceFilesClient(w, wsfsTmpDir) + require.NoError(t, err) + + // create local file + err = os.WriteFile(filepath.Join(localTmpDir, "foo"), []byte(`hello, world`), os.ModePerm) + require.NoError(t, err) + + // construct a directory tree without files in the workspace + err = f.Mkdir(ctx, "foo/a/b/c") + require.NoError(t, err) + err = f.Mkdir(ctx, "foo/a/b/d/e") + require.NoError(t, err) + err = f.Mkdir(ctx, "foo/f/g/i") + require.NoError(t, err) + + // assert the directories exist + entries, err := f.ReadDir(ctx, "foo") + require.NoError(t, err) + assert.Len(t, entries, 2) + assert.True(t, entries[0].IsDir()) + assert.True(t, entries[1].IsDir()) + + // upload file, and assert that it overwrites the empty directories + err = r.PutFile(ctx, "foo") + require.NoError(t, err) + assertFileContains(t, ctx, f, "foo", "hello, world") + + // assert the directories do not exist anymore + _, err = f.ReadDir(ctx, "foo") + assert.ErrorIs(t, err, filer.ErrNotADirectory) +} + +func TestRepoFilesFileInDirOverwritesExistingNotebook(t *testing.T) { + w, err := databricks.NewWorkspaceClient() + require.NoError(t, err) + ctx := context.Background() + + // initialize client + wsfsTmpDir := temporaryWorkspaceDir(t, w) + localTmpDir := t.TempDir() + r, err := repofiles.Create(wsfsTmpDir, localTmpDir, w, &repofiles.RepoFileOptions{ + OverwriteIfExists: true, + }) + require.NoError(t, err) + f, err := filer.NewWorkspaceFilesClient(w, wsfsTmpDir) + require.NoError(t, err) + + // create local notebook + err = f.Write(ctx, "foo.py", strings.NewReader("#Databricks notebook source\nprint(1)")) + require.NoError(t, err) + assertNotebookExists(t, ctx, w, path.Join(wsfsTmpDir, "foo")) + + // upload file + err = os.Mkdir(filepath.Join(localTmpDir, "foo"), os.ModePerm) + require.NoError(t, err) + err = os.WriteFile(filepath.Join(localTmpDir, "foo/bar.txt"), []byte("I am going to overwrite the notebook"), os.ModePerm) + require.NoError(t, err) + err = r.PutFile(ctx, "foo/bar.txt") + require.NoError(t, err) + assertFileContains(t, ctx, f, "foo/bar.txt", "I am going to overwrite the notebook") +} diff --git a/libs/sync/repofiles/repofiles.go b/libs/sync/repofiles/repofiles.go index 8fcabc113ec..9a6428a168c 100644 --- a/libs/sync/repofiles/repofiles.go +++ b/libs/sync/repofiles/repofiles.go @@ -1,39 +1,55 @@ package repofiles import ( + "bytes" "context" "errors" "fmt" - "net/http" - "net/url" "os" "path" "path/filepath" "strings" + "github.com/databricks/cli/libs/filer" "github.com/databricks/databricks-sdk-go" "github.com/databricks/databricks-sdk-go/apierr" - "github.com/databricks/databricks-sdk-go/client" "github.com/databricks/databricks-sdk-go/service/workspace" ) +type RepoFileOptions struct { + OverwriteIfExists bool +} + // RepoFiles wraps reading and writing into a remote repo with safeguards to prevent -// accidental deletion of repos and more robust methods to overwrite workspace files +// accidental deletion of repos and more robust methods to overwrite workspac e files type RepoFiles struct { + *RepoFileOptions + repoRoot string localRoot string workspaceClient *databricks.WorkspaceClient + f filer.Filer } -func Create(repoRoot, localRoot string, workspaceClient *databricks.WorkspaceClient) *RepoFiles { +func Create(repoRoot, localRoot string, w *databricks.WorkspaceClient, opts *RepoFileOptions) (*RepoFiles, error) { + // override default timeout to support uploading larger files + w.Config.HTTPTimeoutSeconds = 600 + + // create filer to interact with WSFS + f, err := filer.NewWorkspaceFilesClient(w, repoRoot) + if err != nil { + return nil, err + } return &RepoFiles{ repoRoot: repoRoot, localRoot: localRoot, - workspaceClient: workspaceClient, - } + workspaceClient: w, + RepoFileOptions: opts, + f: f, + }, nil } -func (r *RepoFiles) remotePath(relativePath string) (string, error) { +func (r *RepoFiles) RemotePath(relativePath string) (string, error) { fullPath := path.Join(r.repoRoot, relativePath) cleanFullPath := path.Clean(fullPath) if !strings.HasPrefix(cleanFullPath, r.repoRoot) { @@ -52,36 +68,25 @@ func (r *RepoFiles) readLocal(relativePath string) ([]byte, error) { } func (r *RepoFiles) writeRemote(ctx context.Context, relativePath string, content []byte) error { - apiClientConfig := r.workspaceClient.Config - apiClientConfig.HTTPTimeoutSeconds = 600 - apiClient, err := client.New(apiClientConfig) - if err != nil { - return err + if !r.OverwriteIfExists { + return r.f.Write(ctx, relativePath, bytes.NewReader(content), filer.CreateParentDirectories) } - remotePath, err := r.remotePath(relativePath) - if err != nil { - return err - } - escapedPath := url.PathEscape(strings.TrimLeft(remotePath, "/")) - apiPath := fmt.Sprintf("/api/2.0/workspace-files/import-file/%s?overwrite=true", escapedPath) - - err = apiClient.Do(ctx, http.MethodPost, apiPath, content, nil) - - // Handling some edge cases when an upload might fail - // - // We cannot do more precise error scoping here because the API does not - // provide descriptive errors yet - // - // TODO: narrow down the error condition scope of this "if" block to only - // trigger for the specific edge cases instead of all errors once the API - // implements them + + err := r.f.Write(ctx, relativePath, bytes.NewReader(content), filer.CreateParentDirectories, filer.OverwriteIfExists) + + // TODO(pietern): Use the new FS interface to avoid needing to make a recursive + // delete call here. This call is dangerous if err != nil { // Delete any artifact files incase non overwriteable by the current file // type and thus are failing the PUT request. // files, folders and notebooks might not have been cleaned up and they // can't overwrite each other. If a folder `foo` exists, then attempts to // PUT a file `foo` will fail - err := r.workspaceClient.Workspace.Delete(ctx, + remotePath, err := r.RemotePath(relativePath) + if err != nil { + return err + } + err = r.workspaceClient.Workspace.Delete(ctx, workspace.Delete{ Path: remotePath, Recursive: true, @@ -96,33 +101,15 @@ func (r *RepoFiles) writeRemote(ctx context.Context, relativePath string, conten return err } - // Mkdir parent dirs incase they are what's causing the PUT request to - // fail - err = r.workspaceClient.Workspace.MkdirsByPath(ctx, path.Dir(remotePath)) - if err != nil { - return fmt.Errorf("could not mkdir to put file: %s", err) - } - - // Attempt to upload file again after cleanup/setup - err = apiClient.Do(ctx, http.MethodPost, apiPath, content, nil) - if err != nil { - return err - } + // Attempt to write the file again, this time without the CreateParentDirectories and + // OverwriteIfExists flags + return r.f.Write(ctx, relativePath, bytes.NewReader(content)) } return nil } func (r *RepoFiles) deleteRemote(ctx context.Context, relativePath string) error { - remotePath, err := r.remotePath(relativePath) - if err != nil { - return err - } - return r.workspaceClient.Workspace.Delete(ctx, - workspace.Delete{ - Path: remotePath, - Recursive: false, - }, - ) + return r.f.Delete(ctx, relativePath) } // The API calls for a python script foo.py would be @@ -154,6 +141,3 @@ func (r *RepoFiles) DeleteFile(ctx context.Context, relativePath string) error { } return nil } - -// TODO: write integration tests for all non happy path cases that rely on -// specific behaviour of the workspace apis diff --git a/libs/sync/repofiles/repofiles_test.go b/libs/sync/repofiles/repofiles_test.go index 2a881d90d06..ce2a14c2c0c 100644 --- a/libs/sync/repofiles/repofiles_test.go +++ b/libs/sync/repofiles/repofiles_test.go @@ -6,72 +6,74 @@ import ( "testing" "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" ) func TestRepoFilesRemotePath(t *testing.T) { repoRoot := "/Repos/doraemon/bar" - repoFiles := Create(repoRoot, "/doraemon/foo/bar", nil) + repoFiles, err := Create(repoRoot, "/doraemon/foo/bar", nil, nil) + require.NoError(t, err) - remotePath, err := repoFiles.remotePath("a/b/c") + remotePath, err := repoFiles.RemotePath("a/b/c") assert.NoError(t, err) assert.Equal(t, repoRoot+"/a/b/c", remotePath) - remotePath, err = repoFiles.remotePath("a/b/../d") + remotePath, err = repoFiles.RemotePath("a/b/../d") assert.NoError(t, err) assert.Equal(t, repoRoot+"/a/d", remotePath) - remotePath, err = repoFiles.remotePath("a/../c") + remotePath, err = repoFiles.RemotePath("a/../c") assert.NoError(t, err) assert.Equal(t, repoRoot+"/c", remotePath) - remotePath, err = repoFiles.remotePath("a/b/c/.") + remotePath, err = repoFiles.RemotePath("a/b/c/.") assert.NoError(t, err) assert.Equal(t, repoRoot+"/a/b/c", remotePath) - remotePath, err = repoFiles.remotePath("a/b/c/d/./../../f/g") + remotePath, err = repoFiles.RemotePath("a/b/c/d/./../../f/g") assert.NoError(t, err) assert.Equal(t, repoRoot+"/a/b/f/g", remotePath) - _, err = repoFiles.remotePath("..") + _, err = repoFiles.RemotePath("..") assert.ErrorContains(t, err, `relative file path is not inside repo root: ..`) - _, err = repoFiles.remotePath("a/../..") + _, err = repoFiles.RemotePath("a/../..") assert.ErrorContains(t, err, `relative file path is not inside repo root: a/../..`) - _, err = repoFiles.remotePath("./../.") + _, err = repoFiles.RemotePath("./../.") assert.ErrorContains(t, err, `relative file path is not inside repo root: ./../.`) - _, err = repoFiles.remotePath("/./.././..") + _, err = repoFiles.RemotePath("/./.././..") assert.ErrorContains(t, err, `relative file path is not inside repo root: /./.././..`) - _, err = repoFiles.remotePath("./../.") + _, err = repoFiles.RemotePath("./../.") assert.ErrorContains(t, err, `relative file path is not inside repo root: ./../.`) - _, err = repoFiles.remotePath("./..") + _, err = repoFiles.RemotePath("./..") assert.ErrorContains(t, err, `relative file path is not inside repo root: ./..`) - _, err = repoFiles.remotePath("./../../..") + _, err = repoFiles.RemotePath("./../../..") assert.ErrorContains(t, err, `relative file path is not inside repo root: ./../../..`) - _, err = repoFiles.remotePath("./../a/./b../../..") + _, err = repoFiles.RemotePath("./../a/./b../../..") assert.ErrorContains(t, err, `relative file path is not inside repo root: ./../a/./b../../..`) - _, err = repoFiles.remotePath("../..") + _, err = repoFiles.RemotePath("../..") assert.ErrorContains(t, err, `relative file path is not inside repo root: ../..`) - _, err = repoFiles.remotePath(".//a/..//./b/..") + _, err = repoFiles.RemotePath(".//a/..//./b/..") assert.ErrorContains(t, err, `file path relative to repo root cannot be empty`) - _, err = repoFiles.remotePath("a/b/../..") + _, err = repoFiles.RemotePath("a/b/../..") assert.ErrorContains(t, err, "file path relative to repo root cannot be empty") - _, err = repoFiles.remotePath("") + _, err = repoFiles.RemotePath("") assert.ErrorContains(t, err, "file path relative to repo root cannot be empty") - _, err = repoFiles.remotePath(".") + _, err = repoFiles.RemotePath(".") assert.ErrorContains(t, err, "file path relative to repo root cannot be empty") - _, err = repoFiles.remotePath("/") + _, err = repoFiles.RemotePath("/") assert.ErrorContains(t, err, "file path relative to repo root cannot be empty") } @@ -81,7 +83,8 @@ func TestRepoReadLocal(t *testing.T) { err := os.WriteFile(helloPath, []byte("my name is doraemon :P"), os.ModePerm) assert.NoError(t, err) - repoFiles := Create("/Repos/doraemon/bar", tempDir, nil) + repoFiles, err := Create("/Repos/doraemon/bar", tempDir, nil, nil) + require.NoError(t, err) bytes, err := repoFiles.readLocal("./a/../hello.txt") assert.NoError(t, err) assert.Equal(t, "my name is doraemon :P", string(bytes)) From c51172b3e6726d4e7748eb88fa96e40cb9c8c612 Mon Sep 17 00:00:00 2001 From: Shreyas Goenka Date: Fri, 2 Jun 2023 00:38:35 +0200 Subject: [PATCH 08/10] some cleanup --- internal/helpers.go | 23 +++++++++++++++++++++++ libs/sync/sync.go | 5 ++++- 2 files changed, 27 insertions(+), 1 deletion(-) diff --git a/internal/helpers.go b/internal/helpers.go index f1901f14e1c..63a76c0bbe4 100644 --- a/internal/helpers.go +++ b/internal/helpers.go @@ -5,6 +5,7 @@ import ( "bytes" "context" "fmt" + "io" "math/rand" "os" "path/filepath" @@ -14,9 +15,11 @@ import ( "github.com/databricks/cli/cmd/root" _ "github.com/databricks/cli/cmd/version" + "github.com/databricks/cli/libs/filer" "github.com/databricks/databricks-sdk-go" "github.com/databricks/databricks-sdk-go/apierr" "github.com/databricks/databricks-sdk-go/service/workspace" + "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" _ "github.com/databricks/cli/cmd/workspace" @@ -208,3 +211,23 @@ func temporaryWorkspaceDir(t *testing.T, w *databricks.WorkspaceClient) string { return path } + +func assertFileContains(t *testing.T, ctx context.Context, f filer.Filer, name, contents string) { + r, err := f.Read(ctx, name) + require.NoError(t, err) + + var b bytes.Buffer + _, err = io.Copy(&b, r) + require.NoError(t, err) + + assert.Contains(t, b.String(), contents) +} + +func assertNotebookExists(t *testing.T, ctx context.Context, w *databricks.WorkspaceClient, path string) { + info, err := w.Workspace.ListAll(ctx, workspace.ListWorkspaceRequest{ + Path: path, + }) + require.NoError(t, err) + assert.Len(t, info, 1) + assert.Equal(t, info[0].ObjectType, workspace.ObjectTypeNotebook) +} diff --git a/libs/sync/sync.go b/libs/sync/sync.go index 54d0624e77c..65bad57f08c 100644 --- a/libs/sync/sync.go +++ b/libs/sync/sync.go @@ -77,7 +77,10 @@ func New(ctx context.Context, opts SyncOptions) (*Sync, error) { } } - repoFiles := repofiles.Create(opts.RemotePath, opts.LocalPath, opts.WorkspaceClient) + repoFiles, err := repofiles.Create(opts.RemotePath, opts.LocalPath, opts.WorkspaceClient, &repofiles.RepoFileOptions{OverwriteIfExists: true}) + if err != nil { + return nil, err + } return &Sync{ SyncOptions: &opts, From 5f4b8fbd6d94d25353cb337853f63d5f2527ed44 Mon Sep 17 00:00:00 2001 From: Shreyas Goenka Date: Fri, 2 Jun 2023 00:40:18 +0200 Subject: [PATCH 09/10] - --- libs/sync/repofiles/repofiles.go | 6 ++--- libs/sync/repofiles/repofiles_test.go | 38 +++++++++++++-------------- 2 files changed, 22 insertions(+), 22 deletions(-) diff --git a/libs/sync/repofiles/repofiles.go b/libs/sync/repofiles/repofiles.go index 9a6428a168c..f856793af6a 100644 --- a/libs/sync/repofiles/repofiles.go +++ b/libs/sync/repofiles/repofiles.go @@ -21,7 +21,7 @@ type RepoFileOptions struct { } // RepoFiles wraps reading and writing into a remote repo with safeguards to prevent -// accidental deletion of repos and more robust methods to overwrite workspac e files +// accidental deletion of repos and more robust methods to overwrite workspace files type RepoFiles struct { *RepoFileOptions @@ -49,7 +49,7 @@ func Create(repoRoot, localRoot string, w *databricks.WorkspaceClient, opts *Rep }, nil } -func (r *RepoFiles) RemotePath(relativePath string) (string, error) { +func (r *RepoFiles) remotePath(relativePath string) (string, error) { fullPath := path.Join(r.repoRoot, relativePath) cleanFullPath := path.Clean(fullPath) if !strings.HasPrefix(cleanFullPath, r.repoRoot) { @@ -82,7 +82,7 @@ func (r *RepoFiles) writeRemote(ctx context.Context, relativePath string, conten // files, folders and notebooks might not have been cleaned up and they // can't overwrite each other. If a folder `foo` exists, then attempts to // PUT a file `foo` will fail - remotePath, err := r.RemotePath(relativePath) + remotePath, err := r.remotePath(relativePath) if err != nil { return err } diff --git a/libs/sync/repofiles/repofiles_test.go b/libs/sync/repofiles/repofiles_test.go index ce2a14c2c0c..dc9abbcddf4 100644 --- a/libs/sync/repofiles/repofiles_test.go +++ b/libs/sync/repofiles/repofiles_test.go @@ -14,66 +14,66 @@ func TestRepoFilesRemotePath(t *testing.T) { repoFiles, err := Create(repoRoot, "/doraemon/foo/bar", nil, nil) require.NoError(t, err) - remotePath, err := repoFiles.RemotePath("a/b/c") + remotePath, err := repoFiles.remotePath("a/b/c") assert.NoError(t, err) assert.Equal(t, repoRoot+"/a/b/c", remotePath) - remotePath, err = repoFiles.RemotePath("a/b/../d") + remotePath, err = repoFiles.remotePath("a/b/../d") assert.NoError(t, err) assert.Equal(t, repoRoot+"/a/d", remotePath) - remotePath, err = repoFiles.RemotePath("a/../c") + remotePath, err = repoFiles.remotePath("a/../c") assert.NoError(t, err) assert.Equal(t, repoRoot+"/c", remotePath) - remotePath, err = repoFiles.RemotePath("a/b/c/.") + remotePath, err = repoFiles.remotePath("a/b/c/.") assert.NoError(t, err) assert.Equal(t, repoRoot+"/a/b/c", remotePath) - remotePath, err = repoFiles.RemotePath("a/b/c/d/./../../f/g") + remotePath, err = repoFiles.remotePath("a/b/c/d/./../../f/g") assert.NoError(t, err) assert.Equal(t, repoRoot+"/a/b/f/g", remotePath) - _, err = repoFiles.RemotePath("..") + _, err = repoFiles.remotePath("..") assert.ErrorContains(t, err, `relative file path is not inside repo root: ..`) - _, err = repoFiles.RemotePath("a/../..") + _, err = repoFiles.remotePath("a/../..") assert.ErrorContains(t, err, `relative file path is not inside repo root: a/../..`) - _, err = repoFiles.RemotePath("./../.") + _, err = repoFiles.remotePath("./../.") assert.ErrorContains(t, err, `relative file path is not inside repo root: ./../.`) - _, err = repoFiles.RemotePath("/./.././..") + _, err = repoFiles.remotePath("/./.././..") assert.ErrorContains(t, err, `relative file path is not inside repo root: /./.././..`) - _, err = repoFiles.RemotePath("./../.") + _, err = repoFiles.remotePath("./../.") assert.ErrorContains(t, err, `relative file path is not inside repo root: ./../.`) - _, err = repoFiles.RemotePath("./..") + _, err = repoFiles.remotePath("./..") assert.ErrorContains(t, err, `relative file path is not inside repo root: ./..`) - _, err = repoFiles.RemotePath("./../../..") + _, err = repoFiles.remotePath("./../../..") assert.ErrorContains(t, err, `relative file path is not inside repo root: ./../../..`) - _, err = repoFiles.RemotePath("./../a/./b../../..") + _, err = repoFiles.remotePath("./../a/./b../../..") assert.ErrorContains(t, err, `relative file path is not inside repo root: ./../a/./b../../..`) - _, err = repoFiles.RemotePath("../..") + _, err = repoFiles.remotePath("../..") assert.ErrorContains(t, err, `relative file path is not inside repo root: ../..`) - _, err = repoFiles.RemotePath(".//a/..//./b/..") + _, err = repoFiles.remotePath(".//a/..//./b/..") assert.ErrorContains(t, err, `file path relative to repo root cannot be empty`) - _, err = repoFiles.RemotePath("a/b/../..") + _, err = repoFiles.remotePath("a/b/../..") assert.ErrorContains(t, err, "file path relative to repo root cannot be empty") - _, err = repoFiles.RemotePath("") + _, err = repoFiles.remotePath("") assert.ErrorContains(t, err, "file path relative to repo root cannot be empty") - _, err = repoFiles.RemotePath(".") + _, err = repoFiles.remotePath(".") assert.ErrorContains(t, err, "file path relative to repo root cannot be empty") - _, err = repoFiles.RemotePath("/") + _, err = repoFiles.remotePath("/") assert.ErrorContains(t, err, "file path relative to repo root cannot be empty") } From 944a3b98d28e4c7acd06342aeb1834f9134b74d0 Mon Sep 17 00:00:00 2001 From: Shreyas Goenka Date: Fri, 2 Jun 2023 03:30:45 +0200 Subject: [PATCH 10/10] modularize the tests --- internal/repofiles_test.go | 260 ++++++++++++++++++--------- libs/filer/dbfs_client.go | 2 +- libs/filer/workspace_files_client.go | 2 +- 3 files changed, 180 insertions(+), 84 deletions(-) diff --git a/internal/repofiles_test.go b/internal/repofiles_test.go index 07c488e0435..6ffbd30e83b 100644 --- a/internal/repofiles_test.go +++ b/internal/repofiles_test.go @@ -2,8 +2,8 @@ package internal import ( "context" + "io/fs" "os" - "path" "path/filepath" "strings" "testing" @@ -11,145 +11,241 @@ import ( "github.com/databricks/cli/libs/filer" "github.com/databricks/cli/libs/sync/repofiles" "github.com/databricks/databricks-sdk-go" + "github.com/databricks/databricks-sdk-go/service/workspace" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" ) -// TODO: skip if not cloud env, these are integration tests -// TODO: split into a separate PR +type repofilesTestHelper struct { + w *databricks.WorkspaceClient + f filer.Filer + ctx context.Context + t *testing.T + + localRoot string + remoteRoot string +} + +func setupRepofilesTestHelper(t *testing.T, ctx context.Context) *repofilesTestHelper { + // t.Log(GetEnvOrSkipTest(t, "CLOUD_ENV")) -func TestRepoFilesPutFile(t *testing.T) { w, err := databricks.NewWorkspaceClient() require.NoError(t, err) - ctx := context.Background() // initialize client wsfsTmpDir := temporaryWorkspaceDir(t, w) localTmpDir := t.TempDir() - r, err := repofiles.Create(wsfsTmpDir, localTmpDir, w, &repofiles.RepoFileOptions{ - OverwriteIfExists: true, - }) + require.NoError(t, err) f, err := filer.NewWorkspaceFilesClient(w, wsfsTmpDir) require.NoError(t, err) - // create local file - err = os.WriteFile(filepath.Join(localTmpDir, "foo.txt"), []byte(`hello, world`), os.ModePerm) + return &repofilesTestHelper{ + w: w, + f: f, + ctx: ctx, + t: t, + + localRoot: localTmpDir, + remoteRoot: wsfsTmpDir, + } +} + +func (h *repofilesTestHelper) createLocalFile(name string, content string) { + absPath := filepath.Join(h.localRoot, name) + err := os.MkdirAll(filepath.Dir(absPath), os.ModePerm) + require.NoError(h.t, err) + err = os.WriteFile(absPath, []byte(content), os.ModePerm) + require.NoError(h.t, err) +} + +func (h *repofilesTestHelper) createRemoteFile(name string, content string) { + h.f.Write(h.ctx, name, strings.NewReader(content), filer.CreateParentDirectories) +} + +func (h *repofilesTestHelper) createRemoteDirectory(name string) { + h.f.Mkdir(h.ctx, name) +} + +func (h *repofilesTestHelper) assertRemoteFileContent(name string, content string) { + assertFileContains(h.t, h.ctx, h.f, name, content) +} + +func (h *repofilesTestHelper) assertRemoteFileType(name string, fileType workspace.ObjectType) { + info, err := h.f.Stat(h.ctx, name) + require.NoError(h.t, err) + + objectInfo := info.Sys().(workspace.ObjectInfo) + assert.Equal(h.t, fileType, objectInfo.ObjectType) +} + +func TestRepoFilesPutFile(t *testing.T) { + ctx := context.Background() + helper := setupRepofilesTestHelper(t, ctx) + + r, err := repofiles.Create(helper.remoteRoot, helper.localRoot, helper.w, &repofiles.RepoFileOptions{ + OverwriteIfExists: true, + }) require.NoError(t, err) + + // create local file + helper.createLocalFile("foo.txt", "hello, world") err = r.PutFile(ctx, "foo.txt") require.NoError(t, err) - require.NoError(t, f.Mkdir(ctx, "bar")) + // Expect PUT to succeed + helper.assertRemoteFileContent("foo.txt", "hello, world") +} + +func TestRepoFilesPutFileOverwritesNotebook(t *testing.T) { + ctx := context.Background() + helper := setupRepofilesTestHelper(t, ctx) - entries, err := f.ReadDir(ctx, "bar") + r, err := repofiles.Create(helper.remoteRoot, helper.localRoot, helper.w, &repofiles.RepoFileOptions{ + OverwriteIfExists: true, + }) require.NoError(t, err) - require.Len(t, entries, 1) - assertFileContains(t, ctx, f, "foo.txt", "hello, world") + // Create notebook in workspace + helper.createRemoteFile("foo.py", "#Databricks notebook source\nprint(1)") + helper.assertRemoteFileType("foo", workspace.ObjectTypeNotebook) + + // Put file and assert file PUT succeeded + helper.createLocalFile("foo", "this file will overwrite the notebook") + err = r.PutFile(ctx, "foo") + assert.NoError(t, err) + helper.assertRemoteFileContent("foo", "this file will overwrite the notebook") + helper.assertRemoteFileType("foo", workspace.ObjectTypeFile) } -func TestRepoFilesFileOverwritesNotebook(t *testing.T) { - w, err := databricks.NewWorkspaceClient() - require.NoError(t, err) +func TestRepoFilesPutFileOverwritesEmptyDirectoryTree(t *testing.T) { ctx := context.Background() + helper := setupRepofilesTestHelper(t, ctx) - // initialize client - wsfsTmpDir := temporaryWorkspaceDir(t, w) - localTmpDir := t.TempDir() - r, err := repofiles.Create(wsfsTmpDir, localTmpDir, w, &repofiles.RepoFileOptions{ + r, err := repofiles.Create(helper.remoteRoot, helper.localRoot, helper.w, &repofiles.RepoFileOptions{ OverwriteIfExists: true, }) require.NoError(t, err) - f, err := filer.NewWorkspaceFilesClient(w, wsfsTmpDir) - require.NoError(t, err) - // create local notebook - err = os.WriteFile(filepath.Join(localTmpDir, "foo.py"), []byte("#Databricks notebook source\nprint(1)"), os.ModePerm) - require.NoError(t, err) + // create empty remote directory tree + helper.createRemoteDirectory("foo/a/b/c") + helper.createRemoteDirectory("foo/a/b/d/e") + helper.createRemoteDirectory("foo/f/g/i") - // upload notebook - err = r.PutFile(ctx, "foo.py") - require.NoError(t, err) - assertNotebookExists(t, ctx, w, path.Join(wsfsTmpDir, "foo")) + // assert directory tree is created + helper.assertRemoteFileType("foo", workspace.ObjectTypeDirectory) + helper.assertRemoteFileType("foo/a/b/c", workspace.ObjectTypeDirectory) + helper.assertRemoteFileType("foo/f/g/i", workspace.ObjectTypeDirectory) + helper.assertRemoteFileType("foo/a/b/d/e", workspace.ObjectTypeDirectory) - // upload file, and assert that it overwrites the notebook - err = os.WriteFile(filepath.Join(localTmpDir, "foo"), []byte("I am going to overwrite the notebook"), os.ModePerm) - require.NoError(t, err) + // Create local file and PUT it into the workspace + helper.createLocalFile("foo", "hello, world") err = r.PutFile(ctx, "foo") require.NoError(t, err) - assertFileContains(t, ctx, f, "foo", "I am going to overwrite the notebook") + helper.assertRemoteFileContent("foo", "hello, world") + helper.assertRemoteFileType("foo", workspace.ObjectTypeFile) } -func TestRepoFilesFileOverwritesEmptyDirectoryTree(t *testing.T) { - w, err := databricks.NewWorkspaceClient() - require.NoError(t, err) +func TestRepoFilesPutFileInDirOverwritesExistingNotebook(t *testing.T) { + // TODO: Skipping this test for now since the workspace-files import API has a + // bug and does not return the error message we need + t.SkipNow() + ctx := context.Background() + helper := setupRepofilesTestHelper(t, ctx) - // initialize client - wsfsTmpDir := temporaryWorkspaceDir(t, w) - localTmpDir := t.TempDir() - r, err := repofiles.Create(wsfsTmpDir, localTmpDir, w, &repofiles.RepoFileOptions{ + r, err := repofiles.Create(helper.remoteRoot, helper.localRoot, helper.w, &repofiles.RepoFileOptions{ OverwriteIfExists: true, }) require.NoError(t, err) - f, err := filer.NewWorkspaceFilesClient(w, wsfsTmpDir) - require.NoError(t, err) - // create local file - err = os.WriteFile(filepath.Join(localTmpDir, "foo"), []byte(`hello, world`), os.ModePerm) + // create remote notebook + helper.createRemoteFile("foo.py", "#Databricks notebook source\nprint(1)") + helper.assertRemoteFileType("foo", workspace.ObjectTypeNotebook) + + // create local file and PUT it in the workspace + helper.createLocalFile("foo/hello.txt", "just a file") + err = r.PutFile(ctx, "foo/hello.txt") require.NoError(t, err) - // construct a directory tree without files in the workspace - err = f.Mkdir(ctx, "foo/a/b/c") + // Assert PUT succeeeded + helper.assertRemoteFileType("foo", workspace.ObjectTypeDirectory) + helper.assertRemoteFileContent("foo/bar.txt", "just a file") +} + +func TestRepoFilesPutFileWithoutOverwrite(t *testing.T) { + ctx := context.Background() + helper := setupRepofilesTestHelper(t, ctx) + + r, err := repofiles.Create(helper.remoteRoot, helper.localRoot, helper.w, &repofiles.RepoFileOptions{ + OverwriteIfExists: false, + }) require.NoError(t, err) - err = f.Mkdir(ctx, "foo/a/b/d/e") + + // create local file + helper.createLocalFile("foo.txt", "hello, world") + err = r.PutFile(ctx, "foo.txt") require.NoError(t, err) - err = f.Mkdir(ctx, "foo/f/g/i") + + // Expect PUT to succeed + helper.assertRemoteFileContent("foo.txt", "hello, world") +} + +func TestRepoFilesPutFileWithoutOverwriteFails(t *testing.T) { + ctx := context.Background() + helper := setupRepofilesTestHelper(t, ctx) + + r, err := repofiles.Create(helper.remoteRoot, helper.localRoot, helper.w, &repofiles.RepoFileOptions{ + OverwriteIfExists: false, + }) require.NoError(t, err) - // assert the directories exist - entries, err := f.ReadDir(ctx, "foo") + // create remote file + helper.createRemoteFile("foo.txt", "this file already exists in the workspace") + + // create local file + helper.createLocalFile("foo.txt", "this file will attempt to overwrite the workspace file and fail") + + // assert overwrite fails + err = r.PutFile(ctx, "foo.txt") + assert.ErrorIs(t, err, fs.ErrExist) +} + +func TestRepoFilesPutFileWithoutOverwriteFailsIfDirectoryExists(t *testing.T) { + ctx := context.Background() + helper := setupRepofilesTestHelper(t, ctx) + + r, err := repofiles.Create(helper.remoteRoot, helper.localRoot, helper.w, &repofiles.RepoFileOptions{ + OverwriteIfExists: false, + }) require.NoError(t, err) - assert.Len(t, entries, 2) - assert.True(t, entries[0].IsDir()) - assert.True(t, entries[1].IsDir()) - // upload file, and assert that it overwrites the empty directories + helper.createRemoteDirectory("foo") + + // create local file + helper.createLocalFile("foo", "hello, world") err = r.PutFile(ctx, "foo") - require.NoError(t, err) - assertFileContains(t, ctx, f, "foo", "hello, world") - // assert the directories do not exist anymore - _, err = f.ReadDir(ctx, "foo") - assert.ErrorIs(t, err, filer.ErrNotADirectory) + // Assert PUT failed because file already exists + assert.ErrorIs(t, err, fs.ErrExist) } -func TestRepoFilesFileInDirOverwritesExistingNotebook(t *testing.T) { - w, err := databricks.NewWorkspaceClient() - require.NoError(t, err) +func TestRepoFilesPutFileWithoutOverwriteFailsIfNotebookExists(t *testing.T) { ctx := context.Background() + helper := setupRepofilesTestHelper(t, ctx) - // initialize client - wsfsTmpDir := temporaryWorkspaceDir(t, w) - localTmpDir := t.TempDir() - r, err := repofiles.Create(wsfsTmpDir, localTmpDir, w, &repofiles.RepoFileOptions{ - OverwriteIfExists: true, + r, err := repofiles.Create(helper.remoteRoot, helper.localRoot, helper.w, &repofiles.RepoFileOptions{ + OverwriteIfExists: false, }) require.NoError(t, err) - f, err := filer.NewWorkspaceFilesClient(w, wsfsTmpDir) - require.NoError(t, err) - // create local notebook - err = f.Write(ctx, "foo.py", strings.NewReader("#Databricks notebook source\nprint(1)")) - require.NoError(t, err) - assertNotebookExists(t, ctx, w, path.Join(wsfsTmpDir, "foo")) + // create remote notebook + helper.createRemoteFile("foo.py", "#Databricks notebook source\nprint(1)") - // upload file - err = os.Mkdir(filepath.Join(localTmpDir, "foo"), os.ModePerm) - require.NoError(t, err) - err = os.WriteFile(filepath.Join(localTmpDir, "foo/bar.txt"), []byte("I am going to overwrite the notebook"), os.ModePerm) - require.NoError(t, err) - err = r.PutFile(ctx, "foo/bar.txt") - require.NoError(t, err) - assertFileContains(t, ctx, f, "foo/bar.txt", "I am going to overwrite the notebook") + // create local file + helper.createLocalFile("foo", "hello, world") + err = r.PutFile(ctx, "foo") + + // Assert PUT failed because file already exists + assert.ErrorIs(t, err, fs.ErrExist) } diff --git a/libs/filer/dbfs_client.go b/libs/filer/dbfs_client.go index 67878136b79..d4397686994 100644 --- a/libs/filer/dbfs_client.go +++ b/libs/filer/dbfs_client.go @@ -59,7 +59,7 @@ func (info dbfsFileInfo) IsDir() bool { } func (info dbfsFileInfo) Sys() any { - return nil + return info.fi } // DbfsClient implements the [Filer] interface for the DBFS backend. diff --git a/libs/filer/workspace_files_client.go b/libs/filer/workspace_files_client.go index b06a25146b4..50ccfed7dfa 100644 --- a/libs/filer/workspace_files_client.go +++ b/libs/filer/workspace_files_client.go @@ -65,7 +65,7 @@ func (info wsfsFileInfo) IsDir() bool { } func (info wsfsFileInfo) Sys() any { - return nil + return info.oi } // WorkspaceFilesClient implements the files-in-workspace API.