From c7c990b4929dd0082e6975e6cc502c5dd6c988e6 Mon Sep 17 00:00:00 2001
From: Lily Wang <lwang@live.com.au>
Date: Thu, 1 Nov 2018 05:52:13 +1100
Subject: [PATCH 01/30] added manual chunks for open_zarr

---
 xarray/backends/zarr.py       | 78 +++++++++++++++++++++++++----------
 xarray/tests/test_backends.py | 46 +++++++++++++++++++++
 2 files changed, 103 insertions(+), 21 deletions(-)

diff --git a/xarray/backends/zarr.py b/xarray/backends/zarr.py
index ee77e0833c4..a5ee9867f8e 100644
--- a/xarray/backends/zarr.py
+++ b/xarray/backends/zarr.py
@@ -352,10 +352,11 @@ def close(self):
             zarr.consolidate_metadata(self.ds.store)
 
 
-def open_zarr(store, group=None, synchronizer=None, auto_chunk=True,
+def open_zarr(store, group=None, synchronizer=None, chunks=None,
               decode_cf=True, mask_and_scale=True, decode_times=True,
               concat_characters=True, decode_coords=True,
-              drop_variables=None, consolidated=False):
+              drop_variables=None, consolidated=False, auto_chunk=True, 
+              overwrite_encoded_chunks=False):
     """Load and decode a dataset from a Zarr store.
 
     .. note:: Experimental
@@ -375,10 +376,19 @@ def open_zarr(store, group=None, synchronizer=None, auto_chunk=True,
         Array synchronizer provided to zarr
     group : str, obtional
         Group path. (a.k.a. `path` in zarr terminology.)
+    chunks : int or dict or {None, 'auto'}, optional
+        Chunk sizes along each dimension, e.g., ``5`` or
+        ``{'x': 5, 'y': 5}``. If `chunks='auto'`, dask chunks are created 
+        based on the variable's zarr chunks. If `chunks=None` and 
+        `auto_chunk=False`, zarr array data will lazily convert to numpy 
+        arrays upon access. 
     auto_chunk : bool, optional
         Whether to automatically create dask chunks corresponding to each
-        variable's zarr chunks. If False, zarr array data will lazily convert
-        to numpy arrays upon access.
+        variable's zarr chunks. If `chunks=None`, this overrides `chunks`. 
+        Equivalent to `chunks='auto'.` (Default: True)
+    overwrite_encoded_chunks: bool, optional
+        Whether to drop the zarr chunks encoded for each variable when a 
+        dataset is loaded with specified chunk sizes (default: False)
     decode_cf : bool, optional
         Whether to decode these variables, assuming they were saved according
         to CF conventions.
@@ -423,6 +433,15 @@ def open_zarr(store, group=None, synchronizer=None, auto_chunk=True,
     http://zarr.readthedocs.io/
     """
 
+    if auto_chunk and chunks is None:
+        chunks = 'auto' # maintain backwards compatibility
+
+    if not isinstance(chunks, (int, dict)):
+        if chunks != 'auto' and chunks is not None:
+            raise ValueError(
+                            "chunks must be an int, dict, 'auto', or None. "
+                            "Instead found %s. " % chunks)
+
     if not decode_cf:
         mask_and_scale = False
         decode_times = False
@@ -449,21 +468,38 @@ def maybe_decode_store(store, lock=False):
 
     # auto chunking needs to be here and not in ZarrStore because variable
     # chunks do not survive decode_cf
-    if auto_chunk:
-        # adapted from Dataset.Chunk()
-        def maybe_chunk(name, var):
-            from dask.base import tokenize
-            chunks = var.encoding.get('chunks')
-            if (var.ndim > 0) and (chunks is not None):
-                # does this cause any data to be read?
-                token2 = tokenize(name, var._data)
-                name2 = 'zarr-%s' % token2
-                return var.chunk(chunks, name=name2, lock=None)
-            else:
-                return var
-
-        variables = OrderedDict([(k, maybe_chunk(k, v))
-                                 for k, v in ds.variables.items()])
-        return ds._replace_vars_and_dims(variables)
-    else:
+    # return trivial case
+    if not chunks:
         return ds
+    
+    # adapted from Dataset.Chunk()
+    if isinstance(chunks, int):
+        chunks = dict.fromkeys(ds.dims, chunks)
+    
+    def selkeys(dict_, keys):
+        if dict_ is None:
+            return None
+        return dict((d, dict_[d]) for d in keys if d in dict_)
+    
+    def maybe_chunk(name, var, chunks):
+        from dask.base import tokenize
+
+        if chunks == 'auto':
+            chunks = var.encoding.get('chunks')
+        else:
+            chunks = selkeys(chunks, var.dims)
+            
+        if (var.ndim > 0) and (chunks is not None):
+            # does this cause any data to be read?
+            token2 = tokenize(name, var._data)
+            name2 = 'zarr-%s' % token2
+            var = var.chunk(chunks, name=name2, lock=None)
+            if overwrite_encoded_chunks and var.chunks is not None:
+                var.encoding['chunks'] = tuple(x[0] for x in var.chunks)
+            return var
+        else:
+            return var
+
+    variables = OrderedDict([(k, maybe_chunk(k, v, chunks))
+                                for k, v in ds.variables.items()])
+    return ds._replace_vars_and_dims(variables)
diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py
index a20ba2df229..5f21027962f 100644
--- a/xarray/tests/test_backends.py
+++ b/xarray/tests/test_backends.py
@@ -1405,6 +1405,52 @@ def test_auto_chunk(self):
                 assert v._in_memory == (k in actual.dims)
                 # chunk size should be the same as original
                 assert v.chunks == original[k].chunks
+    
+    def test_manual_chunk(self):
+        original = create_test_data().chunk({'dim1': 3, 'dim2': 4, 'dim3': 3})
+        
+        # All of these should return non-chunked arrays
+        NO_CHUNKS = (None, 0, {})
+        for no_chunk in NO_CHUNKS:
+            with self.roundtrip(
+                    original, open_kwargs={'chunks': no_chunk, 
+                                            'auto_chunk': False}) as actual:
+                for k, v in actual.variables.items():
+                    # only index variables should be in memory
+                    assert v._in_memory == (k in actual.dims)
+                    # there should be no chunks
+                    assert v.chunks is None
+
+        # uniform arrays
+        for i in range(2, 6):
+            rechunked = original.chunk(chunks=i)
+
+            with self.roundtrip(
+                    original, open_kwargs={'chunks': i}) as actual:
+                for k, v in actual.variables.items():
+                    # only index variables should be in memory
+                    assert v._in_memory == (k in actual.dims)
+                    # chunk size should be the same as rechunked
+                    assert v.chunks == rechunked[k].chunks
+        
+        chunks = {'dim1': 2, 'dim2': 3, 'dim3': 5}
+        rechunked = original.chunk(chunks=chunks)
+
+        open_overwritten = {'chunks': chunks, 
+                            'overwrite_encoded_chunks': True}
+
+        with self.roundtrip(
+                original, open_kwargs=open_overwritten) as actual:
+            for k, v in actual.variables.items():
+                    assert v.chunks == rechunked[k].chunks
+
+            with self.roundtrip(actual, open_kwargs={'chunks': 'auto'}) as auto:
+                # encoding should have changed
+                for k, v in actual.variables.items():
+                    assert v.chunks == rechunked[k].chunks
+
+                assert_identical(actual, auto)
+                assert_identical(actual.load(), auto.load())
 
     def test_write_uneven_dask_chunks(self):
         # regression for GH#2225

From d37d9e1572dff5b0642aa79c5aac2afc21e4a00d Mon Sep 17 00:00:00 2001
From: Lily Wang <lwang@live.com.au>
Date: Thu, 1 Nov 2018 05:53:03 +1100
Subject: [PATCH 02/30] updated whats-new

---
 doc/whats-new.rst | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/doc/whats-new.rst b/doc/whats-new.rst
index 9c88445b5ba..86d30ce42ad 100644
--- a/doc/whats-new.rst
+++ b/doc/whats-new.rst
@@ -428,6 +428,12 @@ Bug fixes
   encoding process if a reference date is used that is so distant that
   the dates must be encoded using cftime rather than NumPy (:issue:`2272`).
   By `Spencer Clark <https://github.com/spencerkclark>`_.
+  
+- ``xr.open_zarr`` now accepts manually specified chunks with the ``chunks=`` 
+  parameter. ``auto_chunk=True`` is equivalent to ``chunks='auto'`` for 
+  backwards compatibility. The ``overwrite_encoded_chunks`` parameter is 
+  added to remove the original zarr chunk encoding.
+  By `Lily Wang <https://github.com/lilyminium>`_.
 
 - Chunked datasets can now roundtrip to Zarr storage continually
   with `to_zarr` and ``open_zarr`` (:issue:`2300`).

From f3c829e412fbc6ff94455d0499c2b4607e214190 Mon Sep 17 00:00:00 2001
From: Lily Wang <lwang@live.com.au>
Date: Thu, 1 Nov 2018 06:14:23 +1100
Subject: [PATCH 03/30] fixed pep8 issues

---
 xarray/backends/zarr.py       | 23 +++++++++++------------
 xarray/tests/test_backends.py | 25 ++++++++++---------------
 2 files changed, 21 insertions(+), 27 deletions(-)

diff --git a/xarray/backends/zarr.py b/xarray/backends/zarr.py
index a5ee9867f8e..89f55400e82 100644
--- a/xarray/backends/zarr.py
+++ b/xarray/backends/zarr.py
@@ -378,16 +378,16 @@ def open_zarr(store, group=None, synchronizer=None, chunks=None,
         Group path. (a.k.a. `path` in zarr terminology.)
     chunks : int or dict or {None, 'auto'}, optional
         Chunk sizes along each dimension, e.g., ``5`` or
-        ``{'x': 5, 'y': 5}``. If `chunks='auto'`, dask chunks are created 
-        based on the variable's zarr chunks. If `chunks=None` and 
-        `auto_chunk=False`, zarr array data will lazily convert to numpy 
-        arrays upon access. 
+        ``{'x': 5, 'y': 5}``. If `chunks='auto'`, dask chunks are created
+        based on the variable's zarr chunks. If `chunks=None` and
+        `auto_chunk=False`, zarr array data will lazily convert to numpy
+        arrays upon access.
     auto_chunk : bool, optional
         Whether to automatically create dask chunks corresponding to each
-        variable's zarr chunks. If `chunks=None`, this overrides `chunks`. 
+        variable's zarr chunks. If `chunks=None`, this overrides `chunks`.
         Equivalent to `chunks='auto'.` (Default: True)
     overwrite_encoded_chunks: bool, optional
-        Whether to drop the zarr chunks encoded for each variable when a 
+        Whether to drop the zarr chunks encoded for each variable when a
         dataset is loaded with specified chunk sizes (default: False)
     decode_cf : bool, optional
         Whether to decode these variables, assuming they were saved according
@@ -434,12 +434,11 @@ def open_zarr(store, group=None, synchronizer=None, chunks=None,
     """
 
     if auto_chunk and chunks is None:
-        chunks = 'auto' # maintain backwards compatibility
+        chunks = 'auto'  # maintain backwards compatibility
 
     if not isinstance(chunks, (int, dict)):
         if chunks != 'auto' and chunks is not None:
-            raise ValueError(
-                            "chunks must be an int, dict, 'auto', or None. "
+            raise ValueError("chunks must be an int, dict, 'auto', or None. "
                             "Instead found %s. " % chunks)
 
     if not decode_cf:
@@ -471,7 +470,7 @@ def maybe_decode_store(store, lock=False):
     # return trivial case
     if not chunks:
         return ds
-    
+
     # adapted from Dataset.Chunk()
     if isinstance(chunks, int):
         chunks = dict.fromkeys(ds.dims, chunks)
@@ -488,7 +487,7 @@ def maybe_chunk(name, var, chunks):
             chunks = var.encoding.get('chunks')
         else:
             chunks = selkeys(chunks, var.dims)
-            
+        
         if (var.ndim > 0) and (chunks is not None):
             # does this cause any data to be read?
             token2 = tokenize(name, var._data)
@@ -501,5 +500,5 @@ def maybe_chunk(name, var, chunks):
             return var
 
     variables = OrderedDict([(k, maybe_chunk(k, v, chunks))
-                                for k, v in ds.variables.items()])
+                            for k, v in ds.variables.items()])
     return ds._replace_vars_and_dims(variables)
diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py
index 5f21027962f..5f555dde2d9 100644
--- a/xarray/tests/test_backends.py
+++ b/xarray/tests/test_backends.py
@@ -1412,9 +1412,8 @@ def test_manual_chunk(self):
         # All of these should return non-chunked arrays
         NO_CHUNKS = (None, 0, {})
         for no_chunk in NO_CHUNKS:
-            with self.roundtrip(
-                    original, open_kwargs={'chunks': no_chunk, 
-                                            'auto_chunk': False}) as actual:
+            open_kwargs = {'chunks': no_chunk, 'auto_chunk': False}
+            with self.roundtrip(original, open_kwargs=open_kwargs) as actual:
                 for k, v in actual.variables.items():
                     # only index variables should be in memory
                     assert v._in_memory == (k in actual.dims)
@@ -1424,9 +1423,8 @@ def test_manual_chunk(self):
         # uniform arrays
         for i in range(2, 6):
             rechunked = original.chunk(chunks=i)
-
-            with self.roundtrip(
-                    original, open_kwargs={'chunks': i}) as actual:
+            open_kwargs = {'chunks': i}
+            with self.roundtrip(original, open_kwargs=open_kwargs) as actual:
                 for k, v in actual.variables.items():
                     # only index variables should be in memory
                     assert v._in_memory == (k in actual.dims)
@@ -1435,20 +1433,17 @@ def test_manual_chunk(self):
         
         chunks = {'dim1': 2, 'dim2': 3, 'dim3': 5}
         rechunked = original.chunk(chunks=chunks)
-
-        open_overwritten = {'chunks': chunks, 
-                            'overwrite_encoded_chunks': True}
-
-        with self.roundtrip(
-                original, open_kwargs=open_overwritten) as actual:
+        
+        open_kwargs = {'chunks': chunks, 'overwrite_encoded_chunks': True}
+        with self.roundtrip(original, open_kwargs=open_kwargs) as actual:
             for k, v in actual.variables.items():
                     assert v.chunks == rechunked[k].chunks
-
-            with self.roundtrip(actual, open_kwargs={'chunks': 'auto'}) as auto:
+            
+            with self.roundtrip(actual) as auto:
                 # encoding should have changed
                 for k, v in actual.variables.items():
                     assert v.chunks == rechunked[k].chunks
-
+                    
                 assert_identical(actual, auto)
                 assert_identical(actual.load(), auto.load())
 

From 36f253fa2f67d8f9dbe0d141f65844b8369d5e64 Mon Sep 17 00:00:00 2001
From: Lily Wang <lwang@live.com.au>
Date: Thu, 1 Nov 2018 06:18:37 +1100
Subject: [PATCH 04/30] removed whitespace

---
 xarray/backends/zarr.py       |  8 ++++----
 xarray/tests/test_backends.py | 12 ++++++------
 2 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/xarray/backends/zarr.py b/xarray/backends/zarr.py
index 89f55400e82..7ed26e31151 100644
--- a/xarray/backends/zarr.py
+++ b/xarray/backends/zarr.py
@@ -439,7 +439,7 @@ def open_zarr(store, group=None, synchronizer=None, chunks=None,
     if not isinstance(chunks, (int, dict)):
         if chunks != 'auto' and chunks is not None:
             raise ValueError("chunks must be an int, dict, 'auto', or None. "
-                            "Instead found %s. " % chunks)
+                             "Instead found %s. " % chunks)
 
     if not decode_cf:
         mask_and_scale = False
@@ -474,12 +474,12 @@ def maybe_decode_store(store, lock=False):
     # adapted from Dataset.Chunk()
     if isinstance(chunks, int):
         chunks = dict.fromkeys(ds.dims, chunks)
-    
+
     def selkeys(dict_, keys):
         if dict_ is None:
             return None
         return dict((d, dict_[d]) for d in keys if d in dict_)
-    
+
     def maybe_chunk(name, var, chunks):
         from dask.base import tokenize
 
@@ -487,7 +487,7 @@ def maybe_chunk(name, var, chunks):
             chunks = var.encoding.get('chunks')
         else:
             chunks = selkeys(chunks, var.dims)
-        
+
         if (var.ndim > 0) and (chunks is not None):
             # does this cause any data to be read?
             token2 = tokenize(name, var._data)
diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py
index 5f555dde2d9..01ef1caea38 100644
--- a/xarray/tests/test_backends.py
+++ b/xarray/tests/test_backends.py
@@ -1405,10 +1405,10 @@ def test_auto_chunk(self):
                 assert v._in_memory == (k in actual.dims)
                 # chunk size should be the same as original
                 assert v.chunks == original[k].chunks
-    
+
     def test_manual_chunk(self):
         original = create_test_data().chunk({'dim1': 3, 'dim2': 4, 'dim3': 3})
-        
+
         # All of these should return non-chunked arrays
         NO_CHUNKS = (None, 0, {})
         for no_chunk in NO_CHUNKS:
@@ -1430,20 +1430,20 @@ def test_manual_chunk(self):
                     assert v._in_memory == (k in actual.dims)
                     # chunk size should be the same as rechunked
                     assert v.chunks == rechunked[k].chunks
-        
+
         chunks = {'dim1': 2, 'dim2': 3, 'dim3': 5}
         rechunked = original.chunk(chunks=chunks)
-        
+
         open_kwargs = {'chunks': chunks, 'overwrite_encoded_chunks': True}
         with self.roundtrip(original, open_kwargs=open_kwargs) as actual:
             for k, v in actual.variables.items():
                     assert v.chunks == rechunked[k].chunks
-            
+
             with self.roundtrip(actual) as auto:
                 # encoding should have changed
                 for k, v in actual.variables.items():
                     assert v.chunks == rechunked[k].chunks
-                    
+
                 assert_identical(actual, auto)
                 assert_identical(actual.load(), auto.load())
 

From ae4cf0ab19b3e563bde90a48b3e6ee615930d4a1 Mon Sep 17 00:00:00 2001
From: Lily Wang <lwang@live.com.au>
Date: Wed, 7 Nov 2018 13:22:51 +1100
Subject: [PATCH 05/30] added deprecation warning

---
 xarray/backends/zarr.py       | 34 +++++++++++++++++++++++-----------
 xarray/tests/test_backends.py | 30 +++++++++++++++++++++++++-----
 2 files changed, 48 insertions(+), 16 deletions(-)

diff --git a/xarray/backends/zarr.py b/xarray/backends/zarr.py
index 7ed26e31151..302301248d3 100644
--- a/xarray/backends/zarr.py
+++ b/xarray/backends/zarr.py
@@ -1,4 +1,11 @@
+<<<<<<< HEAD
 from collections import OrderedDict
+=======
+from __future__ import absolute_import, division, print_function
+
+import warnings
+
+>>>>>>> added deprecation warning
 from distutils.version import LooseVersion
 
 import numpy as np
@@ -352,7 +359,7 @@ def close(self):
             zarr.consolidate_metadata(self.ds.store)
 
 
-def open_zarr(store, group=None, synchronizer=None, chunks=None,
+def open_zarr(store, group=None, synchronizer=None, chunks='auto',
               decode_cf=True, mask_and_scale=True, decode_times=True,
               concat_characters=True, decode_coords=True,
               drop_variables=None, consolidated=False, auto_chunk=True, 
@@ -379,13 +386,8 @@ def open_zarr(store, group=None, synchronizer=None, chunks=None,
     chunks : int or dict or {None, 'auto'}, optional
         Chunk sizes along each dimension, e.g., ``5`` or
         ``{'x': 5, 'y': 5}``. If `chunks='auto'`, dask chunks are created
-        based on the variable's zarr chunks. If `chunks=None` and
-        `auto_chunk=False`, zarr array data will lazily convert to numpy
-        arrays upon access.
-    auto_chunk : bool, optional
-        Whether to automatically create dask chunks corresponding to each
-        variable's zarr chunks. If `chunks=None`, this overrides `chunks`.
-        Equivalent to `chunks='auto'.` (Default: True)
+        based on the variable's zarr chunks. If `chunks=None`, zarr array
+        data will lazily convert to numpy arrays upon access.
     overwrite_encoded_chunks: bool, optional
         Whether to drop the zarr chunks encoded for each variable when a
         dataset is loaded with specified chunk sizes (default: False)
@@ -432,9 +434,19 @@ def open_zarr(store, group=None, synchronizer=None, chunks=None,
     ----------
     http://zarr.readthedocs.io/
     """
-
-    if auto_chunk and chunks is None:
-        chunks = 'auto'  # maintain backwards compatibility
+    if 'auto_chunk' in kwargs:
+        auto_chunk = kwargs.pop('auto_chunk')
+        if auto_chunk == True:
+            chunks = 'auto'  # maintain backwards compatibility
+        elif auto_chunk == False:
+            chunks = None
+
+        warnings.warn("auto_chunk is deprecated. Use chunks='auto' instead.",
+                      FutureWarning, stacklevel=2)
+
+    if kwargs:
+        raise TypeError("open_zarr() got unexpected keyword arguments " +
+                        ",".join(kwargs.keys()))
 
     if not isinstance(chunks, (int, dict)):
         if chunks != 'auto' and chunks is not None:
diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py
index 01ef1caea38..21e5518f6de 100644
--- a/xarray/tests/test_backends.py
+++ b/xarray/tests/test_backends.py
@@ -1391,7 +1391,7 @@ def test_auto_chunk(self):
         original = create_test_data().chunk()
 
         with self.roundtrip(
-                original, open_kwargs={'auto_chunk': False}) as actual:
+                original, open_kwargs={'chunks': None}) as actual:
             for k, v in actual.variables.items():
                 # only index variables should be in memory
                 assert v._in_memory == (k in actual.dims)
@@ -1399,7 +1399,7 @@ def test_auto_chunk(self):
                 assert v.chunks is None
 
         with self.roundtrip(
-                original, open_kwargs={'auto_chunk': True}) as actual:
+                original, open_kwargs={'chunks': 'auto'}) as actual:
             for k, v in actual.variables.items():
                 # only index variables should be in memory
                 assert v._in_memory == (k in actual.dims)
@@ -1412,7 +1412,7 @@ def test_manual_chunk(self):
         # All of these should return non-chunked arrays
         NO_CHUNKS = (None, 0, {})
         for no_chunk in NO_CHUNKS:
-            open_kwargs = {'chunks': no_chunk, 'auto_chunk': False}
+            open_kwargs = {'chunks': no_chunk}
             with self.roundtrip(original, open_kwargs=open_kwargs) as actual:
                 for k, v in actual.variables.items():
                     # only index variables should be in memory
@@ -1446,13 +1446,33 @@ def test_manual_chunk(self):
 
                 assert_identical(actual, auto)
                 assert_identical(actual.load(), auto.load())
+    
+    def test_deprecate_auto_chunk(self):
+        original = create_test_data().chunk()
+        with pytest.warns(FutureWarning):
+            with self.roundtrip(
+                original, open_kwargs={'auto_chunk': True}) as actual:
+                for k, v in actual.variables.items():
+                    # only index variables should be in memory
+                    assert v._in_memory == (k in actual.dims)
+                    # chunk size should be the same as original
+                    assert v.chunks == original[k].chunks
+        
+        with pytest.warns(FutureWarning):
+            with self.roundtrip(
+                original, open_kwargs={'auto_chunk': False}) as actual:
+                for k, v in actual.variables.items():
+                    # only index variables should be in memory
+                    assert v._in_memory == (k in actual.dims)
+                    # there should be no chunks
+                    assert v.chunks is None
+            
 
     def test_write_uneven_dask_chunks(self):
         # regression for GH#2225
         original = create_test_data().chunk({'dim1': 3, 'dim2': 4, 'dim3': 3})
-
         with self.roundtrip(
-                original, open_kwargs={'auto_chunk': True}) as actual:
+                original, open_kwargs={'chunks': 'auto'}) as actual:
             for k, v in actual.data_vars.items():
                 print(k)
                 assert v.chunks == actual[k].chunks

From cccfd046dd1b1e2bd1965b0c2a271ec4adebf86e Mon Sep 17 00:00:00 2001
From: Lily Wang <lwang@live.com.au>
Date: Wed, 7 Nov 2018 13:34:44 +1100
Subject: [PATCH 06/30] fixed pep8 issues

---
 xarray/backends/zarr.py       |  4 ++--
 xarray/tests/test_backends.py | 10 +++++-----
 2 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/xarray/backends/zarr.py b/xarray/backends/zarr.py
index 302301248d3..14119660ad0 100644
--- a/xarray/backends/zarr.py
+++ b/xarray/backends/zarr.py
@@ -436,9 +436,9 @@ def open_zarr(store, group=None, synchronizer=None, chunks='auto',
     """
     if 'auto_chunk' in kwargs:
         auto_chunk = kwargs.pop('auto_chunk')
-        if auto_chunk == True:
+        if auto_chunk:
             chunks = 'auto'  # maintain backwards compatibility
-        elif auto_chunk == False:
+        else:
             chunks = None
 
         warnings.warn("auto_chunk is deprecated. Use chunks='auto' instead.",
diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py
index 21e5518f6de..3dac12b5727 100644
--- a/xarray/tests/test_backends.py
+++ b/xarray/tests/test_backends.py
@@ -1446,27 +1446,27 @@ def test_manual_chunk(self):
 
                 assert_identical(actual, auto)
                 assert_identical(actual.load(), auto.load())
-    
+
     def test_deprecate_auto_chunk(self):
         original = create_test_data().chunk()
         with pytest.warns(FutureWarning):
             with self.roundtrip(
-                original, open_kwargs={'auto_chunk': True}) as actual:
+                    original, open_kwargs={'auto_chunk': True}) as actual:
                 for k, v in actual.variables.items():
                     # only index variables should be in memory
                     assert v._in_memory == (k in actual.dims)
                     # chunk size should be the same as original
                     assert v.chunks == original[k].chunks
-        
+
         with pytest.warns(FutureWarning):
             with self.roundtrip(
-                original, open_kwargs={'auto_chunk': False}) as actual:
+                    original, open_kwargs={'auto_chunk': False}) as actual:
                 for k, v in actual.variables.items():
                     # only index variables should be in memory
                     assert v._in_memory == (k in actual.dims)
                     # there should be no chunks
                     assert v.chunks is None
-            
+
 
     def test_write_uneven_dask_chunks(self):
         # regression for GH#2225

From da45d77d06897d71df61ed39d5c1d9ebe6256dbf Mon Sep 17 00:00:00 2001
From: Lily <lily.wang@anu.edu.au>
Date: Wed, 30 Jan 2019 11:06:35 +1100
Subject: [PATCH 07/30] added warning for bad chunks

---
 xarray/backends/zarr.py       | 47 ++++++++++++++++++++++++++---------
 xarray/tests/test_backends.py | 24 ++++++++++++++++++
 2 files changed, 59 insertions(+), 12 deletions(-)

diff --git a/xarray/backends/zarr.py b/xarray/backends/zarr.py
index 14119660ad0..9f10c6a3bb7 100644
--- a/xarray/backends/zarr.py
+++ b/xarray/backends/zarr.py
@@ -383,11 +383,12 @@ def open_zarr(store, group=None, synchronizer=None, chunks='auto',
         Array synchronizer provided to zarr
     group : str, obtional
         Group path. (a.k.a. `path` in zarr terminology.)
-    chunks : int or dict or {None, 'auto'}, optional
+    chunks : int or dict or tuple or {None, 'auto'}, optional
         Chunk sizes along each dimension, e.g., ``5`` or
         ``{'x': 5, 'y': 5}``. If `chunks='auto'`, dask chunks are created
         based on the variable's zarr chunks. If `chunks=None`, zarr array
-        data will lazily convert to numpy arrays upon access.
+        data will lazily convert to numpy arrays upon access. This accepts 
+        all the chunk specifications as Dask does.
     overwrite_encoded_chunks: bool, optional
         Whether to drop the zarr chunks encoded for each variable when a
         dataset is loaded with specified chunk sizes (default: False)
@@ -486,25 +487,47 @@ def maybe_decode_store(store, lock=False):
     # adapted from Dataset.Chunk()
     if isinstance(chunks, int):
         chunks = dict.fromkeys(ds.dims, chunks)
+    
+    if isinstance(chunks, tuple) and len(chunks) == len(ds.dims):
+        chunks = dict(zip(ds.dims, chunks))
+
+    def get_chunk(name, var, chunks):
+        chunk_spec = dict(zip(var.dims, var.encoding.get('chunks')))
+
+        # Coordinate labels aren't chunked
+        if var.ndim == 1 and var.dims[0] == name:
+            return chunk_spec
+
+        if chunks == 'auto':
+            return chunk_spec
+
+        for dim in var.dims:
+            if dim in chunks:
+                spec = chunks[dim]
+                if isinstance(spec, int):
+                    spec = (spec,)
+                if isinstance(spec, (tuple, list)) and chunk_spec[dim]:
+                    if any(s % chunk_spec[dim] for s in spec):
+                        print('ok any', spec, chunk_spec[dim], dim)
+                        warnings.warn("Specified Dask chunks %r would "
+                        "separate Zarr chunk shape %r for dimension %r. "
+                        "This significantly degrades performance. "
+                        "Consider rechunking after loading." 
+                        % (chunks[dim], chunk_spec[dim], dim))
+                chunk_spec[dim] = chunks[dim]
+        return chunk_spec
 
-    def selkeys(dict_, keys):
-        if dict_ is None:
-            return None
-        return dict((d, dict_[d]) for d in keys if d in dict_)
 
     def maybe_chunk(name, var, chunks):
         from dask.base import tokenize
 
-        if chunks == 'auto':
-            chunks = var.encoding.get('chunks')
-        else:
-            chunks = selkeys(chunks, var.dims)
+        chunk_spec = get_chunk(name, var, chunks)
 
-        if (var.ndim > 0) and (chunks is not None):
+        if (var.ndim > 0) and (chunk_spec is not None):
             # does this cause any data to be read?
             token2 = tokenize(name, var._data)
             name2 = 'zarr-%s' % token2
-            var = var.chunk(chunks, name=name2, lock=None)
+            var = var.chunk(chunk_spec, name=name2, lock=None)
             if overwrite_encoded_chunks and var.chunks is not None:
                 var.encoding['chunks'] = tuple(x[0] for x in var.chunks)
             return var
diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py
index 3dac12b5727..1b77da32528 100644
--- a/xarray/tests/test_backends.py
+++ b/xarray/tests/test_backends.py
@@ -1446,6 +1446,30 @@ def test_manual_chunk(self):
 
                 assert_identical(actual, auto)
                 assert_identical(actual.load(), auto.load())
+    
+    def test_warning_on_bad_chunks(self):
+        original = create_test_data().chunk({'dim1': 4, 'dim2': 3, 'dim3': 5})
+
+        bad_chunks = (2, {'dim2':(3, 3, 2, 1)})
+        for chunks in bad_chunks:
+            kwargs = {'chunks': chunks}
+            with pytest.warns(UserWarning):
+                with self.roundtrip(original, open_kwargs=kwargs) as actual:
+                    for k, v in actual.variables.items():
+                        # only index variables should be in memory
+                        assert v._in_memory == (k in actual.dims)
+
+        good_chunks = ({'dim2': 3}, {'dim3': 10})
+        for chunks in good_chunks:
+            kwargs = {'chunks': chunks}
+            with pytest.warns(None) as record:
+                with self.roundtrip(original, open_kwargs=kwargs) as actual:
+                    for k, v in actual.variables.items():
+                        # only index variables should be in memory
+                        assert v._in_memory == (k in actual.dims)
+            assert len(record) == 0
+
+
 
     def test_deprecate_auto_chunk(self):
         original = create_test_data().chunk()

From 7618c08e42e95da6682254053c8b38d4ebe43355 Mon Sep 17 00:00:00 2001
From: Lily <lily.wang@anu.edu.au>
Date: Wed, 30 Jan 2019 11:34:50 +1100
Subject: [PATCH 08/30] fixed lingering rebase conflicts

---
 doc/whats-new.rst       | 11 +++++------
 xarray/backends/zarr.py | 26 +++++++++-----------------
 2 files changed, 14 insertions(+), 23 deletions(-)

diff --git a/doc/whats-new.rst b/doc/whats-new.rst
index 86d30ce42ad..5020a975d67 100644
--- a/doc/whats-new.rst
+++ b/doc/whats-new.rst
@@ -121,6 +121,11 @@ Other enhancements
   By `Keisuke Fujii <https://github.com/fujiisoup>`_.
 - Added :py:meth:`~xarray.Dataset.drop_dims` (:issue:`1949`).
   By `Kevin Squire <https://github.com/kmsquire>`_.
+- ``xr.open_zarr`` now accepts manually specified chunks with the ``chunks=`` 
+  parameter. ``auto_chunk=True`` is equivalent to ``chunks='auto'`` for 
+  backwards compatibility. The ``overwrite_encoded_chunks`` parameter is 
+  added to remove the original zarr chunk encoding.
+  By `Lily Wang <https://github.com/lilyminium>`_.
 
 Bug fixes
 ~~~~~~~~~
@@ -428,12 +433,6 @@ Bug fixes
   encoding process if a reference date is used that is so distant that
   the dates must be encoded using cftime rather than NumPy (:issue:`2272`).
   By `Spencer Clark <https://github.com/spencerkclark>`_.
-  
-- ``xr.open_zarr`` now accepts manually specified chunks with the ``chunks=`` 
-  parameter. ``auto_chunk=True`` is equivalent to ``chunks='auto'`` for 
-  backwards compatibility. The ``overwrite_encoded_chunks`` parameter is 
-  added to remove the original zarr chunk encoding.
-  By `Lily Wang <https://github.com/lilyminium>`_.
 
 - Chunked datasets can now roundtrip to Zarr storage continually
   with `to_zarr` and ``open_zarr`` (:issue:`2300`).
diff --git a/xarray/backends/zarr.py b/xarray/backends/zarr.py
index 9f10c6a3bb7..87507ece201 100644
--- a/xarray/backends/zarr.py
+++ b/xarray/backends/zarr.py
@@ -1,11 +1,5 @@
-<<<<<<< HEAD
-from collections import OrderedDict
-=======
-from __future__ import absolute_import, division, print_function
-
 import warnings
-
->>>>>>> added deprecation warning
+from collections import OrderedDict
 from distutils.version import LooseVersion
 
 import numpy as np
@@ -362,8 +356,8 @@ def close(self):
 def open_zarr(store, group=None, synchronizer=None, chunks='auto',
               decode_cf=True, mask_and_scale=True, decode_times=True,
               concat_characters=True, decode_coords=True,
-              drop_variables=None, consolidated=False, auto_chunk=True, 
-              overwrite_encoded_chunks=False):
+              drop_variables=None, consolidated=False,
+              overwrite_encoded_chunks=False, **kwargs):
     """Load and decode a dataset from a Zarr store.
 
     .. note:: Experimental
@@ -387,7 +381,7 @@ def open_zarr(store, group=None, synchronizer=None, chunks='auto',
         Chunk sizes along each dimension, e.g., ``5`` or
         ``{'x': 5, 'y': 5}``. If `chunks='auto'`, dask chunks are created
         based on the variable's zarr chunks. If `chunks=None`, zarr array
-        data will lazily convert to numpy arrays upon access. This accepts 
+        data will lazily convert to numpy arrays upon access. This accepts
         all the chunk specifications as Dask does.
     overwrite_encoded_chunks: bool, optional
         Whether to drop the zarr chunks encoded for each variable when a
@@ -487,7 +481,7 @@ def maybe_decode_store(store, lock=False):
     # adapted from Dataset.Chunk()
     if isinstance(chunks, int):
         chunks = dict.fromkeys(ds.dims, chunks)
-    
+
     if isinstance(chunks, tuple) and len(chunks) == len(ds.dims):
         chunks = dict(zip(ds.dims, chunks))
 
@@ -508,16 +502,14 @@ def get_chunk(name, var, chunks):
                     spec = (spec,)
                 if isinstance(spec, (tuple, list)) and chunk_spec[dim]:
                     if any(s % chunk_spec[dim] for s in spec):
-                        print('ok any', spec, chunk_spec[dim], dim)
                         warnings.warn("Specified Dask chunks %r would "
-                        "separate Zarr chunk shape %r for dimension %r. "
-                        "This significantly degrades performance. "
-                        "Consider rechunking after loading." 
-                        % (chunks[dim], chunk_spec[dim], dim))
+                            "separate Zarr chunk shape %r for dimension %r. "
+                            "This significantly degrades performance. "
+                            "Consider rechunking after loading."
+                            % (chunks[dim], chunk_spec[dim], dim))
                 chunk_spec[dim] = chunks[dim]
         return chunk_spec
 
-
     def maybe_chunk(name, var, chunks):
         from dask.base import tokenize
 

From 8571131e78197d1fc0101c2be849527f9a883607 Mon Sep 17 00:00:00 2001
From: Lily <lily.wang@anu.edu.au>
Date: Wed, 30 Jan 2019 11:38:14 +1100
Subject: [PATCH 09/30] fixed pep8 issues

---
 xarray/backends/zarr.py       | 9 +++++----
 xarray/tests/test_backends.py | 7 ++-----
 2 files changed, 7 insertions(+), 9 deletions(-)

diff --git a/xarray/backends/zarr.py b/xarray/backends/zarr.py
index 87507ece201..d0696f20499 100644
--- a/xarray/backends/zarr.py
+++ b/xarray/backends/zarr.py
@@ -503,10 +503,11 @@ def get_chunk(name, var, chunks):
                 if isinstance(spec, (tuple, list)) and chunk_spec[dim]:
                     if any(s % chunk_spec[dim] for s in spec):
                         warnings.warn("Specified Dask chunks %r would "
-                            "separate Zarr chunk shape %r for dimension %r. "
-                            "This significantly degrades performance. "
-                            "Consider rechunking after loading."
-                            % (chunks[dim], chunk_spec[dim], dim))
+                                      "separate Zarr chunk shape %r for "
+                                      "dimension %r. This significantly "
+                                      "degrades performance. Consider "
+                                      "rechunking after loading instead."
+                                      % (chunks[dim], chunk_spec[dim], dim))
                 chunk_spec[dim] = chunks[dim]
         return chunk_spec
 
diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py
index 1b77da32528..5efcdf9cd98 100644
--- a/xarray/tests/test_backends.py
+++ b/xarray/tests/test_backends.py
@@ -1446,11 +1446,11 @@ def test_manual_chunk(self):
 
                 assert_identical(actual, auto)
                 assert_identical(actual.load(), auto.load())
-    
+
     def test_warning_on_bad_chunks(self):
         original = create_test_data().chunk({'dim1': 4, 'dim2': 3, 'dim3': 5})
 
-        bad_chunks = (2, {'dim2':(3, 3, 2, 1)})
+        bad_chunks = (2, {'dim2': (3, 3, 2, 1)})
         for chunks in bad_chunks:
             kwargs = {'chunks': chunks}
             with pytest.warns(UserWarning):
@@ -1469,8 +1469,6 @@ def test_warning_on_bad_chunks(self):
                         assert v._in_memory == (k in actual.dims)
             assert len(record) == 0
 
-
-
     def test_deprecate_auto_chunk(self):
         original = create_test_data().chunk()
         with pytest.warns(FutureWarning):
@@ -1491,7 +1489,6 @@ def test_deprecate_auto_chunk(self):
                     # there should be no chunks
                     assert v.chunks is None
 
-
     def test_write_uneven_dask_chunks(self):
         # regression for GH#2225
         original = create_test_data().chunk({'dim1': 3, 'dim2': 4, 'dim3': 3})

From a70205a4af1e51fa5534db72c18e33321ce7c9b8 Mon Sep 17 00:00:00 2001
From: Lily <lily.wang@anu.edu.au>
Date: Thu, 4 Apr 2019 12:39:26 +1100
Subject: [PATCH 10/30] added stacklevel

---
 xarray/backends/zarr.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/xarray/backends/zarr.py b/xarray/backends/zarr.py
index d0696f20499..e20140ee248 100644
--- a/xarray/backends/zarr.py
+++ b/xarray/backends/zarr.py
@@ -212,7 +212,7 @@ def encode_zarr_variable(var, needs_copy=True, name=None):
     # zarr allows unicode, but not variable-length strings, so it's both
     # simpler and more compact to always encode as UTF-8 explicitly.
     # TODO: allow toggling this explicitly via dtype in encoding.
-    coder = coding.strings.EncodedStringCoder(allows_unicode=False)
+    coder = coding.strings.EncodedStringCoder(allows_unicode=True)
     var = coder.encode(var, name=name)
     var = coding.strings.ensure_fixed_length_bytes(var)
 
@@ -507,7 +507,8 @@ def get_chunk(name, var, chunks):
                                       "dimension %r. This significantly "
                                       "degrades performance. Consider "
                                       "rechunking after loading instead."
-                                      % (chunks[dim], chunk_spec[dim], dim))
+                                      % (chunks[dim], chunk_spec[dim], dim),
+                                      stacklevel=2)
                 chunk_spec[dim] = chunks[dim]
         return chunk_spec
 

From 17fa557d9ada9e60971ce0a7370fd969aa788946 Mon Sep 17 00:00:00 2001
From: Lily <lily.wang@anu.edu.au>
Date: Thu, 4 Apr 2019 12:44:45 +1100
Subject: [PATCH 11/30] fixed pep8 issues

---
 xarray/tests/test_backends.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py
index 5efcdf9cd98..bf40e529931 100644
--- a/xarray/tests/test_backends.py
+++ b/xarray/tests/test_backends.py
@@ -1437,7 +1437,7 @@ def test_manual_chunk(self):
         open_kwargs = {'chunks': chunks, 'overwrite_encoded_chunks': True}
         with self.roundtrip(original, open_kwargs=open_kwargs) as actual:
             for k, v in actual.variables.items():
-                    assert v.chunks == rechunked[k].chunks
+                assert v.chunks == rechunked[k].chunks
 
             with self.roundtrip(actual) as auto:
                 # encoding should have changed

From 31619d711b952a30c8cfe56916c8720359835827 Mon Sep 17 00:00:00 2001
From: Stephan Hoyer <shoyer@gmail.com>
Date: Thu, 4 Apr 2019 14:58:23 -0700
Subject: [PATCH 12/30] Various fixes for explicit Dataset.indexes (#2858)

* Various fixes for explicit Dataset.indexes

Fixes GH2856

I've added internal consistency checks to the uses of ``assert_equal`` in our
test suite, so this shouldn't happen again.

* Fix indexes in Dataset.interp
---
 doc/whats-new.rst            |  4 +-
 xarray/core/alignment.py     | 63 +++++++++++------------
 xarray/core/dataarray.py     |  3 --
 xarray/core/dataset.py       | 48 +++++++++++-------
 xarray/core/indexes.py       |  7 +--
 xarray/testing.py            | 43 ++++++++++++++--
 xarray/tests/__init__.py     | 25 +++++++++-
 xarray/tests/test_combine.py |  9 +++-
 xarray/tests/test_dataset.py | 97 ++++++++++++++++++++++++------------
 xarray/tests/test_interp.py  |  2 +-
 10 files changed, 202 insertions(+), 99 deletions(-)

diff --git a/doc/whats-new.rst b/doc/whats-new.rst
index 9c88445b5ba..6fc7c25ac91 100644
--- a/doc/whats-new.rst
+++ b/doc/whats-new.rst
@@ -31,7 +31,9 @@ Bug fixes
 
 - Dataset.copy(deep=True) now creates a deep copy of the attrs (:issue:`2835`).
   By `Andras Gefferth <https://github.com/kefirbandi>`_.
-- ``swap_dims`` would create incorrect ``indexes`` (:issue:`2842`).
+- Fix incorrect ``indexes`` resulting from various ``Dataset`` operations
+  (e.g., ``swap_dims``, ``isel``, ``reindex``, ``[]``) (:issue:`2842`,
+  :issue:`2856`).
   By `Stephan Hoyer <https://github.com/shoyer>`_.
 
 .. _whats-new.0.12.0:
diff --git a/xarray/core/alignment.py b/xarray/core/alignment.py
index af08eef268f..642be735e9b 100644
--- a/xarray/core/alignment.py
+++ b/xarray/core/alignment.py
@@ -315,36 +315,51 @@ def reindex_variables(
     """
     from .dataarray import DataArray
 
+    # create variables for the new dataset
+    reindexed = OrderedDict()  # type: OrderedDict[Any, Variable]
+
     # build up indexers for assignment along each dimension
     int_indexers = {}
-    targets = OrderedDict()  # type: OrderedDict[Any, pd.Index]
+    new_indexes = OrderedDict(indexes)
     masked_dims = set()
     unchanged_dims = set()
 
-    # size of reindexed dimensions
-    new_sizes = {}
+    for dim, indexer in indexers.items():
+        if isinstance(indexer, DataArray) and indexer.dims != (dim,):
+            warnings.warn(
+                "Indexer has dimensions {0:s} that are different "
+                "from that to be indexed along {1:s}. "
+                "This will behave differently in the future.".format(
+                    str(indexer.dims), dim),
+                FutureWarning, stacklevel=3)
+
+        target = new_indexes[dim] = utils.safe_cast_to_index(indexers[dim])
+
+        if dim in indexes:
+            index = indexes[dim]
 
-    for name, index in indexes.items():
-        if name in indexers:
             if not index.is_unique:
                 raise ValueError(
                     'cannot reindex or align along dimension %r because the '
-                    'index has duplicate values' % name)
-
-            target = utils.safe_cast_to_index(indexers[name])
-            new_sizes[name] = len(target)
+                    'index has duplicate values' % dim)
 
             int_indexer = get_indexer_nd(index, target, method, tolerance)
 
             # We uses negative values from get_indexer_nd to signify
             # values that are missing in the index.
             if (int_indexer < 0).any():
-                masked_dims.add(name)
+                masked_dims.add(dim)
             elif np.array_equal(int_indexer, np.arange(len(index))):
-                unchanged_dims.add(name)
+                unchanged_dims.add(dim)
 
-            int_indexers[name] = int_indexer
-            targets[name] = target
+            int_indexers[dim] = int_indexer
+
+        if dim in variables:
+            var = variables[dim]
+            args = (var.attrs, var.encoding)  # type: tuple
+        else:
+            args = ()
+        reindexed[dim] = IndexVariable((dim,), target, *args)
 
     for dim in sizes:
         if dim not in indexes and dim in indexers:
@@ -356,25 +371,6 @@ def reindex_variables(
                     'index because its size %r is different from the size of '
                     'the new index %r' % (dim, existing_size, new_size))
 
-    # create variables for the new dataset
-    reindexed = OrderedDict()  # type: OrderedDict[Any, Variable]
-
-    for dim, indexer in indexers.items():
-        if isinstance(indexer, DataArray) and indexer.dims != (dim,):
-            warnings.warn(
-                "Indexer has dimensions {0:s} that are different "
-                "from that to be indexed along {1:s}. "
-                "This will behave differently in the future.".format(
-                    str(indexer.dims), dim),
-                FutureWarning, stacklevel=3)
-
-        if dim in variables:
-            var = variables[dim]
-            args = (var.attrs, var.encoding)  # type: tuple
-        else:
-            args = ()
-        reindexed[dim] = IndexVariable((dim,), indexers[dim], *args)
-
     for name, var in variables.items():
         if name not in indexers:
             key = tuple(slice(None)
@@ -395,9 +391,6 @@ def reindex_variables(
 
             reindexed[name] = new_var
 
-    new_indexes = OrderedDict(indexes)
-    new_indexes.update(targets)
-
     return reindexed, new_indexes
 
 
diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py
index c24703f5384..a9e55159f57 100644
--- a/xarray/core/dataarray.py
+++ b/xarray/core/dataarray.py
@@ -231,9 +231,6 @@ def __init__(self, data, coords=None, dims=None, name=None,
             coords, dims = _infer_coords_and_dims(data.shape, coords, dims)
             variable = Variable(dims, data, attrs, encoding, fastpath=True)
 
-        # uncomment for a useful consistency check:
-        # assert all(isinstance(v, Variable) for v in coords.values())
-
         # These fully describe a DataArray
         self._variable = variable
         self._coords = coords
diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py
index e3b2e3c3d2c..cf6631fa5ba 100644
--- a/xarray/core/dataset.py
+++ b/xarray/core/dataset.py
@@ -938,6 +938,7 @@ def _copy_listed(self: T, names) -> T:
         """
         variables = OrderedDict()  # type: OrderedDict[Any, Variable]
         coord_names = set()
+        indexes = OrderedDict()  # type: OrderedDict[Any, pd.Index]
 
         for name in names:
             try:
@@ -948,6 +949,8 @@ def _copy_listed(self: T, names) -> T:
                 variables[var_name] = var
                 if ref_name in self._coord_names or ref_name in self.dims:
                     coord_names.add(var_name)
+                if (var_name,) == var.dims:
+                    indexes[var_name] = var.to_index()
 
         needed_dims = set()  # type: set
         for v in variables.values():
@@ -959,12 +962,8 @@ def _copy_listed(self: T, names) -> T:
             if set(self.variables[k].dims) <= needed_dims:
                 variables[k] = self._variables[k]
                 coord_names.add(k)
-
-        if self._indexes is None:
-            indexes = None
-        else:
-            indexes = OrderedDict((k, v) for k, v in self._indexes.items()
-                                  if k in coord_names)
+                if k in self.indexes:
+                    indexes[k] = self.indexes[k]
 
         return self._replace(variables, coord_names, dims, indexes=indexes)
 
@@ -1503,9 +1502,13 @@ def _validate_indexers(
             raise ValueError("dimensions %r do not exist" % invalid)
 
         # all indexers should be int, slice, np.ndarrays, or Variable
-        indexers_list = []
+        indexers_list = []  # type: List[Tuple[Any, Union[slice, Variable]]]
         for k, v in indexers.items():
-            if isinstance(v, (slice, Variable)):
+            if isinstance(v, slice):
+                indexers_list.append((k, v))
+                continue
+
+            if isinstance(v, Variable):
                 pass
             elif isinstance(v, DataArray):
                 v = v.variable
@@ -1524,14 +1527,19 @@ def _validate_indexers(
                         v = _parse_array_of_cftime_strings(v, index.date_type)
 
                 if v.ndim == 0:
-                    v = as_variable(v)
+                    v = Variable((), v)
                 elif v.ndim == 1:
-                    v = as_variable((k, v))
+                    v = IndexVariable((k,), v)
                 else:
                     raise IndexError(
                         "Unlabeled multi-dimensional array cannot be "
                         "used for indexing: {}".format(k))
+
+            if v.ndim == 1:
+                v = v.to_index_variable()
+
             indexers_list.append((k, v))
+
         return indexers_list
 
     def _get_indexers_coords_and_indexes(self, indexers):
@@ -1631,7 +1639,7 @@ def isel(self, indexers=None, drop=False, **indexers_kwargs):
 
             if name in self.indexes:
                 new_var, new_index = isel_variable_and_index(
-                    var, self.indexes[name], var_indexers)
+                    name, var, self.indexes[name], var_indexers)
                 if new_index is not None:
                     indexes[name] = new_index
             else:
@@ -2117,15 +2125,20 @@ def _validate_interp_indexer(x, new_x):
         indexes = OrderedDict(
             (k, v) for k, v in obj.indexes.items() if k not in indexers)
         selected = self._replace_with_new_dims(
-            variables, coord_names, indexes=indexes)
+            variables.copy(), coord_names, indexes=indexes)
 
         # attach indexer as coordinate
         variables.update(indexers)
+        indexes.update(
+            (k, v.to_index()) for k, v in indexers.items() if v.dims == (k,)
+        )
+
         # Extract coordinates from indexers
         coord_vars, new_indexes = (
             selected._get_indexers_coords_and_indexes(coords))
         variables.update(coord_vars)
         indexes.update(new_indexes)
+
         coord_names = (set(variables)
                        .intersection(obj._coord_names)
                        .union(coord_vars))
@@ -2401,6 +2414,7 @@ def expand_dims(self, dim=None, axis=None, **dim_kwargs):
                     ' variable name.'.format(dim=d))
 
         variables = OrderedDict()
+        coord_names = self._coord_names.copy()
         # If dim is a dict, then ensure that the values are either integers
         # or iterables.
         for k, v in dim.items():
@@ -2410,7 +2424,7 @@ def expand_dims(self, dim=None, axis=None, **dim_kwargs):
                 # value within the dim dict to the length of the iterable
                 # for later use.
                 variables[k] = xr.IndexVariable((k,), v)
-                self._coord_names.add(k)
+                coord_names.add(k)
                 dim[k] = variables[k].size
             elif isinstance(v, int):
                 pass  # Do nothing if the dimensions value is just an int
@@ -2420,7 +2434,7 @@ def expand_dims(self, dim=None, axis=None, **dim_kwargs):
 
         for k, v in self._variables.items():
             if k not in dim:
-                if k in self._coord_names:  # Do not change coordinates
+                if k in coord_names:  # Do not change coordinates
                     variables[k] = v
                 else:
                     result_ndim = len(v.dims) + len(axis)
@@ -2452,10 +2466,10 @@ def expand_dims(self, dim=None, axis=None, **dim_kwargs):
                 variables[k] = v.set_dims(k)
 
         new_dims = self._dims.copy()
-        for d in dim:
-            new_dims[d] = 1
+        new_dims.update(dim)
 
-        return self._replace(variables, dims=new_dims)
+        return self._replace_vars_and_dims(
+            variables, dims=new_dims, coord_names=coord_names)
 
     def set_index(self, indexes=None, append=False, inplace=None,
                   **indexes_kwargs):
diff --git a/xarray/core/indexes.py b/xarray/core/indexes.py
index 6d8b553036a..eccb72b6a58 100644
--- a/xarray/core/indexes.py
+++ b/xarray/core/indexes.py
@@ -1,6 +1,6 @@
 import collections.abc
 from collections import OrderedDict
-from typing import Any, Iterable, Mapping, Optional, Tuple, Union
+from typing import Any, Hashable, Iterable, Mapping, Optional, Tuple, Union
 
 import pandas as pd
 
@@ -59,6 +59,7 @@ def default_indexes(
 
 
 def isel_variable_and_index(
+    name: Hashable,
     variable: Variable,
     index: pd.Index,
     indexers: Mapping[Any, Union[slice, Variable]],
@@ -75,8 +76,8 @@ def isel_variable_and_index(
 
     new_variable = variable.isel(indexers)
 
-    if new_variable.ndim != 1:
-        # can't preserve a index if result is not 0D
+    if new_variable.dims != (name,):
+        # can't preserve an index if result has new dimensions
         return new_variable, None
 
     # we need to compute the new index
diff --git a/xarray/testing.py b/xarray/testing.py
index 794c0614925..eb8a0e8603d 100644
--- a/xarray/testing.py
+++ b/xarray/testing.py
@@ -1,8 +1,12 @@
 """Testing functions exposed to the user API"""
+from collections import OrderedDict
+
 import numpy as np
+import pandas as pd
 
 from xarray.core import duck_array_ops
 from xarray.core import formatting
+from xarray.core.indexes import default_indexes
 
 
 def _decode_string_data(data):
@@ -143,8 +147,37 @@ def assert_allclose(a, b, rtol=1e-05, atol=1e-08, decode_bytes=True):
                         .format(type(a)))
 
 
-def assert_combined_tile_ids_equal(dict1, dict2):
-    assert len(dict1) == len(dict2)
-    for k, v in dict1.items():
-        assert k in dict2.keys()
-        assert_equal(dict1[k], dict2[k])
+def _assert_indexes_invariants_checks(indexes, possible_coord_variables, dims):
+    import xarray as xr
+
+    assert isinstance(indexes, OrderedDict), indexes
+    assert all(isinstance(v, pd.Index) for v in indexes.values()), \
+        {k: type(v) for k, v in indexes.items()}
+
+    index_vars = {k for k, v in possible_coord_variables.items()
+                  if isinstance(v, xr.IndexVariable)}
+    assert indexes.keys() <= index_vars, (set(indexes), index_vars)
+
+    # Note: when we support non-default indexes, these checks should be opt-in
+    # only!
+    defaults = default_indexes(possible_coord_variables, dims)
+    assert indexes.keys() == defaults.keys(), \
+        (set(indexes), set(defaults))
+    assert all(v.equals(defaults[k]) for k, v in indexes.items()), \
+        (indexes, defaults)
+
+
+def _assert_indexes_invariants(a):
+    """Separate helper function for checking indexes invariants only."""
+    import xarray as xr
+
+    if isinstance(a, xr.DataArray):
+        if a._indexes is not None:
+            _assert_indexes_invariants_checks(a._indexes, a._coords, a.dims)
+    elif isinstance(a, xr.Dataset):
+        if a._indexes is not None:
+            _assert_indexes_invariants_checks(
+                a._indexes, a._variables, a._dims)
+    elif isinstance(a, xr.Variable):
+        # no indexes
+        pass
diff --git a/xarray/tests/__init__.py b/xarray/tests/__init__.py
index 4ebcc29a61e..525360701fe 100644
--- a/xarray/tests/__init__.py
+++ b/xarray/tests/__init__.py
@@ -13,8 +13,7 @@
 from xarray.core import utils
 from xarray.core.options import set_options
 from xarray.core.indexing import ExplicitlyIndexed
-from xarray.testing import (assert_equal, assert_identical,  # noqa: F401
-                            assert_allclose, assert_combined_tile_ids_equal)
+import xarray.testing
 from xarray.plot.utils import import_seaborn
 
 try:
@@ -180,3 +179,25 @@ def source_ndarray(array):
     if base is None:
         base = array
     return base
+
+
+# Internal versions of xarray's test functions that validate additional
+# invariants
+# TODO: add more invariant checks.
+
+def assert_equal(a, b):
+    xarray.testing.assert_equal(a, b)
+    xarray.testing._assert_indexes_invariants(a)
+    xarray.testing._assert_indexes_invariants(b)
+
+
+def assert_identical(a, b):
+    xarray.testing.assert_identical(a, b)
+    xarray.testing._assert_indexes_invariants(a)
+    xarray.testing._assert_indexes_invariants(b)
+
+
+def assert_allclose(a, b, **kwargs):
+    xarray.testing.assert_allclose(a, b, **kwargs)
+    xarray.testing._assert_indexes_invariants(a)
+    xarray.testing._assert_indexes_invariants(b)
diff --git a/xarray/tests/test_combine.py b/xarray/tests/test_combine.py
index 0d03b6e0cdf..6d0f4626086 100644
--- a/xarray/tests/test_combine.py
+++ b/xarray/tests/test_combine.py
@@ -13,7 +13,7 @@
     _infer_tile_ids_from_nested_list, _new_tile_id)
 
 from . import (
-    InaccessibleArray, assert_array_equal, assert_combined_tile_ids_equal,
+    InaccessibleArray, assert_array_equal,
     assert_equal, assert_identical, raises_regex, requires_dask)
 from .test_dataset import create_test_data
 
@@ -418,6 +418,13 @@ def test_auto_combine_no_concat(self):
         assert_identical(expected, actual)
 
 
+def assert_combined_tile_ids_equal(dict1, dict2):
+    assert len(dict1) == len(dict2)
+    for k, v in dict1.items():
+        assert k in dict2.keys()
+        assert_equal(dict1[k], dict2[k])
+
+
 class TestTileIDsFromNestedList(object):
     def test_1d(self):
         ds = create_test_data
diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py
index ab83d385ef4..3ace80f5eea 100644
--- a/xarray/tests/test_dataset.py
+++ b/xarray/tests/test_dataset.py
@@ -2053,12 +2053,8 @@ def test_expand_dims_error(self):
         if python36_plus:
             with raises_regex(ValueError, 'both keyword and positional'):
                 original.expand_dims(OrderedDict((("d", 4),)), e=4)
-        else:
-            # In python 3.5, using dim_kwargs should raise a ValueError.
-            with raises_regex(ValueError, "dim_kwargs isn't"):
-                original.expand_dims(OrderedDict((("d", 4),)), e=4)
 
-    def test_expand_dims(self):
+    def test_expand_dims_int(self):
         original = Dataset({'x': ('a', np.random.randn(3)),
                             'y': (['b', 'a'], np.random.randn(4, 3))},
                            coords={'a': np.linspace(0, 1, 3),
@@ -2091,9 +2087,37 @@ def test_expand_dims(self):
         roundtripped = actual.squeeze('z')
         assert_identical(original, roundtripped)
 
+    def test_expand_dims_coords(self):
+        original = Dataset({'x': ('a', np.array([1, 2, 3]))})
+        expected = Dataset(
+            {'x': (('b', 'a'), np.array([[1, 2, 3], [1, 2, 3]]))},
+            coords={'b': [1, 2]},
+        )
+        actual = original.expand_dims(OrderedDict(b=[1, 2]))
+        assert_identical(expected, actual)
+        assert 'b' not in original._coord_names
+
+    def test_expand_dims_existing_scalar_coord(self):
+        original = Dataset({'x': 1}, {'a': 2})
+        expected = Dataset({'x': (('a',), [1])}, {'a': [2]})
+        actual = original.expand_dims('a')
+        assert_identical(expected, actual)
+
+    def test_isel_expand_dims_roundtrip(self):
+        original = Dataset({'x': (('a',), [1])}, {'a': [2]})
+        actual = original.isel(a=0).expand_dims('a')
+        assert_identical(actual, original)
+
+    def test_expand_dims_mixed_int_and_coords(self):
         # Test expanding one dimension to have size > 1 that doesn't have
         # coordinates, and also expanding another dimension to have size > 1
         # that DOES have coordinates.
+        original = Dataset({'x': ('a', np.random.randn(3)),
+                            'y': (['b', 'a'], np.random.randn(4, 3))},
+                           coords={'a': np.linspace(0, 1, 3),
+                                   'b': np.linspace(0, 1, 4),
+                                   'c': np.linspace(0, 1, 5)})
+
         actual = original.expand_dims(
             OrderedDict((("d", 4), ("e", ["l", "m", "n"]))))
 
@@ -2109,34 +2133,45 @@ def test_expand_dims(self):
                                            b=np.linspace(0, 1, 4),
                                            a=np.linspace(0, 1, 3)),
                                dims=['d', 'e', 'b', 'a']).drop('d')},
-            coords={'c': np.linspace(0, 1, 5)},
-            attrs={'key': 'entry'})
+            coords={'c': np.linspace(0, 1, 5)})
         assert_identical(actual, expected)
 
-        # Test with kwargs instead of passing dict to dim arg.
-
-        # TODO: only the code under the if-statement is needed when python 3.5
-        #   is no longer supported.
-        python36_plus = sys.version_info[0] == 3 and sys.version_info[1] > 5
-        if python36_plus:
-            other_way = original.expand_dims(e=["l", "m", "n"])
-            other_way_expected = Dataset(
-                {'x': xr.DataArray(original['x'].values * np.ones([3, 3]),
-                                   coords=dict(e=['l', 'm', 'n'],
-                                               a=np.linspace(0, 1, 3)),
-                                   dims=['e', 'a']),
-                 'y': xr.DataArray(original['y'].values * np.ones([3, 4, 3]),
-                                   coords=dict(e=['l', 'm', 'n'],
-                                               b=np.linspace(0, 1, 4),
-                                               a=np.linspace(0, 1, 3)),
-                                   dims=['e', 'b', 'a'])},
-                coords={'c': np.linspace(0, 1, 5)},
-                attrs={'key': 'entry'})
-            assert_identical(other_way_expected, other_way)
-        else:
-            # In python 3.5, using dim_kwargs should raise a ValueError.
-            with raises_regex(ValueError, "dim_kwargs isn't"):
-                original.expand_dims(e=["l", "m", "n"])
+    @pytest.mark.skipif(
+        sys.version_info[:2] > (3, 5),
+        reason="we only raise these errors for Python 3.5",
+    )
+    def test_expand_dims_kwargs_python35(self):
+        original = Dataset({'x': ('a', np.random.randn(3))})
+        with raises_regex(ValueError, "dim_kwargs isn't"):
+            original.expand_dims(e=["l", "m", "n"])
+        with raises_regex(TypeError, "must be an OrderedDict"):
+            original.expand_dims({'e': ["l", "m", "n"]})
+
+    @pytest.mark.skipif(
+        sys.version_info[:2] < (3, 6),
+        reason='keyword arguments are only ordered on Python 3.6+',
+    )
+    def test_expand_dims_kwargs_python36plus(self):
+        original = Dataset({'x': ('a', np.random.randn(3)),
+                            'y': (['b', 'a'], np.random.randn(4, 3))},
+                           coords={'a': np.linspace(0, 1, 3),
+                                   'b': np.linspace(0, 1, 4),
+                                   'c': np.linspace(0, 1, 5)},
+                           attrs={'key': 'entry'})
+        other_way = original.expand_dims(e=["l", "m", "n"])
+        other_way_expected = Dataset(
+            {'x': xr.DataArray(original['x'].values * np.ones([3, 3]),
+                               coords=dict(e=['l', 'm', 'n'],
+                                           a=np.linspace(0, 1, 3)),
+                               dims=['e', 'a']),
+             'y': xr.DataArray(original['y'].values * np.ones([3, 4, 3]),
+                               coords=dict(e=['l', 'm', 'n'],
+                                           b=np.linspace(0, 1, 4),
+                                           a=np.linspace(0, 1, 3)),
+                               dims=['e', 'b', 'a'])},
+            coords={'c': np.linspace(0, 1, 5)},
+            attrs={'key': 'entry'})
+        assert_identical(other_way_expected, other_way)
 
     def test_set_index(self):
         expected = create_test_multiindex()
diff --git a/xarray/tests/test_interp.py b/xarray/tests/test_interp.py
index 5596bfb3bfb..8347d54bd1e 100644
--- a/xarray/tests/test_interp.py
+++ b/xarray/tests/test_interp.py
@@ -291,7 +291,7 @@ def test_errors(use_dask):
     if use_dask:
         da = get_example_data(3)
     else:
-        da = get_example_data(1)
+        da = get_example_data(0)
 
     result = da.interp(x=[-1, 1, 3], kwargs={'fill_value': 0.0})
     assert not np.isnan(result.values).any()

From aa6abb592ac2464170459ca96409398ec8b4593a Mon Sep 17 00:00:00 2001
From: Stephan Hoyer <shoyer@google.com>
Date: Thu, 4 Apr 2019 18:31:26 -0700
Subject: [PATCH 13/30] 0.12.1 release

---
 doc/whats-new.rst | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/doc/whats-new.rst b/doc/whats-new.rst
index 6fc7c25ac91..76e51b21984 100644
--- a/doc/whats-new.rst
+++ b/doc/whats-new.rst
@@ -15,8 +15,8 @@ What's New
 
 .. _whats-new.0.12.1:
 
-v0.12.1 (unreleased)
---------------------
+v0.12.1 (4 April 2019)
+----------------------
 
 Enhancements
 ~~~~~~~~~~~~
@@ -25,7 +25,6 @@ Enhancements
   with size > 1. (:issue:`2710`)
   By `Martin Pletcher <https://github.com/pletchm>`_.
 
-
 Bug fixes
 ~~~~~~~~~
 

From 23d54a890e6cfe1de420071b597c911164de4cb8 Mon Sep 17 00:00:00 2001
From: Stephan Hoyer <shoyer@google.com>
Date: Thu, 4 Apr 2019 18:34:37 -0700
Subject: [PATCH 14/30] revert to 0.12.2 dev

---
 doc/whats-new.rst | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/doc/whats-new.rst b/doc/whats-new.rst
index 76e51b21984..4c126196469 100644
--- a/doc/whats-new.rst
+++ b/doc/whats-new.rst
@@ -13,6 +13,17 @@ What's New
     import xarray as xr
     np.random.seed(123456)
 
+.. _whats-new.0.12.2:
+
+v0.12.2 (unreleased)
+--------------------
+
+Enhancements
+~~~~~~~~~~~~
+
+Bug fixes
+~~~~~~~~~
+
 .. _whats-new.0.12.1:
 
 v0.12.1 (4 April 2019)

From e7ec0870b15114b0443c28bf7e32d42717808c98 Mon Sep 17 00:00:00 2001
From: Adam Leskis <leskis@gmail.com>
Date: Sun, 7 Apr 2019 20:55:07 +0100
Subject: [PATCH 15/30] update links to https (#2872)

---
 README.rst | 24 ++++++++++++------------
 1 file changed, 12 insertions(+), 12 deletions(-)

diff --git a/README.rst b/README.rst
index 6dbf774549d..83382f87ed5 100644
--- a/README.rst
+++ b/README.rst
@@ -8,9 +8,9 @@ xarray: N-D labeled arrays and datasets
 .. image:: https://coveralls.io/repos/pydata/xarray/badge.svg
    :target: https://coveralls.io/r/pydata/xarray
 .. image:: https://readthedocs.org/projects/xray/badge/?version=latest
-   :target: http://xarray.pydata.org/
-.. image:: http://img.shields.io/badge/benchmarked%20by-asv-green.svg?style=flat
-  :target: http://pandas.pydata.org/speed/xarray/
+   :target: https://xarray.pydata.org/
+.. image:: https://img.shields.io/badge/benchmarked%20by-asv-green.svg?style=flat
+  :target: https://pandas.pydata.org/speed/xarray/
 .. image:: https://img.shields.io/pypi/v/xarray.svg
    :target: https://pypi.python.org/pypi/xarray/
 
@@ -30,10 +30,10 @@ It is particularly tailored to working with netCDF_ files, which were the
 source of xarray's data model, and integrates tightly with dask_ for parallel
 computing.
 
-.. _NumPy: http://www.numpy.org
-.. _pandas: http://pandas.pydata.org
-.. _dask: http://dask.org
-.. _netCDF: http://www.unidata.ucar.edu/software/netcdf
+.. _NumPy: https://www.numpy.org
+.. _pandas: https://pandas.pydata.org
+.. _dask: https://dask.org
+.. _netCDF: https://www.unidata.ucar.edu/software/netcdf
 
 Why xarray?
 -----------
@@ -66,12 +66,12 @@ powerful and concise interface. For example:
 Documentation
 -------------
 
-Learn more about xarray in its official documentation at http://xarray.pydata.org/
+Learn more about xarray in its official documentation at https://xarray.pydata.org/
 
 Contributing
 ------------
 
-You can find information about contributing to xarray at our `Contributing page <http://xarray.pydata.org/en/latest/contributing.html#>`_.
+You can find information about contributing to xarray at our `Contributing page <https://xarray.pydata.org/en/latest/contributing.html#>`_.
 
 Get in touch
 ------------
@@ -81,9 +81,9 @@ Get in touch
 - For less well defined questions or ideas, or to announce other projects of
   interest to xarray users, use the `mailing list`_.
 
-.. _StackOverFlow: http://stackoverflow.com/questions/tagged/python-xarray
+.. _StackOverFlow: https://stackoverflow.com/questions/tagged/python-xarray
 .. _mailing list: https://groups.google.com/forum/#!forum/xarray
-.. _on GitHub: http://github.com/pydata/xarray
+.. _on GitHub: https://github.com/pydata/xarray
 
 NumFOCUS
 --------
@@ -120,7 +120,7 @@ Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
 You may obtain a copy of the License at
 
-  http://www.apache.org/licenses/LICENSE-2.0
+  https://www.apache.org/licenses/LICENSE-2.0
 
 Unless required by applicable law or agreed to in writing, software
 distributed under the License is distributed on an "AS IS" BASIS,

From 3435b03de218f54a55eb72dff597bb47b0f407cb Mon Sep 17 00:00:00 2001
From: Stephan Hoyer <shoyer@gmail.com>
Date: Sun, 7 Apr 2019 23:42:30 -0700
Subject: [PATCH 16/30] Fix mypy typing error in cftime_offsets.py (#2878)

---
 xarray/coding/cftime_offsets.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/xarray/coding/cftime_offsets.py b/xarray/coding/cftime_offsets.py
index d724554b458..2ee38a20a4d 100644
--- a/xarray/coding/cftime_offsets.py
+++ b/xarray/coding/cftime_offsets.py
@@ -79,7 +79,7 @@ def get_date_type(calendar):
 
 class BaseCFTimeOffset(object):
     _freq = None  # type: ClassVar[str]
-    _day_option = None
+    _day_option = None  # type: ClassVar[str]
 
     def __init__(self, n=1):
         if not isinstance(n, int):

From 2c10d1443bea09e5ef53e5a7e35195a195e193a7 Mon Sep 17 00:00:00 2001
From: Ryan Abernathey <ryan.abernathey@gmail.com>
Date: Tue, 9 Apr 2019 19:34:21 -0400
Subject: [PATCH 17/30] decreased pytest verbosity (#2881)

---
 .travis.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.travis.yml b/.travis.yml
index ea9ee7adcf4..212ddb77daa 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -65,7 +65,7 @@ script:
     elif [[ "$CONDA_ENV" == "py36-hypothesis" ]]; then
       pytest properties ;
     else
-      py.test xarray --cov=xarray --cov-config ci/.coveragerc --cov-report term-missing --verbose $EXTRA_FLAGS;
+      py.test xarray --cov=xarray --cov-config ci/.coveragerc --cov-report term-missing $EXTRA_FLAGS;
     fi
 
 after_success:

From f063f55f6ba31e8d871f9163570d94d256e72daa Mon Sep 17 00:00:00 2001
From: Lily Wang <lwang@live.com.au>
Date: Thu, 1 Nov 2018 05:52:13 +1100
Subject: [PATCH 18/30] added manual chunks for open_zarr

---
 xarray/backends/zarr.py       | 78 +++++++++++++++++++++++++----------
 xarray/tests/test_backends.py | 46 +++++++++++++++++++++
 2 files changed, 103 insertions(+), 21 deletions(-)

diff --git a/xarray/backends/zarr.py b/xarray/backends/zarr.py
index ee77e0833c4..a5ee9867f8e 100644
--- a/xarray/backends/zarr.py
+++ b/xarray/backends/zarr.py
@@ -352,10 +352,11 @@ def close(self):
             zarr.consolidate_metadata(self.ds.store)
 
 
-def open_zarr(store, group=None, synchronizer=None, auto_chunk=True,
+def open_zarr(store, group=None, synchronizer=None, chunks=None,
               decode_cf=True, mask_and_scale=True, decode_times=True,
               concat_characters=True, decode_coords=True,
-              drop_variables=None, consolidated=False):
+              drop_variables=None, consolidated=False, auto_chunk=True, 
+              overwrite_encoded_chunks=False):
     """Load and decode a dataset from a Zarr store.
 
     .. note:: Experimental
@@ -375,10 +376,19 @@ def open_zarr(store, group=None, synchronizer=None, auto_chunk=True,
         Array synchronizer provided to zarr
     group : str, obtional
         Group path. (a.k.a. `path` in zarr terminology.)
+    chunks : int or dict or {None, 'auto'}, optional
+        Chunk sizes along each dimension, e.g., ``5`` or
+        ``{'x': 5, 'y': 5}``. If `chunks='auto'`, dask chunks are created 
+        based on the variable's zarr chunks. If `chunks=None` and 
+        `auto_chunk=False`, zarr array data will lazily convert to numpy 
+        arrays upon access. 
     auto_chunk : bool, optional
         Whether to automatically create dask chunks corresponding to each
-        variable's zarr chunks. If False, zarr array data will lazily convert
-        to numpy arrays upon access.
+        variable's zarr chunks. If `chunks=None`, this overrides `chunks`. 
+        Equivalent to `chunks='auto'.` (Default: True)
+    overwrite_encoded_chunks: bool, optional
+        Whether to drop the zarr chunks encoded for each variable when a 
+        dataset is loaded with specified chunk sizes (default: False)
     decode_cf : bool, optional
         Whether to decode these variables, assuming they were saved according
         to CF conventions.
@@ -423,6 +433,15 @@ def open_zarr(store, group=None, synchronizer=None, auto_chunk=True,
     http://zarr.readthedocs.io/
     """
 
+    if auto_chunk and chunks is None:
+        chunks = 'auto' # maintain backwards compatibility
+
+    if not isinstance(chunks, (int, dict)):
+        if chunks != 'auto' and chunks is not None:
+            raise ValueError(
+                            "chunks must be an int, dict, 'auto', or None. "
+                            "Instead found %s. " % chunks)
+
     if not decode_cf:
         mask_and_scale = False
         decode_times = False
@@ -449,21 +468,38 @@ def maybe_decode_store(store, lock=False):
 
     # auto chunking needs to be here and not in ZarrStore because variable
     # chunks do not survive decode_cf
-    if auto_chunk:
-        # adapted from Dataset.Chunk()
-        def maybe_chunk(name, var):
-            from dask.base import tokenize
-            chunks = var.encoding.get('chunks')
-            if (var.ndim > 0) and (chunks is not None):
-                # does this cause any data to be read?
-                token2 = tokenize(name, var._data)
-                name2 = 'zarr-%s' % token2
-                return var.chunk(chunks, name=name2, lock=None)
-            else:
-                return var
-
-        variables = OrderedDict([(k, maybe_chunk(k, v))
-                                 for k, v in ds.variables.items()])
-        return ds._replace_vars_and_dims(variables)
-    else:
+    # return trivial case
+    if not chunks:
         return ds
+    
+    # adapted from Dataset.Chunk()
+    if isinstance(chunks, int):
+        chunks = dict.fromkeys(ds.dims, chunks)
+    
+    def selkeys(dict_, keys):
+        if dict_ is None:
+            return None
+        return dict((d, dict_[d]) for d in keys if d in dict_)
+    
+    def maybe_chunk(name, var, chunks):
+        from dask.base import tokenize
+
+        if chunks == 'auto':
+            chunks = var.encoding.get('chunks')
+        else:
+            chunks = selkeys(chunks, var.dims)
+            
+        if (var.ndim > 0) and (chunks is not None):
+            # does this cause any data to be read?
+            token2 = tokenize(name, var._data)
+            name2 = 'zarr-%s' % token2
+            var = var.chunk(chunks, name=name2, lock=None)
+            if overwrite_encoded_chunks and var.chunks is not None:
+                var.encoding['chunks'] = tuple(x[0] for x in var.chunks)
+            return var
+        else:
+            return var
+
+    variables = OrderedDict([(k, maybe_chunk(k, v, chunks))
+                                for k, v in ds.variables.items()])
+    return ds._replace_vars_and_dims(variables)
diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py
index a20ba2df229..5f21027962f 100644
--- a/xarray/tests/test_backends.py
+++ b/xarray/tests/test_backends.py
@@ -1405,6 +1405,52 @@ def test_auto_chunk(self):
                 assert v._in_memory == (k in actual.dims)
                 # chunk size should be the same as original
                 assert v.chunks == original[k].chunks
+    
+    def test_manual_chunk(self):
+        original = create_test_data().chunk({'dim1': 3, 'dim2': 4, 'dim3': 3})
+        
+        # All of these should return non-chunked arrays
+        NO_CHUNKS = (None, 0, {})
+        for no_chunk in NO_CHUNKS:
+            with self.roundtrip(
+                    original, open_kwargs={'chunks': no_chunk, 
+                                            'auto_chunk': False}) as actual:
+                for k, v in actual.variables.items():
+                    # only index variables should be in memory
+                    assert v._in_memory == (k in actual.dims)
+                    # there should be no chunks
+                    assert v.chunks is None
+
+        # uniform arrays
+        for i in range(2, 6):
+            rechunked = original.chunk(chunks=i)
+
+            with self.roundtrip(
+                    original, open_kwargs={'chunks': i}) as actual:
+                for k, v in actual.variables.items():
+                    # only index variables should be in memory
+                    assert v._in_memory == (k in actual.dims)
+                    # chunk size should be the same as rechunked
+                    assert v.chunks == rechunked[k].chunks
+        
+        chunks = {'dim1': 2, 'dim2': 3, 'dim3': 5}
+        rechunked = original.chunk(chunks=chunks)
+
+        open_overwritten = {'chunks': chunks, 
+                            'overwrite_encoded_chunks': True}
+
+        with self.roundtrip(
+                original, open_kwargs=open_overwritten) as actual:
+            for k, v in actual.variables.items():
+                    assert v.chunks == rechunked[k].chunks
+
+            with self.roundtrip(actual, open_kwargs={'chunks': 'auto'}) as auto:
+                # encoding should have changed
+                for k, v in actual.variables.items():
+                    assert v.chunks == rechunked[k].chunks
+
+                assert_identical(actual, auto)
+                assert_identical(actual.load(), auto.load())
 
     def test_write_uneven_dask_chunks(self):
         # regression for GH#2225

From c02a1c7f4dd6e686042d7622fe6dbdd40e0917ef Mon Sep 17 00:00:00 2001
From: Lily Wang <lwang@live.com.au>
Date: Thu, 1 Nov 2018 05:53:03 +1100
Subject: [PATCH 19/30] updated whats-new

---
 doc/whats-new.rst | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/doc/whats-new.rst b/doc/whats-new.rst
index 4c126196469..5d32fffc53a 100644
--- a/doc/whats-new.rst
+++ b/doc/whats-new.rst
@@ -440,6 +440,12 @@ Bug fixes
   encoding process if a reference date is used that is so distant that
   the dates must be encoded using cftime rather than NumPy (:issue:`2272`).
   By `Spencer Clark <https://github.com/spencerkclark>`_.
+  
+- ``xr.open_zarr`` now accepts manually specified chunks with the ``chunks=`` 
+  parameter. ``auto_chunk=True`` is equivalent to ``chunks='auto'`` for 
+  backwards compatibility. The ``overwrite_encoded_chunks`` parameter is 
+  added to remove the original zarr chunk encoding.
+  By `Lily Wang <https://github.com/lilyminium>`_.
 
 - Chunked datasets can now roundtrip to Zarr storage continually
   with `to_zarr` and ``open_zarr`` (:issue:`2300`).

From c361f705a707bc7df3ad6558592e165a62d11479 Mon Sep 17 00:00:00 2001
From: Lily Wang <lwang@live.com.au>
Date: Thu, 1 Nov 2018 06:14:23 +1100
Subject: [PATCH 20/30] fixed pep8 issues

---
 xarray/backends/zarr.py       | 23 +++++++++++------------
 xarray/tests/test_backends.py | 25 ++++++++++---------------
 2 files changed, 21 insertions(+), 27 deletions(-)

diff --git a/xarray/backends/zarr.py b/xarray/backends/zarr.py
index a5ee9867f8e..89f55400e82 100644
--- a/xarray/backends/zarr.py
+++ b/xarray/backends/zarr.py
@@ -378,16 +378,16 @@ def open_zarr(store, group=None, synchronizer=None, chunks=None,
         Group path. (a.k.a. `path` in zarr terminology.)
     chunks : int or dict or {None, 'auto'}, optional
         Chunk sizes along each dimension, e.g., ``5`` or
-        ``{'x': 5, 'y': 5}``. If `chunks='auto'`, dask chunks are created 
-        based on the variable's zarr chunks. If `chunks=None` and 
-        `auto_chunk=False`, zarr array data will lazily convert to numpy 
-        arrays upon access. 
+        ``{'x': 5, 'y': 5}``. If `chunks='auto'`, dask chunks are created
+        based on the variable's zarr chunks. If `chunks=None` and
+        `auto_chunk=False`, zarr array data will lazily convert to numpy
+        arrays upon access.
     auto_chunk : bool, optional
         Whether to automatically create dask chunks corresponding to each
-        variable's zarr chunks. If `chunks=None`, this overrides `chunks`. 
+        variable's zarr chunks. If `chunks=None`, this overrides `chunks`.
         Equivalent to `chunks='auto'.` (Default: True)
     overwrite_encoded_chunks: bool, optional
-        Whether to drop the zarr chunks encoded for each variable when a 
+        Whether to drop the zarr chunks encoded for each variable when a
         dataset is loaded with specified chunk sizes (default: False)
     decode_cf : bool, optional
         Whether to decode these variables, assuming they were saved according
@@ -434,12 +434,11 @@ def open_zarr(store, group=None, synchronizer=None, chunks=None,
     """
 
     if auto_chunk and chunks is None:
-        chunks = 'auto' # maintain backwards compatibility
+        chunks = 'auto'  # maintain backwards compatibility
 
     if not isinstance(chunks, (int, dict)):
         if chunks != 'auto' and chunks is not None:
-            raise ValueError(
-                            "chunks must be an int, dict, 'auto', or None. "
+            raise ValueError("chunks must be an int, dict, 'auto', or None. "
                             "Instead found %s. " % chunks)
 
     if not decode_cf:
@@ -471,7 +470,7 @@ def maybe_decode_store(store, lock=False):
     # return trivial case
     if not chunks:
         return ds
-    
+
     # adapted from Dataset.Chunk()
     if isinstance(chunks, int):
         chunks = dict.fromkeys(ds.dims, chunks)
@@ -488,7 +487,7 @@ def maybe_chunk(name, var, chunks):
             chunks = var.encoding.get('chunks')
         else:
             chunks = selkeys(chunks, var.dims)
-            
+        
         if (var.ndim > 0) and (chunks is not None):
             # does this cause any data to be read?
             token2 = tokenize(name, var._data)
@@ -501,5 +500,5 @@ def maybe_chunk(name, var, chunks):
             return var
 
     variables = OrderedDict([(k, maybe_chunk(k, v, chunks))
-                                for k, v in ds.variables.items()])
+                            for k, v in ds.variables.items()])
     return ds._replace_vars_and_dims(variables)
diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py
index 5f21027962f..5f555dde2d9 100644
--- a/xarray/tests/test_backends.py
+++ b/xarray/tests/test_backends.py
@@ -1412,9 +1412,8 @@ def test_manual_chunk(self):
         # All of these should return non-chunked arrays
         NO_CHUNKS = (None, 0, {})
         for no_chunk in NO_CHUNKS:
-            with self.roundtrip(
-                    original, open_kwargs={'chunks': no_chunk, 
-                                            'auto_chunk': False}) as actual:
+            open_kwargs = {'chunks': no_chunk, 'auto_chunk': False}
+            with self.roundtrip(original, open_kwargs=open_kwargs) as actual:
                 for k, v in actual.variables.items():
                     # only index variables should be in memory
                     assert v._in_memory == (k in actual.dims)
@@ -1424,9 +1423,8 @@ def test_manual_chunk(self):
         # uniform arrays
         for i in range(2, 6):
             rechunked = original.chunk(chunks=i)
-
-            with self.roundtrip(
-                    original, open_kwargs={'chunks': i}) as actual:
+            open_kwargs = {'chunks': i}
+            with self.roundtrip(original, open_kwargs=open_kwargs) as actual:
                 for k, v in actual.variables.items():
                     # only index variables should be in memory
                     assert v._in_memory == (k in actual.dims)
@@ -1435,20 +1433,17 @@ def test_manual_chunk(self):
         
         chunks = {'dim1': 2, 'dim2': 3, 'dim3': 5}
         rechunked = original.chunk(chunks=chunks)
-
-        open_overwritten = {'chunks': chunks, 
-                            'overwrite_encoded_chunks': True}
-
-        with self.roundtrip(
-                original, open_kwargs=open_overwritten) as actual:
+        
+        open_kwargs = {'chunks': chunks, 'overwrite_encoded_chunks': True}
+        with self.roundtrip(original, open_kwargs=open_kwargs) as actual:
             for k, v in actual.variables.items():
                     assert v.chunks == rechunked[k].chunks
-
-            with self.roundtrip(actual, open_kwargs={'chunks': 'auto'}) as auto:
+            
+            with self.roundtrip(actual) as auto:
                 # encoding should have changed
                 for k, v in actual.variables.items():
                     assert v.chunks == rechunked[k].chunks
-
+                    
                 assert_identical(actual, auto)
                 assert_identical(actual.load(), auto.load())
 

From 447af8c0e7a65c3ecc62925aebb077006e93c643 Mon Sep 17 00:00:00 2001
From: Lily Wang <lwang@live.com.au>
Date: Thu, 1 Nov 2018 06:18:37 +1100
Subject: [PATCH 21/30] removed whitespace

---
 xarray/backends/zarr.py       |  8 ++++----
 xarray/tests/test_backends.py | 12 ++++++------
 2 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/xarray/backends/zarr.py b/xarray/backends/zarr.py
index 89f55400e82..7ed26e31151 100644
--- a/xarray/backends/zarr.py
+++ b/xarray/backends/zarr.py
@@ -439,7 +439,7 @@ def open_zarr(store, group=None, synchronizer=None, chunks=None,
     if not isinstance(chunks, (int, dict)):
         if chunks != 'auto' and chunks is not None:
             raise ValueError("chunks must be an int, dict, 'auto', or None. "
-                            "Instead found %s. " % chunks)
+                             "Instead found %s. " % chunks)
 
     if not decode_cf:
         mask_and_scale = False
@@ -474,12 +474,12 @@ def maybe_decode_store(store, lock=False):
     # adapted from Dataset.Chunk()
     if isinstance(chunks, int):
         chunks = dict.fromkeys(ds.dims, chunks)
-    
+
     def selkeys(dict_, keys):
         if dict_ is None:
             return None
         return dict((d, dict_[d]) for d in keys if d in dict_)
-    
+
     def maybe_chunk(name, var, chunks):
         from dask.base import tokenize
 
@@ -487,7 +487,7 @@ def maybe_chunk(name, var, chunks):
             chunks = var.encoding.get('chunks')
         else:
             chunks = selkeys(chunks, var.dims)
-        
+
         if (var.ndim > 0) and (chunks is not None):
             # does this cause any data to be read?
             token2 = tokenize(name, var._data)
diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py
index 5f555dde2d9..01ef1caea38 100644
--- a/xarray/tests/test_backends.py
+++ b/xarray/tests/test_backends.py
@@ -1405,10 +1405,10 @@ def test_auto_chunk(self):
                 assert v._in_memory == (k in actual.dims)
                 # chunk size should be the same as original
                 assert v.chunks == original[k].chunks
-    
+
     def test_manual_chunk(self):
         original = create_test_data().chunk({'dim1': 3, 'dim2': 4, 'dim3': 3})
-        
+
         # All of these should return non-chunked arrays
         NO_CHUNKS = (None, 0, {})
         for no_chunk in NO_CHUNKS:
@@ -1430,20 +1430,20 @@ def test_manual_chunk(self):
                     assert v._in_memory == (k in actual.dims)
                     # chunk size should be the same as rechunked
                     assert v.chunks == rechunked[k].chunks
-        
+
         chunks = {'dim1': 2, 'dim2': 3, 'dim3': 5}
         rechunked = original.chunk(chunks=chunks)
-        
+
         open_kwargs = {'chunks': chunks, 'overwrite_encoded_chunks': True}
         with self.roundtrip(original, open_kwargs=open_kwargs) as actual:
             for k, v in actual.variables.items():
                     assert v.chunks == rechunked[k].chunks
-            
+
             with self.roundtrip(actual) as auto:
                 # encoding should have changed
                 for k, v in actual.variables.items():
                     assert v.chunks == rechunked[k].chunks
-                    
+
                 assert_identical(actual, auto)
                 assert_identical(actual.load(), auto.load())
 

From cdd23d4e37bfeaa1d3f18331539d9ddae8696530 Mon Sep 17 00:00:00 2001
From: Lily Wang <lwang@live.com.au>
Date: Wed, 7 Nov 2018 13:22:51 +1100
Subject: [PATCH 22/30] added deprecation warning

---
 xarray/backends/zarr.py       | 34 +++++++++++++++++++++++-----------
 xarray/tests/test_backends.py | 30 +++++++++++++++++++++++++-----
 2 files changed, 48 insertions(+), 16 deletions(-)

diff --git a/xarray/backends/zarr.py b/xarray/backends/zarr.py
index 7ed26e31151..302301248d3 100644
--- a/xarray/backends/zarr.py
+++ b/xarray/backends/zarr.py
@@ -1,4 +1,11 @@
+<<<<<<< HEAD
 from collections import OrderedDict
+=======
+from __future__ import absolute_import, division, print_function
+
+import warnings
+
+>>>>>>> added deprecation warning
 from distutils.version import LooseVersion
 
 import numpy as np
@@ -352,7 +359,7 @@ def close(self):
             zarr.consolidate_metadata(self.ds.store)
 
 
-def open_zarr(store, group=None, synchronizer=None, chunks=None,
+def open_zarr(store, group=None, synchronizer=None, chunks='auto',
               decode_cf=True, mask_and_scale=True, decode_times=True,
               concat_characters=True, decode_coords=True,
               drop_variables=None, consolidated=False, auto_chunk=True, 
@@ -379,13 +386,8 @@ def open_zarr(store, group=None, synchronizer=None, chunks=None,
     chunks : int or dict or {None, 'auto'}, optional
         Chunk sizes along each dimension, e.g., ``5`` or
         ``{'x': 5, 'y': 5}``. If `chunks='auto'`, dask chunks are created
-        based on the variable's zarr chunks. If `chunks=None` and
-        `auto_chunk=False`, zarr array data will lazily convert to numpy
-        arrays upon access.
-    auto_chunk : bool, optional
-        Whether to automatically create dask chunks corresponding to each
-        variable's zarr chunks. If `chunks=None`, this overrides `chunks`.
-        Equivalent to `chunks='auto'.` (Default: True)
+        based on the variable's zarr chunks. If `chunks=None`, zarr array
+        data will lazily convert to numpy arrays upon access.
     overwrite_encoded_chunks: bool, optional
         Whether to drop the zarr chunks encoded for each variable when a
         dataset is loaded with specified chunk sizes (default: False)
@@ -432,9 +434,19 @@ def open_zarr(store, group=None, synchronizer=None, chunks=None,
     ----------
     http://zarr.readthedocs.io/
     """
-
-    if auto_chunk and chunks is None:
-        chunks = 'auto'  # maintain backwards compatibility
+    if 'auto_chunk' in kwargs:
+        auto_chunk = kwargs.pop('auto_chunk')
+        if auto_chunk == True:
+            chunks = 'auto'  # maintain backwards compatibility
+        elif auto_chunk == False:
+            chunks = None
+
+        warnings.warn("auto_chunk is deprecated. Use chunks='auto' instead.",
+                      FutureWarning, stacklevel=2)
+
+    if kwargs:
+        raise TypeError("open_zarr() got unexpected keyword arguments " +
+                        ",".join(kwargs.keys()))
 
     if not isinstance(chunks, (int, dict)):
         if chunks != 'auto' and chunks is not None:
diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py
index 01ef1caea38..21e5518f6de 100644
--- a/xarray/tests/test_backends.py
+++ b/xarray/tests/test_backends.py
@@ -1391,7 +1391,7 @@ def test_auto_chunk(self):
         original = create_test_data().chunk()
 
         with self.roundtrip(
-                original, open_kwargs={'auto_chunk': False}) as actual:
+                original, open_kwargs={'chunks': None}) as actual:
             for k, v in actual.variables.items():
                 # only index variables should be in memory
                 assert v._in_memory == (k in actual.dims)
@@ -1399,7 +1399,7 @@ def test_auto_chunk(self):
                 assert v.chunks is None
 
         with self.roundtrip(
-                original, open_kwargs={'auto_chunk': True}) as actual:
+                original, open_kwargs={'chunks': 'auto'}) as actual:
             for k, v in actual.variables.items():
                 # only index variables should be in memory
                 assert v._in_memory == (k in actual.dims)
@@ -1412,7 +1412,7 @@ def test_manual_chunk(self):
         # All of these should return non-chunked arrays
         NO_CHUNKS = (None, 0, {})
         for no_chunk in NO_CHUNKS:
-            open_kwargs = {'chunks': no_chunk, 'auto_chunk': False}
+            open_kwargs = {'chunks': no_chunk}
             with self.roundtrip(original, open_kwargs=open_kwargs) as actual:
                 for k, v in actual.variables.items():
                     # only index variables should be in memory
@@ -1446,13 +1446,33 @@ def test_manual_chunk(self):
 
                 assert_identical(actual, auto)
                 assert_identical(actual.load(), auto.load())
+    
+    def test_deprecate_auto_chunk(self):
+        original = create_test_data().chunk()
+        with pytest.warns(FutureWarning):
+            with self.roundtrip(
+                original, open_kwargs={'auto_chunk': True}) as actual:
+                for k, v in actual.variables.items():
+                    # only index variables should be in memory
+                    assert v._in_memory == (k in actual.dims)
+                    # chunk size should be the same as original
+                    assert v.chunks == original[k].chunks
+        
+        with pytest.warns(FutureWarning):
+            with self.roundtrip(
+                original, open_kwargs={'auto_chunk': False}) as actual:
+                for k, v in actual.variables.items():
+                    # only index variables should be in memory
+                    assert v._in_memory == (k in actual.dims)
+                    # there should be no chunks
+                    assert v.chunks is None
+            
 
     def test_write_uneven_dask_chunks(self):
         # regression for GH#2225
         original = create_test_data().chunk({'dim1': 3, 'dim2': 4, 'dim3': 3})
-
         with self.roundtrip(
-                original, open_kwargs={'auto_chunk': True}) as actual:
+                original, open_kwargs={'chunks': 'auto'}) as actual:
             for k, v in actual.data_vars.items():
                 print(k)
                 assert v.chunks == actual[k].chunks

From 7099e70e80c7d81a0c34c8b3ccb2f0c05089dbdb Mon Sep 17 00:00:00 2001
From: Lily Wang <lwang@live.com.au>
Date: Wed, 7 Nov 2018 13:34:44 +1100
Subject: [PATCH 23/30] fixed pep8 issues

---
 xarray/backends/zarr.py       |  4 ++--
 xarray/tests/test_backends.py | 10 +++++-----
 2 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/xarray/backends/zarr.py b/xarray/backends/zarr.py
index 302301248d3..14119660ad0 100644
--- a/xarray/backends/zarr.py
+++ b/xarray/backends/zarr.py
@@ -436,9 +436,9 @@ def open_zarr(store, group=None, synchronizer=None, chunks='auto',
     """
     if 'auto_chunk' in kwargs:
         auto_chunk = kwargs.pop('auto_chunk')
-        if auto_chunk == True:
+        if auto_chunk:
             chunks = 'auto'  # maintain backwards compatibility
-        elif auto_chunk == False:
+        else:
             chunks = None
 
         warnings.warn("auto_chunk is deprecated. Use chunks='auto' instead.",
diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py
index 21e5518f6de..3dac12b5727 100644
--- a/xarray/tests/test_backends.py
+++ b/xarray/tests/test_backends.py
@@ -1446,27 +1446,27 @@ def test_manual_chunk(self):
 
                 assert_identical(actual, auto)
                 assert_identical(actual.load(), auto.load())
-    
+
     def test_deprecate_auto_chunk(self):
         original = create_test_data().chunk()
         with pytest.warns(FutureWarning):
             with self.roundtrip(
-                original, open_kwargs={'auto_chunk': True}) as actual:
+                    original, open_kwargs={'auto_chunk': True}) as actual:
                 for k, v in actual.variables.items():
                     # only index variables should be in memory
                     assert v._in_memory == (k in actual.dims)
                     # chunk size should be the same as original
                     assert v.chunks == original[k].chunks
-        
+
         with pytest.warns(FutureWarning):
             with self.roundtrip(
-                original, open_kwargs={'auto_chunk': False}) as actual:
+                    original, open_kwargs={'auto_chunk': False}) as actual:
                 for k, v in actual.variables.items():
                     # only index variables should be in memory
                     assert v._in_memory == (k in actual.dims)
                     # there should be no chunks
                     assert v.chunks is None
-            
+
 
     def test_write_uneven_dask_chunks(self):
         # regression for GH#2225

From 301953a1ddd3989193ff4f067ceda8b9ee6bbb12 Mon Sep 17 00:00:00 2001
From: Lily <lily.wang@anu.edu.au>
Date: Wed, 30 Jan 2019 11:06:35 +1100
Subject: [PATCH 24/30] added warning for bad chunks

---
 xarray/backends/zarr.py       | 47 ++++++++++++++++++++++++++---------
 xarray/tests/test_backends.py | 24 ++++++++++++++++++
 2 files changed, 59 insertions(+), 12 deletions(-)

diff --git a/xarray/backends/zarr.py b/xarray/backends/zarr.py
index 14119660ad0..9f10c6a3bb7 100644
--- a/xarray/backends/zarr.py
+++ b/xarray/backends/zarr.py
@@ -383,11 +383,12 @@ def open_zarr(store, group=None, synchronizer=None, chunks='auto',
         Array synchronizer provided to zarr
     group : str, obtional
         Group path. (a.k.a. `path` in zarr terminology.)
-    chunks : int or dict or {None, 'auto'}, optional
+    chunks : int or dict or tuple or {None, 'auto'}, optional
         Chunk sizes along each dimension, e.g., ``5`` or
         ``{'x': 5, 'y': 5}``. If `chunks='auto'`, dask chunks are created
         based on the variable's zarr chunks. If `chunks=None`, zarr array
-        data will lazily convert to numpy arrays upon access.
+        data will lazily convert to numpy arrays upon access. This accepts 
+        all the chunk specifications as Dask does.
     overwrite_encoded_chunks: bool, optional
         Whether to drop the zarr chunks encoded for each variable when a
         dataset is loaded with specified chunk sizes (default: False)
@@ -486,25 +487,47 @@ def maybe_decode_store(store, lock=False):
     # adapted from Dataset.Chunk()
     if isinstance(chunks, int):
         chunks = dict.fromkeys(ds.dims, chunks)
+    
+    if isinstance(chunks, tuple) and len(chunks) == len(ds.dims):
+        chunks = dict(zip(ds.dims, chunks))
+
+    def get_chunk(name, var, chunks):
+        chunk_spec = dict(zip(var.dims, var.encoding.get('chunks')))
+
+        # Coordinate labels aren't chunked
+        if var.ndim == 1 and var.dims[0] == name:
+            return chunk_spec
+
+        if chunks == 'auto':
+            return chunk_spec
+
+        for dim in var.dims:
+            if dim in chunks:
+                spec = chunks[dim]
+                if isinstance(spec, int):
+                    spec = (spec,)
+                if isinstance(spec, (tuple, list)) and chunk_spec[dim]:
+                    if any(s % chunk_spec[dim] for s in spec):
+                        print('ok any', spec, chunk_spec[dim], dim)
+                        warnings.warn("Specified Dask chunks %r would "
+                        "separate Zarr chunk shape %r for dimension %r. "
+                        "This significantly degrades performance. "
+                        "Consider rechunking after loading." 
+                        % (chunks[dim], chunk_spec[dim], dim))
+                chunk_spec[dim] = chunks[dim]
+        return chunk_spec
 
-    def selkeys(dict_, keys):
-        if dict_ is None:
-            return None
-        return dict((d, dict_[d]) for d in keys if d in dict_)
 
     def maybe_chunk(name, var, chunks):
         from dask.base import tokenize
 
-        if chunks == 'auto':
-            chunks = var.encoding.get('chunks')
-        else:
-            chunks = selkeys(chunks, var.dims)
+        chunk_spec = get_chunk(name, var, chunks)
 
-        if (var.ndim > 0) and (chunks is not None):
+        if (var.ndim > 0) and (chunk_spec is not None):
             # does this cause any data to be read?
             token2 = tokenize(name, var._data)
             name2 = 'zarr-%s' % token2
-            var = var.chunk(chunks, name=name2, lock=None)
+            var = var.chunk(chunk_spec, name=name2, lock=None)
             if overwrite_encoded_chunks and var.chunks is not None:
                 var.encoding['chunks'] = tuple(x[0] for x in var.chunks)
             return var
diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py
index 3dac12b5727..1b77da32528 100644
--- a/xarray/tests/test_backends.py
+++ b/xarray/tests/test_backends.py
@@ -1446,6 +1446,30 @@ def test_manual_chunk(self):
 
                 assert_identical(actual, auto)
                 assert_identical(actual.load(), auto.load())
+    
+    def test_warning_on_bad_chunks(self):
+        original = create_test_data().chunk({'dim1': 4, 'dim2': 3, 'dim3': 5})
+
+        bad_chunks = (2, {'dim2':(3, 3, 2, 1)})
+        for chunks in bad_chunks:
+            kwargs = {'chunks': chunks}
+            with pytest.warns(UserWarning):
+                with self.roundtrip(original, open_kwargs=kwargs) as actual:
+                    for k, v in actual.variables.items():
+                        # only index variables should be in memory
+                        assert v._in_memory == (k in actual.dims)
+
+        good_chunks = ({'dim2': 3}, {'dim3': 10})
+        for chunks in good_chunks:
+            kwargs = {'chunks': chunks}
+            with pytest.warns(None) as record:
+                with self.roundtrip(original, open_kwargs=kwargs) as actual:
+                    for k, v in actual.variables.items():
+                        # only index variables should be in memory
+                        assert v._in_memory == (k in actual.dims)
+            assert len(record) == 0
+
+
 
     def test_deprecate_auto_chunk(self):
         original = create_test_data().chunk()

From 8e61e7e3ff37a8ebdb42b9918e2a694a037f5342 Mon Sep 17 00:00:00 2001
From: Lily <lily.wang@anu.edu.au>
Date: Wed, 30 Jan 2019 11:34:50 +1100
Subject: [PATCH 25/30] fixed lingering rebase conflicts

---
 doc/whats-new.rst       | 11 +++++------
 xarray/backends/zarr.py | 26 +++++++++-----------------
 2 files changed, 14 insertions(+), 23 deletions(-)

diff --git a/doc/whats-new.rst b/doc/whats-new.rst
index 5d32fffc53a..ed3d2d60442 100644
--- a/doc/whats-new.rst
+++ b/doc/whats-new.rst
@@ -133,6 +133,11 @@ Other enhancements
   By `Keisuke Fujii <https://github.com/fujiisoup>`_.
 - Added :py:meth:`~xarray.Dataset.drop_dims` (:issue:`1949`).
   By `Kevin Squire <https://github.com/kmsquire>`_.
+- ``xr.open_zarr`` now accepts manually specified chunks via the ``chunks=``
+  parameter. ``auto_chunk`` is deprecated but still accepted for backwards
+  compatibility (``auto_chunk=True`` is equivalent to ``chunks='auto'``). The
+  ``overwrite_encoded_chunks`` option removes the original zarr chunk encoding.
+  By `Lily Wang <https://github.com/lilyminium>`_.
 
 Bug fixes
 ~~~~~~~~~
@@ -440,12 +445,6 @@ Bug fixes
   encoding process if a reference date is used that is so distant that
   the dates must be encoded using cftime rather than NumPy (:issue:`2272`).
   By `Spencer Clark <https://github.com/spencerkclark>`_.
-  
-- ``xr.open_zarr`` now accepts manually specified chunks with the ``chunks=`` 
-  parameter. ``auto_chunk=True`` is equivalent to ``chunks='auto'`` for 
-  backwards compatibility. The ``overwrite_encoded_chunks`` parameter is 
-  added to remove the original zarr chunk encoding.
-  By `Lily Wang <https://github.com/lilyminium>`_.
 
 - Chunked datasets can now roundtrip to Zarr storage continually
   with `to_zarr` and ``open_zarr`` (:issue:`2300`).
diff --git a/xarray/backends/zarr.py b/xarray/backends/zarr.py
index 9f10c6a3bb7..87507ece201 100644
--- a/xarray/backends/zarr.py
+++ b/xarray/backends/zarr.py
@@ -1,11 +1,5 @@
-<<<<<<< HEAD
-from collections import OrderedDict
-=======
-from __future__ import absolute_import, division, print_function
-
 import warnings
-
->>>>>>> added deprecation warning
+from collections import OrderedDict
 from distutils.version import LooseVersion
 
 import numpy as np
@@ -362,8 +356,8 @@ def close(self):
 def open_zarr(store, group=None, synchronizer=None, chunks='auto',
               decode_cf=True, mask_and_scale=True, decode_times=True,
               concat_characters=True, decode_coords=True,
-              drop_variables=None, consolidated=False, auto_chunk=True, 
-              overwrite_encoded_chunks=False):
+              drop_variables=None, consolidated=False,
+              overwrite_encoded_chunks=False, **kwargs):
     """Load and decode a dataset from a Zarr store.
 
     .. note:: Experimental
@@ -387,7 +381,7 @@ def open_zarr(store, group=None, synchronizer=None, chunks='auto',
         Chunk sizes along each dimension, e.g., ``5`` or
         ``{'x': 5, 'y': 5}``. If `chunks='auto'`, dask chunks are created
         based on the variable's zarr chunks. If `chunks=None`, zarr array
-        data will lazily convert to numpy arrays upon access. This accepts 
+        data will lazily convert to numpy arrays upon access. This accepts
         all the chunk specifications as Dask does.
     overwrite_encoded_chunks: bool, optional
         Whether to drop the zarr chunks encoded for each variable when a
@@ -487,7 +481,7 @@ def maybe_decode_store(store, lock=False):
     # adapted from Dataset.Chunk()
     if isinstance(chunks, int):
         chunks = dict.fromkeys(ds.dims, chunks)
-    
+
     if isinstance(chunks, tuple) and len(chunks) == len(ds.dims):
         chunks = dict(zip(ds.dims, chunks))
 
@@ -508,16 +502,14 @@ def get_chunk(name, var, chunks):
                     spec = (spec,)
                 if isinstance(spec, (tuple, list)) and chunk_spec[dim]:
                     if any(s % chunk_spec[dim] for s in spec):
-                        print('ok any', spec, chunk_spec[dim], dim)
                         warnings.warn("Specified Dask chunks %r would "
-                        "separate Zarr chunk shape %r for dimension %r. "
-                        "This significantly degrades performance. "
-                        "Consider rechunking after loading." 
-                        % (chunks[dim], chunk_spec[dim], dim))
+                            "separate Zarr chunk shape %r for dimension %r. "
+                            "This significantly degrades performance. "
+                            "Consider rechunking after loading."
+                            % (chunks[dim], chunk_spec[dim], dim))
                 chunk_spec[dim] = chunks[dim]
         return chunk_spec
 
-
     def maybe_chunk(name, var, chunks):
         from dask.base import tokenize
 

From 8fd65ea9f7ee841446e3ba1b287239f6cf4f0a16 Mon Sep 17 00:00:00 2001
From: Lily <lily.wang@anu.edu.au>
Date: Wed, 30 Jan 2019 11:38:14 +1100
Subject: [PATCH 26/30] fixed pep8 issues

---
 xarray/backends/zarr.py       | 9 +++++----
 xarray/tests/test_backends.py | 7 ++-----
 2 files changed, 7 insertions(+), 9 deletions(-)

diff --git a/xarray/backends/zarr.py b/xarray/backends/zarr.py
index 87507ece201..d0696f20499 100644
--- a/xarray/backends/zarr.py
+++ b/xarray/backends/zarr.py
@@ -503,10 +503,11 @@ def get_chunk(name, var, chunks):
                 if isinstance(spec, (tuple, list)) and chunk_spec[dim]:
                     if any(s % chunk_spec[dim] for s in spec):
                         warnings.warn("Specified Dask chunks %r would "
-                            "separate Zarr chunk shape %r for dimension %r. "
-                            "This significantly degrades performance. "
-                            "Consider rechunking after loading."
-                            % (chunks[dim], chunk_spec[dim], dim))
+                                      "separate Zarr chunk shape %r for "
+                                      "dimension %r. This significantly "
+                                      "degrades performance. Consider "
+                                      "rechunking after loading instead."
+                                      % (chunks[dim], chunk_spec[dim], dim))
                 chunk_spec[dim] = chunks[dim]
         return chunk_spec
 
diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py
index 1b77da32528..5efcdf9cd98 100644
--- a/xarray/tests/test_backends.py
+++ b/xarray/tests/test_backends.py
@@ -1446,11 +1446,11 @@ def test_manual_chunk(self):
 
                 assert_identical(actual, auto)
                 assert_identical(actual.load(), auto.load())
-    
+
     def test_warning_on_bad_chunks(self):
         original = create_test_data().chunk({'dim1': 4, 'dim2': 3, 'dim3': 5})
 
-        bad_chunks = (2, {'dim2':(3, 3, 2, 1)})
+        bad_chunks = (2, {'dim2': (3, 3, 2, 1)})
         for chunks in bad_chunks:
             kwargs = {'chunks': chunks}
             with pytest.warns(UserWarning):
@@ -1469,8 +1469,6 @@ def test_warning_on_bad_chunks(self):
                         assert v._in_memory == (k in actual.dims)
             assert len(record) == 0
 
-
-
     def test_deprecate_auto_chunk(self):
         original = create_test_data().chunk()
         with pytest.warns(FutureWarning):
@@ -1491,7 +1489,6 @@ def test_deprecate_auto_chunk(self):
                     # there should be no chunks
                     assert v.chunks is None
 
-
     def test_write_uneven_dask_chunks(self):
         # regression for GH#2225
         original = create_test_data().chunk({'dim1': 3, 'dim2': 4, 'dim3': 3})

From 4bb164d619cb6f0eacaaaf6122d7f82653d2851c Mon Sep 17 00:00:00 2001
From: Lily <lily.wang@anu.edu.au>
Date: Thu, 4 Apr 2019 12:39:26 +1100
Subject: [PATCH 27/30] added stacklevel

---
 xarray/backends/zarr.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/xarray/backends/zarr.py b/xarray/backends/zarr.py
index d0696f20499..e20140ee248 100644
--- a/xarray/backends/zarr.py
+++ b/xarray/backends/zarr.py
@@ -212,7 +212,7 @@ def encode_zarr_variable(var, needs_copy=True, name=None):
     # zarr allows unicode, but not variable-length strings, so it's both
     # simpler and more compact to always encode as UTF-8 explicitly.
     # TODO: allow toggling this explicitly via dtype in encoding.
-    coder = coding.strings.EncodedStringCoder(allows_unicode=False)
+    coder = coding.strings.EncodedStringCoder(allows_unicode=True)
     var = coder.encode(var, name=name)
     var = coding.strings.ensure_fixed_length_bytes(var)
 
@@ -507,7 +507,8 @@ def get_chunk(name, var, chunks):
                                       "dimension %r. This significantly "
                                       "degrades performance. Consider "
                                       "rechunking after loading instead."
-                                      % (chunks[dim], chunk_spec[dim], dim))
+                                      % (chunks[dim], chunk_spec[dim], dim),
+                                      stacklevel=2)
                 chunk_spec[dim] = chunks[dim]
         return chunk_spec
 

From 485717d285c3afb55bf65cda5cd1f1062bab04e8 Mon Sep 17 00:00:00 2001
From: Lily <lily.wang@anu.edu.au>
Date: Thu, 4 Apr 2019 12:44:45 +1100
Subject: [PATCH 28/30] fixed pep8 issues

---
 xarray/tests/test_backends.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py
index 5efcdf9cd98..bf40e529931 100644
--- a/xarray/tests/test_backends.py
+++ b/xarray/tests/test_backends.py
@@ -1437,7 +1437,7 @@ def test_manual_chunk(self):
         open_kwargs = {'chunks': chunks, 'overwrite_encoded_chunks': True}
         with self.roundtrip(original, open_kwargs=open_kwargs) as actual:
             for k, v in actual.variables.items():
-                    assert v.chunks == rechunked[k].chunks
+                assert v.chunks == rechunked[k].chunks
 
             with self.roundtrip(actual) as auto:
                 # encoding should have changed

From b0e1e1e68d80d4efb334ab40515e9282726a6407 Mon Sep 17 00:00:00 2001
From: Lily <lily.wang@anu.edu.au>
Date: Fri, 12 Apr 2019 11:34:02 +1000
Subject: [PATCH 29/30] disallow unicode again

---
 xarray/backends/zarr.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/xarray/backends/zarr.py b/xarray/backends/zarr.py
index e20140ee248..f5364314af8 100644
--- a/xarray/backends/zarr.py
+++ b/xarray/backends/zarr.py
@@ -212,7 +212,7 @@ def encode_zarr_variable(var, needs_copy=True, name=None):
     # zarr allows unicode, but not variable-length strings, so it's both
     # simpler and more compact to always encode as UTF-8 explicitly.
     # TODO: allow toggling this explicitly via dtype in encoding.
-    coder = coding.strings.EncodedStringCoder(allows_unicode=True)
+    coder = coding.strings.EncodedStringCoder(allows_unicode=False)
     var = coder.encode(var, name=name)
     var = coding.strings.ensure_fixed_length_bytes(var)
 

From f17cb5e99779acda42e211a5d18868aa168cef3b Mon Sep 17 00:00:00 2001
From: Lily <lily.wang@anu.edu.au>
Date: Fri, 12 Apr 2019 11:37:49 +1000
Subject: [PATCH 30/30] disallow unicode again

---
 xarray/backends/zarr.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/xarray/backends/zarr.py b/xarray/backends/zarr.py
index e20140ee248..f5364314af8 100644
--- a/xarray/backends/zarr.py
+++ b/xarray/backends/zarr.py
@@ -212,7 +212,7 @@ def encode_zarr_variable(var, needs_copy=True, name=None):
     # zarr allows unicode, but not variable-length strings, so it's both
     # simpler and more compact to always encode as UTF-8 explicitly.
     # TODO: allow toggling this explicitly via dtype in encoding.
-    coder = coding.strings.EncodedStringCoder(allows_unicode=True)
+    coder = coding.strings.EncodedStringCoder(allows_unicode=False)
     var = coder.encode(var, name=name)
     var = coding.strings.ensure_fixed_length_bytes(var)