From a5dc7b30350ac04e8b26a928c0d5833bb819de70 Mon Sep 17 00:00:00 2001 From: Remo Goetschi Date: Tue, 15 May 2018 13:43:58 +0200 Subject: [PATCH 1/7] avoid race condition during chunk write When the chunk file is first removed before the new version is moved into place, racing reads may encounter a missing chunk. Using rename() or replace() without remove() avoids the issue on Posix-Systems as the methods are atomic. The fallback of remove() -> rename() is included for Windows pre Python 3.3. Fixes #263 --- zarr/storage.py | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/zarr/storage.py b/zarr/storage.py index 6720b42d12..d73eea29b8 100644 --- a/zarr/storage.py +++ b/zarr/storage.py @@ -772,9 +772,18 @@ def __setitem__(self, key, value): f.write(value) # move temporary file into place - if os.path.exists(file_path): - os.remove(file_path) - os.rename(temp_path, file_path) + if hasattr(os, 'replace'): + # Python >= 3.3 has replace() which can replace existing + # files on all platforms. + os.replace(temp_path, file_path) + else: + # In Python < 3.3 use rename(). + if os.name == 'nt' and os.path.exists(file_path): + # On windows, rename() can't overwrite files. So + # the file is removed first. + os.remove(file_path) + + os.rename(temp_path, file_path) finally: # clean up if temp file still exists for whatever reason From 90339b3af69ac318b834481e0cd6266806d03a6d Mon Sep 17 00:00:00 2001 From: SBA Date: Thu, 8 Nov 2018 18:30:19 +0100 Subject: [PATCH 2/7] move feature-detection to init-time so it's not repeated on every write --- zarr/storage.py | 32 +++++++++++++++++++------------- 1 file changed, 19 insertions(+), 13 deletions(-) diff --git a/zarr/storage.py b/zarr/storage.py index d73eea29b8..f0075301d3 100644 --- a/zarr/storage.py +++ b/zarr/storage.py @@ -33,7 +33,6 @@ import numpy as np - from zarr.util import (normalize_shape, normalize_chunks, normalize_order, normalize_storage_path, buffer_size, normalize_fill_value, nolock, normalize_dtype) @@ -55,6 +54,24 @@ from zarr.codecs import Zlib default_compressor = Zlib() +# Find the best function for FS replace. +# Preferably atomic. +if hasattr(os, 'replace'): + # Python >= 3.3 has replace() which can replace existing + # files on all platforms. + replace = os.replace +else: + # In Python < 3.3 use rename(). + if os.name == 'nt': + def replace(old, new): + if os.path.exists(new): + # On windows, rename() can't overwrite files. So + # the file is removed first. + os.remove(new) + os.rename(old, new) + else: + replace = os.rename + def _path_to_prefix(path): # assume path already normalized @@ -772,18 +789,7 @@ def __setitem__(self, key, value): f.write(value) # move temporary file into place - if hasattr(os, 'replace'): - # Python >= 3.3 has replace() which can replace existing - # files on all platforms. - os.replace(temp_path, file_path) - else: - # In Python < 3.3 use rename(). - if os.name == 'nt' and os.path.exists(file_path): - # On windows, rename() can't overwrite files. So - # the file is removed first. - os.remove(file_path) - - os.rename(temp_path, file_path) + replace(temp_path, file_path) finally: # clean up if temp file still exists for whatever reason From 89dda99d8818d057d92e331e8f01eee8f8856a4c Mon Sep 17 00:00:00 2001 From: SBA Date: Wed, 14 Nov 2018 10:42:11 +0100 Subject: [PATCH 3/7] use pyosreplace to get atomic replace() on Windows --- setup.py | 1 + zarr/storage.py | 23 +++++++---------------- 2 files changed, 8 insertions(+), 16 deletions(-) diff --git a/setup.py b/setup.py index a5e8334e43..96667a4d48 100644 --- a/setup.py +++ b/setup.py @@ -27,6 +27,7 @@ 'numpy>=1.7', 'fasteners', 'numcodecs>=0.5.3', + "pyosreplace : python_version < '3.3' and sys.platform == 'win32'", ], package_dir={'': '.'}, packages=['zarr', 'zarr.tests'], diff --git a/zarr/storage.py b/zarr/storage.py index f0075301d3..adeb33e91c 100644 --- a/zarr/storage.py +++ b/zarr/storage.py @@ -54,23 +54,14 @@ from zarr.codecs import Zlib default_compressor = Zlib() -# Find the best function for FS replace. -# Preferably atomic. -if hasattr(os, 'replace'): - # Python >= 3.3 has replace() which can replace existing - # files on all platforms. - replace = os.replace +# Find which function to use for atomic replace +if sys.version_info >= (3, 3): + from os import replace +elif sys.platform == "win32": + from osreplace import replace else: - # In Python < 3.3 use rename(). - if os.name == 'nt': - def replace(old, new): - if os.path.exists(new): - # On windows, rename() can't overwrite files. So - # the file is removed first. - os.remove(new) - os.rename(old, new) - else: - replace = os.rename + # POSIX rename() is always atomic + from os import rename as replace def _path_to_prefix(path): From fb3624dd142fcf10d6fb6953eaa59b347d404988 Mon Sep 17 00:00:00 2001 From: SBA Date: Wed, 14 Nov 2018 10:44:15 +0100 Subject: [PATCH 4/7] disable coverage count for legacy branches --- zarr/storage.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/zarr/storage.py b/zarr/storage.py index adeb33e91c..5027d3a6a1 100644 --- a/zarr/storage.py +++ b/zarr/storage.py @@ -57,9 +57,9 @@ # Find which function to use for atomic replace if sys.version_info >= (3, 3): from os import replace -elif sys.platform == "win32": +elif sys.platform == "win32": # pragma: no cover from osreplace import replace -else: +else: # pragma: no cover # POSIX rename() is always atomic from os import rename as replace From 953912051b3c5af84e97decb23c7ccff3af21994 Mon Sep 17 00:00:00 2001 From: SBA Date: Wed, 14 Nov 2018 11:25:24 +0100 Subject: [PATCH 5/7] Use conditional instead of env marker Because the env markers didn't work. Just guessing at this point. --- setup.py | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/setup.py b/setup.py index 96667a4d48..1c864e31ec 100644 --- a/setup.py +++ b/setup.py @@ -1,6 +1,7 @@ # -*- coding: utf-8 -*- from __future__ import absolute_import, print_function, division from setuptools import setup +import sys DESCRIPTION = 'An implementation of chunked, compressed, ' \ @@ -9,6 +10,16 @@ with open('README.rst') as f: LONG_DESCRIPTION = f.read() +dependencies = [ + 'asciitree', + 'numpy>=1.7', + 'fasteners', + 'numcodecs>=0.5.3', +] + +if sys.version_info < (3, 3) and sys.platform == "win32": + dependencies.append('pyosreplace') + setup( name='zarr', description=DESCRIPTION, @@ -22,13 +33,7 @@ 'setuptools>18.0', 'setuptools-scm>1.5.4' ], - install_requires=[ - 'asciitree', - 'numpy>=1.7', - 'fasteners', - 'numcodecs>=0.5.3', - "pyosreplace : python_version < '3.3' and sys.platform == 'win32'", - ], + install_requires=dependencies, package_dir={'': '.'}, packages=['zarr', 'zarr.tests'], classifiers=[ From 1aee7cfdea91804f314ee069a18ddbcaa487e09b Mon Sep 17 00:00:00 2001 From: SBA Date: Wed, 21 Nov 2018 13:24:50 +0100 Subject: [PATCH 6/7] add pyosreplace package to requirements --- requirements.txt | 1 + requirements_dev.txt | 1 + 2 files changed, 2 insertions(+) diff --git a/requirements.txt b/requirements.txt index ab79134cb7..e035a2fc72 100644 --- a/requirements.txt +++ b/requirements.txt @@ -3,3 +3,4 @@ fasteners numcodecs numpy pytest +pyosreplace; python_version < '3.3' and sys.platform == 'win32' diff --git a/requirements_dev.txt b/requirements_dev.txt index 2ad18f372c..f6504724af 100644 --- a/requirements_dev.txt +++ b/requirements_dev.txt @@ -1,3 +1,4 @@ asciitree==0.3.3 fasteners==0.14.1 numcodecs==0.5.5 +pyosreplace==0.1; python_version < '3.3' and sys.platform == 'win32' From 9451f06aa2231af9596078faa32709540edc55b8 Mon Sep 17 00:00:00 2001 From: Alistair Miles Date: Tue, 4 Dec 2018 22:42:13 +0000 Subject: [PATCH 7/7] release notes [ci skip] --- docs/release.rst | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/docs/release.rst b/docs/release.rst index 335ec58fb3..45eb9c8a49 100644 --- a/docs/release.rst +++ b/docs/release.rst @@ -19,22 +19,28 @@ Enhancements * Support has been added for structured arrays with sub-array shape and/or nested fields. By :user:`Tarik Onalan `, :issue:`111`, :issue:`296`. -Maintenance -~~~~~~~~~~~ +Bug fixes +~~~~~~~~~ + +* The implementation of the :class:`zarr.storage.DirectoryStore` class has been modified to + ensure that writes are atomic and there are no race conditions where a chunk might appear + transiently missing during a write operation. By :user:`sbalmer `, :issue:`327`, + :issue:`263`. * The required version of the `numcodecs `_ package has been upgraded to 0.6.2, which has enabled some code simplification and fixes a failing test involving msgpack encoding. By :user:`John Kirkham `, :issue:`352`, :issue:`355`, :issue:`324`. -* CI and test environments have been upgraded to include Python 3.7, drop Python 3.4, and - upgrade all pinned package requirements. :issue:`308`. - * Failing tests related to pickling/unpickling have been fixed. By :user:`Ryan Williams `, :issue:`273`, :issue:`308`. -Acknowledgments -~~~~~~~~~~~~~~~ +Maintenance +~~~~~~~~~~~ + +* CI and test environments have been upgraded to include Python 3.7, drop Python 3.4, and + upgrade all pinned package requirements. :issue:`308`. + .. _release_2.2.0: