From c5725a9f5e98a248212a7f2eb1d9b35bf3ce8994 Mon Sep 17 00:00:00 2001 From: Dick Carter Date: Fri, 10 Apr 2020 20:34:47 -0700 Subject: [PATCH 01/11] Add sm arch 80 to Makefile --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 4ee71c9478b1..7494ada0c258 100644 --- a/Makefile +++ b/Makefile @@ -429,7 +429,7 @@ endif # be JIT-compiled by the updated driver from the included PTX. ifeq ($(USE_CUDA), 1) ifeq ($(CUDA_ARCH),) - KNOWN_CUDA_ARCHS := 30 35 50 52 60 61 70 75 + KNOWN_CUDA_ARCHS := 30 35 50 52 60 61 70 75 80 # Run nvcc on a zero-length file to check architecture-level support. # Create args to include SASS in the fat binary for supported levels. CUDA_ARCH := $(foreach arch,$(KNOWN_CUDA_ARCHS), \ From b6db51b5ffbcdaf24ea6d327977fdcb76b91e7ce Mon Sep 17 00:00:00 2001 From: Dick Carter Date: Sun, 19 Jul 2020 14:12:50 -0700 Subject: [PATCH 02/11] Unittest tolerance handling improvements (#18694) * Add sm arch 80 to Makefile * Add TF32 to cuBLAS GEMMs Signed-off-by: Serge Panev * Add CUDA version guards Signed-off-by: Serge Panev * Remove useless TF32 for double and old CUDA version Signed-off-by: Serge Panev * Factorize VERSION_ADJUSTED_TF32_MATH Signed-off-by: Serge Panev * Add TF32 considerations to test_util.py:check_consistency() * Bypass test_gluon_gpu.py:test_large_models if gmem >32GB * Default tols in assert_almost_equal() now a function of dtype and ctx * Expand types listed by default_tols() * Fix pylint * All with_seed() tests to waitall in teardown * Elevate MXNET_TEST_SEED logging to WARNING * Revert test_gluon_gpu.py:test_rnn_layer to default tols * Fix test_gluon_model_zoo_gpu.py::test_inference and test_operator_gpy.py::test_np_linalg_{solve,tensorinv} * test_numpy_interoperability.py to not fix seed for rest of CI * Further fix to test_np_linalg_tensorinv * Fix test_gluon_data.py:test_dataloader_context when run on 1-GPU system. 
* Fix test_operator_gpu.py::test_embedding_with_type * Fix test_operator_gpu.py::{test_*convolution_large_c,test_np_linalg_tensorsolve} * Remove unneeded print() from test_numpy_interoperability.py * Unify tol handling of check_consistency() and assert_almost_equal(). Test tweaks. * Add tol handling of assert_almost_equal() with number args * Add tol handling of bool comparisons * Fix test_numpy_op.py::test_np_random_rayleigh * Fix test_operator_gpu.py::test_batchnorm_with_type * Fix test_gluon.py::test_sync_batchnorm in cpu selftest * Improve unittest failure reporting * Add to robustness of test_operator_gpu.py::test_embedding_with_type * Check_consistency() to use equal backward gradients for increased test robustness * Fix test_operator_gpu.py::test_{fully_connected,gemm}. Add default_numeric_eps(). * test_utils.py fix for numeric gradient calc * Reinstate rtol=1e-2 for test_operator.py::test_order * Remove auto-cast of check_consistency() input data to least precise dtype (not needed) * Fix test_operator.py::test_{reciprocal,cbrt,rcbrt}_op * Expand default float64 numeric_eps for test_operator_gpu.py::test_softmin * Fix segfault-on-error of @retry decorator. Add test isolation. * assert_almost_equal() to handle a,b scalars * Fix test_operator_gpu.py::test_gluon_{mvn,mvn_v1} race * Fix test_operator_gpu.py::test_flatten_slice_after_conv via scale * Remove test_utils.py:almost_equal_ignore_nan() * Fix sample vs. pop variance issue with test_numpy_op.py::test_npx_batch_norm * Expose test_utils.py:effective_dtype() and use to fix test_operator_gpu.py::test_np_linalg_svd * Fix true_divide int_array / int_scalar -> float_array to honor np_default_dtype * Try test_elemwise_binary_ops serial to avoid pytest worker crash * Fix (log_)softmax backward on empty ndarray * Temporarily log all CI seeds to troubleshoot seed non-determinism * Revert "Temporarily log all CI seeds to troubleshoot seed non-determinism" This reverts commit f60eff20785b812ac4fcd70d51359ee0cbfb3e47. 
* Temp log all CI seeds to troubleshoot unwanted seed determinism * Revert "Add sm arch 80 to Makefile" This reverts commit f9306cecc53b0633ef5f5b7b000802fbf0d73fe9. * Same fix of sample vs. pop variance issue, now with test_operator_gpu.py::test_batchnorm * Revert "Temp log all CI seeds to troubleshoot unwanted seed determinism" This reverts commit ff328efb0be3445690669d5437a6af575ff12b49. * Marking test_sparse_dot_grad with garbage_expected after teardown error * Fix flakiness of test_gluon_probability{_v1,_v2}.py::test_gluon_kl{_v1,} * Temp skip of test_aggregate_duplication on gpu * Add seeding to test_{numpy,}_contrib_gluon_data_vision.py. Make created files unique. * Add ndarray module isolation to help debug test_bbox_augmenters worker crash * Marking test_sparse_square_sum serial after pytest worker crash * Fix flakiness of test_gluon_probability{_v1,_v2}.py::test_half_cauchy{_v1,} Co-authored-by: Serge Panev Co-authored-by: Bart Gawrych --- python/mxnet/test_utils.py | 360 +++++++++++------- src/operator/linalg.h | 8 + src/operator/linalg_impl.h | 34 +- src/operator/numpy/np_true_divide-inl.h | 19 +- tests/python/gpu/test_gluon_gpu.py | 16 +- tests/python/gpu/test_gluon_model_zoo_gpu.py | 2 +- tests/python/gpu/test_operator_gpu.py | 95 ++--- tests/python/unittest/common.py | 14 +- tests/python/unittest/test_autograd.py | 1 + tests/python/unittest/test_gluon.py | 4 +- tests/python/unittest/test_ndarray.py | 2 +- .../unittest/test_numpy_interoperability.py | 7 +- tests/python/unittest/test_numpy_op.py | 43 +-- tests/python/unittest/test_operator.py | 126 +++--- tests/python/unittest/test_sparse_operator.py | 1 + 15 files changed, 440 insertions(+), 292 deletions(-) diff --git a/python/mxnet/test_utils.py b/python/mxnet/test_utils.py index 9a24b5fd7d51..9e544547b1a1 100755 --- a/python/mxnet/test_utils.py +++ b/python/mxnet/test_utils.py @@ -71,19 +71,110 @@ def default_dtype(): # _TODO: get default dtype from environment variable return np.float32 +def 
default_rtols(): + """Get default relative tolerances for data comparisons involving each data type.""" + return {np.dtype(np.float16): 1e-2, + np.dtype(np.float32): 1e-4, + np.dtype(np.float64): 1e-5, + np.dtype(np.bool): 0, + np.dtype(np.int8): 0, + np.dtype(np.uint8): 0, + np.dtype(np.int32): 0, + np.dtype(np.uint32): 0, + np.dtype(np.int64): 0, + np.dtype(np.uint64): 0} + +def default_atols(): + """Get default absolute tolerances for data comparisons involving each data type.""" + return {np.dtype(np.float16): 1e-1, + np.dtype(np.float32): 1e-3, + np.dtype(np.float64): 1e-20, + np.dtype(np.bool): 0, + np.dtype(np.int8): 0, + np.dtype(np.uint8): 0, + np.dtype(np.int32): 0, + np.dtype(np.uint32): 0, + np.dtype(np.int64): 0, + np.dtype(np.uint64): 0} + +def default_numeric_eps(): + """Get default epsilon for finite difference gradient calculations with data type.""" + # prefer a power-of-two eps, since no bits are dropped when serving as an input delta + return {np.dtype(np.float16): 1.0 / 2**6, + np.dtype(np.float32): 1.0 / 2**9, + np.dtype(np.float64): 1.0 / 2**14} + + +def effective_dtype(dat): + """ Return the most appropriate dtype for determining the tolerance used in dat comparisons + Parameters + ---------- + dat : np.ndarray or mx.nd.array or mx.np.ndarray + """ + # On arch 80 gpus, a float32-io gemm or conv op will trim the mantissa of data + # inputs to be of comparable precision to a float16, so float16 becomes the + # 'effective dtype' for tolerance tests involving such op outputs. 
-def get_atol(atol=None): - """Get default numerical threshold for regression test.""" - # _TODO: get from env variable, different threshold might - # be needed for different device and dtype - return 1e-20 if atol is None else atol + # Is TF32 enabled in the ctx (the default on arch 80 GPUs) + def is_TF32_enabled(ctx): + try: + return (ctx.device_type == 'gpu' and + get_cuda_compute_capability(ctx) == 80 and + os.environ.get('NVIDIA_TF32_OVERRIDE') != '0') + except: # pylint: disable=bare-except + return False + + ctx = dat.ctx if hasattr(dat, 'ctx') else None + dtype = np.dtype(dat.dtype) + if dtype == np.dtype(np.float32) and is_TF32_enabled(ctx): + return np.dtype(np.float16) + else: + return dtype + + +def get_tolerance(dat, tol, default_tol): + """ Return the tolerance to be used for dat comparisons based on the given tol, datatype and context. + Parameters + ---------- + dat : np.ndarray or mx.nd.array or mx.np.ndarray + tol : float, or a dict of dtype->float + default_tol : default dict of dtype->float for all types + """ + + if isinstance(tol, numbers.Number): + return tol + + # If the caller has supplied a tol dict, use that if it has an entry for dtype, + # else use the supplied default tol dict. + dtype = effective_dtype(dat) + tol = {} if tol is None else tol + return tol.get(dtype, default_tol[dtype]) + + +def get_tols(x, y, rtol, atol): + """For comparing two datasets 'x' and 'y', what relative and absolute tolerances should be used.""" + # Tolerance analysis needs 'dtype' of 'x' and 'y', so convert numbers to numpy scalars as needed + if isinstance(x, numbers.Number): + x = np.array(x) + if isinstance(y, numbers.Number): + y = np.array(y) + + # If tols are not specified, use the largest default tol for 'x' and 'y' based on their ctx and dtype. 
+ rtol = max(get_tolerance(x, rtol, default_rtols()), + get_tolerance(y, rtol, default_rtols())) + atol = max(get_tolerance(x, atol, default_atols()), + get_tolerance(y, atol, default_atols())) + return rtol, atol -def get_rtol(rtol=None): + +def get_atol(atol=None, dtype=np.dtype(np.float64)): """Get default numerical threshold for regression test.""" - # _TODO: get from env variable, different threshold might - # be needed for different device and dtype - return 1e-5 if rtol is None else rtol + return default_atols()[dtype] if atol is None else atol + +def get_rtol(rtol=None, dtype=np.dtype(np.float64)): + """Get default numerical threshold for regression test.""" + return default_rtols()[dtype] if rtol is None else rtol def get_etol(etol=None): """Get default numerical threshold for regression test.""" @@ -513,10 +604,8 @@ def np_reduce(dat, axis, keepdims, numpy_reduce_func): return ret -def find_max_violation(a, b, rtol=None, atol=None): +def _find_max_violation(a, b, rtol, atol): """Finds and returns the location of maximum violation.""" - rtol = get_rtol(rtol) - atol = get_atol(atol) # 'smart' absdiff that considers inf's as equals (to match np.allclose) absdiff = np.where(np.equal(a, b), 0, np.abs(a-b)) tol = atol + rtol*np.abs(b) @@ -579,9 +668,9 @@ def assert_almost_equal(a, b, rtol=None, atol=None, names=('a', 'b'), equal_nan= ---------- a : np.ndarray or mx.nd.array b : np.ndarray or mx.nd.array - rtol : None or float + rtol : None or float or dict of dtype -> float The relative threshold. Default threshold will be used if set to ``None``. - atol : None or float + atol : None or float or dict of dtype -> float The absolute threshold. Default threshold will be used if set to ``None``. 
names : tuple of names, optional The names used in error message when an exception occurs @@ -593,8 +682,12 @@ def assert_almost_equal(a, b, rtol=None, atol=None, names=('a', 'b'), equal_nan= if not use_broadcast: checkShapes(a, b) - rtol = get_rtol(rtol) - atol = get_atol(atol) + rtol, atol = get_tols(a, b, rtol, atol) + + if isinstance(a, mx.numpy.ndarray): + a = a.asnumpy() + if isinstance(b, mx.numpy.ndarray): + b = b.asnumpy() use_np_allclose = isinstance(a, np.ndarray) and isinstance(b, np.ndarray) if not use_np_allclose: if not (hasattr(a, 'context') and hasattr(b, 'context') and a.context == b.context and a.dtype == b.dtype): @@ -618,32 +711,37 @@ def assert_almost_equal(a, b, rtol=None, atol=None, names=('a', 'b'), equal_nan= a = a.asnumpy() b = b.asnumpy() - index, rel = find_max_violation(a, b, rtol, atol) - indexErr = index - relErr = rel - - print('\n*** Maximum errors for vector of size {}: rtol={}, atol={}\n'.format(a.size, rtol, atol)) - aTmp = a.copy() - bTmp = b.copy() - i = 1 - while i <= a.size: - if i <= mismatches[0]: - print("%3d: Error %f %s" %(i, rel, locationError(a, b, index, names))) + index, rel = _find_max_violation(a, b, rtol, atol) + if index != (): + # a, b are the numpy arrays + indexErr = index + relErr = rel + + print('\n*** Maximum errors for vector of size {}: rtol={}, atol={}\n'.format(a.size, rtol, atol)) + aTmp = a.copy() + bTmp = b.copy() + i = 1 + while i <= a.size: + if i <= mismatches[0]: + print("%3d: Error %f %s" %(i, rel, locationError(a, b, index, names))) + + aTmp[index] = bTmp[index] = 0 + if almost_equal(aTmp, bTmp, rtol, atol, equal_nan=equal_nan): + break - aTmp[index] = bTmp[index] = 0 - if almost_equal(aTmp, bTmp, rtol, atol, equal_nan=equal_nan): - break + i += 1 + if i <= mismatches[1] or mismatches[1] <= 0: + index, rel = _find_max_violation(aTmp, bTmp, rtol, atol) + else: + break - i += 1 - if i <= mismatches[1] or mismatches[1] <= 0: - index, rel = find_max_violation(aTmp, bTmp, rtol, atol) - else: - 
break + mismatchDegree = "at least " if mismatches[1] > 0 and i > mismatches[1] else "" + errMsg = "Error %f exceeds tolerance rtol=%e, atol=%e (mismatch %s%f%%).\n%s" % \ + (relErr, rtol, atol, mismatchDegree, 100*i/a.size, \ + locationError(a, b, indexErr, names, maxError=True)) + else: + errMsg = "Error %f exceeds tolerance rtol=%e, atol=%e.\n" % (rel, rtol, atol) - mismatchDegree = "at least " if mismatches[1] > 0 and i > mismatches[1] else "" - errMsg = "Error %f exceeds tolerance rtol=%e, atol=%e (mismatch %s%f%%).\n%s" % \ - (relErr, rtol, atol, mismatchDegree, 100*i/a.size, \ - locationError(a, b, indexErr, names, maxError=True)) np.set_printoptions(threshold=4, suppress=True) msg = npt.build_err_msg([a, b], err_msg=errMsg) @@ -662,16 +760,25 @@ def assert_almost_equal_with_err(a, b, rtol=None, atol=None, etol=None, ---------- a : np.ndarray b : np.ndarray + rtol : None or float or dict of dtype -> float + The relative threshold. Default threshold will be used if set to ``None``. + atol : None or float or dict of dtype -> float + The absolute threshold. Default threshold will be used if set to ``None``. threshold : None or float The checking threshold. Default threshold will be used if set to ``None``. etol : None or float The error rate threshold. If etol is float, return true if error_rate < etol even if any error is found. 
+ names : tuple of names, optional + The names used in error message when an exception occurs + equal_nan : boolean, optional + The flag determining how to treat NAN values in comparison + mismatches : tuple of mismatches + Maximum number of mismatches to be printed (mismatches[0]) and determine (mismatches[1]) """ etol = get_etol(etol) if etol > 0: - rtol = get_rtol(rtol) - atol = get_atol(atol) + rtol, atol = get_tols(a, b, rtol, atol) if isinstance(a, mx.nd.NDArray): a = a.asnumpy() if isinstance(b, mx.nd.NDArray): @@ -679,7 +786,7 @@ def assert_almost_equal_with_err(a, b, rtol=None, atol=None, etol=None, equals = np.isclose(a, b, rtol=rtol, atol=atol) err = 1 - np.count_nonzero(equals) / equals.size if err > etol: - index, rel = find_max_violation(a, b, rtol, atol) + index, rel = _find_max_violation(a, b, rtol, atol) indexErr = index relErr = rel @@ -697,7 +804,7 @@ def assert_almost_equal_with_err(a, b, rtol=None, atol=None, etol=None, i += 1 if i <= mismatches[1] or mismatches[1] <= 0: - index, rel = find_max_violation(aTmp, bTmp, rtol, atol) + index, rel = _find_max_violation(aTmp, bTmp, rtol, atol) else: break @@ -712,31 +819,6 @@ def assert_almost_equal_with_err(a, b, rtol=None, atol=None, etol=None, assert_almost_equal(a, b, rtol=rtol, atol=atol, equal_nan=equal_nan) -def almost_equal_ignore_nan(a, b, rtol=None, atol=None): - """Test that two NumPy arrays are almost equal (ignoring NaN in either array). - Combines a relative and absolute measure of approximate eqality. - If either the relative or absolute check passes, the arrays are considered equal. - Including an absolute check resolves issues with the relative check where all - array values are close to zero. - - Parameters - ---------- - a : np.ndarray - b : np.ndarray - rtol : None or float - The relative threshold. Default threshold will be used if set to ``None``. - atol : None or float - The absolute threshold. Default threshold will be used if set to ``None``. 
- """ - a = np.copy(a) - b = np.copy(b) - nan_mask = np.logical_or(np.isnan(a), np.isnan(b)) - a[nan_mask] = 0 - b[nan_mask] = 0 - - return almost_equal(a, b, rtol, atol) - - def assert_almost_equal_ignore_nan(a, b, rtol=None, atol=None, names=('a', 'b')): """Test that two NumPy arrays are almost equal (ignoring NaN in either array). Combines a relative and absolute measure of approximate eqality. @@ -776,14 +858,14 @@ def decorate(f): """Decorate a test case.""" def wrapper(*args, **kwargs): """Wrapper for tests function.""" - for _ in range(n): + for i in range(n): try: f(*args, **kwargs) + return except AssertionError as e: - err = e - continue - return - raise err + if i == n-1: + raise e + mx.nd.waitall() return wrapper return decorate @@ -1015,7 +1097,7 @@ def as_stype(var, stype, dtype): return approx_grads -def check_numeric_gradient(sym, location, aux_states=None, numeric_eps=1e-3, rtol=1e-2, +def check_numeric_gradient(sym, location, aux_states=None, numeric_eps=None, rtol=None, atol=None, grad_nodes=None, use_forward_train=True, ctx=None, grad_stype_dict=None, dtype=default_dtype()): """Verify an operation by checking backward pass via finite difference method. 
@@ -1060,9 +1142,6 @@ def check_numeric_gradient(sym, location, aux_states=None, numeric_eps=1e-3, rto [1] https://github.com/Theano/Theano/blob/master/theano/gradient.py """ assert dtype in (np.float16, np.float32, np.float64) - # cannot use finite differences with small eps without high precision - if dtype in (np.float32, np.float16): - assert numeric_eps >= 1e-5 if ctx is None: ctx = default_context() @@ -1136,12 +1215,18 @@ def random_projection(shape): assert len(executor.outputs) == 1 executor.forward(is_train=True) + + eps = get_tolerance(executor.outputs[0], numeric_eps, default_numeric_eps()) + # cannot use finite differences with small eps without high precision + if dtype in (np.float32, np.float16): + assert eps >= 1e-5 + executor.backward() - symbolic_grads = {k:executor.grad_dict[k].asnumpy() for k in grad_nodes} + symbolic_grads = executor.grad_dict numeric_gradients = numeric_grad( executor, location_npy, aux_states_npy, - eps=numeric_eps, use_forward_train=use_forward_train, dtype=dtype) + eps=eps, use_forward_train=use_forward_train, dtype=dtype) for name in grad_nodes: fd_grad = numeric_gradients[name] @@ -1151,6 +1236,8 @@ def random_projection(shape): assert_almost_equal(fd_grad, sym_grad, rtol, atol, ("NUMERICAL_%s"%name, "BACKWARD_%s"%name)) elif grad_req[name] == 'add': + if isinstance(sym_grad, mx.nd.NDArray): + sym_grad = sym_grad.asnumpy() assert_almost_equal(fd_grad, sym_grad - orig_grad, rtol, atol, ("NUMERICAL_%s"%name, "BACKWARD_%s"%name)) elif grad_req[name] == 'null': @@ -1160,7 +1247,7 @@ def random_projection(shape): raise ValueError("Invalid grad_req %s for argument %s"%(grad_req[name], name)) -def check_symbolic_forward(sym, location, expected, rtol=1E-4, atol=None, +def check_symbolic_forward(sym, location, expected, rtol=None, atol=None, aux_states=None, ctx=None, equal_nan=False, dtype=default_dtype()): """Compares a symbol's forward results with the expected ones. 
@@ -1236,14 +1323,14 @@ def check_symbolic_forward(sym, location, expected, rtol=1E-4, atol=None, executor.forward(is_train=False) - outputs = [x.asnumpy() for x in executor.outputs] + outputs = executor.outputs for output_name, expect, output in zip(sym.list_outputs(), expected, outputs): assert_almost_equal(expect, output, rtol, atol, ("EXPECTED_%s"%output_name, "FORWARD_%s"%output_name), equal_nan=equal_nan) return executor.outputs -def check_symbolic_backward(sym, location, out_grads, expected, rtol=1e-5, atol=None, +def check_symbolic_backward(sym, location, out_grads, expected, rtol=None, atol=None, aux_states=None, grad_req='write', ctx=None, grad_stypes=None, equal_nan=False, dtype=default_dtype()): """Compares a symbol's backward results with the expected ones. @@ -1361,7 +1448,7 @@ def check_symbolic_backward(sym, location, out_grads, expected, rtol=1e-5, atol= executor.backward(out_grads) - grads = {k: v.asnumpy() for k, v in args_grad_data.items()} + grads = args_grad_data for name in expected: if grad_req[name] == 'write': @@ -1369,7 +1456,8 @@ def check_symbolic_backward(sym, location, out_grads, expected, rtol=1e-5, atol= ("EXPECTED_%s"%name, "BACKWARD_%s"%name), equal_nan=equal_nan) elif grad_req[name] == 'add': - assert_almost_equal(expected[name], grads[name] - args_grad_npy[name], + grad = grads[name].asnumpy() if isinstance(grads[name], mx.nd.NDArray) else grads[name] + assert_almost_equal(expected[name], grad - args_grad_npy[name], rtol, atol, ("EXPECTED_%s"%name, "BACKWARD_%s"%name), equal_nan=equal_nan) elif grad_req[name] == 'null': @@ -1454,16 +1542,8 @@ def check_speed(sym, location=None, ctx=None, N=20, grad_req=None, typ="whole", raise ValueError('typ can only be "whole" or "forward".') -def get_tolerance(rtol, ctx): - if 'atol' in ctx: - return ctx['atol'] - if 'atol_mult' in ctx: - return ctx['atol_mult'] * rtol - return rtol - - def check_consistency(sym, ctx_list, scale=1.0, grad_req='write', - arg_params=None, aux_params=None, 
tol=None, + arg_params=None, aux_params=None, rtol=None, atol=None, raise_on_err=True, ground_truth=None, equal_nan=False, use_uniform=False, rand_type=np.float64): """Check symbol gives the same output for different running context @@ -1478,6 +1558,20 @@ def check_consistency(sym, ctx_list, scale=1.0, grad_req='write', Standard deviation of the inner normal distribution. Used in initialization. grad_req : str or list of str or dict of str to str Gradient requirement. + arg_params : dict of input name -> input data + data to use for non-aux inputs + aux_params : dict of input name -> input data + data to use for aux inputs + rtol : float or dictionary dtype->float, optional + The relative error tolerance. + atol : float or dictionary dtype->float, optional + The absolute error tolerance. + raise_on_err : bool, optional, defaults to True + Should an error raise an exception (or just output exception message) + ground_truth : dict of output name -> data, optional + Provided ideal result to be compared against + equal_nan : bool, optional, defaults to False + Should nans be treated as equal in the comparison use_unifrom: bool Optional, When flag set to true, random input data generated follows uniform distribution, @@ -1513,20 +1607,6 @@ def check_consistency(sym, ctx_list, scale=1.0, grad_req='write', 'type_dict': {'concat_arg0': np.float32, 'concat_arg1': np.float32}}] >>> check_consistency(sym, ctx_list) """ - if tol is None: - tol = {np.dtype(np.float16): 1e-1, - np.dtype(np.float32): 1e-3, - np.dtype(np.float64): 1e-5, - np.dtype(np.uint8): 0, - np.dtype(np.int32): 0, - np.dtype(np.int64): 0} - elif isinstance(tol, numbers.Number): - tol = {np.dtype(np.float16): tol, - np.dtype(np.float32): tol, - np.dtype(np.float64): tol, - np.dtype(np.uint8): tol, - np.dtype(np.int32): tol, - np.dtype(np.int64): tol} assert len(ctx_list) > 1 if isinstance(sym, Symbol): @@ -1544,10 +1624,16 @@ def check_consistency(sym, ctx_list, scale=1.0, grad_req='write', arg_params = {} if 
arg_params is None else arg_params aux_params = {} if aux_params is None else aux_params - for n, arr in exe_list[0].arg_dict.items(): + + # returns the least precise of two dtypes + def smaller_dtype(dt1, dt2): + return dt1 if dt2 is None or np.dtype(dt1).itemsize < np.dtype(dt2).itemsize else dt2 + + # It's important to assign random inputs in a deterministic order, for reproducibility. + for n, arr in _sorted_items(exe_list[0].arg_dict): if n not in arg_params: if use_uniform: - arg_params[n] = np.random.uniform(low=-0.92, high=0.92, + arg_params[n] = np.random.uniform(low=-0.92 * scale, high=0.92 * scale, size=arr.shape).astype(rand_type) else: arg_params[n] = np.random.normal(size=arr.shape, @@ -1566,31 +1652,28 @@ def check_consistency(sym, ctx_list, scale=1.0, grad_req='write', arr[:] = np.zeros(arr.shape, dtype=arr.dtype) dtypes = [np.dtype(exe.outputs[0].dtype) for exe in exe_list] - max_idx = np.argmax(dtypes) + # Select the ground truth as the first model having the highest precision output[0] + gt_idx = np.argmax(dtypes) gt = ground_truth if gt is None: - gt = exe_list[max_idx].output_dict.copy() + gt = exe_list[gt_idx].output_dict.copy() if grad_req != 'null': - gt.update(exe_list[max_idx].grad_dict) + gt.update(exe_list[gt_idx].grad_dict) # test for exe in exe_list: exe.forward(is_train=False) for i, exe in enumerate(exe_list): - if i == max_idx: + if i == gt_idx: continue - rtol = tol[dtypes[i]] - atol = get_tolerance(rtol, ctx_list[i]) for name, arr in zip(output_names, exe.outputs): - # Previously, the cast was to dtypes[i], but symbol may be mixed-precision, - # so casting the ground truth to the actual output type seems more correct. 
- gtarr = gt[name].astype(arr.dtype) + gtarr = gt[name] try: assert_almost_equal(arr, gtarr, rtol=rtol, atol=atol, equal_nan=equal_nan) except AssertionError as e: - print('Predict Err: ctx %d vs ctx %d at %s'%(i, max_idx, name)) + print('Predict Err: ctx %d vs ctx %d at %s'%(i, gt_idx, name)) traceback.print_exc() if raise_on_err: raise e @@ -1599,29 +1682,50 @@ def check_consistency(sym, ctx_list, scale=1.0, grad_req='write', # train if grad_req != 'null': + # Perform forward() for exe in exe_list: exe.forward(is_train=True) - exe.backward(exe.outputs) + # Use the first executor's output data, cast to the least precise dtype, + # as the gradient data to pass to all executor's backward() call. + least_precise_dtype = [out.dtype for out in exe_list[0].outputs] + for exe in exe_list: + least_precise_dtype = [smaller_dtype(out1.dtype, dt) \ + for (out1, dt) in zip(exe.outputs, least_precise_dtype)] + golden_data_np = [out.astype(dt).asnumpy() \ + for (out, dt) in zip(exe_list[0].outputs, least_precise_dtype)] + # Perform backward() + for exe in exe_list: + out_grads = [mx.nd.array(golden_np, ctx=exe._ctx, + dtype=out.dtype).tostype(out.stype) + for (golden_np, out) in zip(golden_data_np, exe.outputs)] + exe.backward(out_grads) for i, exe in enumerate(exe_list): - if i == max_idx: + if i == gt_idx: continue - rtol = tol[dtypes[i]] - atol = get_tolerance(rtol, ctx_list[i]) curr = zip(output_names + arg_names, exe.outputs + exe.grad_arrays) for name, arr in curr: if gt[name] is None: assert arr is None continue - # Previous cast was to dtypes[i], but symbol may be mixed-precision, - # so casting the ground truth to the actual output type seems more correct. - gtarr = gt[name].astype(arr.dtype) + gtarr = gt[name] try: - assert_almost_equal(arr, gtarr, rtol=rtol, atol=atol, equal_nan=equal_nan) + rt, at = rtol, atol + # If the primary data i/o type is float16, then the tolerance used when + # comparing a float32 input gradient (e.g. batchnorm gamma) should be float16. 
+ smaller_arr_dtype = smaller_dtype(arr.dtype, dtypes[i]) + smaller_gt_dtype = smaller_dtype(gtarr.dtype, dtypes[gt_idx]) + if smaller_arr_dtype != arr.dtype or \ + smaller_gt_dtype != gtarr.dtype: + rt, at = get_tols(arr.astype(smaller_arr_dtype), + gtarr.astype(smaller_gt_dtype), rtol, atol) + assert_almost_equal(arr, gtarr, rtol=rt, atol=at, equal_nan=equal_nan) except AssertionError as e: - print('Train Err: ctx %d vs ctx %d at %s'%(i, max_idx, name)) + print('Train Err: {} {} ctx {} vs {} {} ctx {} at {}'.format( + np.dtype(arr.dtype).name, arr.ctx, i, + np.dtype(gtarr.dtype).name, gtarr.ctx, gt_idx, name)) traceback.print_exc() if raise_on_err: raise e diff --git a/src/operator/linalg.h b/src/operator/linalg.h index 291e251f5cbc..3e82c6a2fad1 100644 --- a/src/operator/linalg.h +++ b/src/operator/linalg.h @@ -280,6 +280,14 @@ void linalg_batch_det_backward_helper(const Tensor& LU, const DType zero_det, const mxnet::OpContext& ctx); +#ifdef __CUDACC__ +#if CUDA_VERSION < 11000 +#define VERSION_ADJUSTED_TF32_MATH CUBLAS_DEFAULT_MATH +#else +#define VERSION_ADJUSTED_TF32_MATH CUBLAS_TF32_TENSOR_OP_MATH +#endif +#endif // __CUDACC__ + #include "linalg_impl.h" #endif // MXNET_OPERATOR_LINALG_H_ diff --git a/src/operator/linalg_impl.h b/src/operator/linalg_impl.h index fd6800d184e4..47b54f6ac340 100644 --- a/src/operator/linalg_impl.h +++ b/src/operator/linalg_impl.h @@ -205,12 +205,15 @@ inline void linalg_gemm(const Tensor& A, #else cublasDataType_t full_datatype = CUBLAS_DATA_FULL; #endif + auto handle = Stream::GetBlasHandle(s); + cublasMath_t saved_math_mode = SetCublasMathMode(handle, VERSION_ADJUSTED_TF32_MATH); CUBLAS_CALL(cublasSgemmEx( - Stream::GetBlasHandle(s), (tB ? CUBLAS_OP_T : CUBLAS_OP_N), + handle, (tB ? CUBLAS_OP_T : CUBLAS_OP_N), (tA ? CUBLAS_OP_T : CUBLAS_OP_N), C.size(1), C.size(0), (tB ? 
B.size(1) : B.size(0)), &alpha, B.dptr_, full_datatype, B.stride_, A.dptr_, full_datatype, A.stride_, &beta, C.dptr_, full_datatype, - C.stride_)) + C.stride_)); + CUBLAS_CALL(cublasSetMathMode(handle, saved_math_mode)); } #else @@ -228,13 +231,16 @@ void linalg_gemm_axis(const Tensor& A, const Tensor::GetBlasHandle(s), \ + auto handle = Stream::GetBlasHandle(s); \ + cublasMath_t saved_math_mode = SetCublasMathMode(handle, VERSION_ADJUSTED_TF32_MATH); \ + CUBLAS_CALL(cublas##fname(handle, \ (tB ? CUBLAS_OP_T : CUBLAS_OP_N), \ (tA ? CUBLAS_OP_T : CUBLAS_OP_N), \ C.size(2), C.size(0), (tB ? B.size(2) : B.size(0)), &alpha, \ B.dptr_, B.size(1)*B.stride_, B.stride_, \ A.dptr_, A.size(1)*A.stride_, A.stride_, &beta, \ C.dptr_, C.size(1)*C.stride_, C.stride_, A.size(1))) \ + CUBLAS_CALL(cublasSetMathMode(handle, saved_math_mode)); \ } LINALG_GPU_GEMM_AXIS(SgemmStridedBatched, float) LINALG_GPU_GEMM_AXIS(DgemmStridedBatched, double) @@ -342,13 +348,22 @@ void linalg_gemm(const Tensor::GetBlasHandle(s), \ + auto handle = Stream::GetBlasHandle(s); \ + cublasMath_t saved_math_mode = SetCublasMathMode(handle, VERSION_ADJUSTED_TF32_MATH); \ + CUBLAS_CALL(cublas##fname(handle, \ (tB ? CUBLAS_OP_T : CUBLAS_OP_N), \ (tA ? CUBLAS_OP_T : CUBLAS_OP_N), \ C.size(2), C.size(1), (tB ? 
B.size(2) : B.size(1)), \ - &alpha, B.dptr_, B.stride_, B.size(1) * B.stride_, \ - A.dptr_, A.stride_, A.size(1) * A.stride_, \ - &beta, C.dptr_, C.stride_, C.size(1) * C.stride_, A.size(0))) \ + &alpha, \ + B.dptr_, B.stride_, \ + static_cast(B.size(1) * B.stride_), \ + A.dptr_, A.stride_, \ + static_cast(A.size(1) * A.stride_), \ + &beta, \ + C.dptr_, C.stride_, \ + static_cast(C.size(1) * C.stride_), \ + A.size(0))) \ + CUBLAS_CALL(cublasSetMathMode(handle, saved_math_mode)); \ } LINALG_GPU_BATCH_GEMM(DgemmStridedBatched, double) @@ -373,7 +388,7 @@ void linalg_gemm(const Tensor(const Tensor::GetBlasHandle(s); \ + cublasMath_t saved_math_mode = SetCublasMathMode(handle, VERSION_ADJUSTED_TF32_MATH); \ for (index_t i = 0; i < A.size(2); ++i) { \ CUBLAS_CALL(cublas##fname(Stream::GetBlasHandle(s), \ (tB ? CUBLAS_OP_T : CUBLAS_OP_N), \ @@ -423,6 +440,7 @@ void linalg_gemm(const Tensor, xpu>::Launch( - s, data.Size(), out.dptr(), data.dptr(), - static_cast(alpha)); + CHECK(out.type_flag_ == mshadow::kFloat32 || out.type_flag_ == mshadow::kFloat64) + << "true_divide only supports float32 and float64" + " output when input's dtype is " + << type_string(inputs[0].type_flag_); + MSHADOW_REAL_TYPE_SWITCH(out.type_flag_, ODType, { + MXNET_INT_TYPE_SWITCH(inputs[0].type_flag_, DType, { + MXNET_ASSIGN_REQ_SWITCH(req[0], Req, { + Kernel, xpu>::Launch( + s, data.Size(), out.dptr(), data.dptr(), + static_cast(alpha)); + }); }); }); #else diff --git a/tests/python/gpu/test_gluon_gpu.py b/tests/python/gpu/test_gluon_gpu.py index 42a2424c7d9b..52280bf898a5 100644 --- a/tests/python/gpu/test_gluon_gpu.py +++ b/tests/python/gpu/test_gluon_gpu.py @@ -50,10 +50,9 @@ def check_rnn_layer(layer): states = layer.begin_state(16) co, cs = layer(x, states) - # atol of 1e-6 required, as exposed by seed 2124685726 - assert_almost_equal(go, co, rtol=1e-2, atol=1e-6) + assert_almost_equal(go, co) for g, c in zip(gs, cs): - assert_almost_equal(g, c, rtol=1e-2, atol=1e-6) + assert_almost_equal(g, 
c) @with_seed() @@ -70,9 +69,9 @@ def check_rnn_layer_w_rand_inputs(layer): states = layer.begin_state(16) co, cs = layer(x, states) - assert_almost_equal(go, co, rtol=1e-2, atol=1e-6) + assert_almost_equal(go, co) for g, c in zip(gs, cs): - assert_almost_equal(g, c, rtol=1e-2, atol=1e-6) + assert_almost_equal(g, c) @with_seed() @@ -481,6 +480,13 @@ def tensor_size(big_tensor_bytes): # This in the past has given cudnnFind() trouble when it needed to allocate similar I/O's # from the area carved out by the MXNET_GPU_MEM_POOL_RESERVE setting (by default 5%). (free_mem_bytes, total_mem_bytes) = mx.context.gpu_memory_info(ctx.device_id) + # This test needs to be 'qualified' for use with each new larger memory size + largest_supported_total_mem_GB = 32 + if (total_mem_bytes > largest_supported_total_mem_GB * 1024 * 1024 * 1024): + sys.stderr.write( + ' bypassing test due to too-large global memory of size {} ... '.format(total_mem_bytes)) + return + start_size = tensor_size(0.20 * total_mem_bytes) num_trials = 10 sys.stderr.write( diff --git a/tests/python/gpu/test_gluon_model_zoo_gpu.py b/tests/python/gpu/test_gluon_model_zoo_gpu.py index 6f559db62808..8d473f705a41 100644 --- a/tests/python/gpu/test_gluon_model_zoo_gpu.py +++ b/tests/python/gpu/test_gluon_model_zoo_gpu.py @@ -91,7 +91,7 @@ def test_inference(): max_val = np.max(np.abs(cpu_out.asnumpy())) gpu_max_val = np.max(np.abs(gpu_out.asnumpy())) eprint(model_name + ": CPU " + str(max_val) + ", GPU " + str(gpu_max_val)) - assert_almost_equal(cpu_out / max_val, gpu_out / gpu_max_val, rtol=1e-3, atol=1e-3) + assert_almost_equal(cpu_out / max_val, gpu_out / gpu_max_val) def get_nn_model(name): if "densenet" in name: diff --git a/tests/python/gpu/test_operator_gpu.py b/tests/python/gpu/test_operator_gpu.py index 9378480d9af6..bcf906a92e44 100644 --- a/tests/python/gpu/test_operator_gpu.py +++ b/tests/python/gpu/test_operator_gpu.py @@ -26,6 +26,7 @@ from nose.tools import assert_raises import scipy.sparse as sps 
import mxnet.ndarray.sparse as mxsps +import itertools from mxnet.test_utils import check_consistency, set_default_context, assert_almost_equal, assert_allclose from mxnet.base import MXNetError from mxnet import autograd @@ -495,30 +496,20 @@ def test_batchnorm_with_type(): # V2, 2D - sym = mx.sym.BatchNorm(name='norm', fix_gamma=False, cudnn_off=True) - check_consistency(sym, ctx_list_v2_2D) - sym = mx.sym.BatchNorm(name='norm', fix_gamma=False, cudnn_off=True) - check_consistency(sym, ctx_list_v2_2D) - sym = mx.sym.BatchNorm(name='norm', fix_gamma=True, cudnn_off=True) - check_consistency(sym, ctx_list_v2_2D) - sym = mx.sym.BatchNorm(name='norm', fix_gamma=True, cudnn_off=True) - check_consistency(sym, ctx_list_v2_2D) + bools = [False, True] + for fix_gamma, cudnn_off in itertools.product(bools, bools): + sym = mx.sym.BatchNorm(name='norm', fix_gamma=fix_gamma, cudnn_off=cudnn_off) + check_consistency(sym, ctx_list_v2_2D) # V2, 1D - sym = mx.sym.BatchNorm(name='norm', fix_gamma=False, cudnn_off=True) - check_consistency(sym, ctx_list_v2_1D) - sym = mx.sym.BatchNorm(name='norm', fix_gamma=False, cudnn_off=True) - check_consistency(sym, ctx_list_v2_1D) - sym = mx.sym.BatchNorm(name='norm', fix_gamma=True, cudnn_off=True) - check_consistency(sym, ctx_list_v2_1D) - sym = mx.sym.BatchNorm(name='norm', fix_gamma=True, cudnn_off=True) - check_consistency(sym, ctx_list_v2_1D) - # - # # V2, 3D - sym = mx.sym.BatchNorm(name='norm', fix_gamma=False, cudnn_off=True) - check_consistency(sym, ctx_list_v2_3D) - sym = mx.sym.BatchNorm(name='norm', fix_gamma=True, cudnn_off=True) - check_consistency(sym, ctx_list_v2_3D) + for fix_gamma, cudnn_off in itertools.product(bools, bools): + sym = mx.sym.BatchNorm(name='norm', fix_gamma=fix_gamma, cudnn_off=cudnn_off) + check_consistency(sym, ctx_list_v2_1D) + + # V2, 3D + for fix_gamma, cudnn_off in itertools.product(bools, [True,]): + sym = mx.sym.BatchNorm(name='norm', fix_gamma=fix_gamma, cudnn_off=cudnn_off) + 
check_consistency(sym, ctx_list_v2_3D) @with_seed() @@ -632,9 +623,9 @@ def test_convolution_with_type(): np.dtype(np.float64): 1e-5, np.dtype(np.uint8): 0, np.dtype(np.int32): 0} - check_consistency(sym, ctx_list, tol=tol) + check_consistency(sym, ctx_list, rtol=tol, atol=tol) # test ability to turn off training on bias - check_consistency(sym, ctx_list, grad_req={'conv_data': 'write', 'conv_weight': 'write', 'conv_bias': 'null'}, tol=tol) + check_consistency(sym, ctx_list, grad_req={'conv_data': 'write', 'conv_weight': 'write', 'conv_bias': 'null'}, rtol=tol, atol=tol) # Apply N symbols against each of M contexts, checking that all NxM combinations match. @@ -717,7 +708,6 @@ def test_conv_deconv_guards(): # Test cases for convolution and deconvolution via strided fft. Ensure that the framework # guards against problematic CUDNN_CONVOLUTION_BWD_DATA_ALGO_FFT_TILING in cuDNN [7.3.1,7.5) # see https://docs.nvidia.com/deeplearning/sdk/cudnn-release-notes/rel_750.html#rel_750 - tol = 1e-1 for (op, opname) in [(mx.sym.Convolution, 'conv'), (mx.sym.Deconvolution, 'deconv')]: dataname = opname + '_data' ctx = {'ctx': mx.gpu(0), dataname: (32, 32, 64, 64), 'type_dict': {dataname: np.float32}} @@ -732,7 +722,7 @@ def test_conv_deconv_guards(): try: sym = op(**test_case_args) sym_no_cudnn = op(cudnn_off=True, **test_case_args) - check_consistency([sym, sym_no_cudnn], [ctx, ctx], tol=tol) + check_consistency([sym, sym_no_cudnn], [ctx, ctx], scale=0.1) except: print('Test failure of mx.sym.{} with args: {}'.format(op.__name__, test_case_args)) raise @@ -756,7 +746,7 @@ def _conv_with_num_streams(seed): cudnn_off=True, name='conv') try: # tol can be pretty high- we're looking for a large diff due to garbaged workspace - check_consistency([sym, sym_no_cudnn], [ctx, ctx], tol=1e-2) + check_consistency([sym, sym_no_cudnn], [ctx, ctx], rtol=1e-2, atol=1e-2) except: print('Failing conv size = {}'.format(size)) raise @@ -778,20 +768,19 @@ def test_convolution_multiple_streams(): 
@with_seed() def test_convolution_large_c(): problematic_c = 64 * 1024 - # The convolution accumulates many values, so set large tolerances. - tol = {np.dtype(np.float32): 1, - np.dtype(np.float64): 1} + # The convolution accumulates many values, so scale the input magnitude. + scale = 0.1 def test_1D_with_width(width, grad_req): ctx_list = [{'ctx': mx.gpu(0), 'conv_data': (1, problematic_c, width), 'type_dict': {'conv_data': np.float32}}, {'ctx': mx.gpu(0), 'conv_data': (1, problematic_c, width), 'type_dict': {'conv_data': np.float64}}] sym = mx.sym.Convolution(layout='NCW', num_filter=8, kernel=(2,), name='conv') - check_consistency([sym, sym], ctx_list, tol=tol, grad_req=grad_req) + check_consistency([sym, sym], ctx_list, grad_req=grad_req, scale=scale) def test_2D_with_width(width, grad_req): ctx_list = [{'ctx': mx.gpu(0), 'conv_data': (1, problematic_c, 2, width), 'type_dict': {'conv_data': np.float32}}, {'ctx': mx.gpu(0), 'conv_data': (1, problematic_c, 2, width), 'type_dict': {'conv_data': np.float64}}] sym = mx.sym.Convolution(layout='NCHW', num_filter=4, kernel=(2,2), name='conv') - check_consistency([sym, sym], ctx_list, tol=tol, grad_req=grad_req) + check_consistency([sym, sym], ctx_list, grad_req=grad_req, scale=scale) # Run with different data tensor shapes to run cudnnFind() multiple times. # First, populate algo and op caches with models that always use cudnnFind() (req == 'write'). @@ -808,20 +797,19 @@ def test_2D_with_width(width, grad_req): @with_seed() def test_deconvolution_large_c(): problematic_c = 64 * 1024 - # The deconvolution accumulates many values, so set large tolerances. - tol = {np.dtype(np.float32): 1, - np.dtype(np.float64): 1} + # The deconvolution accumulates many values, so scale the input magnitude. 
+ scale = 0.1 def test_1D_with_width(width, grad_req): ctx_list = [{'ctx': mx.gpu(0), 'deconv_data': (1, 8, width), 'type_dict': {'deconv_data': np.float32}}, {'ctx': mx.gpu(0), 'deconv_data': (1, 8, width), 'type_dict': {'deconv_data': np.float64}}] sym = mx.sym.Deconvolution(layout='NCW', num_filter=problematic_c, kernel=(2,), name='deconv') - check_consistency([sym, sym], ctx_list, tol=tol, grad_req=grad_req) + check_consistency([sym, sym], ctx_list, grad_req=grad_req, scale=scale) def test_2D_with_width(width, grad_req): ctx_list = [{'ctx': mx.gpu(0), 'deconv_data': (1, 8, 2, width), 'type_dict': {'deconv_data': np.float32}}, {'ctx': mx.gpu(0), 'deconv_data': (1, 8, 2, width), 'type_dict': {'deconv_data': np.float64}}] sym = mx.sym.Deconvolution(layout='NCHW', num_filter=problematic_c, kernel=(2,2), name='deconv') - check_consistency([sym, sym], ctx_list, tol=tol, grad_req=grad_req) + check_consistency([sym, sym], ctx_list, grad_req=grad_req, scale=scale) # Run with different data tensor shapes to run cudnnFind() multiple times. # First, populate algo and op caches with models that always use cudnnFind() (req == 'write'). 
@@ -926,8 +914,8 @@ def test_deconvolution_with_type(): np.dtype(np.float64): 1e-5, np.dtype(np.uint8): 0, np.dtype(np.int32): 0} - check_consistency(sym, ctx_list, tol=tol) - check_consistency(sym, ctx_list, tol=tol, grad_req="add") + check_consistency(sym, ctx_list, rtol=tol, atol=tol) + check_consistency(sym, ctx_list, rtol=tol, atol=tol, grad_req="add") # 2D deconvolution sym = mx.sym.Deconvolution(num_filter=2, kernel=(3,3), name='deconv') @@ -942,8 +930,8 @@ def test_deconvolution_with_type(): np.dtype(np.float64): 1e-5, np.dtype(np.uint8): 0, np.dtype(np.int32): 0} - check_consistency(sym, ctx_list, tol=tol) - check_consistency(sym, ctx_list, tol=tol, grad_req="add") + check_consistency(sym, ctx_list, rtol=tol, atol=tol) + check_consistency(sym, ctx_list, rtol=tol, atol=tol, grad_req="add") @with_seed() @@ -1025,10 +1013,11 @@ def test_bilinear_sampler_with_type(): def test_grid_generator_with_type(): data = mx.sym.Variable('data') sym = mx.sym.GridGenerator(data=data, transform_type='affine', target_shape=(20, 20)) + scale = 1 ctx_list = [{'ctx': mx.gpu(0), 'data': (3, 6), 'type_dict': {'data': np.float32}}, {'ctx': mx.cpu(0), 'data': (3, 6), 'type_dict': {'data': np.float32}}] - check_consistency(sym, ctx_list) - check_consistency(sym, ctx_list, grad_req="add") + check_consistency(sym, ctx_list, scale=scale) + check_consistency(sym, ctx_list, scale=scale, grad_req="add") sym = mx.sym.GridGenerator(data=data, transform_type='warp', target_shape=(20, 20)) ctx_list = [{'ctx': mx.gpu(0), 'data': (3, 2, 20, 20), 'type_dict': {'data': np.float32}}, {'ctx': mx.cpu(0), 'data': (3, 2, 20, 20), 'type_dict': {'data': np.float32}}] @@ -1173,7 +1162,7 @@ def test_pooling_versions_helper(pool_op_list, data, kernel, pool_type, pad, str pool_op)) sym_list.append(sym) - check_consistency(sym_list, ctx_list, equal_nan=(not count_include_pad), tol=tol) + check_consistency(sym_list, ctx_list, equal_nan=(not count_include_pad), rtol=tol, atol=tol) def test_pooling_dim(dim, 
pool_type, dtype, pool_op_list, p_value=2, count_include_pad=True, tol=None): @@ -1331,7 +1320,7 @@ def test_flatten_slice_after_conv(): ctx_list = [{'ctx': mx.gpu(0), 'conv_data': (2, 16, 16, 16), 'type_dict': {'conv_data': np.float32}}, {'ctx': mx.cpu(0), 'conv_data': (2, 16, 16, 16), 'type_dict': {'conv_data': np.float32}}] - check_consistency(slice_sym, ctx_list) + check_consistency(slice_sym, ctx_list, scale=0.5) @with_seed() @@ -1634,7 +1623,7 @@ def test_embedding_helper(data_types, weight_types, low_pad, high_pad): 'type_dict': {'embedding_data': data_type, 'embedding_weight': weight_type}}) arg_params = {'embedding_data': np.random.randint(low=-low_pad, high=V+high_pad, size=(N,))} check_consistency(sym, ctx_list, grad_req={'embedding_data': 'null','embedding_weight': 'write'}, - arg_params=arg_params) + arg_params=arg_params, scale=0.1) data_types = [np.float16, np.float32, np.float64, np.int32] weight_types = [np.float16, np.float32, np.float64] @@ -1882,7 +1871,7 @@ def test_deformable_psroipooling_with_type(): 'deformable_psroipool_trans': np.float16}}, ] - check_consistency(sym, ctx_list, scale=0.1, tol=tol, + check_consistency(sym, ctx_list, scale=0.1, rtol=tol, atol=tol, grad_req={'deformable_psroipool_data': 'write', 'deformable_psroipool_rois': 'null', 'deformable_psroipool_trans': 'write'}, arg_params=arg_params) @@ -1913,9 +1902,9 @@ def test_deformable_convolution_with_type(): 'type_dict': {'deformable_conv_data': np.float32, 'deformable_conv_offset': np.float32}}, ] - check_consistency(sym, ctx_list, scale=0.1, tol=tol) + check_consistency(sym, ctx_list, scale=0.1, rtol=tol, atol=tol) # test ability to turn off training on bias - check_consistency(sym, ctx_list, scale=0.1, tol=tol, + check_consistency(sym, ctx_list, scale=0.1, rtol=tol, atol=tol, grad_req={'deformable_conv_data': 'write', 'deformable_conv_offset': 'write', 'deformable_conv_weight': 'write', @@ -1948,7 +1937,7 @@ def test_deformable_convolution_options(): 'type_dict': 
{'deformable_conv_data': np.float32, 'deformable_conv_offset': np.float32}}, ] sym = mx.sym.contrib.DeformableConvolution(num_filter=3, kernel=(3,3), pad=(1,1), name='deformable_conv') - check_consistency(sym, ctx_list, scale=0.1, tol=tol) + check_consistency(sym, ctx_list, scale=0.1, rtol=tol, atol=tol) # Stride > 1 ctx_list = [{'ctx': mx.gpu(0), @@ -1969,7 +1958,7 @@ def test_deformable_convolution_options(): 'type_dict': {'deformable_conv_data': np.float32, 'deformable_conv_offset': np.float32}}, ] sym = mx.sym.contrib.DeformableConvolution(num_filter=3, kernel=(3,3), stride=(2,2), name='deformable_conv') - check_consistency(sym, ctx_list, scale=0.1, tol=tol) + check_consistency(sym, ctx_list, scale=0.1, rtol=tol, atol=tol) # Dilate > 1 ctx_list = [{'ctx': mx.gpu(0), @@ -1990,7 +1979,7 @@ def test_deformable_convolution_options(): 'type_dict': {'deformable_conv_data': np.float32, 'deformable_conv_offset': np.float32}}, ] sym = mx.sym.contrib.DeformableConvolution(num_filter=3, kernel=(3,3), dilate=(2,2), name='deformable_conv') - check_consistency(sym, ctx_list, scale=0.1, tol=tol) + check_consistency(sym, ctx_list, scale=0.1, rtol=tol, atol=tol) # Deformable group > 1 ctx_list = [{'ctx': mx.gpu(0), @@ -2011,7 +2000,7 @@ def test_deformable_convolution_options(): 'type_dict': {'deformable_conv_data': np.float32, 'deformable_conv_offset': np.float32}}, ] sym = mx.sym.contrib.DeformableConvolution(num_filter=4, kernel=(3,3), num_deformable_group=2, name='deformable_conv') - check_consistency(sym, ctx_list, scale=0.1, tol=tol) + check_consistency(sym, ctx_list, scale=0.1, rtol=tol, atol=tol) @with_seed() diff --git a/tests/python/unittest/common.py b/tests/python/unittest/common.py index ab2d191f1360..8e4e2e35f0cc 100644 --- a/tests/python/unittest/common.py +++ b/tests/python/unittest/common.py @@ -214,11 +214,13 @@ def test_new(*args, **kwargs): try: orig_test(*args, **kwargs) except: - # With exceptions, repeat test_msg at INFO level to be sure it's seen. 
- if log_level < logging.INFO: - logger.info(test_msg) + # With exceptions, repeat test_msg at WARNING level to be sure it's seen. + if log_level < logging.WARNING: + logger.warning(test_msg) raise finally: + # Provide test-isolation for any test having this decorator + mx.nd.waitall() np.random.set_state(post_test_state) return test_new return test_helper @@ -277,7 +279,7 @@ def setup_module(): seed = np.random.randint(0, np.iinfo(np.int32).max) else: seed = int(module_seed_str) - logger.warn('*** module-level seed is set: all tests running deterministically ***') + logger.warning('*** module-level seed is set: all tests running deterministically ***') logger.info('Setting module np/mx/python random seeds, use MXNET_MODULE_SEED=%s to reproduce.', seed) np.random.seed(seed) mx.random.seed(seed) @@ -285,7 +287,7 @@ def setup_module(): # The MXNET_TEST_SEED environment variable will override MXNET_MODULE_SEED for tests with # the 'with_seed()' decoration. Inform the user of this once here at the module level. 
if os.getenv('MXNET_TEST_SEED') is not None: - logger.warn('*** test-level seed set: all "@with_seed()" tests run deterministically ***') + logger.warning('*** test-level seed set: all "@with_seed()" tests run deterministically ***') try: from tempfile import TemporaryDirectory @@ -373,4 +375,4 @@ def run_in_spawned_process(func, env, *args): finally: os.environ.clear() os.environ.update(orig_environ) - return True \ No newline at end of file + return True diff --git a/tests/python/unittest/test_autograd.py b/tests/python/unittest/test_autograd.py index 61955f034a71..a72af688764e 100644 --- a/tests/python/unittest/test_autograd.py +++ b/tests/python/unittest/test_autograd.py @@ -433,6 +433,7 @@ def check_grad_with_stype(array_stype, grad_stype, expected_stype): check_grad_with_stype(stype, grad_stype, grad_stype) @with_seed() +@pytest.mark.garbage_expected def test_sparse_dot_grad(): def check_sparse_dot_grad(rhs): lhs = rand_ndarray((2, 8), 'csr') diff --git a/tests/python/unittest/test_gluon.py b/tests/python/unittest/test_gluon.py index 60fd526e16c7..6129c2892d05 100644 --- a/tests/python/unittest/test_gluon.py +++ b/tests/python/unittest/test_gluon.py @@ -23,7 +23,7 @@ from mxnet import gluon from mxnet.gluon import nn from mxnet.base import py_str, MXNetError -from mxnet.test_utils import assert_almost_equal +from mxnet.test_utils import assert_almost_equal, default_context from mxnet.util import is_np_array from mxnet.ndarray.ndarray import _STORAGE_TYPE_STR_TO_ID from mxnet.test_utils import use_np @@ -897,7 +897,7 @@ def _syncParameters(bn1, bn2, ctx): input2grad.asnumpy(), atol=atol, rtol=rtol) cfgs = [(1, False)] - num_gpus = mx.context.num_gpus() + num_gpus = 0 if default_context().device_type != 'gpu' else mx.context.num_gpus() batch_size = 24 for i in range(1, num_gpus + 1): if batch_size % i == 0: diff --git a/tests/python/unittest/test_ndarray.py b/tests/python/unittest/test_ndarray.py index 3a9bd9e93126..167d26e922e4 100644 --- 
a/tests/python/unittest/test_ndarray.py +++ b/tests/python/unittest/test_ndarray.py @@ -24,7 +24,7 @@ import random import functools from nose.tools import assert_raises, raises -from common import with_seed, assertRaises, TemporaryDirectory +from common import with_seed, assertRaises, TemporaryDirectory, setup_module, teardown from mxnet.test_utils import almost_equal from mxnet.test_utils import assert_almost_equal, assert_exception from mxnet.test_utils import default_context diff --git a/tests/python/unittest/test_numpy_interoperability.py b/tests/python/unittest/test_numpy_interoperability.py index 18b26579f740..67722047ddee 100644 --- a/tests/python/unittest/test_numpy_interoperability.py +++ b/tests/python/unittest/test_numpy_interoperability.py @@ -28,7 +28,7 @@ from mxnet.test_utils import assert_almost_equal from mxnet.test_utils import use_np from mxnet.test_utils import is_op_runnable -from common import assertRaises, with_seed +from common import assertRaises, with_seed, random_seed from mxnet.numpy_dispatch_protocol import with_array_function_protocol, with_array_ufunc_protocol from mxnet.numpy_dispatch_protocol import _NUMPY_ARRAY_FUNCTION_LIST, _NUMPY_ARRAY_UFUNC_LIST @@ -471,8 +471,8 @@ def _add_workload_linalg_cholesky(): dtypes = (np.float32, np.float64) for shape, dtype in itertools.product(shapes, dtypes): - _np.random.seed(1) - a = _np.random.randn(*shape) + with random_seed(1): + a = _np.random.randn(*shape) t = list(range(len(shape))) t[-2:] = -1, -2 @@ -2705,7 +2705,6 @@ def _add_workload_unwrap(): phase[3:] += np.pi phase_s = np.vstack((phase,phase)) OpArgMngr.add_workload('unwrap', phase) - print(phase_s.shape) OpArgMngr.add_workload('unwrap', phase_s, axis=1) diff --git a/tests/python/unittest/test_numpy_op.py b/tests/python/unittest/test_numpy_op.py index 475ff0243290..c5804c253744 100644 --- a/tests/python/unittest/test_numpy_op.py +++ b/tests/python/unittest/test_numpy_op.py @@ -31,7 +31,7 @@ from mxnet.gluon import HybridBlock from 
mxnet.base import MXNetError from mxnet.test_utils import same, assert_almost_equal, rand_shape_nd, rand_ndarray -from mxnet.test_utils import check_numeric_gradient, use_np, collapse_sum_like +from mxnet.test_utils import check_numeric_gradient, use_np, collapse_sum_like, effective_dtype from mxnet.test_utils import new_matrix_with_real_eigvals_nd from mxnet.test_utils import new_sym_matrix_with_real_eigvals_nd from common import assertRaises, with_seed @@ -1488,15 +1488,18 @@ def _test_batchnorm_impl(shape, fix_gamma, cudnn_off, output_mean_var, running_mean = running_mean * momentum + \ data_mean_flat * (1 - momentum) + + m = _np.prod(shape) / shape[axis] + # cudnn uses m-1 in the denominator of its sample variance calculation, not m + sample_var_adjust = 1.0 if cudnn_off or fix_gamma else m / (m-1) running_var = running_var * momentum + \ - data_var_flat * (1 - momentum) + data_var_flat * sample_var_adjust * (1 - momentum) W = bn_gamma.reshape(expand_shape) dnx = ograd * W xsm = data - data_mean nd = 1.0 / np.sqrt(data_var + epsilon) nx = xsm * nd - m = _np.prod(shape) / shape[axis] dvar = np.sum(dnx * xsm, axis=reduce_axis, keepdims=True, ) * (-0.5) * np.power(nd, 3) dmean = -nd * np.sum(dnx, axis=reduce_axis, keepdims=True) - \ @@ -3951,7 +3954,7 @@ def hybrid_forward(self, F, mean, sigma): for ((shape1, shape2), out_shape) in zip(param_shape, output_shapes): mx_out = np.random.lognormal(np.zeros(shape1), np.ones(shape2), out_shape) np_out = _np.random.lognormal(np.zeros(shape1).asnumpy(), np.ones(shape2).asnumpy(), out_shape) - assert_almost_equal(mx_out.asnumpy().shape, np_out.shape) + assert mx_out.asnumpy().shape == np_out.shape def _test_lognormal_exception(sigma): output = np.random.lognormal(sigma=sigma).asnumpy() @@ -4177,7 +4180,7 @@ def hybrid_forward(self, F, scale): with mx.autograd.record(): mx_out = test_rayleigh(scale) np_out = _np.random.rayleigh(scale = scale.asnumpy(), size = shape) - assert_almost_equal(np_out.shape, mx_out.shape) + assert 
np_out.shape == mx_out.shape mx_out.backward() assert scale.grad.shape == shape assert_almost_equal(scale.grad.asnumpy().sum(), mx_out.asnumpy().sum(), rtol=1e-3, atol=1e-5) @@ -4185,7 +4188,7 @@ def hybrid_forward(self, F, scale): for shape in shapes: mx_out = np.random.rayleigh(np.array([1]), shape) np_out = _np.random.rayleigh(np.array([1]).asnumpy(), shape) - assert_almost_equal(mx_out.asnumpy().shape, np_out.shape) + assert mx_out.asnumpy().shape == np_out.shape def _test_rayleigh_exception(scale): output = np.random.rayleigh(scale=scale).asnumpy() @@ -4218,7 +4221,7 @@ def hybrid_forward(self, F, scale): with mx.autograd.record(): mx_out = test_exponential_grad(scale) np_out = _np.random.exponential(scale = scale.asnumpy(), size = out_shape) - assert_almost_equal(np_out.shape, mx_out.shape) + assert np_out.shape == mx_out.shape mx_out.backward() assert scale.grad.shape == out_shape assert_almost_equal(scale.grad.asnumpy().sum(), mx_out.asnumpy().sum(), rtol=1e-3, atol=1e-5) @@ -4956,6 +4959,8 @@ def check_svd(UT, L, V, data_np): data_np = _np.random.uniform(-10.0, 10.0, shape) data_np = _np.array(data_np, dtype=dtype) data = np.array(data_np, dtype=dtype) + if effective_dtype(data) == np.dtype(np.float16): + continue data.attach_grad() with mx.autograd.record(): ret = test_svd(data) @@ -5211,7 +5216,7 @@ def check_solve(x, a_np, b_np): print(e) else: assert x.shape == x_expected.shape - assert_almost_equal(x.asnumpy(), x_expected, rtol=rtol, atol=atol) + assert_almost_equal(x, x_expected) def newInvertibleMatrix_2D(shape, max_cond=4): while 1: @@ -5251,7 +5256,6 @@ def get_grad_b(A, X): nrhs = (-1, 0, 1, 2, 3) dtypes = ['float32', 'float64'] for hybridize, shape, dtype, nrh in itertools.product([False, True], shapes, dtypes, nrhs): - rtol, atol =1e-2, 1e-4 test_solve = TestSolve() if hybridize: test_solve.hybridize() @@ -5285,8 +5289,8 @@ def get_grad_b(A, X): mx.autograd.backward(mx_out) b_backward_expected = get_grad_b(a.asnumpy(), mx_out.asnumpy()) 
a_backward_expected = -_np.matmul(b_backward_expected, _np.swapaxes(mx_out, -1, -2).asnumpy()) - assert_almost_equal(a.grad.asnumpy(), a_backward_expected, rtol=rtol, atol=atol) - assert_almost_equal(b.grad.asnumpy(), b_backward_expected, rtol=rtol, atol=atol) + assert_almost_equal(a.grad, a_backward_expected) + assert_almost_equal(b.grad, b_backward_expected) # check imperative once again mx_out = np.linalg.solve(a, b) @@ -5311,7 +5315,7 @@ def check_tensorinv(inv_a, a_np, ind): print(e) else: assert inv_a.shape == inv_a_expected.shape - assert_almost_equal(inv_a.asnumpy(), inv_a_expected, rtol=rtol, atol=atol) + assert_almost_equal(inv_a, inv_a_expected) def newInvertibleMatrix_2D(shape, max_cond=4): while 1: @@ -5354,11 +5358,6 @@ def get_grad_A(A, ind): ] dtypes = ['float32', 'float64'] for hybridize, shape, dtype, in itertools.product([False, True], shapes, dtypes): - rtol = 1e-3 - atol = 1e-5 - if dtype == 'float32': - rtol = 1e-2 - atol = 1e-4 ind = shape[0] test_tensorinv = TestTensorinv(ind=ind) if hybridize: @@ -5386,7 +5385,7 @@ def get_grad_A(A, ind): if 0 not in mx_out.shape: mx.autograd.backward(mx_out) grad_A_expected = get_grad_A(a.asnumpy(), ind) - assert_almost_equal(a.grad.asnumpy(), grad_A_expected, rtol=rtol, atol=atol) + assert_almost_equal(a.grad, grad_A_expected) # check imperative once again mx_out = np.linalg.tensorinv(a, ind) @@ -5439,7 +5438,7 @@ def check_tensorsolve(x, a_np, b_np, axes): print(e) else: assert x.shape == x_expected.shape - assert_almost_equal(x.asnumpy(), x_expected, rtol=rtol, atol=atol) + assert_almost_equal(x, x_expected) def shapeInfer(a_shape, b_shape, axes=None): # b_shape - Right-hand tensor shape, which can be of any shape. 
@@ -5501,8 +5500,6 @@ def newInvertibleMatrix_2D(shape, max_cond=4): for hybridize in [True, False]: for dtype in dtypes: for a_shape, b_shape, axes in shapes: - rtol = 1e-2 if dtype == 'float32' else 1e-3 - atol = 1e-4 if dtype == 'float32' else 1e-5 test_tensorsolve = TestTensorsolve(axes) if hybridize: test_tensorsolve.hybridize() @@ -5539,8 +5536,8 @@ def newInvertibleMatrix_2D(shape, max_cond=4): mx.autograd.backward(mx_out) grad_a_expected, grad_b_expected = get_tensorsolve_backward( a.asnumpy(), b.asnumpy(), mx_out.asnumpy(), a_axes, a_origin_axes, a_trans_shape) - assert_almost_equal(a.grad.asnumpy(), grad_a_expected, rtol=rtol, atol=atol) - assert_almost_equal(b.grad.asnumpy(), grad_b_expected, rtol=rtol, atol=atol) + assert_almost_equal(a.grad, grad_a_expected) + assert_almost_equal(b.grad, grad_b_expected) # check imperative once again mx_out = test_tensorsolve(a, b) diff --git a/tests/python/unittest/test_operator.py b/tests/python/unittest/test_operator.py index 4e736e5dc0ab..edb3e6a5aa4c 100644 --- a/tests/python/unittest/test_operator.py +++ b/tests/python/unittest/test_operator.py @@ -814,21 +814,26 @@ def test_symbol_pow(): @with_seed() def test_fully_connected(): + # Create data of given shape as a uniform distribution centered on 0.0 + def random_data(shape, dtype=np.float32): + return mx.nd.random.uniform(low=-0.5, + high=0.5, shape=shape, dtype=dtype) data = mx.sym.var("data") fc_weight = mx.sym.var("weight") fc_bias = mx.sym.var("bias") fc = mx.sym.FullyConnected(data=data, weight=fc_weight, bias=fc_bias, num_hidden=10, no_bias=False, name='fc') - data = mx.nd.random.uniform(shape=(5, 5, 5, 13), dtype=np.float32) - fc_weight = mx.nd.random.uniform(shape=(10, 325), dtype=np.float32) - fc_bias = mx.nd.random.uniform(shape=(10), dtype=np.float32) - fc_bias2 = mx.nd.random.uniform(shape=(10, 1), dtype=np.float32) + + data = random_data(shape=(5, 5, 5, 13)) + fc_weight = random_data(shape=(10, 325)) + fc_bias = random_data(shape=(10)) + fc_bias2 = 
random_data(shape=(10, 1)) + data_np = data.asnumpy().reshape(5, 325) fc_weight_np = np.transpose(fc_weight.asnumpy()) fc_bias_np = fc_bias.asnumpy() res = np.dot(data_np, fc_weight_np) + fc_bias.asnumpy() check_symbolic_forward(fc, {'data': data_np, 'weight': fc_weight.asnumpy(), 'bias': fc_bias_np}, {'fc_output': res}) - check_numeric_gradient(fc, {'data': data_np, 'weight': fc_weight.asnumpy(), 'bias': fc_bias_np}, - numeric_eps=1e-2, rtol=1e-4, atol=1e-2) + check_numeric_gradient(fc, {'data': data_np, 'weight': fc_weight.asnumpy(), 'bias': fc_bias_np}) # TODO: Fix Bug #15032 when bias has ndim > 1 #check_symbolic_forward(fc, {'data': data_np, 'weight': fc_weight.asnumpy(), 'bias': fc_bias2.asnumpy()}, {'fc_output': res}) @@ -1905,15 +1910,18 @@ def _test_batchnorm_impl(op_name, shape, fix_gamma, cudnn_off, output_mean_var, running_mean = running_mean * momentum + \ data_mean_flat * (1 - momentum) + + m = np.prod(shape) / shape[axis] + # cudnn uses m-1 in the denominator of its sample variance calculation, not m + sample_var_adjust = 1.0 if cudnn_off or fix_gamma else m / (m-1) running_var = running_var * momentum + \ - data_var_flat * (1 - momentum) + data_var_flat * sample_var_adjust * (1 - momentum) W = bn_gamma.reshape(expand_shape) dnx = ograd * W xsm = data - data_mean nd = 1.0 / mx.nd.sqrt(data_var + epsilon) nx = xsm * nd - m = np.prod(shape) / shape[axis] dvar = (dnx * xsm).sum(axis=axis, keepdims=True, exclude=True) * (-0.5) * mx.nd.power(nd, 3) dmean = -nd * dnx.sum(axis=axis, keepdims=True, exclude=True) - \ @@ -2848,13 +2856,13 @@ def test_reduce_inner(numpy_reduce_func, numpy_reduce_grad_func, mx_reduce_sym, args_grad={'a': grad_nd}) net.forward(is_train=True) - equal_forward = almost_equal_ignore_nan(net.outputs[0].asnumpy(), sum_groundtruth, 1E-4, 1E-4) - assert equal_forward + # check forward + assert_almost_equal_ignore_nan(net.outputs[0].asnumpy(), sum_groundtruth, rtol=1e-4, atol=1e-4) net.backward(out_grads=mx.nd.array(outgrad_npy)) 
bc_grad_groundtruth = np.broadcast_to(grad_groundtruth, grad_nd.shape) - equal_backward = almost_equal_ignore_nan(grad_nd.asnumpy(), bc_grad_groundtruth, 1E-4, 1E-4) - assert equal_backward + # check backward + assert_almost_equal_ignore_nan(grad_nd.asnumpy(), bc_grad_groundtruth, rtol=1e-4, atol=1e-4) test_none_axis = [True, False] for test_none in test_none_axis: @@ -4504,7 +4512,7 @@ def get_large_matrix(): out_npy = gt_topk(dat=a_npy, axis=axis, ret_typ="value", k=a_npy.size, is_ascend=is_ascend) else: out_npy = gt_topk(dat=a_npy, axis=axis, ret_typ="value", k=5, is_ascend=is_ascend) - check_numeric_gradient(b, location={'a': a_npy}, numeric_eps=1e-2, ctx=ctx) + check_numeric_gradient(b, location={'a': a_npy}, numeric_eps=1e-2, rtol=1e-2, ctx=ctx) check_symbolic_forward(b, location={'a': a_npy}, expected=[out_npy]) b = mx.sym.topk(a, axis=1, is_ascend=is_ascend, ret_typ="indices", k=5) @@ -4552,7 +4560,7 @@ def get_large_matrix(): for is_ascend in [True, False]: b = mx.sym.topk(a, axis=axis, is_ascend=is_ascend, ret_typ="value", k=k) out_npy = gt_topk(dat=a_npy, axis=axis, ret_typ="value", k=k, is_ascend=is_ascend) - check_numeric_gradient(b, location={'a': a_npy}, numeric_eps=1e-2, ctx=ctx) + check_numeric_gradient(b, location={'a': a_npy}, numeric_eps=1e-2, rtol=1e-2, ctx=ctx) check_symbolic_forward(b, location={'a': a_npy}, expected=[out_npy]) b = mx.sym.topk(a, axis=1, is_ascend=is_ascend, ret_typ="indices", k=5) @@ -4716,7 +4724,7 @@ def test_grid_generator(): # check forward exe.arg_dict['affine'][:] = np.array([[1.0,0,0,0,1.0,0]]) exe.forward(is_train=True) - output = exe.outputs[0].asnumpy() + output = exe.outputs[0] output[0,0,:,:] = (output[0,0,:,:] + 1) * (target_shape[1] - 1) / 2.0 output[0,1,:,:] = (output[0,1,:,:] + 1) * (target_shape[0] - 1) / 2.0 xv, yv = np.meshgrid(np.arange(target_shape[0]), np.arange(target_shape[1])) @@ -4731,7 +4739,7 @@ def test_grid_generator(): tmp[1] = -1.0 + (np.arange(target_shape[0]*target_shape[1]) // 
target_shape[1]) * (2.0 / (target_shape[0]-1)) tmp[2] = 1 grad_est = np.dot(out_grad[0].reshape(2,target_shape[0]*target_shape[1]),tmp.T).reshape(1,6) - assert_almost_equal(exe.grad_dict['affine'], grad_est, rtol=1e-3, atol=1e-5) + assert_almost_equal(exe.grad_dict['affine'], grad_est) # check addto exe = grid.simple_bind(ctx=default_context(), affine=(1,6), grad_req='add') grid_grad_npy = np.random.normal(size=exe.grad_dict['affine'].shape) @@ -4739,7 +4747,7 @@ def test_grid_generator(): exe.arg_dict['affine'][:] = np.array([[1.0, 0, 0, 0, 1.0, 0]]) exe.forward(is_train=True) exe.backward(mx.nd.array(out_grad)) - assert_almost_equal(exe.grad_dict['affine'], grad_est + grid_grad_npy, rtol=1e-2, atol=1e-5) + assert_almost_equal(exe.grad_dict['affine'], grad_est + grid_grad_npy) # transform_type = warp test_case = [(12,21),(4,3),(6,12)] @@ -5784,51 +5792,62 @@ def test_div_sqrt_dim(): check_symbolic_forward(test, [data_tmp], [data_tmp / np.sqrt(data_tmp.shape[-1])]) +# helper function to identify inputs likely to fail check_numeric_gradient tol test +# due to finite difference method inaccuracies or function discontinuities at the origin +def bad_input_finder(f, f_grad, dtype): + eps = default_numeric_eps()[np.dtype(dtype)] + rtol = default_rtols()[np.dtype(dtype)] + def expected_relative_error(x): + fd_gradient = (f(x+eps/2) - f(x-eps/2)) / eps + return abs(fd_gradient/f_grad(x) - 1) + def is_fd_problem_input(x): + return abs(x) < eps/2 or expected_relative_error(x) > rtol + return np.vectorize(is_fd_problem_input) + @with_seed() def test_reciprocal_op(): - eps = 2**(-11) - data_tmp = np.random.rand(3, 4) * 10 - 5 - # Avoid possible division by 0 errors and finite difference method inaccuracies. - # Factor of 6 below set empirically, depends on eps. - # Issue exposed by seed 879579887. - # Replace problematic inputs with 1.0.
- data_tmp[abs(data_tmp) < 6*eps] = 1.0 + data_tmp = np.random.rand(3, 4).astype(np.float32) * 10 - 5 + + # Avoid possible division by 0 errors and finite difference method + # inaccuracies by replacing problem inputs with 1.0. + is_bad_input = bad_input_finder(np.reciprocal, + lambda x: -np.reciprocal(x)**2, np.float32) + data_tmp[is_bad_input(data_tmp)] = 1.0 data = mx.symbol.Variable('data') test = mx.sym.reciprocal(data) - check_numeric_gradient(test, [data_tmp], numeric_eps = eps) + check_numeric_gradient(test, [data_tmp]) check_symbolic_forward(test, [data_tmp], [np.reciprocal(data_tmp)]) @with_seed() def test_cbrt_op(): - eps = 2**(-11) - data_tmp = np.random.rand(3, 4) * 10 - 5 - # Avoid finite difference method inaccuracies due to infinite gradient at the origin. - # Factor of 4 below set empirically, depends on eps. - # Issue exposed by seed 553872106. - # Replace problematic inputs with 1.0. - data_tmp[abs(data_tmp) < 4*eps] = 1.0 + data_tmp = np.random.rand(3, 4).astype(np.float32) * 10 - 5 + + # Avoid possible division by 0 errors and finite difference method + # inaccuracies by replacing problem inputs with 1.0. + is_bad_input = bad_input_finder(np.cbrt, + lambda x: 1./(3 * np.cbrt(x)**2), np.float32) + data_tmp[is_bad_input(data_tmp)] = 1.0 data = mx.symbol.Variable('data') test = mx.sym.cbrt(data) - - check_numeric_gradient(test, [data_tmp], numeric_eps=eps) + check_numeric_gradient(test, [data_tmp]) check_symbolic_forward(test, [data_tmp], [np.cbrt(data_tmp)]) @with_seed() def test_rcbrt_op(): - eps = 2**(-11) - data_tmp = np.random.rand(3, 4) * 10 - 5 - # Avoid possible division by 0 errors and finite difference method inaccuracies. - # Factor of 4 below set empirically, depends on eps. - # Issue exposed by seed 788174893. - # Replace problematic inputs with 1.0. 
- data_tmp[abs(data_tmp) < 4*eps] = 1.0 + data_tmp = np.random.rand(3, 4).astype(np.float32) * 10 - 5 + + # Avoid possible division by 0 errors and finite difference method + # inaccuracies by replacing problem inputs with 1.0. + is_bad_input = bad_input_finder(lambda x: 1./np.cbrt(x), + lambda x: -1./(3 * np.cbrt(x)**4), np.float32) + data_tmp[is_bad_input(data_tmp)] = 1.0 data = mx.symbol.Variable('data') test = mx.sym.rcbrt(data) - check_numeric_gradient(test, [data_tmp], numeric_eps = eps) + check_numeric_gradient(test, [data_tmp]) check_symbolic_forward(test, [data_tmp], [1/np.cbrt(data_tmp)]) @@ -6237,7 +6256,7 @@ def test_deformable_convolution(): # By now we only have gpu implementation if default_context().device_type == 'gpu': check_numeric_gradient(op, [im_data, offset_data, weight, bias], rtol=rtol, atol=atol, - grad_nodes=grad_nodes, ctx=mx.gpu(0)) + grad_nodes=grad_nodes, ctx=mx.gpu(0), numeric_eps=1.0/64) def _validate_sample_location(input_rois, input_offset, spatial_scale, pooled_w, pooled_h, sample_per_part, part_size, output_dim, num_classes, trans_std, feat_h, feat_w): @@ -6330,10 +6349,11 @@ def test_deformable_psroipooling(): grad_nodes=grad_nodes, ctx=mx.gpu(0)) -def _gemm_test_helper(dtype, grad_check, rtol_fw = 1e-7, atol_fw = 1e-9): - num_eps = 1e-6 - rtol_bw = 1e-5 - atol_bw = 1e-6 +def _gemm_test_helper(dtype, grad_check, rtol_fw = None, atol_fw = None, + rtol_bw = None, atol_bw = None, num_eps = None): + def np_random_data(shape, dtype=np.float32): + return np.random.uniform(low=-0.5, + high=0.5, size=shape).astype(dtype) data1 = mx.symbol.Variable('data1') data2 = mx.symbol.Variable('data2') @@ -6352,10 +6372,10 @@ def _gemm_test_helper(dtype, grad_check, rtol_fw = 1e-7, atol_fw = 1e-9): shape2 = (3, 2) shape3 = (3, 3) shape4 = (2, 2) - data_in1 = np.random.uniform(1, 10, shape1).astype(dtype) - data_in2 = np.random.uniform(1, 10, shape2).astype(dtype) - data_in3 = np.random.uniform(1, 10, shape3).astype(dtype) - data_in4 = 
np.random.uniform(1, 10, shape4).astype(dtype) + data_in1 = np_random_data(shape1, dtype) + data_in2 = np_random_data(shape2, dtype) + data_in3 = np_random_data(shape3, dtype) + data_in4 = np_random_data(shape4, dtype) # Check all transpositions of gemm operator. data_in1_t = np.transpose(data_in1) data_in2_t = np.transpose(data_in2) @@ -6462,10 +6482,10 @@ def _gemm_test_helper(dtype, grad_check, rtol_fw = 1e-7, atol_fw = 1e-9): def test_gemm(): _gemm_test_helper(np.float64, True) os.environ["MXNET_CUDA_TENSOR_OP_MATH_ALLOW_CONVERSION"] = "0" - _gemm_test_helper(np.float32, False, rtol_fw = 1e-5, atol_fw = 1e-7) + _gemm_test_helper(np.float32, True) if default_context().device_type == 'gpu': os.environ["MXNET_CUDA_TENSOR_OP_MATH_ALLOW_CONVERSION"] = "1" - _gemm_test_helper(np.float32, False, rtol_fw = 2e-5, atol_fw = 2e-7) + _gemm_test_helper(np.float32, True) os.environ["MXNET_CUDA_TENSOR_OP_MATH_ALLOW_CONVERSION"] = "0" # Helper functions for test_laop diff --git a/tests/python/unittest/test_sparse_operator.py b/tests/python/unittest/test_sparse_operator.py index 4c4e3dbdfc51..cb93fe1b04f0 100644 --- a/tests/python/unittest/test_sparse_operator.py +++ b/tests/python/unittest/test_sparse_operator.py @@ -1633,6 +1633,7 @@ def test_fallback(func_name, axis=0, keepdims=True, exclude=True): @with_seed() +@pytest.mark.serial def test_sparse_square_sum(): dim0 = 30 dim1 = 30 From efd25d4c8b3f50120aba8fdf0eb42606e3876988 Mon Sep 17 00:00:00 2001 From: Dick Carter Date: Fri, 11 Sep 2020 19:01:41 -0700 Subject: [PATCH 03/11] Fix test_gluon_data.py:test_dataloader_context when run on 1-GPU system. 
--- tests/python/unittest/test_gluon_data.py | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/tests/python/unittest/test_gluon_data.py b/tests/python/unittest/test_gluon_data.py index c3ae2de41722..ef27a7fd35f5 100644 --- a/tests/python/unittest/test_gluon_data.py +++ b/tests/python/unittest/test_gluon_data.py @@ -278,11 +278,11 @@ def test_multi_worker_dataloader_release_pool(): del D +@with_seed() def test_dataloader_context(): X = np.random.uniform(size=(10, 20)) dataset = gluon.data.ArrayDataset(X) default_dev_id = 0 - custom_dev_id = 1 # use non-pinned memory loader1 = gluon.data.DataLoader(dataset, 8) @@ -294,11 +294,15 @@ def test_dataloader_context(): for _, x in enumerate(loader2): assert x.context == context.cpu_pinned(default_dev_id) - # use pinned memory with custom device id - loader3 = gluon.data.DataLoader(dataset, 8, pin_memory=True, - pin_device_id=custom_dev_id) - for _, x in enumerate(loader3): - assert x.context == context.cpu_pinned(custom_dev_id) + if mx.context.num_gpus() <= 1: + print('Bypassing custom_dev_id pinned mem test on system with < 2 gpus.') + else: + custom_dev_id = 1 + # use pinned memory with custom device id + loader3 = gluon.data.DataLoader(dataset, 8, pin_memory=True, + pin_device_id=custom_dev_id) + for _, x in enumerate(loader3): + assert x.context == context.cpu_pinned(custom_dev_id) def batchify(a): return a From e63299bb8f87ba32d92afae5c8d17a28aba1aaa0 Mon Sep 17 00:00:00 2001 From: Dick Carter Date: Fri, 11 Sep 2020 19:35:09 -0700 Subject: [PATCH 04/11] Remove pytest decorators introduced in error --- tests/python/unittest/test_autograd.py | 1 - tests/python/unittest/test_sparse_operator.py | 1 - 2 files changed, 2 deletions(-) diff --git a/tests/python/unittest/test_autograd.py b/tests/python/unittest/test_autograd.py index a72af688764e..61955f034a71 100644 --- a/tests/python/unittest/test_autograd.py +++ b/tests/python/unittest/test_autograd.py @@ -433,7 +433,6 @@ def 
check_grad_with_stype(array_stype, grad_stype, expected_stype): check_grad_with_stype(stype, grad_stype, grad_stype) @with_seed() -@pytest.mark.garbage_expected def test_sparse_dot_grad(): def check_sparse_dot_grad(rhs): lhs = rand_ndarray((2, 8), 'csr') diff --git a/tests/python/unittest/test_sparse_operator.py b/tests/python/unittest/test_sparse_operator.py index cb93fe1b04f0..4c4e3dbdfc51 100644 --- a/tests/python/unittest/test_sparse_operator.py +++ b/tests/python/unittest/test_sparse_operator.py @@ -1633,7 +1633,6 @@ def test_fallback(func_name, axis=0, keepdims=True, exclude=True): @with_seed() -@pytest.mark.serial def test_sparse_square_sum(): dim0 = 30 dim1 = 30 From 8c0e7d975810557bd127af53cda5f17381f5acfe Mon Sep 17 00:00:00 2001 From: Dick Carter Date: Fri, 11 Sep 2020 19:54:56 -0700 Subject: [PATCH 05/11] Fix test_forward.py:test_consistency --- tests/python/gpu/test_forward.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/python/gpu/test_forward.py b/tests/python/gpu/test_forward.py index 02b0256024d3..2572eda31c40 100644 --- a/tests/python/gpu/test_forward.py +++ b/tests/python/gpu/test_forward.py @@ -74,7 +74,7 @@ def test_consistency(dump=False): ctx_list = [{'ctx': mx.gpu(0), 'data': data.shape, 'type_dict': {'data': data.dtype}}, {'ctx': mx.cpu(0), 'data': data.shape, 'type_dict': {'data': data.dtype}}] gt = check_consistency(sym, ctx_list, arg_params=arg_params, aux_params=aux_params, - tol=1e-3, grad_req='null', raise_on_err=False, ground_truth=gt) + rtol=1e-3, atol=1e-3, grad_req='null', raise_on_err=False, ground_truth=gt) if dump: np.savez('data/inception-v3-dump.npz', **{n: a.asnumpy() for n, a in gt.items()}) From 9714e23e8a22a263895a48f28cc2d420a516bde9 Mon Sep 17 00:00:00 2001 From: Dick Carter Date: Sat, 12 Sep 2020 13:21:17 -0700 Subject: [PATCH 06/11] Fix test_numpy_op.py tests --- tests/python/unittest/test_numpy_op.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git 
a/tests/python/unittest/test_numpy_op.py b/tests/python/unittest/test_numpy_op.py index c5804c253744..75dccfd577b6 100644 --- a/tests/python/unittest/test_numpy_op.py +++ b/tests/python/unittest/test_numpy_op.py @@ -4112,8 +4112,8 @@ def _test_random_beta_range(output): mx_out = test_random_beta(mx_data, mx_data) mx_out_imperative = mx.np.random.beta(mx_data, mx_data, size=param_shape, dtype=out_dtype) - assert_almost_equal(np_out.shape, mx_out.shape) - assert_almost_equal(np_out.shape, mx_out_imperative.shape) + assert np_out.shape == mx_out.shape + assert np_out.shape == mx_out_imperative.shape assert _test_random_beta_range(mx_out.asnumpy()) == True assert _test_random_beta_range(mx_out_imperative.asnumpy()) == True @@ -4153,8 +4153,8 @@ def hybrid_forward(self, F, df): mx_out = test_random_chisquare(mx_df) mx_out_imperative = mx.np.random.chisquare(mx_df, size=param_shape, dtype=out_dtype) - assert_almost_equal(np_out.shape, mx_out.shape) - assert_almost_equal(np_out.shape, mx_out_imperative.shape) + assert np_out.shape == mx_out.shape + assert np_out.shape == mx_out_imperative.shape @with_seed() @@ -4959,7 +4959,7 @@ def check_svd(UT, L, V, data_np): data_np = _np.random.uniform(-10.0, 10.0, shape) data_np = _np.array(data_np, dtype=dtype) data = np.array(data_np, dtype=dtype) - if effective_dtype(data) == np.dtype(np.float16): + if effective_dtype(data) == _np.dtype(_np.float16): continue data.attach_grad() with mx.autograd.record(): From 5bd44dbead582cb0b4ee61faf5b3f83e8c0b8a15 Mon Sep 17 00:00:00 2001 From: Dick Carter Date: Sat, 12 Sep 2020 13:24:29 -0700 Subject: [PATCH 07/11] Improve test seeding in test_numpy_interoperability.py (#18762) --- .../unittest/test_numpy_interoperability.py | 29 ++++++++++++------- 1 file changed, 19 insertions(+), 10 deletions(-) diff --git a/tests/python/unittest/test_numpy_interoperability.py b/tests/python/unittest/test_numpy_interoperability.py index 67722047ddee..fd8abf1849be 100644 --- 
a/tests/python/unittest/test_numpy_interoperability.py +++ b/tests/python/unittest/test_numpy_interoperability.py @@ -28,7 +28,7 @@ from mxnet.test_utils import assert_almost_equal from mxnet.test_utils import use_np from mxnet.test_utils import is_op_runnable -from common import assertRaises, with_seed, random_seed +from common import assertRaises, with_seed, random_seed, setup_module, teardown from mxnet.numpy_dispatch_protocol import with_array_function_protocol, with_array_ufunc_protocol from mxnet.numpy_dispatch_protocol import _NUMPY_ARRAY_FUNCTION_LIST, _NUMPY_ARRAY_UFUNC_LIST @@ -58,8 +58,15 @@ def add_workload(name, *args, **kwargs): @staticmethod def get_workloads(name): + if OpArgMngr._args == {}: + _prepare_workloads() return OpArgMngr._args.get(name, None) + @staticmethod + def randomize_workloads(): + # Force a new _prepare_workloads(), which will be based on new random numbers + OpArgMngr._args = {} + def _add_workload_all(): # check bad element in all positions @@ -470,8 +477,8 @@ def _add_workload_linalg_cholesky(): shapes = [(1, 1), (2, 2), (3, 3), (50, 50), (3, 10, 10)] dtypes = (np.float32, np.float64) - for shape, dtype in itertools.product(shapes, dtypes): - with random_seed(1): + with random_seed(1): + for shape, dtype in itertools.product(shapes, dtypes): a = _np.random.randn(*shape) t = list(range(len(shape))) @@ -2979,9 +2986,6 @@ def _prepare_workloads(): _add_workload_vander() -_prepare_workloads() - - def _get_numpy_op_output(onp_op, *args, **kwargs): onp_args = [arg.asnumpy() if isinstance(arg, np.ndarray) else arg for arg in args] onp_kwargs = {k: v.asnumpy() if isinstance(v, np.ndarray) else v for k, v in kwargs.items()} @@ -2993,7 +2997,7 @@ def _get_numpy_op_output(onp_op, *args, **kwargs): return onp_op(*onp_args, **onp_kwargs) -def _check_interoperability_helper(op_name, *args, **kwargs): +def _check_interoperability_helper(op_name, rel_tol, abs_tol, *args, **kwargs): strs = op_name.split('.') if len(strs) == 1: onp_op = 
getattr(_np, op_name) @@ -3009,11 +3013,11 @@ def _check_interoperability_helper(op_name, *args, **kwargs): assert type(out) == type(expected_out) for arr, expected_arr in zip(out, expected_out): if isinstance(arr, np.ndarray): - assert_almost_equal(arr.asnumpy(), expected_arr, rtol=1e-3, atol=1e-4, use_broadcast=False, equal_nan=True) + assert_almost_equal(arr.asnumpy(), expected_arr, rtol=rel_tol, atol=abs_tol, use_broadcast=False, equal_nan=True) else: _np.testing.assert_equal(arr, expected_arr) elif isinstance(out, np.ndarray): - assert_almost_equal(out.asnumpy(), expected_out, rtol=1e-3, atol=1e-4, use_broadcast=False, equal_nan=True) + assert_almost_equal(out.asnumpy(), expected_out, rtol=rel_tol, atol=abs_tol, use_broadcast=False, equal_nan=True) elif isinstance(out, _np.dtype): _np.testing.assert_equal(out, expected_out) else: @@ -3025,6 +3029,7 @@ def _check_interoperability_helper(op_name, *args, **kwargs): def check_interoperability(op_list): + OpArgMngr.randomize_workloads() for name in op_list: if name in _TVM_OPS and not is_op_runnable(): continue @@ -3033,13 +3038,17 @@ def check_interoperability(op_list): if name in ['full_like', 'zeros_like', 'ones_like'] and \ StrictVersion(platform.python_version()) < StrictVersion('3.0.0'): continue + default_tols = (1e-3, 1e-4) + tols = {'linalg.tensorinv': (1e-2, 5e-3), + 'linalg.solve': (1e-3, 5e-2)} + (rel_tol, abs_tol) = tols.get(name, default_tols) print('Dispatch test:', name) workloads = OpArgMngr.get_workloads(name) assert workloads is not None, 'Workloads for operator `{}` has not been ' \ 'added for checking interoperability with ' \ 'the official NumPy.'.format(name) for workload in workloads: - _check_interoperability_helper(name, *workload['args'], **workload['kwargs']) + _check_interoperability_helper(name, rel_tol, abs_tol, *workload['args'], **workload['kwargs']) @with_seed() From abba2aa35dc730fbc61f53a08232b80027c67c0d Mon Sep 17 00:00:00 2001 From: Dick Carter Date: Sat, 12 Sep 2020 17:27:59 
-0700 Subject: [PATCH 08/11] Fix test_numpy_op.py:test_np_random_{beta,chisquare} --- tests/python/unittest/test_numpy_op.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tests/python/unittest/test_numpy_op.py b/tests/python/unittest/test_numpy_op.py index 75dccfd577b6..11f851a2cd50 100644 --- a/tests/python/unittest/test_numpy_op.py +++ b/tests/python/unittest/test_numpy_op.py @@ -4096,7 +4096,8 @@ def _test_random_beta_range(output): smaller_than_one = _np.all(output < 1) return bigger_than_zero and smaller_than_one - shape_list = [(), (1,), (2, 3), (4, 0, 5), 6, (7, 8), None] + # Starting with numpy 1.19.0: "Output size () is not compatible with broadcast dimensions of inputs (1,)." + shape_list = [(1,), (2, 3), (4, 0, 5), 6, (7, 8), None] # since fp16 might incur precision issue, the corresponding test is skipped dtype_list = [np.float32, np.float64] hybridize_list = [False, True] @@ -4135,7 +4136,8 @@ def __init__(self, size=None, dtype=None, ctx=None): def hybrid_forward(self, F, df): return F.np.random.chisquare(df, size=self._size, dtype=self._dtype, ctx=self._ctx) - shape_list = [(), (1,), (2, 3), (4, 0, 5), 6, (7, 8), None] + # Starting with numpy 1.19.0: "Output size () is not compatible with broadcast dimensions of inputs (1,)." 
+ shape_list = [(1,), (2, 3), (4, 0, 5), 6, (7, 8), None] dtype_list = [np.float16, np.float32, np.float64] hybridize_list = [False, True] From a11af29bf83741b69b33caa1c6988130426b024d Mon Sep 17 00:00:00 2001 From: Dick Carter Date: Sun, 13 Sep 2020 13:11:02 -0700 Subject: [PATCH 09/11] Reduce problem sizes with test_optimizer.py:test_multilamb --- tests/python/unittest/test_optimizer.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/python/unittest/test_optimizer.py b/tests/python/unittest/test_optimizer.py index 6d7cf40f29f7..4c4285d77c0a 100755 --- a/tests/python/unittest/test_optimizer.py +++ b/tests/python/unittest/test_optimizer.py @@ -504,10 +504,10 @@ def test_multilamb(): opt1 = PyLAMB opt2 = mx.optimizer.LAMB - # shapes as Bert-large - dims_x = [1024, 4096, 1024, 1024] - dims_y = [1, 1, 1024, 4096] - dims_occurrences = [9, 1, 4, 2] + dims_x = [1024, 4096, 1024] + dims_y = [1, 1, 1024] + dims_occurrences = [2, 1, 2] + nlayers = 4 # 24 # extra_dims_x=[30522, 512, 30522] # extra_dims_y=[1, 1024, 1024] From ed5c287ba43475f71734415c92e279bf8b52bb0b Mon Sep 17 00:00:00 2001 From: Dick Carter Date: Sun, 13 Sep 2020 16:03:18 -0700 Subject: [PATCH 10/11] Skip test_gluon_gpu.py:test_fused_{lstm,gru}_layer, fix test_rnn_cells, for fp16 contexts --- python/mxnet/test_utils.py | 2 +- tests/python/unittest/test_gluon_rnn.py | 14 ++++++++++---- 2 files changed, 11 insertions(+), 5 deletions(-) diff --git a/python/mxnet/test_utils.py b/python/mxnet/test_utils.py index 9e544547b1a1..3e068604954f 100755 --- a/python/mxnet/test_utils.py +++ b/python/mxnet/test_utils.py @@ -119,7 +119,7 @@ def effective_dtype(dat): def is_TF32_enabled(ctx): try: return (ctx.device_type == 'gpu' and - get_cuda_compute_capability(ctx) == 80 and + get_cuda_compute_capability(ctx) >= 80 and os.environ.get('NVIDIA_TF32_OVERRIDE') != '0') except: # pylint: disable=bare-except return False diff --git a/tests/python/unittest/test_gluon_rnn.py 
b/tests/python/unittest/test_gluon_rnn.py index 6f9308b12cea..0c34d55971d0 100644 --- a/tests/python/unittest/test_gluon_rnn.py +++ b/tests/python/unittest/test_gluon_rnn.py @@ -23,7 +23,7 @@ from functools import partial from numpy.testing import assert_allclose import unittest -from mxnet.test_utils import almost_equal, assert_almost_equal +from mxnet.test_utils import almost_equal, assert_almost_equal, effective_dtype from common import assert_raises_cudnn_not_satisfied, with_seed @@ -445,13 +445,13 @@ def check_rnn_forward(layer, inputs, deterministic=True): out.backward() if isinstance(inputs, mx.nd.NDArray): - input_grads = inputs.grad.asnumpy() + input_grads = inputs.grad else: input_grads = np.stack([x.grad.asnumpy() for x in inputs], axis=1) if deterministic: - mx.test_utils.assert_almost_equal(np_out, out.asnumpy(), rtol=1e-3, atol=1e-5) - mx.test_utils.assert_almost_equal(np_dx, input_grads, rtol=1e-3, atol=1e-5) + mx.test_utils.assert_almost_equal(np_out, out) + mx.test_utils.assert_almost_equal(np_dx, input_grads) def test_rnn_cells(): @@ -746,6 +746,9 @@ def check_rnn_bidir_layer_gradients(mode, input_size, hidden_size, num_layers, l @with_seed() @assert_raises_cudnn_not_satisfied(min_version='5.1.10') def test_fused_lstm_layer(): + if effective_dtype(mx.nd.array([1.,])) == np.float16: + print('Skipping test: effective dtype for this context is float16.') + return input_sizes = [8] hidden_sizes = [8, 16] num_layers = [1, 2, 3, 4] @@ -758,6 +761,9 @@ def test_fused_lstm_layer(): @with_seed() @assert_raises_cudnn_not_satisfied(min_version='5.1.10') def test_fused_gru_layer(): + if effective_dtype(mx.nd.array([1.,])) == np.float16: + print('Skipping test: effective dtype for this context is float16.') + return input_sizes = [8] hidden_sizes = [8, 16] num_layers = [1, 2, 3, 4] From cf8d091807b1ab0318ad4a7d01177d39aa2f0f12 Mon Sep 17 00:00:00 2001 From: Dick Carter Date: Wed, 16 Sep 2020 23:32:37 -0700 Subject: [PATCH 11/11] Trigger CI