From d7e4483d9bda66c8bc223227a3b9374bb916ef76 Mon Sep 17 00:00:00 2001 From: Taylor Salo Date: Tue, 24 Mar 2026 13:57:40 -0400 Subject: [PATCH 01/18] Run ruff with unsafe-fixes. --- src/modelarrayio/__init__.py | 2 +- src/modelarrayio/cifti.py | 2 +- src/modelarrayio/fixels.py | 34 +++++++++++++++++----------------- src/modelarrayio/voxels.py | 12 ++++++------ test/test_cifti_cli.py | 7 +------ test/test_voxels_cli.py | 7 +------ 6 files changed, 27 insertions(+), 37 deletions(-) diff --git a/src/modelarrayio/__init__.py b/src/modelarrayio/__init__.py index eae22f8..a3d4a83 100644 --- a/src/modelarrayio/__init__.py +++ b/src/modelarrayio/__init__.py @@ -70,7 +70,7 @@ EXTRA_REQUIRES['docs'] = EXTRA_REQUIRES['doc'] # Enable a handle to install all extra dependencies at once -EXTRA_REQUIRES['all'] = list(set([v for deps in EXTRA_REQUIRES.values() for v in deps])) +EXTRA_REQUIRES['all'] = list({v for deps in EXTRA_REQUIRES.values() for v in deps}) CLASSIFIERS = [ 'Development Status :: 3 - Alpha', diff --git a/src/modelarrayio/cifti.py b/src/modelarrayio/cifti.py index b58d437..7a43d21 100644 --- a/src/modelarrayio/cifti.py +++ b/src/modelarrayio/cifti.py @@ -301,7 +301,7 @@ def write_storage( return 0 # Establish a reference brain axis once to ensure consistent ordering across workers. - first_scalar, first_sources = next(iter(scalar_sources.items())) + _first_scalar, first_sources = next(iter(scalar_sources.items())) first_path = op.join(relative_root, first_sources[0]) _, reference_brain_names = extract_cifti_scalar_data(first_path) diff --git a/src/modelarrayio/fixels.py b/src/modelarrayio/fixels.py index 02771e1..a77752b 100644 --- a/src/modelarrayio/fixels.py +++ b/src/modelarrayio/fixels.py @@ -89,7 +89,7 @@ def gather_fixels(index_file, directions_file): path to a Nifti2 directions file """ - index_img, index_data = mif_to_nifti2(index_file) + _index_img, index_data = mif_to_nifti2(index_file) count_vol = index_data[..., 0].astype( np.uint32 ) # number of fixels in each voxel; by index.mif definition @@ -123,23 +123,23 @@ def gather_fixels(index_file, directions_file): sorted_coords = voxel_coords[id_sort] voxel_table = pd.DataFrame( - dict( - voxel_id=np.arange(voxel_coords.shape[0]), - i=sorted_coords[:, 0], - j=sorted_coords[:, 1], - k=sorted_coords[:, 2], - ) + { + 'voxel_id': np.arange(voxel_coords.shape[0]), + 'i': sorted_coords[:, 0], + 'j': sorted_coords[:, 1], + 'k': sorted_coords[:, 2], + } ) - directions_img, directions_data = mif_to_nifti2(directions_file) + _directions_img, directions_data = mif_to_nifti2(directions_file) fixel_table = pd.DataFrame( - dict( - fixel_id=fixel_ids, - voxel_id=fixel_voxel_ids, - x=directions_data[:, 0], - y=directions_data[:, 1], - z=directions_data[:, 2], - ) + { + 'fixel_id': fixel_ids, + 'voxel_id': fixel_voxel_ids, + 'x': directions_data[:, 0], + 'y': directions_data[:, 1], + 'z': directions_data[:, 2], + } ) return fixel_table, voxel_table @@ -192,11 +192,11 @@ def write_storage( scalars = defaultdict(list) sources_lists = defaultdict(list) print('Extracting .mif data...') - for ix, row in tqdm( + for _ix, row in tqdm( cohort_df.iterrows(), total=cohort_df.shape[0] ): # ix: index of row (start from 0); row: one row of data scalar_file = op.join(relative_root, row['source_file']) - scalar_img, scalar_data = mif_to_nifti2(scalar_file) + _scalar_img, scalar_data = mif_to_nifti2(scalar_file) scalars[row['scalar_name']].append(scalar_data) # append to specific scalar_name sources_lists[row['scalar_name']].append( row['source_file'] diff --git a/src/modelarrayio/voxels.py b/src/modelarrayio/voxels.py index f29da83..257d7b3 100644 --- a/src/modelarrayio/voxels.py +++ b/src/modelarrayio/voxels.py @@ -294,12 +294,12 @@ def write_storage( # voxel_table: records the coordinations of the nonzero voxels; coord starts from 0 (because using python) voxel_table = pd.DataFrame( - dict( - voxel_id=np.arange(voxel_coords.shape[0]), - i=voxel_coords[:, 0], - j=voxel_coords[:, 1], - k=voxel_coords[:, 2], - ) + { + 'voxel_id': np.arange(voxel_coords.shape[0]), + 'i': voxel_coords[:, 0], + 'j': voxel_coords[:, 1], + 'k': voxel_coords[:, 2], + } ) # upload each cohort's data diff --git a/test/test_cifti_cli.py b/test/test_cifti_cli.py index a509489..eb2c828 100644 --- a/test/test_cifti_cli.py +++ b/test/test_cifti_cli.py @@ -103,12 +103,7 @@ def test_concifti_cli_creates_expected_hdf5(tmp_path): # Column names exist and match subjects count grp = h5['scalars/THICK'] assert 'column_names' in grp - colnames = list( - map( - lambda x: x.decode('utf-8') if isinstance(x, bytes) else str(x), - grp['column_names'][...], - ) - ) + colnames = [x.decode('utf-8') if isinstance(x, bytes) else str(x) for x in grp['column_names'][...]] assert len(colnames) == 2 # Spot-check a couple values diff --git a/test/test_voxels_cli.py b/test/test_voxels_cli.py index e6266c1..3cb1e24 100644 --- a/test/test_voxels_cli.py +++ b/test/test_voxels_cli.py @@ -122,12 +122,7 @@ def test_convoxel_cli_creates_expected_hdf5(tmp_path): # Column names exist and match subjects count grp = h5['scalars/FA'] assert 'column_names' in grp - colnames = list( - map( - lambda x: x.decode('utf-8') if isinstance(x, bytes) else str(x), - grp['column_names'][...], - ) - ) + colnames = [x.decode('utf-8') if isinstance(x, bytes) else str(x) for x in grp['column_names'][...]] assert len(colnames) == 2 # Spot-check a voxel mapping (pick the third voxel) From d8d75cc67670bb46a398d09d13f7b4da969f8384 Mon Sep 17 00:00:00 2001 From: Taylor Salo Date: Tue, 24 Mar 2026 14:02:42 -0400 Subject: [PATCH 02/18] Address style issues. --- docs/conf.py | 4 ++-- src/modelarrayio/cifti.py | 8 +++++--- src/modelarrayio/fixels.py | 4 ++-- src/modelarrayio/h5_storage.py | 2 +- src/modelarrayio/s3_utils.py | 2 +- src/modelarrayio/tiledb_storage.py | 8 ++++---- src/modelarrayio/voxels.py | 12 ++++++++---- test/test_cifti_cli.py | 8 ++++++-- test/test_voxels_cli.py | 10 +++++++--- test/test_voxels_s3.py | 8 ++++++-- 10 files changed, 42 insertions(+), 24 deletions(-) diff --git a/docs/conf.py b/docs/conf.py index d693ccd..3e4ecd1 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -1,10 +1,10 @@ # Configuration file for the Sphinx documentation builder. # https://www.sphinx-doc.org/en/master/usage/configuration.html -from datetime import datetime +from datetime import UTC, datetime project = 'ModelArrayIO' -copyright = f'2017-{datetime.today().strftime("%Y")}, PennLINC developers' +copyright = f'2017-{datetime.now(tz=UTC).strftime("%Y")}, PennLINC developers' author = 'PennLINC developers' extensions = [ diff --git a/src/modelarrayio/cifti.py b/src/modelarrayio/cifti.py index 7a43d21..e7e6b72 100644 --- a/src/modelarrayio/cifti.py +++ b/src/modelarrayio/cifti.py @@ -28,6 +28,8 @@ from .tiledb_storage import write_column_names as tdb_write_column_names from .tiledb_storage import write_rows_in_column_stripes as tdb_write_stripes +logger = logging.getLogger(__name__) + def _cohort_to_long_dataframe(cohort_df, scalar_columns=None): scalar_columns = [col for col in (scalar_columns or []) if col] @@ -448,9 +450,9 @@ def _h5_to_ciftis(example_cifti, h5_file, analysis_name, cifti_output_dir): try: results_names = names_data.tolist() - except Exception: + except (AttributeError, OSError, TypeError, ValueError): print("Unable to read column names, using 'componentNNN' instead") - results_names = ['component%03d' % (n + 1) for n in range(results_matrix.shape[0])] + results_names = [f'component{n + 1:03d}' for n in range(results_matrix.shape[0])] # Make output directory if it does not exist if not op.isdir(cifti_output_dir): @@ -497,7 +499,7 @@ def h5_to_ciftis(): # Get an example cifti if args.example_cifti is None: - logging.warning( + logger.warning( 'No example cifti file provided, using the first cifti file from the cohort file' ) cohort_df = pd.read_csv(args.cohort_file) diff --git a/src/modelarrayio/fixels.py b/src/modelarrayio/fixels.py index a77752b..17f4091 100644 --- a/src/modelarrayio/fixels.py +++ b/src/modelarrayio/fixels.py @@ -455,9 +455,9 @@ def h5_to_mifs(example_mif, h5_file, analysis_name, fixel_output_dir): try: results_names = names_data.tolist() - except Exception: + except (AttributeError, OSError, TypeError, ValueError): print("Unable to read column names, using 'componentNNN' instead") - results_names = ['component%03d' % (n + 1) for n in range(results_matrix.shape[0])] + results_names = [f'component{n + 1:03d}' for n in range(results_matrix.shape[0])] # Make output directory if it does not exist if not op.isdir(fixel_output_dir): diff --git a/src/modelarrayio/h5_storage.py b/src/modelarrayio/h5_storage.py index dafcda6..31ebce0 100644 --- a/src/modelarrayio/h5_storage.py +++ b/src/modelarrayio/h5_storage.py @@ -28,7 +28,7 @@ def resolve_compression(compression, compression_level, shuffle): if comp == 'gzip': try: gzip_level = int(compression_level) - except Exception: + except (TypeError, ValueError): gzip_level = 4 gzip_level = max(0, min(9, gzip_level)) return comp, gzip_level, use_shuffle diff --git a/src/modelarrayio/s3_utils.py b/src/modelarrayio/s3_utils.py index 312d2b9..882c0f7 100644 --- a/src/modelarrayio/s3_utils.py +++ b/src/modelarrayio/s3_utils.py @@ -27,7 +27,7 @@ def _make_s3_client(): except ImportError: raise ImportError( 'boto3 is required for s3:// paths. Install with: pip install modelarrayio[s3]' - ) + ) from None anon = os.environ.get('MODELARRAYIO_S3_ANON', '').lower() in ('1', 'true', 'yes') if anon: from botocore import UNSIGNED diff --git a/src/modelarrayio/tiledb_storage.py b/src/modelarrayio/tiledb_storage.py index 26491e9..4774c4e 100644 --- a/src/modelarrayio/tiledb_storage.py +++ b/src/modelarrayio/tiledb_storage.py @@ -29,7 +29,7 @@ def _build_filter_list(compression: str | None, compression_level: int | None, s level = None try: level = int(compression_level) if compression_level is not None else None - except Exception: + except (TypeError, ValueError): level = None if comp == 'zstd': filters.append(tiledb.ZstdFilter(level=level if level is not None else 5)) @@ -129,7 +129,7 @@ def create_scalar_matrix_array( if sources_list is not None: try: A.meta['column_names'] = json.dumps(list(sources_list)) - except Exception: + except (TypeError, ValueError, tiledb.TileDBError): # Fallback without metadata if serialization fails logger.warning('Failed to write column_names metadata for %s', uri) logger.info('Finished writing array %s', uri) @@ -182,7 +182,7 @@ def create_empty_scalar_matrix_array( try: with tiledb.open(uri, 'w') as A: A.meta['column_names'] = json.dumps(list(map(str, sources_list))) - except Exception: + except (TypeError, ValueError, tiledb.TileDBError): logger.warning('Failed to write column_names metadata for %s', uri) return uri @@ -258,5 +258,5 @@ def write_column_names(base_uri: str, scalar: str, sources: Sequence[str]): try: with tiledb.Group(group_uri, 'w') as G: G.meta['column_names'] = json.dumps(sources) - except Exception: + except (TypeError, ValueError, tiledb.TileDBError): logger.warning('Failed to write column_names metadata for group %s', group_uri) diff --git a/src/modelarrayio/voxels.py b/src/modelarrayio/voxels.py index 257d7b3..335bae1 100644 --- a/src/modelarrayio/voxels.py +++ b/src/modelarrayio/voxels.py @@ -1,4 +1,5 @@ import argparse +import logging import os import os.path as op from collections import defaultdict @@ -25,6 +26,8 @@ from .tiledb_storage import create_empty_scalar_matrix_array as tdb_create_empty from .tiledb_storage import write_rows_in_column_stripes as tdb_write_stripes +logger = logging.getLogger(__name__) + def _load_cohort_voxels(cohort_df, group_mask_matrix, relative_root, s3_workers): """Load all voxel rows from the cohort, optionally in parallel. @@ -137,7 +140,7 @@ def _decode_names(arr): s = s.rstrip('\x00').strip() out.append(s) return out - except Exception: + except (AttributeError, OSError, TypeError, ValueError): return None results_names = None @@ -146,7 +149,7 @@ def _decode_names(arr): names_attr = results_matrix.attrs.get('colnames', None) if names_attr is not None: results_names = _decode_names(names_attr) - except Exception: + except (OSError, RuntimeError, TypeError, ValueError): results_names = None # 2) Fallback to dataset-based column names (new format) @@ -162,13 +165,14 @@ def _decode_names(arr): results_names = _decode_names(names_ds) if results_names: break - except Exception: + except (KeyError, OSError, RuntimeError, TypeError, ValueError): + logger.debug('Could not read column names from %s', p, exc_info=True) continue # 3) Final fallback to generated names if not results_names: print("Unable to read column names, using 'componentNNN' instead") - results_names = ['component%03d' % (n + 1) for n in range(results_matrix.shape[0])] + results_names = [f'component{n + 1:03d}' for n in range(results_matrix.shape[0])] # # Make output directory if it does not exist # has been done in h5_to_volumes_wrapper() # if op.isdir(volume_output_dir) == False: diff --git a/test/test_cifti_cli.py b/test/test_cifti_cli.py index eb2c828..629f98a 100644 --- a/test/test_cifti_cli.py +++ b/test/test_cifti_cli.py @@ -76,7 +76,9 @@ def test_concifti_cli_creates_expected_hdf5(tmp_path): '1.0', ] env = os.environ.copy() - proc = subprocess.run(cmd, cwd=str(tmp_path), env=env, capture_output=True, text=True) + proc = subprocess.run( + cmd, cwd=str(tmp_path), env=env, capture_output=True, text=True, check=False + ) assert proc.returncode == 0, f'concifti failed: {proc.stdout}\n{proc.stderr}' assert op.exists(out_h5) @@ -103,7 +105,9 @@ def test_concifti_cli_creates_expected_hdf5(tmp_path): # Column names exist and match subjects count grp = h5['scalars/THICK'] assert 'column_names' in grp - colnames = [x.decode('utf-8') if isinstance(x, bytes) else str(x) for x in grp['column_names'][...]] + colnames = [ + x.decode('utf-8') if isinstance(x, bytes) else str(x) for x in grp['column_names'][...] + ] assert len(colnames) == 2 # Spot-check a couple values diff --git a/test/test_voxels_cli.py b/test/test_voxels_cli.py index 3cb1e24..eca1203 100644 --- a/test/test_voxels_cli.py +++ b/test/test_voxels_cli.py @@ -61,7 +61,7 @@ def test_convoxel_cli_creates_expected_hdf5(tmp_path): with cohort_csv.open('w', newline='') as f: writer = csv.DictWriter(f, fieldnames=['scalar_name', 'source_file', 'source_mask_file']) writer.writeheader() - for sidx, (scalar_name, mask_name) in enumerate(subjects): + for _sidx, (scalar_name, mask_name) in enumerate(subjects): writer.writerow( { 'scalar_name': 'FA', @@ -98,7 +98,9 @@ def test_convoxel_cli_creates_expected_hdf5(tmp_path): '1.0', ] env = os.environ.copy() - proc = subprocess.run(cmd, cwd=str(tmp_path), env=env, capture_output=True, text=True) + proc = subprocess.run( + cmd, cwd=str(tmp_path), env=env, capture_output=True, text=True, check=False + ) assert proc.returncode == 0, f'convoxel failed: {proc.stdout}\n{proc.stderr}' assert op.exists(out_h5) @@ -122,7 +124,9 @@ def test_convoxel_cli_creates_expected_hdf5(tmp_path): # Column names exist and match subjects count grp = h5['scalars/FA'] assert 'column_names' in grp - colnames = [x.decode('utf-8') if isinstance(x, bytes) else str(x) for x in grp['column_names'][...]] + colnames = [ + x.decode('utf-8') if isinstance(x, bytes) else str(x) for x in grp['column_names'][...] + ] assert len(colnames) == 2 # Spot-check a voxel mapping (pick the third voxel) diff --git a/test/test_voxels_s3.py b/test/test_voxels_s3.py index 13b22ca..2a9d794 100644 --- a/test/test_voxels_s3.py +++ b/test/test_voxels_s3.py @@ -43,6 +43,7 @@ def group_mask_path(tmp_path_factory): boto3 = pytest.importorskip('boto3') from botocore import UNSIGNED from botocore.config import Config + from botocore.exceptions import BotoCoreError tmp = tmp_path_factory.mktemp('s3_group_mask') dest = tmp / 'group_mask.nii.gz' @@ -50,7 +51,7 @@ def group_mask_path(tmp_path_factory): key = f'{_PREFIX}/func_mask/{OHSU_SUBJECTS[0]}_func_mask.nii.gz' try: s3.download_file(_BUCKET, key, str(dest)) - except Exception as exc: + except (OSError, BotoCoreError) as exc: pytest.skip(f'S3 download unavailable: {exc}') return dest @@ -102,7 +103,9 @@ def test_convoxel_s3_parallel(tmp_path, group_mask_path): '4', ] env = {**os.environ, 'MODELARRAYIO_S3_ANON': '1'} - proc = subprocess.run(cmd, cwd=str(tmp_path), capture_output=True, text=True, env=env) + proc = subprocess.run( + cmd, cwd=str(tmp_path), capture_output=True, text=True, env=env, check=False + ) assert proc.returncode == 0, f'convoxel failed:\n{proc.stdout}\n{proc.stderr}' assert out_h5.exists() @@ -169,6 +172,7 @@ def test_convoxel_s3_serial_matches_parallel(tmp_path, group_mask_path): capture_output=True, text=True, env=env, + check=False, ) assert proc.returncode == 0, f'convoxel failed (workers={workers}):\n{proc.stderr}' From 1ca930f08e75fd572c6da66523ffbcd44902568e Mon Sep 17 00:00:00 2001 From: Taylor Salo Date: Tue, 24 Mar 2026 14:05:40 -0400 Subject: [PATCH 03/18] Add sphinx-copybutton. --- pyproject.toml | 1 + uv.lock | 17 +++++++++++++++++ 2 files changed, 18 insertions(+) diff --git a/pyproject.toml b/pyproject.toml index d969e93..d209259 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -42,6 +42,7 @@ doc = [ "sphinx>=6.2.1", "myst-parser>=2", "sphinx-argparse", + "sphinx-copybutton", "sphinx_rtd_theme", "sphinxcontrib-apidoc", "sphinxcontrib-bibtex", diff --git a/uv.lock b/uv.lock index 3645be9..bb40d7d 100644 --- a/uv.lock +++ b/uv.lock @@ -573,6 +573,7 @@ all = [ { name = "sphinx", version = "9.0.4", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.12'" }, { name = "sphinx", version = "9.1.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.12'" }, { name = "sphinx-argparse" }, + { name = "sphinx-copybutton" }, { name = "sphinx-rtd-theme" }, { name = "sphinxcontrib-apidoc" }, { name = "sphinxcontrib-bibtex" }, @@ -582,6 +583,7 @@ doc = [ { name = "sphinx", version = "9.0.4", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.12'" }, { name = "sphinx", version = "9.1.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.12'" }, { name = "sphinx-argparse" }, + { name = "sphinx-copybutton" }, { name = "sphinx-rtd-theme" }, { name = "sphinxcontrib-apidoc" }, { name = "sphinxcontrib-bibtex" }, @@ -627,6 +629,8 @@ requires-dist = [ { name = "sphinx", marker = "extra == 'doc'", specifier = ">=6.2.1" }, { name = "sphinx-argparse", marker = "extra == 'all'" }, { name = "sphinx-argparse", marker = "extra == 'doc'" }, + { name = "sphinx-copybutton", marker = "extra == 'all'" }, + { name = "sphinx-copybutton", marker = "extra == 'doc'" }, { name = "sphinx-rtd-theme", marker = "extra == 'all'" }, { name = "sphinx-rtd-theme", marker = "extra == 'doc'" }, { name = "sphinxcontrib-apidoc", marker = "extra == 'all'" }, @@ -1225,6 +1229,19 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/e5/43/9f0e9bfb3ce02cbf7747aa2185c48a9d6e42ba95736a5e8f511a5054d976/sphinx_argparse-0.5.2-py3-none-any.whl", hash = "sha256:d771b906c36d26dee669dbdbb5605c558d9440247a5608b810f7fa6e26ab1fd3", size = 12547, upload-time = "2024-07-17T12:08:06.307Z" }, ] +[[package]] +name = "sphinx-copybutton" +version = "0.5.2" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "sphinx", version = "9.0.4", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.12'" }, + { name = "sphinx", version = "9.1.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.12'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/fc/2b/a964715e7f5295f77509e59309959f4125122d648f86b4fe7d70ca1d882c/sphinx-copybutton-0.5.2.tar.gz", hash = "sha256:4cf17c82fb9646d1bc9ca92ac280813a3b605d8c421225fd9913154103ee1fbd", size = 23039, upload-time = "2023-04-14T08:10:22.998Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/9e/48/1ea60e74949eecb12cdd6ac43987f9fd331156388dcc2319b45e2ebb81bf/sphinx_copybutton-0.5.2-py3-none-any.whl", hash = "sha256:fb543fd386d917746c9a2c50360c7905b605726b9355cd26e9974857afeae06e", size = 13343, upload-time = "2023-04-14T08:10:20.844Z" }, +] + [[package]] name = "sphinx-rtd-theme" version = "3.1.0" From f95e64bb298cba36212f30a2fdd68ec254145614 Mon Sep 17 00:00:00 2001 From: Taylor Salo Date: Tue, 24 Mar 2026 14:32:30 -0400 Subject: [PATCH 04/18] Update. --- test/conftest.py | 13 +++++++++++++ tox.ini | 2 +- 2 files changed, 14 insertions(+), 1 deletion(-) create mode 100644 test/conftest.py diff --git a/test/conftest.py b/test/conftest.py new file mode 100644 index 0000000..69c4ff5 --- /dev/null +++ b/test/conftest.py @@ -0,0 +1,13 @@ +"""Pytest hooks shared by this package's tests.""" + +from __future__ import annotations + +import os +from pathlib import Path + +# CLI tests run subprocesses with cwd under tmp_path (outside the repo). Coverage +# discovers [tool.coverage] from the process cwd, so those children would default to +# line-only / non-parallel data and pytest-cov then fails on combine with arc data from +# the parent. Point subprocess coverage at the project config explicitly. +_ROOT = Path(__file__).resolve().parents[1] +os.environ.setdefault('COVERAGE_RCFILE', str(_ROOT / 'pyproject.toml')) diff --git a/tox.ini b/tox.ini index cf9b4bf..e422b2f 100644 --- a/tox.ini +++ b/tox.ini @@ -45,7 +45,7 @@ uv_resolution = min: lowest-direct commands = - pytest -m "not s3" --cov=modelarrayio --cov-report=term-missing --cov-report=xml {posargs:test} + pytest -m "not s3" --cov=modelarrayio --cov-config={toxinidir}/pyproject.toml --cov-report=term-missing --cov-report=xml {posargs:test} [testenv:lint] runner = uv-venv-lock-runner From 4a10923c4794471799e94439a66dc8cc92561c80 Mon Sep 17 00:00:00 2001 From: Taylor Salo Date: Tue, 24 Mar 2026 14:34:05 -0400 Subject: [PATCH 05/18] Add 3.14 to tests. --- .github/workflows/tox.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/tox.yml b/.github/workflows/tox.yml index dfc00ef..2ac5cc6 100644 --- a/.github/workflows/tox.yml +++ b/.github/workflows/tox.yml @@ -29,7 +29,7 @@ jobs: fail-fast: false matrix: os: ["ubuntu-latest"] - python-version: ["3.11", "3.12", "3.13"] + python-version: ["3.11", "3.12", "3.13", "3.14"] dependencies: ["latest"] include: - os: ubuntu-latest From 93754676bc6e7ac26edda4f0a8f0d86bf2a4fd51 Mon Sep 17 00:00:00 2001 From: Taylor Salo Date: Tue, 24 Mar 2026 16:50:45 -0400 Subject: [PATCH 06/18] Update docstrings. --- src/modelarrayio/cifti.py | 122 +++++++++++++++------- src/modelarrayio/fixels.py | 156 ++++++++++++++++++++++------- src/modelarrayio/h5_storage.py | 6 +- src/modelarrayio/tiledb_storage.py | 3 +- src/modelarrayio/voxels.py | 48 +++++---- 5 files changed, 239 insertions(+), 96 deletions(-) diff --git a/src/modelarrayio/cifti.py b/src/modelarrayio/cifti.py index e7e6b72..797f5c6 100644 --- a/src/modelarrayio/cifti.py +++ b/src/modelarrayio/cifti.py @@ -76,28 +76,22 @@ def _build_scalar_sources(long_df): def extract_cifti_scalar_data(cifti_file, reference_brain_names=None): - """ - Load a scalar cifti file and get its data and mapping - - Parameters: - ----------- + """Load a scalar cifti file and get its data and mapping - cifti_file: pathlike + Parameters + ---------- + cifti_file : :obj:`str` CIFTI2 file on disk - - reference_brain_names: np.ndarray + reference_brain_names : :obj:`numpy.ndarray` Array of vertex names - Returns: - -------- - cifti_scalar_data: np.ndarray + Returns + ------- + cifti_scalar_data: :obj:`numpy.ndarray` The scalar data from the cifti file - - brain_structures: np.ndarray + brain_structures: :obj:`numpy.ndarray` The per-greyordinate brain structures as strings - """ - cifti = cifti_file if hasattr(cifti_file, 'get_fdata') else nb.load(cifti_file) cifti_hdr = cifti.header axes = [cifti_hdr.get_axis(i) for i in range(cifti.ndim)] @@ -128,13 +122,22 @@ def extract_cifti_scalar_data(cifti_file, reference_brain_names=None): return cifti_data, brain_names - # vertex_table = pd.DataFrame( - # dict( - # vertex_id=np.arange(cifti_data.shape[0]), - # structure_name=brain_names) - def brain_names_to_dataframe(brain_names): + """Convert brain names to a dataframe. + + Parameters + ---------- + brain_names : :obj:`numpy.ndarray` + Array of brain names + + Returns + ------- + greyordinate_df : :obj:`pandas.DataFrame` + DataFrame with vertex_id and structure_id + structure_name_strings : :obj:`list` + List of structure names + """ # Make a lookup table for greyordinates structure_ids, structure_names = pd.factorize(brain_names) # Make them a list of strings @@ -156,11 +159,20 @@ def _load_cohort_cifti(cohort_long, relative_root, s3_workers): Threads share memory so reference_brain_names is accessed directly with no copying overhead. + Parameters + ---------- + cohort_long : :obj:`pandas.DataFrame` + Long-format cohort dataframe + relative_root : :obj:`str` + Root to which all paths are relative + s3_workers : :obj:`int` + Number of workers to use for parallel loading + Returns ------- - scalars : dict[str, list[np.ndarray]] + scalars : :obj:`dict` Per-scalar ordered list of 1-D subject arrays, ready for stripe-write. - reference_brain_names : np.ndarray + reference_brain_names : :obj:`numpy.ndarray` Brain structure names from the first file, for building greyordinate table. """ # Assign stable per-scalar subject indices in cohort order @@ -238,22 +250,54 @@ def write_storage( scalar_columns=None, s3_workers=1, ): - """ - Load all fixeldb data. + """Load all CIFTI data and write to an HDF5 file with configurable storage. + Parameters - ----------- - index_file: str - path to a Nifti2 index file - directions_file: str - path to a Nifti2 directions file - cohort_file: str - path to a csv with demographic info and paths to data - output_h5: str - path to a new .h5 file to be written - relative_root: str - path to which index_file, directions_file and cohort_file (and its contents) are relative - """ + ---------- + cohort_file : :obj:`str` + Path to a csv with demographic info and paths to data + backend : :obj:`str` + Backend to use for storage + output_h5 : :obj:`str` + Path to a new .h5 file to be written + output_tdb : :obj:`str` + Path to a new .tdb file to be written + relative_root : :obj:`str` + Root to which all paths are relative + storage_dtype : :obj:`str` + Floating type to store values + compression : :obj:`str` + HDF5 compression filter + compression_level : :obj:`int` + Gzip compression level (0-9) + shuffle : :obj:`bool` + Enable HDF5 shuffle filter + chunk_voxels : :obj:`int` + Chunk size along the voxel axis + target_chunk_mb : :obj:`float` + Target chunk size in MiB when auto-computing chunk_voxels + tdb_compression : :obj:`str` + TileDB compression filter + tdb_compression_level : :obj:`int` + TileDB compression level + tdb_shuffle : :obj:`bool` + Enable TileDB shuffle filter + tdb_tile_voxels : :obj:`int` + Tile size along the voxel axis + tdb_target_tile_mb : :obj:`float` + Target tile size in MiB when auto-computing tdb_tile_voxels + tdb_workers : :obj:`int` + Number of workers to use for parallel loading + scalar_columns : :obj:`list` + List of scalar columns to use + s3_workers : :obj:`int` + Number of workers to use for parallel loading + Returns + ------- + status : :obj:`int` + Status of the operation. 0 if successful, 1 if failed. + """ cohort_path = op.join(relative_root, cohort_file) cohort_df = pd.read_csv(cohort_path) cohort_long = _cohort_to_long_dataframe(cohort_df, scalar_columns=scalar_columns) @@ -413,7 +457,8 @@ def main(): def _h5_to_ciftis(example_cifti, h5_file, analysis_name, cifti_output_dir): - """Writes the contents of an hdf5 file to a fixels directory. + """Write the contents of an hdf5 file to a fixels directory. + The ``h5_file`` parameter should point to an HDF5 file that contains at least two datasets. There must be one called ``results/results_matrix``, that contains a matrix of fixel results. Each column contains a single result and each row is a @@ -425,6 +470,7 @@ def _h5_to_ciftis(example_cifti, h5_file, analysis_name, cifti_output_dir): Then each column in ``results/results_matrix`` is extracted to fill the data of a new Nifti2 file that gets converted to mif and named according to the corresponding item in ``results/has_names``. + Parameters ========== example_cifti: pathlike @@ -435,6 +481,7 @@ def _h5_to_ciftis(example_cifti, h5_file, analysis_name, cifti_output_dir): the name for the analysis results to be saved fixel_output_dir: str abspath to where the output cifti files will go. + Outputs ======= None @@ -488,6 +535,7 @@ def _h5_to_ciftis(example_cifti, h5_file, analysis_name, cifti_output_dir): def h5_to_ciftis(): + """Write the contents of an hdf5 file to a cifti directory.""" parser = get_h5_to_ciftis_parser() args = parser.parse_args() diff --git a/src/modelarrayio/fixels.py b/src/modelarrayio/fixels.py index 17f4091..8b33d3c 100644 --- a/src/modelarrayio/fixels.py +++ b/src/modelarrayio/fixels.py @@ -1,4 +1,5 @@ import argparse +import logging import os import os.path as op import shutil @@ -32,7 +33,20 @@ def is_exe(fpath): def mif_to_nifti2(mif_file): + """Convert a .mif file to a .nii file. + Parameters + ---------- + mif_file : :obj:`str` + Path to a .mif file + + Returns + ------- + nifti2_img : :obj:`nibabel.Nifti2Image` + Nifti2 image + data : :obj:`numpy.ndarray` + Data from the nifti2 image + """ if not mif_file.endswith('.nii'): dirpath = tempfile.mkdtemp() mrconvert = find_mrconvert() @@ -46,8 +60,10 @@ def mif_to_nifti2(mif_file): else: nii_file = mif_file dirpath = None + if not op.exists(nii_file): raise Exception(err) + nifti2_img = nb.load(nii_file) data = nifti2_img.get_fdata(dtype=np.float32).squeeze() # ... do stuff with dirpath @@ -57,8 +73,16 @@ def mif_to_nifti2(mif_file): def nifti2_to_mif(nifti2_image, mif_file): - # Note: because -force is not turned on in "mrconvert", the output files won't be overwritten! + """Convert a .nii file to a .mif file. + Parameters + ---------- + nifti2_image : :obj:`nibabel.Nifti2Image` + Nifti2 image + mif_file : :obj:`str` + Path to a .mif file + """ + # Note: because -force is not turned on in "mrconvert", the output files won't be overwritten! mrconvert = find_mrconvert() if mrconvert is None: raise Exception('The mrconvert executable could not be found on $PATH') @@ -79,16 +103,22 @@ def nifti2_to_mif(nifti2_image, mif_file): def gather_fixels(index_file, directions_file): - """ - Load the index and directions files to get lookup tables. + """Load the index and directions files to get lookup tables. + Parameters - ----------- - index_file: str - path to a Nifti2 index file - directions_file: str - path to a Nifti2 directions file + ---------- + index_file : :obj:`str` + Path to a Nifti2 index file + directions_file : :obj:`str` + Path to a Nifti2 directions file + + Returns + ------- + fixel_table : :obj:`pandas.DataFrame` + DataFrame with fixel_id, voxel_id, x, y, z + voxel_table : :obj:`pandas.DataFrame` + DataFrame with voxel_id, i, j, k """ - _index_img, index_data = mif_to_nifti2(index_file) count_vol = index_data[..., 0].astype( np.uint32 @@ -165,20 +195,59 @@ def write_storage( tdb_tile_voxels=0, tdb_target_tile_mb=2.0, ): - """ - Load all fixeldb data. + """Load all fixeldb data and write to an HDF5 file with configurable storage. + Parameters - ----------- - index_file: str - path to a Nifti2 index file - directions_file: str - path to a Nifti2 directions file - cohort_file: str - path to a csv with demographic info and paths to data - output_h5: str - path to a new .h5 file to be written - relative_root: str - path to which index_file, directions_file and cohort_file (and its contents) are relative + ---------- + index_file : :obj:`str` + Path to a Nifti2 index file + directions_file : :obj:`str` + Path to a Nifti2 directions file + cohort_file : :obj:`str` + Path to a csv with demographic info and paths to data + backend : :obj:`str` + Backend to use for storage + output_h5 : :obj:`str` + Path to a new .h5 file to be written + output_tdb : :obj:`str` + Path to a new .tdb file to be written + relative_root : :obj:`str` + Root to which all paths are relative + backend : :obj:`str` + Backend to use for storage + output_h5 : :obj:`str` + Path to a new .h5 file to be written + output_tdb : :obj:`str` + Path to a new .tdb file to be written + relative_root : :obj:`str` + Root to which all paths are relative + storage_dtype : :obj:`str` + Floating type to store values + compression : :obj:`str` + HDF5 compression filter + compression_level : :obj:`int` + Gzip compression level (0-9) + shuffle : :obj:`bool` + Enable HDF5 shuffle filter + chunk_voxels : :obj:`int` + Chunk size along the voxel axis + target_chunk_mb : :obj:`float` + Target chunk size in MiB when auto-computing chunk_voxels + tdb_compression : :obj:`str` + TileDB compression filter + tdb_compression_level : :obj:`int` + TileDB compression level + tdb_shuffle : :obj:`bool` + Enable TileDB shuffle filter + tdb_tile_voxels : :obj:`int` + Tile size along the voxel axis + tdb_target_tile_mb : :obj:`float` + Target tile size in MiB when auto-computing tdb_tile_voxels + + Returns + ------- + status : :obj:`int` + Status of the operation. 0 if successful, 1 if failed. """ # gather fixel data fixel_table, voxel_table = gather_fixels( @@ -192,15 +261,13 @@ def write_storage( scalars = defaultdict(list) sources_lists = defaultdict(list) print('Extracting .mif data...') - for _ix, row in tqdm( - cohort_df.iterrows(), total=cohort_df.shape[0] - ): # ix: index of row (start from 0); row: one row of data + # ix: index of row (start from 0); row: one row of data + for _ix, row in tqdm(cohort_df.iterrows(), total=cohort_df.shape[0]): scalar_file = op.join(relative_root, row['source_file']) _scalar_img, scalar_data = mif_to_nifti2(scalar_file) scalars[row['scalar_name']].append(scalar_data) # append to specific scalar_name - sources_lists[row['scalar_name']].append( - row['source_file'] - ) # append source mif filename to specific scalar_name + # append source mif filename to specific scalar_name + sources_lists[row['scalar_name']].append(row['source_file']) # Write the output if backend == 'hdf5': @@ -233,6 +300,7 @@ def write_storage( write_rows_in_column_stripes(dset, scalars[scalar_name]) f.close() return int(not op.exists(output_file)) + else: base_uri = op.join(relative_root, output_tdb) os.makedirs(base_uri, exist_ok=True) @@ -255,6 +323,7 @@ def write_storage( ) uri = op.join(base_uri, dataset_path) tdb_write_stripes(uri, scalars[scalar_name]) + return 0 @@ -274,7 +343,10 @@ def get_parser(): parser.add_argument( '--relative-root', '--relative_root', - help='Root to which all paths are relative, i.e. defining the (absolute) path to root directory of index_file, directions_file, cohort_file, and output_hdf5.', + help=( + 'Root to which all paths are relative, i.e. defining the (absolute) ' + 'path to root directory of index_file, directions_file, cohort_file, and output_hdf5.' + ), type=op.abspath, default='/inputs/', ) @@ -348,14 +420,20 @@ def get_parser(): '--chunk-voxels', '--chunk_voxels', type=int, - help='Chunk size along fixel/voxel axis. If 0, auto-compute based on --target-chunk-mb and number of subjects', + help=( + 'Chunk size along fixel/voxel axis. ' + 'If 0, auto-compute based on --target-chunk-mb and number of subjects' + ), default=0, ) parser.add_argument( '--tdb-tile-voxels', '--tdb_tile_voxels', type=int, - help='Tile length along item axis for TileDB. If 0, auto-compute based on --tdb-target-tile-mb', + help=( + 'Tile length along item axis for TileDB. ' + 'If 0, auto-compute based on --tdb-target-tile-mb' + ), default=0, ) parser.add_argument( @@ -384,10 +462,9 @@ def get_parser(): def main(): - + """Main function to write fixel data to an HDF5 or TileDB file.""" parser = get_parser() args = parser.parse_args() - import logging logging.basicConfig( level=getattr(logging, str(args.log_level).upper(), logging.INFO), @@ -418,6 +495,7 @@ def main(): def h5_to_mifs(example_mif, h5_file, analysis_name, fixel_output_dir): """Writes the contents of an hdf5 file to a fixels directory. + The ``h5_file`` parameter should point to an HDF5 file that contains at least two datasets. There must be one called ``results/results_matrix``, that contains a matrix of fixel results. Each column contains a single result and each row is a @@ -429,6 +507,7 @@ def h5_to_mifs(example_mif, h5_file, analysis_name, fixel_output_dir): Then each column in ``results/results_matrix`` is extracted to fill the data of a new Nifti2 file that gets converted to mif and named according to the corresponding item in ``results/has_names``. + Parameters ========== example_mif: str @@ -440,6 +519,7 @@ def h5_to_mifs(example_mif, h5_file, analysis_name, fixel_output_dir): fixel_output_dir: str abspath to where the output fixel data will go. the index and directions mif files should already be copied here. + Outputs ======= None @@ -533,7 +613,10 @@ def get_h5_to_fixels_parser(): parser.add_argument( '--relative-root', '--relative_root', - help='Root to which all paths are relative, i.e. defining the (absolute) path to root directory of index_file, directions_file, cohort_file, input_hdf5, and output_dir.', + help=( + 'Root to which all paths are relative, i.e. defining the (absolute) path to root ' + 'directory of index_file, directions_file, cohort_file, input_hdf5, and output_dir.' + ), type=os.path.abspath, ) parser.add_argument( @@ -549,7 +632,10 @@ def get_h5_to_fixels_parser(): parser.add_argument( '--output-dir', '--output_dir', - help='Fixel directory where outputs will be saved. If the directory does not exist, it will be automatically created.', + help=( + 'Fixel directory where outputs will be saved. ' + 'If the directory does not exist, it will be automatically created.' + ), ) return parser diff --git a/src/modelarrayio/h5_storage.py b/src/modelarrayio/h5_storage.py index 31ebce0..c5b440a 100644 --- a/src/modelarrayio/h5_storage.py +++ b/src/modelarrayio/h5_storage.py @@ -42,7 +42,8 @@ def compute_chunk_shape_full_subjects( num_items = int(num_items) if num_subjects <= 0 or num_items <= 0: raise ValueError( - f'Cannot compute chunk shape with zero-length dimension: num_subjects={num_subjects}, num_items={num_items}' + 'Cannot compute chunk shape with zero-length dimension: ' + f'num_subjects={num_subjects}, num_items={num_items}' ) subjects_per_chunk = num_subjects @@ -178,8 +179,7 @@ def write_column_names(h5_file: h5py.File, scalar: str, sources: pd.Series | lis def write_rows_in_column_stripes(dset, rows): - """ - Fill a 2D HDF5 dataset by buffering column-aligned stripes to minimize + """Fill a 2D HDF5 dataset by buffering column-aligned stripes to minimize chunk recompression, using about one chunk's worth of memory. Parameters diff --git a/src/modelarrayio/tiledb_storage.py b/src/modelarrayio/tiledb_storage.py index 4774c4e..883e765 100644 --- a/src/modelarrayio/tiledb_storage.py +++ b/src/modelarrayio/tiledb_storage.py @@ -48,7 +48,8 @@ def compute_tile_shape_full_subjects( num_items = int(num_items) if num_subjects <= 0 or num_items <= 0: raise ValueError( - f'Cannot compute tile shape with zero-length dimension: num_subjects={num_subjects}, num_items={num_items}' + 'Cannot compute tile shape with zero-length dimension: ' + f'num_subjects={num_subjects}, num_items={num_items}' ) subjects_per_tile = num_subjects diff --git a/src/modelarrayio/voxels.py b/src/modelarrayio/voxels.py index 335bae1..eeb3169 100644 --- a/src/modelarrayio/voxels.py +++ b/src/modelarrayio/voxels.py @@ -103,7 +103,7 @@ def flattened_image(scalar_image, scalar_mask, group_mask_matrix): def h5_to_volumes(h5_file, analysis_name, group_mask_file, output_extension, volume_output_dir): - """Convert stat results in .h5 file to a list of volume (.nii or .nii.gz) files""" + """Convert stat results in .h5 file to a list of volume (.nii or .nii.gz) files.""" data_type_tosave = np.float32 @@ -195,9 +195,8 @@ def _decode_names(arr): output_img.to_filename(out_file) # if this result is p.value, also write out 1-p.value (1m.p.value) - if ( - 'p.value' in valid_result_name - ): # the result name contains "p.value" (from R package broom) + # the result name contains "p.value" (from R package broom) + if 'p.value' in valid_result_name: valid_result_name_1mpvalue = valid_result_name.replace('p.value', '1m.p.value') out_file_1mpvalue = op.join( volume_output_dir, @@ -259,10 +258,10 @@ def write_storage( tdb_target_tile_mb=2.0, s3_workers=1, ): - """ - Load all volume data and write to an HDF5 file with configurable storage. + """Load all volume data and write to an HDF5 file with configurable storage. + Parameters - ----------- + ---------- group_mask_file: str Path to a NIfTI-1 binary group mask file. cohort_file: str @@ -278,7 +277,8 @@ def write_storage( compression_level: int Gzip compression level (0-9). Only used when compression == 'gzip'. Default 4. shuffle: bool - Enable HDF5 shuffle filter to improve compression. Default True (effective when compression != 'none'). + Enable HDF5 shuffle filter to improve compression. + Default True (effective when compression != 'none'). chunk_voxels: int Chunk size along the voxel axis. If 0, auto-compute using target_chunk_mb. Default 0. target_chunk_mb: float @@ -289,14 +289,14 @@ def write_storage( # Load the group mask image to define the rows of the matrix group_mask_img = nb.load(op.join(relative_root, group_mask_file)) - group_mask_matrix = ( - group_mask_img.get_fdata() > 0 - ) # get_fdata(): get matrix data in float format - voxel_coords = np.column_stack( - np.nonzero(group_mask_img.get_fdata()) - ) # np.nonzero() returns the coords of nonzero elements; then np.column_stack() stack them together as an (#voxels, 3) array - - # voxel_table: records the coordinations of the nonzero voxels; coord starts from 0 (because using python) + # get_fdata(): get matrix data in float format + group_mask_matrix = group_mask_img.get_fdata() > 0 + # np.nonzero() returns the coords of nonzero elements; + # then np.column_stack() stack them together as an (#voxels, 3) array + voxel_coords = np.column_stack(np.nonzero(group_mask_img.get_fdata())) + + # voxel_table: records the coordinations of the nonzero voxels; + # coord starts from 0 (because using python) voxel_table = pd.DataFrame( { 'voxel_id': np.arange(voxel_coords.shape[0]), @@ -348,8 +348,10 @@ def write_storage( base_uri = op.join(relative_root, output_tdb) os.makedirs(base_uri, exist_ok=True) - # Store voxel coordinates as a small TileDB array (optional): we store as metadata on base group - # Here we serialize as a dense 2D array for parity with HDF5 tables if desired, but keep it simple: metadata JSON + # Store voxel coordinates as a small TileDB array (optional): + # we store as metadata on base group + # Here we serialize as a dense 2D array for parity with HDF5 tables if desired, + # but keep it simple: metadata JSON # Create arrays for each scalar for scalar_name in scalars.keys(): num_subjects = len(scalars[scalar_name]) @@ -401,12 +403,18 @@ def get_h5_to_volume_parser(): parser.add_argument( '--output-dir', '--output_dir', - help='A directory where output volume files will be saved. If the directory does not exist, it will be automatically created.', + help=( + 'A directory where output volume files will be saved. ' + 'If the directory does not exist, it will be automatically created.' + ), ) parser.add_argument( '--output-ext', '--output_ext', - help='The extension for output volume data. Options are .nii.gz (default) and .nii. Please provide the prefix dot.', + help=( + 'The extension for output volume data. ' + 'Options are .nii.gz (default) and .nii. Please provide the prefix dot.' + ), default='.nii.gz', ) return parser From de116d58601ce97a5200aa876383bbddae059b08 Mon Sep 17 00:00:00 2001 From: Taylor Salo Date: Tue, 24 Mar 2026 16:59:10 -0400 Subject: [PATCH 07/18] Add usage page. --- docs/index.md | 9 ++++-- docs/usage.rst | 62 ++++++++++++++++++++++++++++++++++++++ pyproject.toml | 2 +- src/modelarrayio/fixels.py | 1 - 4 files changed, 70 insertions(+), 4 deletions(-) create mode 100644 docs/usage.rst diff --git a/docs/index.md b/docs/index.md index ba76bf2..11513a4 100644 --- a/docs/index.md +++ b/docs/index.md @@ -1,4 +1,9 @@ -# ModelArrayIO - ```{include} ../README.md ``` + +```{toctree} +:maxdepth: 2 +:caption: Usage + +usage +``` \ No newline at end of file diff --git a/docs/usage.rst b/docs/usage.rst new file mode 100644 index 0000000..539294d --- /dev/null +++ b/docs/usage.rst @@ -0,0 +1,62 @@ +######################################## +Using ModelArrayIO from the command line +######################################## + + +******** +confixel +******** + +.. argparse:: + :ref: modelarrayio.fixels:get_parser + :prog: confixel + :func: get_parser + + +******** +convoxel +******** + +.. argparse:: + :ref: modelarrayio.voxels:get_parser + :prog: convoxel + :func: get_parser + + +******** +concifti +******** + +.. argparse:: + :ref: modelarrayio.cifti:get_parser + :prog: concifti + :func: get_parser + + +**************** +fixelstats_write +**************** + +.. argparse:: + :ref: modelarrayio.fixels:get_h5_to_fixels_parser + :prog: fixelstats_write + :func: get_h5_to_fixels_parser + +***************** +volumestats_write +***************** + +.. argparse:: + :ref: modelarrayio.voxels:get_h5_to_volume_parser + :prog: volumestats_write + :func: get_h5_to_volume_parser + + +**************** +ciftistats_write +**************** + +.. argparse:: + :ref: modelarrayio.cifti:get_h5_to_ciftis_parser + :prog: ciftistats_write + :func: get_h5_to_ciftis_parser diff --git a/pyproject.toml b/pyproject.toml index d209259..265b3bc 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -103,7 +103,7 @@ exclude = ".*" [tool.ruff] line-length = 99 -src = ["src"] +src = ["src/modelarrayio"] [tool.ruff.lint] extend-select = [ diff --git a/src/modelarrayio/fixels.py b/src/modelarrayio/fixels.py index 8b33d3c..376e121 100644 --- a/src/modelarrayio/fixels.py +++ b/src/modelarrayio/fixels.py @@ -328,7 +328,6 @@ def write_storage( def get_parser(): - parser = argparse.ArgumentParser(description='Create a hdf5 file of fixel data') parser.add_argument('--index-file', '--index_file', help='Index File', required=True) parser.add_argument( From c6e2013f01f1e9d52275aeda7bc57546217783f7 Mon Sep 17 00:00:00 2001 From: Taylor Salo Date: Tue, 24 Mar 2026 17:01:46 -0400 Subject: [PATCH 08/18] Update. --- docs/conf.py | 4 +--- docs/index.md | 9 --------- docs/index.rst | 8 ++++++++ 3 files changed, 9 insertions(+), 12 deletions(-) delete mode 100644 docs/index.md create mode 100644 docs/index.rst diff --git a/docs/conf.py b/docs/conf.py index 3e4ecd1..e751dc8 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -14,9 +14,7 @@ ] templates_path = ['_templates'] -source_suffix = { - '.md': 'markdown', -} +source_suffix = '.rst' master_doc = 'index' exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store'] diff --git a/docs/index.md b/docs/index.md deleted file mode 100644 index 11513a4..0000000 --- a/docs/index.md +++ /dev/null @@ -1,9 +0,0 @@ -```{include} ../README.md -``` - -```{toctree} -:maxdepth: 2 -:caption: Usage - -usage -``` \ No newline at end of file diff --git a/docs/index.rst b/docs/index.rst new file mode 100644 index 0000000..240da55 --- /dev/null +++ b/docs/index.rst @@ -0,0 +1,8 @@ +```{include} ../README.md +``` + +. toctree:: + :maxdepth: 2 + :caption: Contents: + + usage From f42a0f331f3f4da36666b95465638aa2cc90a909 Mon Sep 17 00:00:00 2001 From: Taylor Salo Date: Tue, 24 Mar 2026 17:03:24 -0400 Subject: [PATCH 09/18] Update index.rst --- docs/index.rst | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/docs/index.rst b/docs/index.rst index 240da55..72fb332 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -1,8 +1,7 @@ -```{include} ../README.md -``` +.. include:: ../README.md -. toctree:: +.. toctree:: :maxdepth: 2 - :caption: Contents: + :caption: Usage - usage + usage \ No newline at end of file From c0923dc9befd7ff3325e52cab2c4cba3526de846 Mon Sep 17 00:00:00 2001 From: Taylor Salo Date: Tue, 24 Mar 2026 17:05:48 -0400 Subject: [PATCH 10/18] Update conf.py --- docs/conf.py | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/conf.py b/docs/conf.py index e751dc8..9bd5491 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -9,6 +9,7 @@ extensions = [ 'myst_parser', + 'sphinxarg.ext', 'sphinx_copybutton', 'sphinx_rtd_theme', ] From 800c2df34d520d33e99d99c14353204677107dda Mon Sep 17 00:00:00 2001 From: Taylor Salo Date: Tue, 24 Mar 2026 17:06:10 -0400 Subject: [PATCH 11/18] Update conf.py --- docs/conf.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/conf.py b/docs/conf.py index 9bd5491..b27b785 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -15,7 +15,7 @@ ] templates_path = ['_templates'] -source_suffix = '.rst' +source_suffix = {'.rst': 'restructuredtext'} master_doc = 'index' exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store'] From 10fe549cfd614913fd9c09ba8bd2a53c42739d7b Mon Sep 17 00:00:00 2001 From: Taylor Salo Date: Tue, 24 Mar 2026 17:08:57 -0400 Subject: [PATCH 12/18] Update __init__.py --- src/modelarrayio/__init__.py | 84 +----------------------------------- 1 file changed, 2 insertions(+), 82 deletions(-) diff --git a/src/modelarrayio/__init__.py b/src/modelarrayio/__init__.py index a3d4a83..f2f0c9c 100644 --- a/src/modelarrayio/__init__.py +++ b/src/modelarrayio/__init__.py @@ -1,83 +1,3 @@ -# emacs: -*- mode: python; py-indent-offset: 4; indent-tabs-mode: nil -*- -# vi: set ft=python sts=4 ts=4 sw=4 et: -""" -Base module variables for ModelArrayIO -""" +from . import cifti, fixels, voxels -from .__about__ import __version__ - -__author__ = 'The PennLINC Developers' -__copyright__ = 'Copyright 2021, PennLINC, Perelman School of Medicine, University of Pennsylvania' -__credits__ = [ - 'Matt Cieslak', - 'Tinashe Tapera', - 'Chenying Zhao', - 'Steven Meisler', - 'Valerie Sydnor', - 'Josiane Bourque', -] -__license__ = '3-clause BSD' -__maintainer__ = 'Matt Cieslak' -__status__ = 'Prototype' -__url__ = 'https://github.com/PennLINC/ModelArrayIO' -__packagename__ = 'modelarrayio' -__description__ = 'ModelArrayIO converters for fixel/voxel/greyordinate data' -__longdesc__ = """\ -A package that converts between imaging formats and the HDF5 file format used by ModelArray. -""" - -DOWNLOAD_URL = f'https://github.com/pennlinc/{__packagename__}/archive/{__version__}.tar.gz' - - -SETUP_REQUIRES = [ - 'setuptools>=18.0', - 'numpy', - 'cython', -] - -REQUIRES = [ - 'numpy', - 'future', - 'nilearn', - 'nibabel>=2.2.1', - 'pandas', - 'h5py', -] - -LINKS_REQUIRES = [] - -TESTS_REQUIRES = [ - 'mock', - 'codecov', - 'pytest', -] - -EXTRA_REQUIRES = { - 'doc': [ - 'sphinx>=1.5.3', - 'sphinx_rtd_theme', - 'sphinx-argparse', - 'pydotplus', - 'pydot>=1.2.3', - 'packaging', - 'nbsphinx', - ], - 'tests': TESTS_REQUIRES, - 'duecredit': ['duecredit'], - 'datalad': ['datalad'], - 'resmon': ['psutil>=5.4.0'], -} -EXTRA_REQUIRES['docs'] = EXTRA_REQUIRES['doc'] - -# Enable a handle to install all extra dependencies at once -EXTRA_REQUIRES['all'] = list({v for deps in EXTRA_REQUIRES.values() for v in deps}) - -CLASSIFIERS = [ - 'Development Status :: 3 - Alpha', - 'Intended Audience :: Science/Research', - 'Topic :: Scientific/Engineering :: Image Recognition', - 'License :: OSI Approved :: BSD License', - 'Programming Language :: Python :: 3.5', - 'Programming Language :: Python :: 3.6', - 'Programming Language :: Python :: 3.7', -] +__all__ = ['cifti', 'fixels', 'voxels'] From eeddc1d0b5e023f1b60389e6600be7e1d3005a03 Mon Sep 17 00:00:00 2001 From: Taylor Salo Date: Tue, 24 Mar 2026 17:13:52 -0400 Subject: [PATCH 13/18] Update usage.rst --- docs/usage.rst | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/docs/usage.rst b/docs/usage.rst index 539294d..1d00cb3 100644 --- a/docs/usage.rst +++ b/docs/usage.rst @@ -8,7 +8,7 @@ confixel ******** .. argparse:: - :ref: modelarrayio.fixels:get_parser + :ref: modelarrayio.fixels.get_parser :prog: confixel :func: get_parser @@ -18,7 +18,7 @@ convoxel ******** .. argparse:: - :ref: modelarrayio.voxels:get_parser + :ref: modelarrayio.voxels.get_parser :prog: convoxel :func: get_parser @@ -28,7 +28,7 @@ concifti ******** .. argparse:: - :ref: modelarrayio.cifti:get_parser + :ref: modelarrayio.cifti.get_parser :prog: concifti :func: get_parser @@ -38,7 +38,7 @@ fixelstats_write **************** .. argparse:: - :ref: modelarrayio.fixels:get_h5_to_fixels_parser + :ref: modelarrayio.fixels.get_h5_to_fixels_parser :prog: fixelstats_write :func: get_h5_to_fixels_parser @@ -47,7 +47,7 @@ volumestats_write ***************** .. argparse:: - :ref: modelarrayio.voxels:get_h5_to_volume_parser + :ref: modelarrayio.voxels.get_h5_to_volume_parser :prog: volumestats_write :func: get_h5_to_volume_parser @@ -57,6 +57,6 @@ ciftistats_write **************** .. argparse:: - :ref: modelarrayio.cifti:get_h5_to_ciftis_parser + :ref: modelarrayio.cifti.get_h5_to_ciftis_parser :prog: ciftistats_write :func: get_h5_to_ciftis_parser From c5c60c8413bca38fe3a21a70abf4c6d18e2eb859 Mon Sep 17 00:00:00 2001 From: Taylor Salo Date: Tue, 24 Mar 2026 17:20:52 -0400 Subject: [PATCH 14/18] Update. --- docs/_templates/class.rst | 52 ++++++++++++++++++++ docs/_templates/function.rst | 12 +++++ docs/_templates/module.rst | 40 +++++++++++++++ docs/api.rst | 28 +++++++++++ docs/conf.py | 16 ++++++ docs/index.rst | 1 - docs/sphinxext/github_link.py | 92 +++++++++++++++++++++++++++++++++++ docs/usage.rst | 6 +-- 8 files changed, 243 insertions(+), 4 deletions(-) create mode 100644 docs/_templates/class.rst create mode 100644 docs/_templates/function.rst create mode 100644 docs/_templates/module.rst create mode 100644 docs/api.rst create mode 100644 docs/sphinxext/github_link.py diff --git a/docs/_templates/class.rst b/docs/_templates/class.rst new file mode 100644 index 0000000..0b9ab90 --- /dev/null +++ b/docs/_templates/class.rst @@ -0,0 +1,52 @@ +{{ fullname }} +{{ underline }} + +.. currentmodule:: {{ module }} + +.. autoclass:: {{ objname }} + :no-members: + :no-inherited-members: + :no-special-members: + + {% block methods %} + {% if methods %} + + .. automethod:: __init__ + + {% if ('__call__' in all_methods) or ('__call__' in inherited_members) %} + + .. automethod:: __call__ + + {% endif %} + + .. rubric:: Methods + + .. autosummary:: + :toctree: + {% for item in all_methods %} + {%- if not item.startswith('_') or item in ['__mul__', '__getitem__', '__len__'] %} + ~{{ name }}.{{ item }} + {%- endif -%} + {%- endfor %} + {% for item in inherited_members %} + {%- if item in ['__mul__', '__getitem__', '__len__'] %} + ~{{ name }}.{{ item }} + {%- endif -%} + {%- endfor %} + {% endif %} + {% endblock %} + + + {% block attributes %} + {% if attributes %} + .. rubric:: Attributes + + .. autosummary:: + :toctree: + {% for item in all_attributes %} + {%- if not item.startswith('_') %} + ~{{ name }}.{{ item }} + {%- endif -%} + {%- endfor %} + {% endif %} + {% endblock %} diff --git a/docs/_templates/function.rst b/docs/_templates/function.rst new file mode 100644 index 0000000..7e775e8 --- /dev/null +++ b/docs/_templates/function.rst @@ -0,0 +1,12 @@ +:mod:`{{module}}`.{{objname}} +{{ underline }}==================== + +.. currentmodule:: {{ module }} + +.. autofunction:: {{ objname }} + +.. .. include:: {{module}}.{{objname}}.examples + +.. raw:: html + +
diff --git a/docs/_templates/module.rst b/docs/_templates/module.rst new file mode 100644 index 0000000..a6e07d3 --- /dev/null +++ b/docs/_templates/module.rst @@ -0,0 +1,40 @@ +{{ fullname }} +{{ underline }} + +.. automodule:: {{ fullname }} + + {% block functions %} + {% if functions %} + .. rubric:: Functions + + .. autosummary:: + :toctree: + {% for item in functions %} + {{ item }} + {%- endfor %} + {% endif %} + {% endblock %} + + {% block classes %} + {% if classes %} + .. rubric:: Classes + + .. autosummary:: + :toctree: + {% for item in classes %} + {{ item }} + {%- endfor %} + {% endif %} + {% endblock %} + + {% block exceptions %} + {% if exceptions %} + .. rubric:: Exceptions + + .. autosummary:: + :toctree: + {% for item in exceptions %} + {{ item }} + {%- endfor %} + {% endif %} + {% endblock %} diff --git a/docs/api.rst b/docs/api.rst new file mode 100644 index 0000000..4d520aa --- /dev/null +++ b/docs/api.rst @@ -0,0 +1,28 @@ +.. currentmodule:: modelarrayio + +### +API +### + + +***************************************** +:mod:`modelarrayio`: ModelArrayIO functions +***************************************** + +.. automodule:: modelarrayio + :no-members: + :no-inherited-members: + +.. currentmodule:: modelarrayio + +.. autosummary:: + :template: module.rst + :toctree: generated/ + + cifti + fixels + voxels + h5_storage + parser + s3_utils + tiledb_storage diff --git a/docs/conf.py b/docs/conf.py index b27b785..2202a5a 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -1,6 +1,8 @@ # Configuration file for the Sphinx documentation builder. # https://www.sphinx-doc.org/en/master/usage/configuration.html +import os +import sys from datetime import UTC, datetime project = 'ModelArrayIO' @@ -26,3 +28,17 @@ myst_heading_slugs = True suppress_warnings = ['image.not_readable'] + +# If extensions (or modules to document with autodoc) are in another directory, +# add these directories to sys.path here. If the directory is relative to the +# documentation root, use os.path.abspath to make it absolute, like shown here. +sys.path.append(os.path.abspath('sphinxext')) +sys.path.insert(0, os.path.abspath('../modelarrayio')) + +from github_link import make_linkcode_resolve + +# The following is used by sphinx.ext.linkcode to provide links to github +linkcode_resolve = make_linkcode_resolve( + 'modelarrayio', + 'https://github.com/pennlinc/ModelArrayIO/blob/{revision}/{package}/{path}#L{lineno}', +) \ No newline at end of file diff --git a/docs/index.rst b/docs/index.rst index 72fb332..ab8909e 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -2,6 +2,5 @@ .. toctree:: :maxdepth: 2 - :caption: Usage usage \ No newline at end of file diff --git a/docs/sphinxext/github_link.py b/docs/sphinxext/github_link.py new file mode 100644 index 0000000..862d11b --- /dev/null +++ b/docs/sphinxext/github_link.py @@ -0,0 +1,92 @@ +""" +This vendored script comes from scikit-learn: +https://github.com/scikit-learn/scikit-learn/blob/master/doc/sphinxext/github_link.py +""" + +import inspect +import os +import shutil +import subprocess +import sys +from functools import partial +from operator import attrgetter + + +def _get_git_revision(): + git_cmd = shutil.which('git') + if git_cmd is None: + return None + try: + revision = subprocess.check_output( + [git_cmd, 'rev-parse', '--short', 'HEAD'], + text=False, + ).strip() + except (subprocess.CalledProcessError, OSError): + print('Failed to execute git to get revision') + return None + return revision.decode('utf-8') + + +def _linkcode_resolve(domain, info, package, url_fmt, revision): + """Determine a link to online source for a class/method/function + + This is called by sphinx.ext.linkcode + + An example with a long-untouched module that everyone has + >>> _linkcode_resolve('py', {'module': 'tty', + ... 'fullname': 'setraw'}, + ... package='tty', + ... url_fmt='http://hg.python.org/cpython/file/' + ... '{revision}/Lib/{package}/{path}#L{lineno}', + ... revision='xxxx') + 'http://hg.python.org/cpython/file/xxxx/Lib/tty/tty.py#L18' + """ + + if revision is None: + return + if domain not in ('py', 'pyx'): + return + if not info.get('module') or not info.get('fullname'): + return + + class_name = info['fullname'].split('.')[0] + module = __import__(info['module'], fromlist=[class_name]) + obj = attrgetter(info['fullname'])(module) + + # Unwrap the object to get the correct source + # file in case that is wrapped by a decorator + obj = inspect.unwrap(obj) + + try: + fn = inspect.getsourcefile(obj) + except Exception: # noqa:BLE001 + fn = None + if not fn: + try: + fn = inspect.getsourcefile(sys.modules[obj.__module__]) + except Exception: # noqa:BLE001 + fn = None + if not fn: + return + + fn = os.path.relpath(fn, start=os.path.dirname(__import__(package).__file__)) + try: + lineno = inspect.getsourcelines(obj)[1] + except Exception: # noqa:BLE001 + lineno = '' + return url_fmt.format(revision=revision, package=package, path=fn, lineno=lineno) + + +def make_linkcode_resolve(package, url_fmt): + """Returns a linkcode_resolve function for the given URL format + + revision is a git commit reference (hash or name) + + package is the name of the root module of the package + + url_fmt is along the lines of ('https://github.com/USER/PROJECT/' + 'blob/{revision}/{package}/' + '{path}#L{lineno}') + """ + revision = _get_git_revision() + return partial(_linkcode_resolve, revision=revision, package=package, url_fmt=url_fmt) diff --git a/docs/usage.rst b/docs/usage.rst index 1d00cb3..b72dc12 100644 --- a/docs/usage.rst +++ b/docs/usage.rst @@ -1,6 +1,6 @@ -######################################## -Using ModelArrayIO from the command line -######################################## +##### +Usage +##### ******** From 0c2c078f05f63615c5539e7de1190b14063ac02b Mon Sep 17 00:00:00 2001 From: Taylor Salo Date: Tue, 24 Mar 2026 17:21:49 -0400 Subject: [PATCH 15/18] Update conf.py --- docs/conf.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/conf.py b/docs/conf.py index 2202a5a..ed7e410 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -35,10 +35,10 @@ sys.path.append(os.path.abspath('sphinxext')) sys.path.insert(0, os.path.abspath('../modelarrayio')) -from github_link import make_linkcode_resolve +from github_link import make_linkcode_resolve # noqa: E402 # The following is used by sphinx.ext.linkcode to provide links to github linkcode_resolve = make_linkcode_resolve( 'modelarrayio', 'https://github.com/pennlinc/ModelArrayIO/blob/{revision}/{package}/{path}#L{lineno}', -) \ No newline at end of file +) From cf1357663a91c714662c51fbcab40ddf67fc7be6 Mon Sep 17 00:00:00 2001 From: Taylor Salo Date: Tue, 24 Mar 2026 17:26:59 -0400 Subject: [PATCH 16/18] Update. --- docs/conf.py | 9 +++++++++ docs/index.rst | 3 ++- 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/docs/conf.py b/docs/conf.py index ed7e410..059a524 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -11,6 +11,15 @@ extensions = [ 'myst_parser', + 'sphinx.ext.napoleon', + 'matplotlib.sphinxext.plot_directive', + 'sphinx.ext.autodoc', + 'sphinx.ext.autosectionlabel', + 'sphinx.ext.autosummary', + 'sphinx.ext.doctest', + 'sphinx.ext.ifconfig', + 'sphinx.ext.intersphinx', + 'sphinx.ext.linkcode', 'sphinxarg.ext', 'sphinx_copybutton', 'sphinx_rtd_theme', diff --git a/docs/index.rst b/docs/index.rst index ab8909e..10d33f1 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -3,4 +3,5 @@ .. toctree:: :maxdepth: 2 - usage \ No newline at end of file + usage + api From 2d5ac25c85c2194002ea507ee65222454faf4fc9 Mon Sep 17 00:00:00 2001 From: Taylor Salo Date: Tue, 24 Mar 2026 17:29:23 -0400 Subject: [PATCH 17/18] Update conf.py --- docs/conf.py | 1 - 1 file changed, 1 deletion(-) diff --git a/docs/conf.py b/docs/conf.py index 059a524..63b1e08 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -12,7 +12,6 @@ extensions = [ 'myst_parser', 'sphinx.ext.napoleon', - 'matplotlib.sphinxext.plot_directive', 'sphinx.ext.autodoc', 'sphinx.ext.autosectionlabel', 'sphinx.ext.autosummary', From 251d3ee299155ebb89626ffdc155bbea73955634 Mon Sep 17 00:00:00 2001 From: Taylor Salo Date: Wed, 25 Mar 2026 08:58:11 -0400 Subject: [PATCH 18/18] Update README.md --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 2b9d03d..1fb6e48 100644 --- a/README.md +++ b/README.md @@ -1,8 +1,8 @@ -# ConFixel: Moved to ModelArrayIO +# ModelArrayIO ConFixel has moved beyond fixels and handles a lot of other modalities. It also got optimized! -[Going forward, ConFixel is now ModelArrayIO.](https://pennlinc.github.io/ModelArrayIO/). +[Going forward, ConFixel is now ModelArrayIO.](https://pennlinc.github.io/ModelArrayIO/). This repository will be left here because the URL is in the publication.