diff --git a/.github/workflows/python-app.yml b/.github/workflows/python-app.yml index 023cdf6..9406edf 100644 --- a/.github/workflows/python-app.yml +++ b/.github/workflows/python-app.yml @@ -17,8 +17,7 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - python-version: ['3.9', '3.10', '3.11'] - constrain: ["-c constrain_min.txt", ""] + python-version: ['3.9', '3.10', '3.11', '3.12', '3.13'] steps: - uses: actions/checkout@v4 - name: Set up Python ${{ matrix.python-version }} @@ -30,7 +29,7 @@ jobs: python -m pip install --upgrade pip python -m pip install --upgrade uv uv pip install --system flake8 pytest - if [ -f requirements.txt ]; then uv pip install --system -U -r requirements.txt; fi + uv pip install --system -r pyproject.toml - name: Lint with flake8 run: | # stop the build if there are Python syntax errors or undefined names diff --git a/CHANGELOG.md b/CHANGELOG.md index a72e3e6..22e259b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,18 @@ # Changelog -## [Unreleased] +## [v1.0.0] - 2025-10-03 + +### First stable release + +- **search_datasets** endpoint +- **search_cells** endpoint +- **download_urls** endpoint +- **dataset_metadata** endpoint +- **embedding_data** endpoint +- **heatmap** endpoint +- **differential_expression** endpoint +- **session management** with token-based authentication + ## [v0.0.1] - 2024-12-12 diff --git a/README.md b/README.md index d97e190..7fd02b0 100644 --- a/README.md +++ b/README.md @@ -1,154 +1,59 @@ -# Python client for CAP GraphQL API -Python client uses Ariadne code generation https://ariadnegraphql.org/blog/2023/02/02/ariadne-codegen to generate pydantic models and graphQL client. +# Python client for Cell-Annotation-Platform GraphQL API +[![PyPI version](https://img.shields.io/pypi/v/cap-sc-client)](https://pypi.org/project/cap-sc-client/) -1. Add new queries to `queries.graphql` -2. 
Run `ariadne-codegen` +The Python package provides a simple interface to interact with the [Cell Annotation Platform](https://celltype.info/) (CAP) GraphQL API. The package lets you search for datasets and cell label metadata, and retrieve molecular profiles of cell types published on CAP. -# API calls +## Installation -Create CAP object `cap = Cap()` and use it to access public API endpoints. +```bash +pip install -U cap-sc-client +``` -If you plan to use CAP API endpoints that require authoriization please set environment variables either `CAP_LOGIN` / `CAP_PWD` or `CAP_TOKEN` with custom token that you can get from CAP UI. CAP will automatically use this information to authenticate you during authorized endpoints requests. +## Basic usage -## Search datasets -```Python -cap.search_datasets(search=None, organism=None, tissue=None, assay=None, limit = 50, offset=0, sort=[]) -``` -returns CAP published datasets searched by a keyword that could be filtered by `organism`, `tissue` or `assay`. -The result could be paginated using `limit`, `offset` and sorted using `sort` and `ASC`, `DESC` keywords - -Example: -```Python -cap.search_datasets( - search="blood" - organism=["Homo sapiens"], - tissue=["stomach","pyloric antrum"], - assay=["10x 3' v1"], - sort=[{'name':'ASC'}] -) -``` -Result: -```Python -{ - 'results': [ - { - 'id': '420', - 'name': 'Charting human development ...', - 'description': 'Developing human multi-organ ...', - 'cellCount': 155232, - 'labelsets': [ - { - 'id': '3714', - 'name': 'assay', - 'description': None, - 'labels': [ - { - 'id': '25154', - 'name': "10x 3' v2", - 'count': 146343, - 'typename__': 'Label' - } - ... - ], - 'typename__': 'Labelset' - } - ... - ], - 'project': { - 'version': 1.0, - 'id': '263', - 'name': 'Charting human ...', - 'owner': { - 'displayName': 'CAP Data Upload' - }, - 'typename__': 'Project' - } - } - ...
- ] -} -``` -## Dataset download URLs -```Python -cap.download_urls(id) -``` -returns URLs for published dataset files: annData, Seurat, JSON (zip), JSON (tar) +The main goal of the package is to provide an interface to access CAP datasets and cell label annotations collection via standard python toolings like pandas dataframes. -Example: -```Python -cap.download_urls(678) ``` -Result: -```Python -{ - 'downloadUrls': { - 'annDataUrl': 'https://storage.googleapis.com/...h5ad', - 'seuratUrl': None, - 'capJsonUrlTar': 'https://storage.googleapis.com/...h5ad.json.tar', - 'capJsonUrlZip': 'https://storage.googleapis.com/...h5ad.json.zip', - 'typename__': 'DatasetDownloadUrlsResponse' - } -} +>>> from cap_sc_client import CapClient +>>> cp = CapClient() +>>> datasets = cp.search_datasets(limit=5, offset=0, organism=["Homo sapiens"]) +>>> datasets.head() + id name cell_count project +0 1427 Skin fibroblasts - Pan-d... 337376.0 {'id': '613', 'name': 'Pan... +1 1426 Skin fibroblast scRNA-seq ... 153546.0 {'id': '613', 'name': 'Pan... +2 1157 Single cell atlas of the h... 72788.0 {'id': '544', 'name': 'Sin... +3 1156 snRNA-seq of human retina ... 3177310.0 {'id': '544', 'name': 'Sin... +4 1154 snRNA-seq of human retina ... 691008.0 {'id': '544', 'name': 'Sin... +>>> labels = cp.search_cell_labels(limit=10, offset=0) +>>> labels[["full_name", "ontology_term_exists", "marker_genes"]] + full_name ontology_term_exists marker_genes +0 cycling stromal ... True [MKI67, TOP2A, C... +1 alveolar type 1 ... True [PDPN, HOPX] +2 mesoderm 2 (ZEB2) False [ZEB2] +3 acinar cell True [PRSS1] +4 neuron True [STMN2] +5 smooth muscle cell True [DES, CNN1, ACTA... 
+6 ciliated cell True [FOXJ1] +7 Schwann cell True [MPZ] +8 pancreatic cells False [PDX1] +9 club cell True [SCGB1A1] ``` -## Search cell labels -```Python -cap.search_cell_labels(search=None, organism=None, tissue=None, assay=None, limit = 50, offset=0, sort=[]) -``` -returns cell labels from CAP published datasets searched by a keyword that could be filtered by `organism`, `tissue` or `assay`. -The result could be paginated using `limit`, `offset` and sorted using `sort` and `ASC`, `DESC` keywords - -Example: -```Python -cap.search_cell_labels( - search="blood" - organism=["Homo sapiens"], - tissue=["stomach","pyloric antrum"], - assay=["10x 3' v1"], - sort=[{'name':'ASC'}] -) -``` -Result: -```Python -{ - 'lookupCells': [ - { - 'id': '51853', - 'fullName': 'progenitor cell', - 'name': 'progenitor cell', - 'ontologyTermExists': True, - 'ontologyTermId': 'CL:0011026', - 'ontologyTerm': 'progenitor cell', - 'synonyms': ['unknown'], - 'categoryOntologyTermExists': True, - 'categoryOntologyTermId': 'CL:0011115', - 'categoryOntologyTerm': 'precursor cell', - 'categoryFullName': 'precursor cell', - 'markerGenes': ['EOMES'], - 'canonicalMarkerGenes': ['unknown'], - 'count': 53089, - 'ontologyAssessment': None, - 'labelset': { - 'id': '6387', - 'name': 'cell_type', - 'description': 'An atlas ...', - 'dataset': { - 'id': '532', - 'name': 'Second Trimester ...', - 'project': { - 'id': '305', - 'name': 'Human developing neocortex by area', - 'version': 1, - 'typename__': 'Project' - }, - 'typename__': 'Dataset' - }, - 'typename__': 'Labelset' - }, - 'typename__': 'Label' - } - ... - ] -} -``` \ No newline at end of file +There is also an `MDSession` class for interacting with the molecular profiles of cell types within a specific dataset. However, this class requires the user to be familiar with the CAP MD page. For more examples, please refer to the [examples](./examples/) folder and the GitHub Wiki for detailed documentation.
+ +## Documentation + +Detailed documentation is available on [GitHub Wiki](https://github.com/cellannotation/cap-python-client/wiki). + + +## Changelog + +See [CHANGELOG.md](./CHANGELOG.md). + +## Development + +This project uses [Ariadne code generation](https://ariadnegraphql.org/blog/2023/02/02/ariadne-codegen) to generate the pydantic models and the GraphQL client. To update or add queries, follow the steps below: + +1. Add new queries to [queries.graphql](./queries.graphql) +2. Run `ariadne-codegen` diff --git a/cap_client/__init__.py b/cap_sc_client/__init__.py similarity index 100% rename from cap_client/__init__.py rename to cap_sc_client/__init__.py diff --git a/cap_client/cap.py b/cap_sc_client/cap.py similarity index 76% rename from cap_client/cap.py rename to cap_sc_client/cap.py index c6f5ec6..f39af03 100644 --- a/cap_client/cap.py +++ b/cap_sc_client/cap.py @@ -1,9 +1,4 @@ from typing import List, Dict, Literal -import time -import http.client -import json -import jwt -import os from uuid import uuid4 import pandas as pd import httpx @@ -30,7 +25,6 @@ from .client.heatmap import HeatmapDatasetEmbeddingDiffHeatMap CAP_API_URL = "https://celltype.info/graphql" -CAP_AUTHENTICATE_URL = "us-central1-capv2-gke-prod.cloudfunctions.net" # https://${var.gcp_region}-${var.gcp_project_id}.cloudfunctions.net/authenticate-token SESSION_ID = str DIFF_KEY = str @@ -39,7 +33,18 @@ class MDSession: + """ + A session for processing molecular data page endpoints. + """ def __init__(self, dataset_id: str, _client: _Client): + """ + Initializes the MDSession with the provided dataset ID and client. + Do not call directly; use CapClient.md_session instead. + + Args: + dataset_id (str): The unique identifier of the dataset to be processed. + _client (_Client): An instance of the client to interact with the backend API.
+ """ self.__client: _Client = _client self._dataset_id: str = dataset_id self._session_id: str = None @@ -195,8 +200,6 @@ def embedding_data( selection_key_minor = selection_key_minor, ) - # TODO: is not workgin with new rc1 - # update request later https://capdevelopment.atlassian.net/browse/MVP-6489 response = self.__client.embedding_data( dataset_id = self.dataset_id, options = options @@ -342,6 +345,33 @@ def heatmap( selection_key: SELECTION_KEY = None, include_reference: bool = True ) -> HeatmapDatasetEmbeddingDiffHeatMap: + """ + Return the data to plot a heatmap for the top differentially expressed genes from specific DE analysis. + + Parameters: + ----------- + diff_key : DIFF_KEY + The string key associated with the differential expression analysis results. + n_top_genes : int, optional + The number of top differentially expressed genes to include in the heatmap. Default is 3. + max_cells_displayed : int, optional + The maximum number of cells to display in the heatmap. Default is 1000. + gene_name_filter : str, optional + A filter to include only genes matching a given prefix. Should be used to focus on specific gene. Default is None. + pseudogenes_filter : bool, optional + If True, filters out genes which are often over-expressed but biologically non-informative. + Defaults to True. See https://github.com/cellannotation/cap-gene-filtering for details. + selection_key : SELECTION_KEY, optional + If provided, the heatmap will include only cells within the specified selection. Default is None. + include_reference : bool, optional + If True, includes a reference selection in the heatmap. Default is True. + + Returns: + -------- + HeatmapDatasetEmbeddingDiffHeatMap + An object containing the heatmap data, including gene names, cell IDs, expression values, + and selection information. 
+ """ options=PostHeatmapInput( diff_key = diff_key, @@ -354,7 +384,6 @@ def heatmap( selection_key = selection_key, ) - # TODO: update api, it was changed on rc1 https://capdevelopment.atlassian.net/browse/MVP-6489 res = self.__client.heatmap( dataset_id=self.dataset_id, options=options, @@ -367,73 +396,11 @@ class CapClient: def __init__( self, url: str = CAP_API_URL, - auth_url: str = CAP_AUTHENTICATE_URL, - login: str = None, - pwd: str = None, - custom_token: str = None ) -> None: headers = None client = httpx.Client(timeout=300, headers=headers) self.__client = _Client(url, headers=headers, http_client=client) - self._login = login if login is not None else os.environ.get('CAP_LOGIN') - self._pwd = pwd if pwd is not None else os.environ.get('CAP_PWD') - self._custom_token = custom_token if custom_token is not None else os.environ.get('CAP_TOKEN') - self._token: str = None - self._token_expiry_time: time = None - self._error_status: str = None - self.auth_url = auth_url - - def _auth_request ( - self, - base_url: str, - url: str, - body: dict - ) -> bool: - connection = http.client.HTTPSConnection(base_url) - headers = {'Content-type': 'application/json'} - connection.request("POST", url = url, body=json.dumps(body), headers=headers) - response = connection.getresponse() - if (response.status == 200): - try: - response = response.read().decode() - self._token = json.loads(response)['idToken'] - # TODO : Add signature verification https://capdevelopment.atlassian.net/browse/MVP-6392 - self._token_expiry_time = jwt.decode(self._token, options={"verify_signature": False})['exp'] - self._error_status = None - return True - except: # TODO: implement appropriate error handling https://capdevelopment.atlassian.net/browse/MVP-6489 - self._error_status = "Failed to parse 200 OK response to get ID token" - return False - self._error_status = "Failed to get ID token " + response.reason - return False - - def authenticate( - self - ) -> bool: - # try authenticate by 
custom token first - if self._custom_token is not None: - body = {'token':self._custom_token} - if (self._auth_request(base_url= self.auth_url, url = "/authenticate-token", body = body)): - return True - if self._login is not None and self._pwd is not None: - body = {'email':self._login, 'password': self._pwd} - if (self._auth_request(base_url = self.auth_url, url = "/authenticate-user", body = body)): - return True - self._error_status = "Missing CAP client authetication settings. Check CAP_LOGIN, CAP_PWD or CAP_TOKEN enviroment variables." - return False - - @property - def error_status(self) -> str: - return self._error_status - - @property - def id_token(self) -> str: - return self._token - - @property - def token_expiry_time(self) -> time: - return self._token_expiry_time - + def search_datasets( self, search: List[str] = None, @@ -444,6 +411,33 @@ def search_datasets( offset: int = 0, sort: List[Dict[str, str]] = [], ) -> pd.DataFrame: + """ + Search public datasets, the analogue of the [dataset search page on CAP](https://celltype.info/search/datasets). + + Parameters: + ----------- + search : List[str], optional + A list of search terms to filter datasets by name. Defaults to None. + organism : List[str], optional + A list of organism names to filter datasets. Defaults to None. + tissue : List[str], optional + A list of tissue types to filter datasets. Defaults to None. + assay : List[str], optional + A list of assay types to filter datasets. Defaults to None. + limit : int, optional + The maximum number of datasets to return. Defaults to 50. + offset : int, optional + The number of datasets to skip before starting to collect the result set. Defaults to 0. + sort : List[Dict[str, str]], optional + A list of dictionaries specifying the sorting order. Each dictionary should have a single key-value pair + where the key is the field to sort by and the value is either "asc" for ascending or "desc" for descending order. 
+ Example: [{"name": "asc"}, {"createdAt": "desc"}]. Defaults to an empty list. + + Returns: + -------- + pd.DataFrame + A DataFrame containing the search results with columns corresponding to dataset attributes. + """ sorting = [] for item in sort: key = list(item.keys())[0] @@ -467,7 +461,9 @@ def search_datasets( response = self.__client.search_datasets( options=search_options, filter=search_filter, search=search_input ) - df = pd.DataFrame([r.model_dump() for r in response.results]) + df = pd.DataFrame([r.model_dump() for r in response.results]) + if "typename__" in df.columns: + df.drop(columns=["typename__"], inplace=True) return df def search_cell_labels( @@ -480,6 +476,33 @@ def search_cell_labels( offset: int = 0, sort: List[Dict[str, str]] = [], ) -> pd.DataFrame: + """ + Search for cell labels from CAP published datasets. The analogue of the [cell labels search page on CAP](https://celltype.info/search/cell-labels). + + Parameters: + ----------- + search : List[str], optional + A list of search terms to filter datasets by name. Defaults to None. + organism : List[str], optional + A list of organism names to filter datasets. Defaults to None. + tissue : List[str], optional + A list of tissue types to filter datasets. Defaults to None. + assay : List[str], optional + A list of assay types to filter datasets. Defaults to None. + limit : int, optional + The maximum number of cell labels to return. Defaults to 50. + offset : int, optional + The number of cell labels to skip before starting to collect the result set. Defaults to 0. + sort : List[Dict[str, str]], optional + A list of dictionaries specifying the sorting order. Each dictionary should have a single key-value pair + where the key is the field to sort by and the value is either "asc" for ascending or "desc" for descending order. + Example: [{"name": "asc"}, {"createdAt": "desc"}]. Defaults to an empty list.
+ + Returns: + -------- + pd.DataFrame + A DataFrame containing the search results with columns corresponding to cell annotation metadata attributes. + """ sorting = [] for item in sort: key = list(item.keys())[0] @@ -507,7 +530,9 @@ def search_cell_labels( response = self.__client.lookup_cells( options=search_options, filter=search_filter, search=search_input ) - df = pd.DataFrame([lc.model_dump() for lc in response.lookup_cells]) + df = pd.DataFrame([lc.model_dump() for lc in response.lookup_cells]) + if "typename__" in df.columns: + df.drop(columns=["typename__"], inplace=True) return df def md_session(self, dataset_id: str) -> MDSession: diff --git a/cap_client/client/__init__.py b/cap_sc_client/client/__init__.py similarity index 100% rename from cap_client/client/__init__.py rename to cap_sc_client/client/__init__.py diff --git a/cap_client/client/base_client.py b/cap_sc_client/client/base_client.py similarity index 100% rename from cap_client/client/base_client.py rename to cap_sc_client/client/base_client.py diff --git a/cap_client/client/base_model.py b/cap_sc_client/client/base_model.py similarity index 100% rename from cap_client/client/base_model.py rename to cap_sc_client/client/base_model.py diff --git a/cap_client/client/client.py b/cap_sc_client/client/client.py similarity index 100% rename from cap_client/client/client.py rename to cap_sc_client/client/client.py diff --git a/cap_client/client/cluster_types.py b/cap_sc_client/client/cluster_types.py similarity index 100% rename from cap_client/client/cluster_types.py rename to cap_sc_client/client/cluster_types.py diff --git a/cap_client/client/create_session.py b/cap_sc_client/client/create_session.py similarity index 100% rename from cap_client/client/create_session.py rename to cap_sc_client/client/create_session.py diff --git a/cap_client/client/dataset_initial_state_query.py b/cap_sc_client/client/dataset_initial_state_query.py similarity index 100% rename from 
cap_client/client/dataset_initial_state_query.py rename to cap_sc_client/client/dataset_initial_state_query.py diff --git a/cap_client/client/dataset_ready.py b/cap_sc_client/client/dataset_ready.py similarity index 100% rename from cap_client/client/dataset_ready.py rename to cap_sc_client/client/dataset_ready.py diff --git a/cap_client/client/download_urls.py b/cap_sc_client/client/download_urls.py similarity index 100% rename from cap_client/client/download_urls.py rename to cap_sc_client/client/download_urls.py diff --git a/cap_client/client/embedding_clusters.py b/cap_sc_client/client/embedding_clusters.py similarity index 100% rename from cap_client/client/embedding_clusters.py rename to cap_sc_client/client/embedding_clusters.py diff --git a/cap_client/client/embedding_data.py b/cap_sc_client/client/embedding_data.py similarity index 100% rename from cap_client/client/embedding_data.py rename to cap_sc_client/client/embedding_data.py diff --git a/cap_client/client/enums.py b/cap_sc_client/client/enums.py similarity index 100% rename from cap_client/client/enums.py rename to cap_sc_client/client/enums.py diff --git a/cap_client/client/exceptions.py b/cap_sc_client/client/exceptions.py similarity index 100% rename from cap_client/client/exceptions.py rename to cap_sc_client/client/exceptions.py diff --git a/cap_client/client/files_status.py b/cap_sc_client/client/files_status.py similarity index 100% rename from cap_client/client/files_status.py rename to cap_sc_client/client/files_status.py diff --git a/cap_client/client/fragments.py b/cap_sc_client/client/fragments.py similarity index 100% rename from cap_client/client/fragments.py rename to cap_sc_client/client/fragments.py diff --git a/cap_client/client/general_de.py b/cap_sc_client/client/general_de.py similarity index 100% rename from cap_client/client/general_de.py rename to cap_sc_client/client/general_de.py diff --git a/cap_client/client/heatmap.py b/cap_sc_client/client/heatmap.py similarity index 
100% rename from cap_client/client/heatmap.py rename to cap_sc_client/client/heatmap.py diff --git a/cap_client/client/highly_variable_genes.py b/cap_sc_client/client/highly_variable_genes.py similarity index 100% rename from cap_client/client/highly_variable_genes.py rename to cap_sc_client/client/highly_variable_genes.py diff --git a/cap_client/client/input_types.py b/cap_sc_client/client/input_types.py similarity index 100% rename from cap_client/client/input_types.py rename to cap_sc_client/client/input_types.py diff --git a/cap_client/client/lookup_cells.py b/cap_sc_client/client/lookup_cells.py similarity index 100% rename from cap_client/client/lookup_cells.py rename to cap_sc_client/client/lookup_cells.py diff --git a/cap_client/client/md_commons_query.py b/cap_sc_client/client/md_commons_query.py similarity index 100% rename from cap_client/client/md_commons_query.py rename to cap_sc_client/client/md_commons_query.py diff --git a/cap_client/client/md_ready.py b/cap_sc_client/client/md_ready.py similarity index 100% rename from cap_client/client/md_ready.py rename to cap_sc_client/client/md_ready.py diff --git a/cap_client/client/search_datasets.py b/cap_sc_client/client/search_datasets.py similarity index 100% rename from cap_client/client/search_datasets.py rename to cap_sc_client/client/search_datasets.py diff --git a/pyproject.toml b/pyproject.toml index 19a907b..a46a720 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -3,10 +3,10 @@ name="cap_sc_client" version="0.0.10" license = "MIT" authors= [ - { name="M. Sokolov" }, { name="R. Mukhin" }, - { name="A. Isaev" }, { name="E. Biederstedt" }, + { name="A. Isaev" }, + { name="M. Sokolov" }, ] description = "Python client for Cell-Annotation-Platform (CAP) GraphQL API." 
readme = {file = "README.txt", content-type = "text/markdown"} @@ -18,7 +18,6 @@ dependencies = [ "httpx>=0.27.2", "pydantic>=2.10.3", "pydantic_core>=2.27.1", - "PyJWT>=2.10.1", "pandas>=2.0.0", ] @@ -35,7 +34,6 @@ requires = ["setuptools"] [project.optional-dependencies] dev = ["ariadne"] -test = ["pytest"] [tool.ariadne-codegen] remote_schema_url = "https://celltype.info/graphql" @@ -47,9 +45,9 @@ target_package_path="./cap_client/" target_package_name="client" client_name="_Client" -[tool.pytest.ini_options] -pythonpath = ["cap_client"] -log_level = "DEBUG" - [tool.setuptools.packages.find] exclude = ["tmp*", "test*", "examples*"] + + +[tool.pytest.ini_options] +pythonpath = ["./"] diff --git a/requirements.txt b/requirements.txt deleted file mode 100644 index 1e678b7..0000000 --- a/requirements.txt +++ /dev/null @@ -1,5 +0,0 @@ -httpx>=0.27.2 -pydantic>=2.10.3 -pydantic_core>=2.27.1 -PyJWT>=2.10.1 -pandas>=2.0.0 diff --git a/test/__init__.py b/test/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/test/cap_auth_test.py b/test/cap_auth_test.py deleted file mode 100644 index 002d23c..0000000 --- a/test/cap_auth_test.py +++ /dev/null @@ -1,82 +0,0 @@ -import pytest -from unittest.mock import patch, Mock -from cap_client import CapClient - -CAP_AUTHENTICATE_URL = "us-central1-capv2-gke-prod.cloudfunctions.net" - -def test_authenticate_with_custom_token_success(): - cap = CapClient() - with patch.object(CapClient, '_auth_request') as mock_request: - mock_request.return_value = True - cap._custom_token = "dummy_token" - - result = cap.authenticate() - - assert result is True - mock_request.assert_called_once_with( - base_url='us-central1-capv2-gke-prod.cloudfunctions.net', - url='/authenticate-token', - body={'token': 'dummy_token'} - ) - -def test_authenticate_with_custom_token_failure(): - cap = CapClient() - with patch.object(CapClient, '_auth_request') as mock_request: - mock_request.return_value = False - cap._custom_token = "dummy_token" 
- - result = cap.authenticate() - - assert result is False - mock_request.assert_called_once() - -def test_authenticate_with_credentials_success(): - cap = CapClient() - with patch.object(CapClient, '_auth_request') as mock_request: - # Arrange - mock_request.return_value = True - cap._custom_token = None - cap._login = "test@example.com" - cap._pwd = "password123" - - # Act - result = cap.authenticate() - - # Assert - assert result is True - mock_request.assert_called_once_with( - base_url='us-central1-capv2-gke-prod.cloudfunctions.net', - url='/authenticate-user', - body={'email': 'test@example.com', 'password': 'password123'} - ) - -def test_authenticate_with_credentials_failure(): - cap = CapClient() - with patch.object(CapClient, '_auth_request') as mock_request: - # Arrange - mock_request.return_value = False - cap._custom_token = None - cap._login = "test@example.com" - cap._pwd = "password123" - - # Act - result = cap.authenticate() - - # Assert - assert result is False - mock_request.assert_called_once() - -def test_authenticate_with_no_credentials(): - cap = CapClient() - # Arrange - cap._custom_token = None - cap._login = None - cap._pwd = None - - # Act - result = cap.authenticate() - - # Assert - assert result is False - assert cap.error_status == "Missing CAP client authetication settings. Check CAP_LOGIN, CAP_PWD or CAP_TOKEN enviroment variables." 
- \ No newline at end of file diff --git a/test/public_api_test.py b/test/public_api_test.py deleted file mode 100644 index 0ed7d87..0000000 --- a/test/public_api_test.py +++ /dev/null @@ -1,271 +0,0 @@ -import pytest -from unittest.mock import MagicMock, ANY -from cap_client import CapClient, MDSession -import pandas as pd - - -CAP_AUTHENTICATE_USER_URL = "authenticate-user-wg6qkl5yea-uc.a.run.app" -CAP_AUTHENTICATE_TOKEN_URL = "authenticate-token-wg6qkl5yea-uc.a.run.app" - -# ------------------------------ -# Tests for CapClient methods -# ------------------------------ - -def test_search_datasets(): - cap = CapClient() - - df = cap.search_datasets( - search="name", - organism=["Homo sapiens"], - tissue=["stomach"], - assay=["10x 3' v1"], - limit=10, - offset=0, - sort=[{"name": "ASC"}] - ) - assert type(df) is pd.DataFrame, "Wrong response type!" - - -def test_search_cell_labels(): - cap = CapClient() - - df = cap.search_cell_labels( - search="name", - organism=["Homo sapiens"], - tissue=["brain"], - assay=["10x 3' v1"], - limit=5, - offset=0, - sort=[{"name": "ASC"}] - ) - assert type(df) is pd.DataFrame, "Wrong response type!" - - -def test_open_md_session(): - cap = CapClient() - dataset_id = "1234" - md_session = cap.md_session(dataset_id) - assert isinstance(md_session, MDSession) - assert md_session.dataset_id == dataset_id - - -# ------------------------------ -# Tests for MDSession methods -# ------------------------------ - -@pytest.fixture -def dummy_md_session(): - """ - Returns an MDSession instance along with a dummy client (a MagicMock) - so we can patch its methods. 
- """ - dummy_client = MagicMock() - md_session = MDSession(dataset_id="1234", _client=dummy_client) - return md_session, dummy_client - - -@pytest.mark.parametrize("ready", [True, False]) -def test_check_md_ready(ready, dummy_md_session): - md_session, client = dummy_md_session - dataset_mock = MagicMock() - dataset_mock.is_embeddings_up_to_date = ready - client.dataset_ready.return_value = MagicMock(dataset=dataset_mock) - - if ready: - md_session._check_md_ready() - else: - with pytest.raises(RuntimeError): - md_session._check_md_ready() - - -def test_create_session(dummy_md_session): - md_session, dummy_client = dummy_md_session - - # Setup dummy response for dataset_ready (should be "ready") - dummy_ready = MagicMock() - dummy_ready.dataset = MagicMock(is_embeddings_up_to_date=True) - dummy_client.dataset_ready.return_value = dummy_ready - - # Setup dummy response for dataset_initial_state_query - dummy_initial_state = MagicMock() - dummy_dataset = MagicMock() - dummy_labelset = MagicMock() - dummy_labelset.mode = "cell-labels" - dummy_labelset.name = "Test Labelset" - dummy_labelset.id = "label1" - dummy_dataset.labelsets = [dummy_labelset] - dummy_dataset.model_dump.return_value = dict(dummy_dataset) - dummy_initial_state.dataset = dummy_dataset - dummy_client.dataset_initial_state_query.return_value = dummy_initial_state - - # Setup dummy response for cluster_types - dummy_cluster_types = MagicMock() - dummy_cluster_dataset = MagicMock() - dummy_cluster = MagicMock() - dummy_cluster.name = "cluster1" - dummy_cluster_dataset.embedding_cluster_types = [dummy_cluster] - dummy_cluster_types.dataset = dummy_cluster_dataset - dummy_client.cluster_types.return_value = dummy_cluster_types - - # Setup dummy response for md_commons_query (for embeddings) - dummy_embeddings = MagicMock() - dummy_embeddings_dataset = MagicMock() - dummy_embedding = MagicMock() - dummy_embedding.name = "embedding1" - dummy_embeddings_dataset.embeddings = [dummy_embedding] - 
dummy_embeddings.dataset = dummy_embeddings_dataset - dummy_client.md_commons_query.return_value = dummy_embeddings - - # Setup dummy response for create_session - dummy_create_session = MagicMock() - dummy_create_session.save_embedding_session = "snapshot_updated" - dummy_client.create_session.return_value = dummy_create_session - - session_id = md_session.create_session() - - # Check that the returned session_id matches the session property - assert md_session.session_id == session_id - - # Verify that the expected internal client calls were made - dummy_client.dataset_ready.assert_called_once_with(md_session.dataset_id) - dummy_client.dataset_initial_state_query.assert_called_once_with(md_session.dataset_id) - dummy_client.cluster_types.assert_called_once_with(md_session.dataset_id) - dummy_client.md_commons_query.assert_called_once_with(md_session.dataset_id) - dummy_client.create_session.assert_called_once() - - -def test_create_session_not_ready(dummy_md_session): - md_session, dummy_client = dummy_md_session - - # Simulate dataset not ready (is_embeddings_up_to_date is False) - dummy_ready = MagicMock() - dummy_ready.dataset = MagicMock(is_embeddings_up_to_date=False) - dummy_client.dataset_ready.return_value = dummy_ready - - with pytest.raises(RuntimeError, match="is not ready"): - md_session.create_session() - - -def test_embedding_data_success(dummy_md_session): - md_session, dummy_client = dummy_md_session - # Set available embeddings so that "embedding1" is valid. 
- md_session._embeddings = ["embedding1"] - - # Setup dummy response for embedding_data - dummy_embedding_data = MagicMock() - dummy_embedding_data.dataset = MagicMock(embedding_data="embedding_data_response") - dummy_client.embedding_data.return_value = dummy_embedding_data - - result = md_session.embedding_data( - embedding="embedding1", - max_points=1000, - labelsets=["label1"], - selection_gene="geneA", - selection_key_major="sel_major", - selection_key_minor="sel_minor" - ) - assert result == "embedding_data_response" - dummy_client.embedding_data.assert_called_once_with( - dataset_id=md_session.dataset_id, - options=ANY # We use ANY because the input options object is complex - ) - - -def test_embedding_data_invalid_embedding(dummy_md_session): - md_session, dummy_client = dummy_md_session - md_session._embeddings = ["embedding1"] - - with pytest.raises(ValueError, match="is not found"): - md_session.embedding_data( - embedding="nonexistent", - max_points=1000 - ) - - -def test_general_de_success(dummy_md_session): - md_session, dummy_client = dummy_md_session - # Set available labelsets so that "Test Label" is valid. - md_session._labelsets = ["Test Label"] - - # Patch the internal helper to return a dummy labelset id. 
- md_session._labelset_id_from_name = MagicMock(return_value="label1") - - dummy_general_de = MagicMock() - dummy_general_de.dataset = MagicMock(general_diff="diff_key") - dummy_client.general_de.return_value = dummy_general_de - - md_session._session_id = "session-id" - result = md_session.general_de(labelset="Test Label", random_seed=42) - assert result == "diff_key" - md_session._labelset_id_from_name.assert_called_once_with("Test Label") - dummy_client.general_de.assert_called_once_with( - dataset_id=md_session.dataset_id, - options=ANY - ) - - -def test_general_de_invalid_labelset(dummy_md_session): - md_session, dummy_client = dummy_md_session - md_session._labelsets = ["Test Label"] - - with pytest.raises(ValueError, match="is not found"): - md_session.general_de(labelset="Nonexistent") - - -def test_highly_variable_genes(dummy_md_session): - md_session, dummy_client = dummy_md_session - - # Setup dummy response for highly_variable_genes - dummy_gene = MagicMock() - dummy_gene.name = "gene1" - dummy_gene.dispersion = 0.5 - dummy_hvg = MagicMock() - dummy_hvg.dataset = MagicMock(embedding_highly_variable_genes=[dummy_gene]) - dummy_client.highly_variable_genes.return_value = dummy_hvg - - df = md_session.highly_variable_genes( - gene_name_filter="g", - pseudogenes_filter=True, - offset=0, - limit=10, - sort_order="desc" - ) - - # Check that the returned DataFrame contains the expected columns and values. 
- assert isinstance(df, pd.DataFrame) - assert "gene_symbol" in df.columns - assert "dispersion" in df.columns - assert df.iloc[0]["gene_symbol"] == "gene1" - assert df.iloc[0]["dispersion"] == 0.5 - - dummy_client.highly_variable_genes.assert_called_once_with( - dataset_id=md_session.dataset_id, - options=ANY - ) - - -def test_is_md_cache_ready(dummy_md_session): - md_session, dummy_client = dummy_md_session - dummy_files_status = MagicMock() - dummy_files_status.dataset = MagicMock(get_md_files_status="ready") - dummy_client.files_status.return_value = dummy_files_status - - assert md_session.is_md_cache_ready() is True - dummy_client.files_status.assert_called_once_with(md_session.dataset_id) - - -def test_heatmap(dummy_md_session): - md_session, dummy_client = dummy_md_session - # Assume that create_session was already called so session_id is set. - md_session._session_id = "session123" - - dummy_heatmap = MagicMock() - dummy_heatmap.dataset = MagicMock(embedding_diff_heat_map="heatmap_response") - dummy_client.heatmap.return_value = dummy_heatmap - - result = md_session.heatmap(diff_key="diff_key", n_top_genes=3, max_cells_displayed=1000) - assert result == "heatmap_response" - dummy_client.heatmap.assert_called_once_with( - dataset_id=md_session.dataset_id, - options=ANY - ) \ No newline at end of file diff --git a/test/readme.md b/test/readme.md deleted file mode 100644 index 2cc9a31..0000000 --- a/test/readme.md +++ /dev/null @@ -1,10 +0,0 @@ -## Running tests - -Set following environment variables with correct credentials CAP_LOGIN, CAP_PWD, CAP_TOKEN - -To run unit test use pytest from the root of the repository: - -```commandline -pip install pytest -pytest test/ -``` \ No newline at end of file diff --git a/test/test_api.py b/test/test_api.py new file mode 100644 index 0000000..c5d2cac --- /dev/null +++ b/test/test_api.py @@ -0,0 +1,21 @@ +from cap_sc_client import CapClient + + +def test_dataset_search(): + cp = CapClient() + df = 
cp.search_datasets(offset=5, limit=5) + assert df.shape[0] == 5 + +def test_label_search(): + cp = CapClient() + df = cp.search_cell_labels(offset=5, limit=5) + assert df.shape[0] == 5 + +def test_md_session(): + cp = CapClient() + datasets = cp.search_datasets(limit=1) + dataset_id = datasets["id"].to_list()[0] + md_session = cp.md_session(dataset_id=dataset_id) + md_session.create_session() + assert md_session.session_id is not None + assert len(md_session.embeddings) > 0