class BoundingBoxProperties(Transformer):
    """
    Stamp annotation provenance onto bounding box and polygon commands.

    For every ``AddBoundingBox`` / ``AddPolygon`` command in a transaction,
    the configured values are written into the command's ``properties``
    dictionary (created when absent). Other commands and the blob list are
    left untouched.

    Keyword Args:
        annotation_source: Stored as the ``annotation_source`` property.
            Defaults to ``"coco"``; a falsy value disables the property.
        annotation_mode: Stored as the ``annotation_mode`` property.
            Defaults to ``"auto"``; a falsy value disables the property.
    """

    # Commands that receive the annotation properties.
    _ANNOTATION_COMMANDS = ("AddBoundingBox", "AddPolygon")

    def __init__(self, data: Subscriptable, **kwargs) -> None:
        super().__init__(data, **kwargs)
        self.annotation_source = kwargs.get("annotation_source", "coco")
        self.annotation_mode = kwargs.get("annotation_mode", "auto")

    def getitem(self, subscript):
        """
        Return transaction ``subscript`` with annotation properties applied.

        Args:
            subscript: Index forwarded to the wrapped data source.

        Returns:
            The transaction obtained from the data source, with its
            annotation commands modified in place. Errors are logged and
            never raised.
        """
        x = self.data[subscript]
        if not (self.annotation_source or self.annotation_mode):
            # Nothing is configured: hand the transaction back untouched.
            return x

        try:
            commands = list(x[0])
        except Exception:
            logger.exception(
                "Error applying bounding box properties", stack_info=True)
            return x

        for cmd_dict in commands:
            # Each command is guarded on its own so that one malformed
            # command does not stop the remaining annotations from being
            # tagged.
            try:
                cmd_name = next(iter(cmd_dict))
                if cmd_name not in self._ANNOTATION_COMMANDS:
                    continue
                src_properties = cmd_dict[cmd_name].setdefault(
                    "properties", {})
                if self.annotation_source:
                    src_properties["annotation_source"] = self.annotation_source
                if self.annotation_mode:
                    src_properties["annotation_mode"] = self.annotation_mode
            except Exception:
                # Do not raise: log for post-mortem analysis instead of
                # interrupting ingestion.
                logger.exception(
                    "Error applying bounding box properties", stack_info=True)

        return x
def getitem(self, subscript):
    """
    Return transaction ``subscript`` with one descriptor per image.

    Every ``AddImage`` command gets a matching ``AddDescriptor`` command
    (connected through the image's ``_ref``) appended to the command list,
    and the serialized embedding appended to the blob list. The descriptor
    set is created lazily from the first embedding seen. Until creation, or
    discovery of an already existing set, succeeds, images pass through
    without a descriptor and creation is retried on the next image.

    Args:
        subscript: Index forwarded to the wrapped data source.

    Returns:
        The transaction ``(commands, blobs)`` obtained from the data
        source, with both lists extended in place.

    Raises:
        KeyError: If an ``AddImage`` command that receives a descriptor has
            no ``_ref``.
    """
    # Imported locally so this method does not depend on a module-level
    # logger being defined.
    import logging

    log = logging.getLogger(__name__)

    x = self.data[subscript]
    blob_index = 0  # position in x[1] of the current command's blob
    new_descriptors = []
    new_blobs = []

    for cmd_dict in x[0]:
        cmd_name = list(cmd_dict.keys())[0]
        if cmd_name == "AddImage":
            blob = x[1][blob_index]
            serialized = generate_embedding(blob)

            if not getattr(self, "_descriptorset_initialized", False):
                utils = self.get_utils()
                # The dimension is derived as 4 bytes per component
                # (presumably packed float32 - confirm against
                # generate_embedding).
                success = utils.add_descriptorset(
                    self.search_set_name, dim=len(serialized) // 4, metric=["CS"])
                try:
                    if success or self.search_set_name in utils.get_descriptorset_list():
                        self._descriptorset_initialized = True
                except Exception:
                    # Stay uninitialized so the next image retries, but
                    # leave a trace instead of failing silently.
                    log.warning(
                        "Could not verify descriptor set %r; will retry",
                        self.search_set_name, exc_info=True)

            if getattr(self, "_descriptorset_initialized", False):
                # If the image already has an image_sha256, we use it.
                image_sha256 = cmd_dict["AddImage"].get("properties", {}).get(
                    "adb_image_sha256", None)
                if not image_sha256:
                    image_sha256 = hashlib.sha256(blob).hexdigest()
                new_blobs.append(serialized)
                new_descriptors.append(
                    {
                        "AddDescriptor": {
                            "set": self.search_set_name,
                            "properties": {
                                "image_sha256": image_sha256,
                            },
                            "if_not_found": {
                                "image_sha256": ["==", image_sha256],
                            },
                            "connect": {
                                "ref": cmd_dict["AddImage"]["_ref"]
                            }
                        }
                    })
            else:
                log.warning(
                    "Descriptor set %r is not available; "
                    "image ingested without an embedding",
                    self.search_set_name)
        if cmd_name in ["AddImage", "AddDescriptor", "AddVideo", "AddBlob"]:
            blob_index += 1

    # New commands and blobs go at the end so the existing command/blob
    # alignment is preserved.
    x[0].extend(new_descriptors)
    x[1].extend(new_blobs)
    return x
def getitem(self, subscript):
    """
    Return transaction ``subscript`` with the static properties applied.

    The configured ``adb_data_source``, ``adb_timestamp`` and
    ``adb_main_object`` values (only those that are truthy) are written
    into the ``properties`` of every ``AddImage``, ``AddVideo``,
    ``AddBoundingBox`` and ``AddPolygon`` command, creating the
    ``properties`` dictionary when it is missing.

    Args:
        subscript: Index forwarded to the wrapped data source.

    Returns:
        The transaction obtained from the data source, modified in place.
        Errors are logged and never raised.
    """
    x = self.data[subscript]
    if not (self.adb_data_source or self.adb_timestamp or self.adb_main_object):
        # Nothing is configured: hand the transaction back untouched.
        return x

    # Only explicitly set values are applied.
    static_properties = {
        key: value
        for key, value in (
            ("adb_data_source", self.adb_data_source),
            ("adb_timestamp", self.adb_timestamp),
            ("adb_main_object", self.adb_main_object),
        )
        if value
    }

    try:
        commands = list(x[0])
    except Exception:
        logger.exception(
            "Error applying common properties", stack_info=True)
        return x

    for cmd_dict in commands:
        # Each command is guarded on its own so that one malformed command
        # does not leave the rest of the transaction without properties.
        try:
            cmd_name = next(iter(cmd_dict))
            if cmd_name in ("AddImage", "AddVideo", "AddBoundingBox", "AddPolygon"):
                cmd_dict[cmd_name].setdefault(
                    "properties", {}).update(static_properties)
        except Exception:
            logger.exception(
                "Error applying common properties", stack_info=True)

    return x
- image_sha256 = x[0][ic]["AddImage"].get("properties", {}).get( - "adb_image_sha256", None) - if not image_sha256: - image_sha256 = hashlib.sha256(x[1][ic]).hexdigest() - x[1].append(serialized) - x[0].append( - { - "AddDescriptor": { - "set": self.search_set_name, - "properties": { - "image_sha256": image_sha256, - }, - "if_not_found": { - "image_sha256": ["==", image_sha256], - }, - "connect": { - "ref": x[0][ic]["AddImage"]["_ref"] - } - } - }) + blob_index = 0 + new_descriptors = [] + new_blobs = [] + + for cmd_dict in x[0]: + cmd_name = list(cmd_dict.keys())[0] + if cmd_name == "AddImage": + blob = x[1][blob_index] + + serialized = self._get_embedding_from_blob(blob) + + if not getattr(self, "_descriptorset_initialized", False): + utils = self.get_utils() + success = utils.add_descriptorset( + self.search_set_name, dim=len(serialized) // 4) + try: + if success or self.search_set_name in utils.get_descriptorset_list(): + self._descriptorset_initialized = True + except Exception: + pass + + # If the image already has an image_sha256, we use it. 
def getitem(self, subscript):
    """
    Return transaction ``subscript`` with computed properties on each image.

    For every ``AddImage`` command the blob's byte size, SHA-256 digest,
    pixel width and height, and an ``adb_image_id`` are written into the
    command's ``properties``. The id is the existing ``id`` property when
    present, otherwise a fresh UUID hex string. Failures are logged per
    command and never raised, so a command may end up with only some of
    these properties.
    """
    x = self.data[subscript]
    blob_cursor = 0  # position in x[1] of the current command's blob

    for command in x[0]:
        kind = list(command.keys())[0]
        try:
            if kind == "AddImage":
                props = command["AddImage"].setdefault("properties", {})

                # Properties derived from the raw bytes.
                blob = x[1][blob_cursor]
                props["adb_image_size"] = len(blob)
                props["adb_image_sha256"] = hashlib.sha256(blob).hexdigest()

                # Properties derived from the decoded image.
                decoded = Image.open(io.BytesIO(blob))
                props["adb_image_width"] = decoded.width
                props["adb_image_height"] = decoded.height

                if "id" in props:
                    image_id = props["id"]
                else:
                    image_id = uuid.uuid4().hex
                props["adb_image_id"] = str(image_id)

        except Exception:
            # Importantly, do not raise an exception here, since it will kill ingestion.
            # Create a log message instead, for post-mortem analysis.
            logger.exception(
                "Error applying image properties", stack_info=True)

        # Advance past the blob of any blob-carrying command, image or not.
        if kind in ["AddImage", "AddDescriptor", "AddVideo", "AddBlob"]:
            blob_cursor += 1

    return x
class VideoProperties(Transformer):
    """
    Enrich ``AddVideo`` commands with properties computed from the video blob.

    On construction an entity index on ``adb_data_source`` is created for
    ``_Video`` unless the database already reports one.
    """

    def __init__(self, data: Subscriptable, **kwargs) -> None:
        super().__init__(data, **kwargs)
        db_utils = self.get_utils()

        already_indexed = db_utils.get_indexed_props("_Video")
        if "adb_data_source" not in already_indexed:
            db_utils.create_entity_index("_Video", "adb_data_source")

    def getitem(self, subscript):
        """
        Return transaction ``subscript`` with computed video properties.

        Each ``AddVideo`` command receives ``adb_video_size``,
        ``adb_video_sha256`` and ``adb_video_id`` in its ``properties``.
        The id is the existing ``id`` property when present, otherwise a
        fresh UUID hex string. Failures are logged per command and never
        raised.
        """
        x = self.data[subscript]
        blob_cursor = 0  # position in x[1] of the current command's blob

        for command in x[0]:
            kind = list(command.keys())[0]
            try:
                if kind == "AddVideo":
                    props = command["AddVideo"].setdefault("properties", {})

                    # Properties derived from the raw bytes.
                    blob = x[1][blob_cursor]
                    props["adb_video_size"] = len(blob)
                    props["adb_video_sha256"] = hashlib.sha256(blob).hexdigest()

                    if "id" in props:
                        video_id = props["id"]
                    else:
                        video_id = uuid.uuid4().hex
                    props["adb_video_id"] = str(video_id)

            except Exception:
                # Importantly, do not raise an exception here, since it will kill ingestion.
                # Create a log message instead, for post-mortem analysis.
                logger.exception(
                    "Error applying video properties", stack_info=True)

            # Advance past the blob of any blob-carrying command.
            if kind in ["AddImage", "AddDescriptor", "AddVideo", "AddBlob"]:
                blob_cursor += 1

        return x
class DummyData:
    """Minimal in-memory data source: a sized, subscriptable wrapper."""

    def __init__(self, data):
        # The wrapped sequence is stored as-is, not copied.
        self._data = data

    def __len__(self):
        return len(self._data)

    def __getitem__(self, i):
        return self._data[i]
def test_variable_annotation_counts():
    """Transactions with differing numbers of annotations are all handled."""
    import copy

    templates = [
        ([{"AddImage": {}}, {"AddBoundingBox": {}}], [b"dummy_image"]),
        ([{"AddImage": {}}], [b"dummy_image"]),
        ([{"AddImage": {}}, {"AddBoundingBox": {}}, {
         "AddBoundingBox": {}}, {"AddPolygon": {}}], [b"dummy_image"]),
        ([{"AddImage": {}}, {"AddPolygon": {}},
         {"AddPolygon": {}}], [b"dummy_image"]),
        ([{"AddVideo": {}}, {"AddBoundingBox": {}}], [b"dummy_video"])
    ]
    annotation_commands = ["AddBoundingBox", "AddPolygon"]
    media_commands = ["AddImage", "AddVideo"]

    # CommonProperties stamps media and annotation commands alike.
    common_input = copy.deepcopy(templates)
    common = CommonProperties(
        DummyData(common_input),
        adb_data_source="test_source",
        adb_timestamp="2026-05-24",
        adb_main_object="test_object",
    )
    expected_common = {
        "adb_data_source": "test_source",
        "adb_timestamp": "2026-05-24",
        "adb_main_object": "test_object",
    }
    for idx in range(len(common_input)):
        for cmd in common[idx][0]:
            name = list(cmd.keys())[0]
            if name in media_commands + annotation_commands:
                for key, value in expected_common.items():
                    assert cmd[name]["properties"][key] == value

    # BoundingBoxProperties stamps annotations only.
    tagged_input = copy.deepcopy(templates)
    tagger = BoundingBoxProperties(
        DummyData(tagged_input), annotation_source="test_anno", annotation_mode="auto")
    for idx in range(len(tagged_input)):
        for cmd in tagger[idx][0]:
            name = list(cmd.keys())[0]
            if name in annotation_commands:
                assert cmd[name]["properties"]["annotation_source"] == "test_anno"
                assert cmd[name]["properties"]["annotation_mode"] == "auto"
            elif name in media_commands:
                assert "properties" not in cmd[name] or "annotation_source" not in cmd[name]["properties"]

    # With both values disabled, nothing is stamped.
    untouched_input = copy.deepcopy(templates)
    disabled = BoundingBoxProperties(
        DummyData(untouched_input), annotation_source=None, annotation_mode=None)
    for idx in range(len(untouched_input)):
        for cmd in disabled[idx][0]:
            name = list(cmd.keys())[0]
            if name in annotation_commands:
                assert "annotation_source" not in cmd[name].get(
                    "properties", {})


@patch('aperturedb.transformers.transformer.Transformer.get_utils')
def test_video_properties(mock_get_utils):
    """VideoProperties indexes ``_Video`` and fills size, digest and id."""
    utils = mock_get_utils.return_value
    utils.get_indexed_props.return_value = []

    video_blob = b"fake_video_blob_content"
    transactions = [
        ([
            {"AddVideo": {"properties": {"id": "test_id"}}},
            {"AddBoundingBox": {}}
        ], [video_blob]),
        ([
            {"AddBoundingBox": {}}
        ], []),
        ([
            {"AddImage": {}},
            {"AddVideo": {}}
        ], [b"image_blob", video_blob]),
    ]

    transformer = VideoProperties(DummyData(transactions))

    # Verify index creation
    utils.get_indexed_props.assert_called_with("_Video")
    utils.create_entity_index.assert_called_with(
        "_Video", "adb_data_source")

    expected_digest = hashlib.sha256(video_blob).hexdigest()
    for idx in range(len(transactions)):
        for cmd in transformer[idx][0]:
            if list(cmd.keys())[0] != "AddVideo":
                continue
            props = cmd["AddVideo"]["properties"]
            assert props["adb_video_size"] == len(video_blob)
            assert props["adb_video_sha256"] == expected_digest
            assert "adb_video_id" in props
            if props.get("id") == "test_id":
                assert props["adb_video_id"] == "test_id"
@patch('aperturedb.transformers.transformer.Transformer.get_utils')
@patch('aperturedb.transformers.image_properties.Image.open')
def test_image_properties(mock_image_open, mock_get_utils):
    """ImageProperties fills size, digest, dimensions and id per image."""
    mock_get_utils.return_value.get_indexed_props.return_value = []

    # Stand-in for the decoded image, so no real image bytes are needed.
    mock_image_open.return_value = MagicMock(width=800, height=600)

    image_blob = b"fake_image_blob_content"
    transactions = [
        ([
            {"AddImage": {"_ref": 1, "properties": {"id": "test_image_id"}}},
            {"AddVideo": {}}
        ], [image_blob, b"video_blob"]),
        ([
            {"AddBoundingBox": {}}
        ], []),
        ([
            {"AddVideo": {}},
            {"AddImage": {"_ref": 2}}
        ], [b"video_blob", image_blob]),
    ]

    transformer = ImageProperties(DummyData(transactions))

    expected_digest = hashlib.sha256(image_blob).hexdigest()
    for idx in range(len(transactions)):
        for cmd in transformer[idx][0]:
            if list(cmd.keys())[0] != "AddImage":
                continue
            props = cmd["AddImage"]["properties"]
            assert props["adb_image_size"] == len(image_blob)
            assert props["adb_image_sha256"] == expected_digest
            assert props["adb_image_width"] == 800
            assert props["adb_image_height"] == 600
            assert "adb_image_id" in props
            if props.get("id") == "test_image_id":
                assert props["adb_image_id"] == "test_image_id"


@patch('aperturedb.transformers.transformer.Transformer.get_utils')
def test_clip_pytorch_embeddings(mock_get_utils):
    """A CLIP descriptor is appended for the image, after all other entries."""
    # The transformer needs optional heavy dependencies; skip without them.
    try:
        from aperturedb.transformers.clip_pytorch_embeddings import CLIPPyTorchEmbeddings
    except (ImportError, SystemExit):
        pytest.skip("Missing deps for CLIP")

    with patch('aperturedb.transformers.clip_pytorch_embeddings.generate_embedding') as mock_generate_embedding:
        embedding = struct.pack('<4f', 0.1, 0.2, 0.3, 0.4)
        mock_generate_embedding.return_value = embedding
        utils = mock_get_utils.return_value

        transactions = [
            ([
                {"AddImage": {"_ref": 1}},
                {"AddVideo": {}}
            ], [b"fake_image_blob_content", b"video_blob"])
        ]

        transformer = CLIPPyTorchEmbeddings(DummyData(transactions))
        commands, blobs = transformer[0][0], transactions[0][1]

        assert utils.add_descriptorset.called
        assert len(commands) == 3  # AddImage, AddVideo, AddDescriptor

        descriptors = [c for c in commands if "AddDescriptor" in c]
        assert len(descriptors) == 1
        assert descriptors[0]["AddDescriptor"]["connect"]["ref"] == 1

        # 2 blobs originally + 1 generated embedding blob
        assert len(blobs) == 3
        assert blobs[-1] == embedding
@patch('aperturedb.transformers.transformer.Transformer.get_utils')
def test_facenet_pytorch_embeddings(mock_get_utils):
    """The Facenet descriptor uses the image blob even when a video precedes it."""
    # The transformer needs optional heavy dependencies; skip without them.
    try:
        from aperturedb.transformers.facenet_pytorch_embeddings import FacenetPyTorchEmbeddings
    except (ImportError, SystemExit):
        pytest.skip("Missing deps for Facenet")

    with patch('aperturedb.transformers.facenet_pytorch_embeddings.FacenetPyTorchEmbeddings._get_embedding_from_blob') as mock_get_embedding:
        embedding = struct.pack('<4f', 0.5, 0.6, 0.7, 0.8)
        mock_get_embedding.return_value = embedding
        utils = mock_get_utils.return_value

        transactions = [
            ([
                {"AddVideo": {}},
                {"AddImage": {"_ref": 2}}
            ], [b"video_blob", b"fake_image_blob_content"])
        ]

        transformer = FacenetPyTorchEmbeddings(DummyData(transactions))
        result = transformer[0]

        assert utils.add_descriptorset.called
        assert len(result[0]) == 3  # AddVideo, AddImage, AddDescriptor

        descriptors = [c for c in result[0] if "AddDescriptor" in c]
        assert len(descriptors) == 1
        assert descriptors[0]["AddDescriptor"]["connect"]["ref"] == 2

        assert len(result[1]) == 3
        assert result[1][-1] == embedding


def test_base_transformer():
    """The base class counts queries and blobs but leaves getitem abstract."""
    transactions = [
        ([{"AddImage": {}}], [b"dummy"])
    ]
    base = Transformer(DummyData(transactions))

    assert len(base) == 1
    assert base._queries == 1
    assert base._blobs == 1
    assert base._blob_index == [0]

    # getitem is abstract
    with pytest.raises(NotImplementedError):
        base[0]
@patch('aperturedb.transformers.transformer.Transformer.get_utils')
def test_clip_descriptorset_initialization_retry(mock_get_utils):
    """Descriptor set creation is retried on the next item after a failure."""
    try:
        from aperturedb.transformers.clip_pytorch_embeddings import CLIPPyTorchEmbeddings
    except (ImportError, SystemExit):
        pytest.skip("Missing deps for CLIP")

    with patch('aperturedb.transformers.clip_pytorch_embeddings.generate_embedding') as mock_generate_embedding:
        mock_generate_embedding.return_value = struct.pack(
            '<4f', 0.1, 0.2, 0.3, 0.4)

        utils = mock_get_utils.return_value
        # Fail the first time, succeed the second time
        utils.add_descriptorset.side_effect = [False, True]
        utils.get_descriptorset_list.return_value = []

        transactions = [
            ([{"AddImage": {"_ref": 1}}], [b"image1"]),
            ([{"AddImage": {"_ref": 2}}], [b"image2"])
        ]
        transformer = CLIPPyTorchEmbeddings(DummyData(transactions))

        # First item: creation fails, should not be initialized
        first = transformer[0]
        assert utils.add_descriptorset.call_count == 1
        assert not transformer._descriptorset_initialized
        assert not any("AddDescriptor" in c for c in first[0])

        # Second item: creation succeeds, should be initialized
        second = transformer[1]
        assert utils.add_descriptorset.call_count == 2
        assert transformer._descriptorset_initialized
        assert any("AddDescriptor" in c for c in second[0])


@patch('aperturedb.transformers.transformer.Transformer.get_utils')
def test_facenet_descriptorset_initialization_retry(mock_get_utils):
    """An existing descriptor set found on retry counts as initialized."""
    try:
        from aperturedb.transformers.facenet_pytorch_embeddings import FacenetPyTorchEmbeddings
    except (ImportError, SystemExit):
        pytest.skip("Missing deps for Facenet")

    with patch('aperturedb.transformers.facenet_pytorch_embeddings.FacenetPyTorchEmbeddings._get_embedding_from_blob') as mock_get_embedding:
        mock_get_embedding.return_value = struct.pack(
            '<4f', 0.1, 0.2, 0.3, 0.4)

        utils = mock_get_utils.return_value
        # Fail both add and get first time, then succeed get the second time
        utils.add_descriptorset.return_value = False
        utils.get_descriptorset_list.side_effect = [
            [], ["facenet_pytorch_embeddings"]]

        transactions = [
            ([{"AddImage": {"_ref": 1}}], [b"image1"]),
            ([{"AddImage": {"_ref": 2}}], [b"image2"])
        ]
        transformer = FacenetPyTorchEmbeddings(DummyData(transactions))

        first = transformer[0]
        assert utils.add_descriptorset.call_count == 1
        assert not transformer._descriptorset_initialized
        assert not any("AddDescriptor" in c for c in first[0])

        second = transformer[1]
        assert utils.add_descriptorset.call_count == 2
        assert transformer._descriptorset_initialized
        assert any("AddDescriptor" in c for c in second[0])
def test_common_properties_early_return():
    """With no static property configured, the command is left untouched."""
    transactions = [([{"AddImage": {}}], [b"dummy"])]
    transformer = CommonProperties(
        DummyData(transactions),
        adb_data_source=None,
        adb_timestamp=None,
        adb_main_object=None,
    )
    commands = transformer[0][0]
    assert "properties" not in commands[0]["AddImage"]


def test_bounding_box_properties_early_return():
    """With both annotation values disabled, the command is left untouched."""
    transactions = [([{"AddBoundingBox": {}}], [])]
    transformer = BoundingBoxProperties(
        DummyData(transactions), annotation_source=None, annotation_mode=None)
    commands = transformer[0][0]
    assert "properties" not in commands[0]["AddBoundingBox"]


@patch('aperturedb.transformers.transformer.Transformer.get_utils')
@patch('aperturedb.transformers.video_properties.hashlib.sha256')
def test_video_properties_exception_handling(mock_sha256, mock_get_utils):
    """A hashing failure is swallowed and leaves the digest unset."""
    mock_get_utils.return_value.get_indexed_props.return_value = []
    mock_sha256.side_effect = Exception("Test Exception")

    transactions = [
        ([
            {"AddVideo": {}}
        ], [b"fake_video_blob_content"])
    ]
    transformer = VideoProperties(DummyData(transactions))

    commands = transformer[0][0]

    assert "adb_video_sha256" not in commands[0]["AddVideo"]["properties"]


@patch('aperturedb.transformers.bounding_box_properties.logger')
def test_bounding_box_properties_exception_handling(mock_logger):
    """A malformed annotation command is logged, not raised, and kept as-is."""
    # Pass a malformed command dictionary where "AddBoundingBox" is not a dict
    transactions = [([{"AddBoundingBox": "invalid_type_not_dict"}], [])]
    transformer = BoundingBoxProperties(
        DummyData(transactions), annotation_source="test_anno", annotation_mode="auto")

    # Calling setdefault on a string raises AttributeError,
    # but the exception should be caught and logged
    commands = transformer[0][0]

    assert mock_logger.exception.called
    logged_message = mock_logger.exception.call_args[0][0]
    assert "Error applying bounding box properties" in logged_message
    # The original command should remain unchanged
    assert commands[0]["AddBoundingBox"] == "invalid_type_not_dict"
@patch('aperturedb.transformers.image_properties.logger')
@patch('aperturedb.transformers.transformer.Transformer.get_utils')
@patch('aperturedb.transformers.image_properties.Image.open')
def test_image_properties_exception_handling(mock_image_open, mock_get_utils, mock_logger):
    """A decoding failure is logged and keeps the properties set before it."""
    mock_get_utils.return_value.get_indexed_props.return_value = []

    # Make Image.open raise an exception
    mock_image_open.side_effect = Exception("Image load error")

    transactions = [([{"AddImage": {}}], [b"fake_image_blob_content"])]
    transformer = ImageProperties(DummyData(transactions))
    commands = transformer[0][0]

    assert mock_logger.exception.called
    logged_message = mock_logger.exception.call_args[0][0]
    assert "Error applying image properties" in logged_message

    props = commands[0]["AddImage"]["properties"]
    # size and sha256 are computed before Image.open, so they should be present
    for key in ("adb_image_size", "adb_image_sha256"):
        assert key in props
    # width, height, and id are computed after Image.open, so they should be missing
    for key in ("adb_image_width", "adb_image_height", "adb_image_id"):
        assert key not in props


@patch('aperturedb.transformers.common_properties.logger')
def test_common_properties_exception_handling(mock_logger):
    """A malformed media command is logged, not raised, and kept as-is."""
    # Pass a malformed command dictionary
    transactions = [([{"AddImage": "invalid_type_not_dict"}], [b"dummy"])]
    transformer = CommonProperties(
        DummyData(transactions), adb_data_source="test_source")
    commands = transformer[0][0]

    assert mock_logger.exception.called
    logged_message = mock_logger.exception.call_args[0][0]
    assert "Error applying common properties" in logged_message
    assert commands[0]["AddImage"] == "invalid_type_not_dict"