diff --git a/.github/ISSUE_TEMPLATE/airflow_providers_bug_report.yml b/.github/ISSUE_TEMPLATE/airflow_providers_bug_report.yml index 363d5b1e08332..b458bfaaf1015 100644 --- a/.github/ISSUE_TEMPLATE/airflow_providers_bug_report.yml +++ b/.github/ISSUE_TEMPLATE/airflow_providers_bug_report.yml @@ -87,6 +87,7 @@ body: - oracle - pagerduty - papermill + - pinecone - plexus - postgres - presto diff --git a/CONTRIBUTING.rst b/CONTRIBUTING.rst index e3b3798a7d8e5..e568eb7d7fe50 100644 --- a/CONTRIBUTING.rst +++ b/CONTRIBUTING.rst @@ -677,9 +677,9 @@ doc, doc_gen, docker, druid, elasticsearch, exasol, facebook, ftp, gcp, gcp_api, github_enterprise, google, google_auth, grpc, hashicorp, hdfs, hive, http, imap, influxdb, jdbc, jenkins, kerberos, kubernetes, ldap, leveldb, microsoft.azure, microsoft.mssql, microsoft.psrp, microsoft.winrm, mongo, mssql, mysql, neo4j, odbc, openfaas, openlineage, opensearch, opsgenie, -oracle, otel, pagerduty, pandas, papermill, password, pinot, plexus, postgres, presto, rabbitmq, -redis, s3, s3fs, salesforce, samba, segment, sendgrid, sentry, sftp, singularity, slack, smtp, -snowflake, spark, sqlite, ssh, statsd, tableau, tabular, telegram, trino, vertica, virtualenv, +oracle, otel, pagerduty, pandas, papermill, password, pinecone, pinot, plexus, postgres, presto, +rabbitmq, redis, s3, s3fs, salesforce, samba, segment, sendgrid, sentry, sftp, singularity, slack, +smtp, snowflake, spark, sqlite, ssh, statsd, tableau, tabular, telegram, trino, vertica, virtualenv, weaviate, webhdfs, winrm, yandex, zendesk .. 
END EXTRAS HERE diff --git a/INSTALL b/INSTALL index 20abb40214022..3035883ff57d5 100644 --- a/INSTALL +++ b/INSTALL @@ -104,9 +104,9 @@ doc, doc_gen, docker, druid, elasticsearch, exasol, facebook, ftp, gcp, gcp_api, github_enterprise, google, google_auth, grpc, hashicorp, hdfs, hive, http, imap, influxdb, jdbc, jenkins, kerberos, kubernetes, ldap, leveldb, microsoft.azure, microsoft.mssql, microsoft.psrp, microsoft.winrm, mongo, mssql, mysql, neo4j, odbc, openfaas, openlineage, opensearch, opsgenie, -oracle, otel, pagerduty, pandas, papermill, password, pinot, plexus, postgres, presto, rabbitmq, -redis, s3, s3fs, salesforce, samba, segment, sendgrid, sentry, sftp, singularity, slack, smtp, -snowflake, spark, sqlite, ssh, statsd, tableau, tabular, telegram, trino, vertica, virtualenv, +oracle, otel, pagerduty, pandas, papermill, password, pinecone, pinot, plexus, postgres, presto, +rabbitmq, redis, s3, s3fs, salesforce, samba, segment, sendgrid, sentry, sftp, singularity, slack, +smtp, snowflake, spark, sqlite, ssh, statsd, tableau, tabular, telegram, trino, vertica, virtualenv, weaviate, webhdfs, winrm, yandex, zendesk # END EXTRAS HERE diff --git a/airflow/providers/pinecone/CHANGELOG.rst b/airflow/providers/pinecone/CHANGELOG.rst new file mode 100644 index 0000000000000..fe2c1b4c121d1 --- /dev/null +++ b/airflow/providers/pinecone/CHANGELOG.rst @@ -0,0 +1,26 @@ + .. Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + .. http://www.apache.org/licenses/LICENSE-2.0 + + .. 
Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. + +``apache-airflow-providers-pinecone`` + +Changelog +--------- + +1.0.0 +..... + +Initial version of the provider. diff --git a/airflow/providers/pinecone/__init__.py b/airflow/providers/pinecone/__init__.py new file mode 100644 index 0000000000000..13a83393a9124 --- /dev/null +++ b/airflow/providers/pinecone/__init__.py @@ -0,0 +1,16 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. diff --git a/airflow/providers/pinecone/hooks/__init__.py b/airflow/providers/pinecone/hooks/__init__.py new file mode 100644 index 0000000000000..13a83393a9124 --- /dev/null +++ b/airflow/providers/pinecone/hooks/__init__.py @@ -0,0 +1,16 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. 
The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. diff --git a/airflow/providers/pinecone/hooks/pinecone.py b/airflow/providers/pinecone/hooks/pinecone.py new file mode 100644 index 0000000000000..92fd620f76af0 --- /dev/null +++ b/airflow/providers/pinecone/hooks/pinecone.py @@ -0,0 +1,128 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +"""Hook for Pinecone.""" +from __future__ import annotations + +from typing import TYPE_CHECKING, Any + +import pinecone + +from airflow.hooks.base import BaseHook + +if TYPE_CHECKING: + from pinecone.core.client.models import UpsertResponse + + +class PineconeHook(BaseHook): + """ + Interact with Pinecone. This hook uses the Pinecone conn_id.
+ + :param conn_id: Optional, default connection id is `pinecone_default`. The connection id to use when + connecting to Pinecone. + """ + + conn_name_attr = "conn_id" + default_conn_name = "pinecone_default" + conn_type = "pinecone" + hook_name = "Pinecone" + + @staticmethod + def get_connection_form_widgets() -> dict[str, Any]: + """Return connection widgets to add to connection form.""" + from flask_appbuilder.fieldwidgets import BS3TextFieldWidget + from flask_babel import lazy_gettext + from wtforms import StringField + + return { + "log_level": StringField(lazy_gettext("Log Level"), widget=BS3TextFieldWidget(), default=None), + "project_name": StringField( + lazy_gettext("Project Name"), + widget=BS3TextFieldWidget(), + ), + } + + @classmethod + def get_ui_field_behaviour(cls) -> dict[str, Any]: + """Return custom field behaviour.""" + return { + "hidden_fields": ["port", "schema"], + "relabeling": {"login": "Pinecone Environment", "password": "Pinecone API key"}, + } + + def __init__(self, conn_id: str = default_conn_name) -> None: + self.conn_id = conn_id + self.get_conn() + + def get_conn(self) -> None: + pinecone_connection = self.get_connection(self.conn_id) + api_key = pinecone_connection.password + pinecone_environment = pinecone_connection.login + pinecone_host = pinecone_connection.host + extras = pinecone_connection.extra_dejson + pinecone_project_name = extras.get("project_name") + log_level = extras.get("log_level", None) + pinecone.init( + api_key=api_key, + environment=pinecone_environment, + host=pinecone_host, + project_name=pinecone_project_name, + log_level=log_level, + ) + + def test_connection(self) -> tuple[bool, str]: + try: + pinecone.list_indexes() + return True, "Connection established" + except Exception as e: + return False, str(e) + + @staticmethod + def upsert( + index_name: str, + vectors: list[Any], + namespace: str = "", + batch_size: int | None = None, + show_progress: bool = True, + **kwargs: Any, + ) -> UpsertResponse: +
""" + The upsert operation writes vectors into a namespace. + + If a new value is upserted for an existing vector id, it will overwrite the previous value. + + .. seealso:: https://docs.pinecone.io/reference/upsert + + To upsert in parallel follow + + .. seealso:: https://docs.pinecone.io/docs/insert-data#sending-upserts-in-parallel + + :param index_name: The name of the index to upsert the vectors into. + :param vectors: A list of vectors to upsert. + :param namespace: The namespace to write to. If not specified, the default namespace - "" is used. + :param batch_size: The number of vectors to upsert in each batch. + :param show_progress: Whether to show a progress bar using tqdm. Applied only + if batch_size is provided. + """ + index = pinecone.Index(index_name) + return index.upsert( + vectors=vectors, + namespace=namespace, + batch_size=batch_size, + show_progress=show_progress, + **kwargs, + ) diff --git a/airflow/providers/pinecone/operators/__init__.py b/airflow/providers/pinecone/operators/__init__.py new file mode 100644 index 0000000000000..13a83393a9124 --- /dev/null +++ b/airflow/providers/pinecone/operators/__init__.py @@ -0,0 +1,16 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License.
diff --git a/airflow/providers/pinecone/operators/pinecone.py b/airflow/providers/pinecone/operators/pinecone.py new file mode 100644 index 0000000000000..1c757d8fa541c --- /dev/null +++ b/airflow/providers/pinecone/operators/pinecone.py @@ -0,0 +1,83 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from __future__ import annotations + +from functools import cached_property +from typing import TYPE_CHECKING, Any, Sequence + +from airflow.models import BaseOperator +from airflow.providers.pinecone.hooks.pinecone import PineconeHook + +if TYPE_CHECKING: + from airflow.utils.context import Context + + +class PineconeIngestOperator(BaseOperator): + """ + Ingest vector embeddings into Pinecone. + + .. seealso:: + For more information on how to use this operator, take a look at the guide: + :ref:`howto/operator:PineconeIngestOperator` + + :param conn_id: The connection id to use when connecting to Pinecone. + :param index_name: Name of the Pinecone index. + :param input_vectors: Data to be ingested, in the form of a list of tuples where each tuple + contains (id, vector_embedding, metadata). + :param namespace: The namespace to write to. If not specified, the default namespace is used.
+ :param batch_size: The number of vectors to upsert in each batch. + :param upsert_kwargs: Extra keyword arguments unpacked into the hook's ``upsert`` call. See https://docs.pinecone.io/reference/upsert + """ + + template_fields: Sequence[str] = ("index_name", "input_vectors", "namespace") + + def __init__( + self, + *, + conn_id: str = PineconeHook.default_conn_name, + index_name: str, + input_vectors: list[tuple], + namespace: str = "", + batch_size: int | None = None, + upsert_kwargs: dict | None = None, + **kwargs: Any, + ) -> None: + self.upsert_kwargs = upsert_kwargs or {} + super().__init__(**kwargs) + self.conn_id = conn_id + self.index_name = index_name + self.namespace = namespace + self.batch_size = batch_size + self.input_vectors = input_vectors + + @cached_property + def hook(self) -> PineconeHook: + """Return an instance of the PineconeHook.""" + return PineconeHook(conn_id=self.conn_id) + + def execute(self, context: Context) -> None: + """Ingest data into Pinecone using the PineconeHook.""" + self.hook.upsert( + index_name=self.index_name, + vectors=self.input_vectors, + namespace=self.namespace, + batch_size=self.batch_size, + **self.upsert_kwargs, + ) + + self.log.info("Successfully ingested data into Pinecone index %s.", self.index_name) diff --git a/airflow/providers/pinecone/provider.yaml b/airflow/providers/pinecone/provider.yaml new file mode 100644 index 0000000000000..f3050cfe229b6 --- /dev/null +++ b/airflow/providers/pinecone/provider.yaml @@ -0,0 +1,54 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License.
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +--- +package-name: apache-airflow-providers-pinecone + +name: Pinecone + +description: | + `Pinecone `__ + +suspended: false + +versions: + - 1.0.0 + +integrations: + - integration-name: Pinecone + external-doc-url: https://docs.pinecone.io/docs/overview + how-to-guide: + - /docs/apache-airflow-providers-pinecone/operators/pinecone.rst + tags: [software] + +dependencies: + - apache-airflow>=2.5.0 + - pinecone-client>=2.2.4 + +hooks: + - integration-name: Pinecone + python-modules: + - airflow.providers.pinecone.hooks.pinecone + +connection-types: + - hook-class-name: airflow.providers.pinecone.hooks.pinecone.PineconeHook + connection-type: pinecone + +operators: + - integration-name: Pinecone + python-modules: + - airflow.providers.pinecone.operators.pinecone diff --git a/docs/apache-airflow-providers-pinecone/changelog.rst b/docs/apache-airflow-providers-pinecone/changelog.rst new file mode 100644 index 0000000000000..d9d6217310b78 --- /dev/null +++ b/docs/apache-airflow-providers-pinecone/changelog.rst @@ -0,0 +1,19 @@ + + .. Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + .. http://www.apache.org/licenses/LICENSE-2.0 + + .. 
Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. + +.. include:: ../../airflow/providers/pinecone/CHANGELOG.rst diff --git a/docs/apache-airflow-providers-pinecone/commits.rst b/docs/apache-airflow-providers-pinecone/commits.rst new file mode 100644 index 0000000000000..070b1b1258239 --- /dev/null +++ b/docs/apache-airflow-providers-pinecone/commits.rst @@ -0,0 +1,19 @@ + .. Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + .. http://www.apache.org/licenses/LICENSE-2.0 + + .. Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. + +Package apache-airflow-providers-pinecone +------------------------------------------- diff --git a/docs/apache-airflow-providers-pinecone/connections.rst b/docs/apache-airflow-providers-pinecone/connections.rst new file mode 100644 index 0000000000000..0e7f94b414509 --- /dev/null +++ b/docs/apache-airflow-providers-pinecone/connections.rst @@ -0,0 +1,43 @@ + .. Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. 
See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + .. http://www.apache.org/licenses/LICENSE-2.0 + + .. Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. + +.. _howto/connection:pinecone: + +Pinecone Connection +=================== + +The `Pinecone `__ connection type enables access to Pinecone APIs. + +Default Connection IDs +---------------------- + +Pinecone hook points to ``pinecone_default`` connection by default. + +Configuring the Connection +-------------------------- + +Host (optional) + Host URL to connect to a specific Pinecone index. + +Pinecone Environment (required) + Specify your Pinecone environment to connect to. + +Pinecone API key (required) + Specify your Pinecone API Key to connect. + +Project Name (required) + Project Name corresponding to your API Key. diff --git a/docs/apache-airflow-providers-pinecone/index.rst b/docs/apache-airflow-providers-pinecone/index.rst new file mode 100644 index 0000000000000..a999afff358ed --- /dev/null +++ b/docs/apache-airflow-providers-pinecone/index.rst @@ -0,0 +1,98 @@ + + .. Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + .. 
http://www.apache.org/licenses/LICENSE-2.0 + + .. Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. + +``apache-airflow-providers-pinecone`` +====================================== + + +.. toctree:: + :hidden: + :maxdepth: 1 + :caption: Basics + + Home <self> + Changelog <changelog> + Security <security> + +.. toctree:: + :hidden: + :maxdepth: 1 + :caption: Guides + + Connection types <connections> + Operators <operators/pinecone> + + +.. toctree:: + :hidden: + :maxdepth: 1 + :caption: Commits + + Detailed list of commits <commits> + + +.. toctree:: + :hidden: + :maxdepth: 1 + :caption: Resources + + Python API <_api/airflow/providers/pinecone/index> + PyPI Repository <https://pypi.org/project/apache-airflow-providers-pinecone/> + Installing from sources <installing-providers-from-sources> + +.. toctree:: + :hidden: + :maxdepth: 1 + :caption: System tests + + System Tests <_api/tests/system/providers/pinecone/index> + +Package apache-airflow-providers-pinecone +----------------------------------------- + +`Pinecone <https://docs.pinecone.io/docs/overview>`__ + + +Release: 1.0.0 + +Provider package +---------------- + +This is a provider package for ``pinecone`` APIs. All classes for this provider package +are in ``airflow.providers.pinecone`` python module. + +Installation +------------ + +You can install this package on top of an existing Airflow 2 installation (see ``Requirements`` below +for the minimum Airflow version supported) via +``pip install apache-airflow-providers-pinecone`` + + + +Requirements +------------ + +The minimum Apache Airflow version supported by this provider package is ``2.5.0``.
+ +=================== ================== +PIP package Version required +=================== ================== +``apache-airflow`` ``>=2.5.0`` +``pinecone-client`` ``>=2.2.4`` +=================== ================== diff --git a/docs/apache-airflow-providers-pinecone/installing-providers-from-sources.rst b/docs/apache-airflow-providers-pinecone/installing-providers-from-sources.rst new file mode 100644 index 0000000000000..b4e730f4ff21a --- /dev/null +++ b/docs/apache-airflow-providers-pinecone/installing-providers-from-sources.rst @@ -0,0 +1,18 @@ + .. Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + .. http://www.apache.org/licenses/LICENSE-2.0 + + .. Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. + +.. include:: ../exts/includes/installing-providers-from-sources.rst diff --git a/docs/apache-airflow-providers-pinecone/operators/pinecone.rst b/docs/apache-airflow-providers-pinecone/operators/pinecone.rst new file mode 100644 index 0000000000000..71f847919fa80 --- /dev/null +++ b/docs/apache-airflow-providers-pinecone/operators/pinecone.rst @@ -0,0 +1,40 @@ + .. Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. 
The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + .. http://www.apache.org/licenses/LICENSE-2.0 + + .. Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. + +.. _howto/operator:PineconeIngestOperator: + +PineconeIngestOperator +====================== + +Use the :class:`~airflow.providers.pinecone.operators.pinecone.PineconeIngestOperator` to +interact with Pinecone APIs to ingest vectors. + + +Using the Operator +^^^^^^^^^^^^^^^^^^ + +The PineconeIngestOperator requires ``input_vectors`` as the input to ingest into Pinecone. Use the ``conn_id`` parameter to +specify the Pinecone connection to use to connect to your account. The vectors can also carry metadata referencing +the original text corresponding to the vectors, which is ingested into the database along with them. + +An example using the operator in this way: + +.. exampleinclude:: /../../tests/system/providers/pinecone/example_dag_pinecone.py + :language: python + :dedent: 4 + :start-after: [START howto_operator_pinecone_ingest] + :end-before: [END howto_operator_pinecone_ingest] diff --git a/docs/apache-airflow-providers-pinecone/security.rst b/docs/apache-airflow-providers-pinecone/security.rst new file mode 100644 index 0000000000000..66c6f79a4ecfc --- /dev/null +++ b/docs/apache-airflow-providers-pinecone/security.rst @@ -0,0 +1,38 @@ + + .. Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership.
The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + .. http://www.apache.org/licenses/LICENSE-2.0 + + .. Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. + +Releasing security patches +-------------------------- + +Airflow providers are released independently from Airflow itself and the information about vulnerabilities +is published separately. You can upgrade providers independently from Airflow itself, following the +instructions found in :doc:`apache-airflow:installation/installing-from-pypi`. + +When we release a Provider version, the development is always done from the ``main`` branch where we prepare +the next version. The provider uses strict `SemVer <https://semver.org/>`_ versioning policy. Depending on +the scope of the change, Provider will get ``MAJOR`` version upgrade when there are +breaking changes, ``MINOR`` version upgrade when there are new features or ``PATCHLEVEL`` version upgrade +when there are only bug fixes (including security bugfixes) - and this is the only version that receives +security fixes by default, so you should upgrade to latest version of the provider if you want to receive +all released security fixes. + +The only exception to that rule is when we have a critical security fix and good reason to provide an +out-of-band release for the provider, in which case stakeholders in the provider might decide to cherry-pick +and prepare a branch for an older version of the provider following the +`mixed governance model <https://airflow.apache.org/docs/apache-airflow-providers/index.html#mixed-governance-model>`_, +which requires interested parties to cherry-pick and test the fixes.
diff --git a/docs/apache-airflow/extra-packages-ref.rst b/docs/apache-airflow/extra-packages-ref.rst index 170cc2c30e807..07f2dafe25900 100644 --- a/docs/apache-airflow/extra-packages-ref.rst +++ b/docs/apache-airflow/extra-packages-ref.rst @@ -196,6 +196,8 @@ These are extras that add dependencies needed for integration with external serv +---------------------+-----------------------------------------------------+-----------------------------------------------------+ | pagerduty | ``pip install 'apache-airflow[pagerduty]'`` | Pagerduty hook | +---------------------+-----------------------------------------------------+-----------------------------------------------------+ +| pinecone | ``pip install 'apache-airflow[pinecone]'`` | Pinecone Operators and Hooks | ++---------------------+-----------------------------------------------------+-----------------------------------------------------+ | plexus | ``pip install 'apache-airflow[plexus]'`` | Plexus service of CoreScientific.com AI platform | +---------------------+-----------------------------------------------------+-----------------------------------------------------+ | salesforce | ``pip install 'apache-airflow[salesforce]'`` | Salesforce hook | diff --git a/docs/spelling_wordlist.txt b/docs/spelling_wordlist.txt index 63fa7309bd0e1..559450b416954 100644 --- a/docs/spelling_wordlist.txt +++ b/docs/spelling_wordlist.txt @@ -1136,6 +1136,7 @@ picklable pid pidbox pigcmd +pinecone pinodb Pinot pinot @@ -1665,6 +1666,7 @@ updateMask updateonly Upsert upsert +upserted upserts Upsight upstreams diff --git a/generated/provider_dependencies.json b/generated/provider_dependencies.json index b988660912c57..0ea137bba8975 100644 --- a/generated/provider_dependencies.json +++ b/generated/provider_dependencies.json @@ -730,6 +730,14 @@ "cross-providers-deps": [], "excluded-python-versions": [] }, + "pinecone": { + "deps": [ + "apache-airflow>=2.5.0", + "pinecone-client>=2.2.4" + ], + "cross-providers-deps": [], + 
"excluded-python-versions": [] + }, "plexus": { "deps": [ "apache-airflow>=2.5.0", diff --git a/images/breeze/output-commands-hash.txt b/images/breeze/output-commands-hash.txt index be5f84cc5afb9..56d5d101382ec 100644 --- a/images/breeze/output-commands-hash.txt +++ b/images/breeze/output-commands-hash.txt @@ -2,7 +2,7 @@ # Please do not solve it but run `breeze setup regenerate-command-images`. # This command should fix the conflict and regenerate help images that you have conflict with. main:96b4884054753db922cb8ca2cc555368 -build-docs:114a27a589a09882632e6d813deea576 +build-docs:1560aefac4e4ae7c550d6cec531bbbe8 ci:find-backtracking-candidates:17fe56b867a745e5032a08dfcd3f73ee ci:fix-ownership:3e5a73533cc96045e72cb258783cfc96 ci:free-space:49af17b032039c05c41a7a8283f365cc @@ -36,26 +36,26 @@ prod-image:build:1628f7bff3e7e369f0358a646682e674 prod-image:pull:3817ef211b023b76df84ee1110ef64dd prod-image:verify:bd2b78738a7c388dbad6076c41a9f906 prod-image:6011405076eb0e1049d87e971e3adce1 -release-management:add-back-references:888c520eaeb00cb298c2d2e44d1c2d54 +release-management:add-back-references:fc91a28724d36053478939e79c10286c release-management:create-minor-branch:a3834afc4aa5d1e98002c9e9e7a9931d release-management:generate-constraints:01aef235b11e59ed7f10c970a5cdaba7 -release-management:generate-issue-content-providers:4de9a057b32cefaa60a5d7d262e12012 +release-management:generate-issue-content-providers:3769c2e3fe90c0d29745d32953a5cffc release-management:generate-providers-metadata:d4e8e5cfaa024e3963af02d7a873048d release-management:install-provider-packages:34c38aca17d23dbb454fe7a6bfd8e630 release-management:prepare-airflow-package:85d01c57e5b5ee0fb9e5f9d9706ed3b5 -release-management:prepare-provider-documentation:79385f87663ed150080db49e1dc525cb -release-management:prepare-provider-packages:0cefd5fc8de7e14e36f4d454d99a3a16 -release-management:publish-docs:33b92ebbe95b9198abd0fca8f646179b 
+release-management:prepare-provider-documentation:aed68105548d751ecd6d87e0e8ed9cf3 +release-management:prepare-provider-packages:b08037dbedfb2a5be4a3bca1a2ac62ba +release-management:publish-docs:68102147e8be61788701f871a0b7e6da release-management:release-prod-images:cfbfe8b19fee91fd90718f98ef2fd078 release-management:start-rc-process:b27bd524dd3c89f50a747b60a7e892c1 release-management:start-release:419f48f6a4ff4457cb9de7ff496aebbe release-management:update-constraints:02ec4b119150e3fdbac52026e94820ef release-management:verify-provider-packages:96dce5644aad6b37080acf77b3d8de3a -release-management:cec4f9d98f92189c434f4062650bbee9 +release-management:22f143379aa27d19d98e1c02f6be6409 sbom:build-all-airflow-images:32f8acade299c2b112e986bae99846db -sbom:generate-providers-requirements:406d961af480f64f034465a0cd79f62f +sbom:generate-providers-requirements:91f4810996b9be53e660265c923d1498 sbom:update-sbom-information:653be48be70b4b7ff5172d491aadc694 -sbom:c16d2b52e739aaa8a4489f95fbc01fc7 +sbom:65c6d696f6b1b27778031b72add8b2b0 setup:autocomplete:fffcd49e102e09ccd69b3841a9e3ea8e setup:check-all-params-in-groups:5c5e3c382fc8ce84899d224448b3f48a setup:config:3435f1f1535a82c30591dbf577294d2e diff --git a/images/breeze/output_build-docs.svg b/images/breeze/output_build-docs.svg index 387bcc422266f..55b37735acf56 100644 --- a/images/breeze/output_build-docs.svg +++ b/images/breeze/output_build-docs.svg @@ -167,9 +167,9 @@ daskexecutor | databricks | datadog | dbt.cloud | dingding | discord | docker | elasticsearch | exasol | facebook |    ftp | github | google | grpc | hashicorp | http | imap | influxdb | jdbc | jenkins | microsoft.azure | microsoft.mssql microsoft.psrp | microsoft.winrm | mongo | mysql | neo4j | odbc | openfaas | openlineage | opensearch | opsgenie |   -oracle | pagerduty | papermill | plexus | postgres | presto | redis | salesforce | samba | segment | sendgrid | sftp | -singularity | slack | smtp | snowflake | sqlite | ssh | tableau | tabular | telegram | 
trino | vertica | weaviate |    -yandex | zendesk]...                                                                                                   +oracle | pagerduty | papermill | pinecone | plexus | postgres | presto | redis | salesforce | samba | segment |        +sendgrid | sftp | singularity | slack | smtp | snowflake | sqlite | ssh | tableau | tabular | telegram | trino |       +vertica | weaviate | yandex | zendesk]...                                                                              Build documents. diff --git a/images/breeze/output_release-management_add-back-references.svg b/images/breeze/output_release-management_add-back-references.svg index b2a189dbdcf7d..99e178669ba86 100644 --- a/images/breeze/output_release-management_add-back-references.svg +++ b/images/breeze/output_release-management_add-back-references.svg @@ -138,9 +138,9 @@ daskexecutor | databricks | datadog | dbt.cloud | dingding | discord | docker | elasticsearch | exasol | facebook |    ftp | github | google | grpc | hashicorp | http | imap | influxdb | jdbc | jenkins | microsoft.azure | microsoft.mssql microsoft.psrp | microsoft.winrm | mongo | mysql | neo4j | odbc | openfaas | openlineage | opensearch | opsgenie |   -oracle | pagerduty | papermill | plexus | postgres | presto | redis | salesforce | samba | segment | sendgrid | sftp | -singularity | slack | smtp | snowflake | sqlite | ssh | tableau | tabular | telegram | trino | vertica | weaviate |    -yandex | zendesk]...                                                                                                   +oracle | pagerduty | papermill | pinecone | plexus | postgres | presto | redis | salesforce | samba | segment |        +sendgrid | sftp | singularity | slack | smtp | snowflake | sqlite | ssh | tableau | tabular | telegram | trino |       +vertica | weaviate | yandex | zendesk]...                                                                              
Command to add back references for documentation to make it backward compatible. diff --git a/images/breeze/output_release-management_generate-issue-content-providers.svg b/images/breeze/output_release-management_generate-issue-content-providers.svg index dd8167adb731f..8b4aab2e2b3e7 100644 --- a/images/breeze/output_release-management_generate-issue-content-providers.svg +++ b/images/breeze/output_release-management_generate-issue-content-providers.svg @@ -148,8 +148,8 @@ dbt.cloud | dingding | discord | docker | elasticsearch | exasol | facebook | ftp | github | google | grpc | hashicorp http | imap | influxdb | jdbc | jenkins | microsoft.azure | microsoft.mssql | microsoft.psrp | microsoft.winrm |     mongo | mysql | neo4j | odbc | openfaas | openlineage | opensearch | opsgenie | oracle | pagerduty | papermill |       -plexus | postgres | presto | redis | salesforce | samba | segment | sendgrid | sftp | singularity | slack | smtp |     -snowflake | sqlite | ssh | tableau | tabular | telegram | trino | vertica | weaviate | yandex | zendesk]...            +pinecone | plexus | postgres | presto | redis | salesforce | samba | segment | sendgrid | sftp | singularity | slack | +smtp | snowflake | sqlite | ssh | tableau | tabular | telegram | trino | vertica | weaviate | yandex | zendesk]...     Generates content for issue to test the release. 
diff --git a/images/breeze/output_release-management_prepare-provider-documentation.svg b/images/breeze/output_release-management_prepare-provider-documentation.svg index 18897210ee444..1bf60c6cb20de 100644 --- a/images/breeze/output_release-management_prepare-provider-documentation.svg +++ b/images/breeze/output_release-management_prepare-provider-documentation.svg @@ -160,8 +160,8 @@ dbt.cloud | dingding | discord | docker | elasticsearch | exasol | facebook | ftp | github | google | grpc | hashicorp http | imap | influxdb | jdbc | jenkins | microsoft.azure | microsoft.mssql | microsoft.psrp | microsoft.winrm |     mongo | mysql | neo4j | odbc | openfaas | openlineage | opensearch | opsgenie | oracle | pagerduty | papermill |       -plexus | postgres | presto | redis | salesforce | samba | segment | sendgrid | sftp | singularity | slack | smtp |     -snowflake | sqlite | ssh | tableau | tabular | telegram | trino | vertica | weaviate | yandex | zendesk]...            +pinecone | plexus | postgres | presto | redis | salesforce | samba | segment | sendgrid | sftp | singularity | slack | +smtp | snowflake | sqlite | ssh | tableau | tabular | telegram | trino | vertica | weaviate | yandex | zendesk]...     Prepare CHANGELOG, README and COMMITS information for providers. 
diff --git a/images/breeze/output_release-management_prepare-provider-packages.svg b/images/breeze/output_release-management_prepare-provider-packages.svg index e268cc91b753a..f74b5e73c9322 100644 --- a/images/breeze/output_release-management_prepare-provider-packages.svg +++ b/images/breeze/output_release-management_prepare-provider-packages.svg @@ -145,8 +145,8 @@ dbt.cloud | dingding | discord | docker | elasticsearch | exasol | facebook | ftp | github | google | grpc | hashicorp http | imap | influxdb | jdbc | jenkins | microsoft.azure | microsoft.mssql | microsoft.psrp | microsoft.winrm |     mongo | mysql | neo4j | odbc | openfaas | openlineage | opensearch | opsgenie | oracle | pagerduty | papermill |       -plexus | postgres | presto | redis | salesforce | samba | segment | sendgrid | sftp | singularity | slack | smtp |     -snowflake | sqlite | ssh | tableau | tabular | telegram | trino | vertica | weaviate | yandex | zendesk]...            +pinecone | plexus | postgres | presto | redis | salesforce | samba | segment | sendgrid | sftp | singularity | slack | +smtp | snowflake | sqlite | ssh | tableau | tabular | telegram | trino | vertica | weaviate | yandex | zendesk]...     Prepare sdist/whl packages of Airflow Providers. 
diff --git a/images/breeze/output_release-management_publish-docs.svg b/images/breeze/output_release-management_publish-docs.svg index e85ba52e48169..3e8a82cf19f46 100644 --- a/images/breeze/output_release-management_publish-docs.svg +++ b/images/breeze/output_release-management_publish-docs.svg @@ -184,9 +184,9 @@ daskexecutor | databricks | datadog | dbt.cloud | dingding | discord | docker | elasticsearch | exasol | facebook |    ftp | github | google | grpc | hashicorp | http | imap | influxdb | jdbc | jenkins | microsoft.azure | microsoft.mssql microsoft.psrp | microsoft.winrm | mongo | mysql | neo4j | odbc | openfaas | openlineage | opensearch | opsgenie |   -oracle | pagerduty | papermill | plexus | postgres | presto | redis | salesforce | samba | segment | sendgrid | sftp | -singularity | slack | smtp | snowflake | sqlite | ssh | tableau | tabular | telegram | trino | vertica | weaviate |    -yandex | zendesk]...                                                                                                   +oracle | pagerduty | papermill | pinecone | plexus | postgres | presto | redis | salesforce | samba | segment |        +sendgrid | sftp | singularity | slack | smtp | snowflake | sqlite | ssh | tableau | tabular | telegram | trino |       +vertica | weaviate | yandex | zendesk]...                                                                              
Command to publish generated documentation to airflow-site diff --git a/images/breeze/output_sbom_generate-providers-requirements.svg b/images/breeze/output_sbom_generate-providers-requirements.svg index 3709da6c1e766..b537a29fb64fa 100644 --- a/images/breeze/output_sbom_generate-providers-requirements.svg +++ b/images/breeze/output_sbom_generate-providers-requirements.svg @@ -191,9 +191,9 @@ elasticsearch | exasol | facebook | ftp | github | google | grpc | hashicorp | http | imap |   influxdb | jdbc | jenkins | microsoft.azure | microsoft.mssql | microsoft.psrp |               microsoft.winrm | mongo | mysql | neo4j | odbc | openfaas | openlineage | opensearch |         -opsgenie | oracle | pagerduty | papermill | plexus | postgres | presto | redis | salesforce |  -samba | segment | sendgrid | sftp | singularity | slack | smtp | snowflake | sqlite | ssh |    -tableau | tabular | telegram | trino | vertica | weaviate | yandex | zendesk)                  +opsgenie | oracle | pagerduty | papermill | pinecone | plexus | postgres | presto | redis |    +salesforce | samba | segment | sendgrid | sftp | singularity | slack | smtp | snowflake |      +sqlite | ssh | tableau | tabular | telegram | trino | vertica | weaviate | yandex | zendesk)   --provider-versionProvider version to generate the requirements for i.e `2.1.0`. `latest` is also a supported    value to account for the most recent version of the provider                                   (TEXT)                                                                                         diff --git a/tests/providers/pinecone/__init__.py b/tests/providers/pinecone/__init__.py new file mode 100644 index 0000000000000..13a83393a9124 --- /dev/null +++ b/tests/providers/pinecone/__init__.py @@ -0,0 +1,16 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. 
The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. diff --git a/tests/providers/pinecone/hooks/__init__.py b/tests/providers/pinecone/hooks/__init__.py new file mode 100644 index 0000000000000..13a83393a9124 --- /dev/null +++ b/tests/providers/pinecone/hooks/__init__.py @@ -0,0 +1,16 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. diff --git a/tests/providers/pinecone/hooks/test_pinecone.py b/tests/providers/pinecone/hooks/test_pinecone.py new file mode 100644 index 0000000000000..d358ca94852fd --- /dev/null +++ b/tests/providers/pinecone/hooks/test_pinecone.py @@ -0,0 +1,44 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. 
See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +from __future__ import annotations + +from unittest.mock import Mock, patch + +from airflow.providers.pinecone.hooks.pinecone import PineconeHook + + +class TestPineconeHook: + def setup_method(self): + """Set up the test environment, mocking necessary connections and initializing the + PineconeHook object.""" + with patch("airflow.models.Connection.get_connection_from_secrets") as mock_get_connection: + mock_conn = Mock() + mock_conn.host = "pinecone.io" + mock_conn.login = "test_user" + mock_conn.password = "test_password" + mock_get_connection.return_value = mock_conn + self.pinecone_hook = PineconeHook() + self.index_name = "test_index" + + @patch("airflow.providers.pinecone.hooks.pinecone.pinecone.Index") + def test_upsert(self, mock_index): + """Test that the upsert method of PineconeHook passes the vectors and default options to Index.upsert.""" + data = [("id1", [1.0, 2.0, 3.0], {"meta": "data"})] + mock_upsert = Mock() + mock_index.return_value.upsert = mock_upsert + self.pinecone_hook.upsert(self.index_name, data) + mock_upsert.assert_called_once_with(vectors=data, namespace="", batch_size=None, show_progress=True) diff --git a/tests/providers/pinecone/operators/__init__.py b/tests/providers/pinecone/operators/__init__.py new file mode 100644 index 0000000000000..13a83393a9124 ---
/dev/null +++ b/tests/providers/pinecone/operators/__init__.py @@ -0,0 +1,16 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. diff --git a/tests/providers/pinecone/operators/test_pinecone.py b/tests/providers/pinecone/operators/test_pinecone.py new file mode 100644 index 0000000000000..20bbcc2e7d3d7 --- /dev/null +++ b/tests/providers/pinecone/operators/test_pinecone.py @@ -0,0 +1,104 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+from __future__ import annotations + +from datetime import datetime +from unittest.mock import Mock, patch + +import pytest + +from airflow.models import DAG +from airflow.providers.pinecone.operators.pinecone import PineconeIngestOperator + + +class MockPineconeHook: + """Mocking PineconeHook to avoid actual external calls""" + + def create_index(self, *args, **kwargs): + pass + + @staticmethod + def upsert(*args, **kwargs): + return Mock() + + +@pytest.fixture +def dummy_dag(): + """Fixture to provide a dummy Airflow DAG for testing.""" + return DAG(dag_id="test_dag", start_date=datetime(2023, 9, 29)) + + +class TestPineconeVectorIngestOperator: + def test_vector_ingest_operator_execution(self, dummy_dag): + """ + Test the execution of the PineconeIngestOperator. + Ensures that the upsert method on the hook is correctly called. + """ + test_vectors = [("id1", [1.0, 2.0, 3.0], {"meta": "data"})] + + task = PineconeIngestOperator( + task_id="ingest_vectors", + index_name="test_index", + input_vectors=test_vectors, + dag=dummy_dag, + ) + + with patch( + "airflow.providers.pinecone.operators.pinecone.PineconeIngestOperator.hook", + new_callable=MockPineconeHook, + ) as mock_hook_instance: + mock_hook_instance.upsert = Mock() + + task.execute(context={}) + mock_hook_instance.upsert.assert_called_once_with( + index_name="test_index", + vectors=test_vectors, + namespace="", + batch_size=None, + ) + + def test_vector_ingest_operator_with_extra_args(self, dummy_dag): + """ + Test the execution of the PineconeIngestOperator with additional parameters.
+ """ + test_vectors = [("id1", [1.0, 2.0, 3.0], {"meta": "data"})] + + task = PineconeIngestOperator( + task_id="ingest_vectors", + index_name="test_index", + input_vectors=test_vectors, + namespace="test_namespace", + batch_size=100, + upsert_kwargs={"custom_param": "value"}, + dag=dummy_dag, + ) + + with patch( + "airflow.providers.pinecone.operators.pinecone.PineconeIngestOperator.hook", + new_callable=MockPineconeHook, + ) as mock_hook_instance: + mock_hook_instance.upsert = Mock() + + task.execute(context={}) + + mock_hook_instance.upsert.assert_called_once_with( + index_name="test_index", + vectors=test_vectors, + namespace="test_namespace", + batch_size=100, + custom_param="value", + ) diff --git a/tests/system/providers/pinecone/__init__.py b/tests/system/providers/pinecone/__init__.py new file mode 100644 index 0000000000000..13a83393a9124 --- /dev/null +++ b/tests/system/providers/pinecone/__init__.py @@ -0,0 +1,16 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
diff --git a/tests/system/providers/pinecone/example_dag_pinecone.py b/tests/system/providers/pinecone/example_dag_pinecone.py new file mode 100644 index 0000000000000..8bf59febeca93 --- /dev/null +++ b/tests/system/providers/pinecone/example_dag_pinecone.py @@ -0,0 +1,52 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +from __future__ import annotations + +import os +from datetime import datetime + +from airflow import DAG +from airflow.providers.pinecone.operators.pinecone import PineconeIngestOperator + +index_name = os.getenv("INDEX_NAME", "test") +namespace = os.getenv("NAMESPACE", "example-pinecone-namespace") + + +with DAG( + "example_pinecone_ingest", + schedule=None, + start_date=datetime(2023, 1, 1), + catchup=False, +) as dag: + # [START howto_operator_pinecone_ingest] + PineconeIngestOperator( + task_id="pinecone_vector_ingest", + index_name=index_name, + input_vectors=[ + ("id1", [1.0, 2.0, 3.0], {"key": "value"}), + ("id2", [1.0, 2.0, 3.0]), + ], + namespace=namespace, + batch_size=1, + ) + # [END howto_operator_pinecone_ingest] + + +from tests.system.utils import get_test_run # noqa: E402 + +# Needed to run the example DAG with pytest (see: tests/system/README.md#run_via_pytest) +test_run = get_test_run(dag)