From f4e2391c26b94ed45b1574611615b076bdfeff43 Mon Sep 17 00:00:00 2001 From: lance hester Date: Tue, 3 Jun 2025 11:11:21 -0400 Subject: [PATCH 1/6] adding the .github workflows and template file to execute github actions --- .github/pull_request_template.md | 48 +++++++++++++++ .github/workflows/semantic_search_test.yml | 68 ++++++++++++++++++++++ 2 files changed, 116 insertions(+) create mode 100644 .github/pull_request_template.md create mode 100644 .github/workflows/semantic_search_test.yml diff --git a/.github/pull_request_template.md b/.github/pull_request_template.md new file mode 100644 index 0000000..3d4442c --- /dev/null +++ b/.github/pull_request_template.md @@ -0,0 +1,48 @@ +## Title: + +Example: + + - Brief Title + + +* +* +* +* + +## What does this change do? + +This feature creates ..., It is a bug fix that... + +<!-- bugfix write --> +<!-- If this is a bug fix, fill in the details below. Otherwise delete this section. --> + +## Bug fix details + +### Who discovered this bug? + +### On what instance & build was the issue found? + +current build + +### What is the extent of this bug? (is it an edge case, who is affected, is there an existing workaround, etc) + +The bug affects module... + +<!-- End of bugfix write --> + +## Why do we need it? + +To ... + +## Changes made + +- Update `xxx.py`, especially methods: `method_1()` and `method_2()` + +## Follows or Closes (e.g., Jira, Ticket Number, Request): + +[ticket-#](https://t) + +## What might break? 
+ +nothing diff --git a/.github/workflows/semantic_search_test.yml b/.github/workflows/semantic_search_test.yml new file mode 100644 index 0000000..9d2658f --- /dev/null +++ b/.github/workflows/semantic_search_test.yml @@ -0,0 +1,68 @@ +name: Semantic Search Tests + +on: + pull_request: + branches: + - main + push: + branches: + - main + +jobs: + + # Set up operating system to test virtually + test: + runs-on: ubuntu-latest + + strategy: + matrix: + python-version: ["3.13"] + + # Secrets/env variables go here - read from github secrets + #env: + #EXAMPLE_ENVIRONMENT_VARIABLE: ${{ secrets.EXAMPLE_ENVIRONMENT_VARIABLE }} + + steps: + # Step 1: Check out the code from the repository + - name: Checkout repository code + uses: actions/checkout@v4 + + # Step 2: Set up Python environment + - name: Set up Python + uses: actions/setup-python@v4 + with: + python-version: ${{ matrix.python-version }} + + # Step 3: Install dependencies - notice use of pyproject toml groups instead of requirements + - name: Install dependencies + run: | + python -m pip install --upgrade pip + python -m pip install .[ci] + + # Step 4: Add any debugging information we want to see + - name: Print debugging information + run: | + echo "Python Version: $(python --version)" + echo "Working Directory: $(pwd)" + echo "Contents of Working Directory: $(ls -l)" + + # Step 5: Run Tests + # (pytest .) instructs pytest test framework to discover and execute tests within current and sub directories. + - name: Run pytests + run : pytest . + + # Step 6: Continuous Delivery to Dockerhub - Build latest image + - name: Login to Docker Hub + uses: docker/login-action@v2 + if: ${{ github.event_name == 'push'}} + with: + username: ${{ secrets.DOCKERHUB_USERNAME }} + password: ${{ secrets.DOCKERHUB_PASSWORD }} + - name: Push image to Dockerhub + uses: docker/build-push-action@v4.0.0 + if: ${{ github.event_name == 'push'}} + with: + context: . 
+ push: true + tags: | + ${{ secrets.DOCKERHUB_USERNAME}}/semantic_search_app:latest From 2ab3a18873bc1a082c1c354dbe9a6fe6b7cec274 Mon Sep 17 00:00:00 2001 From: lance hester Date: Tue, 3 Jun 2025 11:11:51 -0400 Subject: [PATCH 2/6] update uv.lock --- uv.lock | 67 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 67 insertions(+) diff --git a/uv.lock b/uv.lock index b516b9c..a930e0b 100644 --- a/uv.lock +++ b/uv.lock @@ -311,6 +311,37 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/b0/e6/6000d0094e8a5e32ad62591c8609e269febb6e4db83a1c75ff8868b42731/contourpy-1.3.2-cp313-cp313t-win_amd64.whl", hash = "sha256:78e9253c3de756b3f6a5174d024c4835acd59eb3f8e2ca13e775dbffe1558f69", size = 238214, upload-time = "2025-04-15T17:44:40.827Z" }, ] +[[package]] +name = "coverage" +version = "7.8.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/ba/07/998afa4a0ecdf9b1981ae05415dad2d4e7716e1b1f00abbd91691ac09ac9/coverage-7.8.2.tar.gz", hash = "sha256:a886d531373a1f6ff9fad2a2ba4a045b68467b779ae729ee0b3b10ac20033b27", size = 812759, upload-time = "2025-05-23T11:39:57.856Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/1a/93/eb6400a745ad3b265bac36e8077fdffcf0268bdbbb6c02b7220b624c9b31/coverage-7.8.2-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:ea561010914ec1c26ab4188aef8b1567272ef6de096312716f90e5baa79ef8ca", size = 211898, upload-time = "2025-05-23T11:38:49.066Z" }, + { url = "https://files.pythonhosted.org/packages/1b/7c/bdbf113f92683024406a1cd226a199e4200a2001fc85d6a6e7e299e60253/coverage-7.8.2-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:cb86337a4fcdd0e598ff2caeb513ac604d2f3da6d53df2c8e368e07ee38e277d", size = 212171, upload-time = "2025-05-23T11:38:51.207Z" }, + { url = 
"https://files.pythonhosted.org/packages/91/22/594513f9541a6b88eb0dba4d5da7d71596dadef6b17a12dc2c0e859818a9/coverage-7.8.2-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:26a4636ddb666971345541b59899e969f3b301143dd86b0ddbb570bd591f1e85", size = 245564, upload-time = "2025-05-23T11:38:52.857Z" }, + { url = "https://files.pythonhosted.org/packages/1f/f4/2860fd6abeebd9f2efcfe0fd376226938f22afc80c1943f363cd3c28421f/coverage-7.8.2-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:5040536cf9b13fb033f76bcb5e1e5cb3b57c4807fef37db9e0ed129c6a094257", size = 242719, upload-time = "2025-05-23T11:38:54.529Z" }, + { url = "https://files.pythonhosted.org/packages/89/60/f5f50f61b6332451520e6cdc2401700c48310c64bc2dd34027a47d6ab4ca/coverage-7.8.2-cp313-cp313-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:dc67994df9bcd7e0150a47ef41278b9e0a0ea187caba72414b71dc590b99a108", size = 244634, upload-time = "2025-05-23T11:38:57.326Z" }, + { url = "https://files.pythonhosted.org/packages/3b/70/7f4e919039ab7d944276c446b603eea84da29ebcf20984fb1fdf6e602028/coverage-7.8.2-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:6e6c86888fd076d9e0fe848af0a2142bf606044dc5ceee0aa9eddb56e26895a0", size = 244824, upload-time = "2025-05-23T11:38:59.421Z" }, + { url = "https://files.pythonhosted.org/packages/26/45/36297a4c0cea4de2b2c442fe32f60c3991056c59cdc3cdd5346fbb995c97/coverage-7.8.2-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:684ca9f58119b8e26bef860db33524ae0365601492e86ba0b71d513f525e7050", size = 242872, upload-time = "2025-05-23T11:39:01.049Z" }, + { url = "https://files.pythonhosted.org/packages/a4/71/e041f1b9420f7b786b1367fa2a375703889ef376e0d48de9f5723fb35f11/coverage-7.8.2-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:8165584ddedb49204c4e18da083913bdf6a982bfb558632a79bdaadcdafd0d48", size = 244179, upload-time = 
"2025-05-23T11:39:02.709Z" }, + { url = "https://files.pythonhosted.org/packages/bd/db/3c2bf49bdc9de76acf2491fc03130c4ffc51469ce2f6889d2640eb563d77/coverage-7.8.2-cp313-cp313-win32.whl", hash = "sha256:34759ee2c65362163699cc917bdb2a54114dd06d19bab860725f94ef45a3d9b7", size = 214393, upload-time = "2025-05-23T11:39:05.457Z" }, + { url = "https://files.pythonhosted.org/packages/c6/dc/947e75d47ebbb4b02d8babb1fad4ad381410d5bc9da7cfca80b7565ef401/coverage-7.8.2-cp313-cp313-win_amd64.whl", hash = "sha256:2f9bc608fbafaee40eb60a9a53dbfb90f53cc66d3d32c2849dc27cf5638a21e3", size = 215194, upload-time = "2025-05-23T11:39:07.171Z" }, + { url = "https://files.pythonhosted.org/packages/90/31/a980f7df8a37eaf0dc60f932507fda9656b3a03f0abf188474a0ea188d6d/coverage-7.8.2-cp313-cp313-win_arm64.whl", hash = "sha256:9fe449ee461a3b0c7105690419d0b0aba1232f4ff6d120a9e241e58a556733f7", size = 213580, upload-time = "2025-05-23T11:39:08.862Z" }, + { url = "https://files.pythonhosted.org/packages/8a/6a/25a37dd90f6c95f59355629417ebcb74e1c34e38bb1eddf6ca9b38b0fc53/coverage-7.8.2-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:8369a7c8ef66bded2b6484053749ff220dbf83cba84f3398c84c51a6f748a008", size = 212734, upload-time = "2025-05-23T11:39:11.109Z" }, + { url = "https://files.pythonhosted.org/packages/36/8b/3a728b3118988725f40950931abb09cd7f43b3c740f4640a59f1db60e372/coverage-7.8.2-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:159b81df53a5fcbc7d45dae3adad554fdbde9829a994e15227b3f9d816d00b36", size = 212959, upload-time = "2025-05-23T11:39:12.751Z" }, + { url = "https://files.pythonhosted.org/packages/53/3c/212d94e6add3a3c3f412d664aee452045ca17a066def8b9421673e9482c4/coverage-7.8.2-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e6fcbbd35a96192d042c691c9e0c49ef54bd7ed865846a3c9d624c30bb67ce46", size = 257024, upload-time = "2025-05-23T11:39:15.569Z" }, + { url = 
"https://files.pythonhosted.org/packages/a4/40/afc03f0883b1e51bbe804707aae62e29c4e8c8bbc365c75e3e4ddeee9ead/coverage-7.8.2-cp313-cp313t-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:05364b9cc82f138cc86128dc4e2e1251c2981a2218bfcd556fe6b0fbaa3501be", size = 252867, upload-time = "2025-05-23T11:39:17.64Z" }, + { url = "https://files.pythonhosted.org/packages/18/a2/3699190e927b9439c6ded4998941a3c1d6fa99e14cb28d8536729537e307/coverage-7.8.2-cp313-cp313t-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:46d532db4e5ff3979ce47d18e2fe8ecad283eeb7367726da0e5ef88e4fe64740", size = 255096, upload-time = "2025-05-23T11:39:19.328Z" }, + { url = "https://files.pythonhosted.org/packages/b4/06/16e3598b9466456b718eb3e789457d1a5b8bfb22e23b6e8bbc307df5daf0/coverage-7.8.2-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:4000a31c34932e7e4fa0381a3d6deb43dc0c8f458e3e7ea6502e6238e10be625", size = 256276, upload-time = "2025-05-23T11:39:21.077Z" }, + { url = "https://files.pythonhosted.org/packages/a7/d5/4b5a120d5d0223050a53d2783c049c311eea1709fa9de12d1c358e18b707/coverage-7.8.2-cp313-cp313t-musllinux_1_2_i686.whl", hash = "sha256:43ff5033d657cd51f83015c3b7a443287250dc14e69910577c3e03bd2e06f27b", size = 254478, upload-time = "2025-05-23T11:39:22.838Z" }, + { url = "https://files.pythonhosted.org/packages/ba/85/f9ecdb910ecdb282b121bfcaa32fa8ee8cbd7699f83330ee13ff9bbf1a85/coverage-7.8.2-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:94316e13f0981cbbba132c1f9f365cac1d26716aaac130866ca812006f662199", size = 255255, upload-time = "2025-05-23T11:39:24.644Z" }, + { url = "https://files.pythonhosted.org/packages/50/63/2d624ac7d7ccd4ebbd3c6a9eba9d7fc4491a1226071360d59dd84928ccb2/coverage-7.8.2-cp313-cp313t-win32.whl", hash = "sha256:3f5673888d3676d0a745c3d0e16da338c5eea300cb1f4ada9c872981265e76d8", size = 215109, upload-time = "2025-05-23T11:39:26.722Z" }, + { url = 
"https://files.pythonhosted.org/packages/22/5e/7053b71462e970e869111c1853afd642212568a350eba796deefdfbd0770/coverage-7.8.2-cp313-cp313t-win_amd64.whl", hash = "sha256:2c08b05ee8d7861e45dc5a2cc4195c8c66dca5ac613144eb6ebeaff2d502e73d", size = 216268, upload-time = "2025-05-23T11:39:28.429Z" }, + { url = "https://files.pythonhosted.org/packages/07/69/afa41aa34147655543dbe96994f8a246daf94b361ccf5edfd5df62ce066a/coverage-7.8.2-cp313-cp313t-win_arm64.whl", hash = "sha256:1e1448bb72b387755e1ff3ef1268a06617afd94188164960dba8d0245a46004b", size = 214071, upload-time = "2025-05-23T11:39:30.55Z" }, + { url = "https://files.pythonhosted.org/packages/a0/1a/0b9c32220ad694d66062f571cc5cedfa9997b64a591e8a500bb63de1bd40/coverage-7.8.2-py3-none-any.whl", hash = "sha256:726f32ee3713f7359696331a18daf0c3b3a70bb0ae71141b9d3c52be7c595e32", size = 203623, upload-time = "2025-05-23T11:39:53.846Z" }, +] + [[package]] name = "cycler" version = "0.12.1" @@ -1810,6 +1841,19 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/30/3d/64ad57c803f1fa1e963a7946b6e0fea4a70df53c1a7fed304586539c2bac/pytest-8.3.5-py3-none-any.whl", hash = "sha256:c69214aa47deac29fad6c2a4f590b9c4a9fdb16a403176fe154b79c0b4d4d820", size = 343634, upload-time = "2025-03-02T12:54:52.069Z" }, ] +[[package]] +name = "pytest-cov" +version = "6.1.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "coverage" }, + { name = "pytest" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/25/69/5f1e57f6c5a39f81411b550027bf72842c4567ff5fd572bed1edc9e4b5d9/pytest_cov-6.1.1.tar.gz", hash = "sha256:46935f7aaefba760e716c2ebfbe1c216240b9592966e7da99ea8292d4d3e2a0a", size = 66857, upload-time = "2025-04-05T14:07:51.592Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/28/d0/def53b4a790cfb21483016430ed828f64830dd981ebe1089971cd10cab25/pytest_cov-6.1.1-py3-none-any.whl", hash = "sha256:bddf29ed2d0ab6f4df17b4c55b0a657287db8684af9c42ea546b21b1041b3dde", size = 23841, 
upload-time = "2025-04-05T14:07:49.641Z" }, +] + [[package]] name = "pytest-mock" version = "3.14.1" @@ -2196,9 +2240,16 @@ dependencies = [ { name = "polars" }, { name = "requests" }, { name = "sentence-transformers" }, + { name = "types-requests" }, { name = "youtube-transcript-api" }, ] +[package.optional-dependencies] +ci = [ + { name = "pytest" }, + { name = "pytest-cov" }, +] + [package.dev-dependencies] dev = [ { name = "mypy" }, @@ -2217,10 +2268,14 @@ requires-dist = [ { name = "jupyter", specifier = ">=1.1.1" }, { name = "matplotlib", specifier = ">=3.10.3" }, { name = "polars", specifier = ">=1.30.0" }, + { name = "pytest", marker = "extra == 'ci'" }, + { name = "pytest-cov", marker = "extra == 'ci'" }, { name = "requests", specifier = ">=2.32.3" }, { name = "sentence-transformers", specifier = ">=4.1.0" }, + { name = "types-requests", specifier = ">=2.32.0.20250602" }, { name = "youtube-transcript-api", specifier = ">=1.0.3" }, ] +provides-extras = ["ci"] [package.metadata.requires-dev] dev = [ @@ -2565,6 +2620,18 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/c1/ba/e205cd11c1c7183b23c97e4bcd1de7bc0633e2e867601c32ecfc6ad42675/types_pytz-2025.2.0.20250516-py3-none-any.whl", hash = "sha256:e0e0c8a57e2791c19f718ed99ab2ba623856b11620cb6b637e5f62ce285a7451", size = 10136, upload-time = "2025-05-16T03:07:01.075Z" }, ] +[[package]] +name = "types-requests" +version = "2.32.0.20250602" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "urllib3" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/48/b0/5321e6eeba5d59e4347fcf9bf06a5052f085c3aa0f4876230566d6a4dc97/types_requests-2.32.0.20250602.tar.gz", hash = "sha256:ee603aeefec42051195ae62ca7667cd909a2f8128fdf8aad9e8a5219ecfab3bf", size = 23042, upload-time = "2025-06-02T03:15:02.958Z" } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/da/18/9b782980e575c6581d5c0c1c99f4c6f89a1d7173dad072ee96b2756c02e6/types_requests-2.32.0.20250602-py3-none-any.whl", hash = "sha256:f4f335f87779b47ce10b8b8597b409130299f6971ead27fead4fe7ba6ea3e726", size = 20638, upload-time = "2025-06-02T03:15:01.959Z" }, +] + [[package]] name = "typing-extensions" version = "4.13.2" From b6b3d3b2c9842ac106e3c0fa3604588559b048be Mon Sep 17 00:00:00 2001 From: lance hester Date: Tue, 3 Jun 2025 11:12:44 -0400 Subject: [PATCH 3/6] update pyproject.toml to include changes for github actions and installs --- pyproject.toml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/pyproject.toml b/pyproject.toml index 5257c33..53dfa27 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -14,6 +14,7 @@ dependencies = [ "polars>=1.30.0", "requests>=2.32.3", "sentence-transformers>=4.1.0", + "types-requests>=2.32.0.20250602", "youtube-transcript-api>=1.0.3", ] @@ -25,3 +26,6 @@ dev = [ "pytest-mock>=3.14.0", "ruff>=0.11.12", ] + +[project.optional-dependencies] +ci = ["pytest", "pytest-cov"] From 335d363e92b0f17f05a98c1825b102c2fc42c26f Mon Sep 17 00:00:00 2001 From: lance hester Date: Tue, 3 Jun 2025 11:13:35 -0400 Subject: [PATCH 4/6] just adding more for final README file --- README.md | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/README.md b/README.md index b230d16..8e554f0 100644 --- a/README.md +++ b/README.md @@ -108,6 +108,12 @@ Open a browser and use: http://127.0.0.1:8000 ## Special Notes +#you can put these three in a local .env file and add them to GitHub secrets for your own actions (see placement in .github/workflows/{}.yml) + +API_URL="https://www.googleapis.com/youtube/v3/search" # Publicly available API from YouTube +CHANNEL_ID="UCa9gErQ9AE5jT2DZLjXBIdA" # Channel ID of Shaw Talebi's YouTube channel +MY_YOUTUBE_API_KEY="GET YOUR OWN YOUTUBE API KEY" # link to video of how to get your own API key + I ignore warning message like below during testing and running the
code. ``` @@ -181,3 +187,13 @@ Need 12. Monitor the deployment progress in the ECS console to ensure the changes resolve the issue https://gallery.ecr.aws/docker/library/python + +### + +GitHub Actions + +crontab resource +https://crontab.guru/ + +Set up GitHub Actions to generate a new image and push it to Docker Hub. +Complete README From 4fd2f03bcd64d2dd81f79ab5f36296af286c258b Mon Sep 17 00:00:00 2001 From: lance hester Date: Tue, 3 Jun 2025 11:14:33 -0400 Subject: [PATCH 5/6] use proper class import and fix type hints --- src/functions.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/functions.py b/src/functions.py index 29b392a..a4aa537 100644 --- a/src/functions.py +++ b/src/functions.py @@ -1,11 +1,11 @@ import numpy as np import polars -import sentence_transformers +from sentence_transformers import SentenceTransformer import sklearn def return_search_result_indices(query: str, df: polars.lazyframe.frame.LazyFrame, - model: sentence_transformers, + model: SentenceTransformer, dist: sklearn.metrics.DistanceMetric) -> np.ndarray: """ Method to return the indices of the top search results @@ -13,7 +13,7 @@ def return_search_result_indices(query: str, Args: query (str): The user query string to search for videos df (polars.lazyframe.frame.LazyFrame): the data - model (sentence_transformers): The sentence transformer model + model (SentenceTransformer): The sentence transformer model dist (sklearn.metrics.DistanceMetric): The distance measure used here it is Manhattan Returns: From 02439d1d939753194926c05961cc9e732a941a5b Mon Sep 17 00:00:00 2001 From: lance hester Date: Tue, 3 Jun 2025 11:15:13 -0400 Subject: [PATCH 6/6] fix return indentation --- src/get_video_data.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/get_video_data.py b/src/get_video_data.py index 61d4b86..e02e6f9 100644 --- a/src/get_video_data.py +++ b/src/get_video_data.py @@ -50,7 +50,7 @@ def make_video_records(response: requests.models.Response) 
-> list[dict[str,str] video_record_list.append(video_record) - return video_record_list + return video_record_list def extract_video_data( page_token: int,