From b117abbe335a7c831b23a2288caf2dd656c226dd Mon Sep 17 00:00:00 2001 From: James Kent Date: Fri, 24 May 2024 09:48:27 -0500 Subject: [PATCH 01/15] add postgres vectordb --- store/neurostore/ingest/__init__.py | 145 +++++++++++---------- store/neurostore/models/data.py | 4 +- store/neurostore/models/migration_types.py | 18 +++ store/neurostore/requirements.txt | 1 + store/postgres/Dockerfile | 30 +++++ store/postgres/custom-entrypoint.sh | 19 +++ store/postgres/init-vector-extension.sql | 1 + 7 files changed, 148 insertions(+), 70 deletions(-) create mode 100644 store/postgres/custom-entrypoint.sh create mode 100644 store/postgres/init-vector-extension.sql diff --git a/store/neurostore/ingest/__init__.py b/store/neurostore/ingest/__init__.py index f525c2dee..0e6bd8828 100644 --- a/store/neurostore/ingest/__init__.py +++ b/store/neurostore/ingest/__init__.py @@ -469,9 +469,6 @@ def ace_ingestion_logic(coordinates_df, metadata_df, text_df): # see if there are duplicates for the newly created base_studies all_base_studies = [] with db.session.no_autoflush: - all_studies = { - s.pmid: s for s in Study.query.filter_by(source="neurosynth").all() - } for metadata_row, text_row in zip( metadata_df.itertuples(), text_df.itertuples() ): @@ -561,76 +558,86 @@ def ace_ingestion_logic(coordinates_df, metadata_df, text_df): # append base study to commit to_commit.append(base_study) + # keep track of all created/modified base studies + all_base_studies.append(base_study) - s = all_studies.get(pmid, Study()) - - # try to update the study if information is missing - study_info = { - "name": metadata_row.title, - "doi": doi, - "pmid": pmid, - "description": text_row.abstract, - "authors": metadata_row.authors, - "publication": metadata_row.journal, - "year": year, - "level": "group", - "source": "neurosynth", - } - for col, value in study_info.items(): - source_attr = getattr(s, col) - setattr(s, col, source_attr or value) + relevant_studies = base_study.versions + if not relevant_studies: + relevant_studies.append(Study()) - analyses = [] - points = [] + # if all studies have a user, + # add a new study version to incorporate the new data + if all(s.user is not None for s in relevant_studies): + relevant_studies.append(Study()) - try: - study_coord_data = coordinates_df.loc[[id_]] - except KeyError: - print(f"pmid: {id_} has no coordinates") - continue - for order, (t_id, df) in enumerate(study_coord_data.groupby("table_id")): - a = ( - Analysis.query.filter_by(table_id=str(t_id)).one_or_none() - or Analysis() - ) - a.name = df["table_label"][0] or str(t_id) - a.table_id = str(t_id) - a.order = a.order or order - a.description = ( - df["table_caption"][0] - if not df["table_caption"].isna()[0] - else None - ) - if not a.study: - a.study = s - analyses.append(a) - point_idx = 0 - for _, p in df.iterrows(): - point = Point( - x=p["x"], - y=p["y"], - z=p["z"], - space=metadata_row.coordinate_space, - kind=( - df["statistic"][0] - if not df["statistic"].isna()[0] - else "unknown" - ), - analysis=a, - entities=[Entity(label=a.name, level="group", analysis=a)], - order=point_idx, + for s in relevant_studies: + # try to update the study if information is missing + study_info = { + "name": metadata_row.title, + "doi": doi, + "pmid": pmid, + "description": text_row.abstract, + "authors": metadata_row.authors, + "publication": metadata_row.journal, + "year": year, + "level": "group", + "source": "neurosynth", + } + for col, value in study_info.items(): + source_attr = getattr(s, col) + setattr(s, col, source_attr or value) + if s.user is not None: + # do not edit studies that are user owned + continue + analyses = [] + points = [] + + try: + study_coord_data = coordinates_df.loc[[id_]] + except KeyError: + print(f"pmid: {id_} has no coordinates") + continue + for order, (t_id, df) in enumerate(study_coord_data.groupby("table_id")): + a = ( + Analysis.query.filter_by(table_id=str(t_id)).one_or_none() + or Analysis() ) - points.append(point) - point_idx += 1 - to_commit.extend(points) - to_commit.extend(analyses) - # append study as version of study - base_study.versions.append(s) - - db.session.add_all(to_commit) - db.session.flush() - for bs in all_base_studies: - bs.update_has_images_and_points() + a.name = df["table_label"][0] or str(t_id) + a.table_id = str(t_id) + a.order = a.order or order + a.description = ( + df["table_caption"][0] + if not df["table_caption"].isna()[0] + else None + ) + if not a.study: + a.study = s + analyses.append(a) + point_idx = 0 + for _, p in df.iterrows(): + point = Point( + x=p["x"], + y=p["y"], + z=p["z"], + space=metadata_row.coordinate_space, + kind=( + df["statistic"][0] + if not df["statistic"].isna()[0] + else "unknown" + ), + analysis=a, + entities=[Entity(label=a.name, level="group", analysis=a)], + order=point_idx, + ) + points.append(point) + point_idx += 1 + to_commit.extend(points) + to_commit.extend(analyses) + + db.session.add_all(to_commit) + db.session.flush() + for bs in all_base_studies: + bs.update_has_images_and_points() db.session.commit() diff --git a/store/neurostore/models/data.py b/store/neurostore/models/data.py index f6a6cb996..4ba10e7dd 100644 --- a/store/neurostore/models/data.py +++ b/store/neurostore/models/data.py @@ -8,9 +8,10 @@ from sqlalchemy.ext.mutable import MutableDict from sqlalchemy.orm import relationship, backref from sqlalchemy.sql import func + import shortuuid -from .migration_types import TSVector +from .migration_types import TSVector, PGVector from ..database import db @@ -178,6 +179,7 @@ class BaseStudy(BaseMixin, db.Model): has_coordinates = db.Column(db.Boolean, default=False, nullable=False) has_images = db.Column(db.Boolean, default=False, nullable=False) user_id = db.Column(db.Text, db.ForeignKey("users.external_id"), index=True) + ada_openai_vector = db.Column(PGVector(1536)) # length of openai ada vector _ts_vector = db.Column( "__ts_vector__", TSVector(), diff --git a/store/neurostore/models/migration_types.py b/store/neurostore/models/migration_types.py index 0b81587bf..ce24de7d9 100644 --- a/store/neurostore/models/migration_types.py +++ b/store/neurostore/models/migration_types.py @@ -1,5 +1,23 @@ import sqlalchemy as sa from sqlalchemy.dialects.postgresql import TSVECTOR +from pgvector.sqlalchemy import Vector + +class PGVector(sa.types.TypeDecorator): + """class for semantic search""" + cache_ok = True + impl = Vector + + def __init__(self, dim): + super().__init__() + self.impl = Vector(dim) + + def process_bind_param(self, value, dialect): + # Ensure the value is of the correct type + return value + + def process_result_value(self, value, dialect): + # Ensure the value is returned correctly + return value class TSVector(sa.types.TypeDecorator): diff --git a/store/neurostore/requirements.txt b/store/neurostore/requirements.txt index 236abded2..4328135d9 100644 --- a/store/neurostore/requirements.txt +++ b/store/neurostore/requirements.txt @@ -14,6 +14,7 @@ gunicorn~=22.0 ipython~=7.19 pandas~=1.2 pip-chill~=1.0 +pgvector~=0.2.5 psycopg2-binary~=2.8 pyld~=2.0 python-jose~=3.3 diff --git a/store/postgres/Dockerfile b/store/postgres/Dockerfile index a4ae2c4e9..4ff63dd19 100644 --- a/store/postgres/Dockerfile +++ b/store/postgres/Dockerfile @@ -1,11 +1,41 @@ FROM postgres:12.12 + +# Install necessary dependencies RUN apt-get update && apt-get install -y dos2unix RUN apt-get install -yq python3-pip python-dev build-essential RUN apt-get install -yq cron RUN pip3 install awscli + +# Install pgvector dependencies +RUN apt-get install -y postgresql-server-dev-12 gcc make curl clang + +# Download and install pgvector +RUN curl -L https://github.com/pgvector/pgvector/archive/refs/tags/v0.7.0.tar.gz -o pgvector.tar.gz \ + && tar -xzf pgvector.tar.gz \ + && cd pgvector-0.7.0 \ + && make \ + && make install \ + && rm -rf pgvector.tar.gz pgvector-0.7.0 + +# Copy initialization script +COPY init-vector-extension.sql /docker-entrypoint-initdb.d/ + +# Copy custom entrypoint script +COPY custom-entrypoint.sh /usr/local/bin/ +RUN chmod +x /usr/local/bin/custom-entrypoint.sh + +# Copy scripts and set permissions COPY pg_dump-to-s3 /home RUN chmod +x /home/pg_dump-to-s3.sh /home/s3-autodelete.sh + +# Set up cron jobs RUN crontab /home/backup.txt RUN service cron start + +# Convert scripts to Unix format RUN dos2unix /home/pg_dump-to-s3.sh RUN dos2unix /home/s3-autodelete.sh + +# Set the entrypoint to the custom script +ENTRYPOINT ["custom-entrypoint.sh"] +CMD ["postgres"] diff --git a/store/postgres/custom-entrypoint.sh b/store/postgres/custom-entrypoint.sh new file mode 100644 index 000000000..b494de2ac --- /dev/null +++ b/store/postgres/custom-entrypoint.sh @@ -0,0 +1,19 @@ +#!/bin/bash +set -e + +# Start PostgreSQL and wait for it to be ready +/usr/local/bin/docker-entrypoint.sh postgres & + +# Wait for PostgreSQL to start +until pg_isready -h localhost -p 5432 -U "$POSTGRES_USER"; do + echo "$(date) - waiting for database to start" + sleep 2 +done + +# Enable the pgvector extension +psql -v ON_ERROR_STOP=1 --username "$POSTGRES_USER" --dbname "$POSTGRES_DB" <<-EOSQL + CREATE EXTENSION IF NOT EXISTS vector; +EOSQL + +# Wait for PostgreSQL process to keep the container running +wait diff --git a/store/postgres/init-vector-extension.sql b/store/postgres/init-vector-extension.sql new file mode 100644 index 000000000..0aa0fc225 --- /dev/null +++ b/store/postgres/init-vector-extension.sql @@ -0,0 +1 @@ +CREATE EXTENSION IF NOT EXISTS vector; From 97f84a3b3a6f6133d087fc11d19a46aa1023d437 Mon Sep 17 00:00:00 2001 From: James Kent Date: Fri, 24 May 2024 09:51:38 -0500 Subject: [PATCH 02/15] formatting --- store/neurostore/ingest/__init__.py | 4 +++- store/neurostore/models/migration_types.py | 2 ++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/store/neurostore/ingest/__init__.py b/store/neurostore/ingest/__init__.py index 0e6bd8828..0dc405f24 100644 --- a/store/neurostore/ingest/__init__.py +++ b/store/neurostore/ingest/__init__.py @@ -597,7 +597,9 @@ def ace_ingestion_logic(coordinates_df, metadata_df, text_df): except KeyError: print(f"pmid: {id_} has no coordinates") continue - for order, (t_id, df) in enumerate(study_coord_data.groupby("table_id")): + for order, (t_id, df) in enumerate( + study_coord_data.groupby("table_id") + ): a = ( Analysis.query.filter_by(table_id=str(t_id)).one_or_none() or Analysis() diff --git a/store/neurostore/models/migration_types.py b/store/neurostore/models/migration_types.py index ce24de7d9..36145cd1a 100644 --- a/store/neurostore/models/migration_types.py +++ b/store/neurostore/models/migration_types.py @@ -2,8 +2,10 @@ from sqlalchemy.dialects.postgresql import TSVECTOR from pgvector.sqlalchemy import Vector + class PGVector(sa.types.TypeDecorator): """class for semantic search""" + cache_ok = True impl = Vector From ead6bc6437b0eda5bc0056748ebb903799d8a6f9 Mon Sep 17 00:00:00 2001 From: James Kent Date: Fri, 24 May 2024 10:03:25 -0500 Subject: [PATCH 03/15] temp disable cache postgres build --- .github/workflows/workflow.yml | 2 -- 1 file changed, 2 deletions(-) diff --git a/.github/workflows/workflow.yml b/.github/workflows/workflow.yml index f39a26dd8..63c6aa6f8 100644 --- a/.github/workflows/workflow.yml +++ b/.github/workflows/workflow.yml @@ -110,8 +110,6 @@ jobs: store_nginx.cache-from=type=gha,scope=cached-store-nginx store_nginx.cache-to=type=gha,scope=cached-store-nginx,mode=max,ignore-error=true store_pgsql.tags=ghcr.io/${{ github.repository_owner }}/store_pgsql:${{ hashFiles('**/store/postgres/**') }} - store_pgsql.cache-from=type=registry,ref=ghcr.io/${{ github.repository_owner }}/store_pgsql:${{ hashFiles('**/store/postgres/**') }} - store_pgsql.cache-from=type=gha,scope=cached-store-pgsql store_pgsql.cache-to=type=gha,scope=cached-store-pgsql,mode=max,ignore-error=true From e2ca88660c4a6cd2fa8de4e3f6c0a3911e4e337f Mon Sep 17 00:00:00 2001 From: James Kent Date: Fri, 24 May 2024 10:09:31 -0500 Subject: [PATCH 04/15] Revert "temp disable cache postgres build" This reverts commit ead6bc6437b0eda5bc0056748ebb903799d8a6f9. --- .github/workflows/workflow.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/workflow.yml b/.github/workflows/workflow.yml index 63c6aa6f8..f39a26dd8 100644 --- a/.github/workflows/workflow.yml +++ b/.github/workflows/workflow.yml @@ -110,6 +110,8 @@ jobs: store_nginx.cache-from=type=gha,scope=cached-store-nginx store_nginx.cache-to=type=gha,scope=cached-store-nginx,mode=max,ignore-error=true store_pgsql.tags=ghcr.io/${{ github.repository_owner }}/store_pgsql:${{ hashFiles('**/store/postgres/**') }} + store_pgsql.cache-from=type=registry,ref=ghcr.io/${{ github.repository_owner }}/store_pgsql:${{ hashFiles('**/store/postgres/**') }} + store_pgsql.cache-from=type=gha,scope=cached-store-pgsql store_pgsql.cache-to=type=gha,scope=cached-store-pgsql,mode=max,ignore-error=true From 66fb291a9932de365256f623d079c9f00903de16 Mon Sep 17 00:00:00 2001 From: James Kent Date: Fri, 24 May 2024 10:26:32 -0500 Subject: [PATCH 05/15] try to add more time --- .github/workflows/workflow.yml | 2 ++ store/postgres/custom-entrypoint.sh | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/.github/workflows/workflow.yml b/.github/workflows/workflow.yml index f39a26dd8..f60a9408b 100644 --- a/.github/workflows/workflow.yml +++ b/.github/workflows/workflow.yml @@ -158,6 +158,7 @@ jobs: - name: Create Test Database run: | + sleep 1 until docker compose exec -T \ store_pgsql pg_isready -U postgres; do sleep 1; done @@ -434,6 +435,7 @@ jobs: name: Create Store Database run: | cd store + sleep 1 until docker compose exec -T \ store_pgsql pg_isready -U postgres; do sleep 1; done diff --git a/store/postgres/custom-entrypoint.sh b/store/postgres/custom-entrypoint.sh index b494de2ac..2c6cb60c9 100644 --- a/store/postgres/custom-entrypoint.sh +++ b/store/postgres/custom-entrypoint.sh @@ -5,7 +5,7 @@ set -e /usr/local/bin/docker-entrypoint.sh postgres & # Wait for PostgreSQL to start -until pg_isready -h localhost -p 5432 -U "$POSTGRES_USER"; do +until pg_isready -p 5432 -U "$POSTGRES_USER"; do echo "$(date) - waiting for database to start" sleep 2 done From 1b4045ec715ffb7578398fec225ecc61fc650702 Mon Sep 17 00:00:00 2001 From: James Kent Date: Fri, 24 May 2024 10:32:39 -0500 Subject: [PATCH 06/15] define entrypoint explicitly --- store/docker-compose.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/store/docker-compose.yml b/store/docker-compose.yml index 13bcd421c..6ef10bc70 100644 --- a/store/docker-compose.yml +++ b/store/docker-compose.yml @@ -49,6 +49,7 @@ services: - '5432' env_file: - .env + entrypoint: ["/usr/local/bin/custom-entrypoint.sh"] volumes: postgres_data: From bb3e6455217dc732a2a1a770b08794e2705e9c38 Mon Sep 17 00:00:00 2001 From: James Kent Date: Fri, 24 May 2024 10:43:38 -0500 Subject: [PATCH 07/15] update example env --- store/.env.example | 2 ++ 1 file changed, 2 insertions(+) diff --git a/store/.env.example b/store/.env.example index 115a80f2f..435a3f4b3 100644 --- a/store/.env.example +++ b/store/.env.example @@ -4,6 +4,8 @@ ACE_DIR=/media FILE_DIR=/tmp COMPOSE_CONVERT_WINDOWS_PATHS=1 POSTGRES_HOST=store_pgsql +POSTGRES_USER=postgres +POSTGRES_DB=neurostore POSTGRES_PASSWORD=example BEARERINFO_FUNC=neurostore.resources.auth.decode_token AUTH0_CLIENT_ID=YOUR_CLIENT_ID From be84d335e59b0df3c1203a511efd4b3d7e629c5a Mon Sep 17 00:00:00 2001 From: James Kent Date: Fri, 24 May 2024 10:47:46 -0500 Subject: [PATCH 08/15] add more sleep --- store/postgres/custom-entrypoint.sh | 2 ++ 1 file changed, 2 insertions(+) diff --git a/store/postgres/custom-entrypoint.sh b/store/postgres/custom-entrypoint.sh index 2c6cb60c9..654f5a0b2 100644 --- a/store/postgres/custom-entrypoint.sh +++ b/store/postgres/custom-entrypoint.sh @@ -10,6 +10,8 @@ until pg_isready -p 5432 -U "$POSTGRES_USER"; do sleep 2 done +sleep 1 + # Enable the pgvector extension psql -v ON_ERROR_STOP=1 --username "$POSTGRES_USER" --dbname "$POSTGRES_DB" <<-EOSQL CREATE EXTENSION IF NOT EXISTS vector; From 2cfd7f9186374a6c8077dcbb24bc6e6b3f995918 Mon Sep 17 00:00:00 2001 From: James Kent Date: Fri, 24 May 2024 11:33:14 -0500 Subject: [PATCH 09/15] add more info --- .github/workflows/workflow.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.github/workflows/workflow.yml b/.github/workflows/workflow.yml index f60a9408b..a56fbbdad 100644 --- a/.github/workflows/workflow.yml +++ b/.github/workflows/workflow.yml @@ -162,6 +162,9 @@ jobs: until docker compose exec -T \ store_pgsql pg_isready -U postgres; do sleep 1; done + sleep 1 + docker compose logs store_pgsql + docker compose exec -T \ store_pgsql \ psql -U postgres -c "create database test_db" From 2fe3b3b291cf6be89ec38691468b2d193ecf2146 Mon Sep 17 00:00:00 2001 From: James Kent Date: Fri, 24 May 2024 11:33:37 -0500 Subject: [PATCH 10/15] add more info --- store/postgres/custom-entrypoint.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/store/postgres/custom-entrypoint.sh b/store/postgres/custom-entrypoint.sh index 654f5a0b2..9675d90fd 100644 --- a/store/postgres/custom-entrypoint.sh +++ b/store/postgres/custom-entrypoint.sh @@ -12,6 +12,7 @@ done sleep 1 +echo "STARTING EXTENSION" # Enable the pgvector extension psql -v ON_ERROR_STOP=1 --username "$POSTGRES_USER" --dbname "$POSTGRES_DB" <<-EOSQL CREATE EXTENSION IF NOT EXISTS vector; From 2321e6831055b5b298d0dea3f63bb8adf7813efd Mon Sep 17 00:00:00 2001 From: James Kent Date: Fri, 24 May 2024 11:52:34 -0500 Subject: [PATCH 11/15] activate extension --- .github/workflows/workflow.yml | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/.github/workflows/workflow.yml b/.github/workflows/workflow.yml index a56fbbdad..2acaf7ed5 100644 --- a/.github/workflows/workflow.yml +++ b/.github/workflows/workflow.yml @@ -163,11 +163,15 @@ jobs: store_pgsql pg_isready -U postgres; do sleep 1; done sleep 1 - docker compose logs store_pgsql - + + docker compose exec -T \ + store_pgsql \ + psql -U postgres -c "create database test_db;" + docker compose exec -T \ store_pgsql \ - psql -U postgres -c "create database test_db" + psql -U postgres -c "CREATE EXTENSION IF NOT EXISTS vector;" + - name: Backend Tests env: @@ -238,6 +242,10 @@ jobs: docker compose exec -T \ compose_pgsql \ psql -U postgres -c "create database test_db" + + docker compose exec -T \ + store_pgsql \ + psql -U postgres -c "CREATE EXTENSION IF NOT EXISTS vector;" - name: Backend Tests env: @@ -445,6 +453,10 @@ jobs: docker compose exec -T \ store_pgsql \ psql -U postgres -c "create database neurostore" + + docker compose exec -T \ + store_pgsql \ + psql -U postgres -c "CREATE EXTENSION IF NOT EXISTS vector;" - name: Initialize Compose Database run: | From 8e3acb3be3d4c8086184c1b8bd2fdacde0edeeda Mon Sep 17 00:00:00 2001 From: James Kent Date: Fri, 24 May 2024 11:53:42 -0500 Subject: [PATCH 12/15] remove surperfluous extension --- .github/workflows/workflow.yml | 4 ---- 1 file changed, 4 deletions(-) diff --git a/.github/workflows/workflow.yml b/.github/workflows/workflow.yml index 2acaf7ed5..d8d6bbc77 100644 --- a/.github/workflows/workflow.yml +++ b/.github/workflows/workflow.yml @@ -242,10 +242,6 @@ jobs: docker compose exec -T \ compose_pgsql \ psql -U postgres -c "create database test_db" - - docker compose exec -T \ - store_pgsql \ - psql -U postgres -c "CREATE EXTENSION IF NOT EXISTS vector;" - name: Backend Tests env: From 4bb3e6a3d6dcb701f2d4b055a6feaefd48755e59 Mon Sep 17 00:00:00 2001 From: James Kent Date: Fri, 24 May 2024 11:59:53 -0500 Subject: [PATCH 13/15] specify database --- .github/workflows/workflow.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/workflow.yml b/.github/workflows/workflow.yml index d8d6bbc77..bfa538cb5 100644 --- a/.github/workflows/workflow.yml +++ b/.github/workflows/workflow.yml @@ -170,7 +170,7 @@ jobs: docker compose exec -T \ store_pgsql \ - psql -U postgres -c "CREATE EXTENSION IF NOT EXISTS vector;" + psql -U postgres -d test_db -c "CREATE EXTENSION IF NOT EXISTS vector;" - name: Backend Tests @@ -452,7 +452,7 @@ jobs: docker compose exec -T \ store_pgsql \ - psql -U postgres -c "CREATE EXTENSION IF NOT EXISTS vector;" + psql -U postgres -d neurostore -c "CREATE EXTENSION IF NOT EXISTS vector;" - name: Initialize Compose Database run: | From a3973b7a35e3f576fa39bfb56d103d7c18bccb7b Mon Sep 17 00:00:00 2001 From: James Kent Date: Fri, 24 May 2024 12:21:02 -0500 Subject: [PATCH 14/15] remove unnecessary functions --- store/docker-compose.yml | 1 - store/postgres/Dockerfile | 8 -------- store/postgres/custom-entrypoint.sh | 22 ---------------------- 3 files changed, 31 deletions(-) delete mode 100644 store/postgres/custom-entrypoint.sh diff --git a/store/docker-compose.yml b/store/docker-compose.yml index 6ef10bc70..13bcd421c 100644 --- a/store/docker-compose.yml +++ b/store/docker-compose.yml @@ -49,7 +49,6 @@ services: - '5432' env_file: - .env - entrypoint: ["/usr/local/bin/custom-entrypoint.sh"] volumes: postgres_data: diff --git a/store/postgres/Dockerfile b/store/postgres/Dockerfile index 4ff63dd19..3c03f6b35 100644 --- a/store/postgres/Dockerfile +++ b/store/postgres/Dockerfile @@ -20,10 +20,6 @@ RUN curl -L https://github.com/pgvector/pgvector/archive/refs/tags/v0.7.0.tar.gz # Copy initialization script COPY init-vector-extension.sql /docker-entrypoint-initdb.d/ -# Copy custom entrypoint script -COPY custom-entrypoint.sh /usr/local/bin/ -RUN chmod +x /usr/local/bin/custom-entrypoint.sh - # Copy scripts and set permissions COPY pg_dump-to-s3 /home RUN chmod +x /home/pg_dump-to-s3.sh /home/s3-autodelete.sh @@ -35,7 +31,3 @@ RUN service cron start # Convert scripts to Unix format RUN dos2unix /home/pg_dump-to-s3.sh RUN dos2unix /home/s3-autodelete.sh - -# Set the entrypoint to the custom script -ENTRYPOINT ["custom-entrypoint.sh"] -CMD ["postgres"] diff --git a/store/postgres/custom-entrypoint.sh b/store/postgres/custom-entrypoint.sh deleted file mode 100644 index 9675d90fd..000000000 --- a/store/postgres/custom-entrypoint.sh +++ /dev/null @@ -1,22 +0,0 @@ -#!/bin/bash -set -e - -# Start PostgreSQL and wait for it to be ready -/usr/local/bin/docker-entrypoint.sh postgres & - -# Wait for PostgreSQL to start -until pg_isready -p 5432 -U "$POSTGRES_USER"; do - echo "$(date) - waiting for database to start" - sleep 2 -done - -sleep 1 - -echo "STARTING EXTENSION" -# Enable the pgvector extension -psql -v ON_ERROR_STOP=1 --username "$POSTGRES_USER" --dbname "$POSTGRES_DB" <<-EOSQL - CREATE EXTENSION IF NOT EXISTS vector; -EOSQL - -# Wait for PostgreSQL process to keep the container running -wait From dc18362b311f3755330a6ffd04316a7c96e55053 Mon Sep 17 00:00:00 2001 From: James Kent Date: Fri, 24 May 2024 12:33:08 -0500 Subject: [PATCH 15/15] do not create the database --- .github/workflows/workflow.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/workflow.yml b/.github/workflows/workflow.yml index bfa538cb5..89bf453a0 100644 --- a/.github/workflows/workflow.yml +++ b/.github/workflows/workflow.yml @@ -446,9 +446,9 @@ jobs: until docker compose exec -T \ store_pgsql pg_isready -U postgres; do sleep 1; done - docker compose exec -T \ - store_pgsql \ - psql -U postgres -c "create database neurostore" + # docker compose exec -T \ + # store_pgsql \ + # psql -U postgres -c "create database neurostore" docker compose exec -T \ store_pgsql \