Skip to content

Commit

Permalink
add package
Browse files Browse the repository at this point in the history
dnth committed Jul 19, 2024
1 parent 2180bcb commit 38a2c3f
Showing 10 changed files with 40 additions and 4 deletions.
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -168,3 +168,6 @@ cython_debug/
# and can be added to the global gitignore or merged into this file. For a more nuclear
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
#.idea/
# pixi environments
.pixi
*.egg-info
5 changes: 3 additions & 2 deletions compute_embeddings.py
Original file line number Diff line number Diff line change
@@ -7,8 +7,9 @@
from tqdm.auto import tqdm
from transformers import CLIPModel, CLIPProcessor, CLIPTokenizerFast

from src.database import PostgreSQLDatabase
from src.datasets import HuggingFaceDatasets

from pgmmr.database import PostgreSQLDatabase
from pgmmr.datasets import HuggingFaceDatasets

from PIL import Image

32 changes: 32 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
[project]
name = "postgresql-multimodal-retrieval"
version = "0.1.0"
description = "Multimodal (image and text) retrieval on PostgreSQL using pgvector"
authors = [{name = "dnth", email = "dickson.neoh@gmail.com"}]
requires-python = ">= 3.10"
dependencies = ["pgvector>=0.3.2,<0.4", "tqdm>=4.66.4,<4.67", "transformers>=4.41.2,<4.42", "huggingface-hub"]

# Pixi only reads its configuration from `tool.pixi.*` tables when the manifest
# is a pyproject.toml; a bare top-level `[system-requirements]` table is ignored
# by pixi and is not a table name permitted by the pyproject.toml specification.
[tool.pixi.system-requirements]
cuda = "11"

[build-system]
requires = ["setuptools"]
build-backend = "setuptools.build_meta"

[tool.pixi.project]
channels = ["nvidia", "conda-forge", "pytorch"]
platforms = ["linux-64"]

[tool.pixi.pypi-dependencies]
pgmmr = { path = ".", editable = true }

[tool.pixi.tasks]

[tool.pixi.dependencies]
pandas = ">=2.2.2,<2.3"
psycopg = ">=3.2.1,<3.3"
loguru = ">=0.7.2,<0.8"
torchvision = ">=0.18.1,<0.19"
torchaudio = ">=2.3.1,<2.4"
pytorch = ">=2.3.1,<2.4"
datasets = ">=2.20.0,<2.21"
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
Original file line number Diff line number Diff line change
@@ -11,13 +11,13 @@ class HuggingFaceDatasets:
"""

def __init__(
self, dataset_name: str, num_images: int = None, **load_dataset_kwargs
self, dataset_name: str, num_images: int = None
):
self.dataset_name = dataset_name
logger.info(f"Loading dataset: {dataset_name}")

self.dataset = load_dataset(
self.dataset_name, split="all", **load_dataset_kwargs
self.dataset_name, split="all"
)

if num_images:
File renamed without changes.
File renamed without changes.

0 comments on commit 38a2c3f

Please sign in to comment.