Skip to content

Commit

Permalink
wip: query analysis
Browse files Browse the repository at this point in the history
  • Loading branch information
baskaryan committed Mar 4, 2024
1 parent 22a9eee commit d783816
Show file tree
Hide file tree
Showing 9 changed files with 513 additions and 36 deletions.
6 changes: 3 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -173,13 +173,13 @@ poetry install --with lint,dev,test
Run the following script to create a database and schema:

```sh
python -m scripts.run_migrations create
poetry run python -m scripts.run_migrations create
```

From `/backend`:

```sh
OPENAI_API_KEY=[YOUR API KEY] python -m server.main
OPENAI_API_KEY=[YOUR API KEY] poetry run python -m server.main
```

### Testing
Expand All @@ -189,7 +189,7 @@ separate from the main database. It will have the same schema as the main
database.

```sh
python -m scripts.run_migrations create-test-db
poetry run python -m scripts.run_migrations create-test-db
```

Run the tests
Expand Down
68 changes: 67 additions & 1 deletion backend/db/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@
from sqlalchemy.dialects.postgresql import JSONB, UUID
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import Session, relationship, sessionmaker
from sqlalchemy.sql import func

from server.settings import get_postgres_url

Expand Down Expand Up @@ -123,3 +122,70 @@ class Example(TimestampedModel):

def __repr__(self) -> str:
    """Return a short debug representation: the uuid and the first 20 items of content."""
    # The closing parenthesis was missing from the original format string.
    return f"<Example(uuid={self.uuid}, content={self.content[:20]})>"


class QueryAnalyzer(TimestampedModel):
    """A stored query analyzer definition.

    Each row carries a name, a JSON Schema describing the query, a short
    description and the instruction text. Associated
    ``QueryAnalysisExample`` rows are available through ``examples``; each
    example gets a ``query_analyzer`` back-reference.
    """

    __tablename__ = "query_analyzers"

    # Short label for the analyzer; defaults to the empty string server-side.
    name = Column(
        String(100),
        comment="The name of the query analyser.",
        server_default="",
        nullable=False,
    )
    # JSON Schema of the query.
    schema = Column(
        JSONB,
        comment="JSON Schema that describes schema of query",
        nullable=False,
    )
    # Short description; defaults to the empty string server-side.
    description = Column(
        String(100),
        comment="Surfaced via UI to the users.",
        server_default="",
        nullable=False,
    )
    # TODO: This will need to evolve
    instruction = Column(
        Text,
        comment="The prompt to the language model.",
        nullable=False,
    )

    examples = relationship("QueryAnalysisExample", backref="query_analyzer")

    def __repr__(self) -> str:
        """Return a short debug representation with the uuid and description."""
        return "<QueryAnalyzer(id={}, description={})>".format(
            self.uuid, self.description
        )


class QueryAnalysisExample(TimestampedModel):
    """An example attached to a query analyzer.

    Examples consist of content together with the expected output.
    The output is a JSON object that is expected to be produced from the
    content. The JSON object should be valid according to the schema of the
    associated query analyzer. Because a JSON schema can represent a list of
    objects, it is perfectly fine for a single example to describe multiple
    instances of some object.
    """

    __tablename__ = "query_analysis_examples"

    # Input side of the example (required).
    content = Column(
        JSONB,
        nullable=False,
        comment="The input portion of the example.",
    )
    # Expected output for the input; this column is nullable.
    output = Column(
        JSONB,
        comment="The output associated with the example.",
    )
    # Owning analyzer; the foreign key is declared with ON DELETE CASCADE.
    query_analyzer_id = Column(
        UUID(as_uuid=True),
        ForeignKey("query_analyzers.uuid", ondelete="CASCADE"),
        nullable=False,
        comment="Foreign key referencing the associated query analyzer.",
    )

    def __repr__(self) -> str:
        """Return a short debug representation with the uuid and a content preview."""
        # `content` is a JSONB column, so it may hold a dict, which cannot be
        # sliced. Truncate its string form instead so repr() never raises.
        preview = str(self.content)[:20]
        # The closing parenthesis was missing from the original format string.
        return f"<QueryAnalysisExample(uuid={self.uuid}, content={preview})>"
6 changes: 2 additions & 4 deletions backend/extraction/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,10 +43,8 @@ def convert_json_schema_to_openai_schema(
else:
raise NotImplementedError("Only multi is supported for now.")

schema_.pop("definitions", None)

return {
"name": "extractor",
"description": "Extract information matching the given schema.",
"name": "query_analyzer",
"description": "Generate optimized queries matching the given schema.",
"parameters": _rm_titles(schema_) if rm_titles else schema_,
}
70 changes: 44 additions & 26 deletions backend/poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion backend/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ fastapi = "^0.109.2"
langserve = "^0.0.45"
uvicorn = "^0.27.1"
pydantic = "^1.10"
langchain-openai = "^0.0.6"
langchain-openai = "^0.0.8"
jsonschema = "^4.21.1"
sse-starlette = "^2.0.0"
alembic = "^1.13.1"
Expand Down
13 changes: 13 additions & 0 deletions backend/server/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,11 @@
ExtractResponse,
extraction_runnable,
)
from server.query_analysis import (
QueryAnalysisRequest,
QueryAnalysisResponse,
query_analyzer,
)

app = FastAPI(
title="Extraction Powered by LangChain",
Expand Down Expand Up @@ -56,6 +61,14 @@ def ready():
enabled_endpoints=["invoke", "batch"],
)

# Mount the query analyzer on the app under /query_analysis, typed with the
# request/response models imported from server.query_analysis. Only the
# "invoke" and "batch" endpoints are enabled for this route.
add_routes(
app,
query_analyzer.with_types(
input_type=QueryAnalysisRequest, output_type=QueryAnalysisResponse
),
path="/query_analysis",
enabled_endpoints=["invoke", "batch"],
)

if __name__ == "__main__":
import uvicorn
Expand Down
Loading

0 comments on commit d783816

Please sign in to comment.