Skip to content

Commit

Permalink
Merge branch 'langchain-ai:master' into community_chroma_patch
Browse files Browse the repository at this point in the history
  • Loading branch information
shjunn authored Nov 4, 2024
2 parents 7b31bfe + ba5cba0 commit 69de5ea
Show file tree
Hide file tree
Showing 80 changed files with 2,556 additions and 5,631 deletions.
95 changes: 73 additions & 22 deletions .github/scripts/get_min_versions.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,12 +7,17 @@
# for python 3.10 and below, which doesn't have stdlib tomllib
import tomli as tomllib

from packaging.version import parse as parse_version
from packaging.specifiers import SpecifierSet
from packaging.version import Version


import requests
from packaging.version import parse
from typing import List

import re


MIN_VERSION_LIBS = [
"langchain-core",
"langchain-community",
Expand All @@ -31,29 +36,61 @@
]


def get_min_version(version: str) -> str:
# base regex for x.x.x with cases for rc/post/etc
# valid strings: https://peps.python.org/pep-0440/#public-version-identifiers
vstring = r"\d+(?:\.\d+){0,2}(?:(?:a|b|rc|\.post|\.dev)\d+)?"
# case ^x.x.x
_match = re.match(f"^\\^({vstring})$", version)
if _match:
return _match.group(1)
def get_pypi_versions(package_name: str) -> List[str]:
"""
Fetch all available versions for a package from PyPI.
Args:
package_name (str): Name of the package
Returns:
List[str]: List of all available versions
# case >=x.x.x,<y.y.y
_match = re.match(f"^>=({vstring}),<({vstring})$", version)
if _match:
_min = _match.group(1)
_max = _match.group(2)
assert parse_version(_min) < parse_version(_max)
return _min
Raises:
requests.exceptions.RequestException: If PyPI API request fails
KeyError: If package not found or response format unexpected
"""
pypi_url = f"https://pypi.org/pypi/{package_name}/json"
response = requests.get(pypi_url)
response.raise_for_status()
return list(response.json()["releases"].keys())

# case x.x.x
_match = re.match(f"^({vstring})$", version)
if _match:
return _match.group(1)

raise ValueError(f"Unrecognized version format: {version}")
def get_minimum_version(package_name: str, spec_string: str) -> Optional[str]:
    """
    Find the minimum published version that satisfies the given constraints.

    Poetry-style caret constraints (``^x.y.z``) are not PEP 440 specifiers, so
    every occurrence in ``spec_string`` is rewritten into an explicit range
    before the string is handed to ``SpecifierSet``:

    * ``^x.y.z`` (x > 0)  -> ``>=x.y.z,<{x+1}``
    * ``^0.y.z`` (y > 0)  -> ``>=0.y.z,<0.{y+1}``
    * ``^0.0.z``          -> ``>=0.0.z,<0.0.{z+1}``

    Args:
        package_name (str): Name of the package
        spec_string (str): Version specification string (e.g., ">=0.2.43,<0.4.0,!=0.3.0")

    Returns:
        Optional[str]: Minimum compatible version or None if no compatible version found

    Raises:
        Nothing is caught around ``get_pypi_versions`` or ``SpecifierSet``, so
        any exception they raise propagates to the caller unchanged.
    """

    def _expand_caret(match: "re.Match[str]") -> str:
        # The upper bound bumps the left-most non-zero component, which is how
        # a caret constraint is defined; the lower bound keeps the original text.
        major, minor, patch = match.groups()
        if int(major) > 0:
            upper = f"{int(major) + 1}"
        elif int(minor) > 0:
            upper = f"0.{int(minor) + 1}"
        else:
            upper = f"0.0.{int(patch) + 1}"
        return f">={major}.{minor}.{patch},<{upper}"

    # A single pass handles carets anywhere in the constraint string and is not
    # limited to single-digit major/minor numbers.
    spec_string = re.sub(r"\^(\d+)\.(\d+)\.(\d+)", _expand_caret, spec_string)

    spec_set = SpecifierSet(spec_string)

    valid_versions = []
    for version_str in get_pypi_versions(package_name):
        try:
            version = parse(version_str)
        except ValueError:
            # skip release keys that cannot be parsed as a version
            continue
        if spec_set.contains(version):
            valid_versions.append(version)

    return str(min(valid_versions)) if valid_versions else None


def get_min_version_from_toml(
Expand Down Expand Up @@ -96,7 +133,7 @@ def get_min_version_from_toml(
][0]["version"]

# Resolve the minimum supported version from version_string
min_version = get_min_version(version_string)
min_version = get_minimum_version(lib, version_string)

# Store the minimum version in the min_versions dictionary
min_versions[lib] = min_version
Expand All @@ -112,6 +149,20 @@ def check_python_version(version_string, constraint_string):
:param constraint_string: A string representing the package's Python version constraints (e.g. ">=3.6, <4.0").
:return: True if the version matches the constraints, False otherwise.
"""

# rewrite occurrences of ^0.0.z to 0.0.z (can be anywhere in constraint string)
constraint_string = re.sub(r"\^0\.0\.(\d+)", r"0.0.\1", constraint_string)
# rewrite occurrences of ^0.y.z to >=0.y.z,<0.y+1.0 (can be anywhere in constraint string)
for y in range(1, 10):
constraint_string = re.sub(
rf"\^0\.{y}\.(\d+)", rf">=0.{y}.\1,<0.{y+1}.0", constraint_string
)
# rewrite occurrences of ^x.y.z to >=x.y.z,<x+1.0.0 (can be anywhere in constraint string)
for x in range(1, 10):
constraint_string = re.sub(
rf"\^{x}\.0\.(\d+)", rf">={x}.0.\1,<{x+1}.0.0", constraint_string
)

try:
version = Version(version_string)
constraints = SpecifierSet(constraint_string)
Expand Down
1 change: 0 additions & 1 deletion .github/workflows/_integration_test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,6 @@ jobs:
ES_URL: ${{ secrets.ES_URL }}
ES_CLOUD_ID: ${{ secrets.ES_CLOUD_ID }}
ES_API_KEY: ${{ secrets.ES_API_KEY }}
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} # for airbyte
MONGODB_ATLAS_URI: ${{ secrets.MONGODB_ATLAS_URI }}
VOYAGE_API_KEY: ${{ secrets.VOYAGE_API_KEY }}
COHERE_API_KEY: ${{ secrets.COHERE_API_KEY }}
Expand Down
25 changes: 20 additions & 5 deletions .github/workflows/_release.yml
Original file line number Diff line number Diff line change
Expand Up @@ -95,9 +95,25 @@ jobs:
PKG_NAME: ${{ needs.build.outputs.pkg-name }}
VERSION: ${{ needs.build.outputs.version }}
run: |
REGEX="^$PKG_NAME==\\d+\\.\\d+\\.\\d+\$"
echo $REGEX
PREV_TAG=$(git tag --sort=-creatordate | grep -P $REGEX || true | head -1)
PREV_TAG="$PKG_NAME==${VERSION%.*}.$(( ${VERSION##*.} - 1 ))"; [[ "${VERSION##*.}" -eq 0 ]] && PREV_TAG=""
# backup case if releasing e.g. 0.3.0, looks up last release
# note if last release (chronologically) was e.g. 0.1.47 it will get
# that instead of the last 0.2 release
if [ -z "$PREV_TAG" ]; then
REGEX="^$PKG_NAME==\\d+\\.\\d+\\.\\d+\$"
echo $REGEX
PREV_TAG=$(git tag --sort=-creatordate | grep -P $REGEX || true | head -1)
fi
# confirm prev-tag actually exists in git repo with git tag
GIT_TAG_RESULT=$(git tag -l "$PREV_TAG")
if [ -z "$GIT_TAG_RESULT" ]; then
echo "Previous tag $PREV_TAG not found in git repo"
exit 1
fi
TAG="${PKG_NAME}==${VERSION}"
if [ "$TAG" == "$PREV_TAG" ]; then
echo "No new version to release"
Expand Down Expand Up @@ -231,7 +247,7 @@ jobs:
working-directory: ${{ inputs.working-directory }}
id: min-version
run: |
poetry run pip install packaging
poetry run pip install packaging requests
python_version="$(poetry run python --version | awk '{print $2}')"
min_versions="$(poetry run python $GITHUB_WORKSPACE/.github/scripts/get_min_versions.py pyproject.toml release $python_version)"
echo "min-versions=$min_versions" >> "$GITHUB_OUTPUT"
Expand Down Expand Up @@ -289,7 +305,6 @@ jobs:
ES_URL: ${{ secrets.ES_URL }}
ES_CLOUD_ID: ${{ secrets.ES_CLOUD_ID }}
ES_API_KEY: ${{ secrets.ES_API_KEY }}
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} # for airbyte
MONGODB_ATLAS_URI: ${{ secrets.MONGODB_ATLAS_URI }}
VOYAGE_API_KEY: ${{ secrets.VOYAGE_API_KEY }}
UPSTAGE_API_KEY: ${{ secrets.UPSTAGE_API_KEY }}
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/_test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ jobs:
id: min-version
shell: bash
run: |
poetry run pip install packaging tomli
poetry run pip install packaging tomli requests
python_version="$(poetry run python --version | awk '{print $2}')"
min_versions="$(poetry run python $GITHUB_WORKSPACE/.github/scripts/get_min_versions.py pyproject.toml pull_request $python_version)"
echo "min-versions=$min_versions" >> "$GITHUB_OUTPUT"
Expand Down
4 changes: 1 addition & 3 deletions .github/workflows/api_doc_build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -72,9 +72,7 @@ jobs:
- name: Install dependencies
working-directory: langchain
run: |
# skip airbyte due to pandas dependency issue
python -m uv pip install $(ls ./libs/partners | grep -vE "airbyte" | xargs -I {} echo "./libs/partners/{}")
python -m uv pip install $(ls ./libs/partners | xargs -I {} echo "./libs/partners/{}")
python -m uv pip install libs/core libs/langchain libs/text-splitters libs/community libs/experimental
python -m uv pip install -r docs/api_reference/requirements.txt
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/check_diffs.yml
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ jobs:
uses: Ana06/[email protected]
- id: set-matrix
run: |
python -m pip install packaging
python -m pip install packaging requests
python .github/scripts/check_diff.py ${{ steps.files.outputs.all }} >> $GITHUB_OUTPUT
outputs:
lint: ${{ steps.set-matrix.outputs.lint }}
Expand Down
1 change: 0 additions & 1 deletion docs/api_reference/create_api_rst.py
Original file line number Diff line number Diff line change
Expand Up @@ -530,7 +530,6 @@ def _out_file_path(package_name: str) -> Path:

def _build_index(dirs: List[str]) -> None:
custom_names = {
"airbyte": "Airbyte",
"aws": "AWS",
"ai21": "AI21",
"ibm": "IBM",
Expand Down
2 changes: 1 addition & 1 deletion docs/docs/concepts/messages.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ Each message has a **role** (e.g., "user", "assistant"), **content** (e.g., text

LangChain provides a unified message format that can be used across chat models, allowing users to work with different chat models without worrying about the specific details of the message format used by each model provider.

## What inside a message?
## What is inside a message?

A message typically consists of the following pieces of information:

Expand Down
2 changes: 1 addition & 1 deletion docs/docs/how_to/document_loader_csv.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -157,7 +157,7 @@
" temp_file_path = temp_file.name\n",
"\n",
"loader = CSVLoader(file_path=temp_file_path)\n",
"loader.load()\n",
"data = loader.load()\n",
"for record in data[:2]:\n",
" print(record)"
]
Expand Down
2 changes: 1 addition & 1 deletion docs/docs/how_to/multi_vector.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -207,7 +207,7 @@
"id": "cdef8339-f9fa-4b3b-955f-ad9dbdf2734f",
"metadata": {},
"source": [
"The default search type the retriever performs on the vector database is a similarity search. LangChain vector stores also support searching via [Max Marginal Relevance](https://python.langchain.com/api_reference/core/vectorstores/langchain_core.vectorstores.VectorStore.html#langchain_core.vectorstores.VectorStore.max_marginal_relevance_search). This can be controlled via the `search_type` parameter of the retriever:"
"The default search type the retriever performs on the vector database is a similarity search. LangChain vector stores also support searching via [Max Marginal Relevance](https://python.langchain.com/api_reference/core/vectorstores/langchain_core.vectorstores.base.VectorStore.html#langchain_core.vectorstores.base.VectorStore.max_marginal_relevance_search). This can be controlled via the `search_type` parameter of the retriever:"
]
},
{
Expand Down
2 changes: 1 addition & 1 deletion docs/docs/how_to/qa_chat_history_how_to.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -155,7 +155,7 @@
"id": "15f8ad59-19de-42e3-85a8-3ba95ee0bd43",
"metadata": {},
"source": [
"For the retriever, we will use [WebBaseLoader](https://python.langchain.com/api_reference/community/document_loaders/langchain_community.document_loaders.web_base.WebBaseLoader.html) to load the content of a web page. Here we instantiate a `InMemoryVectorStore` vectorstore and then use its [.as_retriever](https://python.langchain.com/api_reference/core/vectorstores/langchain_core.vectorstores.VectorStore.html#langchain_core.vectorstores.VectorStore.as_retriever) method to build a retriever that can be incorporated into [LCEL](/docs/concepts/lcel) chains."
"For the retriever, we will use [WebBaseLoader](https://python.langchain.com/api_reference/community/document_loaders/langchain_community.document_loaders.web_base.WebBaseLoader.html) to load the content of a web page. Here we instantiate an `InMemoryVectorStore` vectorstore and then use its [.as_retriever](https://python.langchain.com/api_reference/core/vectorstores/langchain_core.vectorstores.base.VectorStore.html#langchain_core.vectorstores.base.VectorStore.as_retriever) method to build a retriever that can be incorporated into [LCEL](/docs/concepts/lcel) chains."
]
},
{
Expand Down
2 changes: 1 addition & 1 deletion docs/docs/how_to/vectorstore_retriever.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@
"\n",
"## Creating a retriever from a vectorstore\n",
"\n",
"You can build a retriever from a vectorstore using its [.as_retriever](https://python.langchain.com/api_reference/core/vectorstores/langchain_core.vectorstores.VectorStore.html#langchain_core.vectorstores.VectorStore.as_retriever) method. Let's walk through an example.\n",
"You can build a retriever from a vectorstore using its [.as_retriever](https://python.langchain.com/api_reference/core/vectorstores/langchain_core.vectorstores.base.VectorStore.html#langchain_core.vectorstores.base.VectorStore.as_retriever) method. Let's walk through an example.\n",
"\n",
"First we instantiate a vectorstore. We will use an in-memory [FAISS](https://python.langchain.com/api_reference/community/vectorstores/langchain_community.vectorstores.faiss.FAISS.html) vectorstore:"
]
Expand Down
22 changes: 6 additions & 16 deletions docs/docs/integrations/providers/databricks.md
Original file line number Diff line number Diff line change
Expand Up @@ -14,31 +14,21 @@ Databricks embraces the LangChain ecosystem in various ways:
Installation
------------

First-party Databricks integrations are available in the langchain-databricks partner package.
First-party Databricks integrations are now available in the databricks-langchain partner package.

```
pip install langchain-databricks
pip install databricks-langchain
```

🚧 Upcoming Package Consolidation Notice

This package (`langchain-databricks`) will soon be consolidated into a new package: `databricks-langchain`. The new package will serve as the primary hub for all Databricks Langchain integrations.

What’s Changing?
In the coming months, `databricks-langchain` will include all features currently in `langchain-databricks`, as well as additional integrations to provide a unified experience for Databricks users.

What You Need to Know
For now, continue to use `langchain-databricks` as usual. When `databricks-langchain` is ready, we’ll provide clear migration instructions to make the transition seamless. During the transition period, `langchain-databricks` will remain operational, and updates will be shared here with timelines and guidance.

Thank you for your support as we work toward an improved, streamlined experience!
The legacy langchain-databricks partner package is still available but will soon be deprecated.

Chat Model
----------

`ChatDatabricks` is a Chat Model class to access chat endpoints hosted on Databricks, including state-of-the-art models such as Llama3, Mixtral, and DBRX, as well as your own fine-tuned models.

```
from langchain_databricks import ChatDatabricks
from databricks_langchain import ChatDatabricks
chat_model = ChatDatabricks(endpoint="databricks-meta-llama-3-70b-instruct")
```
Expand Down Expand Up @@ -69,7 +59,7 @@ Embeddings
`DatabricksEmbeddings` is an Embeddings class to access text-embedding endpoints hosted on Databricks, including state-of-the-art models such as BGE, as well as your own fine-tuned models.

```
from langchain_databricks import DatabricksEmbeddings
from databricks_langchain import DatabricksEmbeddings
embeddings = DatabricksEmbeddings(endpoint="databricks-bge-large-en")
```
Expand All @@ -83,7 +73,7 @@ Vector Search
Databricks Vector Search is a serverless similarity search engine that allows you to store a vector representation of your data, including metadata, in a vector database. With Vector Search, you can create auto-updating vector search indexes from [Delta](https://docs.databricks.com/en/introduction/delta-comparison.html) tables managed by [Unity Catalog](https://www.databricks.com/product/unity-catalog) and query them with a simple API to return the most similar vectors.

```
from langchain_databricks.vectorstores import DatabricksVectorSearch
from databricks_langchain import DatabricksVectorSearch
dvs = DatabricksVectorSearch(
endpoint="<YOUT_ENDPOINT_NAME>",
Expand Down
11 changes: 5 additions & 6 deletions docs/docs/integrations/providers/vectara/index.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -4,15 +4,14 @@
> which is grounded in the data, documents, and knowledge that they have (technically, it is Retrieval-Augmented-Generation-as-a-service).
**Vectara Overview:**
`Vectara` is RAG-as-a-service, providing all the components of RAG behind an easy-to-use API, including:
[Vectara](https://vectara.com/) is the trusted AI Assistant and Agent platform which focuses on enterprise readiness for mission-critical applications.
Vectara serverless RAG-as-a-service provides all the components of RAG behind an easy-to-use API, including:
1. A way to extract text from files (PDF, PPT, DOCX, etc)
2. ML-based chunking that provides state of the art performance.
3. The [Boomerang](https://vectara.com/how-boomerang-takes-retrieval-augmented-generation-to-the-next-level-via-grounded-generation/) embeddings model.
4. Its own internal vector database where text chunks and embedding vectors are stored.
5. A query service that automatically encodes the query into embedding, and retrieves the most relevant text segments
(including support for [Hybrid Search](https://docs.vectara.com/docs/api-reference/search-apis/lexical-matching) and
[MMR](https://vectara.com/get-diverse-results-and-comprehensive-summaries-with-vectaras-mmr-reranker/))
7. An LLM to for creating a [generative summary](https://docs.vectara.com/docs/learn/grounded-generation/grounded-generation-overview), based on the retrieved documents (context), including citations.
5. A query service that automatically encodes the query into an embedding and retrieves the most relevant text segments, including support for [Hybrid Search](https://docs.vectara.com/docs/api-reference/search-apis/lexical-matching) as well as multiple reranking options such as the [multi-lingual relevance reranker](https://www.vectara.com/blog/deep-dive-into-vectara-multilingual-reranker-v1-state-of-the-art-reranker-across-100-languages), [MMR](https://vectara.com/get-diverse-results-and-comprehensive-summaries-with-vectaras-mmr-reranker/), and the [UDF reranker](https://www.vectara.com/blog/rag-with-user-defined-functions-based-reranking).
6. An LLM for creating a [generative summary](https://docs.vectara.com/docs/learn/grounded-generation/grounded-generation-overview), based on the retrieved documents (context), including citations.

For more information:
- [Documentation](https://docs.vectara.com/docs/)
Expand All @@ -22,7 +21,7 @@ For more information:
## Installation and Setup

To use `Vectara` with LangChain no special installation steps are required.
To get started, [sign up](https://vectara.com/integrations/langchain) for a free Vectara account (if you don't already have one),
To get started, [sign up](https://vectara.com/integrations/langchain) for a free Vectara trial,
and follow the [quickstart](https://docs.vectara.com/docs/quickstart) guide to create a corpus and an API key.
Once you have these, you can provide them as arguments to the Vectara `vectorstore`, or you can set them as environment variables.

Expand Down
Loading

0 comments on commit 69de5ea

Please sign in to comment.