Skip to content

Commit fc2f443

Browse files
committed
Merge branch 'dev' into f/rate-limit-transition
2 parents f728532 + fcc02f1 commit fc2f443

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

56 files changed

+834
-269
lines changed

Diff for: .github/workflows/lint-agents-api-pr.yml

+1-4
Original file line numberDiff line numberDiff line change
@@ -17,10 +17,7 @@ jobs:
1717
- uses: actions/checkout@v4
1818

1919
- name: Install libboost
20-
uses: awalsh128/cache-apt-pkgs-action@latest
21-
with:
22-
packages: libboost-all-dev
23-
version: 1.0
20+
run: sudo apt-get install -y libboost-all-dev
2421

2522
- name: Install uv
2623
uses: astral-sh/setup-uv@v4

Diff for: .github/workflows/test-agents-api-pr.yml

+1-4
Original file line numberDiff line numberDiff line change
@@ -17,10 +17,7 @@ jobs:
1717
- uses: actions/checkout@v4
1818

1919
- name: Install libboost
20-
uses: awalsh128/cache-apt-pkgs-action@latest
21-
with:
22-
packages: libboost-all-dev
23-
version: 1.0
20+
run: sudo apt-get install -y libboost-all-dev
2421

2522
- name: Install uv
2623
uses: astral-sh/setup-uv@v4

Diff for: .github/workflows/typecheck-agents-api-pr.yml

+1-4
Original file line numberDiff line numberDiff line change
@@ -17,10 +17,7 @@ jobs:
1717
- uses: actions/checkout@v4
1818

1919
- name: Install libboost
20-
uses: awalsh128/cache-apt-pkgs-action@latest
21-
with:
22-
packages: libboost-all-dev
23-
version: 1.0
20+
run: sudo apt-get install -y libboost-all-dev
2421

2522
- name: Cache pytype
2623
uses: actions/cache@v4

Diff for: README.md

+41-6
Original file line numberDiff line numberDiff line change
@@ -1281,7 +1281,7 @@ arguments:
12811281
query: string # The search query for searching with Brave
12821282
12831283
output:
1284-
result: string # The result of the Brave Search
1284+
result: list[dict] # A list of search results, each containing: title, link, snippet
12851285
```
12861286

12871287
</td>
@@ -1356,11 +1356,11 @@ setup:
13561356
13571357
arguments:
13581358
url: string # The URL for which to fetch data
1359-
mode: string # The type of crawlers (default: "scrape")
13601359
params: dict # (Optional) The parameters for the Spider API
1360+
content_type: string # (Optional) The content type to return. Default is "application/json". Other options: "text/csv", "application/xml", "application/jsonl"
13611361
13621362
output:
1363-
documents: list # The documents returned from the spider
1363+
result: list[dict] # A list of results, each containing: content, error, status, costs, url
13641364
```
13651365

13661366
</td>
@@ -1452,13 +1452,18 @@ arguments:
14521452
base64: boolean # Whether the input file is base64 encoded. Default is false.
14531453
14541454
output:
1455-
documents: list # The parsed data from the document
1455+
documents: list[Document] # A list of parsed documents
14561456
```
14571457

14581458
</td>
1459+
<td>
1460+
1461+
**Example cookbook**: [cookbooks/07-personalized-research-assistant.ipynb](https://github.com/julep-ai/julep/blob/dev/cookbooks/07-personalized-research-assistant.ipynb)
14591462

1463+
</td>
14601464
</tr>
14611465

1466+
14621467
<tr>
14631468
<td> <b>Cloudinary</b> </td>
14641469
<td>
@@ -1489,14 +1494,44 @@ output:
14891494
```
14901495

14911496
</td>
1492-
14931497
<td>
14941498

14951499
**Example cookbook**: [cookbooks/05-video-processing-with-natural-language.ipynb](https://github.com/julep-ai/julep/blob/dev/cookbooks/05-video-processing-with-natural-language.ipynb)
14961500

14971501
</td>
14981502
</tr>
14991503

1504+
<tr>
1505+
<td> <b>Arxiv</b> </td>
1506+
<td>
1507+
1508+
```yaml
1509+
method: search # The method to use for the Arxiv integration
1510+
1511+
setup:
1512+
# No specific setup parameters are required for Arxiv
1513+
1514+
arguments:
1515+
query: string # The search query for searching with Arxiv
1516+
id_list: list[string] | None # (Optional) The list of Arxiv IDs to search with
1517+
max_results: integer # The maximum number of results to return, must be between 1 and 300000
1518+
download_pdf: boolean # Whether to download the PDF of the results. Default is false.
1519+
sort_by: string # The sort criterion for the results, options: relevance, lastUpdatedDate, submittedDate
1520+
sort_order: string # The sort order for the results, options: ascending, descending
1521+
1522+
output:
1523+
result: list[dict] # A list of search results, each containing: entry_id, title, updated, published, authors, summary, comment, journal_ref, doi, primary_category, categories, links, pdf_url, pdf_downloaded
1524+
```
1525+
1526+
</td>
1527+
1528+
<td>
1529+
1530+
**Example cookbook**: [cookbooks/07-personalized-research-assistant.ipynb](https://github.com/julep-ai/julep/blob/dev/cookbooks/07-personalized-research-assistant.ipynb)
1531+
1532+
</td>
1533+
</tr>
1534+
15001535
</table>
15011536

15021537
For more details, refer to our [Integrations Documentation](#integrations).
@@ -1674,4 +1709,4 @@ Choose Julep when you need a robust framework for stateful agents with advanced
16741709
<a href="#-table-of-contents">
16751710
<img src="https://img.shields.io/badge/Table%20of%20Contents-000000?style=for-the-badge&logo=github&logoColor=white" alt="Table of Contents">
16761711
</a>
1677-
</div>
1712+
</div>

Diff for: agents-api/agents_api/activities/embed_docs.py

+27-11
Original file line numberDiff line numberDiff line change
@@ -18,26 +18,42 @@
1818
async def embed_docs(
1919
payload: EmbedDocsPayload, cozo_client=None, max_batch_size: int = 100
2020
) -> None:
21-
indices, snippets = list(zip(*enumerate(payload.content)))
22-
batched_snippets = batched(snippets, max_batch_size)
21+
# Pair each snippet with its index so both can be batched together
22+
indexed_snippets = list(enumerate(payload.content))
23+
# Batch snippets into groups of max_batch_size for parallel processing
24+
batched_indexed_snippets = list(batched(indexed_snippets, max_batch_size))
25+
# Get embedding instruction and title from payload, defaulting to empty strings
2326
embed_instruction: str = payload.embed_instruction or ""
2427
title: str = payload.title or ""
2528

26-
async def embed_batch(snippets):
27-
return await litellm.aembedding(
29+
# Helper function to embed a batch of snippets
30+
async def embed_batch(indexed_batch):
31+
# Split indices and snippets for the batch
32+
batch_indices, batch_snippets = zip(*indexed_batch)
33+
embeddings = await litellm.aembedding(
2834
inputs=[
29-
(
30-
embed_instruction + (title + "\n\n" + snippet) if title else snippet
31-
).strip()
32-
for snippet in snippets
33-
]
35+
((title + "\n\n" + snippet) if title else snippet).strip()
36+
for snippet in batch_snippets
37+
],
38+
embed_instruction=embed_instruction,
3439
)
40+
return list(zip(batch_indices, embeddings))
3541

36-
embeddings = reduce(
42+
# Gather embeddings with their corresponding indices
43+
indexed_embeddings = reduce(
3744
operator.add,
38-
await asyncio.gather(*[embed_batch(snippets) for snippets in batched_snippets]),
45+
await asyncio.gather(
46+
*[embed_batch(batch) for batch in batched_indexed_snippets]
47+
),
3948
)
4049

50+
# Split indices and embeddings after all batches are processed
51+
indices, embeddings = zip(*indexed_embeddings)
52+
53+
# Convert to lists since embed_snippets_query expects list types
54+
indices = list(indices)
55+
embeddings = list(embeddings)
56+
4157
embed_snippets_query(
4258
developer_id=payload.developer_id,
4359
doc_id=payload.doc_id,

Diff for: agents-api/agents_api/activities/execute_integration.py

+4-1
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
from ..autogen.openapi_model import BaseIntegrationDef
77
from ..clients import integrations
88
from ..common.exceptions.tools import IntegrationExecutionException
9-
from ..common.protocol.tasks import StepContext
9+
from ..common.protocol.tasks import ExecutionInput, StepContext
1010
from ..common.storage_handler import auto_blob_store
1111
from ..env import testing
1212
from ..models.tools import get_tool_args_from_metadata
@@ -21,6 +21,9 @@ async def execute_integration(
2121
arguments: dict[str, Any],
2222
setup: dict[str, Any] = {},
2323
) -> Any:
24+
if not isinstance(context.execution_input, ExecutionInput):
25+
raise TypeError("Expected ExecutionInput type for context.execution_input")
26+
2427
developer_id = context.execution_input.developer_id
2528
agent_id = context.execution_input.agent.id
2629
task_id = context.execution_input.task.id

Diff for: agents-api/agents_api/activities/execute_system.py

+5-1
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66

77
from beartype import beartype
88
from box import Box, BoxList
9+
from fastapi import HTTPException
910
from fastapi.background import BackgroundTasks
1011
from temporalio import activity
1112

@@ -18,7 +19,7 @@
1819
TextOnlyDocSearchRequest,
1920
VectorDocSearchRequest,
2021
)
21-
from ..common.protocol.tasks import StepContext
22+
from ..common.protocol.tasks import ExecutionInput, StepContext
2223
from ..common.storage_handler import auto_blob_store, load_from_blob_store_if_remote
2324
from ..env import testing
2425
from ..models.developer import get_developer
@@ -40,6 +41,9 @@ async def execute_system(
4041
if set(arguments.keys()) == {"bucket", "key"}:
4142
arguments = await load_from_blob_store_if_remote(arguments)
4243

44+
if not isinstance(context.execution_input, ExecutionInput):
45+
raise TypeError("Expected ExecutionInput type for context.execution_input")
46+
4347
arguments["developer_id"] = context.execution_input.developer_id
4448

4549
# Unbox all the arguments

Diff for: agents-api/agents_api/activities/task_steps/prompt_step.py

+4-1
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
from ...clients import (
88
litellm, # We don't directly import `acompletion` so we can mock it
99
)
10-
from ...common.protocol.tasks import StepContext, StepOutcome
10+
from ...common.protocol.tasks import ExecutionInput, StepContext, StepOutcome
1111
from ...common.storage_handler import auto_blob_store
1212
from ...common.utils.template import render_template
1313
from ...env import debug
@@ -98,6 +98,9 @@ async def prompt_step(context: StepContext) -> StepOutcome:
9898
skip_vars=["developer_id"],
9999
)
100100

101+
if not isinstance(context.execution_input, ExecutionInput):
102+
raise TypeError("Expected ExecutionInput type for context.execution_input")
103+
101104
# Get settings and run llm
102105
agent_default_settings: dict = (
103106
context.execution_input.agent.default_settings.model_dump()

Diff for: agents-api/agents_api/activities/task_steps/transition_step.py

+6-3
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,13 @@
11
import asyncio
2+
from typing import cast
23

34
from beartype import beartype
45
from fastapi import HTTPException
56
from temporalio import activity
67

78
from ...autogen.openapi_model import CreateTransitionRequest, Transition
89
from ...clients.temporal import get_workflow_handle
9-
from ...common.protocol.tasks import StepContext
10+
from ...common.protocol.tasks import ExecutionInput, StepContext
1011
from ...common.storage_handler import load_from_blob_store_if_remote
1112
from ...env import (
1213
temporal_activity_after_retry_timeout,
@@ -52,6 +53,9 @@ async def transition_step(
5253
transition_info.output
5354
)
5455

56+
if not isinstance(context.execution_input, ExecutionInput):
57+
raise TypeError("Expected ExecutionInput type for context.execution_input")
58+
5559
# Create transition
5660
try:
5761
transition = await create_execution_transition_async(
@@ -64,12 +68,11 @@ async def transition_step(
6468
)
6569

6670
except Exception as e:
67-
if isinstance(e, HTTPException) and e.status_code == 429:
71+
if isinstance(e, HTTPException) and cast(HTTPException, e).status_code == 429:
6872
await wf_handle.signal(
6973
TaskExecutionWorkflow.set_last_error,
7074
LastErrorInput(last_error=TooManyRequestsError()),
7175
)
72-
7376
raise e
7477

7578
return transition

Diff for: agents-api/agents_api/activities/task_steps/yield_step.py

+4-1
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
from temporalio import activity
55

66
from ...autogen.openapi_model import TransitionTarget, YieldStep
7-
from ...common.protocol.tasks import StepContext, StepOutcome
7+
from ...common.protocol.tasks import ExecutionInput, StepContext, StepOutcome
88
from ...common.storage_handler import auto_blob_store
99
from ...env import testing
1010
from .base_evaluate import base_evaluate
@@ -16,6 +16,9 @@ async def yield_step(context: StepContext) -> StepOutcome:
1616
try:
1717
assert isinstance(context.current_step, YieldStep)
1818

19+
if not isinstance(context.execution_input, ExecutionInput):
20+
raise TypeError("Expected ExecutionInput type for context.execution_input")
21+
1922
all_workflows = context.execution_input.task.workflows
2023
workflow = context.current_step.workflow
2124
exprs = context.current_step.arguments

Diff for: agents-api/agents_api/activities/utils.py

+11
Original file line numberDiff line numberDiff line change
@@ -21,13 +21,22 @@
2121
from simpleeval import EvalWithCompoundTypes, SimpleEval
2222

2323
from ..autogen.openapi_model import SystemDef
24+
from ..common.nlp import nlp
2425
from ..common.utils import yaml
2526

2627
T = TypeVar("T")
2728
R = TypeVar("R")
2829
P = ParamSpec("P")
2930

3031

32+
def chunk_doc(string: str) -> list[str]:
33+
"""
34+
Split a string into chunks, joining the sentences of each chunk with spaces.
35+
"""
36+
doc = nlp(string)
37+
return [" ".join([sent.text for sent in chunk]) for chunk in doc._.chunks]
38+
39+
3140
# TODO: We need to make sure that we don't expose any security issues
3241
ALLOWED_FUNCTIONS = {
3342
"abs": abs,
@@ -58,6 +67,8 @@
5867
"dump_json": json.dumps,
5968
"dump_yaml": yaml.dump,
6069
"match_regex": lambda pattern, string: bool(re2.fullmatch(pattern, string)),
70+
"nlp": nlp.__call__,
71+
"chunk_doc": chunk_doc,
6172
}
6273

6374

0 commit comments

Comments
 (0)