Skip to content

Commit 0b72f75

Browse files
Extract improvement take 3 (#304)
* update readme * add bounding box functions to use on the DOM * add parameterized textExtract * add formatText function and helpers * add TextAnnotation type * changeset & prettier * add useTextExtract as param for ExtractOptions * use textExtract method in CI * rm NotImplementedError * indentation fix * don't run domExtract and textExtract at the same time * 📈 time limit * ⬆️ time limit & rm extract_partners
1 parent cd7d13d commit 0b72f75

File tree

12 files changed

+952
-138
lines changed

12 files changed

+952
-138
lines changed

.changeset/sixty-poets-battle.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Add `textExtract`: an optional, text-based alternative for the existing `extract` method. `textExtract` often performs better on long-form extraction tasks. By default, `extract` continues to use the existing `domExtract` approach.

.github/workflows/ci.yml

Lines changed: 30 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,7 @@ jobs:
5858
run-extract-evals:
5959
needs: [run-lint, run-build]
6060
runs-on: ubuntu-latest
61-
timeout-minutes: 25
61+
timeout-minutes: 50
6262
env:
6363
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
6464
ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
@@ -82,31 +82,32 @@ jobs:
8282
- name: Install Playwright browsers
8383
run: npm exec playwright install --with-deps
8484

85-
# Run extract category with domExtract
85+
# 1. Run extract category with domExtract
8686
- name: Run Extract Evals (domExtract)
8787
run: npm run evals category extract -- --extract-method=domExtract
8888
- name: Save Extract Dom Results
8989
run: mv eval-summary.json eval-summary-extract-dom.json
9090

91-
# Run extract category with textExtract
91+
# 2. Once domExtract finishes, run extract category with textExtract
9292
- name: Run Extract Evals (textExtract)
9393
run: npm run evals category extract -- --extract-method=textExtract
94-
continue-on-error: true
95-
# - name: Save Extract Text Results
96-
# run: mv eval-summary.json eval-summary-extract-text.json
94+
- name: Save Extract Text Results
95+
run: mv eval-summary.json eval-summary-extract-text.json
9796

97+
# 3. Log and Compare Extract Evals Performance
9898
- name: Log and Compare Extract Evals Performance
9999
run: |
100100
experimentNameDom=$(jq -r '.experimentName' eval-summary-extract-dom.json)
101101
dom_score=$(jq '.categories.extract' eval-summary-extract-dom.json)
102102
echo "DomExtract Extract category score: $dom_score%"
103103
echo "View domExtract results: https://www.braintrust.dev/app/Browserbase/p/stagehand/experiments/${experimentNameDom}"
104104
105-
# experimentNameText=$(jq -r '.experimentName' eval-summary-extract-text.json)
106-
# text_score=$(jq '.categories.extract' eval-summary-extract-text.json)
107-
# echo "TextExtract Extract category score: $text_score%"
108-
# echo "View textExtract results: https://www.braintrust.dev/app/Browserbase/p/stagehand/experiments/${experimentNameText}"
105+
experimentNameText=$(jq -r '.experimentName' eval-summary-extract-text.json)
106+
text_score=$(jq '.categories.extract' eval-summary-extract-text.json)
107+
echo "TextExtract Extract category score: $text_score%"
108+
echo "View textExtract results: https://www.braintrust.dev/app/Browserbase/p/stagehand/experiments/${experimentNameText}"
109109
110+
# 4. If domExtract <80% fail CI
110111
if (( $(echo "$dom_score < 80" | bc -l) )); then
111112
echo "DomExtract extract category score is below 80%. Failing CI."
112113
exit 1
@@ -115,7 +116,7 @@ jobs:
115116
run-text-extract-evals:
116117
needs: [run-extract-evals]
117118
runs-on: ubuntu-latest
118-
timeout-minutes: 40
119+
timeout-minutes: 120
119120
env:
120121
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
121122
ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
@@ -139,36 +140,36 @@ jobs:
139140
- name: Install Playwright browsers
140141
run: npm exec playwright install --with-deps
141142

142-
# Run text_extract category with domExtract
143+
# 1. Run text_extract category with textExtract first
144+
- name: Run text_extract Evals (textExtract)
145+
run: npm run evals category text_extract -- --extract-method=textExtract
146+
- name: Save text_extract Text Results
147+
run: mv eval-summary.json eval-summary-text_extract-text.json
148+
149+
# 2. Then run text_extract category with domExtract
143150
- name: Run text_extract Evals (domExtract)
144151
run: npm run evals category text_extract -- --extract-method=domExtract
145152
- name: Save text_extract Dom Results
146153
run: mv eval-summary.json eval-summary-text_extract-dom.json
147154

148-
# Run text_extract category with textExtract
149-
- name: Run text_extract Evals (textExtract)
150-
run: npm run evals category text_extract -- --extract-method=textExtract
151-
continue-on-error: true
152-
# - name: Save text_extract Text Results
153-
# run: mv eval-summary.json eval-summary-text_extract-text.json
154-
155+
# 3. Log and Compare text_extract Evals Performance
155156
- name: Log and Compare text_extract Evals Performance
156157
run: |
158+
experimentNameText=$(jq -r '.experimentName' eval-summary-text_extract-text.json)
159+
text_score=$(jq '.categories.text_extract' eval-summary-text_extract-text.json)
160+
echo "TextExtract text_extract category score: $text_score%"
161+
echo "View textExtract results: https://www.braintrust.dev/app/Browserbase/p/stagehand/experiments/${experimentNameText}"
162+
157163
experimentNameDom=$(jq -r '.experimentName' eval-summary-text_extract-dom.json)
158164
dom_score=$(jq '.categories.text_extract' eval-summary-text_extract-dom.json)
159165
echo "DomExtract text_extract category score: $dom_score%"
160166
echo "View domExtract results: https://www.braintrust.dev/app/Browserbase/p/stagehand/experiments/${experimentNameDom}"
161167
162-
# experimentNameText=$(jq -r '.experimentName' eval-summary-text_extract-text.json)
163-
# text_score=$(jq '.categories.text_extract' eval-summary-text_extract-text.json)
164-
# echo "TextExtract text_extract category score: $text_score%"
165-
# echo "View textExtract results: https://www.braintrust.dev/app/Browserbase/p/stagehand/experiments/${experimentNameText}"
166-
167-
# Fail CI only if textExtract is below 80%
168-
# if (( $(echo "$text_score < 80" | bc -l) )); then
169-
# echo "textExtract text_extract category score is below 80%. Failing CI."
170-
# exit 1
171-
# fi
168+
# 4. If textExtract (for text_extract category) <80% fail CI
169+
if (( $(echo "$text_score < 80" | bc -l) )); then
170+
echo "textExtract text_extract category score is below 80%. Failing CI."
171+
exit 1
172+
fi
172173
173174
run-act-evals:
174175
runs-on: ubuntu-latest

README.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -260,6 +260,7 @@ This constructor is used to create an instance of Stagehand.
260260
- `modelName`: (optional) an `AvailableModel` string to specify the model to use
261261
- `modelClientOptions`: (optional) configuration options for the model client
262262
- `domSettleTimeoutMs`: (optional) timeout in milliseconds for waiting for the DOM to settle
263+
- `useTextExtract`: (optional) a `boolean` that determines whether text-based extraction is used. Defaults to `false`.
263264

264265
- **Returns:**
265266

evals/index.eval.ts

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -34,12 +34,6 @@ const extractMethodArg = args.find((arg) =>
3434
);
3535
if (extractMethodArg) {
3636
extractMethod = extractMethodArg.split("=")[1];
37-
38-
if (extractMethod === "textExtract") {
39-
throw new Error(
40-
"NotImplementedError: textExtract method is not implemented on this branch.",
41-
);
42-
}
4337
}
4438

4539
process.env.EXTRACT_METHOD = extractMethod;

evals/text_extract/extract_partners.ts

Lines changed: 0 additions & 101 deletions
This file was deleted.

lib/dom/global.d.ts

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,5 +24,15 @@ declare global {
2424
__playwright?: unknown;
2525
__pw_manual?: unknown;
2626
__PW_inspect?: unknown;
27+
storeDOM: () => string;
28+
restoreDOM: (storedDOM: string) => void;
29+
createTextBoundingBoxes: () => void;
30+
getElementBoundingBoxes: (xpath: string) => Array<{
31+
text: string;
32+
top: number;
33+
left: number;
34+
width: number;
35+
height: number;
36+
}>;
2737
}
2838
}

0 commit comments

Comments
 (0)