browserbase
diff --git a/‎.changeset/sixty-poets-battle.md‎
Lines changed: 1 addition & 0 deletions b/‎.changeset/sixty-poets-battle.md‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎.github/workflows/ci.yml‎
Lines changed: 30 additions & 29 deletions b/‎.github/workflows/ci.yml‎
Lines changed: 30 additions & 29 deletions
diff --git a/‎README.md‎
Lines changed: 1 addition & 0 deletions b/‎README.md‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎evals/index.eval.ts‎
Lines changed: 0 additions & 6 deletions b/‎evals/index.eval.ts‎
Lines changed: 0 additions & 6 deletions
diff --git a/‎evals/text_extract/extract_partners.ts‎
Lines changed: 0 additions & 101 deletions b/‎evals/text_extract/extract_partners.ts‎
Lines changed: 0 additions & 101 deletions
diff --git a/‎lib/dom/global.d.ts‎
Lines changed: 10 additions & 0 deletions b/‎lib/dom/global.d.ts‎
Lines changed: 10 additions & 0 deletions
@@ -0,0 +1 @@
+add textExtract: an optional, text-based approach to the existing extract method. textExtract often performs better on long-form extraction tasks. By default, `extract` uses the existing approach, `domExtract`.
@@ -58,7 +58,7 @@ jobs:
   run-extract-evals:
     needs: [run-lint, run-build]
     runs-on: ubuntu-latest
-    timeout-minutes: 25
+    timeout-minutes: 50
     env:
       OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
       ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
@@ -82,31 +82,32 @@ jobs:
       - name: Install Playwright browsers
         run: npm exec playwright install --with-deps
 
-      # Run extract category with domExtract
+      # 1. Run extract category with domExtract
       - name: Run Extract Evals (domExtract)
         run: npm run evals category extract -- --extract-method=domExtract
       - name: Save Extract Dom Results
         run: mv eval-summary.json eval-summary-extract-dom.json
 
-      # Run extract category with textExtract
+      # 2. Once domExtract finishes, run extract category with textExtract
       - name: Run Extract Evals (textExtract)
         run: npm run evals category extract -- --extract-method=textExtract
-        continue-on-error: true
-      #      - name: Save Extract Text Results
-      #        run: mv eval-summary.json eval-summary-extract-text.json
+      - name: Save Extract Text Results
+        run: mv eval-summary.json eval-summary-extract-text.json
 
+      # 3. Log and Compare Extract Evals Performance
       - name: Log and Compare Extract Evals Performance
         run: |
           experimentNameDom=$(jq -r '.experimentName' eval-summary-extract-dom.json)
           dom_score=$(jq '.categories.extract' eval-summary-extract-dom.json)
           echo "DomExtract Extract category score: $dom_score%"
           echo "View domExtract results: https://www.braintrust.dev/app/Browserbase/p/stagehand/experiments/${experimentNameDom}"
 
-          # experimentNameText=$(jq -r '.experimentName' eval-summary-extract-text.json)
-          # text_score=$(jq '.categories.extract' eval-summary-extract-text.json)
-          # echo "TextExtract Extract category score: $text_score%"
-          # echo "View textExtract results: https://www.braintrust.dev/app/Browserbase/p/stagehand/experiments/${experimentNameText}"
+          experimentNameText=$(jq -r '.experimentName' eval-summary-extract-text.json)
+          text_score=$(jq '.categories.extract' eval-summary-extract-text.json)
+          echo "TextExtract Extract category score: $text_score%"
+          echo "View textExtract results: https://www.braintrust.dev/app/Browserbase/p/stagehand/experiments/${experimentNameText}"
 
+          # 4. If domExtract <80% fail CI
           if (( $(echo "$dom_score < 80" | bc -l) )); then
             echo "DomExtract extract category score is below 80%. Failing CI."
             exit 1
@@ -115,7 +116,7 @@ jobs:
   run-text-extract-evals:
     needs: [run-extract-evals]
     runs-on: ubuntu-latest
-    timeout-minutes: 40
+    timeout-minutes: 120
     env:
       OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
       ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
@@ -139,36 +140,36 @@ jobs:
       - name: Install Playwright browsers
         run: npm exec playwright install --with-deps
 
-      # Run text_extract category with domExtract
+      # 1. Run text_extract category with textExtract first
+      - name: Run text_extract Evals (textExtract)
+        run: npm run evals category text_extract -- --extract-method=textExtract
+      - name: Save text_extract Text Results
+        run: mv eval-summary.json eval-summary-text_extract-text.json
+
+      # 2. Then run text_extract category with domExtract
       - name: Run text_extract Evals (domExtract)
         run: npm run evals category text_extract -- --extract-method=domExtract
       - name: Save text_extract Dom Results
         run: mv eval-summary.json eval-summary-text_extract-dom.json
 
-      # Run text_extract category with textExtract
-      - name: Run text_extract Evals (textExtract)
-        run: npm run evals category text_extract -- --extract-method=textExtract
-        continue-on-error: true
-      #      - name: Save text_extract Text Results
-      #        run: mv eval-summary.json eval-summary-text_extract-text.json
-
+      # 3. Log and Compare text_extract Evals Performance
       - name: Log and Compare text_extract Evals Performance
         run: |
+          experimentNameText=$(jq -r '.experimentName' eval-summary-text_extract-text.json)
+          text_score=$(jq '.categories.text_extract' eval-summary-text_extract-text.json)
+          echo "TextExtract text_extract category score: $text_score%"
+          echo "View textExtract results: https://www.braintrust.dev/app/Browserbase/p/stagehand/experiments/${experimentNameText}"
+
           experimentNameDom=$(jq -r '.experimentName' eval-summary-text_extract-dom.json)
           dom_score=$(jq '.categories.text_extract' eval-summary-text_extract-dom.json)
           echo "DomExtract text_extract category score: $dom_score%"
           echo "View domExtract results: https://www.braintrust.dev/app/Browserbase/p/stagehand/experiments/${experimentNameDom}"
 
-  #          experimentNameText=$(jq -r '.experimentName' eval-summary-text_extract-text.json)
-  #          text_score=$(jq '.categories.text_extract' eval-summary-text_extract-text.json)
-  #          echo "TextExtract text_extract category score: $text_score%"
-  #          echo "View textExtract results: https://www.braintrust.dev/app/Browserbase/p/stagehand/experiments/${experimentNameText}"
-
-  # Fail CI only if textExtract is below 80%
-  #          if (( $(echo "$text_score < 80" | bc -l) )); then
-  #            echo "textExtract text_extract category score is below 80%. Failing CI."
-  #            exit 1
-  #          fi
+          # 4. If textExtract (for text_extract category) <80% fail CI
+          if (( $(echo "$text_score < 80" | bc -l) )); then
+            echo "textExtract text_extract category score is below 80%. Failing CI."
+            exit 1
+          fi
 
   run-act-evals:
     runs-on: ubuntu-latest
 
@@ -260,6 +260,7 @@ This constructor is used to create an instance of Stagehand.
   - `modelName`: (optional) an `AvailableModel` string to specify the model to use
   - `modelClientOptions`: (optional) configuration options for the model client
   - `domSettleTimeoutMs`: (optional) timeout in milliseconds for waiting for the DOM to settle
+  - `useTextExtract`: (optional) a `boolean` that determines whether text-based extraction is used. Defaults to `false`.
 
 - **Returns:**
 
 
@@ -34,12 +34,6 @@ const extractMethodArg = args.find((arg) =>
 );
 if (extractMethodArg) {
   extractMethod = extractMethodArg.split("=")[1];
-
-  if (extractMethod === "textExtract") {
-    throw new Error(
-      "NotImplementedError: textExtract method is not implemented on this branch.",
-    );
-  }
 }
 
 process.env.EXTRACT_METHOD = extractMethod;
 
@@ -24,5 +24,15 @@ declare global {
     __playwright?: unknown;
     __pw_manual?: unknown;
     __PW_inspect?: unknown;
+    storeDOM: () => string;
+    restoreDOM: (storedDOM: string) => void;
+    createTextBoundingBoxes: () => void;
+    getElementBoundingBoxes: (xpath: string) => Array<{
+      text: string;
+      top: number;
+      left: number;
+      width: number;
+      height: number;
+    }>;
   }
 }
Original file line number	Diff line number	Diff line change
`@@ -0,0 +1 @@`
	`1`	+add textExtract: an optional, text-based approach to the existing extract method. textExtract often performs better on long-form extraction tasks. By default, `extract` uses the existing approach, `domExtract`.