From a1884bfc0a85df5da417465ebdc87e3d15b9ebe6 Mon Sep 17 00:00:00 2001 From: Mish Ushakov Date: Thu, 18 Apr 2024 20:44:37 +0200 Subject: [PATCH 1/5] added browserbase fetcher --- integrations/browserbase.md | 67 ++++++++++++++++++++++++++++++++++++ logos/browserbase.png | Bin 0 -> 11928 bytes 2 files changed, 67 insertions(+) create mode 100644 integrations/browserbase.md create mode 100644 logos/browserbase.png diff --git a/integrations/browserbase.md b/integrations/browserbase.md new file mode 100644 index 0000000..5a72596 --- /dev/null +++ b/integrations/browserbase.md @@ -0,0 +1,67 @@ +--- +layout: integration +name: Browserbase +description: Use Browserbase headless browsers with Haystack +authors: + - name: Browserbase + socials: + github: https://github.com/browserbase + twitter: https://twitter.com/browserbasehq + linkedin: https://www.linkedin.com/company/browserbasehq +pypi: https://pypi.org/project/browserbase-haystack +repo: https://github.com/browserbase/haystack +report_issue: https://github.com/browserbase/haystack/issues +type: Data Ingestion +logo: /logos/browserbase.png +version: Haystack 2.0 +--- + +# Browserbase Haystack Fetcher + +[Browserbase](https://browserbase.com) is a serverless platform for running headless browsers, it offers advanced debugging, session recordings, stealth mode, integrated proxies and captcha solving. + +## Installation and setup + +- Get an API key from [browserbase.com](https://browserbase.com) and set it in environment variables (`BROWSERBASE_KEY`). +- Install the required dependencies: + +``` +pip install browserbase-haystack +``` + +## Usage + +You can load webpages into Haystack using `BrowserbaseFetcher`. Optionally, you can set `text_content` parameter to convert the pages to text-only representation. 
+ +### Standalone + +```py +from browserbase_haystack import BrowserbaseFetcher + +browserbase_fetcher = BrowserbaseFetcher() +browserbase_fetcher.run(urls=["https://example.com"], text_content=False) +``` + +### In a pipeline + +```py +from browserbase_haystack import BrowserbaseFetcher +from haystack import Pipeline +from haystack.components.generators import OpenAIGenerator +from haystack.components.builders import PromptBuilder + +prompt_template = ( + "Tell me the titles of the given pages. Pages: {{ documents }}" +) +prompt_builder = PromptBuilder(template=prompt_template) +llm = OpenAIGenerator() + +pipe = Pipeline() +pipe.add_component("fetcher", self.browserbase_fetcher) +pipe.add_component("prompt_builder", prompt_builder) +pipe.add_component("llm", llm) + +pipe.connect("fetcher.documents", "prompt_builder.documents") +pipe.connect("prompt_builder.prompt", "llm.prompt") +result = pipe.run(data={"fetcher": {"urls": ["https://example.com"]}}) +``` diff --git a/logos/browserbase.png b/logos/browserbase.png new file mode 100644 index 0000000000000000000000000000000000000000..79aeb0e71a942eecf0cbebcd029f1c4a019c5bd8 GIT binary patch literal 11928 zcmZv?2RNKh*FUbU-l9jZ(R&b`sL=^)_0`)d(L*G9S%eTPx)7o)c15qdqIc02Aqb-P z5+wY5pXYhY|GobAea$szK6B2QGiPS*Yi90~Xk?&8M$AZzg@r}-Ok3Rq3kw@`pUeqy z?~#f1vd{O0XYOihM(&P|SXd!>;c3dcI4_v{lt0H&K2?37N~Wcrk!bz2+Jh?gt25C~ zP(fK#fwBSD0wW~LBayuaW~&7!5DlWkdl4`9ZV!$t4?)p+m;nf`qmpt{6`Xu0e za^*uSqQOKbouIB2R4f6TV~*_uRkIYFS67A&hwmU=xlKMA9tF?q3g>#2*QV8PXit%o%wM9rZ?ll_D^iSM^XymAoEQofioUfTznfp?#wdX$kx z+H~1wqyeu&-nCuQ-~`Qe$?v&tJQC`N{IWd$t>3qWB7@>Dq!F7-s=4_=PDQwkV1rF& zHK4S=fRa?vVs@9gnd0N~CirEUbKn^2ig2!vm++B;V`8(t<-KoVMtY_idhRdo?(WXD z<(C!2#}cB4!U7j#!cnh+(T>)G{$2;l5+~|1+e#AaN)q4X#TOJLrsTyBpGsjAB(9!o z%*aciWktv3MMvdC2V}*r>;6(`aN2TNAcl{KUOx3HBRSeJHLMY^0Dxwvt>q8VdrD+X!<4M-a_I46NBei z_x5Qd-2VMDq@Aqr;vOaP)wb}*!XlymCt+jdBwRD02R*R_>nu2NvaI{})Sury?gv--un!$IqEvT0~4l zj8lo2ot<65&&fs3L|yYg;`b#*PPc#nUpZ0H;NW19U`Y`lKUYz4Sy@?8F$qx#3E_JV 
zVgHxj0rnxn-u_(wR`OqZ)Sdku{oH*6+FmKQ*G_B4VQdt@~b7;U84a!o=C%2NL*?ekBA!6HKk$E~6h!}V%KviGzenI-==~5V z5i5xP_i!o^8|9n^+|PzQXV5bjeSIu}dzugnfK7$<&vyg+{>8#(!~*<_#=_FUX8Ld1 z1e@~)z#}A8~l)( z?azpdkNai@sR+n*)orNEw#@vq$w7Y**0flz@75r{fU`cq96pssV1eMN^OA|Z$Wa04 z(e^7*H2r#@?#1n{k_C$0eS`>lVh+fp?&#=nT)mvfvt^6?aGVKoe-4yc0LIswr!rSf z@GG`sZAG+qhJ$A&A|BfG0*k6=*uT3qNFXn;O-%}pMsQq5wy7Ek_scY`!m1H|B)(z1 z5q`yk5Al12^0SqL#70^w;8IPqVVbP6%l+t#8g;=12@eW6sq%{)SZxEd#2*Gx@bIKWn6E#f z`NC+ibv9Xq>4R^4I#Z9!I1-`{8MWsv*v%&4QZ{23O;2_A(P+O+P$rP}xe4?=jmHz_ z)c)O5!cULBa;yqrkx>_)+{=+TvA3wtkEtkX;dcf<*7%ooiP6&?RY*=cKQ3md+IG=6wm zpCil_CaBS7NIkpb-7JU&k0{IWE5qaGxejoNqWg(HNbQOp_)OH}0V=sz$wLqsDGlZQU@tm)2z( z@>X>-jM_gdkyH&haQT~}!%iePn9kDKt2CN@fU2u1$K_rx8Mp;U5RucR@Wv-(dDU4q zmpopau3DPl^3rU8szXA)Xrppk#|!Jh?$!LvtFRl1!s;@$6)wX1;HG2dO{u{eIOUO1HY;;P6f6b|1^m*Rz`i^K(|4?H-b-IDeUro$cvB*jyO9=PAa`f?*qbA1=})IQu~N<6`fh`Seu_7s8OQz0DUU zgPDtSS*{XWC&ee}Qx?ZzXtNAsCAIu$6$VQuGUyG_G*?kcZL(+axm@W1mi=|H#I(P{ zxX1HzeF1ff;oJ@sj+r?y%!Motds^t+HCvqT$R(QtD>S9qwj!LC|5vPQW|QTSU;38) zjq)Nb@@rmz??;!HhLQDlS4#9(iWl#8rSPn?!^36$p3rzl+sj(nd$Nx}Ue7h^19Vx0 zXMK@M5Irs1ZPYGLd5Lx$s$#BpG?H7F=Vo~<*glbik_8u?dC|l-%X8Hm)&8449}no<^ZSUI!kEM2_1YN*m=&Ix|;3$H3%A5@XkwsuyV ztFE>1ru%kW5)q66^F1xB$PViu94~?T40=!-DwTKGzi zTa9C1;Mk+vW5;MsRNBbr4?>ru%<20&sVe>{+v5QLK{LJ6tCM)cHH|l>X|pO}_^{!b zh4!zoq!*?5K_cBI9a;WZ7VxRbQd_*Ju>CslzF+MYRpv}~D{|J%!0`vqf+6+?ld9d_v%A+wOHRd*s-c0PkDaNbkV={*1kg zos&&^GaMI&MRuB9(6r`1=fOa0wO$=~AU{3`7m}-@38Z&^OQJN_Rv~4@YCjivJWTdp z^rlnQK!_=hY;|QggqUje;np)4n7l65(-rs~B;P;y(L%CKXYyrcc^jW(38z4=Y`H{9 zx5()Pc*n{c=sy3-J=JsCJUk{>zt9RNwBwOy3udGD)el`h(r!A+I-R<| zmv0#(wl5E-k938EvFAM-nu~_HhJH+os-*KYsxZz2vw~gGqF-qCqF&ex91c4fkfs1& z7o9QhIu5=qW1kS=fsnN>Rkj+&EpCxbVojo!yM^;Z*$YFWK1D3Sn>FF)jKnvZn$q@Q zjoQYg*d+^hhd@d#*N;!KG-o)l`(5mNfSb31m(rOh7rIN|Upgna*?(GJFxQ$_W_ZyA zthe1Cjh@W!1&e+Z(?+<TbUAd<@iC4c@Yu$>B@l_iQ#F9T`D8V(U|(T zZr(fnqiuX&_@mdcZsuvtQOU_{4+i%Me%mJ3=sX=?tSt_V?=tw%l&zFgy4&v<(DHyK zK^07WD;MQI{;gc{ytth{-Wiq^Y;1EnuS*!CuHw|El79@FZgY}-(M 
zfAF2N5NsGR3Wy5Yb^wUaw5yxuPs3!50-|>)fPAoy{=MkVbtYlNPql_r<_!hpmBI?a%H4qT^e zYoK11UuH$zHu%yPrjVi@Cny@49RHenQQA@TYK-AA%i!6!5$n}g!=b8yTuYmD%T?cP zXDV@EOHHN)F2KPhdr1CPANjgS`C7Jt%`wo9kJB}|n)y&9zt!fo&$m^y{oa=lh|tQr zBB%pPMe6bJ-&iOvIT`Lv({u9({uj-04+)$Z7vG5svDERpO@i|AfagZ-#SWfiKLc-k zs8u;#0=|5uky~~&Tn^fhvJK{eDk|XN?Y$Yrd-ef68uyt{8k5W1i+>(%1G#(r;I<^A zN4K<1N%}jxF9yRD^aB3151%aJZ%FvxhP&aM%*fMB-wJR3`IX;M!hm5~E8g$yY2Y^x zgl;H+&i5%297sSoa^|GVH!6?1M31;1Z|({0IX>AcLrAu zuo`d?%z+6_3gAZQHGP15rQ<-*3=_2KBU;a&wz1qkWFvt)=<4sw2@09|`Zm;qIWShF zz$Gw&4iHyq{WJkeI}595A*tnQDb(owmNk(=^LMy5cd)E5Fa zj3tV8>`y8=Fp>95z5~s?){qa0mO0;+Oj=d$MSxE_0 zF%KD!t9mSO#>n1#8x*zi<{)7lPe`gh1|m-q)mKQog)dmU1#Db-g`%qSGW_h>EDHR2 zYqj3}vxW&m+e;n}r7I+aBfjjPNB-8yJ7XZDHR=s&;3tgaKayBz*`GW5VGKWW9Zp6c zVKs}NIqD;VLl8xJNVP);Qp0MHy$hM6n2_h+c;^{rxpv`r2YrOphk`fi$UGq_{F_Fm z(zlTk$hee_iWG_Bjd9N(fpFR8W@*US_s$5r)idcFNm?~JDR5CCH9Bn@a3xt zSd%P>v9WfJw;t2o{uaNc_)_+TY27|Nu@U#ejRm|q+7kKBtoB>a z=^AqD^eQ5J1N_$z#mAXSjS2s)6EsSp2Pb(*59AlAbeJ_Y zOJ6>(j`Y;>D2IKyY_qp$_EX;jM_0SMS9`NZ2xe~_heiH0nN>lYsuU(vw7o??E9pUbp+Nx=*$Jj2DcGrkN zA->+Bcs1Z)%HsAE$+F3cY<6-_zUqT9C9(d(g+SN@fU>EY!=yQ`?j935?0y-(Jhkk; zzwro#>uJxNKq1ZepHdMl1VL*^rP9&T7hpPUmul(mzqmm^s)I>{n1vHgX_@3-=c96a zu|0Sz`J~5=B^;J~dyZD#5!ed_E2S^h;C|-X4qEuBu-~USjelcAIQub=VTb21tt8GW zbDdI|E*h^I|1NoL6;2GwvSmpp%R7BrT@@9$knhMhyH9U*pymglODUQUG_>Vq)9CR zOmO$sK{%^K8wvNW`eoB1j3Gue2n2~DJ{v>!&fcBU3=e2dl@ef*90d7*Pw;p5r?zRU z3M$)c9ez>^%mxNH5?>6nLAanPeyE|O4K!+%pVm`$;zg%J1wVsNmkz}u!S+k35)4iB zJ}zQUoUkte!YrTamFQp-&*80&tn_0A+ev{wgOd!O<|^GepVf-~U zfd)n{&tNzBx{B+`dleM(U?TmvsMq@`D+8}*QEY*int zer4K~Vse@W`gx#IUJ&aQx+jKLdDH?*Fo$r|0iCL~7?5&(!Ddlrbvl+GlWt~`RVesk~T?CH9lG+d|_b__#CoPS-t2CRt_5fsoAn1-H2OQ z_F4xK&v)T@`T6%kO6rmy=q_;!PaX&Sl0wy*KK^eMqiq=9tc-I(=!6KtO*`@QxIu&2 zF#d31j$5R@9>SfaR*_~mS0aqSBJ9!*$se*b+F#6fKFzNIF7ZPsl3rM0FcIHFD?x}) z+m-;bMw>fnOotbw@m2tE)1L4~2-88E`v46CMSY%s(#qG!IScvQgtxcb|B5k>2lIG{ z@i}DHE6Z=GI_@|TwJZ(C!1;SMcgKEEk?7vei^c8#7DvCm#YX5M%_I$ESX>C9-AN>e zLmkPF-a((g6p;V&SsbwStEY7`G&+TIe8FkHOm}@}(pNaacIpul_5p{i@pYXs0CDBS 
z_44U+_``mk7CLF`@kI`#`e;dpuf@ z1_)eVo?k>X!?#M913LU;H@s&J7}7NIxf4F?(!{{Xa20R~^*kt4^mErAeu6hw1h0^Z z1c?njb!iWxH1{JWgWeTV5v+e!CxmGOBWGT#hC20j-WtpN5GCAG`sM6VO7Hfn-RKRw z2eTmF%_14_7RR>Dj`Avya(#dG^lRZACS`11czpP^LS9Pr6OUzbH|kw{!BE0Mel+yv zD}B6QxoNfGFl|dQpIlU=v$g6SiU1O7jp$ncdwKVSm-clg2KbFXq(DDjbwO+n7gb4_ z)MWm+p%}@Fu{;RUgQJ4P_0C_Kn@X71n`n@rmv?Kjusq)>3aR&F=?bH~FV7_Gk}E8C zYt&RX#+ZaM1<-O_pr2bIAm_#2_?AJtu2b|yClAQQva3<@+I)TM1DFdBc0n;lx<|0x z^iGQ_G09*cV)O+dm*8TxE&A`d>R{vG*LTvn<3hWo#;;84zNa{Y54{kp!P0=W2TFU1 z?o1NaYF`w~5*4iwwQEF5dHXJ3CpCEq!Y(_|L$8@a92OqwZexBYqSIHafBazD6Fr}J zd^J)pV!jW6r1)+)b|iTGbWy}x@sdR;6%c>`=E`Bvg=oif3O{srjd?XBl?bRTo>B!X z;t5R5*<&5}LGgNusr!9&%Fc+Q{B`xG(mRd8i<}ekCqP2j83id~;z#kF4pc?^CG`q@ zX42UoepGk=A7g)Z5;G&Cjzqin!rfl<52KOj<&TkSp%>+e`-N-k%*Kuc59L!F&TAFV z_}?%VmlV0Q%oMM=2_SH&cd_o5cD7UXPaLX|q|JA!#VDpd!JL;;u9snObD3f0nZb;N z%I7cMi|#x_*L6*2MN08lX_~SatNrqjJhY=!`h9ksBlGfaGB~j@_Z)GS2hgA-DQ2Hd zy5D5cFlm_acxvMQ$Spin!0AG_UoO3UFkcrX_t=8Dx)_ttWJ{piSnMzeP(AH^ucu+( zkmL1h*fsR#bZ(A#Qw3u|F$g1}6Qsj{NcK9Mi9Y(BkpxrGF(z_X#ad0z$F zTKc5m$0{C<(-;qcFlocDzn-Ntz7d$Vl|V2egGFKPl>@S{mBp z-Ja=cu_b*Q&4NfVkv!in?S+OjpO5zKK899Pf;hg!j+0GPNTrcsR?`7YpPix3V6(ZZ zpuxDvyV(9^(AE1C`r-U>eXc>tbm{DPCqe#3a~DS<%wh-6VvIw|&y6RqI<%Wb(V++a zK57dQSD<-;sEyDKgGhRe?faiYbZlceG*(+KjQORk;zQyshQTj)Na0%cNfi9hi~5ue zMN0w<0WRb0JQmn&J|6Z?X5JY>U~bzD5MT1C!;@rZ<+`phWmo^t)J`TId#@_j%aDC9-F9Y!cUamgm9*h zn(S9q)P42)WjbSas!c=}89oI*WC6#Dm^hQh_&q1H;n!rvLvYkZWztktR=(9=NK(PB zPl1BB`>OKe9gj3}L{$U;yo7aOm78M9Hq18M1enXGh)P?XaNRYhe18AoR3o5rFx>=CVQEznCmtkadZ2+)OveEXmToeMY zb=KIqR}%(Y*>)xUwuRgFJ=4nK1ogP&6?x_dX)-5@QO5<`GL<&uB{eCK;wxVl)<)8p zf)q&;v*~+#kIrbc~%DzI#9DsB2()`xCwlf-2km#3^;x(v^64&YsdL(@?p1(Dg*8U)p#}tQr9Hq;8AeJ28p&{9XRqg@=`stX zA$~c1`~-XIQTc9KuxS0zbNn~GtZ7=9(-zMziMsPxZbPIG4xI5_H48-atm8oshnDDB zi>ETKm8X&QHipgwatbavVt}Sd6!vpGCTu*UknUysGyG~db^5#0o8B_TypIhNN`l&t zSi#BzgjqIQ1(mT+a=QUOOt~i>LyYI99P4rc zg!7YvfE$tsjYO-4$DS_UrR3N+H*NxS!5!}%!o(Z{h<9i`qI*EqG8L7ePRI0|6DeKn zRC}5O?AeE=4XUHQ7T_f+l07FA!dVa0H$3LPX+1Wd?1Mwvb(y@xv=4N-^G_QEv!ht) 
zym36YE9EdlZf&H9Lk9R>=)50Z}N zZL*5#3g+h>rO-@8JgiwA3l_xV!49y~lo=^)BZWz+ zh%0VQMe2R(4}YT*i5e+Tk7OO5`Enb#Jkn^bbaPbIr`npH_cVGi_>;07{YWvRjBB!o zhgtmzcGL`uD2WhD+`Cb82cFn1S(>EnD0)xCU)tRFEQZgWPxLBqX4m5Is!qTDf}1h& zVkCMW@h$e)Xh@o8B_2rvCi=Q1#b2yvHlL~M?Y&{O#UADsPkvSb&M!U2m47NN{i6KG z=yc}pDzta0?bC`==5F2IZkIA^)=wWv*RNZ0KaSdMm}e9z!CJCqE3*}#g&89eZcieM zMV`C|^NPlc8lkpQpG=Ya;eSBWbX5rgxbO5ot-Wgf} zH;vlip53p&eB!N&vqjVxGfJd0H(}T_B}rrU`kJaR81+y6XM=tZvWMzp%S$(sLSuIA zDA2dpA(nzZ%+orrib7-FJ&a@~){9JasQCP7G_c!ls8>I|U>kBmcJ|hxrORO--!g9} zaU+r&QLQIkI-X4l4Y!~UBMnNXnl{qeMJ0{DwfS^|Gd-})62oLw2~q+TS=RP^3Cn(8 zbALu{`{Mdkojl=F!KD~xMtAu6hTdoS^R;xsuVnXMZ0)m3ioD63|tBTm{TC8lZ2nm7wiS@pqf!~ zv{o6yVjRC0FhQF{jRflp`+xq9XYAH4%}G~yc|ynH9~=OdT}In<#wG~6UCWJbz%VL~LC9#Y2M9@lgooL7tw+oM-i43u+y}6v z{SynIt62AwBKh#^d*!{1pw{knZm1Li$y+#x?2=xM_t9St*NYZc=D-FI8N^d$v_vn? z8$nWPe>wIYvPN{=jP8nRT->|Ljo{p2aTVK9iy>$S=E3+P^Tte!IAx zFPMnX+`-|^*SB*$IKwIM_wYdW3?1W;WJ6Mz(t8PAYuArv2gI}+p&BrDL*ygSVwq{e ztvs~UdhVIQ@T%JB0tE_$8f>7MRzmdQKG9UF)+0c zD$YOv+N66jz{;3D70J`Z2>F6&LFoxGYz7H7@*{Yk3w1~rEMm_k27Ca8giiBPc7^Nu zR53tIek8?EN0u6vPzABO>mYHdAr>3%=+oN!HcyD zFD$s~JI17U);ePRXQcDEF%M|Y!jBW-qxBe|g)@xrpJWt{n_@gXGZW|`CXoHgLyaZ?_1g(q)fWsKV33Gx(rSt zOlkt$NZOFF6xz4&$93#SFHR;z1z@X76%KZ}gltbMbji)!q?-7Yj1lEe6teh zX@^levq}49W!Pa;6t_c(-7G7e>UG1lwY9F|71`}>s@DCsov;{Q4jr7x>xWQMH8YWS zZ$6ky1P5SU1gRTG;bL8MV!?N&Z-ZQZ(PxS|$Ba48x4ug=E-uV-Der=!Vq+(3 z9fh&wk!V)R?nq}{r7zcCdGntxnG$JpM1bi_g&0G%2tG1F-lc3rnTft^jm{XR1uT}h z4UirC*uF*k$Y!yPT19Z-lnh`OegeySdmDYHWeL$#dp6tOr*wAyn@%A*S#sn7%ZEj- zqt&8!aSKCpZnVk%A!1@b$I3rzFvQ?vIw0^RyqGY(R+b*3#mPJ#++WgTL|VyTBYs|} z606B?F3jV}^EV;wBXKD5hRD;0CyK%eW+qkoV4>s=Kn&(}MT!|$-M1)~;_;_T ztS^a4e%d3K{Y&bOP5$EA6W7Rsmd>fF=kmQsI&VzN20A`r=i|_0h_Fbwt8%~+WUtwj zD(LU%d>WYHFyS|2RD!ap?nVr?mdfYnxq)u_pHZbC!i&qI!9fM^!6UJdRNR`l-Bn!9%A^XJi?!LV;ql9$TqH~~9B0pLqmgx7K90V_HQwOMO=nX(GSHyvL?PIDBFjc7m>E& zZ#b*~dmZ9KWM0^&djPl$Xt<}-y{3n*2wP;+H$}v0xaF;8V)hj^ z@oRW}b!>l_!J?%)Oboxs~6jd=AA zNqOhxGM*0%25{!J9F*>+7a}}Q^|h&xybv*+8I)DhXUlWRYRTerzq98B)rgoxIELu= 
zK8nX8dJo`*4J@6z2~+6t()q{X7=h54sQqSCV=HyU>=~)ZM=W?9Von4R&~Bv*%;-P` zs%XTieE+mET4An4BZ;0}SzTRqpzs^1y4BDm3_1zQz@-Dt4+0>vpeNd>1Gc3h+{K+{ z?l5f6jPQs!l>qYAIrEl}Q&MpF>>i-Tr0Sa>9fY^AO06zxFB*YSrmuBFUZcoqQac$s zVh)%Z>vX<69=uh%TkVf>&gAvmcdQwT@8}dDmGLPX=b12>sr&Ti(Hw!SVso#Ki z!*<#Iy=a~5{&`#%>?Lbn6U0(gm0w*3(N4s(x}EZO5Y0cr`yy}OdL&$2CQGLW*Vyvb zZR|7-p&MJ%tDeDfq9@f}u8xT*_}Zb|n%-+obab|!a3opdz6Bz~8U79wRo)SfR3UJw zm1`EHREhy^Y_H{jJ%J~{W#8?O^D?Lz_L~w~67){ZUS@FffdTXE$}vGS%Y4-N_6u4N zZt}&wSvGdhX6kV>n{n#mWFy?L!SmpqQUXn_Fip=vCR`2EQd&%w;0+OJ131=cxAuGI zq+Ap(gnaH9?TrV;0O6=dJ$dUdCUCVdT*f>MZ+~2%K4G7t=w_=}0C{~uLflXGzCvoE zK+v`gW@5>>duu(Px!g>2^U#gWzr~=4#m#1)t)|WAHDp}&dvsB}jdu+>akar*0L5!M zYXVbfgMWCSo0z)%l`4d+VrLB4G}S7nQ2*Eh1I?GT`w~aJPRjhvgLVN|t?3u*BctkR z;_vk}9waDMqef){;WD|wwuAi$-{{HZMd0eOkS$?l#cY!|OEt`$!du0V^h1ol>7ja# zS^2D7N>yUxRBY?a>+bcuvc#9S!=85X)m44n4Y9CnYfzr#3iKoKROl2t(EWj4eVvAU z(p*oJ&K=vLm>K#)bgDY~U>-1Q%@SV;4qM{zo!@^nf7iDWp0?b1{&@9GP(}kMEw=&c zL4`oGOmfX;m~8;E6|1YDg_;91aEckVKo`59+2M%+>ayjnn~@lf1{ Qnqy%-(=br4c?ypDfA+y_^8f$< literal 0 HcmV?d00001 From 15eef1f97743c3c9d39793b49ab347472eddf39a Mon Sep 17 00:00:00 2001 From: Mish Ushakov Date: Fri, 19 Apr 2024 13:23:32 +0200 Subject: [PATCH 2/5] updated browserbase docs --- integrations/browserbase.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/integrations/browserbase.md b/integrations/browserbase.md index 5a72596..0ccb9ca 100644 --- a/integrations/browserbase.md +++ b/integrations/browserbase.md @@ -22,7 +22,7 @@ version: Haystack 2.0 ## Installation and setup -- Get an API key from [browserbase.com](https://browserbase.com) and set it in environment variables (`BROWSERBASE_KEY`). +- Get an API key from [browserbase.com](https://browserbase.com) and set it in environment variables (`BROWSERBASE_API_KEY`). 
- Install the required dependencies: ``` From 615808816aeedb6fbcc8df3dc63959918e4d8c61 Mon Sep 17 00:00:00 2001 From: Mish Ushakov <10400064+mishushakov@users.noreply.github.com> Date: Thu, 2 May 2024 14:01:46 +0200 Subject: [PATCH 3/5] updated Browserbase fetcher docs --- integrations/browserbase.md | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/integrations/browserbase.md b/integrations/browserbase.md index 0ccb9ca..20bcd06 100644 --- a/integrations/browserbase.md +++ b/integrations/browserbase.md @@ -18,7 +18,13 @@ version: Haystack 2.0 # Browserbase Haystack Fetcher -[Browserbase](https://browserbase.com) is a serverless platform for running headless browsers, it offers advanced debugging, session recordings, stealth mode, integrated proxies and captcha solving. +[Browserbase](https://browserbase.com) is a developer platform to reliably run, manage, and monitor headless browsers. + +Power your AI data retrievals with: +- [Serverless Infrastructure](https://docs.browserbase.com/under-the-hood) providing reliable browsers to extract data from complex UIs +- [Stealth Mode](https://docs.browserbase.com/features/stealth-mode) with included fingerprinting tactics and automatic captcha solving +- [Session Debugger](https://docs.browserbase.com/features/sessions) to inspect your Browser Session with network timeline and logs +- [Live Debug](https://docs.browserbase.com/guides/session-debug-connection/browser-remote-control) to quickly debug your automation ## Installation and setup @@ -45,10 +51,10 @@ browserbase_fetcher.run(urls=["https://example.com"], text_content=False) ### In a pipeline ```py -from browserbase_haystack import BrowserbaseFetcher from haystack import Pipeline from haystack.components.generators import OpenAIGenerator from haystack.components.builders import PromptBuilder +from browserbase_haystack import BrowserbaseFetcher prompt_template = ( "Tell me the titles of the given pages. 
Pages: {{ documents }}" @@ -56,8 +62,10 @@ prompt_template = ( prompt_builder = PromptBuilder(template=prompt_template) llm = OpenAIGenerator() +browserbase_fetcher = BrowserbaseFetcher() + pipe = Pipeline() -pipe.add_component("fetcher", self.browserbase_fetcher) +pipe.add_component("fetcher", browserbase_fetcher) pipe.add_component("prompt_builder", prompt_builder) pipe.add_component("llm", llm) From faaeabd93463380f138f0527e84fd43dbc919300 Mon Sep 17 00:00:00 2001 From: Mish Ushakov <10400064+mishushakov@users.noreply.github.com> Date: Mon, 13 May 2024 12:33:57 +0200 Subject: [PATCH 4/5] updated browserbase fetcher docs --- integrations/browserbase.md | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/integrations/browserbase.md b/integrations/browserbase.md index 20bcd06..26ea821 100644 --- a/integrations/browserbase.md +++ b/integrations/browserbase.md @@ -28,7 +28,7 @@ Power your AI data retrievals with: ## Installation and setup -- Get an API key from [browserbase.com](https://browserbase.com) and set it in environment variables (`BROWSERBASE_API_KEY`). +- Get an API key and Project ID from [browserbase.com](https://browserbase.com) and set them as environment variables (`BROWSERBASE_API_KEY`, `BROWSERBASE_PROJECT_ID`). - Install the required dependencies: ``` @@ -73,3 +73,10 @@ pipe.connect("fetcher.documents", "prompt_builder.documents") pipe.connect("prompt_builder.prompt", "llm.prompt") result = pipe.run(data={"fetcher": {"urls": ["https://example.com"]}}) ``` + +### Parameters + +- `urls` Required. A list of URLs to fetch +- `text_content` Optional. Only return page text content +- `session_id` Optional. The Session ID +- `proxy` Optional. 
Enable Proxy From 6483c75a6bfb222348a9987647b509be64cafade Mon Sep 17 00:00:00 2001 From: Mish Ushakov <10400064+mishushakov@users.noreply.github.com> Date: Thu, 16 May 2024 16:29:41 +0200 Subject: [PATCH 5/5] updated browserbase docs --- integrations/browserbase.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/integrations/browserbase.md b/integrations/browserbase.md index 26ea821..2a2b81f 100644 --- a/integrations/browserbase.md +++ b/integrations/browserbase.md @@ -76,7 +76,7 @@ result = pipe.run(data={"fetcher": {"urls": ["https://example.com"]}}) ### Parameters -- `urls` Required. A list of URLs to fetch -- `text_content` Optional. Only return page text content -- `session_id` Optional. The Session ID -- `proxy` Optional. Enable Proxy +- `urls` Required. A list of URLs to fetch. +- `text_content` Retrieve only text content. Default is `False`. +- `session_id` Optional. Provide an existing Session ID. +- `proxy` Optional. Enable/Disable Proxies.

## Loading images