From 7bed04991498093bff6d30ec66930d1c8c1801f6 Mon Sep 17 00:00:00 2001 From: Roman Solomatin <36135455+Samoed@users.noreply.github.com> Date: Fri, 2 May 2025 08:28:26 +0300 Subject: [PATCH 1/7] fix token --- .github/workflows/docs.yml | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml index c368b2cb9a..b72a2f975d 100644 --- a/.github/workflows/docs.yml +++ b/.github/workflows/docs.yml @@ -7,6 +7,9 @@ on: branches: [main] pull_request: +permissions: + contents: write + jobs: create-table-on-pr: if: github.event_name == 'pull_request' @@ -32,8 +35,6 @@ jobs: runs-on: ubuntu-latest steps: - uses: actions/checkout@v3 - with: - token: ${{ secrets.RELEASE }} - uses: actions/setup-python@v4 with: From 5bb3a373eacf29e71b4e3111ebf92b24f16e432e Mon Sep 17 00:00:00 2001 From: Roman Solomatin <36135455+Samoed@users.noreply.github.com> Date: Fri, 2 May 2025 08:29:13 +0300 Subject: [PATCH 2/7] try to trigger --- .github/workflows/docs.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml index b72a2f975d..b9eea8c031 100644 --- a/.github/workflows/docs.yml +++ b/.github/workflows/docs.yml @@ -31,7 +31,7 @@ jobs: make build-docs create-table-and-push: - if: github.ref == 'refs/heads/main' +# if: github.ref == 'refs/heads/main' runs-on: ubuntu-latest steps: - uses: actions/checkout@v3 From e2111b001bea8c5d34ec2acf7d5bbd39cf75df7f Mon Sep 17 00:00:00 2001 From: Roman Solomatin <36135455+Samoed@users.noreply.github.com> Date: Fri, 2 May 2025 08:32:31 +0300 Subject: [PATCH 3/7] add token --- .github/workflows/docs.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml index b9eea8c031..986ea8ae8a 100644 --- a/.github/workflows/docs.yml +++ b/.github/workflows/docs.yml @@ -50,6 +50,8 @@ jobs: make build-docs - name: Push table + env: + GITHUB_TOKEN: ${{ github.token }} run: | git config --global user.email "github-actions[bot]@users.noreply.github.com" git config --global user.name "github-actions[bot]" From 89601ea17cdea36d62d4709aa6838b5df4995e68 Mon Sep 17 00:00:00 2001 From: Roman Solomatin <36135455+Samoed@users.noreply.github.com> Date: Fri, 2 May 2025 08:37:42 +0300 Subject: [PATCH 4/7] test ci --- .github/workflows/docs.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml index 986ea8ae8a..1a0d65df0b 100644 --- a/.github/workflows/docs.yml +++ b/.github/workflows/docs.yml @@ -35,6 +35,8 @@ jobs: runs-on: ubuntu-latest steps: - uses: actions/checkout@v3 + with: + ref: ${{ github.head_ref }} - uses: actions/setup-python@v4 with: From 60a801fb823c05ae104febcf8deb15ded05d067f Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Fri, 2 May 2025 05:42:44 +0000 Subject: [PATCH 5/7] Update tasks & benchmarks tables --- docs/benchmarks.md | 66 +++++++++++++++++++++++----------------------- 1 file changed, 33 insertions(+), 33 deletions(-) diff --git a/docs/benchmarks.md b/docs/benchmarks.md index 44b356e649..0eb5ed6521 100644 --- a/docs/benchmarks.md +++ b/docs/benchmarks.md @@ -8,40 +8,40 @@ The following table gives you an overview of the benchmarks in MTEB. | Name | Leaderboard name | # Tasks | Task Types | Domains | Languages | |------|------------------|---------|------------|---------|-----------| -| [BEIR](https://arxiv.org/abs/2104.08663) | BEIR | 15 | Retrieval: 15 | [Social, Financial, Reviews, Government, Programming, Encyclopaedic, Blog, Non-fiction, News, Written, Web, Medical, Academic] | eng | -| [BEIR-NL](https://arxiv.org/abs/2412.08329) | BEIR-NL | 15 | Retrieval: 15 | [Encyclopaedic, Non-fiction, Written, Web, Medical, Academic] | nld | -| [BRIGHT](https://brightbenchmark.github.io/) | BRIGHT | 1 | Retrieval: 1 | [Non-fiction, Written] | eng | -| [BRIGHT (long)](https://brightbenchmark.github.io/) | BRIGHT (long) | 1 | Retrieval: 1 | [Non-fiction, Written] | eng | -| [BuiltBench(eng)](https://arxiv.org/abs/2411.12056) | BuiltBench(eng) | 4 | Clustering: 2, Retrieval: 1, Reranking: 1 | [Written, Engineering] | eng | -| [ChemTEB](https://arxiv.org/abs/2412.00532) | Chemical | 27 | BitextMining: 1, Classification: 17, Clustering: 2, PairClassification: 5, Retrieval: 2 | [Chemistry] | fra,nld,zho,hin,ces,tur,kor,spa,jpn,msa,por,deu,eng | -| [CoIR](https://github.com/CoIR-team/coir) | Code Information Retrieval | 10 | Retrieval: 10 | [Programming, Written] | php,java,ruby,go,c++,python,javascript,eng,sql | +| [BEIR](https://arxiv.org/abs/2104.08663) | BEIR | 15 | Retrieval: 15 | [News, Social, Encyclopaedic, Web, Government, Written, Medical, Financial, Academic, Reviews, Programming, Blog, Non-fiction] | eng | +| [BEIR-NL](https://arxiv.org/abs/2412.08329) | BEIR-NL | 15 | Retrieval: 15 | [Encyclopaedic, Web, Written, Medical, Academic, Non-fiction] | nld | +| [BRIGHT](https://brightbenchmark.github.io/) | BRIGHT | 1 | Retrieval: 1 | [Written, Non-fiction] | eng | +| [BRIGHT (long)](https://brightbenchmark.github.io/) | BRIGHT (long) | 1 | Retrieval: 1 | [Written, Non-fiction] | eng | +| [BuiltBench(eng)](https://arxiv.org/abs/2411.12056) | BuiltBench(eng) | 4 | Clustering: 2, Retrieval: 1, Reranking: 1 | [Engineering, Written] | eng | +| [ChemTEB](https://arxiv.org/abs/2412.00532) | Chemical | 27 | BitextMining: 1, Classification: 17, Clustering: 2, PairClassification: 5, Retrieval: 2 | [Chemistry] | kor,hin,fra,ces,jpn,msa,tur,zho,deu,nld,eng,spa,por | +| [CoIR](https://github.com/CoIR-team/coir) | Code Information Retrieval | 10 | Retrieval: 10 | [Programming, Written] | c++,sql,ruby,eng,java,php,javascript,python,go | | [CodeRAG](https://arxiv.org/abs/2406.14497) | CodeRAG | 4 | Reranking: 4 | [Programming] | python | -| [Encodechka](https://github.com/avidale/encodechka) | Encodechka | 7 | STS: 2, Classification: 4, PairClassification: 1 | [Social, Government, Fiction, Non-fiction, News, Written, Web] | rus | +| [Encodechka](https://github.com/avidale/encodechka) | Encodechka | 7 | STS: 2, Classification: 4, PairClassification: 1 | [News, Social, Web, Government, Written, Fiction, Non-fiction] | rus | | [FollowIR](https://arxiv.org/abs/2403.15246) | Instruction Following | 3 | InstructionRetrieval: 3 | [News, Written] | eng | -| [LongEmbed](https://arxiv.org/abs/2404.12096v2) | Long-context Retrieval | 6 | Retrieval: 6 | [Spoken, Fiction, Encyclopaedic, Blog, Non-fiction, Written, Academic] | eng | -| [MIEB(Img)](https://arxiv.org/abs/2504.10471) | Image only | 49 | Any2AnyRetrieval: 15, ImageClassification: 22, ImageClustering: 5, VisualSTS(eng): 5, VisualSTS(multi): 2 | [Social, Reviews, Encyclopaedic, Blog, Non-fiction, News, Scene, Written, Web, Medical, Spoken] | fra,nld,cmn,ita,pol,tur,rus,ara,kor,spa,por,deu,eng | -| [MIEB(Multilingual)](https://arxiv.org/abs/2504.10471) | Image-Text, Multilingual | 130 | ImageClassification: 22, ImageClustering: 5, ZeroShotClassification: 23, VisionCentricQA: 6, Compositionality: 7, VisualSTS(eng): 7, Any2AnyRetrieval: 45, DocumentUnderstanding: 10, Any2AnyMultilingualRetrieval: 3, VisualSTS(multi): 2 | [Constructed, Social, Spoken, Reviews, Encyclopaedic, Blog, Non-fiction, News, Scene, Written, Web, Medical, Academic] | tha,fas,hrv,bul,jpn,deu,nld,quz,zho,heb,hin,ces,tur,vie,kor,rus,swa,ben,eng,ukr,ron,fra,ita,pol,fin,tel,ind,swe,cmn,fil,nor,dan,ara,spa,mri,por,est,ell,hun | -| [MIEB(eng)](https://arxiv.org/abs/2504.10471) | Image-Text, English | 125 | ImageClassification: 22, ImageClustering: 5, ZeroShotClassification: 23, VisionCentricQA: 6, Compositionality: 7, VisualSTS(eng): 7, Any2AnyRetrieval: 45, DocumentUnderstanding: 10 | [Constructed, Social, Spoken, Reviews, Encyclopaedic, Blog, Non-fiction, News, Scene, Written, Web, Medical, Academic] | eng | -| [MIEB(lite)](https://arxiv.org/abs/2504.10471) | Image-Text, Lite | 51 | ImageClassification: 8, ImageClustering: 2, ZeroShotClassification: 7, VisionCentricQA: 5, Compositionality: 6, VisualSTS(eng): 2, VisualSTS(multi): 2, Any2AnyRetrieval: 11, DocumentUnderstanding: 6, Any2AnyMultilingualRetrieval: 2 | [Social, Spoken, Reviews, Encyclopaedic, Blog, Non-fiction, News, Scene, Written, Web, Medical, Academic] | tha,fas,hrv,bul,jpn,deu,nld,quz,zho,heb,hin,tur,rus,vie,kor,ces,swa,ben,eng,ukr,ron,fra,ita,pol,fin,tel,ind,swe,cmn,fil,nor,dan,ara,spa,mri,por,est,ell,hun | -| [MINERSBitextMining](https://arxiv.org/pdf/2406.07424) | MINERSBitextMining | 7 | BitextMining: 7 | [Social, Reviews, Written] | eus,cat,ido,kzj,tam,tat,wuu,kat,mak,pes,heb,ibo,mar,cor,ile,ukr,rej,pol,fao,bhp,ber,mon,dsb,pms,epo,ara,ina,uig,bos,ell,nds,tha,war,deu,ang,uzb,nov,hye,hin,cha,rus,vie,nno,yor,ron,max,bug,mkd,nij,swe,cmn,awa,min,gsw,ace,spa,aze,est,bre,bjn,hau,khm,oci,kab,bul,ban,lat,sun,jpn,nob,hsb,lvs,kur,nld,cbk,bbc,ces,lit,yue,ben,eng,isl,sqi,amh,slk,fin,tuk,bew,lfn,slv,ast,jav,pam,por,swg,yid,hrv,glg,gla,xho,mui,bel,mal,abs,fry,arq,swh,tur,cym,kor,srp,urd,dtp,zsm,afr,gle,pcm,ceb,fra,ita,arz,tel,csb,ind,orv,mad,kaz,mhr,tzl,dan,hun,tgl | -| MTEB(Code, v1) | Code | 12 | Retrieval: 12 | [Programming, Written] | scala,rust,typescript,php,java,shell,swift,ruby,go,c++,python,javascript,eng,c,sql | -| MTEB(Europe, v1) | European | 74 | BitextMining: 7, Classification: 21, Clustering: 8, Retrieval: 15, InstructionRetrieval: 3, MultilabelClassification: 2, PairClassification: 6, Reranking: 3, STS: 9 | [Government, Programming, Non-fiction, Constructed, Legal, Subtitles, Fiction, Blog, Written, Religious, Web, Financial, Academic, Reviews, Encyclopaedic, News, Spoken, Social, Medical] | eus,hrv,bul,rom,nob,deu,nld,ces,lit,nno,eng,isl,lav,gle,ron,fra,slk,ita,pol,fin,swe,fao,mlt,slv,dan,spa,por,est,ell,hun | -| MTEB(Indic, v1) | Indic | 23 | BitextMining: 4, Clustering: 1, Classification: 13, PairClassification: 1, Retrieval: 2, Reranking: 1, STS: 1 | [Social, Legal, Constructed, Reviews, Government, Fiction, Encyclopaedic, Religious, News, Non-fiction, Written, Web, Spoken] | bod,pan,asm,snd,mal,san,tam,mai,bgc,npi,hin,mni,urd,guj,mar,ben,eng,nep,boy,mwr,mup,kas,pus,tel,brx,ory,bho,awa,hne,raj,gom,gbm,kan,sat,doi | -| MTEB(Law, v1) | Legal | 8 | Retrieval: 8 | [Legal, Written] | deu,eng,zho | -| MTEB(Medical, v1) | Medical | 12 | Retrieval: 9, Clustering: 2, Reranking: 1 | [Government, Non-fiction, Written, Web, Medical, Academic] | fra,zho,cmn,pol,rus,vie,ara,kor,spa,eng | -| MTEB(Multilingual, v1) | Multilingual | 132 | BitextMining: 13, Classification: 43, Clustering: 17, Retrieval: 18, InstructionRetrieval: 3, MultilabelClassification: 5, PairClassification: 11, Reranking: 6, STS: 16 | [Government, Programming, Non-fiction, Constructed, Legal, Subtitles, Fiction, Blog, Written, Religious, Web, Financial, Entertainment, Academic, Reviews, Encyclopaedic, News, Spoken, Social, Medical] | kbc,kpx,mks,rom,cat,snd,ido,ata,haw,ken,shi,aka,cjo,tfr,sny,mpm,pab,pes,bqp,lug,mgc,roo,ewe,dgz,maz,sri,stp,mar,kup,ukr,upv,rej,anh,cab,cbu,dov,auc,mhl,ptp,lid,mva,gdn,nbq,gun,mic,sps,bem,bkd,guh,bmr,kin,bhp,ber,amo,ycn,hlt,heg,kpg,epo,raj,ara,cgc,xav,bos,tir,ulk,tgp,ncj,cpu,lua,uli,knf,ptu,mcq,tbz,cco,mwp,lww,spl,noa,ura,swp,sag,bea,bjz,bgc,uzb,tgk,aon,tah,jic,apu,cni,zpo,swa,ilo,agr,nnq,kbp,enq,nep,nsn,ron,zai,lij,pap,fuv,tsw,ven,mir,cui,ppo,hns,wbi,kmr,mil,sgb,tod,ctu,gwi,mhr,mam,mbt,mih,gub,min,lim,kmh,bhl,aze,wnu,abx,oci,csy,anv,kgf,run,bul,agt,zaj,aeb,yml,kjs,mcp,cjk,dwr,jpn,bmh,hui,apn,fuc,hsb,ixl,jvn,knj,nld,nho,cbk,mxb,wim,jao,lit,luo,kqf,yue,grc,isl,mgw,zpz,amh,mcf,cao,chd,dgc,gnw,mie,myk,gaz,tuk,tlf,wln,huv,ipi,nya,kyg,big,crh,gul,quf,byx,ake,srq,tof,poe,amn,spy,slv,wer,nfa,hne,reg,pam,qul,por,div,ksj,tte,tpt,nas,wbp,otn,zpm,hrv,mco,mlp,glg,pbt,pma,yaa,gla,xho,pls,gaw,cav,fry,lac,bao,gyr,swh,djk,sbs,grn,ded,gnn,srn,nab,kmb,kmk,mlh,quy,mwr,tnk,ghs,wol,ksr,tel,yuj,csb,orv,mbj,awx,cnl,ngu,xtd,uzn,glk,hmn,zia,som,dan,wed,sua,qup,not,kql,blz,nhu,eus,sna,dhg,car,meu,lao,kzj,maa,tso,tam,omw,tat,trc,wuu,con,kqa,tee,dad,heb,zlm,qvs,guj,mxt,tet,kbq,txu,zaa,rai,huu,hat,als,bzj,kas,taq,cek,tzj,box,ctp,wsk,apc,ltz,isn,soy,yle,gng,mvn,tsn,etr,sot,xsi,chz,smk,ikw,tzo,kaq,cwe,mcr,qvw,acm,ell,acr,wnc,hvn,beu,mib,vid,fas,knc,maq,rop,far,cbi,tum,pag,tpa,deu,bus,soq,zca,ang,cpy,chf,nov,aer,too,zpc,hye,sah,hin,qvn,tke,dah,atb,tcs,qve,ztq,nno,zpq,lus,msa,gam,pib,aoj,aso,aai,att,mup,nij,uvh,tbf,kam,cub,tvk,zga,pri,zac,poy,cya,ncl,nss,knv,bbb,cle,pwg,cbr,jid,wuv,eri,ace,gvs,xon,tzm,gsw,crx,kyq,est,row,bre,inb,sgz,ubu,kwj,kab,kgp,atg,nhe,acq,bjv,nde,bzh,nob,zap,pir,lvs,ote,toc,bsn,nus,jae,tuf,umb,mxq,msm,ben,eng,mkn,leu,klv,kir,bon,aak,fuh,nhg,zao,mph,zty,bam,msy,mig,nor,bss,myw,nna,kkc,kqw,acu,avt,yid,tos,bsj,pan,ntp,cjv,tew,nqo,ino,bel,kmu,wal,mal,cta,sbe,tnc,iou,prf,qvz,buk,rgu,shj,zab,orm,ncu,zpl,mmo,sim,tur,agm,urd,zad,azg,aui,rmy,sab,tac,zsm,qxn,chv,gle,mux,vec,mmx,toj,kgk,nin,yuw,piu,ind,mgh,tiy,agn,eko,emp,imo,mbb,wiv,acf,khz,aau,bef,kaz,shn,nhr,tue,bjr,bvr,ign,bxh,cof,mwe,amr,usp,bmu,for,ziw,fon,kan,zas,aby,hun,ong,zyp,ebk,poh,lbb,nii,hmo,nou,mcb,emi,plu,kms,san,tna,nso,mey,snp,mak,mdy,cbs,gui,ibo,cnt,guo,gux,prs,boj,tgo,vmy,cor,ndj,bpr,usa,yap,sxb,kmo,muy,hix,suz,cbv,pus,pad,mos,brx,kyf,fao,kue,apr,ood,bho,chq,nak,azb,dsb,bzd,mxp,mzz,uri,uig,ina,tuc,maj,dww,bkx,naf,tif,mri,doi,kik,cpc,bbr,med,mpj,nds,scn,gmv,obo,kpf,rmc,yka,alq,apz,clu,nhw,seh,xbi,cth,agg,aly,msk,nhy,cha,meq,ter,bsp,yss,apb,yor,kek,khs,lbk,max,spp,bqc,cux,gvn,hla,hub,cmn,ory,mlt,hop,sin,bba,agu,met,geb,kne,gup,nys,waj,mbs,tca,ssx,sey,myu,tbo,khm,tim,kde,ban,lat,ian,glv,mbc,mti,ons,amp,khk,dob,gym,sbk,ars,qxh,qvc,iws,gah,cap,ces,nhi,hus,yby,srm,yre,zpu,mbl,fin,bco,mkl,mox,ydd,yva,pjt,cpa,dyu,aaz,boa,pao,ksd,azz,bew,kbm,ikk,mcd,wat,awb,nvm,rkb,jav,npl,mps,kos,qwh,yrb,kud,dzo,mek,bki,quh,ssw,twi,wrs,snc,esk,kje,kqc,aom,mpx,wos,amu,byr,lmo,kew,kea,zho,arq,qxo,tuo,fur,srp,zat,dgr,chk,mqb,cym,kor,tdt,smo,dtp,bjk,alp,nlg,mit,mkj,ita,rwo,cmo,kiw,ckb,cpb,kpr,zav,cuc,rug,sue,mad,wap,aoi,mbh,taw,spm,otm,nyu,tku,okv,fuf,zpv,bch,otq,tgl,xtm,krc,dik,kvg,arp,gfk,ntu,ubr,cme,tcz,bak,zar,zaw,amk,kat,mee,viv,fai,mqj,poi,tnn,szl,nif,bmk,ayr,fij,zos,ssd,kbh,msc,ile,bgt,sja,wmw,ssg,pol,dop,beo,dif,pah,zsr,mca,aii,kpw,mle,mon,mto,pms,ame,sus,wro,nop,agd,srd,cop,nca,kwd,tav,ton,tha,urb,lcm,bod,arb,cax,mjc,mio,klt,war,dji,arn,ktm,nko,tyv,wrk,amm,kto,wmt,rus,vie,qvm,cbt,urw,jni,llg,atd,yon,ltg,qvh,boy,dwy,bug,fue,are,tnp,mkd,crn,kdc,kvn,gof,zam,mau,swe,tmd,caf,snn,awa,fil,bdd,mpp,apw,lav,qub,xla,bkq,ttc,mpt,spa,gbm,kiz,wiu,sat,yaq,bjp,bjn,hau,amx,cac,asm,adz,tbg,udu,gai,sun,jiv,kmg,npi,kur,rro,faa,top,bbc,svk,hbo,abt,cuk,cut,cbc,ary,sqi,xed,lif,ese,hot,kyc,kze,bvd,xnn,urt,yut,slk,kpj,opm,tbc,mop,bps,txq,bgs,ffm,ntj,lfn,nuy,mag,aey,kyz,shp,ast,pio,lex,tiw,kdl,bnp,swg,mwc,caa,ajp,jac,daa,mwf,gvc,sll,aia,bhg,lin,snx,ruf,awk,yad,blw,cak,quc,mui,hto,lgl,arl,mai,plt,azj,ape,abs,kkl,sco,ndg,ngp,ots,mni,mlg,nwi,mav,uvl,afr,nch,kon,pcm,ceb,cso,gum,fra,gdr,miz,tpi,tpz,kac,taj,arz,zul,yal,pon,cot,djr,auy,gvf,kwf,kwi,mna,hch,tzl,myy,gom,amf,mya,msb | -| [MTEB(Scandinavian, v1)](https://kennethenevoldsen.github.io/scandinavian-embedding-benchmark/) | Scandinavian | 28 | BitextMining: 2, Classification: 13, Retrieval: 7, Clustering: 6 | [Social, Legal, Reviews, Government, Fiction, Encyclopaedic, Blog, Non-fiction, News, Written, Web, Spoken] | dan,nob,nno,swe,fao,isl | -| [MTEB(cmn, v1)](https://github.com/FlagOpen/FlagEmbedding/tree/master/research/C_MTEB) | Chinese | 32 | Retrieval: 8, Reranking: 4, PairClassification: 2, Clustering: 4, STS: 7, Classification: 7 | [Government, Non-fiction, Written, Financial, Entertainment, Academic, Medical] | cmn | -| [MTEB(deu, v1)](https://arxiv.org/html/2401.02709v1) | German | 19 | Classification: 6, Clustering: 4, PairClassification: 2, Reranking: 1, Retrieval: 4, STS: 2 | [Legal, Reviews, Encyclopaedic, Non-fiction, News, Written, Web, Spoken] | deu | -| MTEB(eng, v1) | English Legacy | 56 | Classification: 12, Retrieval: 15, Clustering: 11, Reranking: 4, STS: 10, PairClassification: 3, Summarization: 1 | [Social, Financial, Spoken, Reviews, Government, Programming, Encyclopaedic, Blog, Non-fiction, News, Written, Web, Medical, Academic] | eng | -| MTEB(eng, v2) | English | 41 | Retrieval: 10, Clustering: 8, Reranking: 2, STS: 9, Classification: 8, PairClassification: 3, Summarization: 1 | [Social, Financial, Spoken, Reviews, Programming, Encyclopaedic, Blog, Non-fiction, News, Written, Web, Medical, Academic] | eng | -| MTEB(fas, beta) | Farsi (BETA) | 60 | Classification: 18, Clustering: 5, PairClassification: 8, Reranking: 2, Retrieval: 21, STS: 3, BitextMining: 3 | [Social, Spoken, Reviews, Encyclopaedic, Blog, Religious, News, Written, Web, Medical, Academic] | fas | -| [MTEB(fra, v1)](https://arxiv.org/abs/2405.20468) | French | 25 | Classification: 6, Clustering: 7, PairClassification: 1, Reranking: 2, Retrieval: 5, STS: 3, Summarization: 1 | [Social, Legal, Reviews, Encyclopaedic, Non-fiction, News, Written, Web, Academic, Spoken] | eng,fra | -| [MTEB(jpn, v1)](https://github.com/sbintuitions/JMTEB) | Japanese | 16 | Clustering: 2, Classification: 4, STS: 2, PairClassification: 1, Retrieval: 6, Reranking: 1 | [Reviews, Encyclopaedic, Non-fiction, News, Written, Web, Academic, Spoken] | jpn | -| MTEB(kor, v1) | Korean | 6 | Classification: 1, Reranking: 1, Retrieval: 2, STS: 2 | [Reviews, Encyclopaedic, News, Written, Web, Spoken] | kor | -| [MTEB(pol, v1)](https://arxiv.org/abs/2405.10138) | Polish | 17 | Classification: 7, Clustering: 3, PairClassification: 4, STS: 3 | [Social, Legal, Reviews, Fiction, Non-fiction, News, Written, Web, Academic, Spoken] | pol | -| [MTEB(rus, v1)](https://aclanthology.org/2023.eacl-main.148/) | Russian | 23 | Classification: 9, Clustering: 3, MultilabelClassification: 2, PairClassification: 1, Reranking: 2, Retrieval: 3, STS: 3 | [Social, Reviews, Encyclopaedic, Blog, News, Written, Web, Academic, Spoken] | rus | -| [NanoBEIR](https://huggingface.co/collections/zeta-alpha-ai/nanobeir-66e1a0af21dfd93e620cd9f6) | NanoBEIR | 13 | Retrieval: 13 | [Social, Encyclopaedic, Non-fiction, News, Written, Web, Medical, Academic] | eng | -| [RAR-b](https://arxiv.org/abs/2404.06347) | Reasoning retrieval | 17 | Retrieval: 17 | [Encyclopaedic, Programming, Written] | eng | +| [LongEmbed](https://arxiv.org/abs/2404.12096v2) | Long-context Retrieval | 6 | Retrieval: 6 | [Encyclopaedic, Written, Spoken, Fiction, Academic, Blog, Non-fiction] | eng | +| [MIEB(Img)](https://arxiv.org/abs/2504.10471) | Image only | 49 | Any2AnyRetrieval: 15, ImageClassification: 22, ImageClustering: 5, VisualSTS(eng): 5, VisualSTS(multi): 2 | [News, Social, Encyclopaedic, Web, Written, Medical, Scene, Spoken, Reviews, Blog, Non-fiction] | ita,kor,rus,fra,tur,deu,nld,eng,spa,ara,pol,por,cmn | +| [MIEB(Multilingual)](https://arxiv.org/abs/2504.10471) | Image-Text, Multilingual | 130 | ImageClassification: 22, ImageClustering: 5, ZeroShotClassification: 23, VisionCentricQA: 6, Compositionality: 7, VisualSTS(eng): 7, Any2AnyRetrieval: 45, DocumentUnderstanding: 10, Any2AnyMultilingualRetrieval: 3, VisualSTS(multi): 2 | [News, Social, Encyclopaedic, Web, Constructed, Medical, Scene, Written, Spoken, Academic, Reviews, Blog, Non-fiction] | rus,hin,zho,swa,vie,deu,tel,nld,eng,fil,mri,bul,ben,fra,ukr,heb,ind,pol,spa,ara,fin,dan,ita,jpn,tur,fas,hun,hrv,swe,kor,ces,quz,tha,ron,est,por,cmn,nor,ell | +| [MIEB(eng)](https://arxiv.org/abs/2504.10471) | Image-Text, English | 125 | ImageClassification: 22, ImageClustering: 5, ZeroShotClassification: 23, VisionCentricQA: 6, Compositionality: 7, VisualSTS(eng): 7, Any2AnyRetrieval: 45, DocumentUnderstanding: 10 | [News, Social, Encyclopaedic, Web, Constructed, Medical, Scene, Written, Spoken, Academic, Reviews, Blog, Non-fiction] | eng | +| [MIEB(lite)](https://arxiv.org/abs/2504.10471) | Image-Text, Lite | 51 | ImageClassification: 8, ImageClustering: 2, ZeroShotClassification: 7, VisionCentricQA: 5, Compositionality: 6, VisualSTS(eng): 2, VisualSTS(multi): 2, Any2AnyRetrieval: 11, DocumentUnderstanding: 6, Any2AnyMultilingualRetrieval: 2 | [News, Social, Encyclopaedic, Web, Written, Scene, Medical, Spoken, Academic, Reviews, Blog, Non-fiction] | rus,hin,swa,zho,vie,deu,tel,nld,eng,fil,mri,bul,ben,fra,ukr,heb,ind,pol,spa,ara,fin,ita,dan,jpn,tur,fas,hun,hrv,swe,kor,ces,quz,tha,ron,est,por,cmn,nor,ell | +| [MINERSBitextMining](https://arxiv.org/pdf/2406.07424) | MINERSBitextMining | 7 | BitextMining: 7 | [Reviews, Written, Social] | kaz,afr,cat,tel,nld,gsw,ban,fra,cor,awa,mar,cym,ita,bbc,pam,ceb,ido,pes,swe,dtp,srp,swg,oci,ace,ron,est,pcm,mhr,fry,ina,ang,tam,bos,eus,kab,zsm,pms,abs,nob,mal,min,arq,jpn,tur,mkd,cbk,orv,bel,isl,bug,kor,ces,cha,yue,max,slk,tha,xho,hye,lat,tuk,lfn,aze,bhp,hin,ast,csb,bre,vie,uzb,deu,eng,sun,tgl,tat,tzl,ukr,heb,ind,sqi,hau,ara,fin,khm,fao,jav,bjn,lit,lvs,war,swh,por,cmn,rus,bew,kur,yor,ben,bul,uig,nov,epo,mak,kzj,pol,spa,rej,urd,dsb,mad,amh,dan,glg,ile,kat,mui,yid,wuu,hun,nij,hrv,mon,nno,slv,gle,ibo,hsb,arz,nds,ber,gla,ell | +| MTEB(Code, v1) | Code | 12 | Retrieval: 12 | [Programming, Written] | c,c++,shell,sql,ruby,scala,eng,typescript,java,php,swift,javascript,python,rust,go | +| MTEB(Europe, v1) | European | 74 | BitextMining: 7, Classification: 21, Clustering: 8, Retrieval: 15, InstructionRetrieval: 3, MultilabelClassification: 2, PairClassification: 6, Reranking: 3, STS: 9 | [Encyclopaedic, Web, Fiction, Religious, Legal, Government, Medical, Financial, Subtitles, Reviews, Non-fiction, News, Written, Constructed, Spoken, Academic, Social, Programming, Blog] | lav,deu,nld,eng,bul,fra,eus,rom,pol,nob,spa,fin,dan,ita,fao,hun,mlt,hrv,swe,isl,lit,nno,ces,slk,slv,gle,ron,est,por,ell | +| MTEB(Indic, v1) | Indic | 23 | BitextMining: 4, Clustering: 1, Classification: 13, PairClassification: 1, Retrieval: 2, Reranking: 1, STS: 1 | [News, Social, Encyclopaedic, Legal, Web, Government, Written, Constructed, Spoken, Fiction, Religious, Reviews, Non-fiction] | hin,mwr,doi,hne,pan,tel,eng,npi,gbm,brx,tam,ben,guj,bod,nep,awa,pus,mar,urd,mal,mup,boy,kas,bho,ory,snd,bgc,asm,raj,sat,san,mai,gom,mni,kan | +| MTEB(Law, v1) | Legal | 8 | Retrieval: 8 | [Written, Legal] | eng,zho,deu | +| MTEB(Medical, v1) | Medical | 12 | Retrieval: 9, Clustering: 2, Reranking: 1 | [Government, Web, Written, Medical, Academic, Non-fiction] | kor,rus,fra,zho,vie,pol,eng,spa,ara,cmn | +| MTEB(Multilingual, v1) | Multilingual | 132 | BitextMining: 13, Classification: 43, Clustering: 17, Retrieval: 18, InstructionRetrieval: 3, MultilabelClassification: 5, PairClassification: 11, Reranking: 6, STS: 16 | [Encyclopaedic, Web, Fiction, Religious, Entertainment, Legal, Government, Medical, Financial, Subtitles, Reviews, Non-fiction, News, Written, Constructed, Spoken, Academic, Social, Programming, Blog] | hla,pbt,ilo,swa,bzj,far,med,mwr,djr,dad,wrk,bmr,gsw,tbg,sah,bki,ssd,kze,ydd,cbs,seh,mqb,spl,awa,kdl,krc,cac,ceb,caa,glk,hus,qwh,aom,hmn,sat,crn,wiv,xtd,blw,qxn,swe,ycn,kyz,hvn,hat,lij,cth,zai,swg,qvz,aer,tet,wos,yka,est,ghs,xon,ssw,tnk,piu,bgs,pcm,rop,urt,zam,mxt,gvf,bak,max,noa,sus,aaz,hch,tam,aly,tum,bos,zsm,ztq,sue,bod,ziw,ake,nca,mwc,mag,gui,mto,sim,apr,dgc,min,nfa,lus,nus,kne,dob,khs,ken,lug,met,snp,azj,twi,bgc,tpa,naf,hlt,bbr,kpw,kor,cbt,apw,cta,ign,kpr,nuy,quf,zaw,zpo,tnn,dgz,amn,kms,eri,mil,hye,xho,otn,hin,gof,nho,zas,arp,eng,gum,ntp,tif,kgp,mgc,ber,aui,fuh,mqj,mwp,tod,tso,mxp,cav,trc,ind,kbm,ncj,ptu,zap,pao,con,kvn,qup,dgr,boy,rwo,bao,gyr,wuv,usp,bho,khk,bdd,bba,hop,isn,aey,zpc,mie,lcm,qvc,urb,hix,tnp,jiv,plt,mpt,kew,ubu,lvs,lex,meu,yrb,kac,mpx,anh,bef,smk,car,gam,ndg,guo,hne,apc,quh,knf,nyu,ots,kur,daa,cek,yor,bul,mcd,nov,mak,bjv,mkj,bon,nak,cab,emi,pol,fon,scn,kzj,aby,boj,ndj,kqw,toc,wnc,mad,mbt,chq,nnq,auc,sja,zpq,cui,ipi,mui,gub,snd,ulk,viv,arb,asm,nij,amo,kek,kje,mon,mox,dyu,mya,iws,nno,knj,jao,hsb,okv,bps,ell,doi,zga,ksd,nld,mee,run,maz,alq,cpc,brx,kmg,ikw,amm,ons,cnl,tpt,mlh,myk,maq,wsk,gux,aoi,tpz,xsi,att,chd,kgf,taw,llg,mup,yby,miz,abt,nhu,ayr,zad,mvn,iou,qvs,tew,ido,pes,ame,srp,wer,ncu,agt,amu,hub,lif,jac,buk,rai,nwi,ruf,dzo,hto,tzo,mbb,sin,zho,srm,meq,pan,lua,enq,tsn,for,usa,kbp,zsr,kin,bzh,ina,ang,bea,yap,tte,dov,bqp,mop,nas,tsw,mwe,chf,nhr,blz,bjr,myu,suz,bxh,atb,fue,wed,kbq,mti,apn,aak,jpn,nii,dhg,cot,opm,mkd,cbk,amf,bmh,orv,amx,aau,tbz,mmx,pio,mhl,san,tbc,ces,nsn,kea,too,rmc,tha,kan,lgl,lid,sbe,knc,gah,tuo,ast,txq,nhe,mos,mxq,nss,csb,xbi,uzb,mkn,tgk,uri,quy,bpr,gbm,tzl,nys,tlf,mjc,arl,heb,acr,grn,szl,shn,kwj,yut,reg,kmk,urw,pag,mmo,jic,sgz,ffm,orm,kqf,ksr,luo,kbc,mib,ars,smo,tnc,bhl,bjn,mey,bss,tah,mlt,kmu,txu,lit,kyg,war,pri,mni,stp,swh,huu,por,npl,agu,rus,cle,gmv,wnu,chz,alp,hui,quc,pon,bbb,dji,epo,amr,kkc,tmd,aka,ven,urd,sco,kas,glg,yue,kdc,tku,kaq,yid,wuu,ian,swp,ixl,svk,cux,cax,lww,roo,amp,zat,kpx,xav,gdn,gwi,nhi,tiw,awk,khz,gnn,cao,mih,zlm,mic,zao,eko,nds,yre,sag,gla,lim,nor,lbk,dif,kaz,fuf,leu,kud,ubr,cat,tee,kmr,geb,tzj,crh,als,guj,dop,mps,gdr,pus,soq,crx,mar,mpm,tdt,zav,tuc,tpi,mbj,kpf,lbb,srq,cuc,mdy,taq,kvg,dtp,cco,mca,poi,spy,cpu,tyv,kam,gom,bzd,kwd,qxh,ron,kqc,zul,tuf,jid,cuk,som,ckb,rkb,snn,ksj,kgk,yml,jni,mbl,ary,kjs,nna,ktm,upv,rom,azz,sri,bus,ewe,nob,srn,ino,tof,sxb,inb,mal,obo,mxb,sna,tur,ngp,tav,are,bco,bel,nif,cap,isl,mlg,vid,bug,nhg,qul,slk,fij,ote,hot,pma,rug,uzn,bjz,wro,lat,ptp,lav,cpy,zos,bhp,umb,tos,vie,bkd,mio,mva,sun,fil,npi,nou,tim,dwr,cya,kto,ngu,maj,tat,bkx,kmo,jvn,zaa,bmk,ctp,hau,fin,nko,cso,bhg,msk,waj,bkq,kwf,msa,dik,mit,bjp,bjk,wbp,jav,rro,tiy,bam,lin,ata,yuj,cut,cjk,ttc,aii,dww,tir,yss,azb,kpj,pls,dwy,lmo,cof,nab,shp,cop,ncl,zpu,aon,sey,wmw,acf,auy,wln,tke,tvk,guh,cgc,rmy,fuc,cjv,amk,tac,lac,wap,ben,yaa,fuv,cbc,apz,tue,zyp,byx,div,shi,rej,yad,bmu,zaj,dsb,wal,dan,cwe,gnw,nya,aai,uvh,kat,klv,spm,yva,prs,kiw,kmh,ile,raj,ntu,tna,yle,sps,vmy,mpp,gai,slv,abx,gle,mcf,ibo,zac,cmo,qvw,ood,yaq,caf,arq,bvr,taj,kyc,afr,xnn,nqo,tbf,tel,ban,mcp,boa,mri,gvc,fra,cor,kiz,lao,ajp,ura,nep,udu,mek,clu,sny,agg,gym,ita,cym,bbc,pam,kmb,shj,ory,vec,gfk,nin,pib,klt,kpg,nlg,mbs,kup,cjo,cpa,nhw,nde,apb,fai,haw,sab,toj,srd,oci,qvh,zpv,ace,msm,mig,aoj,bsj,poy,row,sbk,glv,knv,ssx,hbo,maa,mir,pwg,wmt,mhr,kik,kwi,fry,gng,byr,anv,cbu,myy,ssg,otq,acm,tca,eus,kab,kon,pms,abs,wol,gul,wiu,not,bnp,apu,ese,mam,mzz,spp,pjt,mco,poh,ltz,sot,cbi,faa,mbh,pab,pad,tfr,azg,cak,sbs,beu,bqc,emp,pir,hns,ton,zpz,esk,qve,qvm,beo,msb,prf,cha,csy,otm,sua,chk,ter,fur,nhy,box,etr,sll,zar,acu,mcq,tuk,mcb,ctu,agr,lfn,aze,top,mau,awx,tbo,chv,tzm,ikk,bre,snc,deu,heg,cpb,mcr,arn,mav,mna,zia,kkl,tgl,xla,nch,gup,mph,ebk,mux,adz,awb,mpj,ltg,agm,ukr,kql,sqi,aeb,ara,dah,muy,mwf,wim,xed,khm,agn,atg,fao,pap,cbv,ppo,ape,qub,nbq,xtm,kyq,kbh,myw,gaw,ded,gaz,tgp,yon,yal,tgo,plu,bvd,qxo,snx,cmn,cme,soy,mbc,tcs,zpl,kir,aia,rgu,big,bsn,cbr,yuw,imo,wbi,hmo,bew,cub,nvm,mkl,gvs,atd,mgw,msc,zpm,uig,tcz,bgt,acq,gun,zty,kde,ong,kyf,kos,ntj,msy,poe,spa,zab,pah,mks,qvn,bch,kue,zca,djk,amh,fas,cni,aso,hun,uli,hrv,wat,nso,agd,grc,avt,bsp,mai,wrs,uvl,bem,huv,nop,cnt,jae,kqa,arz,mgh,gvn,sgb,mlp,omw,mle | +| [MTEB(Scandinavian, v1)](https://kennethenevoldsen.github.io/scandinavian-embedding-benchmark/) | Scandinavian | 28 | BitextMining: 2, Classification: 13, Retrieval: 7, Clustering: 6 | [News, Social, Encyclopaedic, Legal, Web, Government, Written, Spoken, Fiction, Reviews, Blog, Non-fiction] | dan,nno,fao,isl,nob,swe | +| [MTEB(cmn, v1)](https://github.com/FlagOpen/FlagEmbedding/tree/master/research/C_MTEB) | Chinese | 32 | Retrieval: 8, Reranking: 4, PairClassification: 2, Clustering: 4, STS: 7, Classification: 7 | [Entertainment, Government, Written, Medical, Financial, Academic, Non-fiction] | cmn | +| [MTEB(deu, v1)](https://arxiv.org/html/2401.02709v1) | German | 19 | Classification: 6, Clustering: 4, PairClassification: 2, Reranking: 1, Retrieval: 4, STS: 2 | [News, Encyclopaedic, Legal, Web, Written, Spoken, Reviews, Non-fiction] | deu | +| MTEB(eng, v1) | English Legacy | 56 | Classification: 12, Retrieval: 15, Clustering: 11, Reranking: 4, STS: 10, PairClassification: 3, Summarization: 1 | [News, Social, Encyclopaedic, Web, Government, Written, Medical, Financial, Spoken, Academic, Reviews, Programming, Blog, Non-fiction] | eng | +| MTEB(eng, v2) | English | 41 | Retrieval: 10, Clustering: 8, Reranking: 2, STS: 9, Classification: 8, PairClassification: 3, Summarization: 1 | [News, Social, Encyclopaedic, Web, Written, Medical, Financial, Spoken, Academic, Reviews, Programming, Blog, Non-fiction] | eng | +| MTEB(fas, beta) | Farsi (BETA) | 60 | Classification: 18, Clustering: 5, PairClassification: 8, Reranking: 2, Retrieval: 21, STS: 3, BitextMining: 3 | [News, Social, Encyclopaedic, Web, Written, Medical, Spoken, Religious, Academic, Reviews, Blog] | fas | +| [MTEB(fra, v1)](https://arxiv.org/abs/2405.20468) | French | 25 | Classification: 6, Clustering: 7, PairClassification: 1, Reranking: 2, Retrieval: 5, STS: 3, Summarization: 1 | [News, Social, Encyclopaedic, Legal, Web, Written, Spoken, Academic, Reviews, Non-fiction] | eng,fra | +| [MTEB(jpn, v1)](https://github.com/sbintuitions/JMTEB) | Japanese | 16 | Clustering: 2, Classification: 4, STS: 2, PairClassification: 1, Retrieval: 6, Reranking: 1 | [News, Encyclopaedic, Web, Written, Spoken, Academic, Reviews, Non-fiction] | jpn | +| MTEB(kor, v1) | Korean | 6 | Classification: 1, Reranking: 1, Retrieval: 2, STS: 2 | [News, Encyclopaedic, Web, Written, Spoken, Reviews] | kor | +| [MTEB(pol, v1)](https://arxiv.org/abs/2405.10138) | Polish | 17 | Classification: 7, Clustering: 3, PairClassification: 4, STS: 3 | [News, Social, Legal, Web, Written, Spoken, Fiction, Academic, Reviews, Non-fiction] | pol | +| [MTEB(rus, v1)](https://aclanthology.org/2023.eacl-main.148/) | Russian | 23 | Classification: 9, Clustering: 3, MultilabelClassification: 2, PairClassification: 1, Reranking: 2, Retrieval: 3, STS: 3 | [News, Social, Encyclopaedic, Web, Written, Spoken, Academic, Reviews, Blog] | rus | +| [NanoBEIR](https://huggingface.co/collections/zeta-alpha-ai/nanobeir-66e1a0af21dfd93e620cd9f6) | NanoBEIR | 13 | Retrieval: 13 | [News, Social, Encyclopaedic, Web, Written, Medical, Academic, Non-fiction] | eng | +| [RAR-b](https://arxiv.org/abs/2404.06347) | Reasoning retrieval | 17 | Retrieval: 17 | [Programming, Written, Encyclopaedic] | eng | From 34343fea8ace3e2ac35214e1388b25a304c51c30 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Fri, 2 May 2025 06:39:10 +0000 Subject: [PATCH 6/7] Update tasks & benchmarks tables --- docs/benchmarks.md | 64 +++++++++++++++++++++++----------------------- 1 file changed, 32 insertions(+), 32 deletions(-) diff --git a/docs/benchmarks.md b/docs/benchmarks.md index 0eb5ed6521..4b17c17988 100644 --- a/docs/benchmarks.md +++ b/docs/benchmarks.md @@ -8,40 +8,40 @@ The following table gives you an overview of the benchmarks in MTEB. | Name | Leaderboard name | # Tasks | Task Types | Domains | Languages | |------|------------------|---------|------------|---------|-----------| -| [BEIR](https://arxiv.org/abs/2104.08663) | BEIR | 15 | Retrieval: 15 | [News, Social, Encyclopaedic, Web, Government, Written, Medical, Financial, Academic, Reviews, Programming, Blog, Non-fiction] | eng | -| [BEIR-NL](https://arxiv.org/abs/2412.08329) | BEIR-NL | 15 | Retrieval: 15 | [Encyclopaedic, Web, Written, Medical, Academic, Non-fiction] | nld | -| [BRIGHT](https://brightbenchmark.github.io/) | BRIGHT | 1 | Retrieval: 1 | [Written, Non-fiction] | eng | -| [BRIGHT (long)](https://brightbenchmark.github.io/) | BRIGHT (long) | 1 | Retrieval: 1 | [Written, Non-fiction] | eng | +| [BEIR](https://arxiv.org/abs/2104.08663) | BEIR | 15 | Retrieval: 15 | [Academic, Blog, Encyclopaedic, Financial, Government, Medical, News, Non-fiction, Programming, Reviews, Social, Web, Written] | eng | +| [BEIR-NL](https://arxiv.org/abs/2412.08329) | BEIR-NL | 15 | Retrieval: 15 | [Academic, Encyclopaedic, Medical, Non-fiction, Web, Written] | nld | +| [BRIGHT](https://brightbenchmark.github.io/) | BRIGHT | 1 | Retrieval: 1 | [Non-fiction, Written] | eng | +| [BRIGHT (long)](https://brightbenchmark.github.io/) | BRIGHT (long) | 1 | Retrieval: 1 | [Non-fiction, Written] | eng | | [BuiltBench(eng)](https://arxiv.org/abs/2411.12056) | BuiltBench(eng) | 4 | Clustering: 2, Retrieval: 1, Reranking: 1 | [Engineering, Written] | eng | -| [ChemTEB](https://arxiv.org/abs/2412.00532) | Chemical | 27 | BitextMining: 1, Classification: 17, Clustering: 2, PairClassification: 5, Retrieval: 2 | [Chemistry] | kor,hin,fra,ces,jpn,msa,tur,zho,deu,nld,eng,spa,por | -| [CoIR](https://github.com/CoIR-team/coir) | Code Information Retrieval | 10 | Retrieval: 10 | [Programming, Written] | c++,sql,ruby,eng,java,php,javascript,python,go | +| [ChemTEB](https://arxiv.org/abs/2412.00532) | Chemical | 27 | BitextMining: 1, Classification: 17, Clustering: 2, PairClassification: 5, Retrieval: 2 | [Chemistry] | ces,deu,eng,fra,hin,jpn,kor,msa,nld,por,spa,tur,zho | +| [CoIR](https://github.com/CoIR-team/coir) | Code Information Retrieval | 10 | Retrieval: 10 | [Programming, Written] | c++,eng,go,java,javascript,php,python,ruby,sql | | [CodeRAG](https://arxiv.org/abs/2406.14497) | CodeRAG | 4 | Reranking: 4 | [Programming] | python | -| [Encodechka](https://github.com/avidale/encodechka) | Encodechka | 7 | STS: 2, Classification: 4, PairClassification: 1 | [News, Social, Web, Government, Written, Fiction, Non-fiction] | rus | +| [Encodechka](https://github.com/avidale/encodechka) | Encodechka | 7 | STS: 2, Classification: 4, PairClassification: 1 | [Fiction, Government, News, Non-fiction, Social, Web, Written] | rus | | [FollowIR](https://arxiv.org/abs/2403.15246) | Instruction Following | 3 | InstructionRetrieval: 3 | [News, Written] | eng | -| [LongEmbed](https://arxiv.org/abs/2404.12096v2) | Long-context Retrieval | 6 | Retrieval: 6 | [Encyclopaedic, Written, Spoken, Fiction, Academic, Blog, Non-fiction] | eng | -| [MIEB(Img)](https://arxiv.org/abs/2504.10471) | Image only | 49 | Any2AnyRetrieval: 15, ImageClassification: 22, ImageClustering: 5, VisualSTS(eng): 5, VisualSTS(multi): 2 | [News, Social, Encyclopaedic, Web, Written, Medical, Scene, Spoken, Reviews, Blog, Non-fiction] | ita,kor,rus,fra,tur,deu,nld,eng,spa,ara,pol,por,cmn | -| [MIEB(Multilingual)](https://arxiv.org/abs/2504.10471) | Image-Text, Multilingual | 130 | ImageClassification: 22, ImageClustering: 5, ZeroShotClassification: 23, VisionCentricQA: 6, Compositionality: 7, VisualSTS(eng): 7, Any2AnyRetrieval: 45, DocumentUnderstanding: 10, Any2AnyMultilingualRetrieval: 3, VisualSTS(multi): 2 | [News, Social, Encyclopaedic, Web, Constructed, Medical, Scene, Written, Spoken, Academic, Reviews, Blog, Non-fiction] | rus,hin,zho,swa,vie,deu,tel,nld,eng,fil,mri,bul,ben,fra,ukr,heb,ind,pol,spa,ara,fin,dan,ita,jpn,tur,fas,hun,hrv,swe,kor,ces,quz,tha,ron,est,por,cmn,nor,ell | -| [MIEB(eng)](https://arxiv.org/abs/2504.10471) | Image-Text, English | 125 | ImageClassification: 22, ImageClustering: 5, ZeroShotClassification: 23, VisionCentricQA: 6, Compositionality: 7, VisualSTS(eng): 7, Any2AnyRetrieval: 45, DocumentUnderstanding: 10 | [News, Social, Encyclopaedic, Web, Constructed, Medical, Scene, Written, Spoken, Academic, Reviews, Blog, Non-fiction] | eng | -| [MIEB(lite)](https://arxiv.org/abs/2504.10471) | Image-Text, Lite | 51 | ImageClassification: 8, ImageClustering: 2, ZeroShotClassification: 7, VisionCentricQA: 5, Compositionality: 6, VisualSTS(eng): 2, VisualSTS(multi): 2, Any2AnyRetrieval: 11, DocumentUnderstanding: 6, Any2AnyMultilingualRetrieval: 2 | [News, Social, Encyclopaedic, Web, Written, Scene, Medical, Spoken, Academic, Reviews, Blog, Non-fiction] | rus,hin,swa,zho,vie,deu,tel,nld,eng,fil,mri,bul,ben,fra,ukr,heb,ind,pol,spa,ara,fin,ita,dan,jpn,tur,fas,hun,hrv,swe,kor,ces,quz,tha,ron,est,por,cmn,nor,ell | -| [MINERSBitextMining](https://arxiv.org/pdf/2406.07424) | MINERSBitextMining | 7 | BitextMining: 7 | [Reviews, Written, Social] | kaz,afr,cat,tel,nld,gsw,ban,fra,cor,awa,mar,cym,ita,bbc,pam,ceb,ido,pes,swe,dtp,srp,swg,oci,ace,ron,est,pcm,mhr,fry,ina,ang,tam,bos,eus,kab,zsm,pms,abs,nob,mal,min,arq,jpn,tur,mkd,cbk,orv,bel,isl,bug,kor,ces,cha,yue,max,slk,tha,xho,hye,lat,tuk,lfn,aze,bhp,hin,ast,csb,bre,vie,uzb,deu,eng,sun,tgl,tat,tzl,ukr,heb,ind,sqi,hau,ara,fin,khm,fao,jav,bjn,lit,lvs,war,swh,por,cmn,rus,bew,kur,yor,ben,bul,uig,nov,epo,mak,kzj,pol,spa,rej,urd,dsb,mad,amh,dan,glg,ile,kat,mui,yid,wuu,hun,nij,hrv,mon,nno,slv,gle,ibo,hsb,arz,nds,ber,gla,ell | -| MTEB(Code, v1) | Code | 12 | Retrieval: 12 | [Programming, Written] | c,c++,shell,sql,ruby,scala,eng,typescript,java,php,swift,javascript,python,rust,go | -| MTEB(Europe, v1) | European | 74 | BitextMining: 7, Classification: 21, Clustering: 8, Retrieval: 15, InstructionRetrieval: 3, MultilabelClassification: 2, PairClassification: 6, Reranking: 3, STS: 9 | [Encyclopaedic, Web, Fiction, Religious, Legal, Government, Medical, Financial, Subtitles, Reviews, Non-fiction, News, Written, Constructed, Spoken, Academic, Social, Programming, Blog] | lav,deu,nld,eng,bul,fra,eus,rom,pol,nob,spa,fin,dan,ita,fao,hun,mlt,hrv,swe,isl,lit,nno,ces,slk,slv,gle,ron,est,por,ell | -| MTEB(Indic, v1) | Indic | 23 | BitextMining: 4, Clustering: 1, Classification: 13, PairClassification: 1, Retrieval: 2, Reranking: 1, STS: 1 | [News, Social, Encyclopaedic, Legal, Web, Government, Written, Constructed, Spoken, Fiction, Religious, Reviews, Non-fiction] | hin,mwr,doi,hne,pan,tel,eng,npi,gbm,brx,tam,ben,guj,bod,nep,awa,pus,mar,urd,mal,mup,boy,kas,bho,ory,snd,bgc,asm,raj,sat,san,mai,gom,mni,kan | -| MTEB(Law, v1) | Legal | 8 | Retrieval: 8 | [Written, Legal] | eng,zho,deu | -| MTEB(Medical, v1) | Medical | 12 | Retrieval: 9, Clustering: 2, Reranking: 1 | [Government, Web, Written, Medical, Academic, Non-fiction] | kor,rus,fra,zho,vie,pol,eng,spa,ara,cmn | -| MTEB(Multilingual, v1) | Multilingual | 132 | BitextMining: 13, Classification: 43, Clustering: 17, Retrieval: 18, InstructionRetrieval: 3, MultilabelClassification: 5, PairClassification: 11, Reranking: 6, STS: 16 | [Encyclopaedic, Web, Fiction, Religious, Entertainment, Legal, Government, Medical, Financial, Subtitles, Reviews, Non-fiction, News, Written, Constructed, Spoken, Academic, Social, Programming, Blog] | hla,pbt,ilo,swa,bzj,far,med,mwr,djr,dad,wrk,bmr,gsw,tbg,sah,bki,ssd,kze,ydd,cbs,seh,mqb,spl,awa,kdl,krc,cac,ceb,caa,glk,hus,qwh,aom,hmn,sat,crn,wiv,xtd,blw,qxn,swe,ycn,kyz,hvn,hat,lij,cth,zai,swg,qvz,aer,tet,wos,yka,est,ghs,xon,ssw,tnk,piu,bgs,pcm,rop,urt,zam,mxt,gvf,bak,max,noa,sus,aaz,hch,tam,aly,tum,bos,zsm,ztq,sue,bod,ziw,ake,nca,mwc,mag,gui,mto,sim,apr,dgc,min,nfa,lus,nus,kne,dob,khs,ken,lug,met,snp,azj,twi,bgc,tpa,naf,hlt,bbr,kpw,kor,cbt,apw,cta,ign,kpr,nuy,quf,zaw,zpo,tnn,dgz,amn,kms,eri,mil,hye,xho,otn,hin,gof,nho,zas,arp,eng,gum,ntp,tif,kgp,mgc,ber,aui,fuh,mqj,mwp,tod,tso,mxp,cav,trc,ind,kbm,ncj,ptu,zap,pao,con,kvn,qup,dgr,boy,rwo,bao,gyr,wuv,usp,bho,khk,bdd,bba,hop,isn,aey,zpc,mie,lcm,qvc,urb,hix,tnp,jiv,plt,mpt,kew,ubu,lvs,lex,meu,yrb,kac,mpx,anh,bef,smk,car,gam,ndg,guo,hne,apc,quh,knf,nyu,ots,kur,daa,cek,yor,bul,mcd,nov,mak,bjv,mkj,bon,nak,cab,emi,pol,fon,scn,kzj,aby,boj,ndj,kqw,toc,wnc,mad,mbt,chq,nnq,auc,sja,zpq,cui,ipi,mui,gub,snd,ulk,viv,arb,asm,nij,amo,kek,kje,mon,mox,dyu,mya,iws,nno,knj,jao,hsb,okv,bps,ell,doi,zga,ksd,nld,mee,run,maz,alq,cpc,brx,kmg,ikw,amm,ons,cnl,tpt,mlh,myk,maq,wsk,gux,aoi,tpz,xsi,att,chd,kgf,taw,llg,mup,yby,miz,abt,nhu,ayr,zad,mvn,iou,qvs,tew,ido,pes,ame,srp,wer,ncu,agt,amu,hub,lif,jac,buk,rai,nwi,ruf,dzo,hto,tzo,mbb,sin,zho,srm,meq,pan,lua,enq,tsn,for,usa,kbp,zsr,kin,bzh,ina,ang,bea,yap,tte,dov,bqp,mop,nas,tsw,mwe,chf,nhr,blz,bjr,myu,suz,bxh,atb,fue,wed,kbq,mti,apn,aak,jpn,nii,dhg,cot,opm,mkd,cbk,amf,bmh,orv,amx,aau,tbz,mmx,pio,mhl,san,tbc,ces,nsn,kea,too,rmc,tha,kan,lgl,lid,sbe,knc,gah,tuo,ast,txq,nhe,mos,mxq,nss,csb,xbi,uzb,mkn,tgk,uri,quy,bpr,gbm,tzl,nys,tlf,mjc,arl,heb,acr,grn,szl,shn,kwj,yut,reg,kmk,urw,pag,mmo,jic,sgz,ffm,orm,kqf,ksr,luo,kbc,mib,ars,smo,tnc,bhl,bjn,mey,bss,tah,mlt,kmu,txu,lit,kyg,war,pri,mni,stp,swh,huu,por,npl,agu,rus,cle,gmv,wnu,chz,alp,hui,quc,pon,bbb,dji,epo,amr,kkc,tmd,aka,ven,urd,sco,kas,glg,yue,kdc,tku,kaq,yid,wuu,ian,swp,ixl,svk,cux,cax,lww,roo,amp,zat,kpx,xav,gdn,gwi,nhi,tiw,awk,khz,gnn,cao,mih,zlm,mic,zao,eko,nds,yre,sag,gla,lim,nor,lbk,dif,kaz,fuf,leu,kud,ubr,cat,tee,kmr,geb,tzj,crh,als,guj,dop,mps,gdr,pus,soq,crx,mar,mpm,tdt,zav,tuc,tpi,mbj,kpf,lbb,srq,cuc,mdy,taq,kvg,dtp,cco,mca,poi,spy,cpu,tyv,kam,gom,bzd,kwd,qxh,ron,kqc,zul,tuf,jid,cuk,som,ckb,rkb,snn,ksj,kgk,yml,jni,mbl,ary,kjs,nna,ktm,upv,rom,azz,sri,bus,ewe,nob,srn,ino,tof,sxb,inb,mal,obo,mxb,sna,tur,ngp,tav,are,bco,bel,nif,cap,isl,mlg,vid,bug,nhg,qul,slk,fij,ote,hot,pma,rug,uzn,bjz,wro,lat,ptp,lav,cpy,zos,bhp,umb,tos,vie,bkd,mio,mva,sun,fil,npi,nou,tim,dwr,cya,kto,ngu,maj,tat,bkx,kmo,jvn,zaa,bmk,ctp,hau,fin,nko,cso,bhg,msk,waj,bkq,kwf,msa,dik,mit,bjp,bjk,wbp,jav,rro,tiy,bam,lin,ata,yuj,cut,cjk,ttc,aii,dww,tir,yss,azb,kpj,pls,dwy,lmo,cof,nab,shp,cop,ncl,zpu,aon,sey,wmw,acf,auy,wln,tke,tvk,guh,cgc,rmy,fuc,cjv,amk,tac,lac,wap,ben,yaa,fuv,cbc,apz,tue,zyp,byx,div,shi,rej,yad,bmu,zaj,dsb,wal,dan,cwe,gnw,nya,aai,uvh,kat,klv,spm,yva,prs,kiw,kmh,ile,raj,ntu,tna,yle,sps,vmy,mpp,gai,slv,abx,gle,mcf,ibo,zac,cmo,qvw,ood,yaq,caf,arq,bvr,taj,kyc,afr,xnn,nqo,tbf,tel,ban,mcp,boa,mri,gvc,fra,cor,kiz,lao,ajp,ura,nep,udu,mek,clu,sny,agg,gym,ita,cym,bbc,pam,kmb,shj,ory,vec,gfk,nin,pib,klt,kpg,nlg,mbs,kup,cjo,cpa,nhw,nde,apb,fai,haw,sab,toj,srd,oci,qvh,zpv,ace,msm,mig,aoj,bsj,poy,row,sbk,glv,knv,ssx,hbo,maa,mir,pwg,wmt,mhr,kik,kwi,fry,gng,byr,anv,cbu,myy,ssg,otq,acm,tca,eus,kab,kon,pms,abs,wol,gul,wiu,not,bnp,apu,ese,mam,mzz,spp,pjt,mco,poh,ltz,sot,cbi,faa,mbh,pab,pad,tfr,azg,cak,sbs,beu,bqc,emp,pir,hns,ton,zpz,esk,qve,qvm,beo,msb,prf,cha,csy,otm,sua,chk,ter,fur,nhy,box,etr,sll,zar,acu,mcq,tuk,mcb,ctu,agr,lfn,aze,top,mau,awx,tbo,chv,tzm,ikk,bre,snc,deu,heg,cpb,mcr,arn,mav,mna,zia,kkl,tgl,xla,nch,gup,mph,ebk,mux,adz,awb,mpj,ltg,agm,ukr,kql,sqi,aeb,ara,dah,muy,mwf,wim,xed,khm,agn,atg,fao,pap,cbv,ppo,ape,qub,nbq,xtm,kyq,kbh,myw,gaw,ded,gaz,tgp,yon,yal,tgo,plu,bvd,qxo,snx,cmn,cme,soy,mbc,tcs,zpl,kir,aia,rgu,big,bsn,cbr,yuw,imo,wbi,hmo,bew,cub,nvm,mkl,gvs,atd,mgw,msc,zpm,uig,tcz,bgt,acq,gun,zty,kde,ong,kyf,kos,ntj,msy,poe,spa,zab,pah,mks,qvn,bch,kue,zca,djk,amh,fas,cni,aso,hun,uli,hrv,wat,nso,agd,grc,avt,bsp,mai,wrs,uvl,bem,huv,nop,cnt,jae,kqa,arz,mgh,gvn,sgb,mlp,omw,mle | -| [MTEB(Scandinavian, v1)](https://kennethenevoldsen.github.io/scandinavian-embedding-benchmark/) | Scandinavian | 28 | BitextMining: 2, Classification: 13, Retrieval: 7, Clustering: 6 | [News, Social, Encyclopaedic, Legal, Web, Government, Written, Spoken, Fiction, Reviews, Blog, Non-fiction] | dan,nno,fao,isl,nob,swe | -| [MTEB(cmn, v1)](https://github.com/FlagOpen/FlagEmbedding/tree/master/research/C_MTEB) | Chinese | 32 | Retrieval: 8, Reranking: 4, PairClassification: 2, Clustering: 4, STS: 7, Classification: 7 | [Entertainment, Government, Written, Medical, Financial, Academic, Non-fiction] | cmn | -| [MTEB(deu, v1)](https://arxiv.org/html/2401.02709v1) | German | 19 | Classification: 6, Clustering: 4, PairClassification: 2, Reranking: 1, Retrieval: 4, STS: 2 | [News, Encyclopaedic, Legal, Web, Written, Spoken, Reviews, Non-fiction] | deu | -| MTEB(eng, v1) | English Legacy | 56 | Classification: 12, Retrieval: 15, Clustering: 11, Reranking: 4, STS: 10, PairClassification: 3, Summarization: 1 | [News, Social, Encyclopaedic, Web, Government, Written, Medical, Financial, Spoken, Academic, Reviews, Programming, Blog, Non-fiction] | eng | -| MTEB(eng, v2) | English | 41 | Retrieval: 10, Clustering: 8, Reranking: 2, STS: 9, Classification: 8, PairClassification: 3, Summarization: 1 | [News, Social, Encyclopaedic, Web, Written, Medical, Financial, Spoken, Academic, Reviews, Programming, Blog, Non-fiction] | eng | -| MTEB(fas, beta) | Farsi (BETA) | 60 | Classification: 18, Clustering: 5, PairClassification: 8, Reranking: 2, Retrieval: 21, STS: 3, BitextMining: 3 | [News, Social, Encyclopaedic, Web, Written, Medical, Spoken, Religious, Academic, Reviews, Blog] | fas | -| [MTEB(fra, v1)](https://arxiv.org/abs/2405.20468) | French | 25 | Classification: 6, Clustering: 7, PairClassification: 1, Reranking: 2, Retrieval: 5, STS: 3, Summarization: 1 | [News, Social, Encyclopaedic, Legal, Web, Written, Spoken, Academic, Reviews, Non-fiction] | eng,fra | -| [MTEB(jpn, v1)](https://github.com/sbintuitions/JMTEB) | Japanese | 16 | Clustering: 2, Classification: 4, STS: 2, PairClassification: 1, Retrieval: 6, Reranking: 1 | [News, Encyclopaedic, Web, Written, Spoken, Academic, Reviews, Non-fiction] | jpn | -| MTEB(kor, v1) | Korean | 6 | Classification: 1, Reranking: 1, Retrieval: 2, STS: 2 | [News, Encyclopaedic, Web, Written, Spoken, Reviews] | kor | -| [MTEB(pol, v1)](https://arxiv.org/abs/2405.10138) | Polish | 17 | Classification: 7, Clustering: 3, PairClassification: 4, STS: 3 | [News, Social, Legal, Web, Written, Spoken, Fiction, Academic, Reviews, Non-fiction] | pol | -| [MTEB(rus, v1)](https://aclanthology.org/2023.eacl-main.148/) | Russian | 23 | Classification: 9, Clustering: 3, MultilabelClassification: 2, PairClassification: 1, Reranking: 2, Retrieval: 3, STS: 3 | [News, Social, Encyclopaedic, Web, Written, Spoken, Academic, Reviews, Blog] | rus | -| [NanoBEIR](https://huggingface.co/collections/zeta-alpha-ai/nanobeir-66e1a0af21dfd93e620cd9f6) | NanoBEIR | 13 | Retrieval: 13 | [News, Social, Encyclopaedic, Web, Written, Medical, Academic, Non-fiction] | eng | -| [RAR-b](https://arxiv.org/abs/2404.06347) | Reasoning retrieval | 17 | Retrieval: 17 | [Programming, Written, Encyclopaedic] | eng | +| [LongEmbed](https://arxiv.org/abs/2404.12096v2) | Long-context Retrieval | 6 | Retrieval: 6 | [Academic, Blog, Encyclopaedic, Fiction, Non-fiction, Spoken, Written] | eng | +| [MIEB(Img)](https://arxiv.org/abs/2504.10471) | Image only | 49 | Any2AnyRetrieval: 15, ImageClassification: 22, ImageClustering: 5, VisualSTS(eng): 5, VisualSTS(multi): 2 | [Blog, Encyclopaedic, Medical, News, Non-fiction, Reviews, Scene, Social, Spoken, Web, Written] | ara,cmn,deu,eng,fra,ita,kor,nld,pol,por,rus,spa,tur | +| [MIEB(Multilingual)](https://arxiv.org/abs/2504.10471) | Image-Text, Multilingual | 130 | ImageClassification: 22, ImageClustering: 5, ZeroShotClassification: 23, VisionCentricQA: 6, Compositionality: 7, VisualSTS(eng): 7, Any2AnyRetrieval: 45, DocumentUnderstanding: 10, Any2AnyMultilingualRetrieval: 3, VisualSTS(multi): 2 | [Academic, Blog, Constructed, Encyclopaedic, Medical, News, Non-fiction, Reviews, Scene, Social, Spoken, Web, Written] | ara,ben,bul,ces,cmn,dan,deu,ell,eng,est,fas,fil,fin,fra,heb,hin,hrv,hun,ind,ita,jpn,kor,mri,nld,nor,pol,por,quz,ron,rus,spa,swa,swe,tel,tha,tur,ukr,vie,zho | +| [MIEB(eng)](https://arxiv.org/abs/2504.10471) | Image-Text, English | 125 | ImageClassification: 22, ImageClustering: 5, ZeroShotClassification: 23, VisionCentricQA: 6, Compositionality: 7, VisualSTS(eng): 7, Any2AnyRetrieval: 45, DocumentUnderstanding: 10 | [Academic, Blog, Constructed, Encyclopaedic, Medical, News, Non-fiction, Reviews, Scene, Social, Spoken, Web, Written] | eng | +| [MIEB(lite)](https://arxiv.org/abs/2504.10471) | Image-Text, Lite | 51 | ImageClassification: 8, ImageClustering: 2, ZeroShotClassification: 7, VisionCentricQA: 5, Compositionality: 6, VisualSTS(eng): 2, VisualSTS(multi): 2, Any2AnyRetrieval: 11, DocumentUnderstanding: 6, Any2AnyMultilingualRetrieval: 2 | [Academic, Blog, Encyclopaedic, Medical, News, Non-fiction, Reviews, Scene, Social, Spoken, Web, Written] | ara,ben,bul,ces,cmn,dan,deu,ell,eng,est,fas,fil,fin,fra,heb,hin,hrv,hun,ind,ita,jpn,kor,mri,nld,nor,pol,por,quz,ron,rus,spa,swa,swe,tel,tha,tur,ukr,vie,zho | +| [MINERSBitextMining](https://arxiv.org/pdf/2406.07424) | MINERSBitextMining | 7 | BitextMining: 7 | [Reviews, Social, Written] | abs,ace,afr,amh,ang,ara,arq,arz,ast,awa,aze,ban,bbc,bel,ben,ber,bew,bhp,bjn,bos,bre,bug,bul,cat,cbk,ceb,ces,cha,cmn,cor,csb,cym,dan,deu,dsb,dtp,ell,eng,epo,est,eus,fao,fin,fra,fry,gla,gle,glg,gsw,hau,heb,hin,hrv,hsb,hun,hye,ibo,ido,ile,ina,ind,isl,ita,jav,jpn,kab,kat,kaz,khm,kor,kur,kzj,lat,lfn,lit,lvs,mad,mak,mal,mar,max,mhr,min,mkd,mon,mui,nds,nij,nld,nno,nob,nov,oci,orv,pam,pcm,pes,pms,pol,por,rej,ron,rus,slk,slv,spa,sqi,srp,sun,swe,swg,swh,tam,tat,tel,tgl,tha,tuk,tur,tzl,uig,ukr,urd,uzb,vie,war,wuu,xho,yid,yor,yue,zsm | +| MTEB(Code, v1) | Code | 12 | Retrieval: 12 | [Programming, Written] | c,c++,eng,go,java,javascript,php,python,ruby,rust,scala,shell,sql,swift,typescript | +| MTEB(Europe, v1) | European | 74 | BitextMining: 7, Classification: 21, Clustering: 8, Retrieval: 15, InstructionRetrieval: 3, MultilabelClassification: 2, PairClassification: 6, Reranking: 3, STS: 9 | [Academic, Blog, Constructed, Encyclopaedic, Fiction, Financial, Government, Legal, Medical, News, Non-fiction, Programming, Religious, Reviews, Social, Spoken, Subtitles, Web, Written] | bul,ces,dan,deu,ell,eng,est,eus,fao,fin,fra,gle,hrv,hun,isl,ita,lav,lit,mlt,nld,nno,nob,pol,por,rom,ron,slk,slv,spa,swe | +| MTEB(Indic, v1) | Indic | 23 | BitextMining: 4, Clustering: 1, Classification: 13, PairClassification: 1, Retrieval: 2, Reranking: 1, STS: 1 | [Constructed, Encyclopaedic, Fiction, Government, Legal, News, Non-fiction, Religious, Reviews, Social, Spoken, Web, Written] | asm,awa,ben,bgc,bho,bod,boy,brx,doi,eng,gbm,gom,guj,hin,hne,kan,kas,mai,mal,mar,mni,mup,mwr,nep,npi,ory,pan,pus,raj,san,sat,snd,tam,tel,urd | +| MTEB(Law, v1) | Legal | 8 | Retrieval: 8 | [Legal, Written] | deu,eng,zho | +| MTEB(Medical, v1) | Medical | 12 | Retrieval: 9, Clustering: 2, Reranking: 1 | [Academic, Government, Medical, Non-fiction, Web, Written] | ara,cmn,eng,fra,kor,pol,rus,spa,vie,zho | +| MTEB(Multilingual, v1) | Multilingual | 132 | BitextMining: 13, Classification: 43, Clustering: 17, Retrieval: 18, InstructionRetrieval: 3, MultilabelClassification: 5, PairClassification: 11, Reranking: 6, STS: 16 | [Academic, Blog, Constructed, Encyclopaedic, Entertainment, Fiction, Financial, Government, Legal, Medical, News, Non-fiction, Programming, Religious, Reviews, Social, Spoken, Subtitles, Web, Written] | aai,aak,aau,aaz,abs,abt,abx,aby,ace,acf,acm,acq,acr,acu,adz,aeb,aer,aey,afr,agd,agg,agm,agn,agr,agt,agu,aia,aii,ajp,aka,ake,alp,alq,als,aly,ame,amf,amh,amk,amm,amn,amo,amp,amr,amu,amx,ang,anh,anv,aoi,aoj,aom,aon,apb,apc,ape,apn,apr,apu,apw,apz,ara,arb,are,arl,arn,arp,arq,ars,ary,arz,asm,aso,ast,ata,atb,atd,atg,att,auc,aui,auy,avt,awa,awb,awk,awx,ayr,azb,aze,azg,azj,azz,bak,bam,ban,bao,bba,bbb,bbc,bbr,bch,bco,bdd,bea,bef,bel,bem,ben,beo,ber,beu,bew,bgc,bgs,bgt,bhg,bhl,bho,bhp,big,bjk,bjn,bjp,bjr,bjv,bjz,bkd,bki,bkq,bkx,blw,blz,bmh,bmk,bmr,bmu,bnp,boa,bod,boj,bon,bos,box,boy,bpr,bps,bqc,bqp,bre,brx,bsj,bsn,bsp,bss,bug,buk,bul,bus,bvd,bvr,bxh,byr,byx,bzd,bzh,bzj,caa,cab,cac,caf,cak,cao,cap,car,cat,cav,cax,cbc,cbi,cbk,cbr,cbs,cbt,cbu,cbv,cco,ceb,cek,ces,cgc,cha,chd,chf,chk,chq,chv,chz,cjk,cjo,cjv,ckb,cle,clu,cme,cmn,cmo,cni,cnl,cnt,cof,con,cop,cor,cot,cpa,cpb,cpc,cpu,cpy,crh,crn,crx,csb,cso,csy,cta,cth,ctp,ctu,cub,cuc,cui,cuk,cut,cux,cwe,cya,cym,daa,dad,dah,dan,ded,deu,dgc,dgr,dgz,dhg,dif,dik,div,dji,djk,djr,dob,doi,dop,dov,dsb,dtp,dwr,dww,dwy,dyu,dzo,ebk,eko,ell,emi,emp,eng,enq,epo,eri,ese,esk,est,etr,eus,ewe,faa,fai,fao,far,fas,ffm,fij,fil,fin,fon,for,fra,fry,fuc,fue,fuf,fuh,fur,fuv,gah,gai,gam,gaw,gaz,gbm,gdn,gdr,geb,gfk,ghs,gla,gle,glg,glk,glv,gmv,gng,gnn,gnw,gof,gom,grc,grn,gsw,gub,guh,gui,guj,gul,gum,gun,guo,gup,gux,gvc,gvf,gvn,gvs,gwi,gym,gyr,hat,hau,haw,hbo,hch,heb,heg,hin,hix,hla,hlt,hmn,hmo,hne,hns,hop,hot,hrv,hsb,hto,hub,hui,hun,hus,huu,huv,hvn,hye,ian,ibo,ido,ign,ikk,ikw,ile,ilo,imo,ina,inb,ind,ino,iou,ipi,isl,isn,ita,iws,ixl,jac,jae,jao,jav,jic,jid,jiv,jni,jpn,jvn,kab,kac,kam,kan,kaq,kas,kat,kaz,kbc,kbh,kbm,kbp,kbq,kdc,kde,kdl,kea,kek,ken,kew,kgf,kgk,kgp,khk,khm,khs,khz,kik,kin,kir,kiw,kiz,kje,kjs,kkc,kkl,klt,klv,kmb,kmg,kmh,kmk,kmo,kmr,kms,kmu,knc,kne,knf,knj,knv,kon,kor,kos,kpf,kpg,kpj,kpr,kpw,kpx,kqa,kqc,kqf,kql,kqw,krc,ksd,ksj,ksr,ktm,kto,kud,kue,kup,kur,kvg,kvn,kwd,kwf,kwi,kwj,kyc,kyf,kyg,kyq,kyz,kze,kzj,lac,lao,lat,lav,lbb,lbk,lcm,leu,lex,lfn,lgl,lid,lif,lij,lim,lin,lit,llg,lmo,ltg,ltz,lua,lug,luo,lus,lvs,lww,maa,mad,mag,mai,maj,mak,mal,mam,maq,mar,mau,mav,max,maz,mbb,mbc,mbh,mbj,mbl,mbs,mbt,mca,mcb,mcd,mcf,mco,mcp,mcq,mcr,mdy,med,mee,mek,meq,met,meu,mey,mgc,mgh,mgw,mhl,mhr,mib,mic,mie,mig,mih,mil,min,mio,mir,mit,miz,mjc,mkd,mkj,mkl,mkn,mks,mle,mlg,mlh,mlp,mlt,mmo,mmx,mna,mni,mon,mop,mos,mox,mph,mpj,mpm,mpp,mps,mpt,mpx,mqb,mqj,mri,msa,msb,msc,msk,msm,msy,mti,mto,mui,mup,mux,muy,mva,mvn,mwc,mwe,mwf,mwp,mwr,mxb,mxp,mxq,mxt,mya,myk,myu,myw,myy,mzz,nab,naf,nak,nas,nbq,nca,nch,ncj,ncl,ncu,nde,ndg,ndj,nds,nep,nfa,ngp,ngu,nhe,nhg,nhi,nho,nhr,nhu,nhw,nhy,nif,nii,nij,nin,nko,nld,nlg,nna,nno,nnq,noa,nob,nop,nor,not,nou,nov,npi,npl,nqo,nsn,nso,nss,ntj,ntp,ntu,nus,nuy,nvm,nwi,nya,nys,nyu,obo,oci,okv,omw,ong,ons,ood,opm,orm,orv,ory,ote,otm,otn,otq,ots,pab,pad,pag,pah,pam,pan,pao,pap,pbt,pcm,pes,pib,pio,pir,piu,pjt,pls,plt,plu,pma,pms,poe,poh,poi,pol,pon,por,poy,ppo,prf,pri,prs,ptp,ptu,pus,pwg,qub,quc,quf,quh,qul,qup,quy,qvc,qve,qvh,qvm,qvn,qvs,qvw,qvz,qwh,qxh,qxn,qxo,rai,raj,reg,rej,rgu,rkb,rmc,rmy,rom,ron,roo,rop,row,rro,ruf,rug,run,rus,rwo,sab,sag,sah,san,sat,sbe,sbk,sbs,scn,sco,seh,sey,sgb,sgz,shi,shj,shn,shp,sim,sin,sja,slk,sll,slv,smk,smo,sna,snc,snd,snn,snp,snx,sny,som,soq,sot,soy,spa,spl,spm,spp,sps,spy,sqi,srd,sri,srm,srn,srp,srq,ssd,ssg,ssw,ssx,stp,sua,sue,sun,sus,suz,svk,swa,swe,swg,swh,swp,sxb,szl,tac,tah,taj,tam,taq,tat,tav,taw,tbc,tbf,tbg,tbo,tbz,tca,tcs,tcz,tdt,tee,tel,ter,tet,tew,tfr,tgk,tgl,tgo,tgp,tha,tif,tim,tir,tiw,tiy,tke,tku,tlf,tmd,tna,tnc,tnk,tnn,tnp,toc,tod,tof,toj,ton,too,top,tos,tpa,tpi,tpt,tpz,trc,tsn,tso,tsw,ttc,tte,tuc,tue,tuf,tuk,tum,tuo,tur,tvk,twi,txq,txu,tyv,tzj,tzl,tzm,tzo,ubr,ubu,udu,uig,ukr,uli,ulk,umb,upv,ura,urb,urd,uri,urt,urw,usa,usp,uvh,uvl,uzb,uzn,vec,ven,vid,vie,viv,vmy,waj,wal,wap,war,wat,wbi,wbp,wed,wer,wim,wiu,wiv,wln,wmt,wmw,wnc,wnu,wol,wos,wrk,wro,wrs,wsk,wuu,wuv,xav,xbi,xed,xho,xla,xnn,xon,xsi,xtd,xtm,yaa,yad,yal,yap,yaq,yby,ycn,ydd,yid,yka,yle,yml,yon,yor,yrb,yre,yss,yue,yuj,yut,yuw,yva,zaa,zab,zac,zad,zai,zaj,zam,zao,zap,zar,zas,zat,zav,zaw,zca,zga,zho,zia,ziw,zlm,zos,zpc,zpl,zpm,zpo,zpq,zpu,zpv,zpz,zsm,zsr,ztq,zty,zul,zyp | +| [MTEB(Scandinavian, v1)](https://kennethenevoldsen.github.io/scandinavian-embedding-benchmark/) | Scandinavian | 28 | BitextMining: 2, Classification: 13, Retrieval: 7, Clustering: 6 | [Blog, Encyclopaedic, Fiction, Government, Legal, News, Non-fiction, Reviews, Social, Spoken, Web, Written] | dan,fao,isl,nno,nob,swe | +| [MTEB(cmn, v1)](https://github.com/FlagOpen/FlagEmbedding/tree/master/research/C_MTEB) | Chinese | 32 | Retrieval: 8, Reranking: 4, PairClassification: 2, Clustering: 4, STS: 7, Classification: 7 | [Academic, Entertainment, Financial, Government, Medical, Non-fiction, Written] | cmn | +| [MTEB(deu, v1)](https://arxiv.org/html/2401.02709v1) | German | 19 | Classification: 6, Clustering: 4, PairClassification: 2, Reranking: 1, Retrieval: 4, STS: 2 | [Encyclopaedic, Legal, News, Non-fiction, Reviews, Spoken, Web, Written] | deu | +| MTEB(eng, v1) | English Legacy | 56 | Classification: 12, Retrieval: 15, Clustering: 11, Reranking: 4, STS: 10, PairClassification: 3, Summarization: 1 | [Academic, Blog, Encyclopaedic, Financial, Government, Medical, News, Non-fiction, Programming, Reviews, Social, Spoken, Web, Written] | eng | +| MTEB(eng, v2) | English | 41 | Retrieval: 10, Clustering: 8, Reranking: 2, STS: 9, Classification: 8, PairClassification: 3, Summarization: 1 | [Academic, Blog, Encyclopaedic, Financial, Medical, News, Non-fiction, Programming, Reviews, Social, Spoken, Web, Written] | eng | +| MTEB(fas, beta) | Farsi (BETA) | 60 | Classification: 18, Clustering: 5, PairClassification: 8, Reranking: 2, Retrieval: 21, STS: 3, BitextMining: 3 | [Academic, Blog, Encyclopaedic, Medical, News, Religious, Reviews, Social, Spoken, Web, Written] | fas | +| [MTEB(fra, v1)](https://arxiv.org/abs/2405.20468) | French | 25 | Classification: 6, Clustering: 7, PairClassification: 1, Reranking: 2, Retrieval: 5, STS: 3, Summarization: 1 | [Academic, Encyclopaedic, Legal, News, Non-fiction, Reviews, Social, Spoken, Web, Written] | eng,fra | +| [MTEB(jpn, v1)](https://github.com/sbintuitions/JMTEB) | Japanese | 16 | Clustering: 2, Classification: 4, STS: 2, PairClassification: 1, Retrieval: 6, Reranking: 1 | [Academic, Encyclopaedic, News, Non-fiction, Reviews, Spoken, Web, Written] | jpn | +| MTEB(kor, v1) | Korean | 6 | Classification: 1, Reranking: 1, Retrieval: 2, STS: 2 | [Encyclopaedic, News, Reviews, Spoken, Web, Written] | kor | +| [MTEB(pol, v1)](https://arxiv.org/abs/2405.10138) | Polish | 17 | Classification: 7, Clustering: 3, PairClassification: 4, STS: 3 | [Academic, Fiction, Legal, News, Non-fiction, Reviews, Social, Spoken, Web, Written] | pol | +| [MTEB(rus, v1)](https://aclanthology.org/2023.eacl-main.148/) | Russian | 23 | Classification: 9, Clustering: 3, MultilabelClassification: 2, PairClassification: 1, Reranking: 2, Retrieval: 3, STS: 3 | [Academic, Blog, Encyclopaedic, News, Reviews, Social, Spoken, Web, Written] | rus | +| [NanoBEIR](https://huggingface.co/collections/zeta-alpha-ai/nanobeir-66e1a0af21dfd93e620cd9f6) | NanoBEIR | 13 | Retrieval: 13 | [Academic, Encyclopaedic, Medical, News, Non-fiction, Social, Web, Written] | eng | +| [RAR-b](https://arxiv.org/abs/2404.06347) | Reasoning retrieval | 17 | Retrieval: 17 | [Encyclopaedic, Programming, Written] | eng | From 162425f176ac1f7cc029df724569716876f9342f Mon Sep 17 00:00:00 2001 From: Roman Solomatin <36135455+Samoed@users.noreply.github.com> Date: Fri, 2 May 2025 09:45:00 +0300 Subject: [PATCH 7/7] remove test lines --- .github/workflows/docs.yml | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml index 1a0d65df0b..a861d60a97 100644 --- a/.github/workflows/docs.yml +++ b/.github/workflows/docs.yml @@ -31,12 +31,10 @@ jobs: make build-docs create-table-and-push: -# if: github.ref == 'refs/heads/main' + if: github.ref == 'refs/heads/main' runs-on: ubuntu-latest steps: - uses: actions/checkout@v3 - with: - ref: ${{ github.head_ref }} - uses: actions/setup-python@v4 with: