|
497 | 497 | ] |
498 | 498 | }, |
499 | 499 | { |
500 | | - "cell_type": "markdown", |
501 | | - "metadata": {}, |
502 | | - "source": [ |
503 | | - "### VoyageAI\n", |
504 | | - "\n", |
505 | | - "[VoyageAI](https://dash.voyageai.com/) allows you to implement language AI into your product. The `VoyageAITextVectorizer` makes it simple to use RedisVL with the embeddings models at VoyageAI. For this you will need to install `voyageai`.\n", |
506 | | - "\n", |
507 | | - "```bash\n", |
508 | | - "pip install voyageai\n", |
509 | | - "```" |
510 | | - ] |
511 | | - }, |
512 | | - { |
513 | | - "cell_type": "code", |
514 | | - "execution_count": 11, |
515 | | - "metadata": {}, |
516 | | - "outputs": [], |
517 | | - "source": [ |
518 | | - "import getpass\n", |
519 | | - "# setup the API Key\n", |
520 | | - "api_key = os.environ.get(\"VOYAGE_API_KEY\") or getpass.getpass(\"Enter your VoyageAI API key: \")" |
521 | | - ] |
522 | | - }, |
523 | | - { |
524 | | - "cell_type": "markdown", |
525 | | - "metadata": {}, |
526 | | - "source": [ |
527 | | - "\n", |
528 | | - "Special attention needs to be paid to the `input_type` parameter for each `embed` call. For example, for embedding \n", |
529 | | - "queries, you should set `input_type='query'`; for embedding documents, set `input_type='document'`. See\n", |
530 | | - "more information [here](https://docs.voyageai.com/docs/embeddings)" |
531 | | - ] |
532 | | - }, |
533 | | - { |
534 | | - "cell_type": "code", |
535 | | - "execution_count": 12, |
536 | | - "metadata": {}, |
537 | | - "outputs": [ |
538 | | - { |
539 | | - "name": "stdout", |
540 | | - "output_type": "stream", |
541 | | - "text": [ |
542 | | - "Vector dimensions: 1024\n", |
543 | | - "[0.015814896672964096, 0.046988241374492645, -0.00518248463049531, -0.05383478105068207, -0.015586535446345806, -0.0837097093462944, 0.03744547441601753, -0.007797810714691877, 0.00717928446829319, 0.06857716292142868]\n", |
544 | | - "Vector dimensions: 1024\n", |
545 | | - "[0.006725038401782513, 0.01441393606364727, -0.030212024226784706, -0.06782275438308716, -0.021446991711854935, -0.07667966187000275, 0.01804908737540245, -0.015767497941851616, -0.02152789570391178, 0.049741245806217194]\n" |
546 | | - ] |
547 | | - } |
548 | | - ], |
549 | | - "source": [ |
550 | | - "from redisvl.utils.vectorize import VoyageAITextVectorizer\n", |
551 | | - "\n", |
552 | | - "# create a vectorizer\n", |
553 | | - "vo = VoyageAITextVectorizer(\n", |
554 | | - " model=\"voyage-law-2\", # Please check the available models at https://docs.voyageai.com/docs/embeddings\n", |
555 | | - " api_config={\"api_key\": api_key},\n", |
556 | | - ")\n", |
557 | | - "\n", |
558 | | - "# embed a search query\n", |
559 | | - "test = vo.embed(\"This is a test sentence.\", input_type='query')\n", |
560 | | - "print(\"Vector dimensions: \", len(test))\n", |
561 | | - "print(test[:10])\n", |
562 | | - "\n", |
563 | | - "# embed a document\n", |
564 | | - "test = vo.embed(\"This is a test sentence.\", input_type='document')\n", |
565 | | - "print(\"Vector dimensions: \", len(test))\n", |
566 | | - "print(test[:10])" |
567 | | - ] |
568 | | - }, |
569 | | - { |
| 500 | + "cell_type": "markdown", |
| 501 | + "metadata": {}, |
| 502 | + "source": [ |
| 503 | + "### VoyageAI\n", |
| 504 | + "\n", |
| 505 | + "[VoyageAI](https://dash.voyageai.com/) allows you to integrate language AI into your product. The `VoyageAITextVectorizer` makes it simple to use RedisVL with the embedding models at VoyageAI. For this you will need to install `voyageai`.\n", |
| 506 | + "\n", |
| 507 | + "```bash\n", |
| 508 | + "pip install voyageai\n", |
| 509 | + "```" |
| 510 | + ] |
| 511 | + }, |
| 512 | + { |
| 513 | + "cell_type": "code", |
| 514 | + "execution_count": 11, |
| 515 | + "metadata": {}, |
| 516 | + "outputs": [], |
| 517 | + "source": [ |
| 518 | + "import getpass\n", |
| 519 | + "# setup the API Key\n", |
| 520 | + "api_key = os.environ.get(\"VOYAGE_API_KEY\") or getpass.getpass(\"Enter your VoyageAI API key: \")" |
| 521 | + ] |
| 522 | + }, |
| 523 | + { |
| 524 | + "cell_type": "markdown", |
| 525 | + "metadata": {}, |
| 526 | + "source": [ |
| 527 | + "\n", |
| 528 | + "Pay special attention to the `input_type` parameter of each `embed` call. When embedding\n", |
| 529 | + "queries, set `input_type='query'`; when embedding documents, set `input_type='document'`. See the\n", |
| 530 | + "[VoyageAI embeddings documentation](https://docs.voyageai.com/docs/embeddings) for more information." |
| 531 | + ] |
| 532 | + }, |
| 533 | + { |
| 534 | + "cell_type": "code", |
| 535 | + "execution_count": 12, |
| 536 | + "metadata": {}, |
| 537 | + "outputs": [ |
| 538 | + { |
| 539 | + "name": "stdout", |
| 540 | + "output_type": "stream", |
| 541 | + "text": [ |
| 542 | + "Vector dimensions: 1024\n", |
| 543 | + "[0.015814896672964096, 0.046988241374492645, -0.00518248463049531, -0.05383478105068207, -0.015586535446345806, -0.0837097093462944, 0.03744547441601753, -0.007797810714691877, 0.00717928446829319, 0.06857716292142868]\n", |
| 544 | + "Vector dimensions: 1024\n", |
| 545 | + "[0.006725038401782513, 0.01441393606364727, -0.030212024226784706, -0.06782275438308716, -0.021446991711854935, -0.07667966187000275, 0.01804908737540245, -0.015767497941851616, -0.02152789570391178, 0.049741245806217194]\n" |
| 546 | + ] |
| 547 | + } |
| 548 | + ], |
| 549 | + "source": [ |
| 550 | + "from redisvl.utils.vectorize import VoyageAITextVectorizer\n", |
| 551 | + "\n", |
| 552 | + "# create a vectorizer\n", |
| 553 | + "vo = VoyageAITextVectorizer(\n", |
| 554 | + " model=\"voyage-law-2\", # Please check the available models at https://docs.voyageai.com/docs/embeddings\n", |
| 555 | + " api_config={\"api_key\": api_key},\n", |
| 556 | + ")\n", |
| 557 | + "\n", |
| 558 | + "# embed a search query\n", |
| 559 | + "test = vo.embed(\"This is a test sentence.\", input_type='query')\n", |
| 560 | + "print(\"Vector dimensions: \", len(test))\n", |
| 561 | + "print(test[:10])\n", |
| 562 | + "\n", |
| 563 | + "# embed a document\n", |
| 564 | + "test = vo.embed(\"This is a test sentence.\", input_type='document')\n", |
| 565 | + "print(\"Vector dimensions: \", len(test))\n", |
| 566 | + "print(test[:10])" |
| 567 | + ] |
| 568 | + }, |
| 569 | + { |
570 | 570 | "cell_type": "markdown", |
571 | 571 | "metadata": {}, |
572 | 572 | "source": [ |
|
831 | 831 | "!rvl index listall" |
832 | 832 | ] |
833 | 833 | }, |
| 834 | + { |
| 835 | + "cell_type": "markdown", |
| 836 | + "metadata": {}, |
| 837 | + "source": [ |
| 838 | + "Loading data into RedisVL is easy. `load` expects a list of dictionaries in which each vector is stored as a bytes buffer." |
| 839 | + ] |
| 840 | + }, |
834 | 841 | { |
835 | 842 | "cell_type": "code", |
836 | | - "execution_count": null, |
| 843 | + "execution_count": 17, |
837 | 844 | "metadata": {}, |
838 | 845 | "outputs": [ |
839 | 846 | { |
840 | 847 | "data": { |
841 | 848 | "text/plain": [ |
842 | | - "['doc:17c401b679ce43cb82f3ab2280ad02f2',\n", |
843 | | - " 'doc:3fc0502bec434b17a3f06e20824b2e59',\n", |
844 | | - " 'doc:199f17b0e5d24dcaa1fd4fb41558150c']" |
| 849 | + "['doc:529c6d58da9e4be4a29dd0481f59c286',\n", |
| 850 | + " 'doc:81aa1ef8a9494b299e8593548d0af34a',\n", |
| 851 | + " 'doc:6ab03d6da8f041ffa3fdb83996d3b297']" |
845 | 852 | ] |
846 | 853 | }, |
847 | 854 | "execution_count": 17, |
|
850 | 857 | } |
851 | 858 | ], |
852 | 859 | "source": [ |
853 | | - "# load expects an iterable of dictionaries where\n", |
854 | | - "# the vector is stored as a bytes buffer\n", |
855 | 860 | "from redisvl.redis.utils import array_to_buffer\n", |
856 | 861 | "\n", |
| 862 | + "embeddings = hf.embed_many(sentences)\n", |
| 863 | + "\n", |
857 | 864 | "data = [{\"text\": t,\n", |
858 | 865 | " \"embedding\": array_to_buffer(v, dtype=\"float32\")}\n", |
859 | 866 | " for t, v in zip(sentences, embeddings)]\n", |
|
870 | 877 | "name": "stdout", |
871 | 878 | "output_type": "stream", |
872 | 879 | "text": [ |
873 | | - "That is a happy dog 0.160862326622\n", |
874 | | - "That is a happy person 0.273598492146\n", |
875 | | - "Today is a sunny day 0.744559407234\n" |
| 880 | + "That is a happy dog 0.160862207413\n", |
| 881 | + "That is a happy dog 0.160862207413\n", |
| 882 | + "That is a happy person 0.273598313332\n" |
876 | 883 | ] |
877 | 884 | } |
878 | 885 | ], |
|
905 | 912 | }, |
906 | 913 | { |
907 | 914 | "cell_type": "code", |
908 | | - "execution_count": null, |
| 915 | + "execution_count": 19, |
909 | 916 | "metadata": {}, |
910 | 917 | "outputs": [ |
911 | 918 | { |
|
914 | 921 | "True" |
915 | 922 | ] |
916 | 923 | }, |
917 | | - "execution_count": 4, |
| 924 | + "execution_count": 19, |
918 | 925 | "metadata": {}, |
919 | 926 | "output_type": "execute_result" |
920 | 927 | } |
|
933 | 940 | }, |
934 | 941 | { |
935 | 942 | "cell_type": "code", |
936 | | - "execution_count": null, |
| 943 | + "execution_count": 20, |
937 | 944 | "metadata": {}, |
938 | 945 | "outputs": [], |
939 | 946 | "source": [ |
|
0 commit comments