|
1 | 1 | {
|
2 | 2 | "cells": [
|
3 | 3 | {
|
4 |
| - "attachments": {}, |
5 | 4 | "cell_type": "markdown",
|
6 | 5 | "id": "13afcae7",
|
7 | 6 | "metadata": {},
|
|
18 | 17 | ]
|
19 | 18 | },
|
20 | 19 | {
|
21 |
| - "attachments": {}, |
22 | 20 | "cell_type": "markdown",
|
23 | 21 | "id": "68e75fb9",
|
24 | 22 | "metadata": {},
|
|
42 | 40 | ]
|
43 | 41 | },
|
44 | 42 | {
|
45 |
| - "attachments": {}, |
46 | 43 | "cell_type": "markdown",
|
47 | 44 | "id": "83811610-7df3-4ede-b268-68a6a83ba9e2",
|
48 | 45 | "metadata": {},
|
|
86 | 83 | ]
|
87 | 84 | },
|
88 | 85 | {
|
89 |
| - "attachments": {}, |
90 | 86 | "cell_type": "markdown",
|
91 | 87 | "id": "bf7f6fc4",
|
92 | 88 | "metadata": {},
|
|
145 | 141 | ]
|
146 | 142 | },
|
147 | 143 | {
|
148 |
| - "attachments": {}, |
149 | 144 | "cell_type": "markdown",
|
150 | 145 | "id": "5ecaab6d",
|
151 | 146 | "metadata": {},
|
|
203 | 198 | ]
|
204 | 199 | },
|
205 | 200 | {
|
206 |
| - "attachments": {}, |
207 | 201 | "cell_type": "markdown",
|
208 | 202 | "id": "ea9df8d4",
|
209 | 203 | "metadata": {},
|
|
274 | 268 | ]
|
275 | 269 | },
|
276 | 270 | {
|
277 |
| - "attachments": {}, |
278 | 271 | "cell_type": "markdown",
|
279 | 272 | "id": "86371ac8",
|
280 | 273 | "metadata": {},
|
|
331 | 324 | ]
|
332 | 325 | },
|
333 | 326 | {
|
334 |
| - "attachments": {}, |
335 | 327 | "cell_type": "markdown",
|
336 | 328 | "id": "39bd1de1-b9fe-4a98-89da-58d8a7a6ae51",
|
337 | 329 | "metadata": {},
|
|
374 | 366 | "# This example only specifies a relevant query\n",
|
375 | 367 | "retriever.get_relevant_documents(\"what are two movies about dinosaurs\")"
|
376 | 368 | ]
|
377 |
| - }, |
378 |
| - { |
379 |
| - "attachments": {}, |
380 |
| - "cell_type": "markdown", |
381 |
| - "id": "d25c52b0", |
382 |
| - "metadata": {}, |
383 |
| - "source": [ |
384 |
| - "## SQL Self-Query Retriever with MyScale" |
385 |
| - ] |
386 |
| - }, |
387 |
| - { |
388 |
| - "cell_type": "code", |
389 |
| - "execution_count": null, |
390 |
| - "id": "0f824b20", |
391 |
| - "metadata": {}, |
392 |
| - "outputs": [], |
393 |
| - "source": [ |
394 |
| - "!pip3 install clickhouse-sqlalchemy InstructorEmbedding sentence_transformers openai" |
395 |
| - ] |
396 |
| - }, |
397 |
| - { |
398 |
| - "cell_type": "code", |
399 |
| - "execution_count": null, |
400 |
| - "id": "a7af1d99", |
401 |
| - "metadata": {}, |
402 |
| - "outputs": [], |
403 |
| - "source": [ |
404 |
| - "from os import environ\n", |
405 |
| - "\n", |
406 |
| - "environ[\"HTTPS_PROXY\"] = \"http://192.168.40.161:7890\"\n", |
407 |
| - "environ[\"OPENAI_API_BASE\"] = \"https://one-api.myscale.cloud/v1\"\n", |
408 |
| - "import getpass\n", |
409 |
| - "from typing import Dict, Any\n", |
410 |
| - "from langchain import OpenAI, SQLDatabase, SQLDatabaseChain, LLMChain\n", |
411 |
| - "from sqlalchemy import create_engine, Column, MetaData\n", |
412 |
| - "from langchain import PromptTemplate\n", |
413 |
| - "\n", |
414 |
| - "\n", |
415 |
| - "from sqlalchemy import create_engine\n", |
416 |
| - "\n", |
417 |
| - "MYSCALE_HOST = \"msc-1decbcc9.us-east-1.aws.staging.myscale.cloud\"\n", |
418 |
| - "MYSCALE_PORT = 443\n", |
419 |
| - "MYSCALE_USER = \"chatdata\"\n", |
420 |
| - "MYSCALE_PASSWORD = \"myscale_rocks\"\n", |
421 |
| - "OPENAI_API_KEY = getpass.getpass(\"OpenAI API Key:\")\n", |
422 |
| - "\n", |
423 |
| - "engine = create_engine(\n", |
424 |
| - " f\"clickhouse://{MYSCALE_USER}:{MYSCALE_PASSWORD}@{MYSCALE_HOST}:{MYSCALE_PORT}/default?protocol=https\"\n", |
425 |
| - ")\n", |
426 |
| - "environ[\"OPENAI_API_KEY\"] = OPENAI_API_KEY" |
427 |
| - ] |
428 |
| - }, |
429 |
| - { |
430 |
| - "cell_type": "code", |
431 |
| - "execution_count": null, |
432 |
| - "id": "eceb0f9e", |
433 |
| - "metadata": {}, |
434 |
| - "outputs": [], |
435 |
| - "source": [ |
436 |
| - "from langchain.embeddings import HuggingFaceInstructEmbeddings\n", |
437 |
| - "from langchain.chains.sql_database.parser import VectorSQLOutputParser\n", |
438 |
| - "\n", |
439 |
| - "output_parser = VectorSQLOutputParser.from_embeddings(\n", |
440 |
| - " model=HuggingFaceInstructEmbeddings(\n", |
441 |
| - " model_name=\"hkunlp/instructor-base\", model_kwargs={\"device\": \"cpu\"}\n", |
442 |
| - " )\n", |
443 |
| - ")" |
444 |
| - ] |
445 |
| - }, |
446 |
| - { |
447 |
| - "cell_type": "code", |
448 |
| - "execution_count": null, |
449 |
| - "id": "c7b3e108", |
450 |
| - "metadata": {}, |
451 |
| - "outputs": [], |
452 |
| - "source": [ |
453 |
| - "from langchain.callbacks import StdOutCallbackHandler\n", |
454 |
| - "\n", |
455 |
| - "metadata = MetaData(bind=engine)\n", |
456 |
| - "\n", |
457 |
| - "from langchain.chains.sql_database.base import SQLDatabaseChain\n", |
458 |
| - "from langchain.chains.sql_database.prompt import MYSCALE_PROMPT\n", |
459 |
| - "from langchain.sql_database import SQLDatabase\n", |
460 |
| - "from langchain.llms import OpenAI\n", |
461 |
| - "from langchain.chat_models import ChatOpenAI\n", |
462 |
| - "\n", |
463 |
| - "chain = SQLDatabaseChain(\n", |
464 |
| - " llm_chain=LLMChain(\n", |
465 |
| - " llm=OpenAI(openai_api_key=OPENAI_API_KEY, temperature=0),\n", |
466 |
| - " prompt=MYSCALE_PROMPT,\n", |
467 |
| - " output_parser=output_parser,\n", |
468 |
| - " ),\n", |
469 |
| - " top_k=10,\n", |
470 |
| - " return_direct=True,\n", |
471 |
| - " database=SQLDatabase(engine, None, metadata),\n", |
472 |
| - " native_format=True,\n", |
473 |
| - ")\n", |
474 |
| - "\n", |
475 |
| - "import pandas as pd\n", |
476 |
| - "\n", |
477 |
| - "pd.DataFrame(\n", |
478 |
| - " chain.run(\n", |
479 |
| - " \"Please give me 10 papers to ask what is PageRank?\",\n", |
480 |
| - " callbacks=[StdOutCallbackHandler()],\n", |
481 |
| - " )\n", |
482 |
| - ")" |
483 |
| - ] |
484 |
| - }, |
485 |
| - { |
486 |
| - "attachments": {}, |
487 |
| - "cell_type": "markdown", |
488 |
| - "id": "9d6b1385", |
489 |
| - "metadata": {}, |
490 |
| - "source": [ |
491 |
| - "## SQL Database as Retriever" |
492 |
| - ] |
493 |
| - }, |
494 |
| - { |
495 |
| - "cell_type": "code", |
496 |
| - "execution_count": null, |
497 |
| - "id": "864ad4b1", |
498 |
| - "metadata": {}, |
499 |
| - "outputs": [], |
500 |
| - "source": [ |
501 |
| - "from langchain.retrievers import SQLDatabaseChainRetriever\n", |
502 |
| - "from langchain.chains.sql_database.parser import VectorSQLRetrieveAllOutputParser\n", |
503 |
| - "from langchain.chains.qa_with_sources.retrieval import RetrievalQAWithSourcesChain\n", |
504 |
| - "\n", |
505 |
| - "output_parser_retrieve_all = VectorSQLRetrieveAllOutputParser.from_embeddings(\n", |
506 |
| - " output_parser.model\n", |
507 |
| - ")\n", |
508 |
| - "\n", |
509 |
| - "chain = SQLDatabaseChain.from_llm(\n", |
510 |
| - " llm=OpenAI(openai_api_key=OPENAI_API_KEY, temperature=0),\n", |
511 |
| - " prompt=MYSCALE_PROMPT,\n", |
512 |
| - " top_k=10,\n", |
513 |
| - " return_direct=True,\n", |
514 |
| - " db=SQLDatabase(engine, None, metadata),\n", |
515 |
| - " sql_cmd_parser=output_parser_retrieve_all,\n", |
516 |
| - " native_format=True,\n", |
517 |
| - ")\n", |
518 |
| - "\n", |
519 |
| - "# You need all those keys to get docs\n", |
520 |
| - "retriever = SQLDatabaseChainRetriever(sql_db_chain=chain, page_content_key=\"abstract\")\n", |
521 |
| - "\n", |
522 |
| - "document_with_metadata_prompt = PromptTemplate(\n", |
523 |
| - " input_variables=[\"page_content\", \"id\", \"title\", \"authors\", \"pubdate\", \"categories\"],\n", |
524 |
| - " template=\"Content:\\n\\tTitle: {title}\\n\\tAbstract: {page_content}\\n\\tAuthors: {authors}\\n\\tDate of Publication: {pubdate}\\n\\tCategories: {categories}\\nSOURCE: {id}\",\n", |
525 |
| - ")\n", |
526 |
| - "\n", |
527 |
| - "chain = RetrievalQAWithSourcesChain.from_chain_type(\n", |
528 |
| - " ChatOpenAI(\n", |
529 |
| - " model_name=\"gpt-3.5-turbo-16k\", openai_api_key=OPENAI_API_KEY, temperature=0.6\n", |
530 |
| - " ),\n", |
531 |
| - " retriever=retriever,\n", |
532 |
| - " chain_type=\"stuff\",\n", |
533 |
| - " chain_type_kwargs={\n", |
534 |
| - " \"document_prompt\": document_with_metadata_prompt,\n", |
535 |
| - " },\n", |
536 |
| - " return_source_documents=True,\n", |
537 |
| - ")\n", |
538 |
| - "ans = chain(\"Please give me 10 papers to ask what is PageRank?\")\n", |
539 |
| - "print(ans[\"answer\"])" |
540 |
| - ] |
541 |
| - }, |
542 |
| - { |
543 |
| - "cell_type": "code", |
544 |
| - "execution_count": null, |
545 |
| - "id": "1b1dddf5", |
546 |
| - "metadata": {}, |
547 |
| - "outputs": [], |
548 |
| - "source": [] |
549 | 369 | }
|
550 | 370 | ],
|
551 | 371 | "metadata": {
|
|
564 | 384 | "name": "python",
|
565 | 385 | "nbconvert_exporter": "python",
|
566 | 386 | "pygments_lexer": "ipython3",
|
567 |
| - "version": "3.10.9" |
| 387 | + "version": "3.11.3" |
568 | 388 | }
|
569 | 389 | },
|
570 | 390 | "nbformat": 4,
|
|
0 commit comments