From 768fbbd02ddfb55c5e73920586b8a45cb2811538 Mon Sep 17 00:00:00 2001
From: Menghuan1918
Date: Mon, 18 Nov 2024 12:38:49 +0800
Subject: [PATCH] [example] Add examples

---
Reviewer notes (text in this section is ignored when the patch is applied):
- Adds three example scripts that call Doc2X.pdf2file and print the three
  values it returns, bumps the package version from 0.4.8 to 0.4.9, and adds
  "minio" to the "rag" and "dev" optional dependency groups.
- The example inputs are relative paths under tests/pdf/, so the scripts are
  presumably meant to be run from the repository root (confirm the fixtures
  exist there).

 examples/convert_folder_pdfs.py         | 19 +++++++++++++++++++
 examples/convert_pdfs_multiple_types.py | 19 +++++++++++++++++++
 examples/convert_single_pdf.py          | 19 +++++++++++++++++++
 pyproject.toml                          |  6 +++---
 4 files changed, 60 insertions(+), 3 deletions(-)
 create mode 100644 examples/convert_folder_pdfs.py
 create mode 100644 examples/convert_pdfs_multiple_types.py
 create mode 100644 examples/convert_single_pdf.py

diff --git a/examples/convert_folder_pdfs.py b/examples/convert_folder_pdfs.py
new file mode 100644
index 0000000..1501ec9
--- /dev/null
+++ b/examples/convert_folder_pdfs.py
@@ -0,0 +1,19 @@
+# This is an example of how to convert all PDF files in a folder to DOCX files.
+# 这是一个将文件夹中的所有 PDF 文件转换为 DOCX 文件的示例。
+
+from pdfdeal import Doc2X
+
+# gets API Key from environment variable DOC2X_APIKEY, or you can pass it as a string to the apikey parameter
+# 从环境变量 DOC2X_APIKEY 获取 API Key, 或者您可以将其作为字符串传递给 apikey 参数
+
+# client = Doc2X(apikey="Your API key",debug=True)
+client = Doc2X(debug=True)
+
+success, failed, flag = client.pdf2file(
+    pdf_file="tests/pdf/test",
+    output_path="./Output",
+    output_format="docx",
+)
+print(success)
+print(failed)
+print(flag)
diff --git a/examples/convert_pdfs_multiple_types.py b/examples/convert_pdfs_multiple_types.py
new file mode 100644
index 0000000..5434ea1
--- /dev/null
+++ b/examples/convert_pdfs_multiple_types.py
@@ -0,0 +1,19 @@
+# This is an example of how to convert all PDF files in a folder to multiple types of files.
+# 这是一个将文件夹中的所有 PDF 文件转换为多种类型文件的示例。
+
+from pdfdeal import Doc2X
+
+# gets API Key from environment variable DOC2X_APIKEY, or you can pass it as a string to the apikey parameter
+# 从环境变量 DOC2X_APIKEY 获取 API Key, 或者您可以将其作为字符串传递给 apikey 参数
+
+# client = Doc2X(apikey="Your API key",debug=True)
+client = Doc2X(debug=True)
+
+success, failed, flag = client.pdf2file(
+    pdf_file="tests/pdf/test",
+    output_path="./Output",
+    output_format="docx,md",
+)
+print(success)
+print(failed)
+print(flag)
diff --git a/examples/convert_single_pdf.py b/examples/convert_single_pdf.py
new file mode 100644
index 0000000..2341c7c
--- /dev/null
+++ b/examples/convert_single_pdf.py
@@ -0,0 +1,19 @@
+# This is an example of how to convert a single PDF file to a single DOCX file.
+# 这是一个将单个 PDF 文件转换为单个 DOCX 文件的示例。
+
+from pdfdeal import Doc2X
+
+# gets API Key from environment variable DOC2X_APIKEY, or you can pass it as a string to the apikey parameter
+# 从环境变量 DOC2X_APIKEY 获取 API Key, 或者您可以将其作为字符串传递给 apikey 参数
+
+# client = Doc2X(apikey="Your API key",debug=True)
+client = Doc2X(debug=True)
+
+success, failed, flag = client.pdf2file(
+    pdf_file="tests/pdf/sample.pdf",
+    output_path="Output",
+    output_format="docx",
+)
+print(success)
+print(failed)
+print(flag)
diff --git a/pyproject.toml b/pyproject.toml
index a1f4b6d..cb77a48 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "pdfdeal"
-version = "0.4.8"
+version = "0.4.9"
 authors = [{ name = "Menghuan1918", email = "menghuan@menghuan1918.com" }]
 description = "A python wrapper for the Doc2X API and comes with native texts processing (to improve texts recall in RAG)."
 readme = "README.md"
@@ -13,8 +13,8 @@ classifiers = [
 dependencies = ["httpx[http2]>=0.23.1, <1", "pypdf"]
 
 [project.optional-dependencies]
-rag = ["emoji", "Pillow", "reportlab", "oss2", "boto3"]
-dev = ["pytest", "emoji", "Pillow", "reportlab", "oss2", "boto3"]
+rag = ["emoji", "Pillow", "reportlab", "oss2", "boto3", "minio"]
+dev = ["pytest", "emoji", "Pillow", "reportlab", "oss2", "boto3", "minio"]
 
 [project.urls]
 Issues = "https://github.com/Menghuan1918/pdfdeal/issues"