utensils

aosan · Apr 8, 2024 · 82c66a6 · 82c66a6
1 parent 8a54d61
commit 82c66a6
Show file tree

Hide file tree

Showing 11 changed files with 365 additions and 0 deletions.
diff --git a/utensils/epub2md/.python-version b/utensils/epub2md/.python-version
@@ -0,0 +1 @@
+3.12
diff --git a/utensils/epub2md/README.md b/utensils/epub2md/README.md
@@ -0,0 +1,53 @@
+# EPUB to Markdown Converter
+
+## Overview
+
+This script provides a utility for converting EPUB files to Markdown format. It leverages the `ebooklib` library to read EPUB content and the `html2text` module for the conversion of HTML documents contained within the EPUB to Markdown format.
+
+## Installation
+
+Before running the script, ensure you have Python 3.12 installed on your system and install the required Python dependencies by running:
+
+```Bash
+pip install -r requirements.txt
+```
+
+Optionally, use pyenv to manage your Python environments:
+
+```Bash
+pyenv local 3.12
+```
+
+## Usage
+
+To convert an EPUB file to Markdown:
+
+```
+./epub2md.py <path_to_epub_file>
+```
+
+For example:
+
+```Bash
+./epub2md.py example.epub
+```
+
+This will create a Markdown file in the same directory as the EPUB file, with the same name but with a `.md` extension.
+
+## How It Works
+
+1. **EPUB Loading**: The script reads the EPUB file using `ebooklib`.
+2. **HTML to Markdown Conversion**: Each document within the EPUB is converted from HTML to Markdown format using `html2text`.
+3. **Markdown File Creation**: The script combines all converted Markdown content into a single file, naming it after the original EPUB file but with a `.md` extension.
+
+## Dependencies
+
+- Python 3.12
+- Optional pyenv to manage your Python environments
+- `ebooklib`: a Python library for managing EPUB2/EPUB3 and Kindle files
+- `html2text`: a Python library for the conversion of HTML into Markdown
+
+## License
+
+This software is released under the AGPL-3.0 license.
+
diff --git a/utensils/epub2md/epub2md.py b/utensils/epub2md/epub2md.py
@@ -0,0 +1,57 @@
+#!/usr/bin/env python3
+
+import sys
+import os
+import argparse
+import ebooklib
+from ebooklib import epub
+import html2text
+
+def convert_html_to_markdown(html_content):
+    # Initialize html2text
+    h = html2text.HTML2Text()
+    h.ignore_links = False
+    return h.handle(html_content)
+
+def epub_to_md(epub_path):
+    try:
+        # Load the EPUB file
+        book = epub.read_epub(epub_path)
+    except Exception as e:
+        print(f"Failed to read EPUB file: {e}")
+        return
+
+    # Initialize Markdown content
+    md_content = ""
+
+    # check each item in the EPUB book
+    for item in book.get_items_of_type(ebooklib.ITEM_DOCUMENT):
+        # Convert HTML to Markdown
+        html_content = item.content.decode("utf-8")
+        md_content += convert_html_to_markdown(html_content) + "\n\n"
+
+
+    if not md_content:
+        print("No readable document items found in the EPUB file.")
+        return
+
+    # Generate output file path
+    md_path = os.path.splitext(epub_path)[0] + ".md"
+
+    try:
+        # Write to Markdown file
+        with open(md_path, "w", encoding="utf-8") as md_file:
+            md_file.write(md_content)
+        print(f"Markdown file saved to: {md_path}")
+    except Exception as e:
+        print(f"Failed to write Markdown file: {e}")
+
+def main():
+    parser = argparse.ArgumentParser(description="Convert an EPUB file to Markdown format.")
+    parser.add_argument("epub_file_path", help="Path to the EPUB file to be converted.")
+    args = parser.parse_args()
+
+    epub_to_md(args.epub_file_path)
+
+if __name__ == "__main__":
+    main()
diff --git a/utensils/epub2md/requirements.txt b/utensils/epub2md/requirements.txt
@@ -0,0 +1,2 @@
+ebooklib==0.18
+html2text==2024.2.26
diff --git a/utensils/evaluate_llm/README.md b/utensils/evaluate_llm/README.md
@@ -0,0 +1,40 @@
+# LLM Performance Comparison Tool
+
+## Overview
+This script facilitates the comparison of multiple LLMs by running requests through each selected model and measuring their performance. It is designed to help you compare and evaluate the capabilities and response times of your models or Modelfile fine-tuning in processing an identical request.
+
+## Prerequisites
+- Bash shell environment
+- `ollama` installed and configured on your system
+
+## Usage
+```bash
+./evaluate_llm.sh <number of models> <request file path>
+```
+
+Where:
+- `<number of models>` is the number of models you wish to compare
+- `<request file path>` is the path to a file containing the request
+
+### Example
+```bash
+./evaluate_llm.sh 3 evaluate_marketing.txt
+```
+
+This will select 3 models from your available LLMs and make a request with the contents of `evaluate_marketing.txt` to each selected model.
+
+## Outcome
+The script displays:
+- The response from each selected model.
+- The evaluation durations for each model.
+- A direct link to [LLM Examiner](https://chat.openai.com/g/g-WaEKsoStj-llm-examiner) for further analysis of the results.
+
+Copy and paste the complete output between the two `-@@@-` markers into the LLM Examiner to get an evaluation and a score from 1 to 10 for each model's accuracy, completeness, clarity, responsiveness, and efficiency.
+
+## Note
+Ensure `ollama` is correctly installed and you have access to the models you wish to compare.
+
+For more information on the `ollama` usage and models, please visit [ollama](https://github.com/ollama/ollama).
+
+## License
+This software is released under the AGPL-3.0 license.
diff --git a/utensils/evaluate_llm/evaluate_llm.sh b/utensils/evaluate_llm/evaluate_llm.sh
@@ -0,0 +1,106 @@
+#!/usr/bin/env bash
+# Compare multiple models by running them with the same questions based on user input
+
+# ANSI escape sequences used to colour terminal output
+RED='\033[0;31m'
+GREEN='\033[0;32m'
+YELLOW='\033[1;33m'
+NC='\033[0m' # No Color (reset)
+
+# Print the given message in red on stderr, then terminate with status 1
+exit_with_message() {
+    local message=$1
+    printf "${RED}%s${NC}\n" "$message" >&2
+    exit 1
+}
+
+# Check for correct usage
+if [ "$#" -ne 2 ]; then
+    exit_with_message "Usage: $0 <number of models> <request file path>"
+fi
+
+# Validate number of models input
+if ! [[ "$1" =~ ^[0-9]+$ ]] || [ "$1" -le 0 ]; then
+    exit_with_message "The number of models must be a positive integer."
+fi
+
+# Force base 10 so a value with leading zeros (e.g. 08) is not parsed as
+# octal by the arithmetic comparisons performed later in the script.
+NUMBEROFCHOICES=$((10#$1))
+
+# Check if the question file exists and is readable
+if [ ! -f "$2" ] || [ ! -r "$2" ]; then
+    exit_with_message "$2 not found or is not readable. Please make sure the file exists and is readable."
+fi
+
+REQUEST_CONTENT=$(<"$2")
+
+# Retrieve model list.
+# - stderr of `ollama list` itself is silenced.
+# - pipefail (scoped to the substitution's subshell) makes $? reflect a
+#   failing `ollama list` rather than only awk's exit status.
+# - NR > 1 skips the header row so it is not offered as a model name.
+CHOICES=$(set -o pipefail; ollama list 2>/dev/null | awk 'NR > 1 {print $1}')
+if [ $? -ne 0 ] || [ -z "$CHOICES" ]; then
+    exit_with_message "Failed to retrieve model list or no models available."
+fi
+
+SELECTIONS=()
+declare -a SUMS=()
+COUNT=0
+
+# Values are passed as printf arguments instead of being embedded in the
+# format string, so a '%' or backslash in a value is printed literally.
+printf "${GREEN}Select %s models to compare:${NC}\n" "$NUMBEROFCHOICES"
+# $CHOICES is intentionally unquoted: word splitting yields one menu entry
+# per model name.
+select ITEM in $CHOICES; do
+    if [[ -n $ITEM ]]; then
+        printf "${YELLOW}You have selected %s${NC}\n" "$ITEM"
+        SELECTIONS+=("$ITEM")
+        COUNT=$((COUNT + 1))
+        if [[ $COUNT -eq $NUMBEROFCHOICES ]]; then
+            break
+        fi
+    else
+        printf "${RED}Invalid selection. Please try again.${NC}\n"
+    fi
+done
+
+# `select` also ends on end-of-input; do not carry on with nothing to compare.
+if [ "${#SELECTIONS[@]}" -eq 0 ]; then
+    exit_with_message "No models were selected."
+fi
+
+printf "\n${YELLOW}-@@@-${NC}\n"
+for ITEM in "${SELECTIONS[@]}"; do
+    printf "${YELLOW}--------------------------------------------------------------${NC}\n"
+    printf "Selecting the model ${GREEN}%s${NC}\n" "$ITEM"
+    # Run once with an empty prompt; a failure here is reported as a load failure.
+    if ! ollama run "$ITEM" ""; then
+        printf "${RED}Failed to load model %s. Skipping...${NC}\n" "$ITEM"
+        continue
+    fi
+    printf "${YELLOW}--------------------------------------------------------------${NC}\n"
+    # The request text is passed as an argument so '%' or backslashes in it
+    # are printed literally instead of being interpreted by printf.
+    printf "Running the request ---${RED}%s${NC}--- with the model ${GREEN}%s${NC}\n" "$REQUEST_CONTENT" "$ITEM"
+
+    # pipefail (scoped to the substitution's subshell) makes the pipeline fail
+    # when ollama fails; without it only the exit status of tee is seen.
+    if COMMAND_OUTPUT=$(set -o pipefail; ollama run "$ITEM" --verbose < "$2" 2>&1 | tee /dev/stderr); then
+        SUM=$(printf '%s\n' "$COMMAND_OUTPUT" | awk '
+        # Convert a duration such as "250ms", "4.5s" or "1m2.5s" to seconds
+        # by consuming one <number><unit> pair at a time.
+        function to_seconds(text,    total, token, number, unit) {
+            total = 0
+            while (match(text, /^[0-9.]+(ns|us|µs|ms|h|m|s)/)) {
+                token = substr(text, 1, RLENGTH)
+                text = substr(text, RLENGTH + 1)
+                number = token
+                sub(/[^0-9.]+$/, "", number)
+                unit = substr(token, length(number) + 1)
+                if (unit == "h") total += number * 3600
+                else if (unit == "m") total += number * 60
+                else if (unit == "s") total += number
+                else if (unit == "ms") total += number / 1000
+                else if (unit == "us" || unit == "µs") total += number / 1000000
+                else total += number / 1000000000
+            }
+            return total
+        }
+        # Only lines that start with "eval duration:" are counted; the
+        # "prompt eval duration:" line is deliberately left out.
+        $1 == "eval" && $2 == "duration:" { sum += to_seconds($3) }
+        END { print sum }')
+
+        SUMS+=("The request for $ITEM completed in $SUM seconds")
+    else
+        printf "${RED}An error occurred while running the model %s. Skipping...${NC}\n" "$ITEM"
+    fi
+done
+
+# Print the yellow divider line that separates the sections of the report
+print_divider() {
+    printf "\n${YELLOW}--------------------------------------------------------------${NC}\n"
+}
+
+print_divider
+printf "\n${GREEN}Request evaluation for each run:${NC}\n"
+# One timing line per model that completed its run
+for timing_line in "${SUMS[@]}"; do
+    printf "%s\n" "$timing_line"
+done
+
+print_divider
+printf "\n${GREEN}LLMs comparison complete for request: \"%s\"${NC}\n" "$REQUEST_CONTENT"
+print_divider
+printf "\n${YELLOW}-@@@-${NC}\n"
+printf "\n${YELLOW} Evaluate the results above with LLM Examiner at: ${NC}\n"
+printf "\n${YELLOW} https://chat.openai.com/g/g-WaEKsoStj-llm-examiner ${NC}\n"
+
+print_divider
diff --git a/utensils/evaluate_llm/evaluate_marketing.txt b/utensils/evaluate_llm/evaluate_marketing.txt
@@ -0,0 +1 @@
+Act as a business expert and define, compare, and contrast the role of marketing for a start-up.
diff --git a/utensils/pdf2md/.python-version b/utensils/pdf2md/.python-version
@@ -0,0 +1 @@
+3.12
diff --git a/utensils/pdf2md/README.md b/utensils/pdf2md/README.md
@@ -0,0 +1,52 @@
+# PDF to Markdown Converter
+
+## Overview
+
+This script provides a utility for converting PDF files to Markdown format. It leverages the `PyMuPDF` library to convert PDF documents to the Markdown format.
+
+## Installation
+
+Before running the script, ensure you have Python 3.12 installed on your system and install the required Python dependencies by running:
+
+```Bash
+pip install -r requirements.txt
+```
+
+Optionally, use pyenv to manage your Python environments:
+
+```Bash
+pyenv local 3.12
+```
+
+## Usage
+
+To convert a PDF file to Markdown:
+
+```
+./pdf2md.py <path_to_PDF_file>
+```
+
+For example:
+
+```Bash
+./pdf2md.py example.pdf
+```
+
+This will create a Markdown file in the same directory as the PDF file, with the same name but with a `.md` extension.
+
+## How It Works
+
+1. **PDF Loading**: The script reads the PDF file using `PyMuPDF`.
+2. **PDF to Markdown Conversion**: The plain text of each page is extracted and appended to the Markdown content.
+3. **Markdown File Creation**: The script combines all converted Markdown content into a single file, naming it after the original PDF file but with a `.md` extension.
+
+## Dependencies
+
+- Python 3.12
+- Optional pyenv to manage your Python environments
+- `PyMuPDF` is a versatile Python library for the manipulation, rendering, and extraction of content from PDF files
+
+## License
+
+This software is released under the AGPL-3.0 license.
+
diff --git a/utensils/pdf2md/pdf2md.py b/utensils/pdf2md/pdf2md.py
@@ -0,0 +1,51 @@
+#!/usr/bin/env python3
+
+import sys
+import os
+import argparse
+import fitz  # PyMuPDF
+
+def convert_pdf_to_markdown(pdf_path):
+    try:
+        # Load the PDF file
+        doc = fitz.open(pdf_path)
+    except Exception as e:
+        print(f"Failed to read PDF file: {e}")
+        return
+
+    # Initialize Markdown content
+    md_content = ""
+
+    # Iterate through each page of the PDF
+    for page_num in range(len(doc)):
+        # Get the page
+        page = doc.load_page(page_num)
+        # Extract text from the page
+        text = page.get_text("text")
+        # Add the text to our Markdown content
+        md_content += text + "\n\n"
+
+    if not md_content:
+        print("No readable document items found in the PDF file.")
+        return
+
+    # Generate output file path
+    md_path = os.path.splitext(pdf_path)[0] + ".md"
+
+    try:
+        # Write to Markdown file
+        with open(md_path, "w", encoding="utf-8") as md_file:
+            md_file.write(md_content)
+        print(f"Markdown file saved to: {md_path}")
+    except Exception as e:
+        print(f"Failed to write Markdown file: {e}")
+
+def main():
+    parser = argparse.ArgumentParser(description="Convert a PDF file to Markdown format.")
+    parser.add_argument("pdf_file_path", help="Path to the PDF file to be converted.")
+    args = parser.parse_args()
+
+    convert_pdf_to_markdown(args.pdf_file_path)
+
+if __name__ == "__main__":
+    main()
diff --git a/utensils/pdf2md/requirements.txt b/utensils/pdf2md/requirements.txt
@@ -0,0 +1 @@
+PyMuPDF==1.24.1
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1 @@
		Act as a business expert and define, compare, and contrast the role of marketing for a start-up.