-
-
Notifications
You must be signed in to change notification settings - Fork 89
Fix(jupyter):use non-blocking stdin check to prevent hanging #380
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
+220
−4
Merged
Changes from all commits
Commits
Show all changes
7 commits
Select commit
Hold shift + click to select a range
11ab8cf
implement non-blocking stdin check to prevent hanging
kafka1991 4b9bf3e
Add Jupyter notebook tests and run it in CI
kafka1991 7974d23
add more juper notebook tests
kafka1991 92dc0c4
Merge remote-tracking branch 'upstream/main' into non_blocking_stdin
kafka1991 a7315cd
Merge remote-tracking branch 'upstream/main' into non_blocking_stdin
kafka1991 b182126
Merge branch 'main' into non_blocking_stdin
kafka1991 b14b984
generate temporary CSV for testing
kafka1991 File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,165 @@ | ||
| { | ||
| "cells": [ | ||
| { | ||
| "cell_type": "code", | ||
| "execution_count": null, | ||
| "metadata": { | ||
| "jupyter": { | ||
| "is_executing": true | ||
| } | ||
| }, | ||
| "outputs": [], | ||
| "source": [ | ||
| "from chdb import session\n", | ||
| "import time\n", | ||
| "import tempfile\n", | ||
| "import os\n", | ||
| "\n", | ||
| "print(\"Connecting to chdb session...\")\n", | ||
| "chs = session.Session()\n", | ||
| "\n", | ||
| "temp_csv = tempfile.NamedTemporaryFile(mode='w', suffix='.csv', delete=False)\n", | ||
| "temp_csv.write(\"movieId,embedding\\n\") # Header\n", | ||
| "\n", | ||
| "# Generate 10,000 rows of test data\n", | ||
| "for i in range(1, 10001):\n", | ||
| " embedding = [float(i + j * 0.1) for j in range(10)]\n", | ||
| " embedding_str = '[' + ','.join(map(str, embedding)) + ']'\n", | ||
| " temp_csv.write(f'{i},\"{embedding_str}\"\\n')\n", | ||
| "\n", | ||
| "temp_csv.close()\n", | ||
| "csv_path = temp_csv.name\n", | ||
| "\n", | ||
| "# Setup database and table\n", | ||
| "print(\"\\n=== Setup Phase ===\")\n", | ||
| "chs.query(\"CREATE DATABASE IF NOT EXISTS test ENGINE = Atomic\")\n", | ||
| "chs.query(\"USE test\")\n", | ||
| "chs.query('DROP TABLE IF EXISTS embeddings')\n", | ||
| "\n", | ||
| "chs.query(\"\"\"CREATE TABLE embeddings (\n", | ||
| " movieId UInt32 NOT NULL,\n", | ||
| " embedding Array(Float32) NOT NULL\n", | ||
| " ) ENGINE = MergeTree()\n", | ||
| " ORDER BY movieId\"\"\")\n", | ||
| "\n", | ||
| "# Test 1: INFILE insertion (10k rows)\n", | ||
| "print(\"\\n=== Test 1: INFILE Insertion (10k rows) ===\")\n", | ||
| "start_time = time.time()\n", | ||
| "try:\n", | ||
| " result = chs.query(f\"INSERT INTO embeddings FROM INFILE '{csv_path}' FORMAT CSV\")\n", | ||
| " infile_time = time.time() - start_time\n", | ||
| " print(f\"✓ INFILE insertion successful! Time: {infile_time:.3f}s\")\n", | ||
| " \n", | ||
| " count = chs.query('SELECT COUNT(*) as count FROM embeddings')\n", | ||
| " print(f\"Records inserted via INFILE: {count}\")\n", | ||
| " \n", | ||
| " if count != '0':\n", | ||
| " print(\"Sample data from INFILE:\")\n", | ||
| " sample = chs.query('SELECT movieId, embedding FROM embeddings ORDER BY movieId LIMIT 3')\n", | ||
| " print(sample)\n", | ||
| " \n", | ||
| "except Exception as e:\n", | ||
| " print(f\"✗ INFILE insertion failed: {e}\")\n", | ||
| " infile_time = 0\n", | ||
| "\n", | ||
| "# Test 2: Regular insertion (10 additional rows)\n", | ||
| "print(\"\\n=== Test 2: Regular VALUES Insertion (10 rows) ===\")\n", | ||
| "start_time = time.time()\n", | ||
| "try:\n", | ||
| " # Insert 10 additional rows with movieId starting from 20001\n", | ||
| " for i in range(20001, 20011):\n", | ||
| " embedding = [float(i + j * 0.1) for j in range(10)]\n", | ||
| " embedding_str = '[' + ','.join(map(str, embedding)) + ']'\n", | ||
| " chs.query(f\"INSERT INTO embeddings VALUES ({i}, {embedding_str})\")\n", | ||
| " \n", | ||
| " values_time = time.time() - start_time\n", | ||
| " print(f\"✓ VALUES insertion successful! Time: {values_time:.3f}s\")\n", | ||
| " \n", | ||
| "except Exception as e:\n", | ||
| " print(f\"✗ VALUES insertion failed: {e}\")\n", | ||
| " values_time = 0\n", | ||
| "\n", | ||
| "# Test 3: Verify total count\n", | ||
| "print(\"\\n=== Test 3: Count Verification ===\")\n", | ||
| "try:\n", | ||
| " total_count = chs.query('SELECT COUNT(*) as total FROM embeddings')\n", | ||
| " print(f\"Total records in embeddings table: {total_count}\")\n", | ||
| " \n", | ||
| " # Count by range\n", | ||
| " infile_count = chs.query('SELECT COUNT(*) as infile_count FROM embeddings WHERE movieId <= 10000')\n", | ||
| " values_count = chs.query('SELECT COUNT(*) as values_count FROM embeddings WHERE movieId >= 20001')\n", | ||
| " \n", | ||
| " print(f\"Records from INFILE (movieId <= 10000): {infile_count}\")\n", | ||
| " print(f\"Records from VALUES (movieId >= 20001): {values_count}\")\n", | ||
| " \n", | ||
| " # Sample from both ranges\n", | ||
| " print(\"\\nSample from INFILE data:\")\n", | ||
| " print(chs.query('SELECT movieId, embedding FROM embeddings WHERE movieId <= 10000 ORDER BY movieId LIMIT 2'))\n", | ||
| " \n", | ||
| " print(\"Sample from VALUES data:\")\n", | ||
| " print(chs.query('SELECT movieId, embedding FROM embeddings WHERE movieId >= 20001 ORDER BY movieId LIMIT 2'))\n", | ||
| " \n", | ||
| "except Exception as e:\n", | ||
| " print(f\"Count verification error: {e}\")\n", | ||
| "\n", | ||
| "# Test 4: Direct CSV engine reading\n", | ||
| "print(\"\\n=== Test 4: CSV Engine Direct Reading ===\")\n", | ||
| "try:\n", | ||
| " print(\"Reading generated CSV file directly using CSV engine:\")\n", | ||
| " \n", | ||
| " # Method 1: Using file() function\n", | ||
| " csv_count1 = chs.query(f\"SELECT COUNT(*) as csv_count FROM file('{csv_path}', 'CSV', 'movieId UInt32, embedding String')\")\n", | ||
| " print(f\"CSV file rows (via file() function): {csv_count1}\")\n", | ||
| " \n", | ||
| " # Method 2: Using CSV table engine directly\n", | ||
| " print(\"Sample rows from CSV file:\")\n", | ||
| " csv_sample = chs.query(f\"SELECT movieId, embedding FROM file('{csv_path}', 'CSV', 'movieId UInt32, embedding String') ORDER BY movieId LIMIT 3\")\n", | ||
| " print(csv_sample)\n", | ||
| " \n", | ||
| " print(\"Last few rows from CSV file:\")\n", | ||
| " csv_tail = chs.query(f\"SELECT movieId, embedding FROM file('{csv_path}', 'CSV', 'movieId UInt32, embedding String') ORDER BY movieId DESC LIMIT 3\")\n", | ||
| " print(csv_tail)\n", | ||
| " \n", | ||
| "except Exception as e:\n", | ||
| " print(f\"CSV engine reading error: {e}\")\n", | ||
| "\n", | ||
| "# Cleanup\n", | ||
| "print(\"\\n=== Cleanup ===\")\n", | ||
| "try:\n", | ||
| " os.unlink(csv_path)\n", | ||
| " print(\"✓ Temporary CSV file cleaned up\")\n", | ||
| "except Exception as e:\n", | ||
| " print(f\"Warning: Could not clean up temporary file: {e}\")\n", | ||
| "\n", | ||
| "print(f\"\\n=== Performance Summary ===\")\n", | ||
| "if infile_time > 0:\n", | ||
| " print(f\"INFILE insertion (10k rows): {infile_time:.3f}s\")\n", | ||
| "if values_time > 0:\n", | ||
| " print(f\"VALUES insertion (10 rows): {values_time:.3f}s\")\n", | ||
| " if infile_time > 0:\n", | ||
| " print(f\"INFILE is {values_time/infile_time*1000:.1f}x faster per 1000 rows\")" | ||
| ] | ||
| } | ||
| ], | ||
| "metadata": { | ||
| "kernelspec": { | ||
| "display_name": ".venv", | ||
| "language": "python", | ||
| "name": "python3" | ||
| }, | ||
| "language_info": { | ||
| "codemirror_mode": { | ||
| "name": "ipython", | ||
| "version": 3 | ||
| }, | ||
| "file_extension": ".py", | ||
| "mimetype": "text/x-python", | ||
| "name": "python", | ||
| "nbconvert_exporter": "python", | ||
| "pygments_lexer": "ipython3", | ||
| "version": "3.13.7" | ||
| } | ||
| }, | ||
| "nbformat": 4, | ||
| "nbformat_minor": 4 | ||
| } |
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Uh oh!
There was an error while loading. Please reload this page.