Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 10 additions & 1 deletion .github/workflows/build_linux_arm64_wheels-gh.yml
Original file line number Diff line number Diff line change
Expand Up @@ -117,7 +117,7 @@ jobs:
echo "Installing dependencies for Python $version"
pyenv shell $version
python -m pip install --upgrade pip
python -m pip install setuptools tox pandas pyarrow twine psutil deltalake wheel
python -m pip install setuptools tox pandas pyarrow twine psutil deltalake wheel jupyter nbconvert
pyenv shell --unset
done
- name: Upgrade Rust toolchain
Expand Down Expand Up @@ -281,6 +281,15 @@ jobs:
pyenv shell --unset
done
continue-on-error: false
- name: Run notebook tests
run: |
export PATH="$HOME/.pyenv/bin:$PATH"
eval "$(pyenv init -)"
pyenv shell 3.8
python -m pip install dist/*.whl --force-reinstall
jupyter nbconvert --to notebook --execute tests/test_data_insertion.ipynb --output test_data_insertion_output.ipynb
pyenv shell --unset
continue-on-error: false
- name: Check and upload core files if present
if: always()
run: |
Expand Down
11 changes: 10 additions & 1 deletion .github/workflows/build_linux_x86_wheels.yml
Original file line number Diff line number Diff line change
Expand Up @@ -117,7 +117,7 @@ jobs:
echo "Installing dependencies for Python $version"
pyenv shell $version
python -m pip install --upgrade pip
python -m pip install setuptools tox pandas pyarrow twine psutil deltalake wheel
python -m pip install setuptools tox pandas pyarrow twine psutil deltalake wheel jupyter nbconvert
pyenv shell --unset
done
- name: Upgrade Rust toolchain
Expand Down Expand Up @@ -280,6 +280,15 @@ jobs:
pyenv shell --unset
done
continue-on-error: false
- name: Run notebook tests
run: |
export PATH="$HOME/.pyenv/bin:$PATH"
eval "$(pyenv init -)"
pyenv shell 3.8
python -m pip install dist/*.whl --force-reinstall
jupyter nbconvert --to notebook --execute tests/test_data_insertion.ipynb --output test_data_insertion_output.ipynb
pyenv shell --unset
continue-on-error: false
- name: Check and upload core files if present
if: always()
run: |
Expand Down
11 changes: 10 additions & 1 deletion .github/workflows/build_macos_arm64_wheels.yml
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,7 @@ jobs:
echo "Installing dependencies for Python $version"
pyenv shell $version
python -m pip install --upgrade pip
python -m pip install setuptools wheel tox pandas pyarrow twine psutil deltalake wheel>=0.40.0
python -m pip install setuptools wheel tox pandas pyarrow twine psutil deltalake wheel>=0.40.0 jupyter nbconvert
pyenv shell --unset
done
- name: Remove /usr/local/bin/python3
Expand Down Expand Up @@ -276,6 +276,15 @@ jobs:
pyenv shell --unset
done
continue-on-error: false
- name: Run notebook tests
run: |
export PATH="$HOME/.pyenv/bin:$PATH"
eval "$(pyenv init -)"
pyenv shell 3.8
python -m pip install dist/*.whl --force-reinstall
jupyter nbconvert --to notebook --execute tests/test_data_insertion.ipynb --output test_data_insertion_output.ipynb
pyenv shell --unset
continue-on-error: false
- name: Check and upload core files if present
if: always()
run: |
Expand Down
11 changes: 10 additions & 1 deletion .github/workflows/build_macos_x86_wheels.yml
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,7 @@ jobs:
echo "Installing dependencies for Python $version"
pyenv shell $version
python -m pip install --upgrade pip
python -m pip install setuptools tox pandas pyarrow twine psutil deltalake wheel>=0.40.0
python -m pip install setuptools tox pandas pyarrow twine psutil deltalake wheel>=0.40.0 jupyter nbconvert
pyenv shell --unset
done
- name: Remove /usr/local/bin/python3
Expand Down Expand Up @@ -277,6 +277,15 @@ jobs:
pyenv shell --unset
done
continue-on-error: false
- name: Run notebook tests
run: |
export PATH="$HOME/.pyenv/bin:$PATH"
eval "$(pyenv init -)"
pyenv shell 3.8
python -m pip install dist/*.whl --force-reinstall
jupyter nbconvert --to notebook --execute tests/test_data_insertion.ipynb --output test_data_insertion_output.ipynb
pyenv shell --unset
continue-on-error: false
- name: Check and upload core files if present
if: always()
run: |
Expand Down
15 changes: 15 additions & 0 deletions src/Client/ClientBase.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1777,6 +1777,21 @@ bool isStdinNotEmptyAndValid(ReadBuffer & std_in)
{
try
{
// Use non-blocking check for stdin to avoid hanging
if (auto * fd_buffer = typeid_cast<ReadBufferFromFileDescriptor *>(&std_in))
{
int fd = fd_buffer->getFD();
if (fd == STDIN_FILENO)
{
int flags = fcntl(fd, F_GETFL);
if (flags != -1)
{
fcntl(fd, F_SETFL, flags | O_NONBLOCK);
SCOPE_EXIT({ fcntl(fd, F_SETFL, flags); });
return !std_in.eof();
}
}
}
return !std_in.eof();
}
catch (const Exception & e)
Expand Down
165 changes: 165 additions & 0 deletions tests/test_data_insertion.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,165 @@
{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "jupyter": {
     "is_executing": true
    }
   },
   "outputs": [],
   "source": [
    "from chdb import session\n",
    "import time\n",
    "import tempfile\n",
    "import os\n",
    "\n",
    "print(\"Connecting to chdb session...\")\n",
    "chs = session.Session()\n",
    "\n",
    "# Write a temporary CSV with a header row. Because the file has a header,\n",
    "# every ClickHouse read of it below must use CSVWithNames, otherwise the\n",
    "# header line 'movieId,embedding' is parsed as data and fails as UInt32.\n",
    "temp_csv = tempfile.NamedTemporaryFile(mode='w', suffix='.csv', delete=False)\n",
    "temp_csv.write(\"movieId,embedding\\n\")  # Header\n",
    "\n",
    "# Generate 10,000 rows of test data\n",
    "for i in range(1, 10001):\n",
    "    embedding = [float(i + j * 0.1) for j in range(10)]\n",
    "    embedding_str = '[' + ','.join(map(str, embedding)) + ']'\n",
    "    temp_csv.write(f'{i},\"{embedding_str}\"\\n')\n",
    "\n",
    "temp_csv.close()\n",
    "csv_path = temp_csv.name\n",
    "\n",
    "# Setup database and table\n",
    "print(\"\\n=== Setup Phase ===\")\n",
    "chs.query(\"CREATE DATABASE IF NOT EXISTS test ENGINE = Atomic\")\n",
    "chs.query(\"USE test\")\n",
    "chs.query('DROP TABLE IF EXISTS embeddings')\n",
    "\n",
    "chs.query(\"\"\"CREATE TABLE embeddings (\n",
    "    movieId UInt32 NOT NULL,\n",
    "    embedding Array(Float32) NOT NULL\n",
    "    ) ENGINE = MergeTree()\n",
    "    ORDER BY movieId\"\"\")\n",
    "\n",
    "# Test 1: INFILE insertion (10k rows)\n",
    "print(\"\\n=== Test 1: INFILE Insertion (10k rows) ===\")\n",
    "start_time = time.time()\n",
    "try:\n",
    "    # CSVWithNames skips the header line written above; plain CSV would\n",
    "    # attempt to parse the header text as a UInt32 and the insert would fail.\n",
    "    result = chs.query(f\"INSERT INTO embeddings FROM INFILE '{csv_path}' FORMAT CSVWithNames\")\n",
    "    infile_time = time.time() - start_time\n",
    "    print(f\"✓ INFILE insertion successful! Time: {infile_time:.3f}s\")\n",
    "\n",
    "    count = chs.query('SELECT COUNT(*) as count FROM embeddings')\n",
    "    print(f\"Records inserted via INFILE: {count}\")\n",
    "\n",
    "    # The query result is an object, not a plain string; stringify and strip\n",
    "    # before comparing so the emptiness check actually works.\n",
    "    if str(count).strip() not in ('', '0'):\n",
    "        print(\"Sample data from INFILE:\")\n",
    "        sample = chs.query('SELECT movieId, embedding FROM embeddings ORDER BY movieId LIMIT 3')\n",
    "        print(sample)\n",
    "\n",
    "except Exception as e:\n",
    "    print(f\"✗ INFILE insertion failed: {e}\")\n",
    "    infile_time = 0\n",
    "\n",
    "# Test 2: Regular insertion (10 additional rows)\n",
    "print(\"\\n=== Test 2: Regular VALUES Insertion (10 rows) ===\")\n",
    "start_time = time.time()\n",
    "try:\n",
    "    # Insert 10 additional rows with movieId starting from 20001\n",
    "    for i in range(20001, 20011):\n",
    "        embedding = [float(i + j * 0.1) for j in range(10)]\n",
    "        embedding_str = '[' + ','.join(map(str, embedding)) + ']'\n",
    "        chs.query(f\"INSERT INTO embeddings VALUES ({i}, {embedding_str})\")\n",
    "\n",
    "    values_time = time.time() - start_time\n",
    "    print(f\"✓ VALUES insertion successful! Time: {values_time:.3f}s\")\n",
    "\n",
    "except Exception as e:\n",
    "    print(f\"✗ VALUES insertion failed: {e}\")\n",
    "    values_time = 0\n",
    "\n",
    "# Test 3: Verify total count\n",
    "print(\"\\n=== Test 3: Count Verification ===\")\n",
    "try:\n",
    "    total_count = chs.query('SELECT COUNT(*) as total FROM embeddings')\n",
    "    print(f\"Total records in embeddings table: {total_count}\")\n",
    "\n",
    "    # Count by range\n",
    "    infile_count = chs.query('SELECT COUNT(*) as infile_count FROM embeddings WHERE movieId <= 10000')\n",
    "    values_count = chs.query('SELECT COUNT(*) as values_count FROM embeddings WHERE movieId >= 20001')\n",
    "\n",
    "    print(f\"Records from INFILE (movieId <= 10000): {infile_count}\")\n",
    "    print(f\"Records from VALUES (movieId >= 20001): {values_count}\")\n",
    "\n",
    "    # Sample from both ranges\n",
    "    print(\"\\nSample from INFILE data:\")\n",
    "    print(chs.query('SELECT movieId, embedding FROM embeddings WHERE movieId <= 10000 ORDER BY movieId LIMIT 2'))\n",
    "\n",
    "    print(\"Sample from VALUES data:\")\n",
    "    print(chs.query('SELECT movieId, embedding FROM embeddings WHERE movieId >= 20001 ORDER BY movieId LIMIT 2'))\n",
    "\n",
    "except Exception as e:\n",
    "    print(f\"Count verification error: {e}\")\n",
    "\n",
    "# Test 4: Direct CSV engine reading\n",
    "print(\"\\n=== Test 4: CSV Engine Direct Reading ===\")\n",
    "try:\n",
    "    print(\"Reading generated CSV file directly using CSV engine:\")\n",
    "\n",
    "    # CSVWithNames: the file carries a header row, so the plain 'CSV' format\n",
    "    # would choke on it when parsing movieId as UInt32.\n",
    "    csv_count1 = chs.query(f\"SELECT COUNT(*) as csv_count FROM file('{csv_path}', 'CSVWithNames', 'movieId UInt32, embedding String')\")\n",
    "    print(f\"CSV file rows (via file() function): {csv_count1}\")\n",
    "\n",
    "    print(\"Sample rows from CSV file:\")\n",
    "    csv_sample = chs.query(f\"SELECT movieId, embedding FROM file('{csv_path}', 'CSVWithNames', 'movieId UInt32, embedding String') ORDER BY movieId LIMIT 3\")\n",
    "    print(csv_sample)\n",
    "\n",
    "    print(\"Last few rows from CSV file:\")\n",
    "    csv_tail = chs.query(f\"SELECT movieId, embedding FROM file('{csv_path}', 'CSVWithNames', 'movieId UInt32, embedding String') ORDER BY movieId DESC LIMIT 3\")\n",
    "    print(csv_tail)\n",
    "\n",
    "except Exception as e:\n",
    "    print(f\"CSV engine reading error: {e}\")\n",
    "\n",
    "# Cleanup\n",
    "print(\"\\n=== Cleanup ===\")\n",
    "try:\n",
    "    os.unlink(csv_path)\n",
    "    print(\"✓ Temporary CSV file cleaned up\")\n",
    "except Exception as e:\n",
    "    print(f\"Warning: Could not clean up temporary file: {e}\")\n",
    "\n",
    "print(f\"\\n=== Performance Summary ===\")\n",
    "if infile_time > 0:\n",
    "    print(f\"INFILE insertion (10k rows): {infile_time:.3f}s\")\n",
    "if values_time > 0:\n",
    "    print(f\"VALUES insertion (10 rows): {values_time:.3f}s\")\n",
    "    if infile_time > 0:\n",
    "        # Per-row speed ratio: (values_time / 10) / (infile_time / 10000)\n",
    "        print(f\"INFILE is {values_time/infile_time*1000:.1f}x faster per row\")"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": ".venv",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.13.7"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
Loading