Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -1,27 +1,30 @@
{ lib
, beautifulsoup4
, buildPythonPackage
, fetchFromGitHub
, fetchPypi
, llama-index-core
, poetry-core
, pymupdf
, pypdf
, pytestCheckHook
, pythonOlder
, pythonRelaxDepsHook
, striprtf
}:

buildPythonPackage rec {
pname = "llama-index-readers-file";
version = "0.1.7";

inherit (llama-index-core) src meta;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

So llama-index-readers-file doesn't have to be kept in sync with llama-index-core?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No, it doesn't.


version = "0.1.12";
pyproject = true;

sourceRoot = "${src.name}/llama-index-integrations/readers/${pname}";
disabled = pythonOlder "3.8";

src = fetchPypi {
pname = "llama_index_readers_file";
inherit version;
hash = "sha256-YGXL+AsPtdGJVYuLkK273JKsuGFH/KGS2I/MJwStKvM=";
};

pythonRelaxDeps = [
"beautifulsoup4"
"pymupdf"
"pypdf"
];
Expand All @@ -30,23 +33,30 @@ buildPythonPackage rec {
"bs4"
];

nativeBuildInputs = [
build-system = [
poetry-core
pythonRelaxDepsHook
];

propagatedBuildInputs = [
dependencies = [
beautifulsoup4
llama-index-core
pymupdf
pypdf
striprtf
];

nativeCheckInputs = [
pytestCheckHook
];
# Tests are only available in the upstream monorepo, not in the PyPI source fetched here
doCheck = false;

pythonImportsCheck = [
"llama_index.readers.file"
];

meta = with lib; {
description = "LlamaIndex Readers Integration for files";
homepage = "https://github.com/run-llama/llama_index/tree/main/llama-index-integrations/readers/llama-index-readers-file";
license = licenses.mit;
maintainers = with maintainers; [ fab ];
};
}
80 changes: 74 additions & 6 deletions pkgs/development/python-modules/pymupdf/default.nix
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,12 @@

let
# PyMuPDF needs a MuPDF build with the C++ and Python bindings generated
mupdf-cxx = mupdf.override { enableOcr = true; enableCxx = true; enablePython = true; python3 = python; };
mupdf-cxx = mupdf.override {
enableOcr = true;
enableCxx = true;
enablePython = true;
python3 = python;
};
in buildPythonPackage rec {
pname = "pymupdf";
version = "1.23.26";
Expand All @@ -45,12 +50,12 @@ in buildPythonPackage rec {
hash = "sha256-m2zq04+PDnlzFuqeSt27UhdHXTHxpHdMPIg5RQl/5bQ=";
};

# swig is not wrapped as python package
# swig is not packaged as a Python package, so drop it from the pyproject.toml requirements
# libclang calls itself just clang in wheel metadata
postPatch = ''
substituteInPlace pyproject.toml \
--replace '"swig",' "" \
--replace "libclang" "clang"
--replace-fail '"swig",' "" \
--replace-fail "libclang" "clang"
'';

nativeBuildInputs = [
Expand Down Expand Up @@ -95,16 +100,79 @@ in buildPythonPackage rec {
fonttools
];

preCheck = ''
export PATH="$PATH:$out/bin";
'';

disabledTests = [
# fails for indeterminate reasons
"test_color_count"
"test_2753"
"test_2548"
"test_2753"
"test_3020"
"test_3050"
"test_3058"
"test_3177"
"test_3186"
"test_color_count"
"test_pilsave"
"test_fz_write_pixmap_as_jpeg"
# NotImplementedError
"test_1824"
"test_2093"
"test_2093"
"test_2108"
"test_2182"
"test_2182"
"test_2246"
"test_2270"
"test_2270"
"test_2391"
"test_2788"
"test_2861"
"test_2871"
"test_2886"
"test_2904"
"test_2922"
"test_2934"
"test_2957"
"test_2969"
"test_3070"
"test_3131"
"test_3140"
"test_3209"
"test_3209"
"test_caret"
"test_deletion"
"test_file_info"
"test_line"
"test_page_links_generator"
"test_polyline"
"test_redact"
"test_techwriter_append"
"test_text2"
# Issue with FzArchive
"test_htmlbox"
"test_2246"
"test_3140"
"test_fit_springer"
"test_write_stabilized_with_links"
"test_textbox"
"test_delete_image"
# Fonts not available
"test_fontarchive"
"test_subset_fonts"
# Exclude lint tests
"test_flake8"
] ++ lib.optionals stdenv.isDarwin [
# darwin does not support OCR right now
"test_tesseract"
];

disabledTestPaths = [
# Issue with FzArchive
"tests/test_docs_samples.py"
];

pythonImportsCheck = [
"fitz"
"fitz_old"
Expand Down