Skip to content
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion mupdf-sys/mupdf
Submodule mupdf updated 308 files
13 changes: 13 additions & 0 deletions mupdf-sys/wrapper.c
Original file line number Diff line number Diff line change
Expand Up @@ -3281,3 +3281,16 @@ int32_t mupdf_highlight_selection(fz_context *ctx, fz_stext_page *page, fz_point
}
return count;
}

/*
 * Wrapper around fz_search_stext_page_cb that runs the search inside
 * fz_try/fz_catch.
 *
 * `page`, `needle`, `cb` and `opaque` are forwarded unchanged to
 * fz_search_stext_page_cb. If the fz_catch branch runs, the error is handed
 * to mupdf_save_error together with `errptr`.
 *
 * Returns whatever fz_search_stext_page_cb returned, or 0 (the initial value
 * of `count`) when that call did not complete.
 */
int32_t mupdf_search_stext_page_cb(fz_context *ctx, fz_stext_page *page, const char *needle, fz_search_callback_fn *cb, void *opaque, mupdf_error_t **errptr) {
/* Stays 0 unless the search call below returns normally. */
int32_t count = 0;
fz_try(ctx)
{
count = fz_search_stext_page_cb(ctx, page, needle, cb, opaque);
}
fz_catch(ctx)
{
/* Record the failure for the caller via `errptr`. */
mupdf_save_error(ctx, errptr);
}
return count;
}
6 changes: 3 additions & 3 deletions src/colorspace.rs
Original file line number Diff line number Diff line change
Expand Up @@ -214,9 +214,9 @@ mod test {
)
.unwrap();
assert_eq!(n, 3);
assert!((0.6..0.7).contains(&gray[0]));
assert!((0.6..0.7).contains(&gray[1]));
assert!((0.6..0.7).contains(&gray[2]));
assert!((0.59..0.61).contains(&gray[0]), "gray = {:?}", gray);
assert!((0.59..0.61).contains(&gray[1]), "gray = {:?}", gray);
assert!((0.59..0.61).contains(&gray[2]), "gray = {:?}", gray);
assert_eq!(gray[3], 0.0);
}
}
12 changes: 6 additions & 6 deletions src/display_list.rs
Original file line number Diff line number Diff line change
Expand Up @@ -140,19 +140,19 @@ mod test {
[Quad {
ul: Point {
x: 56.8,
y: 69.32512
y: 69.32953
},
ur: Point {
x: 115.85405,
y: 69.32512
x: 115.85159,
y: 69.32953
},
ll: Point {
x: 56.8,
y: 87.311844
y: 80.50292
},
lr: Point {
x: 115.85405,
y: 87.311844
x: 115.85159,
y: 80.50292
}
}]
);
Expand Down
14 changes: 7 additions & 7 deletions src/page.rs
Original file line number Diff line number Diff line change
Expand Up @@ -534,20 +534,20 @@ mod test {
[Quad {
ul: Point {
x: 56.8,
y: 69.32512,
y: 69.32953
},
ur: Point {
x: 115.85405,
y: 69.32512,
x: 115.85159,
y: 69.32953
},
ll: Point {
x: 56.8,
y: 87.311844,
y: 80.50292
},
lr: Point {
x: 115.85405,
y: 87.311844,
},
x: 115.85159,
y: 80.50292
}
}]
);

Expand Down
167 changes: 144 additions & 23 deletions src/text_page.rs
Original file line number Diff line number Diff line change
@@ -1,16 +1,19 @@
use std::convert::TryInto;
use std::ffi::CString;
use std::io::Read;
use std::marker::PhantomData;
use std::ptr;
use std::{
convert::TryInto,
ffi::{c_int, c_void, CString},
io::Read,
marker::PhantomData,
ptr::{self, NonNull},
slice,
};

use bitflags::bitflags;
use mupdf_sys::*;
use num_enum::TryFromPrimitive;

use crate::FFIAnalogue;
use crate::{
array::FzArray, context, rust_slice_to_ffi_ptr, rust_vec_from_ffi_ptr, Buffer, Error, Image,
Matrix, Point, Quad, Rect, WriteMode,
context, rust_slice_to_ffi_ptr, Buffer, Error, Image, Matrix, Point, Quad, Rect, WriteMode,
};

bitflags! {
Expand Down Expand Up @@ -51,20 +54,111 @@ impl TextPage {
}
}

pub fn search(&self, needle: &str, hit_max: u32) -> Result<FzArray<Quad>, Error> {
/// Collects every quad reported for `needle` on this page into a `Vec`.
///
/// Convenience wrapper over [`TextPage::search_cb`]: each batch of quads handed to the
/// callback is copied into the result, and the search is never aborted early.
pub fn search(&self, needle: &str) -> Result<Vec<Quad>, Error> {
    let mut hits = Vec::new();
    self.search_cb(needle, &mut hits, |acc, found| {
        // The borrowed quads only live for the duration of this call, so copy them out.
        acc.extend_from_slice(found);
        SearchHitResponse::ContinueSearch
    })?;
    Ok(hits)
}

/// Search through the page, finding all instances of `needle` and processing them through
/// `cb`.
///
/// `data` is handed back to `cb` as its first argument on every invocation; the second
/// argument is the slice of quads mupdf reported for that invocation. The value `cb`
/// returns is passed back to mupdf as an integer (see [`SearchHitResponse`]).
///
/// On success, the count produced by the underlying mupdf call is returned, converted to
/// `u32`.
///
/// # Errors
///
/// Fails if `needle` cannot be converted into a `CString`, or if `ffi_try!` yields an error
/// for the mupdf call.
///
/// # Lifetime of the quads
///
/// Note that the `&[Quad]` given to `cb` in its invocation lives only during the time that
/// `cb` is being evaluated. That means the following won't work or compile:
///
/// ```compile_fail
/// # use mupdf::{TextPage, Quad, text_page::SearchHitResponse};
/// # let text_page: TextPage = todo!();
/// let mut quads: Vec<&Quad> = Vec::new();
/// text_page.search_cb("search term", &mut quads, |v, quads: &[Quad]| {
/// v.extend(quads);
/// SearchHitResponse::ContinueSearch
/// }).unwrap();
/// ```
///
/// But the following will:
/// ```no_run
/// # use mupdf::{TextPage, Quad, text_page::SearchHitResponse};
/// # let text_page: TextPage = todo!();
/// let mut quads: Vec<Quad> = Vec::new();
/// text_page.search_cb("search term", &mut quads, |v, quads: &[Quad]| {
/// v.extend(quads.iter().cloned());
/// SearchHitResponse::ContinueSearch
/// }).unwrap();
/// ```
pub fn search_cb<T, F>(&self, needle: &str, data: &mut T, cb: F) -> Result<u32, Error>
where
T: ?Sized,
F: Fn(&mut T, &[Quad]) -> SearchHitResponse,
{
// Bundles the user's callback together with the user's data so that both can travel
// through the single `void *` that the FFI callback receives.
struct FnWithData<'parent, T: ?Sized, F>
where
F: Fn(&mut T, &[Quad]) -> SearchHitResponse,
{
data: &'parent mut T,
f: F,
}

let mut opaque = FnWithData { data, f: cb };

// The function handed to mupdf has to be a plain `fn` (no captured context) because it
// must be `unsafe extern "C"` to be usable across FFI. Everything it needs arrives
// through the `data` pointer instead.
unsafe extern "C" fn ffi_cb<T, F>(
_ctx: *mut fz_context,
data: *mut c_void,
num_quads: c_int,
hit_bbox: *mut fz_quad,
) -> c_int
where
T: ?Sized,
F: Fn(&mut T, &[Quad]) -> SearchHitResponse,
Quad: FFIAnalogue<FFIType = fz_quad>,
{
// Reinterpreting `fz_quad` as `Quad` relies on the `FFIAnalogue` bound above.
let quad_ptr = hit_bbox.cast::<Quad>();
// A null quad pointer leaves nothing to hand to the callback: skip it and let the
// search carry on.
let Some(nn) = NonNull::new(quad_ptr) else {
return SearchHitResponse::ContinueSearch as c_int;
};

// This guarantee is upheld by mupdf - it gives us back the same pointer (and therefore
// the same type) that we passed in as `opaque`.
let data = data.cast::<FnWithData<'_, T, F>>();

// A negative quad count cannot be turned into a slice length, so give up on decoding
// this invocation and let the search carry on.
let Ok(len) = usize::try_from(num_quads) else {
return SearchHitResponse::ContinueSearch as c_int;
};

// SAFETY: We've ensured `nn` is not null, and we're trusting the FFI layer for the other
// invariants (that it really points at `len` initialised quads, etc).
// NOTE(review): a mutable slice is built here but only lent to `f` as `&[Quad]`.
let slice = unsafe { slice::from_raw_parts_mut(nn.as_ptr(), len) };

// Get the function and the data
// SAFETY: Trusting that the FFI layer actually gave us this ptr
let f = unsafe { &(*data).f };
// SAFETY: Trusting that the FFI layer actually gave us this ptr
let data = unsafe { &mut (*data).data };

// And call the function with the data; its response goes back to mupdf as an integer.
// NOTE(review): if `f` panics, the unwind starts inside this `extern "C"` frame -
// confirm that is acceptable or wrap the call in `catch_unwind`.
f(data, slice) as c_int
}

let c_needle = CString::new(needle)?;
let hit_max = if hit_max < 1 { 16 } else { hit_max };
let mut hit_count = 0;
unsafe {
ffi_try!(mupdf_search_stext_page(
ffi_try!(mupdf_search_stext_page_cb(
context(),
self.inner,
c_needle.as_ptr(),
hit_max as _,
&mut hit_count
Some(ffi_cb::<T, F>),
&raw mut opaque as *mut c_void
))
}
.and_then(|quads| unsafe { rust_vec_from_ffi_ptr(quads, hit_count) })
.map(|count| count as u32)
}

pub fn highlight_selection(
Expand Down Expand Up @@ -98,6 +192,12 @@ impl Drop for TextPage {
}
}

/// What a search callback tells the search to do after it has processed a hit.
///
/// The enum is `#[repr(i32)]` and its discriminants are the integer values returned across
/// the FFI boundary by the callback passed to mupdf.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[repr(i32)]
pub enum SearchHitResponse {
    /// Keep searching for further hits (returned to mupdf as `0`).
    ContinueSearch = 0,
    /// Stop the search (returned to mupdf as `1`).
    AbortSearch = 1,
}

#[derive(Debug, Clone, Copy, PartialEq, TryFromPrimitive)]
#[repr(u32)]
pub enum TextBlockType {
Expand Down Expand Up @@ -262,7 +362,7 @@ impl<'a> Iterator for TextCharIter<'a> {

#[cfg(test)]
mod test {
use crate::{Document, TextPageOptions};
use crate::{text_page::SearchHitResponse, Document, TextPageOptions};

#[test]
fn test_text_page_search() {
Expand All @@ -271,31 +371,52 @@ mod test {
let doc = Document::open("tests/files/dummy.pdf").unwrap();
let page0 = doc.load_page(0).unwrap();
let text_page = page0.to_text_page(TextPageOptions::BLOCK_IMAGE).unwrap();
let hits = text_page.search("Dummy", 1).unwrap();
let hits = text_page.search("Dummy").unwrap();
assert_eq!(hits.len(), 1);
assert_eq!(
&*hits,
[Quad {
ul: Point {
x: 56.8,
y: 69.32512
y: 69.32953
},
ur: Point {
x: 115.85405,
y: 69.32512
x: 115.85159,
y: 69.32953
},
ll: Point {
x: 56.8,
y: 87.311844
y: 80.50292
},
lr: Point {
x: 115.85405,
y: 87.311844
x: 115.85159,
y: 80.50292
}
}]
);

let hits = text_page.search("Not Found", 1).unwrap();
let hits = text_page.search("Not Found").unwrap();
assert_eq!(hits.len(), 0);
}

/// Drives `search_cb` with a running total as the user data: the x coordinates of every
/// reported quad corner are summed, and the result is checked against the single expected
/// hit for "Dummy". A needle that is absent from the page must produce no hits.
#[test]
fn test_text_page_cb_search() {
    let document = Document::open("tests/files/dummy.pdf").unwrap();
    let first_page = document.load_page(0).unwrap();
    let stext = first_page
        .to_text_page(TextPageOptions::BLOCK_IMAGE)
        .unwrap();

    let mut x_total = 0.0;
    let hit_count = stext
        .search_cb("Dummy", &mut x_total, |total, quads| {
            for quad in quads.iter() {
                // Same corner order as the expected value below: ul, ur, ll, lr.
                let corner_sum = quad.ul.x + quad.ur.x + quad.ll.x + quad.lr.x;
                *total += corner_sum;
            }
            SearchHitResponse::ContinueSearch
        })
        .unwrap();
    assert_eq!(hit_count, 1);
    assert_eq!(x_total, 56.8 + 115.85159 + 56.8 + 115.85159);

    let misses = stext.search("Not Found").unwrap();
    assert!(misses.is_empty());
}
}
Loading