@@ -5,6 +5,7 @@ use std::collections::HashMap;
5
5
use std:: convert:: TryInto ;
6
6
use std:: io:: { Read , Seek , SeekFrom } ;
7
7
use std:: mem;
8
+ use std:: vec:: Vec ;
8
9
9
10
use crate :: read:: ReadRef ;
10
11
@@ -24,6 +25,7 @@ pub struct ReadCache<R: Read + Seek> {
24
25
// Mutable state of a `ReadCache`: the wrapped reader plus the data cached from it.
struct ReadCacheInternal < R : Read + Seek > {
25
26
// The underlying seekable stream that data is read from.
read : R ,
26
27
// Cached byte ranges.
// NOTE(review): presumably keyed by (offset, size) — the code that fills
// this map is not visible here; confirm against `read_bytes_at`.
bufs : HashMap < ( u64 , u64 ) , Box < [ u8 ] > > ,
28
// Cached delimiter-terminated byte strings, keyed by (offset, delimiter).
// Each value holds the bytes before the delimiter (delimiter excluded).
+ strings : HashMap < ( u64 , u8 ) , Box < [ u8 ] > > ,
27
29
}
28
30
29
31
impl < R : Read + Seek > ReadCache < R > {
@@ -33,6 +35,7 @@ impl<R: Read + Seek> ReadCache<R> {
33
35
cache : RefCell :: new ( ReadCacheInternal {
34
36
read,
35
37
bufs : HashMap :: new ( ) ,
38
+ strings : HashMap :: new ( ) ,
36
39
} ) ,
37
40
}
38
41
}
@@ -86,6 +89,44 @@ impl<'a, R: Read + Seek> ReadRef<'a> for &'a ReadCache<R> {
86
89
// This is OK because we never mutate or remove entries.
87
90
Ok ( unsafe { mem:: transmute :: < & [ u8 ] , & [ u8 ] > ( buf) } )
88
91
}
92
+
93
+ fn read_bytes_at_until ( self , offset : u64 , delimiter : u8 ) -> Result < & ' a [ u8 ] , ( ) > {
94
+ let cache = & mut * self . cache . borrow_mut ( ) ;
95
+ let buf = match cache. strings . entry ( ( offset, delimiter) ) {
96
+ Entry :: Occupied ( entry) => entry. into_mut ( ) ,
97
+ Entry :: Vacant ( entry) => {
98
+ cache
99
+ . read
100
+ . seek ( SeekFrom :: Start ( offset as u64 ) )
101
+ . map_err ( |_| ( ) ) ?;
102
+ let mut bytes = Vec :: new ( ) ;
103
+ let mut checked = 0 ;
104
+ loop {
105
+ bytes. resize ( checked + 256 , 0 ) ;
106
+ let read = cache. read . read ( & mut bytes[ checked..] ) . map_err ( |_| ( ) ) ?;
107
+ if read == 0 {
108
+ return Err ( ( ) ) ;
109
+ }
110
+ match memchr:: memchr ( delimiter, & bytes[ checked..] [ ..read] ) {
111
+ Some ( len) => {
112
+ bytes. truncate ( checked + len) ;
113
+ break entry. insert ( bytes. into_boxed_slice ( ) ) ;
114
+ }
115
+ None => { }
116
+ }
117
+ checked += read;
118
+ // Strings should be relatively small.
119
+ // TODO: make this configurable?
120
+ if checked > 4096 {
121
+ return Err ( ( ) ) ;
122
+ }
123
+ }
124
+ }
125
+ } ;
126
+ // Extend the lifetime to that of self.
127
+ // This is OK because we never mutate or remove entries.
128
+ Ok ( unsafe { mem:: transmute :: < & [ u8 ] , & [ u8 ] > ( buf) } )
129
+ }
89
130
}
90
131
91
132
/// An implementation of `ReadRef` for a range of data in a stream that
@@ -127,4 +168,15 @@ impl<'a, R: Read + Seek> ReadRef<'a> for ReadCacheRange<'a, R> {
127
168
let r_offset = self . offset . checked_add ( offset) . ok_or ( ( ) ) ?;
128
169
self . r . read_bytes_at ( r_offset, size)
129
170
}
171
+
172
+ fn read_bytes_at_until ( self , offset : u64 , delimiter : u8 ) -> Result < & ' a [ u8 ] , ( ) > {
173
+ let r_offset = self . offset . checked_add ( offset) . ok_or ( ( ) ) ?;
174
+ let bytes = self . r . read_bytes_at_until ( r_offset, delimiter) ?;
175
+ let size = bytes. len ( ) . try_into ( ) . map_err ( |_| ( ) ) ?;
176
+ let end = offset. checked_add ( size) . ok_or ( ( ) ) ?;
177
+ if end > self . size {
178
+ return Err ( ( ) ) ;
179
+ }
180
+ Ok ( bytes)
181
+ }
130
182
}
0 commit comments