Skip to content

Commit 3fc1879

Browse files
authored
RUST-1892 Fix timestamp [de]serialization on big-endian machines (#465)
1 parent 546fbea commit 3fc1879

File tree

6 files changed

+38
-357
lines changed

6 files changed

+38
-357
lines changed

Diff for: src/bson.rs

+13-11
Original file line numberDiff line numberDiff line change
@@ -1029,19 +1029,21 @@ impl Display for Timestamp {
10291029
}
10301030

10311031
impl Timestamp {
1032-
pub(crate) fn to_le_i64(self) -> i64 {
1033-
let upper = (self.time.to_le() as u64) << 32;
1034-
let lower = self.increment.to_le() as u64;
1035-
1036-
(upper | lower) as i64
1032+
pub(crate) fn to_le_bytes(self) -> [u8; 8] {
1033+
let mut out = [0; 8];
1034+
out[0..4].copy_from_slice(&self.increment.to_le_bytes());
1035+
out[4..8].copy_from_slice(&self.time.to_le_bytes());
1036+
out
10371037
}
10381038

1039-
pub(crate) fn from_le_i64(val: i64) -> Self {
1040-
let ts = val.to_le();
1041-
1042-
Timestamp {
1043-
time: ((ts as u64) >> 32) as u32,
1044-
increment: (ts & 0xFFFF_FFFF) as u32,
1039+
pub(crate) fn from_le_bytes(bytes: [u8; 8]) -> Self {
1040+
let mut inc_bytes = [0; 4];
1041+
inc_bytes.copy_from_slice(&bytes[0..4]);
1042+
let mut time_bytes = [0; 4];
1043+
time_bytes.copy_from_slice(&bytes[4..8]);
1044+
Self {
1045+
increment: u32::from_le_bytes(inc_bytes),
1046+
time: u32::from_le_bytes(time_bytes),
10451047
}
10461048
}
10471049
}

Diff for: src/de/mod.rs

+6-225
Original file line numberDiff line numberDiff line change
@@ -33,12 +33,11 @@ pub use self::{
3333
use std::io::Read;
3434

3535
use crate::{
36-
bson::{Array, Bson, DbPointer, Document, JavaScriptCodeWithScope, Regex, Timestamp},
37-
oid::{self, ObjectId},
36+
bson::{Bson, Document, Timestamp},
37+
oid::ObjectId,
3838
raw::RawBinaryRef,
3939
ser::write_i32,
40-
spec::{self, BinarySubtype},
41-
Binary,
40+
spec::BinarySubtype,
4241
Decimal128,
4342
};
4443

@@ -49,7 +48,6 @@ use ::serde::{
4948

5049
pub(crate) use self::serde::{convert_unsigned_to_signed_raw, BsonVisitor};
5150

52-
#[cfg(test)]
5351
pub(crate) use self::raw::Deserializer as RawDeserializer;
5452

5553
pub(crate) const MAX_BSON_SIZE: i32 = 16 * 1024 * 1024;
@@ -73,30 +71,6 @@ enum DeserializerHint {
7371
RawBson,
7472
}
7573

76-
/// Run the provided closure, ensuring that over the course of its execution, exactly `length` bytes
77-
/// were read from the reader.
78-
pub(crate) fn ensure_read_exactly<F, R>(
79-
reader: &mut R,
80-
length: usize,
81-
error_message: &str,
82-
func: F,
83-
) -> Result<()>
84-
where
85-
F: FnOnce(&mut std::io::Cursor<Vec<u8>>) -> Result<()>,
86-
R: Read + ?Sized,
87-
{
88-
let mut buf = vec![0u8; length];
89-
reader.read_exact(&mut buf)?;
90-
let mut cursor = std::io::Cursor::new(buf);
91-
92-
func(&mut cursor)?;
93-
94-
if cursor.position() != length as u64 {
95-
return Err(Error::invalid_length(length, &error_message));
96-
}
97-
Ok(())
98-
}
99-
10074
pub(crate) fn read_string<R: Read + ?Sized>(reader: &mut R, utf8_lossy: bool) -> Result<String> {
10175
let len = read_i32(reader)?;
10276

@@ -141,20 +115,6 @@ pub(crate) fn read_bool<R: Read>(mut reader: R) -> Result<bool> {
141115
Ok(val != 0)
142116
}
143117

144-
fn read_cstring<R: Read + ?Sized>(reader: &mut R) -> Result<String> {
145-
let mut v = Vec::new();
146-
147-
loop {
148-
let c = read_u8(reader)?;
149-
if c == 0 {
150-
break;
151-
}
152-
v.push(c);
153-
}
154-
155-
Ok(String::from_utf8(v)?)
156-
}
157-
158118
#[inline]
159119
pub(crate) fn read_u8<R: Read + ?Sized>(reader: &mut R) -> Result<u8> {
160120
let mut buf = [0; 1];
@@ -192,138 +152,6 @@ fn read_f128<R: Read + ?Sized>(reader: &mut R) -> Result<Decimal128> {
192152
Ok(Decimal128 { bytes: buf })
193153
}
194154

195-
fn deserialize_array<R: Read + ?Sized>(reader: &mut R, utf8_lossy: bool) -> Result<Array> {
196-
let mut arr = Array::new();
197-
let length = read_i32(reader)?;
198-
199-
if !(MIN_BSON_DOCUMENT_SIZE..=MAX_BSON_SIZE).contains(&length) {
200-
return Err(Error::invalid_length(
201-
length as usize,
202-
&format!(
203-
"array length must be between {} and {}",
204-
MIN_BSON_DOCUMENT_SIZE, MAX_BSON_SIZE
205-
)
206-
.as_str(),
207-
));
208-
}
209-
210-
ensure_read_exactly(
211-
reader,
212-
(length as usize) - 4,
213-
"array length longer than contents",
214-
|cursor| {
215-
loop {
216-
let tag = read_u8(cursor)?;
217-
if tag == 0 {
218-
break;
219-
}
220-
221-
let (_, val) = deserialize_bson_kvp(cursor, tag, utf8_lossy)?;
222-
arr.push(val)
223-
}
224-
Ok(())
225-
},
226-
)?;
227-
228-
Ok(arr)
229-
}
230-
231-
pub(crate) fn deserialize_bson_kvp<R: Read + ?Sized>(
232-
reader: &mut R,
233-
tag: u8,
234-
utf8_lossy: bool,
235-
) -> Result<(String, Bson)> {
236-
use spec::ElementType;
237-
let key = read_cstring(reader)?;
238-
239-
let val = match ElementType::from(tag) {
240-
Some(ElementType::Double) => Bson::Double(read_f64(reader)?),
241-
Some(ElementType::String) => read_string(reader, utf8_lossy).map(Bson::String)?,
242-
Some(ElementType::EmbeddedDocument) => Document::from_reader(reader).map(Bson::Document)?,
243-
Some(ElementType::Array) => deserialize_array(reader, utf8_lossy).map(Bson::Array)?,
244-
Some(ElementType::Binary) => Bson::Binary(Binary::from_reader(reader)?),
245-
Some(ElementType::ObjectId) => {
246-
let mut objid = [0; 12];
247-
for x in &mut objid {
248-
*x = read_u8(reader)?;
249-
}
250-
Bson::ObjectId(oid::ObjectId::from_bytes(objid))
251-
}
252-
Some(ElementType::Boolean) => Bson::Boolean(read_bool(reader)?),
253-
Some(ElementType::Null) => Bson::Null,
254-
Some(ElementType::RegularExpression) => {
255-
Bson::RegularExpression(Regex::from_reader(reader)?)
256-
}
257-
Some(ElementType::JavaScriptCode) => {
258-
read_string(reader, utf8_lossy).map(Bson::JavaScriptCode)?
259-
}
260-
Some(ElementType::JavaScriptCodeWithScope) => {
261-
Bson::JavaScriptCodeWithScope(JavaScriptCodeWithScope::from_reader(reader, utf8_lossy)?)
262-
}
263-
Some(ElementType::Int32) => read_i32(reader).map(Bson::Int32)?,
264-
Some(ElementType::Int64) => read_i64(reader).map(Bson::Int64)?,
265-
Some(ElementType::Timestamp) => Bson::Timestamp(Timestamp::from_reader(reader)?),
266-
Some(ElementType::DateTime) => {
267-
// The int64 is UTC milliseconds since the Unix epoch.
268-
let time = read_i64(reader)?;
269-
Bson::DateTime(crate::DateTime::from_millis(time))
270-
}
271-
Some(ElementType::Symbol) => read_string(reader, utf8_lossy).map(Bson::Symbol)?,
272-
Some(ElementType::Decimal128) => read_f128(reader).map(Bson::Decimal128)?,
273-
Some(ElementType::Undefined) => Bson::Undefined,
274-
Some(ElementType::DbPointer) => Bson::DbPointer(DbPointer::from_reader(reader)?),
275-
Some(ElementType::MaxKey) => Bson::MaxKey,
276-
Some(ElementType::MinKey) => Bson::MinKey,
277-
None => {
278-
return Err(Error::UnrecognizedDocumentElementType {
279-
key,
280-
element_type: tag,
281-
})
282-
}
283-
};
284-
285-
Ok((key, val))
286-
}
287-
288-
impl Binary {
289-
pub(crate) fn from_reader<R: Read>(mut reader: R) -> Result<Self> {
290-
let mut len = read_i32(&mut reader)?;
291-
if !(0..=MAX_BSON_SIZE).contains(&len) {
292-
return Err(Error::invalid_length(
293-
len as usize,
294-
&format!("binary length must be between 0 and {}", MAX_BSON_SIZE).as_str(),
295-
));
296-
}
297-
let subtype = BinarySubtype::from(read_u8(&mut reader)?);
298-
299-
// Skip length data in old binary.
300-
if let BinarySubtype::BinaryOld = subtype {
301-
let data_len = read_i32(&mut reader)?;
302-
303-
if !(0..=(MAX_BSON_SIZE - 4)).contains(&data_len) {
304-
return Err(Error::invalid_length(
305-
data_len as usize,
306-
&format!("0x02 length must be between 0 and {}", MAX_BSON_SIZE - 4).as_str(),
307-
));
308-
}
309-
310-
if data_len + 4 != len {
311-
return Err(Error::invalid_length(
312-
data_len as usize,
313-
&"0x02 length did not match top level binary length",
314-
));
315-
}
316-
317-
len -= 4;
318-
}
319-
320-
let mut bytes = Vec::with_capacity(len as usize);
321-
322-
reader.take(len as u64).read_to_end(&mut bytes)?;
323-
Ok(Binary { subtype, bytes })
324-
}
325-
}
326-
327155
impl<'a> RawBinaryRef<'a> {
328156
pub(crate) fn from_slice_with_len_and_payload(
329157
mut bytes: &'a [u8],
@@ -368,29 +196,11 @@ impl<'a> RawBinaryRef<'a> {
368196
}
369197
}
370198

371-
impl DbPointer {
372-
pub(crate) fn from_reader<R: Read>(mut reader: R) -> Result<Self> {
373-
let ns = read_string(&mut reader, false)?;
374-
let oid = ObjectId::from_reader(&mut reader)?;
375-
Ok(DbPointer {
376-
namespace: ns,
377-
id: oid,
378-
})
379-
}
380-
}
381-
382-
impl Regex {
383-
pub(crate) fn from_reader<R: Read>(mut reader: R) -> Result<Self> {
384-
let pattern = read_cstring(&mut reader)?;
385-
let options = read_cstring(&mut reader)?;
386-
387-
Ok(Regex { pattern, options })
388-
}
389-
}
390-
391199
impl Timestamp {
392200
pub(crate) fn from_reader<R: Read>(mut reader: R) -> Result<Self> {
393-
read_i64(&mut reader).map(Timestamp::from_le_i64)
201+
let mut bytes = [0; 8];
202+
reader.read_exact(&mut bytes)?;
203+
Ok(Timestamp::from_le_bytes(bytes))
394204
}
395205
}
396206

@@ -402,35 +212,6 @@ impl ObjectId {
402212
}
403213
}
404214

405-
impl JavaScriptCodeWithScope {
406-
pub(crate) fn from_reader<R: Read>(mut reader: R, utf8_lossy: bool) -> Result<Self> {
407-
let length = read_i32(&mut reader)?;
408-
if length < MIN_CODE_WITH_SCOPE_SIZE {
409-
return Err(Error::invalid_length(
410-
length as usize,
411-
&format!(
412-
"code with scope length must be at least {}",
413-
MIN_CODE_WITH_SCOPE_SIZE
414-
)
415-
.as_str(),
416-
));
417-
} else if length > MAX_BSON_SIZE {
418-
return Err(Error::invalid_length(
419-
length as usize,
420-
&"code with scope length too large",
421-
));
422-
}
423-
424-
let mut buf = vec![0u8; (length - 4) as usize];
425-
reader.read_exact(&mut buf)?;
426-
427-
let mut slice = buf.as_slice();
428-
let code = read_string(&mut slice, utf8_lossy)?;
429-
let scope = Document::from_reader(&mut slice)?;
430-
Ok(JavaScriptCodeWithScope { code, scope })
431-
}
432-
}
433-
434215
/// Deserialize a `T` from the provided [`Bson`] value.
435216
///
436217
/// The [`Deserializer`] used by this function presents itself as human readable, whereas the

0 commit comments

Comments
 (0)