-
-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathlib.rs
271 lines (238 loc) · 10.1 KB
/
lib.rs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
// borrow-complex-key-example
//
// Written in 2020 by Rain <[email protected]>
//
// To the extent possible under law, the author(s) have dedicated all copyright and related and
// neighboring rights to this software to the public domain worldwide. This software is distributed
// without any warranty.
//
// You should have received a copy of the CC0 Public Domain Dedication along with this software. If
// not, see <http://creativecommons.org/publicdomain/zero/1.0/>.
//! An example for how to implement Borrow for complex keys.
//!
//! Thanks to Ivan Dubrov (http://idubrov.name/rust/2018/06/01/tricking-the-hashmap.html) for the
//! inspiration.
#![allow(unused_imports)]
use proptest::prelude::*;
use proptest_derive::Arbitrary;
use std::borrow::Borrow;
use std::cmp::Ordering;
use std::collections::HashSet;
use std::collections::hash_map::DefaultHasher;
use std::hash::{Hash, Hasher};
#[test]
fn basic() {
// Consider a hash set of strings...
let mut hash_set: HashSet<String> = HashSet::new();
hash_set.insert("example-string".to_string());
// Ordinarily, you need a &String to look up keys.
let string_key: String = "example-string".to_string();
assert!(hash_set.contains(&string_key));
// But, it turns out you can also pass in a &str, not just a &String!
let str_key: &str = "example-string";
assert!(hash_set.contains(str_key));
// How does this work? It's all based on the Borrow trait.
// https://doc.rust-lang.org/std/borrow/trait.Borrow.html
//
// For an owned type O and a borrowed type B, O may implement Borrow<B> if:
// - it's possible to implement a function borrow(&self) -> &B
// - if implemented, Eq, Ord and Hash are *consistent* between O and B.
//
// Intuitively, "consistent" means that O and B have implementations of Eq/Ord/Hash that produce
// the same results. In most cases, this is going to mean that O and B are a 1:1 map.
//
// More formally, "consistent" means that, for *all* values of type O `owned1` and `owned2`, if
// `owned1.borrow()` produces `borrowed1`, and `owned2.borrow()` produces `borrowed2`:
//
// Eq: (owned1 == owned2) is always the same as (borrowed1 == borrowed2).
// Ord: owned1.cmp(owned2) is always the same as borrowed1.cmp(borrowed2).
// Hash: for all hashers, owned1 hashes to the same value as borrowed1 (and owned2 the same as
// borrowed2).
//
// String and str satisfy these conditions (in fact they use the same underlying code), so
// String implements Borrow<str>.
}
// But what about a user-defined type that's more complex than just a String? For example,
// consider this owned type:
#[derive(Clone, Debug, Eq, Hash, Ord, PartialEq, PartialOrd, Arbitrary)]
struct OwnedKey {
s: String,
bytes: Vec<u8>,
}
// (You might have noticed the "Arbitrary" above. Put a pin in that.)
// ... and this borrowed type:
#[derive(Copy, Clone, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
struct BorrowedKey<'a> {
s: &'a str,
bytes: &'a [u8],
}
#[test]
fn complex1() {
// They're basically the same type, modulo ownership. Can we take a hash set of owned keys...
let mut hash_set: HashSet<OwnedKey> = HashSet::new();
hash_set.insert(OwnedKey {
s: "foo".to_string(),
bytes: b"abc".to_vec(),
});
// and use a borrowed key to look things up, thereby eliminating the need to allocate a new
// owned key just for this?
let _borrowed_key = BorrowedKey {
s: "foo",
bytes: b"abc",
};
// assert!(hash_set.contains(&_borrowed_key));
}
// One's first instinct might be to try and write an impl of this sort.
// (any() is false, so the code below will never be compiled.)
#[cfg(any())]
impl<'a> Borrow<BorrowedKey<'a>> for OwnedKey {
fn borrow(&self) -> &BorrowedKey<'a> {
// ... uhh, what do we put here? We need to return a *reference* to a BorrowedKey.
// But unlike a String/str, there's no BorrowedKey hiding "inside" an OwnedKey.
// Seems like a dead end...
}
}
// It turns out that we can approach this in a different manner, using the power of trait objects!
//
// Here's how:
// (1) define a trait object that looks like this.
trait Key {
// (The lifetimes can be elided here, but are shown for clarity.)
fn key<'k>(&'k self) -> BorrowedKey<'k>;
}
// (2) Implement it for both the owned and borrowed versions.
impl Key for OwnedKey {
fn key<'k>(&'k self) -> BorrowedKey<'k> {
BorrowedKey {
s: self.s.as_str(),
bytes: self.bytes.as_slice(),
}
}
}
impl<'a> Key for BorrowedKey<'a> {
fn key<'k>(&'k self) -> BorrowedKey<'k> {
// This creates a copy of the BorrowedKey with the shorter lifetime 'k.
// 'a can be shortened to 'k because it is a *covariant* lifetime parameter.
// For more about lifetime variance, check out my other tutorial:
// https://github.com/sunshowers/lifetime-variance-example/
*self
}
}
// For the rest of this example, we're going to make trait objects of type &(dyn Key + 'a)
// central to our strategy.
//
// OK, so...
//
// (3) Implement Borrow<dyn Key> for OwnedKey.
impl<'a> Borrow<dyn Key + 'a> for OwnedKey {
fn borrow(&self) -> &(dyn Key + 'a) {
// This is a simple coercion from the concrete type to a trait object.
self
}
}
// Note that while we *could* impl<'a> Borrow<dyn Key + 'a> for BorrowedKey<'a>, we don't have to.
// https://doc.rust-lang.org/std/collections/struct.HashSet.html#method.contains requires
// T: Borrow<Q>. This means that Borrow only needs to be implemented for the type stored in the
// HashSet (or, correspondingly, the key type in a HashMap or BTreeMap).
// Now, remember that for Borrow to be valid, Eq, Hash and Ord need to be consistent. How do
// we ensure that? Let's see:
// (4) PartialEq and Eq turn out to be easy to do.
impl<'a> PartialEq for (dyn Key + 'a) {
fn eq(&self, other: &Self) -> bool {
// It's easy to see from the definition that the owned and borrowed types have a consistent
// implementation. (Don't worry, we're actually going to verify this.)
self.key().eq(&other.key())
}
}
impl<'a> Eq for (dyn Key + 'a) {}
// (5) PartialOrd and Ord are similar.
//
// A couple of notes:
// - Importantly, this relies on the fact that the derive implementations for PartialOrd and Ord use
// lexicographic ordering on struct member order.
// - You need to implement this if you're using a btree based data structure, not if you're only
// using hash-based data structures.
impl<'a> PartialOrd for (dyn Key + 'a) {
fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
self.key().partial_cmp(&other.key())
}
}
impl<'a> Ord for (dyn Key + 'a) {
fn cmp(&self, other: &Self) -> Ordering {
self.key().cmp(&other.key())
}
}
// (6) Hash also turns out to be easy to do in this case, though in some uncommon cases, getting a
// consistent impl may be trickier and may require implementing Hash by hand for the owned type.
//
// Implementing Hash is only necessary if you're using a hash-based data structure.
impl<'a> Hash for (dyn Key + 'a) {
fn hash<H: Hasher>(&self, state: &mut H) {
self.key().hash(state)
}
}
// That's it! Now, we have everything we need to do this.
#[test]
fn complex2() {
// This is the same situation as complex1() above.
let mut hash_set: HashSet<OwnedKey> = HashSet::new();
hash_set.insert(OwnedKey {
s: "foo".to_string(),
bytes: b"abc".to_vec(),
});
let borrowed_key = BorrowedKey {
s: "foo",
bytes: b"abc",
};
// And here it is!
//
// &borrowed_key should automatically be coerced into a &dyn Key, but in case it doesn't work,
// you can write:
//
// assert!(hash_set.contains(&borrowed_key as &dyn Key));
assert!(hash_set.contains(&borrowed_key));
}
// ... not so fast, though! We've attempted to satisfy the constraints required for the Borrow impl.
// We've got to test them.
//
// The constraints describe *properties* that must be satisfied. The best way to ensure they are is
// to use property-based testing.
//
// There's much more to property-based testing than we can cover here, but
// https://blog.jessitron.com/2013/04/25/property-based-testing-what-is-it/ is a good intro.
//
// We're going to use the proptest framework to write our property-based tests.
proptest! {
// Here's where that Arbitrary above is useful. It's a simple way to generate random values of
// your structure.
#[test]
fn consistent_borrow(owned1 in any::<OwnedKey>(), owned2 in any::<OwnedKey>()) {
// owned1 and owned2 will be populated with random values of OwnedKey. That's enough for us
// to start testing various properties.
//
// Reminder that what we want is for the *owned* and *borrowed* impls to be consistent.
// owned1 and owned2 are the owned keys. The borrowed impls are:
let borrowed1: &dyn Key = &owned1;
let borrowed2: &dyn Key = &owned2;
// Awesome! That's all the setup we need. Time to test all of this. First, equality:
assert_eq!(owned1 == owned2, borrowed1 == borrowed2, "consistent Eq");
// PartialOrd and Ord:
assert_eq!(owned1.partial_cmp(&owned2), borrowed1.partial_cmp(borrowed2), "consistent PartialOrd");
assert_eq!(owned1.cmp(&owned2), borrowed1.cmp(borrowed2), "consistent Ord");
// And finally, Hash. This requires a tiny bit of setup.
fn hash_output(x: impl Hash) -> u64 {
let mut hasher = DefaultHasher::new();
x.hash(&mut hasher);
hasher.finish()
}
assert_eq!(hash_output(&owned1), hash_output(&borrowed1), "consistent Hash");
assert_eq!(hash_output(&owned2), hash_output(&borrowed2), "consistent Hash");
// and that's it! Any implementation that satisfies these properties is a valid
// Borrow implementation. A property-based test guarantees that with high confidence.
//
// Here's some stuff to play around with:
// (1) does this work for enums as well?
// (2) try swapping the order of fields in either OwnedKey or BorrowedKey, and see what
// happens to this property test.
}
}