sylvan_cache.c
/*
* Copyright 2011-2016 Formal Methods and Tools, University of Twente
* Copyright 2016-2017 Tom van Dijk, Johannes Kepler University Linz
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <sylvan_int.h>
#include <sylvan_align.h>
#include <errno.h> // for errno
#include <string.h> // for strerror
#ifndef compiler_barrier
#define compiler_barrier() atomic_signal_fence(memory_order_seq_cst)
#endif

/**
 * This cache is designed to store a,b,c -> res, with a, b, c, res all 64-bit integers.
 *
 * Each cache bucket takes 32 bytes, 2 per cache line.
 * Each cache status bucket takes 4 bytes, 16 per cache line.
 * Therefore, a cache of size 2^N occupies 36*(2^N) bytes.
 */
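
/*
 * Usage sketch (illustrative only; compute() is hypothetical and not defined
 * in this file). A caller first probes the cache and only computes on a miss:
 *
 *     uint64_t opid = cache_next_opid();   // once, at operation setup
 *     uint64_t res;
 *     if (!cache_get(opid | a, b, c, &res)) {
 *         res = compute(a, b, c);          // the expensive operation
 *         cache_put(opid | a, b, c, res);
 *     }
 *
 * Both cache_get and cache_put may return 0 under contention; the cache is
 * lossy by design, so callers must always be prepared to recompute.
 */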

struct __attribute__((packed)) cache6_entry {
    uint64_t a;
    uint64_t b;
    uint64_t c;
    uint64_t res;
    uint64_t d;
    uint64_t e;
    uint64_t f;
    uint64_t res2;
};
typedef struct cache6_entry *cache6_entry_t;

struct __attribute__((packed)) cache_entry {
    uint64_t a;
    uint64_t b;
    uint64_t c;
    uint64_t res;
};
typedef struct cache_entry *cache_entry_t;

static size_t cache_size; // power of 2
static size_t cache_max; // power of 2
#if CACHE_MASK
static size_t cache_mask; // cache_size-1
#endif
static cache_entry_t cache_table;
static uint32_t* cache_status;
static _Atomic(uint64_t) next_opid;

uint64_t
cache_next_opid()
{
    return atomic_fetch_add(&next_opid, 1LL<<40);
}
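
// Each call hands out a fresh operation identifier in the top 24 bits of the
// key space (multiples of 1LL<<40, starting at 512LL<<40; see cache_create).
// The intent, as far as this file shows, is that an opid can be OR-ed with
// operand bits in the first key word without overlapping, as long as the
// operands stay below 1<<40.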

// status: 0x80000000 - bitlock
//         0x7fff0000 - hash (part of the 64-bit hash not used to position)
//         0x0000ffff - tag (every put increases tag field)
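//
// Worked example (illustrative value): for s = 0x12340007,
//   s & 0x80000000 == 0          -> the bucket is not locked
//   s & 0x7fff0000 == 0x12340000 -> stored hash bits, used as a cheap filter
//   s & 0x0000ffff == 0x0007     -> tag: the bucket has been written 7 times
// In the 6-key variants below, two adjacent 32-bit status words are treated
// as a single 64-bit word, and 0x04000000 is set in each half to mark the
// two buckets as parts of one 2-part entry.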

/* Rotating 64-bit FNV-1a hash */
static uint64_t
cache_hash(uint64_t a, uint64_t b, uint64_t c)
{
    const uint64_t prime = 1099511628211;
    uint64_t hash = 14695981039346656037LLU;
    hash = (hash ^ (a>>32));
    hash = (hash ^ a) * prime;
    hash = (hash ^ b) * prime;
    hash = (hash ^ c) * prime;
    return hash;
}

static uint64_t
cache_hash6(uint64_t a, uint64_t b, uint64_t c, uint64_t d, uint64_t e, uint64_t f)
{
    const uint64_t prime = 1099511628211;
    uint64_t hash = 14695981039346656037LLU;
    hash = (hash ^ (a>>32));
    hash = (hash ^ a) * prime;
    hash = (hash ^ b) * prime;
    hash = (hash ^ c) * prime;
    hash = (hash ^ d) * prime;
    hash = (hash ^ e) * prime;
    hash = (hash ^ f) * prime;
    return hash;
}

int
cache_get6(uint64_t a, uint64_t b, uint64_t c, uint64_t d, uint64_t e, uint64_t f, uint64_t *res1, uint64_t *res2)
{
    const uint64_t hash = cache_hash6(a, b, c, d, e, f);
    // a 6-key entry spans two adjacent 4-word buckets, hence the /2 indexing
#if CACHE_MASK
    _Atomic(uint64_t) *s_bucket = (_Atomic(uint64_t)*)cache_status + (hash & cache_mask)/2;
    cache6_entry_t bucket = (cache6_entry_t)cache_table + (hash & cache_mask)/2;
#else
    _Atomic(uint64_t) *s_bucket = (_Atomic(uint64_t)*)cache_status + (hash % cache_size)/2;
    cache6_entry_t bucket = (cache6_entry_t)cache_table + (hash % cache_size)/2;
#endif
    // the load can be relaxed: we check the status field again afterwards
    const uint64_t s = atomic_load_explicit(s_bucket, memory_order_relaxed);
    // abort if locked, or second part of a 2-part entry, or a different hash
    uint64_t x = ((hash>>32) & 0x7fff0000) | 0x04000000;
    x = x | (x<<32);
    if ((s & 0xffff0000ffff0000) != x) return 0;
    // abort if the key differs
    if (bucket->a != a || bucket->b != b || bucket->c != c) return 0;
    if (bucket->d != d || bucket->e != e || bucket->f != f) return 0;
    *res1 = bucket->res;
    if (res2) *res2 = bucket->res2;
    // validate: abort if the status field changed while we were reading
    return atomic_load_explicit(s_bucket, memory_order_acquire) == s ? 1 : 0;
}

int
cache_put6(uint64_t a, uint64_t b, uint64_t c, uint64_t d, uint64_t e, uint64_t f, uint64_t res1, uint64_t res2)
{
    const uint64_t hash = cache_hash6(a, b, c, d, e, f);
    // a 6-key entry spans two adjacent 4-word buckets, hence the /2 indexing
#if CACHE_MASK
    _Atomic(uint64_t) *s_bucket = (_Atomic(uint64_t)*)cache_status + (hash & cache_mask)/2;
    cache6_entry_t bucket = (cache6_entry_t)cache_table + (hash & cache_mask)/2;
#else
    _Atomic(uint64_t) *s_bucket = (_Atomic(uint64_t)*)cache_status + (hash % cache_size)/2;
    cache6_entry_t bucket = (cache6_entry_t)cache_table + (hash % cache_size)/2;
#endif
    // the load can be relaxed: the cas below claims the bucket
    uint64_t s = atomic_load_explicit(s_bucket, memory_order_relaxed);
    // abort if either half is locked
    if (s & 0x8000000080000000LL) return 0;
    // create the new status word: hash bits and 2-part marker in each half...
    uint64_t new_s = ((hash>>32) & 0x7fff0000) | 0x04000000;
    new_s |= (new_s<<32);
    // ...plus the incremented tag of each half
    new_s |= (((s>>32)+1)&0xffff)<<32;
    new_s |= (s+1)&0xffff;
    // use cas to claim the bucket (with both lock bits set)
    if (!atomic_compare_exchange_weak(s_bucket, &s, new_s | 0x8000000080000000LL)) return 0;
    // cas successful: write the data
    bucket->a = a;
    bucket->b = b;
    bucket->c = c;
    bucket->d = d;
    bucket->e = e;
    bucket->f = f;
    bucket->res = res1;
    bucket->res2 = res2;
    // release store publishes the data and unlocks the status field
    atomic_store_explicit(s_bucket, new_s, memory_order_release);
    return 1;
}
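
// The get/put pairs in this file follow roughly a seqlock discipline: put
// claims a bucket by cas-ing the lock bit into the status word, writes the
// entry, and publishes it with a release store of the unlocked status; get
// reads the status, reads the entry, and re-reads the status with acquire
// ordering to detect a concurrent overwrite. On any conflict both sides
// simply give up (return 0): the cache is lossy and callers recompute.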

int
cache_get(uint64_t a, uint64_t b, uint64_t c, uint64_t *res)
{
    const uint64_t hash = cache_hash(a, b, c);
#if CACHE_MASK
    _Atomic(uint32_t) *s_bucket = (_Atomic(uint32_t)*)cache_status + (hash & cache_mask);
    cache_entry_t bucket = cache_table + (hash & cache_mask);
#else
    _Atomic(uint32_t) *s_bucket = (_Atomic(uint32_t)*)cache_status + (hash % cache_size);
    cache_entry_t bucket = cache_table + (hash % cache_size);
#endif
    // the load can be relaxed: we check the status field again afterwards
    const uint32_t s = atomic_load_explicit(s_bucket, memory_order_relaxed);
    // abort if locked or if part of a 2-part cache entry
    if (s & 0xc0000000) return 0;
    // abort if a different hash
    if ((s ^ (hash>>32)) & 0x3fff0000) return 0;
    // abort if the key differs
    if (bucket->a != a || bucket->b != b || bucket->c != c) return 0;
    *res = bucket->res;
    // validate: abort if the status field changed while we were reading
    return atomic_load_explicit(s_bucket, memory_order_acquire) == s ? 1 : 0;
}

int
cache_put(uint64_t a, uint64_t b, uint64_t c, uint64_t res)
{
    const uint64_t hash = cache_hash(a, b, c);
#if CACHE_MASK
    _Atomic(uint32_t) *s_bucket = (_Atomic(uint32_t)*)cache_status + (hash & cache_mask);
    cache_entry_t bucket = cache_table + (hash & cache_mask);
#else
    _Atomic(uint32_t) *s_bucket = (_Atomic(uint32_t)*)cache_status + (hash % cache_size);
    cache_entry_t bucket = cache_table + (hash % cache_size);
#endif
    uint32_t s = atomic_load_explicit(s_bucket, memory_order_relaxed);
    // abort if locked
    if (s & 0x80000000) return 0;
    // we deliberately do not abort when the stored hash is identical:
    // in iscasmc that occasionally caused timeouts, so the check is disabled
    const uint32_t hash_mask = (hash>>32) & 0x3fff0000;
    // if ((s & 0x7fff0000) == hash_mask) return 0;
    // use cas to claim the bucket
    const uint32_t new_s = ((s+1) & 0x0000ffff) | hash_mask;
    if (!atomic_compare_exchange_weak(s_bucket, &s, new_s | 0x80000000)) return 0;
    // cas successful: write the data
    bucket->a = a;
    bucket->b = b;
    bucket->c = c;
    bucket->res = res;
    // release store publishes the data and unlocks the status field
    atomic_store_explicit(s_bucket, new_s, memory_order_release);
    return 1;
}

void
cache_create(size_t _cache_size, size_t _max_size)
{
#if CACHE_MASK
    // cache size must be a power of 2
    if (__builtin_popcountll(_cache_size) != 1 || __builtin_popcountll(_max_size) != 1) {
        fprintf(stderr, "cache_create: Table size must be a power of 2!\n");
        exit(1);
    }
#endif
    cache_size = _cache_size;
    cache_max = _max_size;
#if CACHE_MASK
    cache_mask = cache_size - 1;
#endif
    if (cache_size > cache_max) {
        fprintf(stderr, "cache_create: Table size must be <= max size!\n");
        exit(1);
    }
    cache_table = (cache_entry_t)alloc_aligned(cache_max * sizeof(struct cache_entry));
    cache_status = (uint32_t*)alloc_aligned(cache_max * sizeof(uint32_t));
    if (cache_table == 0 || cache_status == 0) {
        fprintf(stderr, "cache_create: Unable to allocate memory: %s!\n", strerror(errno));
        exit(1);
    }
    next_opid = 512LL << 40;
}
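
// Typical setup (sizes are illustrative, not mandated by this file):
//
//     cache_create(1ULL<<20, 1ULL<<25);   // 2^20 buckets now, at most 2^25
//     ...
//     cache_setsize(1ULL<<22);            // grow later; discards all entries
//     cache_free();
//
// With CACHE_MASK enabled both sizes must be powers of 2, and the initial
// size may not exceed the maximum.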

void
cache_free()
{
    free_aligned(cache_table, cache_max * sizeof(struct cache_entry));
    free_aligned(cache_status, cache_max * sizeof(uint32_t));
}

void
cache_clear()
{
    // a bit silly, but this works just fine, and does not require writing 0 everywhere...
    cache_free();
    cache_create(cache_size, cache_max);
}

void
cache_setsize(size_t size)
{
    // the easy solution: reallocate (note that this discards all cached entries)
    cache_free();
    cache_create(size, cache_max);
}

size_t
cache_getsize()
{
    return cache_size;
}

size_t
cache_getused()
{
    size_t result = 0;
    for (size_t i = 0; i < cache_size; i++) {
        uint32_t s = cache_status[i];
        if (s & 0x80000000) fprintf(stderr, "cache_getused: cache in use during cache_getused()\n");
        if (s) result++;
    }
    return result;
}

size_t
cache_getmaxsize()
{
    return cache_max;
}