Skip to content

Commit 0da66de

Browse files
author
Mohan Srinivasan
committed
1) Add a "write around" caching mode to flashcache_wt. In this mode only
reads are cached. All writes result in cache invalidations and writes to disk. Suitable for read-mostly workloads. Use the new -r argument to flashcache_wt_create to choose "write around". 2) Make flashcache block checksums a compile-time selection.
1 parent 13ebd36 commit 0da66de

File tree

4 files changed

+137
-34
lines changed

4 files changed

+137
-34
lines changed

flashcache-wt/README

+8-4
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,13 @@
1-
flashcache-wt is a simple, non-persistent write through flashcache.
1+
flashcache-wt is a simple, non-persistent write-through and write-around
2+
flashcache.
23

34
It is a separate code base from flashcache (which is write back only).
45

56
Notes :
67
-----
78
1) flashcache-wt is non persistent, which means that on a cache remove
8-
(or a reboot), you will lose the cache entirely. Since the cache is write
9-
through, this will not result in any data loss.
9+
(or a reboot), you will lose the cache entirely. Since the cache is
10+
write-through/write-around, this will not result in any data loss.
1011
2) Built on 2.6.18, .20, .27-.32 successfully. Tested on .18, .20, .27
1112
and .32 successfully.
1213

@@ -18,10 +19,13 @@ Creating a flashcache-wt volume :
1819
-------------------------------
1920
flashcache_wt_create : Create a new flashcache-wt volume.
2021

21-
flashcache_wt_create [-s cache size] [-b block size] cachedevname ssd_devname disk_devname
22+
flashcache_wt_create [-r] [-s cache size] [-b block size] cachedevname ssd_devname disk_devname
2223

2324
Very similar to flashcache_create.
2425

26+
Note : The default is to create the cache write-through. Use the -r option to create the
27+
cache write-around.
28+
2529
Removing a flashcache-wt volume :
2630
----------------------------
2731
Use dmsetup remove to remove a flashcache-wt volume.

flashcache-wt/src/flashcache_wt.c

+111-25
Original file line numberDiff line numberDiff line change
@@ -117,6 +117,7 @@ int dm_io_async_bvec(unsigned int num_regions,
117117
}
118118
#endif
119119

120+
#ifdef FLASHCACHE_WT_CHECKSUMS
120121
static u_int64_t
121122
flashcache_wt_compute_checksum(struct bio *bio)
122123
{
@@ -175,6 +176,18 @@ flashcache_wt_validate_checksum(struct kcached_job *job)
175176
spin_unlock_irqrestore(&job->dmc->cache_spin_lock, flags);
176177
return retval;
177178
}
179+
#else /* FLASHCACHE_WT_CHECKSUMS */
180+
static void
181+
flashcache_wt_store_checksum(struct kcached_job *job)
182+
{
183+
}
184+
185+
static int
186+
flashcache_wt_validate_checksum(struct kcached_job *job)
187+
{
188+
return 0;
189+
}
190+
#endif /* FLASHCACHE_WT_CHECKSUMS */
178191

179192
static int
180193
jobs_init(void)
@@ -345,7 +358,9 @@ do_io(struct kcached_job *job)
345358
VERIFY(job->rw == WRITECACHE);
346359
/* Write to cache device */
347360
flashcache_wt_store_checksum(job);
361+
#ifdef FLASHCACHE_WT_CHECKSUMS
348362
dmc->checksum_store++;
363+
#endif /* FLASHCACHE_WT_CHECKSUMS */
349364
dmc->cache_writes++;
350365
r = dm_io_async_bvec(1, &job->cache, WRITE, bio->bi_io_vec + bio->bi_idx,
351366
flashcache_wt_io_callback, job);
@@ -636,7 +651,6 @@ cache_read(struct cache_c *dmc, struct bio *bio)
636651
(bio_rw(bio) == READ ? "READ":"READA"),
637652
bio->bi_sector, bio->bi_size);
638653

639-
dmc->reads++;
640654
spin_lock_irqsave(&dmc->cache_spin_lock, flags);
641655
res = cache_lookup(dmc, bio, &index);
642656
/* Cache Hit */
@@ -784,7 +798,6 @@ cache_write(struct cache_c *dmc, struct bio* bio)
784798
unsigned long flags;
785799
struct kcached_job *job;
786800

787-
dmc->writes++;
788801
spin_lock_irqsave(&dmc->cache_spin_lock, flags);
789802
if (cache_invalidate_blocks(dmc, bio) > 0) {
790803
/* A non zero return indicates an inprog invalidation */
@@ -863,7 +876,13 @@ flashcache_wt_map(struct dm_target *ti, struct bio *bio,
863876

864877
VERIFY(to_sector(bio->bi_size) <= dmc->block_size);
865878

866-
if (to_sector(bio->bi_size) != dmc->block_size) {
879+
if (bio_data_dir(bio) == READ)
880+
dmc->reads++;
881+
else
882+
dmc->writes++;
883+
884+
if (to_sector(bio->bi_size) != dmc->block_size ||
885+
(dmc->write_around_mode && (bio_data_dir(bio) == WRITE))) {
867886
spin_lock_irqsave(&dmc->cache_spin_lock, flags);
868887
(void)cache_invalidate_blocks(dmc, bio);
869888
spin_unlock_irqrestore(&dmc->cache_spin_lock, flags);
@@ -1002,7 +1021,18 @@ static int cache_ctr(struct dm_target *ti, unsigned int argc, char **argv)
10021021
}
10031022

10041023
if (argc >= 3) {
1005-
if (sscanf(argv[2], "%u", &dmc->block_size) != 1) {
1024+
if (sscanf(argv[2], "%d", &dmc->write_around_mode) != 1) {
1025+
ti->error = "flashcache-wt: Invalid mode";
1026+
r = -EINVAL;
1027+
goto bad4;
1028+
}
1029+
} else
1030+
dmc->write_around_mode = 0;
1031+
1032+
1033+
1034+
if (argc >= 4) {
1035+
if (sscanf(argv[3], "%u", &dmc->block_size) != 1) {
10061036
ti->error = "flashcache-wt: Invalid block size";
10071037
r = -EINVAL;
10081038
goto bad4;
@@ -1018,8 +1048,8 @@ static int cache_ctr(struct dm_target *ti, unsigned int argc, char **argv)
10181048
dmc->block_mask = dmc->block_size - 1;
10191049

10201050
/* dmc->size is specified in sectors here, and converted to blocks below */
1021-
if (argc >= 4) {
1022-
if (sscanf(argv[3], "%lu", &dmc->size) != 1) {
1051+
if (argc >= 5) {
1052+
if (sscanf(argv[4], "%lu", &dmc->size) != 1) {
10231053
ti->error = "flashcache-wt: Invalid cache size";
10241054
r = -EINVAL;
10251055
goto bad4;
@@ -1028,8 +1058,8 @@ static int cache_ctr(struct dm_target *ti, unsigned int argc, char **argv)
10281058
dmc->size = to_sector(dmc->cache_dev->bdev->bd_inode->i_size);
10291059
}
10301060

1031-
if (argc >= 5) {
1032-
if (sscanf(argv[4], "%u", &dmc->assoc) != 1) {
1061+
if (argc >= 6) {
1062+
if (sscanf(argv[5], "%u", &dmc->assoc) != 1) {
10331063
ti->error = "flashcache-wt: Invalid cache associativity";
10341064
r = -EINVAL;
10351065
goto bad4;
@@ -1042,7 +1072,7 @@ static int cache_ctr(struct dm_target *ti, unsigned int argc, char **argv)
10421072
}
10431073
} else
10441074
dmc->assoc = DEFAULT_CACHE_ASSOC;
1045-
1075+
10461076
/*
10471077
* Convert size (in sectors) to blocks.
10481078
* Then round size (in blocks now) down to a multiple of associativity
@@ -1098,7 +1128,9 @@ static int cache_ctr(struct dm_target *ti, unsigned int argc, char **argv)
10981128
/* Initialize the cache structs */
10991129
for (i = 0; i < dmc->size ; i++) {
11001130
dmc->cache[i].dbn = 0;
1131+
#ifdef FLASHCACHE_WT_CHECKSUMS
11011132
dmc->cache[i].checksum = 0;
1133+
#endif /* FLASHCACHE_WT_CHECKSUMS */
11021134
dmc->cache_state[i] = INVALID;
11031135
}
11041136

@@ -1117,9 +1149,11 @@ static int cache_ctr(struct dm_target *ti, unsigned int argc, char **argv)
11171149
dmc->cached_blocks = 0;
11181150
dmc->cache_wr_replace = 0;
11191151

1152+
#ifdef FLASHCACHE_WT_CHECKSUMS
11201153
dmc->checksum_store = 0;
11211154
dmc->checksum_valid = 0;
11221155
dmc->checksum_invalid = 0;
1156+
#endif /* FLASHCACHE_WT_CHECKSUMS */
11231157

11241158
ti->split_io = dmc->block_size;
11251159
ti->private = dmc;
@@ -1160,13 +1194,21 @@ cache_dtr(struct dm_target *ti)
11601194
else
11611195
read_hit_pct = 0;
11621196
DMINFO("stats: \n\treads(%lu), writes(%lu)\n", dmc->reads, dmc->writes);
1197+
#ifdef FLASHCACHE_WT_CHECKSUMS
11631198
DMINFO("\tcache hits(%lu), cache hit percent (%d)\n" \
11641199
"\treplacement(%lu), write replacement(%lu)\n" \
11651200
"\tread invalidates(%lu), write invalidates(%lu)\n" \
11661201
"\tchecksum store (%lu), checksum valid (%lu), checksum invalid(%lu)\n",
11671202
dmc->cache_hits, read_hit_pct, dmc->replace, dmc->cache_wr_replace,
1168-
dmc->rd_invalidates, dmc->wr_invalidates, dmc->checksum_store,
1169-
dmc->checksum_valid, dmc->checksum_invalid);
1203+
dmc->rd_invalidates, dmc->wr_invalidates,
1204+
dmc->checksum_store, dmc->checksum_valid, dmc->checksum_invalid);
1205+
#else
1206+
DMINFO("\tcache hits(%lu), cache hit percent (%d)\n" \
1207+
"\treplacement(%lu), write replacement(%lu)\n" \
1208+
"\tread invalidates(%lu), write invalidates(%lu)\n",
1209+
dmc->cache_hits, read_hit_pct, dmc->replace, dmc->cache_wr_replace,
1210+
dmc->rd_invalidates, dmc->wr_invalidates);
1211+
#endif
11701212
if (dmc->size > 0)
11711213
cache_pct = (dmc->cached_blocks * 100) / dmc->size;
11721214
else
@@ -1203,19 +1245,62 @@ flashcache_wt_status_info(struct cache_c *dmc, status_type_t type,
12031245
else
12041246
read_hit_pct = 0;
12051247
DMEMIT("stats: \n\treads(%lu), writes(%lu)\n", dmc->reads, dmc->writes);
1206-
DMEMIT("\tcache hits(%lu), cache hit percent (%d)\n" \
1207-
"\treplacement(%lu), write replacement(%lu)\n" \
1208-
"\tread invalidates(%lu), write invalidates(%lu)\n" \
1209-
"\tuncached reads(%lu), uncached writes(%lu)\n" \
1210-
"\tdisk reads(%lu), disk writes(%lu)\n" \
1211-
"\tcache reads(%lu), cache writes(%lu)\n" \
1212-
"\tchecksum store (%lu), checksum valid (%lu), checksum invalid(%lu)\n",
1213-
dmc->cache_hits, read_hit_pct, dmc->replace, dmc->cache_wr_replace,
1214-
dmc->rd_invalidates, dmc->wr_invalidates,
1215-
dmc->uncached_reads, dmc->uncached_writes,
1216-
dmc->disk_reads, dmc->disk_writes,
1217-
dmc->cache_reads, dmc->cache_writes,
1218-
dmc->checksum_store, dmc->checksum_valid, dmc->checksum_invalid);
1248+
1249+
#ifdef FLASHCACHE_WT_CHECKSUMS
1250+
if (dmc->write_around_mode == 0) {
1251+
DMEMIT("\tcache hits(%lu), cache hit percent (%d)\n" \
1252+
"\treplacement(%lu), write replacement(%lu)\n" \
1253+
"\tread invalidates(%lu), write invalidates(%lu)\n" \
1254+
"\tuncached reads(%lu), uncached writes(%lu)\n" \
1255+
"\tdisk reads(%lu), disk writes(%lu)\n" \
1256+
"\tcache reads(%lu), cache writes(%lu)\n" \
1257+
"\tchecksum store (%lu), checksum valid (%lu), checksum invalid(%lu)\n",
1258+
dmc->cache_hits, read_hit_pct, dmc->replace, dmc->cache_wr_replace,
1259+
dmc->rd_invalidates, dmc->wr_invalidates,
1260+
dmc->uncached_reads, dmc->uncached_writes,
1261+
dmc->disk_reads, dmc->disk_writes,
1262+
dmc->cache_reads, dmc->cache_writes,
1263+
dmc->checksum_store, dmc->checksum_valid, dmc->checksum_invalid);
1264+
} else {
1265+
DMEMIT("\tcache hits(%lu), cache hit percent (%d)\n" \
1266+
"\treplacement(%lu), read invalidates(%lu) write invalidates(%lu)\n" \
1267+
"\tuncached reads(%lu), uncached writes(%lu)\n" \
1268+
"\tdisk reads(%lu), disk writes(%lu)\n" \
1269+
"\tcache reads(%lu), cache writes(%lu)\n" \
1270+
"\tchecksum store (%lu), checksum valid (%lu), checksum invalid(%lu)\n",
1271+
dmc->cache_hits, read_hit_pct, dmc->replace,
1272+
dmc->rd_invalidates, dmc->wr_invalidates,
1273+
dmc->uncached_reads, dmc->uncached_writes,
1274+
dmc->disk_reads, dmc->disk_writes,
1275+
dmc->cache_reads, dmc->cache_writes,
1276+
dmc->checksum_store, dmc->checksum_valid, dmc->checksum_invalid);
1277+
}
1278+
#else /* FLASHCACHE_WT_CHECKSUMS */
1279+
if (dmc->write_around_mode == 0) {
1280+
DMEMIT("\tcache hits(%lu), cache hit percent (%d)\n" \
1281+
"\treplacement(%lu), write replacement(%lu)\n" \
1282+
"\tread invalidates(%lu), write invalidates(%lu)\n" \
1283+
"\tuncached reads(%lu), uncached writes(%lu)\n" \
1284+
"\tdisk reads(%lu), disk writes(%lu)\n" \
1285+
"\tcache reads(%lu), cache writes(%lu)\n",
1286+
dmc->cache_hits, read_hit_pct, dmc->replace, dmc->cache_wr_replace,
1287+
dmc->rd_invalidates, dmc->wr_invalidates,
1288+
dmc->uncached_reads, dmc->uncached_writes,
1289+
dmc->disk_reads, dmc->disk_writes,
1290+
dmc->cache_reads, dmc->cache_writes);
1291+
} else {
1292+
DMEMIT("\tcache hits(%lu), cache hit percent (%d)\n" \
1293+
"\treplacement(%lu), read invalidates(%lu) write invalidates(%lu)\n" \
1294+
"\tuncached reads(%lu), uncached writes(%lu)\n" \
1295+
"\tdisk reads(%lu), disk writes(%lu)\n" \
1296+
"\tcache reads(%lu), cache writes(%lu)\n",
1297+
dmc->cache_hits, read_hit_pct, dmc->replace,
1298+
dmc->rd_invalidates, dmc->wr_invalidates,
1299+
dmc->uncached_reads, dmc->uncached_writes,
1300+
dmc->disk_reads, dmc->disk_writes,
1301+
dmc->cache_reads, dmc->cache_writes);
1302+
}
1303+
#endif /* FLASHCACHE_WT_CHECKSUMS */
12191304
}
12201305

12211306
static void
@@ -1231,10 +1316,11 @@ flashcache_wt_status_table(struct cache_c *dmc, status_type_t type,
12311316
else
12321317
cache_pct = 0;
12331318
DMEMIT("conf:\n"\
1234-
"\tssd dev (%s), disk dev (%s)\n" \
1319+
"\tssd dev (%s), disk dev (%s) mode (%s)\n" \
12351320
"\tcapacity(%luM), associativity(%u), block size(%uK)\n" \
12361321
"\ttotal blocks(%lu), cached blocks(%lu), cache percent(%d)\n",
12371322
dmc->cache_devname, dmc->disk_devname,
1323+
((dmc->write_around_mode) ? "WRITE_AROUND" : "WRITETHROUGH"),
12381324
dmc->size*dmc->block_size>>11, dmc->assoc,
12391325
dmc->block_size>>(10-SECTOR_SHIFT),
12401326
dmc->size, dmc->cached_blocks, cache_pct);

flashcache-wt/src/flashcache_wt.h

+9-1
Original file line numberDiff line numberDiff line change
@@ -86,6 +86,8 @@ struct cache_c {
8686
u_int8_t *cache_state;
8787
u_int32_t *set_lru_next;
8888

89+
int write_around_mode;
90+
8991
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,22)
9092
struct dm_io_client *io_client; /* Client memory pool*/
9193
#endif
@@ -108,9 +110,13 @@ struct cache_c {
108110
unsigned long wr_invalidates; /* Number of write invalidations */
109111
unsigned long rd_invalidates; /* Number of read invalidations */
110112
unsigned long cached_blocks; /* Number of cached blocks */
113+
114+
#ifdef FLASHCACHE_WT_CHECKSUMS
111115
unsigned long checksum_store;
112116
unsigned long checksum_valid;
113-
unsigned long checksum_invalid;
117+
unsigned long checksum_invalid;
118+
#endif /* FLASHCACHE_WT_CHECKSUMS */
119+
114120
unsigned long cache_wr_replace;
115121
unsigned long uncached_reads;
116122
unsigned long uncached_writes;
@@ -124,7 +130,9 @@ struct cache_c {
124130
/* Cache block metadata structure */
125131
struct cacheblock {
126132
sector_t dbn; /* Sector number of the cached block */
133+
#ifdef FLASHCACHE_WT_CHECKSUMS
127134
u_int64_t checksum;
135+
#endif /* FLASHCACHE_WT_CHECKSUMS */
128136
};
129137

130138
/* Structure for a kcached job */

flashcache-wt/src/utils/flashcache_wt_create.c

+9-4
Original file line numberDiff line numberDiff line change
@@ -155,9 +155,10 @@ main(int argc, char **argv)
155155
char *disk_devname, *ssd_devname, *cachedev;
156156
sector_t block_size = 0, cache_size = 0;
157157
sector_t disk_devsize;
158+
int write_around = 0;
158159

159160
pname = argv[0];
160-
while ((c = getopt(argc, argv, "fs:b:v")) != -1) {
161+
while ((c = getopt(argc, argv, "fs:b:vr")) != -1) {
161162
switch (c) {
162163
case 's':
163164
cache_size = get_cache_size(optarg);
@@ -172,6 +173,9 @@ main(int argc, char **argv)
172173
case 'f':
173174
force = 1;
174175
break;
176+
case 'r':
177+
write_around = 1;
178+
break;
175179
case '?':
176180
usage(pname);
177181
}
@@ -200,9 +204,10 @@ main(int argc, char **argv)
200204
}
201205
printf("cachedev %s, ssd_devname %s, disk_devname %s\n",
202206
cachedev, ssd_devname, disk_devname);
203-
printf("block_size %lu, cache_size %lu\n", block_size, cache_size);
204-
sprintf(dmsetup_cmd, "echo 0 %lu flashcache-wt %s %s %lu ",
205-
disk_devsize, disk_devname, ssd_devname, block_size);
207+
printf("cache mode %s, block_size %lu, cache_size %lu\n",
208+
((write_around) ? "WRITE_AROUND" : "WRITE_THRU"), block_size, cache_size);
209+
sprintf(dmsetup_cmd, "echo 0 %lu flashcache-wt %s %s %d %lu ",
210+
disk_devsize, disk_devname, ssd_devname, write_around, block_size);
206211
if (cache_size > 0) {
207212
char cache_size_str[4096];
208213

0 commit comments

Comments
 (0)