Skip to content

Commit

Permalink
crush: add a straw2 bucket type
Browse files Browse the repository at this point in the history
This is an improved straw bucket that correctly avoids any data movement
between items A and B when neither A nor B's weights are changed.  Said
differently, if we adjust the weight of item C (including adding it anew
or removing it completely), we will only see inputs move to or from C,
never between other items in the bucket.

Notably, there is no intermediate scaling factor that needs to be
calculated.  The mapping function is a simple function of the item weights.

Unfortunately, we need a natural log, which is expensive.  We use a lookup
table here.

Signed-off-by: Sage Weil <[email protected]>
  • Loading branch information
liewegas committed Jan 22, 2015
1 parent 6e084f6 commit 242293c
Show file tree
Hide file tree
Showing 11 changed files with 7,083 additions and 3 deletions.
2 changes: 2 additions & 0 deletions src/crush/CrushCompiler.cc
Original file line number Diff line number Diff line change
Expand Up @@ -439,6 +439,8 @@ int CrushCompiler::parse_bucket(iter_t const& i)
alg = CRUSH_BUCKET_TREE;
else if (a == "straw")
alg = CRUSH_BUCKET_STRAW;
else if (a == "straw2")
alg = CRUSH_BUCKET_STRAW2;
else {
err << "unknown bucket alg '" << a << "'" << std::endl << std::endl;
return -EINVAL;
Expand Down
18 changes: 18 additions & 0 deletions src/crush/CrushWrapper.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1081,6 +1081,12 @@ void CrushWrapper::encode(bufferlist& bl, bool lean) const
}
break;

case CRUSH_BUCKET_STRAW2:
for (unsigned j=0; j<crush->buckets[i]->size; j++) {
::encode((reinterpret_cast<crush_bucket_straw2*>(crush->buckets[i]))->item_weights[j], bl);
}
break;

default:
assert(0);
break;
Expand Down Expand Up @@ -1229,6 +1235,9 @@ void CrushWrapper::decode_crush_bucket(crush_bucket** bptr, bufferlist::iterator
case CRUSH_BUCKET_STRAW:
size = sizeof(crush_bucket_straw);
break;
case CRUSH_BUCKET_STRAW2:
size = sizeof(crush_bucket_straw2);
break;
default:
{
char str[128];
Expand Down Expand Up @@ -1292,6 +1301,15 @@ void CrushWrapper::decode_crush_bucket(crush_bucket** bptr, bufferlist::iterator
break;
}

case CRUSH_BUCKET_STRAW2: {
crush_bucket_straw2* cbs = reinterpret_cast<crush_bucket_straw2*>(bucket);
cbs->item_weights = (__u32*)calloc(1, bucket->size * sizeof(__u32));
for (unsigned j = 0; j < bucket->size; ++j) {
::decode(cbs->item_weights[j], blp);
}
break;
}

default:
// We should have handled this case in the first switch statement
assert(0);
Expand Down
180 changes: 180 additions & 0 deletions src/crush/builder.c
Original file line number Diff line number Diff line change
Expand Up @@ -603,6 +603,52 @@ crush_make_straw_bucket(struct crush_map *map,
return NULL;
}

/*
 * Allocate and populate a straw2 bucket.
 *
 * map      - not used by this constructor; accepted so the signature matches
 *            the other crush_make_*_bucket helpers
 * hash     - stored in the bucket header's hash field
 * type     - stored in the bucket header's type field
 * size     - number of entries in items[] and weights[]
 * items    - item ids to copy into the bucket
 * weights  - per-item weights, parallel to items[]
 *
 * Returns the new bucket, or NULL if size is negative, an allocation fails,
 * or the summed item weights would overflow the bucket weight.  The caller
 * owns the returned bucket.
 */
struct crush_bucket_straw2 *
crush_make_straw2_bucket(struct crush_map *map,
			 int hash,
			 int type,
			 int size,
			 int *items,
			 int *weights)
{
	struct crush_bucket_straw2 *bucket;
	int i;

	/* a negative count would turn into a huge allocation request */
	if (size < 0)
		return NULL;

	bucket = malloc(sizeof(*bucket));
	if (!bucket)
		return NULL;
	memset(bucket, 0, sizeof(*bucket));
	bucket->h.alg = CRUSH_BUCKET_STRAW2;
	bucket->h.hash = hash;
	bucket->h.type = type;
	bucket->h.size = size;

	/*
	 * An empty bucket is valid.  malloc(0) is allowed to return NULL,
	 * which would be mistaken for an allocation failure below, so leave
	 * the arrays NULL; the add-item path grows them with realloc().
	 */
	if (size == 0)
		return bucket;

	bucket->h.items = malloc(sizeof(__s32)*size);
	if (!bucket->h.items)
		goto err;
	bucket->h.perm = malloc(sizeof(__u32)*size);
	if (!bucket->h.perm)
		goto err;
	bucket->item_weights = malloc(sizeof(__u32)*size);
	if (!bucket->item_weights)
		goto err;

	bucket->h.weight = 0;
	for (i = 0; i < size; i++) {
		bucket->h.items[i] = items[i];
		bucket->item_weights[i] = weights[i];

		/* same overflow guard the add-item and reweight paths use */
		if (crush_addition_is_unsafe(bucket->h.weight, weights[i]))
			goto err;

		bucket->h.weight += weights[i];
	}

	return bucket;
err:
	/* the bucket was zeroed, so unallocated members are NULL here */
	free(bucket->item_weights);
	free(bucket->h.perm);
	free(bucket->h.items);
	free(bucket);
	return NULL;
}



struct crush_bucket*
Expand All @@ -629,6 +675,8 @@ crush_make_bucket(struct crush_map *map,

case CRUSH_BUCKET_STRAW:
return (struct crush_bucket *)crush_make_straw_bucket(map, hash, type, size, items, weights);
case CRUSH_BUCKET_STRAW2:
return (struct crush_bucket *)crush_make_straw2_bucket(map, hash, type, size, items, weights);
}
return 0;
}
Expand Down Expand Up @@ -808,6 +856,42 @@ int crush_add_straw_bucket_item(struct crush_map *map,
return crush_calc_straw(map, bucket);
}

/*
 * Append one item to a straw2 bucket.
 *
 * map     - not used by this function
 * bucket  - bucket to extend
 * item    - id of the item to append
 * weight  - weight of the new item
 *
 * Returns 0 on success, -ERANGE if adding weight would overflow the bucket
 * weight, or -ENOMEM if an array could not be grown.  On any failure the
 * bucket's size, weight and existing entries are unchanged (some arrays may
 * have been enlarged, which is harmless).
 */
int crush_add_straw2_bucket_item(struct crush_map *map,
				 struct crush_bucket_straw2 *bucket,
				 int item, int weight)
{
	int newsize = bucket->h.size + 1;
	void *grown = NULL;

	/* validate before touching anything so a rejected add does no work */
	if (crush_addition_is_unsafe(bucket->h.weight, weight))
		return -ERANGE;

	/*
	 * Grow each parallel array through a temporary so the original
	 * pointer is not lost if realloc() fails.
	 */
	grown = realloc(bucket->h.items, sizeof(__s32)*newsize);
	if (grown == NULL)
		return -ENOMEM;
	bucket->h.items = grown;

	grown = realloc(bucket->h.perm, sizeof(__u32)*newsize);
	if (grown == NULL)
		return -ENOMEM;
	bucket->h.perm = grown;

	grown = realloc(bucket->item_weights, sizeof(__u32)*newsize);
	if (grown == NULL)
		return -ENOMEM;
	bucket->item_weights = grown;

	bucket->h.items[newsize-1] = item;
	bucket->item_weights[newsize-1] = weight;

	bucket->h.weight += weight;
	bucket->h.size++;

	return 0;
}

int crush_bucket_add_item(struct crush_map *map,
struct crush_bucket *b, int item, int weight)
{
Expand All @@ -823,6 +907,8 @@ int crush_bucket_add_item(struct crush_map *map,
return crush_add_tree_bucket_item((struct crush_bucket_tree *)b, item, weight);
case CRUSH_BUCKET_STRAW:
return crush_add_straw_bucket_item(map, (struct crush_bucket_straw *)b, item, weight);
case CRUSH_BUCKET_STRAW2:
return crush_add_straw2_bucket_item(map, (struct crush_bucket_straw2 *)b, item, weight);
default:
return -1;
}
Expand Down Expand Up @@ -1034,6 +1120,50 @@ int crush_remove_straw_bucket_item(struct crush_map *map,
return crush_calc_straw(map, bucket);
}

/*
 * Remove the first occurrence of an item from a straw2 bucket.
 *
 * map     - not used by this function
 * bucket  - bucket to shrink
 * item    - id of the item to remove
 *
 * Returns 0 on success, -ENOENT if the item is not in the bucket, or -ENOMEM
 * if an array could not be shrunk.  In the -ENOMEM case the item has already
 * been removed and the bucket's size and weight already updated; only the
 * trimming of the arrays failed.
 */
int crush_remove_straw2_bucket_item(struct crush_map *map,
				    struct crush_bucket_straw2 *bucket, int item)
{
	unsigned i, j;
	void *shrunk = NULL;

	/* locate the item first; do not modify the bucket while searching */
	for (i = 0; i < bucket->h.size; i++)
		if (bucket->h.items[i] == item)
			break;

	/*
	 * This test must run before h.size is decremented: otherwise removing
	 * the last item leaves i equal to the new size and a successful
	 * removal is reported as -ENOENT.
	 */
	if (i == bucket->h.size)
		return -ENOENT;

	/* clamp at zero rather than wrapping if the weights are inconsistent */
	if (bucket->item_weights[i] < bucket->h.weight)
		bucket->h.weight -= bucket->item_weights[i];
	else
		bucket->h.weight = 0;

	/* close the gap in both parallel arrays */
	bucket->h.size--;
	for (j = i; j < bucket->h.size; j++) {
		bucket->h.items[j] = bucket->h.items[j+1];
		bucket->item_weights[j] = bucket->item_weights[j+1];
	}

	/*
	 * realloc(ptr, 0) may free ptr and return NULL, which would look like
	 * -ENOMEM and leave dangling pointers in the bucket.  Keep the
	 * existing allocations when the bucket becomes empty.
	 */
	if (bucket->h.size == 0)
		return 0;

	shrunk = realloc(bucket->h.items, sizeof(__s32)*bucket->h.size);
	if (shrunk == NULL)
		return -ENOMEM;
	bucket->h.items = shrunk;

	shrunk = realloc(bucket->h.perm, sizeof(__u32)*bucket->h.size);
	if (shrunk == NULL)
		return -ENOMEM;
	bucket->h.perm = shrunk;

	shrunk = realloc(bucket->item_weights, sizeof(__u32)*bucket->h.size);
	if (shrunk == NULL)
		return -ENOMEM;
	bucket->item_weights = shrunk;

	return 0;
}

int crush_bucket_remove_item(struct crush_map *map, struct crush_bucket *b, int item)
{
/* invalidate perm cache */
Expand All @@ -1048,6 +1178,8 @@ int crush_bucket_remove_item(struct crush_map *map, struct crush_bucket *b, int
return crush_remove_tree_bucket_item((struct crush_bucket_tree *)b, item);
case CRUSH_BUCKET_STRAW:
return crush_remove_straw_bucket_item(map, (struct crush_bucket_straw *)b, item);
case CRUSH_BUCKET_STRAW2:
return crush_remove_straw2_bucket_item(map, (struct crush_bucket_straw2 *)b, item);
default:
return -1;
}
Expand Down Expand Up @@ -1140,6 +1272,26 @@ int crush_adjust_straw_bucket_item_weight(struct crush_map *map,
return diff;
}

/*
 * Set the weight of one item in a straw2 bucket and keep the bucket's
 * total weight in step.
 *
 * map     - not used by this function
 * bucket  - bucket containing the item
 * item    - id of the item to reweight
 * weight  - new weight for that item
 *
 * Returns the change applied (new weight minus old weight), or 0 when the
 * item is not present in the bucket.
 */
int crush_adjust_straw2_bucket_item_weight(struct crush_map *map,
					   struct crush_bucket_straw2 *bucket,
					   int item, int weight)
{
	unsigned pos = 0;
	int delta;

	/* advance to the first slot holding this item, if any */
	while (pos < bucket->h.size && bucket->h.items[pos] != item)
		pos++;

	if (pos >= bucket->h.size)
		return 0;

	delta = weight - bucket->item_weights[pos];
	bucket->item_weights[pos] = weight;
	bucket->h.weight += delta;

	return delta;
}

int crush_bucket_adjust_item_weight(struct crush_map *map,
struct crush_bucket *b,
int item, int weight)
Expand All @@ -1158,6 +1310,10 @@ int crush_bucket_adjust_item_weight(struct crush_map *map,
return crush_adjust_straw_bucket_item_weight(map,
(struct crush_bucket_straw *)b,
item, weight);
case CRUSH_BUCKET_STRAW2:
return crush_adjust_straw2_bucket_item_weight(map,
(struct crush_bucket_straw2 *)b,
item, weight);
default:
return -1;
}
Expand Down Expand Up @@ -1263,6 +1419,28 @@ static int crush_reweight_straw_bucket(struct crush_map *crush, struct crush_buc
return 0;
}

/*
 * Recompute a straw2 bucket's total weight from its items.
 *
 * Items with a negative id refer to nested buckets, looked up at
 * crush->buckets[-1-id]; each one is reweighted first and its resulting
 * weight is copied into this bucket's item_weights[] slot.
 *
 * Returns 0 on success, or -ERANGE if the running total would overflow.
 */
static int crush_reweight_straw2_bucket(struct crush_map *crush, struct crush_bucket_straw2 *bucket)
{
	unsigned pos = 0;

	/* rebuild the total from scratch */
	bucket->h.weight = 0;

	while (pos < bucket->h.size) {
		int child_id = bucket->h.items[pos];

		if (child_id < 0) {
			struct crush_bucket *child = crush->buckets[-1-child_id];

			crush_reweight_bucket(crush, child);
			bucket->item_weights[pos] = child->weight;
		}

		if (crush_addition_is_unsafe(bucket->h.weight, bucket->item_weights[pos]))
			return -ERANGE;

		bucket->h.weight += bucket->item_weights[pos];
		pos++;
	}

	return 0;
}

int crush_reweight_bucket(struct crush_map *crush, struct crush_bucket *b)
{
switch (b->alg) {
Expand All @@ -1274,6 +1452,8 @@ int crush_reweight_bucket(struct crush_map *crush, struct crush_bucket *b)
return crush_reweight_tree_bucket(crush, (struct crush_bucket_tree *)b);
case CRUSH_BUCKET_STRAW:
return crush_reweight_straw_bucket(crush, (struct crush_bucket_straw *)b);
case CRUSH_BUCKET_STRAW2:
return crush_reweight_straw2_bucket(crush, (struct crush_bucket_straw2 *)b);
default:
return -1;
}
Expand Down
1 change: 1 addition & 0 deletions src/crush/crush.c
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ const char *crush_bucket_alg_name(int alg)
case CRUSH_BUCKET_LIST: return "list";
case CRUSH_BUCKET_TREE: return "tree";
case CRUSH_BUCKET_STRAW: return "straw";
case CRUSH_BUCKET_STRAW2: return "straw2";
default: return "unknown";
}
}
Expand Down
12 changes: 10 additions & 2 deletions src/crush/crush.h
Original file line number Diff line number Diff line change
Expand Up @@ -100,13 +100,15 @@ struct crush_rule {
* uniform O(1) poor poor
* list O(n) optimal poor
* tree O(log n) good good
* straw O(n) optimal optimal
* straw O(n) better better
* straw2 O(n) optimal optimal
*/
enum {
CRUSH_BUCKET_UNIFORM = 1,
CRUSH_BUCKET_LIST = 2,
CRUSH_BUCKET_TREE = 3,
CRUSH_BUCKET_STRAW = 4
CRUSH_BUCKET_STRAW = 4,
CRUSH_BUCKET_STRAW2 = 5,
};
extern const char *crush_bucket_alg_name(int alg);

Expand Down Expand Up @@ -153,6 +155,11 @@ struct crush_bucket_straw {
__u32 *straws; /* 16-bit fixed point */
};

/*
 * straw2 bucket: the common bucket header followed by one weight per item.
 * Code in this project casts between struct crush_bucket * and
 * struct crush_bucket_straw2 *, so the header must stay the first member.
 */
struct crush_bucket_straw2 {
struct crush_bucket h;
__u32 *item_weights; /* per-item weights, parallel to h.items; 16-bit fixed point */
};



/*
Expand Down Expand Up @@ -203,6 +210,7 @@ extern void crush_destroy_bucket_uniform(struct crush_bucket_uniform *b);
extern void crush_destroy_bucket_list(struct crush_bucket_list *b);
extern void crush_destroy_bucket_tree(struct crush_bucket_tree *b);
extern void crush_destroy_bucket_straw(struct crush_bucket_straw *b);
extern void crush_destroy_bucket_straw2(struct crush_bucket_straw2 *b);
extern void crush_destroy_bucket(struct crush_bucket *b);
extern void crush_destroy_rule(struct crush_rule *r);
extern void crush_destroy(struct crush_map *map);
Expand Down
Loading

0 comments on commit 242293c

Please sign in to comment.