Skip to content
10 changes: 8 additions & 2 deletions go/cmd/vtctldclient/command/shards.go
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ that shard.`,
// GenerateShardRanges outputs a set of shard ranges assuming a (mostly)
// equal distribution of N shards.
GenerateShardRanges = &cobra.Command{
Use: "GenerateShardRanges <num_shards>",
Use: "GenerateShardRanges <num_shards> [--hex-width=w]",
Short: "Print a set of shard ranges assuming a keyspace with N shards.",
DisableFlagsInUseLine: true,
Args: cobra.ExactArgs(1),
Expand All @@ -68,7 +68,7 @@ that shard.`,

cli.FinishedParsing(cmd)

shards, err := key.GenerateShardRanges(n)
shards, err := key.GenerateShardRanges(n, generateShardRangesOptions.HexWidth)
if err != nil {
return err
}
Expand Down Expand Up @@ -210,6 +210,10 @@ var createShardOptions = struct {
IncludeParent bool
}{}

// generateShardRangesOptions holds the flag values for the
// GenerateShardRanges command.
var generateShardRangesOptions struct {
	// HexWidth is bound to the --hex-width flag and passed to
	// key.GenerateShardRanges. The zero value (flag unset) leaves the
	// choice of width to that function.
	HexWidth int
}

func commandCreateShard(cmd *cobra.Command, args []string) error {
keyspace, shard, err := topoproto.ParseKeyspaceShard(cmd.Flags().Arg(0))
if err != nil {
Expand Down Expand Up @@ -663,6 +667,8 @@ func init() {

Root.AddCommand(GetShard)
Root.AddCommand(GetShardReplication)

GenerateShardRanges.Flags().IntVar(&generateShardRangesOptions.HexWidth, "hex-width", 0, "The number of hex characters to use for the shard range start and end. If not set or set to 0, it will be automatically computed based on the number of requested shards.")
Root.AddCommand(GenerateShardRanges)

RemoveShardCell.Flags().BoolVarP(&removeShardCellOptions.Force, "force", "f", false, "Proceed even if the cell's topology server cannot be reached. The assumption is that you turned down the entire cell, and just need to update the global topo data.")
Expand Down
69 changes: 27 additions & 42 deletions go/vt/key/key.go
Original file line number Diff line number Diff line change
Expand Up @@ -379,23 +379,31 @@ func EvenShardsKeyRange(i, n int) (*topodatapb.KeyRange, error) {
}

// GenerateShardRanges returns shard ranges assuming a keyspace with N shards.
func GenerateShardRanges(shards int) ([]string, error) {
var format string
var maxShards int

func GenerateShardRanges(shards int, hexWidth int) ([]string, error) {
switch {
case shards <= 0:
return nil, errors.New("shards must be greater than zero")
case shards == 1:
return []string{"-"}, nil
case shards <= 256:
format = "%02x"
maxShards = 256
if hexWidth == 0 {
hexWidth = 2
}
case shards <= 65536:
format = "%04x"
maxShards = 65536
if hexWidth == 0 {
hexWidth = 4
}
default:
return nil, errors.New("this function does not support more than 65336 shards in a single keyspace")
return nil, errors.New("this function does not support more than 65536 shards in a single keyspace")
}

maxShards := math.Pow(16, float64(hexWidth))
if shards > int(maxShards) {
return nil, fmt.Errorf("the given number of shards (%d) is too high for the given number of characters to use (%d)", shards, hexWidth)
}

format := fmt.Sprintf("%%0%dx", hexWidth)

rangeFormatter := func(start, end int) string {
var (
startKid string
Expand All @@ -406,47 +414,24 @@ func GenerateShardRanges(shards int) ([]string, error) {
startKid = fmt.Sprintf(format, start)
}

if end != maxShards {
if end != int(maxShards) {
endKid = fmt.Sprintf(format, end)
}

return fmt.Sprintf("%s-%s", startKid, endKid)
}

start := 0
end := 0

// If shards does not divide evenly into maxShards, then there is some lossiness,
// where each shard is smaller than it should technically be (if, for example, size == 25.6).
// If we choose to keep everything in ints, then we have two choices:
// - Have every shard in #numshards be a uniform size, tack on an additional shard
// at the end of the range to account for the loss. This is bad because if you ask for
// 7 shards, you'll actually get 7 uniform shards with 1 small shard, for 8 total shards.
// It's also bad because one shard will have much different data distribution than the rest.
// - Expand the final shard to include whatever is left in the keyrange. This will give the
// correct number of shards, which is good, but depending on how lossy each individual shard is,
// you could end with that final shard being significantly larger than the rest of the shards,
// so this doesn't solve the data distribution problem.
//
// By tracking the "real" end (both in the real number sense, and in the truthfulness of the value sense),
// we can re-truncate the integer end on each iteration, which spreads the lossiness more
// evenly across the shards.
//
// This implementation has no impact on shard numbers that are powers of 2, even at large numbers,
// which you can see in the tests.
size := float64(maxShards) / float64(shards)
realEnd := float64(0)
shardRanges := make([]string, 0, shards)

for i := 1; i < shards; i++ {
realEnd = float64(i) * size

end = int(realEnd)
shardRanges = append(shardRanges, rangeFormatter(start, end))
start = end
boundaries := make([]int, 0, shards+1)
for i := 0; i < shards; i++ {
boundaries = append(boundaries, int(float64(i)*maxShards/float64(shards)))
}

shardRanges = append(shardRanges, rangeFormatter(start, maxShards))
shardRanges := make([]string, 0, shards)
shardRanges = append(shardRanges, rangeFormatter(0, boundaries[1])) // first shard
for i := 1; i < shards-1; i++ {
shardRanges = append(shardRanges, rangeFormatter(boundaries[i], boundaries[i+1]))
}
shardRanges = append(shardRanges, rangeFormatter(boundaries[shards-1], int(maxShards))) // last shard

return shardRanges, nil
}
65 changes: 58 additions & 7 deletions go/vt/key/key_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1548,11 +1548,17 @@ func TestGenerateShardRanges(t *testing.T) {
[]string{"-01", "01-02", "02-03", "03-04", "04-05", "05-06", "06-07", "07-08", "08-09", "09-0a", "0a-0b", "0b-0c", "0c-0d", "0d-0e", "0e-0f", "0f-10", "10-11", "11-12", "12-13", "13-14", "14-15", "15-16", "16-17", "17-18", "18-19", "19-1a", "1a-1b", "1b-1c", "1c-1d", "1d-1e", "1e-1f", "1f-20", "20-21", "21-22", "22-23", "23-24", "24-25", "25-26", "26-27", "27-28", "28-29", "29-2a", "2a-2b", "2b-2c", "2c-2d", "2d-2e", "2e-2f", "2f-30", "30-31", "31-32", "32-33", "33-34", "34-35", "35-36", "36-37", "37-38", "38-39", "39-3a", "3a-3b", "3b-3c", "3c-3d", "3d-3e", "3e-3f", "3f-40", "40-41", "41-42", "42-43", "43-44", "44-45", "45-46", "46-47", "47-48", "48-49", "49-4a", "4a-4b", "4b-4c", "4c-4d", "4d-4e", "4e-4f", "4f-50", "50-51", "51-52", "52-53", "53-54", "54-55", "55-56", "56-57", "57-58", "58-59", "59-5a", "5a-5b", "5b-5c", "5c-5d", "5d-5e", "5e-5f", "5f-60", "60-61", "61-62", "62-63", "63-64", "64-65", "65-66", "66-67", "67-68", "68-69", "69-6a", "6a-6b", "6b-6c", "6c-6d", "6d-6e", "6e-6f", "6f-70", "70-71", "71-72", "72-73", "73-74", "74-75", "75-76", "76-77", "77-78", "78-79", "79-7a", "7a-7b", "7b-7c", "7c-7d", "7d-7e", "7e-7f", "7f-80", "80-81", "81-82", "82-83", "83-84", "84-85", "85-86", "86-87", "87-88", "88-89", "89-8a", "8a-8b", "8b-8c", "8c-8d", "8d-8e", "8e-8f", "8f-90", "90-91", "91-92", "92-93", "93-94", "94-95", "95-96", "96-97", "97-98", "98-99", "99-9a", "9a-9b", "9b-9c", "9c-9d", "9d-9e", "9e-9f", "9f-a0", "a0-a1", "a1-a2", "a2-a3", "a3-a4", "a4-a5", "a5-a6", "a6-a7", "a7-a8", "a8-a9", "a9-aa", "aa-ab", "ab-ac", "ac-ad", "ad-ae", "ae-af", "af-b0", "b0-b1", "b1-b2", "b2-b3", "b3-b4", "b4-b5", "b5-b6", "b6-b7", "b7-b8", "b8-b9", "b9-ba", "ba-bb", "bb-bc", "bc-bd", "bd-be", "be-bf", "bf-c0", "c0-c1", "c1-c2", "c2-c3", "c3-c4", "c4-c5", "c5-c6", "c6-c7", "c7-c8", "c8-c9", "c9-ca", "ca-cb", "cb-cc", "cc-cd", "cd-ce", "ce-cf", "cf-d0", "d0-d1", "d1-d2", "d2-d3", "d3-d4", "d4-d5", "d5-d6", "d6-d7", "d7-d8", "d8-d9", "d9-da", "da-db", "db-dc", "dc-dd", 
"dd-de", "de-df", "df-e0", "e0-e1", "e1-e2", "e2-e3", "e3-e4", "e4-e5", "e5-e6", "e6-e7", "e7-e8", "e8-e9", "e9-ea", "ea-eb", "eb-ec", "ec-ed", "ed-ee", "ee-ef", "ef-f0", "f0-f1", "f1-f2", "f2-f3", "f3-f4", "f4-f5", "f5-f6", "f6-f7", "f7-f8", "f8-f9", "f9-fa", "fa-fb", "fb-fc", "fc-fd", "fd-fe", "fe-ff", "ff-"},
false,
},
{
"works for very large number of shards",
args{512},
[]string{"-0080", "0080-0100", "0100-0180", "0180-0200", "0200-0280", "0280-0300", "0300-0380", "0380-0400", "0400-0480", "0480-0500", "0500-0580", "0580-0600", "0600-0680", "0680-0700", "0700-0780", "0780-0800", "0800-0880", "0880-0900", "0900-0980", "0980-0a00", "0a00-0a80", "0a80-0b00", "0b00-0b80", "0b80-0c00", "0c00-0c80", "0c80-0d00", "0d00-0d80", "0d80-0e00", "0e00-0e80", "0e80-0f00", "0f00-0f80", "0f80-1000", "1000-1080", "1080-1100", "1100-1180", "1180-1200", "1200-1280", "1280-1300", "1300-1380", "1380-1400", "1400-1480", "1480-1500", "1500-1580", "1580-1600", "1600-1680", "1680-1700", "1700-1780", "1780-1800", "1800-1880", "1880-1900", "1900-1980", "1980-1a00", "1a00-1a80", "1a80-1b00", "1b00-1b80", "1b80-1c00", "1c00-1c80", "1c80-1d00", "1d00-1d80", "1d80-1e00", "1e00-1e80", "1e80-1f00", "1f00-1f80", "1f80-2000", "2000-2080", "2080-2100", "2100-2180", "2180-2200", "2200-2280", "2280-2300", "2300-2380", "2380-2400", "2400-2480", "2480-2500", "2500-2580", "2580-2600", "2600-2680", "2680-2700", "2700-2780", "2780-2800", "2800-2880", "2880-2900", "2900-2980", "2980-2a00", "2a00-2a80", "2a80-2b00", "2b00-2b80", "2b80-2c00", "2c00-2c80", "2c80-2d00", "2d00-2d80", "2d80-2e00", "2e00-2e80", "2e80-2f00", "2f00-2f80", "2f80-3000", "3000-3080", "3080-3100", "3100-3180", "3180-3200", "3200-3280", "3280-3300", "3300-3380", "3380-3400", "3400-3480", "3480-3500", "3500-3580", "3580-3600", "3600-3680", "3680-3700", "3700-3780", "3780-3800", "3800-3880", "3880-3900", "3900-3980", "3980-3a00", "3a00-3a80", "3a80-3b00", "3b00-3b80", "3b80-3c00", "3c00-3c80", "3c80-3d00", "3d00-3d80", "3d80-3e00", "3e00-3e80", "3e80-3f00", "3f00-3f80", "3f80-4000", "4000-4080", "4080-4100", "4100-4180", "4180-4200", "4200-4280", "4280-4300", "4300-4380", "4380-4400", "4400-4480", "4480-4500", "4500-4580", "4580-4600", "4600-4680", "4680-4700", "4700-4780", "4780-4800", "4800-4880", "4880-4900", "4900-4980", "4980-4a00", "4a00-4a80", "4a80-4b00", "4b00-4b80", "4b80-4c00", "4c00-4c80", 
"4c80-4d00", "4d00-4d80", "4d80-4e00", "4e00-4e80", "4e80-4f00", "4f00-4f80", "4f80-5000", "5000-5080", "5080-5100", "5100-5180", "5180-5200", "5200-5280", "5280-5300", "5300-5380", "5380-5400", "5400-5480", "5480-5500", "5500-5580", "5580-5600", "5600-5680", "5680-5700", "5700-5780", "5780-5800", "5800-5880", "5880-5900", "5900-5980", "5980-5a00", "5a00-5a80", "5a80-5b00", "5b00-5b80", "5b80-5c00", "5c00-5c80", "5c80-5d00", "5d00-5d80", "5d80-5e00", "5e00-5e80", "5e80-5f00", "5f00-5f80", "5f80-6000", "6000-6080", "6080-6100", "6100-6180", "6180-6200", "6200-6280", "6280-6300", "6300-6380", "6380-6400", "6400-6480", "6480-6500", "6500-6580", "6580-6600", "6600-6680", "6680-6700", "6700-6780", "6780-6800", "6800-6880", "6880-6900", "6900-6980", "6980-6a00", "6a00-6a80", "6a80-6b00", "6b00-6b80", "6b80-6c00", "6c00-6c80", "6c80-6d00", "6d00-6d80", "6d80-6e00", "6e00-6e80", "6e80-6f00", "6f00-6f80", "6f80-7000", "7000-7080", "7080-7100", "7100-7180", "7180-7200", "7200-7280", "7280-7300", "7300-7380", "7380-7400", "7400-7480", "7480-7500", "7500-7580", "7580-7600", "7600-7680", "7680-7700", "7700-7780", "7780-7800", "7800-7880", "7880-7900", "7900-7980", "7980-7a00", "7a00-7a80", "7a80-7b00", "7b00-7b80", "7b80-7c00", "7c00-7c80", "7c80-7d00", "7d00-7d80", "7d80-7e00", "7e00-7e80", "7e80-7f00", "7f00-7f80", "7f80-8000", "8000-8080", "8080-8100", "8100-8180", "8180-8200", "8200-8280", "8280-8300", "8300-8380", "8380-8400", "8400-8480", "8480-8500", "8500-8580", "8580-8600", "8600-8680", "8680-8700", "8700-8780", "8780-8800", "8800-8880", "8880-8900", "8900-8980", "8980-8a00", "8a00-8a80", "8a80-8b00", "8b00-8b80", "8b80-8c00", "8c00-8c80", "8c80-8d00", "8d00-8d80", "8d80-8e00", "8e00-8e80", "8e80-8f00", "8f00-8f80", "8f80-9000", "9000-9080", "9080-9100", "9100-9180", "9180-9200", "9200-9280", "9280-9300", "9300-9380", "9380-9400", "9400-9480", "9480-9500", "9500-9580", "9580-9600", "9600-9680", "9680-9700", "9700-9780", "9780-9800", "9800-9880", "9880-9900", 
"9900-9980", "9980-9a00", "9a00-9a80", "9a80-9b00", "9b00-9b80", "9b80-9c00", "9c00-9c80", "9c80-9d00", "9d00-9d80", "9d80-9e00", "9e00-9e80", "9e80-9f00", "9f00-9f80", "9f80-a000", "a000-a080", "a080-a100", "a100-a180", "a180-a200", "a200-a280", "a280-a300", "a300-a380", "a380-a400", "a400-a480", "a480-a500", "a500-a580", "a580-a600", "a600-a680", "a680-a700", "a700-a780", "a780-a800", "a800-a880", "a880-a900", "a900-a980", "a980-aa00", "aa00-aa80", "aa80-ab00", "ab00-ab80", "ab80-ac00", "ac00-ac80", "ac80-ad00", "ad00-ad80", "ad80-ae00", "ae00-ae80", "ae80-af00", "af00-af80", "af80-b000", "b000-b080", "b080-b100", "b100-b180", "b180-b200", "b200-b280", "b280-b300", "b300-b380", "b380-b400", "b400-b480", "b480-b500", "b500-b580", "b580-b600", "b600-b680", "b680-b700", "b700-b780", "b780-b800", "b800-b880", "b880-b900", "b900-b980", "b980-ba00", "ba00-ba80", "ba80-bb00", "bb00-bb80", "bb80-bc00", "bc00-bc80", "bc80-bd00", "bd00-bd80", "bd80-be00", "be00-be80", "be80-bf00", "bf00-bf80", "bf80-c000", "c000-c080", "c080-c100", "c100-c180", "c180-c200", "c200-c280", "c280-c300", "c300-c380", "c380-c400", "c400-c480", "c480-c500", "c500-c580", "c580-c600", "c600-c680", "c680-c700", "c700-c780", "c780-c800", "c800-c880", "c880-c900", "c900-c980", "c980-ca00", "ca00-ca80", "ca80-cb00", "cb00-cb80", "cb80-cc00", "cc00-cc80", "cc80-cd00", "cd00-cd80", "cd80-ce00", "ce00-ce80", "ce80-cf00", "cf00-cf80", "cf80-d000", "d000-d080", "d080-d100", "d100-d180", "d180-d200", "d200-d280", "d280-d300", "d300-d380", "d380-d400", "d400-d480", "d480-d500", "d500-d580", "d580-d600", "d600-d680", "d680-d700", "d700-d780", "d780-d800", "d800-d880", "d880-d900", "d900-d980", "d980-da00", "da00-da80", "da80-db00", "db00-db80", "db80-dc00", "dc00-dc80", "dc80-dd00", "dd00-dd80", "dd80-de00", "de00-de80", "de80-df00", "df00-df80", "df80-e000", "e000-e080", "e080-e100", "e100-e180", "e180-e200", "e200-e280", "e280-e300", "e300-e380", "e380-e400", "e400-e480", "e480-e500", "e500-e580", 
"e580-e600", "e600-e680", "e680-e700", "e700-e780", "e780-e800", "e800-e880", "e880-e900", "e900-e980", "e980-ea00", "ea00-ea80", "ea80-eb00", "eb00-eb80", "eb80-ec00", "ec00-ec80", "ec80-ed00", "ed00-ed80", "ed80-ee00", "ee00-ee80", "ee80-ef00", "ef00-ef80", "ef80-f000", "f000-f080", "f080-f100", "f100-f180", "f180-f200", "f200-f280", "f280-f300", "f300-f380", "f380-f400", "f400-f480", "f480-f500", "f500-f580", "f580-f600", "f600-f680", "f680-f700", "f700-f780", "f780-f800", "f800-f880", "f880-f900", "f900-f980", "f980-fa00", "fa00-fa80", "fa80-fb00", "fb00-fb80", "fb80-fc00", "fc00-fc80", "fc80-fd00", "fd00-fd80", "fd80-fe00", "fe00-fe80", "fe80-ff00", "ff00-ff80", "ff80-"},
false,
},
}

for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
got, err := GenerateShardRanges(tt.args.shards)
got, err := GenerateShardRanges(tt.args.shards, 0)
if tt.wantErr {
assert.Error(t, err)
return
Expand All @@ -1567,7 +1573,7 @@ func TestGenerateShardRanges(t *testing.T) {
func TestGenerateShardRangesForManyShards(t *testing.T) {
for i := 1; i <= 1024; i++ {
t.Run(fmt.Sprintf("shards=%d", i), func(t *testing.T) {
ranges, err := GenerateShardRanges(i)
ranges, err := GenerateShardRanges(i, 0)

require.NoError(t, err)
require.Len(t, ranges, i)
Expand All @@ -1587,13 +1593,58 @@ func TestGenerateShardRangesForManyShards(t *testing.T) {
}
}

func TestShardCalculatorForShardsGreaterThan512(t *testing.T) {
got, err := GenerateShardRanges(512)
assert.NoError(t, err)
func TestGenerateShardRangesWithHexCharacterCount(t *testing.T) {
{
ranges, err := GenerateShardRanges(7, 1)

require.NoError(t, err)

require.EqualValues(t, 7, len(ranges))
require.EqualValues(t, []string{"-2", "2-4", "4-6", "6-9", "9-b", "b-d", "d-"}, ranges)
}

{
ranges, err := GenerateShardRanges(7, 2)

require.NoError(t, err)

require.EqualValues(t, 7, len(ranges))
require.EqualValues(t, []string{"-24", "24-49", "49-6d", "6d-92", "92-b6", "b6-db", "db-"}, ranges)
}

{
ranges, err := GenerateShardRanges(7, 3)

require.NoError(t, err)

require.EqualValues(t, 7, len(ranges))
require.EqualValues(t, []string{"-249", "249-492", "492-6db", "6db-924", "924-b6d", "b6d-db6", "db6-"}, ranges)
}

{
ranges, err := GenerateShardRanges(7, 4)

require.NoError(t, err)

require.EqualValues(t, 7, len(ranges))
require.EqualValues(t, []string{"-2492", "2492-4924", "4924-6db6", "6db6-9249", "9249-b6db", "b6db-db6d", "db6d-"}, ranges)
}

{
ranges, err := GenerateShardRanges(8, 4)

want := "ff80-"
require.NoError(t, err)

assert.Equal(t, want, got[511], "Invalid mapping for a 512-shard keyspace. Expected %v, got %v", want, got[511])
require.EqualValues(t, 8, len(ranges))
require.EqualValues(t, []string{"-2000", "2000-4000", "4000-6000", "6000-8000", "8000-a000", "a000-c000", "c000-e000", "e000-"}, ranges)
}

{
_, err := GenerateShardRanges(32, 1)

require.Error(t, err)
require.ErrorContains(t, err, "the given number of shards (32) is too high for the given number of characters to use (1)")
}
}

func stringToKeyRange(spec string) *topodatapb.KeyRange {
Expand Down
4 changes: 2 additions & 2 deletions go/vt/topo/keyspace_external_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,7 @@ func TestServerFindAllShardsInKeyspace(t *testing.T) {
// the keyspace to fetch later.
require.NoError(t, ts.CreateKeyspace(ctx, keyspace, &topodatapb.Keyspace{}))

shards, err := key.GenerateShardRanges(tt.shards)
shards, err := key.GenerateShardRanges(tt.shards, 0)
require.NoError(t, err)

for _, s := range shards {
Expand Down Expand Up @@ -167,7 +167,7 @@ func TestServerGetServingShards(t *testing.T) {
require.NoError(t, err)
var shardNames []string
if tt.shards > 0 {
shardNames, err = key.GenerateShardRanges(tt.shards)
shardNames, err = key.GenerateShardRanges(tt.shards, 0)
require.NoError(t, err)
require.Equal(t, tt.shards, len(shardNames))
for _, shardName := range shardNames {
Expand Down
2 changes: 1 addition & 1 deletion go/vt/vtctl/vtctl.go
Original file line number Diff line number Diff line change
Expand Up @@ -3908,7 +3908,7 @@ func commandGenerateShardRanges(ctx context.Context, wr *wrangler.Wrangler, subF
return err
}

shardRanges, err := key.GenerateShardRanges(*numShards)
shardRanges, err := key.GenerateShardRanges(*numShards, 0)
if err != nil {
return err
}
Expand Down
2 changes: 1 addition & 1 deletion tools/map-shard-for-value/map-shard-for-value.go
Original file line number Diff line number Diff line change
Expand Up @@ -189,7 +189,7 @@ func main() {
if *shardsCSV != "" {
log.Fatalf("cannot specify both total_shards and shards")
}
shardArr, err := key.GenerateShardRanges(*totalShards)
shardArr, err := key.GenerateShardRanges(*totalShards, 0)
if err != nil {
log.Fatalf("failed to generate shard ranges: %v", err)
}
Expand Down
Loading