diff --git a/go/cmd/vtctldclient/command/shards.go b/go/cmd/vtctldclient/command/shards.go index 70032299f16..60fef6ede68 100644 --- a/go/cmd/vtctldclient/command/shards.go +++ b/go/cmd/vtctldclient/command/shards.go @@ -56,7 +56,7 @@ that shard.`, // GenerateShardRanges outputs a set of shard ranges assuming a (mostly) // equal distribution of N shards. GenerateShardRanges = &cobra.Command{ - Use: "GenerateShardRanges ", + Use: "GenerateShardRanges [--hex-width=w]", Short: "Print a set of shard ranges assuming a keyspace with N shards.", DisableFlagsInUseLine: true, Args: cobra.ExactArgs(1), @@ -68,7 +68,7 @@ that shard.`, cli.FinishedParsing(cmd) - shards, err := key.GenerateShardRanges(n) + shards, err := key.GenerateShardRanges(n, generateShardRangesOptions.HexWidth) if err != nil { return err } @@ -210,6 +210,10 @@ var createShardOptions = struct { IncludeParent bool }{} +var generateShardRangesOptions = struct { + HexWidth int +}{} + func commandCreateShard(cmd *cobra.Command, args []string) error { keyspace, shard, err := topoproto.ParseKeyspaceShard(cmd.Flags().Arg(0)) if err != nil { @@ -663,6 +667,8 @@ func init() { Root.AddCommand(GetShard) Root.AddCommand(GetShardReplication) + + GenerateShardRanges.Flags().IntVar(&generateShardRangesOptions.HexWidth, "hex-width", 0, "The number of hex characters to use for the shard range start and end. If not set or set to 0, it will be automatically computed based on the number of requested shards.") Root.AddCommand(GenerateShardRanges) RemoveShardCell.Flags().BoolVarP(&removeShardCellOptions.Force, "force", "f", false, "Proceed even if the cell's topology server cannot be reached. The assumption is that you turned down the entire cell, and just need to update the global topo data.") diff --git a/go/vt/key/key.go b/go/vt/key/key.go index 09d4bad2975..0217642cdc5 100644 --- a/go/vt/key/key.go +++ b/go/vt/key/key.go @@ -379,23 +379,31 @@ func EvenShardsKeyRange(i, n int) (*topodatapb.KeyRange, error) { } // GenerateShardRanges returns shard ranges assuming a keyspace with N shards. -func GenerateShardRanges(shards int) ([]string, error) { - var format string - var maxShards int - +func GenerateShardRanges(shards int, hexWidth int) ([]string, error) { switch { case shards <= 0: return nil, errors.New("shards must be greater than zero") + case shards == 1: + return []string{"-"}, nil case shards <= 256: - format = "%02x" - maxShards = 256 + if hexWidth == 0 { + hexWidth = 2 + } case shards <= 65536: - format = "%04x" - maxShards = 65536 + if hexWidth == 0 { + hexWidth = 4 + } default: - return nil, errors.New("this function does not support more than 65336 shards in a single keyspace") + return nil, errors.New("this function does not support more than 65536 shards in a single keyspace") } + maxShards := math.Pow(16, float64(hexWidth)) + if shards > int(maxShards) { + return nil, fmt.Errorf("the given number of shards (%d) is too high for the given number of characters to use (%d)", shards, hexWidth) + } + + format := fmt.Sprintf("%%0%dx", hexWidth) + rangeFormatter := func(start, end int) string { var ( startKid string @@ -406,47 +414,24 @@ func GenerateShardRanges(shards int) ([]string, error) { startKid = fmt.Sprintf(format, start) } - if end != maxShards { + if end != int(maxShards) { endKid = fmt.Sprintf(format, end) } return fmt.Sprintf("%s-%s", startKid, endKid) } - start := 0 - end := 0 - - // If shards does not divide evenly into maxShards, then there is some lossiness, - // where each shard is smaller than it should technically be (if, for example, size == 25.6). - // If we choose to keep everything in ints, then we have two choices: - // - Have every shard in #numshards be a uniform size, tack on an additional shard - // at the end of the range to account for the loss. This is bad because if you ask for - // 7 shards, you'll actually get 7 uniform shards with 1 small shard, for 8 total shards. - // It's also bad because one shard will have much different data distribution than the rest. - // - Expand the final shard to include whatever is left in the keyrange. This will give the - // correct number of shards, which is good, but depending on how lossy each individual shard is, - // you could end with that final shard being significantly larger than the rest of the shards, - // so this doesn't solve the data distribution problem. - // - // By tracking the "real" end (both in the real number sense, and in the truthfulness of the value sense), - // we can re-truncate the integer end on each iteration, which spreads the lossiness more - // evenly across the shards. - // - // This implementation has no impact on shard numbers that are powers of 2, even at large numbers, - // which you can see in the tests. - size := float64(maxShards) / float64(shards) - realEnd := float64(0) - shardRanges := make([]string, 0, shards) - - for i := 1; i < shards; i++ { - realEnd = float64(i) * size - - end = int(realEnd) - shardRanges = append(shardRanges, rangeFormatter(start, end)) - start = end + boundaries := make([]int, 0, shards+1) + for i := 0; i < shards; i++ { + boundaries = append(boundaries, int(float64(i)*maxShards/float64(shards))) } - shardRanges = append(shardRanges, rangeFormatter(start, maxShards)) + shardRanges := make([]string, 0, shards) + shardRanges = append(shardRanges, rangeFormatter(0, boundaries[1])) // first shard + for i := 1; i < shards-1; i++ { + shardRanges = append(shardRanges, rangeFormatter(boundaries[i], boundaries[i+1])) + } + shardRanges = append(shardRanges, rangeFormatter(boundaries[shards-1], int(maxShards))) // last shard return shardRanges, nil } diff --git a/go/vt/key/key_test.go b/go/vt/key/key_test.go index 00ba4477e2b..b8623de120c 100644 --- a/go/vt/key/key_test.go +++ b/go/vt/key/key_test.go @@ -1548,11 +1548,17 @@ func TestGenerateShardRanges(t *testing.T) { []string{"-01", "01-02", "02-03", "03-04", "04-05", "05-06", "06-07", "07-08", "08-09", "09-0a", "0a-0b", "0b-0c", "0c-0d", "0d-0e", "0e-0f", "0f-10", "10-11", "11-12", "12-13", "13-14", "14-15", "15-16", "16-17", "17-18", "18-19", "19-1a", "1a-1b", "1b-1c", "1c-1d", "1d-1e", "1e-1f", "1f-20", "20-21", "21-22", "22-23", "23-24", "24-25", "25-26", "26-27", "27-28", "28-29", "29-2a", "2a-2b", "2b-2c", "2c-2d", "2d-2e", "2e-2f", "2f-30", "30-31", "31-32", "32-33", "33-34", "34-35", "35-36", "36-37", "37-38", "38-39", "39-3a", "3a-3b", "3b-3c", "3c-3d", "3d-3e", "3e-3f", "3f-40", "40-41", "41-42", "42-43", "43-44", "44-45", "45-46", "46-47", "47-48", "48-49", "49-4a", "4a-4b", "4b-4c", "4c-4d", "4d-4e", "4e-4f", "4f-50", "50-51", "51-52", "52-53", "53-54", "54-55", "55-56", "56-57", "57-58", "58-59", "59-5a", "5a-5b", "5b-5c", "5c-5d", "5d-5e", "5e-5f", "5f-60", "60-61", "61-62", "62-63", "63-64", "64-65", "65-66", "66-67", "67-68", "68-69", "69-6a", "6a-6b", "6b-6c", "6c-6d", "6d-6e", "6e-6f", "6f-70", "70-71", "71-72", "72-73", "73-74", "74-75", "75-76", "76-77", "77-78", "78-79", "79-7a", "7a-7b", "7b-7c", "7c-7d", "7d-7e", "7e-7f", "7f-80", "80-81", "81-82", "82-83", "83-84", "84-85", "85-86", "86-87", "87-88", "88-89", "89-8a", "8a-8b", "8b-8c", "8c-8d", "8d-8e", "8e-8f", "8f-90", "90-91", "91-92", "92-93", "93-94", "94-95", "95-96", "96-97", "97-98", "98-99", "99-9a", "9a-9b", "9b-9c", "9c-9d", "9d-9e", "9e-9f", "9f-a0", "a0-a1", "a1-a2", "a2-a3", "a3-a4", "a4-a5", "a5-a6", "a6-a7", "a7-a8", "a8-a9", "a9-aa", "aa-ab", "ab-ac", "ac-ad", "ad-ae", "ae-af", "af-b0", "b0-b1", "b1-b2", "b2-b3", "b3-b4", "b4-b5", "b5-b6", "b6-b7", "b7-b8", "b8-b9", "b9-ba", "ba-bb", "bb-bc", "bc-bd", "bd-be", "be-bf", "bf-c0", "c0-c1", "c1-c2", "c2-c3", "c3-c4", "c4-c5", "c5-c6", "c6-c7", "c7-c8", "c8-c9", "c9-ca", "ca-cb", "cb-cc", "cc-cd", "cd-ce", "ce-cf", "cf-d0", "d0-d1", "d1-d2", "d2-d3", "d3-d4", "d4-d5", "d5-d6", "d6-d7", "d7-d8", "d8-d9", "d9-da", "da-db", "db-dc", "dc-dd", "dd-de", "de-df", "df-e0", "e0-e1", "e1-e2", "e2-e3", "e3-e4", "e4-e5", "e5-e6", "e6-e7", "e7-e8", "e8-e9", "e9-ea", "ea-eb", "eb-ec", "ec-ed", "ed-ee", "ee-ef", "ef-f0", "f0-f1", "f1-f2", "f2-f3", "f3-f4", "f4-f5", "f5-f6", "f6-f7", "f7-f8", "f8-f9", "f9-fa", "fa-fb", "fb-fc", "fc-fd", "fd-fe", "fe-ff", "ff-"}, false, }, + { + "works for very large number of shards", + args{512}, + []string{"-0080", "0080-0100", "0100-0180", "0180-0200", "0200-0280", "0280-0300", "0300-0380", "0380-0400", "0400-0480", "0480-0500", "0500-0580", "0580-0600", "0600-0680", "0680-0700", "0700-0780", "0780-0800", "0800-0880", "0880-0900", "0900-0980", "0980-0a00", "0a00-0a80", "0a80-0b00", "0b00-0b80", "0b80-0c00", "0c00-0c80", "0c80-0d00", "0d00-0d80", "0d80-0e00", "0e00-0e80", "0e80-0f00", "0f00-0f80", "0f80-1000", "1000-1080", "1080-1100", "1100-1180", "1180-1200", "1200-1280", "1280-1300", "1300-1380", "1380-1400", "1400-1480", "1480-1500", "1500-1580", "1580-1600", "1600-1680", "1680-1700", "1700-1780", "1780-1800", "1800-1880", "1880-1900", "1900-1980", "1980-1a00", "1a00-1a80", "1a80-1b00", "1b00-1b80", "1b80-1c00", "1c00-1c80", "1c80-1d00", "1d00-1d80", "1d80-1e00", "1e00-1e80", "1e80-1f00", "1f00-1f80", "1f80-2000", "2000-2080", "2080-2100", "2100-2180", "2180-2200", "2200-2280", "2280-2300", "2300-2380", "2380-2400", "2400-2480", "2480-2500", "2500-2580", "2580-2600", "2600-2680", "2680-2700", "2700-2780", "2780-2800", "2800-2880", "2880-2900", "2900-2980", "2980-2a00", "2a00-2a80", "2a80-2b00", "2b00-2b80", "2b80-2c00", "2c00-2c80", "2c80-2d00", "2d00-2d80", "2d80-2e00", "2e00-2e80", "2e80-2f00", "2f00-2f80", "2f80-3000", "3000-3080", "3080-3100", "3100-3180", "3180-3200", "3200-3280", "3280-3300", "3300-3380", "3380-3400", "3400-3480", "3480-3500", "3500-3580", "3580-3600", "3600-3680", "3680-3700", "3700-3780", "3780-3800", "3800-3880", "3880-3900", "3900-3980", "3980-3a00", "3a00-3a80", "3a80-3b00", "3b00-3b80", "3b80-3c00", "3c00-3c80", "3c80-3d00", "3d00-3d80", "3d80-3e00", "3e00-3e80", "3e80-3f00", "3f00-3f80", "3f80-4000", "4000-4080", "4080-4100", "4100-4180", "4180-4200", "4200-4280", "4280-4300", "4300-4380", "4380-4400", "4400-4480", "4480-4500", "4500-4580", "4580-4600", "4600-4680", "4680-4700", "4700-4780", "4780-4800", "4800-4880", "4880-4900", "4900-4980", "4980-4a00", "4a00-4a80", "4a80-4b00", "4b00-4b80", "4b80-4c00", "4c00-4c80", "4c80-4d00", "4d00-4d80", "4d80-4e00", "4e00-4e80", "4e80-4f00", "4f00-4f80", "4f80-5000", "5000-5080", "5080-5100", "5100-5180", "5180-5200", "5200-5280", "5280-5300", "5300-5380", "5380-5400", "5400-5480", "5480-5500", "5500-5580", "5580-5600", "5600-5680", "5680-5700", "5700-5780", "5780-5800", "5800-5880", "5880-5900", "5900-5980", "5980-5a00", "5a00-5a80", "5a80-5b00", "5b00-5b80", "5b80-5c00", "5c00-5c80", "5c80-5d00", "5d00-5d80", "5d80-5e00", "5e00-5e80", "5e80-5f00", "5f00-5f80", "5f80-6000", "6000-6080", "6080-6100", "6100-6180", "6180-6200", "6200-6280", "6280-6300", "6300-6380", "6380-6400", "6400-6480", "6480-6500", "6500-6580", "6580-6600", "6600-6680", "6680-6700", "6700-6780", "6780-6800", "6800-6880", "6880-6900", "6900-6980", "6980-6a00", "6a00-6a80", "6a80-6b00", "6b00-6b80", "6b80-6c00", "6c00-6c80", "6c80-6d00", "6d00-6d80", "6d80-6e00", "6e00-6e80", "6e80-6f00", "6f00-6f80", "6f80-7000", "7000-7080", "7080-7100", "7100-7180", "7180-7200", "7200-7280", "7280-7300", "7300-7380", "7380-7400", "7400-7480", "7480-7500", "7500-7580", "7580-7600", "7600-7680", "7680-7700", "7700-7780", "7780-7800", "7800-7880", "7880-7900", "7900-7980", "7980-7a00", "7a00-7a80", "7a80-7b00", "7b00-7b80", "7b80-7c00", "7c00-7c80", "7c80-7d00", "7d00-7d80", "7d80-7e00", "7e00-7e80", "7e80-7f00", "7f00-7f80", "7f80-8000", "8000-8080", "8080-8100", "8100-8180", "8180-8200", "8200-8280", "8280-8300", "8300-8380", "8380-8400", "8400-8480", "8480-8500", "8500-8580", "8580-8600", "8600-8680", "8680-8700", "8700-8780", "8780-8800", "8800-8880", "8880-8900", "8900-8980", "8980-8a00", "8a00-8a80", "8a80-8b00", "8b00-8b80", "8b80-8c00", "8c00-8c80", "8c80-8d00", "8d00-8d80", "8d80-8e00", "8e00-8e80", "8e80-8f00", "8f00-8f80", "8f80-9000", "9000-9080", "9080-9100", "9100-9180", "9180-9200", "9200-9280", "9280-9300", "9300-9380", "9380-9400", "9400-9480", "9480-9500", "9500-9580", "9580-9600", "9600-9680", "9680-9700", "9700-9780", "9780-9800", "9800-9880", "9880-9900", "9900-9980", "9980-9a00", "9a00-9a80", "9a80-9b00", "9b00-9b80", "9b80-9c00", "9c00-9c80", "9c80-9d00", "9d00-9d80", "9d80-9e00", "9e00-9e80", "9e80-9f00", "9f00-9f80", "9f80-a000", "a000-a080", "a080-a100", "a100-a180", "a180-a200", "a200-a280", "a280-a300", "a300-a380", "a380-a400", "a400-a480", "a480-a500", "a500-a580", "a580-a600", "a600-a680", "a680-a700", "a700-a780", "a780-a800", "a800-a880", "a880-a900", "a900-a980", "a980-aa00", "aa00-aa80", "aa80-ab00", "ab00-ab80", "ab80-ac00", "ac00-ac80", "ac80-ad00", "ad00-ad80", "ad80-ae00", "ae00-ae80", "ae80-af00", "af00-af80", "af80-b000", "b000-b080", "b080-b100", "b100-b180", "b180-b200", "b200-b280", "b280-b300", "b300-b380", "b380-b400", "b400-b480", "b480-b500", "b500-b580", "b580-b600", "b600-b680", "b680-b700", "b700-b780", "b780-b800", "b800-b880", "b880-b900", "b900-b980", "b980-ba00", "ba00-ba80", "ba80-bb00", "bb00-bb80", "bb80-bc00", "bc00-bc80", "bc80-bd00", "bd00-bd80", "bd80-be00", "be00-be80", "be80-bf00", "bf00-bf80", "bf80-c000", "c000-c080", "c080-c100", "c100-c180", "c180-c200", "c200-c280", "c280-c300", "c300-c380", "c380-c400", "c400-c480", "c480-c500", "c500-c580", "c580-c600", "c600-c680", "c680-c700", "c700-c780", "c780-c800", "c800-c880", "c880-c900", "c900-c980", "c980-ca00", "ca00-ca80", "ca80-cb00", "cb00-cb80", "cb80-cc00", "cc00-cc80", "cc80-cd00", "cd00-cd80", "cd80-ce00", "ce00-ce80", "ce80-cf00", "cf00-cf80", "cf80-d000", "d000-d080", "d080-d100", "d100-d180", "d180-d200", "d200-d280", "d280-d300", "d300-d380", "d380-d400", "d400-d480", "d480-d500", "d500-d580", "d580-d600", "d600-d680", "d680-d700", "d700-d780", "d780-d800", "d800-d880", "d880-d900", "d900-d980", "d980-da00", "da00-da80", "da80-db00", "db00-db80", "db80-dc00", "dc00-dc80", "dc80-dd00", "dd00-dd80", "dd80-de00", "de00-de80", "de80-df00", "df00-df80", "df80-e000", "e000-e080", "e080-e100", "e100-e180", "e180-e200", "e200-e280", "e280-e300", "e300-e380", "e380-e400", "e400-e480", "e480-e500", "e500-e580", "e580-e600", "e600-e680", "e680-e700", "e700-e780", "e780-e800", "e800-e880", "e880-e900", "e900-e980", "e980-ea00", "ea00-ea80", "ea80-eb00", "eb00-eb80", "eb80-ec00", "ec00-ec80", "ec80-ed00", "ed00-ed80", "ed80-ee00", "ee00-ee80", "ee80-ef00", "ef00-ef80", "ef80-f000", "f000-f080", "f080-f100", "f100-f180", "f180-f200", "f200-f280", "f280-f300", "f300-f380", "f380-f400", "f400-f480", "f480-f500", "f500-f580", "f580-f600", "f600-f680", "f680-f700", "f700-f780", "f780-f800", "f800-f880", "f880-f900", "f900-f980", "f980-fa00", "fa00-fa80", "fa80-fb00", "fb00-fb80", "fb80-fc00", "fc00-fc80", "fc80-fd00", "fd00-fd80", "fd80-fe00", "fe00-fe80", "fe80-ff00", "ff00-ff80", "ff80-"}, + false, + }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - got, err := GenerateShardRanges(tt.args.shards) + got, err := GenerateShardRanges(tt.args.shards, 0) if tt.wantErr { assert.Error(t, err) return @@ -1567,7 +1573,7 @@ func TestGenerateShardRanges(t *testing.T) { func TestGenerateShardRangesForManyShards(t *testing.T) { for i := 1; i <= 1024; i++ { t.Run(fmt.Sprintf("shards=%d", i), func(t *testing.T) { - ranges, err := GenerateShardRanges(i) + ranges, err := GenerateShardRanges(i, 0) require.NoError(t, err) require.Len(t, ranges, i) @@ -1587,13 +1593,58 @@ func TestGenerateShardRangesForManyShards(t *testing.T) { } } -func TestShardCalculatorForShardsGreaterThan512(t *testing.T) { - got, err := GenerateShardRanges(512) - assert.NoError(t, err) +func TestGenerateShardRangesWithHexCharacterCount(t *testing.T) { + { + ranges, err := GenerateShardRanges(7, 1) + + require.NoError(t, err) + + require.EqualValues(t, 7, len(ranges)) + require.EqualValues(t, []string{"-2", "2-4", "4-6", "6-9", "9-b", "b-d", "d-"}, ranges) + } + + { + ranges, err := GenerateShardRanges(7, 2) + + require.NoError(t, err) + + require.EqualValues(t, 7, len(ranges)) + require.EqualValues(t, []string{"-24", "24-49", "49-6d", "6d-92", "92-b6", "b6-db", "db-"}, ranges) + } + + { + ranges, err := GenerateShardRanges(7, 3) + + require.NoError(t, err) + + require.EqualValues(t, 7, len(ranges)) + require.EqualValues(t, []string{"-249", "249-492", "492-6db", "6db-924", "924-b6d", "b6d-db6", "db6-"}, ranges) + } + + { + ranges, err := GenerateShardRanges(7, 4) + + require.NoError(t, err) + + require.EqualValues(t, 7, len(ranges)) + require.EqualValues(t, []string{"-2492", "2492-4924", "4924-6db6", "6db6-9249", "9249-b6db", "b6db-db6d", "db6d-"}, ranges) + } + + { + ranges, err := GenerateShardRanges(8, 4) - want := "ff80-" + require.NoError(t, err) - assert.Equal(t, want, got[511], "Invalid mapping for a 512-shard keyspace. Expected %v, got %v", want, got[511]) + require.EqualValues(t, 8, len(ranges)) + require.EqualValues(t, []string{"-2000", "2000-4000", "4000-6000", "6000-8000", "8000-a000", "a000-c000", "c000-e000", "e000-"}, ranges) + } + + { + _, err := GenerateShardRanges(32, 1) + + require.Error(t, err) + require.ErrorContains(t, err, "the given number of shards (32) is too high for the given number of characters to use (1)") + } } func stringToKeyRange(spec string) *topodatapb.KeyRange { diff --git a/go/vt/topo/keyspace_external_test.go b/go/vt/topo/keyspace_external_test.go index bfcb2f591a9..e8e7f795c66 100644 --- a/go/vt/topo/keyspace_external_test.go +++ b/go/vt/topo/keyspace_external_test.go @@ -87,7 +87,7 @@ func TestServerFindAllShardsInKeyspace(t *testing.T) { // the keyspace to fetch later. require.NoError(t, ts.CreateKeyspace(ctx, keyspace, &topodatapb.Keyspace{})) - shards, err := key.GenerateShardRanges(tt.shards) + shards, err := key.GenerateShardRanges(tt.shards, 0) require.NoError(t, err) for _, s := range shards { @@ -167,7 +167,7 @@ func TestServerGetServingShards(t *testing.T) { require.NoError(t, err) var shardNames []string if tt.shards > 0 { - shardNames, err = key.GenerateShardRanges(tt.shards) + shardNames, err = key.GenerateShardRanges(tt.shards, 0) require.NoError(t, err) require.Equal(t, tt.shards, len(shardNames)) for _, shardName := range shardNames { diff --git a/go/vt/vtctl/vtctl.go b/go/vt/vtctl/vtctl.go index 8507ff86410..191502eb33b 100644 --- a/go/vt/vtctl/vtctl.go +++ b/go/vt/vtctl/vtctl.go @@ -3908,7 +3908,7 @@ func commandGenerateShardRanges(ctx context.Context, wr *wrangler.Wrangler, subF return err } - shardRanges, err := key.GenerateShardRanges(*numShards) + shardRanges, err := key.GenerateShardRanges(*numShards, 0) if err != nil { return err } diff --git a/tools/map-shard-for-value/map-shard-for-value.go b/tools/map-shard-for-value/map-shard-for-value.go index 18a092d1371..ab80978f8fd 100755 --- a/tools/map-shard-for-value/map-shard-for-value.go +++ b/tools/map-shard-for-value/map-shard-for-value.go @@ -189,7 +189,7 @@ func main() { if *shardsCSV != "" { log.Fatalf("cannot specify both total_shards and shards") } - shardArr, err := key.GenerateShardRanges(*totalShards) + shardArr, err := key.GenerateShardRanges(*totalShards, 0) if err != nil { log.Fatalf("failed to generate shard ranges: %v", err) }