Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion go/cmd/dolt/commands/tblcmds/import.go
Original file line number Diff line number Diff line change
Expand Up @@ -798,7 +798,7 @@ func generateAllTextSchema(rd table.ReadCloser, impOpts *importOptions) (schema.
var colType typeinfo.TypeInfo
if slices.Contains(impOpts.primaryKeys, col.Name) || (len(impOpts.primaryKeys) == 0 && len(cols) == 0) {
// text type is not supported for primary keys; the pk is either explicitly set or defaults to the first column
colType = typeinfo.StringDefaultType
colType = typeinfo.StringImportDefaultType
} else {
colType = typeinfo.TextType
}
Expand Down
18 changes: 9 additions & 9 deletions go/libraries/doltcore/env/actions/infer_schema.go
Original file line number Diff line number Diff line change
Expand Up @@ -193,7 +193,7 @@ func leastPermissiveType(strVal string, floatThreshold float64) typeinfo.TypeInf
if int64(len(strVal)) > typeinfo.MaxVarcharLength {
return typeinfo.TextType
} else {
return typeinfo.StringDefaultType
return typeinfo.StringImportDefaultType
}
}

Expand Down Expand Up @@ -234,7 +234,7 @@ func leastPermissiveNumericType(strVal string, floatThreshold float64) (ti typei

// use string for out of range
if errors.Is(err, strconv.ErrRange) {
return typeinfo.StringDefaultType
return typeinfo.StringImportDefaultType
}

if err != nil {
Expand All @@ -243,7 +243,7 @@ func leastPermissiveNumericType(strVal string, floatThreshold float64) (ti typei

// handle leading zero case
if len(strVal) > 1 && strVal[0] == '0' {
return typeinfo.StringDefaultType
return typeinfo.StringImportDefaultType
}

if i >= math.MinInt32 && i <= math.MaxInt32 {
Expand Down Expand Up @@ -325,7 +325,7 @@ func findCommonType(ts typeInfoSet) typeinfo.TypeInfo {

if len(ts) == 0 {
// use strings if all values were empty
return typeinfo.StringDefaultType
return typeinfo.StringImportDefaultType
}

if len(ts) == 1 {
Expand All @@ -339,9 +339,9 @@ func findCommonType(ts typeInfoSet) typeinfo.TypeInfo {
if setHasType(ts, typeinfo.TextType) {
return typeinfo.TextType
} else if setHasType(ts, typeinfo.StringDefaultType) {
return typeinfo.StringDefaultType
} else if setHasType(ts, typeinfo.StringDefaultType) {
return typeinfo.StringDefaultType
return typeinfo.StringImportDefaultType
} else if setHasType(ts, typeinfo.StringImportDefaultType) {
return typeinfo.StringImportDefaultType
}

hasNumeric := false
Expand All @@ -364,7 +364,7 @@ func findCommonType(ts typeInfoSet) typeinfo.TypeInfo {
}

if hasNumeric && hasNonNumeric {
return typeinfo.StringDefaultType
return typeinfo.StringImportDefaultType
}

if hasNumeric {
Expand All @@ -383,7 +383,7 @@ func findCommonType(ts typeInfoSet) typeinfo.TypeInfo {
if setHasType(ts, nct) {
// types in nonChronoTypes have only string
// as a common type with any other type
return typeinfo.StringDefaultType
return typeinfo.StringImportDefaultType
}
}

Expand Down
28 changes: 14 additions & 14 deletions go/libraries/doltcore/env/actions/infer_schema_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -45,10 +45,10 @@ func TestLeastPermissiveType(t *testing.T) {
}{
{"empty string", "", 0.0, typeinfo.UnknownType},
{"valid uuid", "00000000-0000-0000-0000-000000000000", 0.0, typeinfo.UuidType},
{"invalid uuid", "00000000-0000-0000-0000-00000000000z", 0.0, typeinfo.StringDefaultType},
{"invalid uuid", "00000000-0000-0000-0000-00000000000z", 0.0, typeinfo.StringImportDefaultType},
{"lower bool", "true", 0.0, typeinfo.BoolType},
{"upper bool", "FALSE", 0.0, typeinfo.BoolType},
{"yes", "yes", 0.0, typeinfo.StringDefaultType},
{"yes", "yes", 0.0, typeinfo.StringImportDefaultType},
{"one", "1", 0.0, typeinfo.Int32Type},
{"negative one", "-1", 0.0, typeinfo.Int32Type},
{"negative one point 0", "-1.0", 0.0, typeinfo.Float32Type},
Expand All @@ -57,7 +57,7 @@ func TestLeastPermissiveType(t *testing.T) {
{"negative one point 999 with FT of 1.0", "-1.999", 1.0, typeinfo.Int32Type},
{"zero point zero zero zero zero", "0.0000", 0.0, typeinfo.Float32Type},
{"max int", strconv.FormatUint(math.MaxInt64, 10), 0.0, typeinfo.Int64Type},
{"bigger than max int", strconv.FormatUint(math.MaxUint64, 10) + "0", 0.0, typeinfo.StringDefaultType},
{"bigger than max int", strconv.FormatUint(math.MaxUint64, 10) + "0", 0.0, typeinfo.StringImportDefaultType},
}

for _, test := range tests {
Expand All @@ -82,11 +82,11 @@ func TestLeastPermissiveNumericType(t *testing.T) {
{"double decimal point", "0.00.0", 0.0, typeinfo.UnknownType},
{"leading zero floats", "05.78", 0.0, typeinfo.Float32Type},
{"zero float with high precision", "0.0000", 0.0, typeinfo.Float32Type},
{"all zeroes", "0000", 0.0, typeinfo.StringDefaultType},
{"leading zeroes", "01", 0.0, typeinfo.StringDefaultType},
{"all zeroes", "0000", 0.0, typeinfo.StringImportDefaultType},
{"leading zeroes", "01", 0.0, typeinfo.StringImportDefaultType},
{"negative int", "-1234", 0.0, typeinfo.Int32Type},
{"fits in uint64 but not int64", strconv.FormatUint(math.MaxUint64, 10), 0.0, typeinfo.StringDefaultType},
{"negative less than math.MinInt64", "-" + strconv.FormatUint(math.MaxUint64, 10), 0.0, typeinfo.StringDefaultType},
{"fits in uint64 but not int64", strconv.FormatUint(math.MaxUint64, 10), 0.0, typeinfo.StringImportDefaultType},
{"negative less than math.MinInt64", "-" + strconv.FormatUint(math.MaxUint64, 10), 0.0, typeinfo.StringImportDefaultType},
{"math.MinInt64", strconv.FormatInt(math.MinInt64, 10), 0.0, typeinfo.Int64Type},
}

Expand Down Expand Up @@ -186,23 +186,23 @@ func testFindCommonType(t *testing.T) {
typeinfo.Int32Type: {},
typeinfo.BoolType: {},
},
expType: typeinfo.StringDefaultType,
expType: typeinfo.StringImportDefaultType,
},
{
name: "floats and bools",
inferSet: typeInfoSet{
typeinfo.Float32Type: {},
typeinfo.BoolType: {},
},
expType: typeinfo.StringDefaultType,
expType: typeinfo.StringImportDefaultType,
},
{
name: "floats and uuids",
inferSet: typeInfoSet{
typeinfo.Float32Type: {},
typeinfo.UuidType: {},
},
expType: typeinfo.StringDefaultType,
expType: typeinfo.StringImportDefaultType,
},
}

Expand Down Expand Up @@ -230,7 +230,7 @@ func testFindCommonTypeFromSingleType(t *testing.T) {
typeinfo.TimeType,
typeinfo.TimestampType,
typeinfo.DatetimeType,
typeinfo.StringDefaultType,
typeinfo.StringImportDefaultType,
}

for _, ti := range allTypes {
Expand Down Expand Up @@ -371,11 +371,11 @@ func TestInferSchema(t *testing.T) {
},
map[string]typeinfo.TypeInfo{
"int": typeinfo.Int32Type,
"uint": typeinfo.StringDefaultType,
"uint": typeinfo.StringImportDefaultType,
"uuid": typeinfo.UuidType,
"float": typeinfo.Float32Type,
"bool": typeinfo.BoolType,
"string": typeinfo.StringDefaultType,
"string": typeinfo.StringImportDefaultType,
},
nil,
},
Expand All @@ -387,7 +387,7 @@ func TestInferSchema(t *testing.T) {
floatThreshold: 0,
},
map[string]typeinfo.TypeInfo{
"mix": typeinfo.StringDefaultType,
"mix": typeinfo.StringImportDefaultType,
"uuid": typeinfo.UuidType,
},
nil,
Expand Down
2 changes: 2 additions & 0 deletions go/libraries/doltcore/schema/typeinfo/varstring.go
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,8 @@ var (
MaxVarcharLength = int64(16383)
// StringDefaultType is sized to 1k, which allows up to 16 fields per row
StringDefaultType = &varStringType{gmstypes.MustCreateStringWithDefaults(sqltypes.VarChar, MaxVarcharLength/16)}
// StringImportDefaultType is sized to 200, which allows roughly 80 fields per row during import operations
StringImportDefaultType = &varStringType{gmstypes.MustCreateStringWithDefaults(sqltypes.VarChar, 200)}
)

func CreateVarStringTypeFromSqlType(stringType sql.StringType) TypeInfo {
Expand Down
14 changes: 7 additions & 7 deletions integration-tests/bats/import-create-tables.bats
Original file line number Diff line number Diff line change
Expand Up @@ -596,7 +596,7 @@ DELIM
[ "$status" -eq 0 ]
[[ "$output" =~ "CREATE TABLE \`test\`" ]] || false
[[ "$output" =~ "\`pk\` int" ]] || false
[[ "$output" =~ "\`str\` varchar(1023)" ]] || false
[[ "$output" =~ "\`str\` varchar(200)" ]] || false
[[ "$output" =~ "\`int\` int" ]] || false
[[ "$output" =~ "\`bool\` tinyint" ]] || false
[[ "$output" =~ "\`float\` float" ]] || false
Expand Down Expand Up @@ -895,9 +895,9 @@ DELIM

run dolt sql -q "describe test"
[ "$status" -eq 0 ]
[[ "$output" =~ "| id | varchar(1023) |" ]] || false
[[ "$output" =~ "| state | text |" ]] || false
[[ "$output" =~ "| data | text |" ]] || false
[[ "$output" =~ "| id | varchar(200) |" ]] || false
[[ "$output" =~ "| state | text |" ]] || false
[[ "$output" =~ "| data | text |" ]] || false

# pk defaults to first column if not explicitly defined
run dolt table import -c --all-text test2 test.csv
Expand All @@ -906,9 +906,9 @@ DELIM

run dolt sql -q "describe test2"
[ "$status" -eq 0 ]
[[ "$output" =~ "| id | varchar(1023) |" ]] || false
[[ "$output" =~ "| state | text |" ]] || false
[[ "$output" =~ "| data | text |" ]] || false
[[ "$output" =~ "| id | varchar(200) |" ]] || false
[[ "$output" =~ "| state | text |" ]] || false
[[ "$output" =~ "| data | text |" ]] || false
}

@test "import-create-tables: --all-text and --schema are mutually exclusive" {
Expand Down
45 changes: 31 additions & 14 deletions integration-tests/bats/schema-import.bats
Original file line number Diff line number Diff line change
Expand Up @@ -116,7 +116,7 @@ teardown() {
[[ "${lines[0]}" =~ "test" ]] || false
[[ "$output" =~ "\`pk\` int" ]] || false
[[ "$output" =~ "\`int\` int" ]] || false
[[ "$output" =~ "\`string\` varchar(1023)" ]] || false
[[ "$output" =~ "\`string\` varchar(200)" ]] || false
[[ "$output" =~ "\`boolean\` tinyint" ]] || false
[[ "$output" =~ "\`float\` float" ]] || false
[[ "$output" =~ "\`uint\` int" ]] || false
Expand All @@ -141,7 +141,7 @@ DELIM
[[ "${lines[0]}" =~ "test" ]] || false
[[ "$output" =~ "\`pk\` int" ]] || false
[[ "$output" =~ "\`int\` int" ]] || false
[[ "$output" =~ "\`string\` varchar(1023)" ]] || false
[[ "$output" =~ "\`string\` varchar(200)" ]] || false
[[ "$output" =~ "\`boolean\` tinyint" ]] || false
[[ "$output" =~ "\`float\` float" ]] || false
[[ "$output" =~ "\`uint\` int" ]] || false
Expand Down Expand Up @@ -201,8 +201,8 @@ DELIM
[ "$status" -eq 0 ]
[ "${#lines[@]}" -eq 7 ]
[[ "${lines[0]}" =~ "test" ]] || false
[[ "$output" =~ "\`pk\` varchar(1023)" ]] || false
[[ "$output" =~ "\`headerOne\` varchar(1023)" ]] || false
[[ "$output" =~ "\`pk\` varchar(200)" ]] || false
[[ "$output" =~ "\`headerOne\` varchar(200)" ]] || false
[[ "$output" =~ "\`headerTwo\` int" ]] || false
}

Expand All @@ -227,7 +227,7 @@ DELIM
[[ "$output" =~ "\`c3\` int" ]] || false
[[ "$output" =~ "\`c4\` int" ]] || false
[[ "$output" =~ "\`c5\` int" ]] || false
[[ "$output" =~ "\`c6\` varchar(1023)" ]] || false
[[ "$output" =~ "\`c6\` varchar(200)" ]] || false
[[ "$output" =~ "PRIMARY KEY (\`pk\`)" ]] || false
}

Expand All @@ -243,12 +243,12 @@ DELIM
[ "${#lines[@]}" -eq 11 ]
[[ "${lines[0]}" =~ "test" ]] || false
[[ "$output" =~ "\`pk\` int" ]] || false
[[ "$output" =~ "\`c1\` varchar(1023)" ]] || false
[[ "$output" =~ "\`c2\` varchar(1023)" ]] || false
[[ "$output" =~ "\`c3\` varchar(1023)" ]] || false
[[ "$output" =~ "\`c4\` varchar(1023)" ]] || false
[[ "$output" =~ "\`c5\` varchar(1023)" ]] || false
[[ "$output" =~ "\`c6\` varchar(1023)" ]] || false
[[ "$output" =~ "\`c1\` varchar(200)" ]] || false
[[ "$output" =~ "\`c2\` varchar(200)" ]] || false
[[ "$output" =~ "\`c3\` varchar(200)" ]] || false
[[ "$output" =~ "\`c4\` varchar(200)" ]] || false
[[ "$output" =~ "\`c5\` varchar(200)" ]] || false
[[ "$output" =~ "\`c6\` varchar(200)" ]] || false
[[ "$output" =~ "PRIMARY KEY (\`pk\`)" ]] || false
}

Expand Down Expand Up @@ -295,7 +295,7 @@ DELIM

run dolt diff --schema
[ "$status" -eq 0 ]
[[ "$output" =~ '+ `x` varchar(1023),' ]] || false
[[ "$output" =~ '+ `x` varchar(200),' ]] || false
[[ "$output" =~ '+ `y` float,' ]] || false
[[ "$output" =~ '+ `z` int,' ]] || false
# assert no columns were deleted/replaced
Expand Down Expand Up @@ -331,7 +331,7 @@ DELIM

run dolt diff --schema
[ "$status" -eq 0 ]
[[ "$output" =~ '+ `x` varchar(1023),' ]] || false
[[ "$output" =~ '+ `x` varchar(200),' ]] || false
[[ "$output" =~ '+ `y` float,' ]] || false
[[ "$output" =~ '+ `z` int,' ]] || false
# assert no columns were deleted/replaced
Expand Down Expand Up @@ -373,7 +373,7 @@ DELIM

run dolt diff --schema
[ "$status" -eq 0 ]
[[ "$output" =~ '- `a` varchar(1023),' ]] || false
[[ "$output" =~ '- `a` varchar(200),' ]] || false
[[ "$output" =~ '- `b` float,' ]] || false
[[ "$output" =~ '- `c` tinyint(1),' ]] || false
# assert no columns were added
Expand Down Expand Up @@ -410,3 +410,20 @@ CSV
[[ "$output" =~ "name" ]] || false
[[ "$output" =~ "invalid schema" ]] || false
}

@test "schema-import: varchar(200) allows many columns" {
# Checks that schema import gives string columns a default type of varchar(200).
# The narrower default is intended to leave room for many more string columns per
# row than varchar(1023) does (on the order of 80 rather than 16), so this CSV with
# a pk plus 30 string-valued columns is expected to import without error.
cat <<DELIM > many_varchar_cols.csv
pk,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15,c16,c17,c18,c19,c20,c21,c22,c23,c24,c25,c26,c27,c28,c29,c30
1,a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p,q,r,s,t,u,v,w,x,y,z,a1,b1,c1,d1
DELIM
# Import the schema for table `test` from the CSV with `pk` as the primary key;
# the command must exit 0 and report that the table was created.
run dolt schema import -c --pks=pk test many_varchar_cols.csv
[ "$status" -eq 0 ]
[[ "$output" =~ "Created table successfully." ]] || false
run dolt schema show test
[ "$status" -eq 0 ]
# Spot-check the first and last string columns: both must be varchar(200).
[[ "$output" =~ "\`c1\` varchar(200)" ]] || false
[[ "$output" =~ "\`c30\` varchar(200)" ]] || false
}
Loading