dolthub · timsehn · May 29, 2025 · May 29, 2025
diff --git a/go/cmd/dolt/commands/tblcmds/import.go b/go/cmd/dolt/commands/tblcmds/import.go
@@ -798,7 +798,7 @@ func generateAllTextSchema(rd table.ReadCloser, impOpts *importOptions) (schema.
 		var colType typeinfo.TypeInfo
 		if slices.Contains(impOpts.primaryKeys, col.Name) || (len(impOpts.primaryKeys) == 0 && len(cols) == 0) {
 			// text type is not supported for primary keys, pk is either explicitly set or is the first column
-			colType = typeinfo.StringDefaultType
+			colType = typeinfo.StringImportDefaultType
 		} else {
 			colType = typeinfo.TextType
 		}

diff --git a/go/libraries/doltcore/env/actions/infer_schema.go b/go/libraries/doltcore/env/actions/infer_schema.go
@@ -193,7 +193,7 @@ func leastPermissiveType(strVal string, floatThreshold float64) typeinfo.TypeInf
 	if int64(len(strVal)) > typeinfo.MaxVarcharLength {
 		return typeinfo.TextType
 	} else {
-		return typeinfo.StringDefaultType
+		return typeinfo.StringImportDefaultType
 	}
 }
 
@@ -234,7 +234,7 @@ func leastPermissiveNumericType(strVal string, floatThreshold float64) (ti typei
 
 	// use string for out of range
 	if errors.Is(err, strconv.ErrRange) {
-		return typeinfo.StringDefaultType
+		return typeinfo.StringImportDefaultType
 	}
 
 	if err != nil {
@@ -243,7 +243,7 @@ func leastPermissiveNumericType(strVal string, floatThreshold float64) (ti typei
 
 	// handle leading zero case
 	if len(strVal) > 1 && strVal[0] == '0' {
-		return typeinfo.StringDefaultType
+		return typeinfo.StringImportDefaultType
 	}
 
 	if i >= math.MinInt32 && i <= math.MaxInt32 {
@@ -325,7 +325,7 @@ func findCommonType(ts typeInfoSet) typeinfo.TypeInfo {
 
 	if len(ts) == 0 {
 		// use strings if all values were empty
-		return typeinfo.StringDefaultType
+		return typeinfo.StringImportDefaultType
 	}
 
 	if len(ts) == 1 {
@@ -339,9 +339,9 @@ func findCommonType(ts typeInfoSet) typeinfo.TypeInfo {
 	if setHasType(ts, typeinfo.TextType) {
 		return typeinfo.TextType
 	} else if setHasType(ts, typeinfo.StringDefaultType) {
-		return typeinfo.StringDefaultType
-	} else if setHasType(ts, typeinfo.StringDefaultType) {
-		return typeinfo.StringDefaultType
+		return typeinfo.StringImportDefaultType
+	} else if setHasType(ts, typeinfo.StringImportDefaultType) {
+		return typeinfo.StringImportDefaultType
 	}
 
 	hasNumeric := false
@@ -364,7 +364,7 @@ func findCommonType(ts typeInfoSet) typeinfo.TypeInfo {
 	}
 
 	if hasNumeric && hasNonNumeric {
-		return typeinfo.StringDefaultType
+		return typeinfo.StringImportDefaultType
 	}
 
 	if hasNumeric {
@@ -383,7 +383,7 @@ func findCommonType(ts typeInfoSet) typeinfo.TypeInfo {
 		if setHasType(ts, nct) {
 			// types in nonChronoTypes have only string
 			// as a common type with any other type
-			return typeinfo.StringDefaultType
+			return typeinfo.StringImportDefaultType
 		}
 	}
 

diff --git a/go/libraries/doltcore/env/actions/infer_schema_test.go b/go/libraries/doltcore/env/actions/infer_schema_test.go
@@ -45,10 +45,10 @@ func TestLeastPermissiveType(t *testing.T) {
 	}{
 		{"empty string", "", 0.0, typeinfo.UnknownType},
 		{"valid uuid", "00000000-0000-0000-0000-000000000000", 0.0, typeinfo.UuidType},
-		{"invalid uuid", "00000000-0000-0000-0000-00000000000z", 0.0, typeinfo.StringDefaultType},
+		{"invalid uuid", "00000000-0000-0000-0000-00000000000z", 0.0, typeinfo.StringImportDefaultType},
 		{"lower bool", "true", 0.0, typeinfo.BoolType},
 		{"upper bool", "FALSE", 0.0, typeinfo.BoolType},
-		{"yes", "yes", 0.0, typeinfo.StringDefaultType},
+		{"yes", "yes", 0.0, typeinfo.StringImportDefaultType},
 		{"one", "1", 0.0, typeinfo.Int32Type},
 		{"negative one", "-1", 0.0, typeinfo.Int32Type},
 		{"negative one point 0", "-1.0", 0.0, typeinfo.Float32Type},
@@ -57,7 +57,7 @@ func TestLeastPermissiveType(t *testing.T) {
 		{"negative one point 999 with FT of 1.0", "-1.999", 1.0, typeinfo.Int32Type},
 		{"zero point zero zero zero zero", "0.0000", 0.0, typeinfo.Float32Type},
 		{"max int", strconv.FormatUint(math.MaxInt64, 10), 0.0, typeinfo.Int64Type},
-		{"bigger than max int", strconv.FormatUint(math.MaxUint64, 10) + "0", 0.0, typeinfo.StringDefaultType},
+		{"bigger than max int", strconv.FormatUint(math.MaxUint64, 10) + "0", 0.0, typeinfo.StringImportDefaultType},
 	}
 
 	for _, test := range tests {
@@ -82,11 +82,11 @@ func TestLeastPermissiveNumericType(t *testing.T) {
 		{"double decimal point", "0.00.0", 0.0, typeinfo.UnknownType},
 		{"leading zero floats", "05.78", 0.0, typeinfo.Float32Type},
 		{"zero float with high precision", "0.0000", 0.0, typeinfo.Float32Type},
-		{"all zeroes", "0000", 0.0, typeinfo.StringDefaultType},
-		{"leading zeroes", "01", 0.0, typeinfo.StringDefaultType},
+		{"all zeroes", "0000", 0.0, typeinfo.StringImportDefaultType},
+		{"leading zeroes", "01", 0.0, typeinfo.StringImportDefaultType},
 		{"negative int", "-1234", 0.0, typeinfo.Int32Type},
-		{"fits in uint64 but not int64", strconv.FormatUint(math.MaxUint64, 10), 0.0, typeinfo.StringDefaultType},
-		{"negative less than math.MinInt64", "-" + strconv.FormatUint(math.MaxUint64, 10), 0.0, typeinfo.StringDefaultType},
+		{"fits in uint64 but not int64", strconv.FormatUint(math.MaxUint64, 10), 0.0, typeinfo.StringImportDefaultType},
+		{"negative less than math.MinInt64", "-" + strconv.FormatUint(math.MaxUint64, 10), 0.0, typeinfo.StringImportDefaultType},
 		{"math.MinInt64", strconv.FormatInt(math.MinInt64, 10), 0.0, typeinfo.Int64Type},
 	}
 
@@ -186,23 +186,23 @@ func testFindCommonType(t *testing.T) {
 				typeinfo.Int32Type: {},
 				typeinfo.BoolType:  {},
 			},
-			expType: typeinfo.StringDefaultType,
+			expType: typeinfo.StringImportDefaultType,
 		},
 		{
 			name: "floats and bools",
 			inferSet: typeInfoSet{
 				typeinfo.Float32Type: {},
 				typeinfo.BoolType:    {},
 			},
-			expType: typeinfo.StringDefaultType,
+			expType: typeinfo.StringImportDefaultType,
 		},
 		{
 			name: "floats and uuids",
 			inferSet: typeInfoSet{
 				typeinfo.Float32Type: {},
 				typeinfo.UuidType:    {},
 			},
-			expType: typeinfo.StringDefaultType,
+			expType: typeinfo.StringImportDefaultType,
 		},
 	}
 
@@ -230,7 +230,7 @@ func testFindCommonTypeFromSingleType(t *testing.T) {
 		typeinfo.TimeType,
 		typeinfo.TimestampType,
 		typeinfo.DatetimeType,
-		typeinfo.StringDefaultType,
+		typeinfo.StringImportDefaultType,
 	}
 
 	for _, ti := range allTypes {
@@ -371,11 +371,11 @@ func TestInferSchema(t *testing.T) {
 			},
 			map[string]typeinfo.TypeInfo{
 				"int":    typeinfo.Int32Type,
-				"uint":   typeinfo.StringDefaultType,
+				"uint":   typeinfo.StringImportDefaultType,
 				"uuid":   typeinfo.UuidType,
 				"float":  typeinfo.Float32Type,
 				"bool":   typeinfo.BoolType,
-				"string": typeinfo.StringDefaultType,
+				"string": typeinfo.StringImportDefaultType,
 			},
 			nil,
 		},
@@ -387,7 +387,7 @@ func TestInferSchema(t *testing.T) {
 				floatThreshold: 0,
 			},
 			map[string]typeinfo.TypeInfo{
-				"mix":  typeinfo.StringDefaultType,
+				"mix":  typeinfo.StringImportDefaultType,
 				"uuid": typeinfo.UuidType,
 			},
 			nil,

diff --git a/go/libraries/doltcore/schema/typeinfo/varstring.go b/go/libraries/doltcore/schema/typeinfo/varstring.go
@@ -52,6 +52,8 @@ var (
 	MaxVarcharLength = int64(16383)
 	// StringDefaultType is sized to 1k, which allows up to 16 fields per row
 	StringDefaultType = &varStringType{gmstypes.MustCreateStringWithDefaults(sqltypes.VarChar, MaxVarcharLength/16)}
+	// StringImportDefaultType is sized to 200, which allows up to 80+ fields per row during import operations
+	StringImportDefaultType = &varStringType{gmstypes.MustCreateStringWithDefaults(sqltypes.VarChar, 200)}
 )
 
 func CreateVarStringTypeFromSqlType(stringType sql.StringType) TypeInfo {

diff --git a/integration-tests/bats/import-create-tables.bats b/integration-tests/bats/import-create-tables.bats
@@ -596,7 +596,7 @@ DELIM
     [ "$status" -eq 0 ]
     [[ "$output" =~ "CREATE TABLE \`test\`" ]] || false
     [[ "$output" =~ "\`pk\` int" ]] || false
-    [[ "$output" =~ "\`str\` varchar(1023)" ]] || false
+    [[ "$output" =~ "\`str\` varchar(200)" ]] || false
     [[ "$output" =~ "\`int\` int" ]] || false
     [[ "$output" =~ "\`bool\` tinyint" ]] || false
     [[ "$output" =~ "\`float\` float" ]] || false
@@ -895,9 +895,9 @@ DELIM
 
     run dolt sql -q "describe test"
     [ "$status" -eq 0 ]
-    [[ "$output" =~ "| id    | varchar(1023) |" ]] || false
-    [[ "$output" =~ "| state | text          |" ]] || false
-    [[ "$output" =~ "| data  | text          |" ]] || false
+    [[ "$output" =~ "| id    | varchar(200) |" ]] || false
+    [[ "$output" =~ "| state | text         |" ]] || false
+    [[ "$output" =~ "| data  | text         |" ]] || false
 
     # pk defaults to first column if not explicitly defined
     run dolt table import -c --all-text test2 test.csv
@@ -906,9 +906,9 @@ DELIM
 
     run dolt sql -q "describe test2"
     [ "$status" -eq 0 ]
-    [[ "$output" =~ "| id    | varchar(1023) |" ]] || false
-    [[ "$output" =~ "| state | text          |" ]] || false
-    [[ "$output" =~ "| data  | text          |" ]] || false
+    [[ "$output" =~ "| id    | varchar(200) |" ]] || false
+    [[ "$output" =~ "| state | text         |" ]] || false
+    [[ "$output" =~ "| data  | text         |" ]] || false
 }
 
 @test "import-create-tables: --all-text and --schema are mutually exclusive" {

diff --git a/integration-tests/bats/schema-import.bats b/integration-tests/bats/schema-import.bats
@@ -116,7 +116,7 @@ teardown() {
     [[ "${lines[0]}" =~ "test" ]] || false
     [[ "$output" =~ "\`pk\` int" ]] || false
     [[ "$output" =~ "\`int\` int" ]] || false
-    [[ "$output" =~ "\`string\` varchar(1023)" ]] || false
+    [[ "$output" =~ "\`string\` varchar(200)" ]] || false
     [[ "$output" =~ "\`boolean\` tinyint" ]] || false
     [[ "$output" =~ "\`float\` float" ]] || false
     [[ "$output" =~ "\`uint\` int" ]] || false
@@ -141,7 +141,7 @@ DELIM
     [[ "${lines[0]}" =~ "test" ]] || false
     [[ "$output" =~ "\`pk\` int" ]] || false
     [[ "$output" =~ "\`int\` int" ]] || false
-    [[ "$output" =~ "\`string\` varchar(1023)" ]] || false
+    [[ "$output" =~ "\`string\` varchar(200)" ]] || false
     [[ "$output" =~ "\`boolean\` tinyint" ]] || false
     [[ "$output" =~ "\`float\` float" ]] || false
     [[ "$output" =~ "\`uint\` int" ]] || false
@@ -201,8 +201,8 @@ DELIM
     [ "$status" -eq 0 ]
     [ "${#lines[@]}" -eq 7 ]
     [[ "${lines[0]}" =~ "test" ]] || false
-    [[ "$output" =~ "\`pk\` varchar(1023)" ]] || false
-    [[ "$output" =~ "\`headerOne\` varchar(1023)" ]] || false
+    [[ "$output" =~ "\`pk\` varchar(200)" ]] || false
+    [[ "$output" =~ "\`headerOne\` varchar(200)" ]] || false
     [[ "$output" =~ "\`headerTwo\` int" ]] || false
 }
 
@@ -227,7 +227,7 @@ DELIM
     [[ "$output" =~ "\`c3\` int" ]] || false
     [[ "$output" =~ "\`c4\` int" ]] || false
     [[ "$output" =~ "\`c5\` int" ]] || false
-    [[ "$output" =~ "\`c6\` varchar(1023)" ]] || false
+    [[ "$output" =~ "\`c6\` varchar(200)" ]] || false
     [[ "$output" =~ "PRIMARY KEY (\`pk\`)" ]] || false
 }
 
@@ -243,12 +243,12 @@ DELIM
     [ "${#lines[@]}" -eq 11 ]
     [[ "${lines[0]}" =~ "test" ]] || false
     [[ "$output" =~ "\`pk\` int" ]] || false
-    [[ "$output" =~ "\`c1\` varchar(1023)" ]] || false
-    [[ "$output" =~ "\`c2\` varchar(1023)" ]] || false
-    [[ "$output" =~ "\`c3\` varchar(1023)" ]] || false
-    [[ "$output" =~ "\`c4\` varchar(1023)" ]] || false
-    [[ "$output" =~ "\`c5\` varchar(1023)" ]] || false
-    [[ "$output" =~ "\`c6\` varchar(1023)" ]] || false
+    [[ "$output" =~ "\`c1\` varchar(200)" ]] || false
+    [[ "$output" =~ "\`c2\` varchar(200)" ]] || false
+    [[ "$output" =~ "\`c3\` varchar(200)" ]] || false
+    [[ "$output" =~ "\`c4\` varchar(200)" ]] || false
+    [[ "$output" =~ "\`c5\` varchar(200)" ]] || false
+    [[ "$output" =~ "\`c6\` varchar(200)" ]] || false
     [[ "$output" =~ "PRIMARY KEY (\`pk\`)" ]] || false
 }
 
@@ -295,7 +295,7 @@ DELIM
 
     run dolt diff --schema
     [ "$status" -eq 0 ]
-    [[ "$output" =~ '+  `x` varchar(1023),' ]] || false
+    [[ "$output" =~ '+  `x` varchar(200),' ]] || false
     [[ "$output" =~ '+  `y` float,' ]] || false
     [[ "$output" =~ '+  `z` int,' ]] || false
     # assert no columns were deleted/replaced
@@ -331,7 +331,7 @@ DELIM
 
     run dolt diff --schema
     [ "$status" -eq 0 ]
-    [[ "$output" =~ '+  `x` varchar(1023),' ]] || false
+    [[ "$output" =~ '+  `x` varchar(200),' ]] || false
     [[ "$output" =~ '+  `y` float,' ]] || false
     [[ "$output" =~ '+  `z` int,' ]] || false
     # assert no columns were deleted/replaced
@@ -373,7 +373,7 @@ DELIM
 
     run dolt diff --schema
     [ "$status" -eq 0 ]
-    [[ "$output" =~ '-  `a` varchar(1023),' ]] || false
+    [[ "$output" =~ '-  `a` varchar(200),' ]] || false
     [[ "$output" =~ '-  `b` float,' ]] || false
     [[ "$output" =~ '-  `c` tinyint(1),' ]] || false
     # assert no columns were added
@@ -410,3 +410,20 @@ CSV
     [[ "$output" =~ "name" ]] || false
     [[ "$output" =~ "invalid schema" ]] || false
 }
+
+@test "schema-import: varchar(200) allows many columns" {
+    # Test that import operations use varchar(200) as default length, allowing many varchar columns
+    # With varchar(200), we should be able to have 80+ columns vs only 16 with varchar(1023)
+    cat <<DELIM > many_varchar_cols.csv
+pk,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15,c16,c17,c18,c19,c20,c21,c22,c23,c24,c25,c26,c27,c28,c29,c30
+1,a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p,q,r,s,t,u,v,w,x,y,z,a1,b1,c1,d1
+DELIM
+    run dolt schema import -c --pks=pk test many_varchar_cols.csv
+    [ "$status" -eq 0 ]
+    [[ "$output" =~ "Created table successfully." ]] || false
+    run dolt schema show test
+    [ "$status" -eq 0 ]
+    # Verify that columns were created with varchar(200)
+    [[ "$output" =~ "\`c1\` varchar(200)" ]] || false
+    [[ "$output" =~ "\`c30\` varchar(200)" ]] || false
+}