Skip to content
This repository was archived by the owner on Dec 16, 2022. It is now read-only.
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
67 commits
Select commit Hold shift + click to select a range
e5e619a
default helm vtgate maxReplicas to replicas
Mar 1, 2018
581eab9
dynamically calculate s3 upload partsize for backups
hmcgonig Apr 17, 2018
051b8c5
add support for BIGINT UNSIGNED <--> java.math.BigInteger
Apr 19, 2018
74ce7e2
Move vtcombo initTabletMap func to vtcombo package
arthurnn Apr 19, 2018
e9b0d3b
Add JD.com to the list of adopters
sougou Apr 20, 2018
f3d2e34
Merge remote-tracking branch 'upstream/master' into arthurnn/vtcombo_mv
arthurnn Apr 23, 2018
b9142b3
Fix method name on comment
arthurnn Apr 23, 2018
4398d36
Expose CreateTablet too
arthurnn Apr 23, 2018
987f0c7
Set orchestrator maintenance mode when demoting a master, and disable…
hmcgonig Apr 23, 2018
f241d54
vt/log: Do not use an alias when importing the "glog" package.
michael-berlin Apr 23, 2018
bb0a10f
Merge pull request #3858 from michael-berlin/fix_glog_import
michael-berlin Apr 23, 2018
35fde4c
include the MultiShardAutocommit boolean in the plan json
demmer Apr 23, 2018
17be11d
Adds the possibility to vary the template of tablet debug urls.
mpawliszyn Apr 5, 2018
0d373f4
vt/discovery: Fix import groups in healthcheck files.
michael-berlin Apr 25, 2018
a990225
vt/discovery: Do not export LoadTabletURLTemplate.
michael-berlin Apr 25, 2018
9dd560d
vt/discovery: Check for quotes in template output as well.
michael-berlin Apr 25, 2018
5180793
Merge pull request #3842 from mpawliszyn/mikepaw.format-debug-urls
michael-berlin Apr 25, 2018
8d07774
Merge pull request #3852 from sougou/dox
sougou Apr 26, 2018
a535c6b
stats: Move Counter* and Gauge* types into new file counter.go.
michael-berlin Apr 27, 2018
cac7949
stats: Simplify code for single-value stat variables.
michael-berlin Apr 28, 2018
7a533bf
support parse sql like : DELETE FROM a1, a2 USING t1 AS a1 INNER JOIN…
xhh1989 Apr 28, 2018
fbe5eb4
stats: Fix invalid format string in test.
michael-berlin Apr 28, 2018
56190fa
stats: Reorder calls in test to make them consistent with the other t…
michael-berlin Apr 28, 2018
cd6afec
stats: Split Duration* types into CounterDuration* and GaugeDuration*.
michael-berlin Apr 28, 2018
3b11bce
stats: Reorder code in counters.go.
michael-berlin Apr 28, 2018
a20b453
stats: Rename (Counters|Gauges)WithLabels to *WithSingleLabel.
michael-berlin Apr 28, 2018
8e9d697
stats: CountersWithSingleLabel: Rename "labelName" to "label" to be m…
michael-berlin Apr 28, 2018
66747a1
stats: Change order of "help" parameter in (Gauges|Counters)Func* types.
michael-berlin Apr 28, 2018
2388a9b
stats: Improve counter documentation.
michael-berlin Apr 28, 2018
0bb5994
fix supported data types
jvaidya Apr 28, 2018
17f3ced
Merge pull request #3873 from jvaidya/fix_datatypes
sougou Apr 28, 2018
a6c5e26
stats: Remove CountersFunc type.
michael-berlin Apr 28, 2018
158ee2f
Merge pull request #3867 from michael-berlin/stats_fixes
demmer Apr 30, 2018
2d141ad
removed explanations of RBR vs SBR, we now support RBR
jvaidya Apr 30, 2018
f38ace4
addressed comments
jvaidya Apr 30, 2018
504550e
make the label name customizable for stats.NewTimings
demmer Apr 26, 2018
066e88e
add prometheus support for stats.NewHistogram
demmer Apr 30, 2018
507655b
reorganize all the collector instantiation functions
demmer Apr 30, 2018
fd7b5f8
use a singleton struct instead of a pointer
demmer Apr 30, 2018
cafbca8
include the connection id as part of the mysql conn error logs
demmer May 1, 2018
f86ede1
use "help" uniformly as suggested in PR review
demmer May 1, 2018
86a2728
use Label() instead of LabelName()
demmer May 1, 2018
213d446
remove the unused LabelName() from the Histogram
demmer May 1, 2018
191021c
clean up cutoffs initialization
demmer May 1, 2018
5dd4aef
fixed link syntax, added pt-osc
jvaidya May 1, 2018
eb92663
Merge pull request #3876 from jvaidya/fix_rbr
sougou May 1, 2018
03943b9
Merge pull request #3880 from tinyspeck/mysql-conn-id-in-error-logs
sougou May 1, 2018
eee2aac
Merge pull request #3847 from HubSpot/jdbc-bigint
sougou May 1, 2018
e86a6f9
Merge pull request #3879 from tinyspeck/prometheus-histogram-improvem…
michael-berlin May 1, 2018
5fc60e4
Merge pull request #3860 from tinyspeck/multi-shard-autocommit-includ…
sougou May 1, 2018
3af24b6
Merge pull request #3859 from HubSpot/reparent_maintenance
sougou May 1, 2018
915888e
Merge pull request #3849 from arthurnn/arthurnn/vtcombo_mv
sougou May 1, 2018
bfafaf3
Merge pull request #3844 from HubSpot/backup_partsize
sougou May 1, 2018
e3e17e0
Merge pull request #3872 from tiglabs/ref_del
sougou May 1, 2018
cce7c61
helm: minor fixes
sougou Apr 29, 2018
a755692
Merge pull request #3875 from sougou/helm
sougou May 2, 2018
9e3189e
Merge pull request #3710 from gflarity/fix_vtgate_helm_tpl
sougou May 2, 2018
0b263a8
Use an alias so goimports doesn't delete a real import
dweitzman May 2, 2018
a3d03d4
Merge pull request #3886 from dweitzman/gofmt
michael-berlin May 2, 2018
d43bb20
change HealthCheckConnections to be a gauge not a counter
demmer May 2, 2018
05ac722
Defend against a potential race condition in registering push stats p…
dweitzman May 4, 2018
0714ddd
Merge pull request #3895 from dweitzman/fix_push_stats_race
michael-berlin May 5, 2018
ba486b3
fix a potential blocking issue when StreamHealth fails
demmer May 5, 2018
494c423
drop the lock in finalizeConn as well before calling Close()
demmer May 6, 2018
5559dac
Merge pull request #3898 from tinyspeck/fix-healthcheck-close-stall
sougou May 6, 2018
2454a7d
Merge pull request #3888 from tinyspeck/minor-prom-fixes
demmer May 6, 2018
a149d8c
Merge remote-tracking branch 'upstream/master' into slack-sync-upstre…
demmer May 6, 2018
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions ADOPTERS.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ This is an alphabetical list of known adopters of Vitess. Some have already gone
* [BetterCloud](http://bettercloud.com)
* [FlipKart](http://flipkart.com)
* [HubSpot](http://product.hubspot.com/)
* [JD](http://jd.com/)
* [Nozzle](http://nozzle.io)
* [Pixel Federation](http://pixelfederation.com)
* [Quiz of Kings](http://quizofkings.com)
Expand Down
43 changes: 43 additions & 0 deletions data/test/vtgate/dml_cases.txt
Original file line number Diff line number Diff line change
Expand Up @@ -921,6 +921,33 @@
}
}

# insert with multiple rows - multi-shard autocommit
"insert /*vt+ MULTI_SHARD_AUTOCOMMIT=1 */ into user(id) values (1), (2)"
{
"Original": "insert /*vt+ MULTI_SHARD_AUTOCOMMIT=1 */ into user(id) values (1), (2)",
"Instructions": {
"Opcode": "InsertSharded",
"Keyspace": {
"Name": "user",
"Sharded": true
},
"Query": "insert /*vt+ MULTI_SHARD_AUTOCOMMIT=1 */ into user(id, Name, Costly) values (:_Id0, :_Name0, :_Costly0), (:_Id1, :_Name1, :_Costly1)",
"Values": [[[":__seq0",":__seq1"]],[[null,null]],[[null,null]]],
"Table": "user",
"Generate": {
"Keyspace": {
"Name": "main",
"Sharded": false
},
"Query": "select next :n values from seq",
"Values": [1,2]
},
"Prefix": "insert /*vt+ MULTI_SHARD_AUTOCOMMIT=1 */ into user(id, Name, Costly) values ",
"Mid": ["(:_Id0, :_Name0, :_Costly0)","(:_Id1, :_Name1, :_Costly1)"],
"MultiShardAutocommit": true
}
}

# simple replace unsharded
"replace into unsharded values(1, 2)"
{
Expand Down Expand Up @@ -1253,6 +1280,22 @@
}
}

# delete from with no index match - multi shard autocommit
"delete /*vt+ MULTI_SHARD_AUTOCOMMIT=1 */ from user_extra where name = 'jose'"
{
"Original": "delete /*vt+ MULTI_SHARD_AUTOCOMMIT=1 */ from user_extra where name = 'jose'",
"Instructions": {
"Opcode": "DeleteScatter",
"Keyspace": {
"Name": "user",
"Sharded": true
},
"Query": "delete /*vt+ MULTI_SHARD_AUTOCOMMIT=1 */ from user_extra where name = 'jose'",
"Table": "user_extra",
"MultiShardAutocommit": true
}
}

# delete from with primary id in through IN clause
"delete from user_extra where user_id in (1, 2)"
{
Expand Down
3 changes: 1 addition & 2 deletions doc/ServerConfiguration.md
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ RBR will eventually be supported by Vitess.

### Data types

Vitess supports data types at the MySQL 5.5 level. The newer data types like spatial or JSON are not supported yet. Additionally, the TIMESTAMP data type should not be used in a primary key or sharding column. Otherwise, Vitess cannot predict those values correctly and this may result in data corruption.
Vitess supports all data types including newer data types like spatial and JSON. Additionally, the TIMESTAMP data type should not be used in a primary key or sharding column. Otherwise, Vitess cannot predict those values correctly and this may result in data corruption.

### No side effects

Expand Down Expand Up @@ -676,4 +676,3 @@ Orchestrator, it also means new instances will be discovered immediately,
and the topology will automatically repopulate even if Orchestrator's
backing store is wiped out. Note that Orchestrator will forget stale
instances after a configurable timeout.

72 changes: 9 additions & 63 deletions doc/VitessReplication.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,70 +3,16 @@
## Statement vs Row Based Replication

MySQL supports two primary modes of replication in its binary logs: statement or
row based.

**Statement Based Replication**:

* The statements executed on the master are copied almost as-is in the master
logs.
* The slaves replay these statements as is.
* If the statements are expensive (especially an update with a complicated WHERE
clause), they will be expensive on the slaves too.
* For current timestamp and auto-increment values, the master also puts
additional SET statements in the logs to make the statement have the same
effect, so the slaves end up with the same values.

**Row Based Replication**:

* The statements executed on the master result in updated rows. The new full
values for these rows are copied to the master logs.
* The slaves change their records for the rows they receive. The update is by
primary key, and contains the new values for each column, so usually it’s very
fast.
* Each updated row contains the entire row, not just the columns that were
updated (unless the flag --binlog\_row\_image=minimal is used).
* The replication stream is harder to read, as it contains almost binary data,
that don’t easily map to the original statements.
* There is a configurable limit on how many rows can be affected by one
binlog event, so the master logs are not flooded.
* The format of the logs depends on the master schema: each row has a list of
values, one value for each column. So if the master schema is different from
the slave schema, updates will misbehave (exception being if slave has extra
columns at the end).
* It is possible to revert to statement based replication for some commands to
avoid these drawbacks (for instance for DELETE statements that affect a large
number of rows).
* Schema changes always use statement based replication.
* If comments are added to a statement, they are stripped from the
replication stream (as only rows are transmitted). There is a flag
--binlog\_rows\_query\_log\_events to add the original statement to each row
update, but it is costly in terms of binlog size.

For the longest time, MySQL replication has been single-threaded: only one
statement is applied by the slaves at a time. Since the master applies more
statements in parallel, replication can fall behind on the slaves fairly easily,
under higher load. Even though the situation has improved (parallel slave
apply), the slave replication speed is still a limiting factor for a lot of
applications. Since row based replication achieves higher update rates on the
slaves in most cases, it has been the only viable option for most performance
sensitive applications.

Schema changes however are not easy to achieve with row based
replication. Adding columns can be done offline, but removing or changing
columns cannot easily be done (there are multiple ways to achieve this, but they
all have limitations or performance implications, and are not that easy to
setup).

Vitess helps by using statement based replication (therefore allowing complex
schema changes), while at the same time simplifying the replication stream (so
slaves can be fast), by rewriting Update statements.

Then, with statement based replication, it becomes easier to perform offline
advanced schema changes, or large data updates. Vitess’s solution is called
schema swap.
row based. Vitess supports both these modes.

For schema changes, if the number of affected rows is greater than 100k (configurable), we don't allow direct application
of DDLs. The recommended tools in such cases are [gh-ost](https://github.com/github/gh-ost) or [pt-osc](https://www.percona.com/doc/percona-toolkit/LATEST/pt-online-schema-change.html).

We plan to also support row based replication in the future, and adapt our tools
to provide the same features when possible. See Appendix for our plan.
Not all statements are safe for Statement Based Replication (SBR): https://dev.mysql.com/doc/refman/8.0/en/replication-rbr-safe-unsafe.html. Vitess rewrites some of these statements to be safe for SBR, and others are explicitly failed. This is described in detail below.

With statement based replication, it becomes easier to perform offline
advanced schema changes, or large data updates. Vitess’s solution is called
schema swap (described below).

## Rewriting Update Statements

Expand Down
3 changes: 2 additions & 1 deletion go/cmd/vtcombo/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ import (
"vitess.io/vitess/go/vt/srvtopo"
"vitess.io/vitess/go/vt/topo"
"vitess.io/vitess/go/vt/topo/memorytopo"
"vitess.io/vitess/go/vt/vtcombo"
"vitess.io/vitess/go/vt/vtctld"
"vitess.io/vitess/go/vt/vtgate"
"vitess.io/vitess/go/vt/vttablet/tabletserver/tabletenv"
Expand Down Expand Up @@ -109,7 +110,7 @@ func main() {
servenv.OnClose(mysqld.Close)

// tablets configuration and init
if err := initTabletMap(ts, tpb, mysqld, *dbcfgs, *schemaDir, mycnf); err != nil {
if err := vtcombo.InitTabletMap(ts, tpb, mysqld, *dbcfgs, *schemaDir, mycnf); err != nil {
log.Errorf("initTabletMapProto failed: %v", err)
exit.Return(1)
}
Expand Down
14 changes: 7 additions & 7 deletions go/mysql/conn.go
Original file line number Diff line number Diff line change
Expand Up @@ -579,31 +579,31 @@ func (c *Conn) writeEphemeralPacket(direct bool) error {
// Just write c.buffer as a single buffer.
// It has both header and data.
if n, err := w.Write(c.buffer); err != nil {
return fmt.Errorf("Write(c.buffer) failed: %v", err)
return fmt.Errorf("Conn %v: Write(c.buffer) failed: %v", c.ID(), err)
} else if n != len(c.buffer) {
return fmt.Errorf("Write(c.buffer) returned a short write: %v < %v", n, len(c.buffer))
return fmt.Errorf("Conn %v: Write(c.buffer) returned a short write: %v < %v", c.ID(), n, len(c.buffer))
}
case ephemeralWriteSingleBuffer:
// Write the allocated buffer as a single buffer.
// It has both header and data.
if n, err := w.Write(c.currentEphemeralPacket); err != nil {
return fmt.Errorf("Write(c.currentEphemeralPacket) failed: %v", err)
return fmt.Errorf("Conn %v: Write(c.currentEphemeralPacket) failed: %v", c.ID(), err)
} else if n != len(c.currentEphemeralPacket) {
return fmt.Errorf("Write(c.currentEphemeralPacket) returned a short write: %v < %v", n, len(c.currentEphemeralPacket))
return fmt.Errorf("Conn %v: Write(c.currentEphemeralPacket) returned a short write: %v < %v", c.ID(), n, len(c.currentEphemeralPacket))
}
case ephemeralWriteBigBuffer:
// This is the slower path for big data.
// With direct=true, the caller expects a flush, so we call it
// manually.
if err := c.writePacket(c.currentEphemeralPacket); err != nil {
return err
return fmt.Errorf("Conn %v: %v", c.ID(), err)
}
if direct {
return c.flush()
}
case ephemeralUnused, ephemeralReadGlobalBuffer, ephemeralReadSingleBuffer, ephemeralReadBigBuffer:
// Programming error.
panic(fmt.Errorf("trying to call writeEphemeralPacket while currentEphemeralPolicy is %v", c.currentEphemeralPolicy))
panic(fmt.Errorf("Conn %v: trying to call writeEphemeralPacket while currentEphemeralPolicy is %v", c.ID(), c.currentEphemeralPolicy))
}

return nil
Expand All @@ -613,7 +613,7 @@ func (c *Conn) writeEphemeralPacket(direct bool) error {
// This method returns a generic error, not a SQLError.
func (c *Conn) flush() error {
if err := c.writer.Flush(); err != nil {
return fmt.Errorf("Flush() failed: %v", err)
return fmt.Errorf("Conn %v: Flush() failed: %v", c.ID(), err)
}
return nil
}
Expand Down
2 changes: 1 addition & 1 deletion go/mysql/ldapauthserver/auth_server_ldap.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ import (
"sync"
"time"

"gopkg.in/ldap.v2"
ldap "gopkg.in/ldap.v2"
"vitess.io/vitess/go/mysql"
"vitess.io/vitess/go/netutil"
"vitess.io/vitess/go/vt/log"
Expand Down
2 changes: 1 addition & 1 deletion go/mysql/ldapauthserver/auth_server_ldap_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ import (
"fmt"
"testing"

"gopkg.in/ldap.v2"
ldap "gopkg.in/ldap.v2"
)

type MockLdapClient struct{}
Expand Down
2 changes: 1 addition & 1 deletion go/mysql/server.go
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ const (

var (
// Metrics
timings = stats.NewTimings("MysqlServerTimings", "MySQL server timings")
timings = stats.NewTimings("MysqlServerTimings", "MySQL server timings", "operation")
connCount = stats.NewGauge("MysqlServerConnCount", "Active MySQL server connections")
connAccept = stats.NewCounter("MysqlServerConnAccepted", "Connections accepted by MySQL server")
connSlow = stats.NewCounter("MysqlServerConnSlow", "Connections that took more than the configured mysql_slow_connect_warn_threshold to establish")
Expand Down
Loading