diff --git a/Makefile b/Makefile index a4562845ecc..7427a8d1e45 100644 --- a/Makefile +++ b/Makefile @@ -67,7 +67,7 @@ endif install: build # binaries mkdir -p "$${PREFIX}/bin" - cp "$${VTROOT}/bin/"{mysqlctld,vtctld,vtctlclient,vtgate,vttablet,vtworker,vtbackup} "$${PREFIX}/bin/" + cp "$${VTROOT}/bin/"{mysqlctld,orchestrator,vtctld,vtctlclient,vtgate,vttablet,vtworker,vtbackup} "$${PREFIX}/bin/" # install copies the files needed to run test Vitess using vtcombo into the given directory tree. # Usage: make install PREFIX=/path/to/install/root diff --git a/config/orchestrator/default.json b/config/orchestrator/default.json index c68029c540e..8fc7ab3747d 100644 --- a/config/orchestrator/default.json +++ b/config/orchestrator/default.json @@ -4,9 +4,5 @@ "MySQLTopologyPassword": "orc_client_user_password", "MySQLReplicaUser": "vt_repl", "MySQLReplicaPassword": "", - "BackendDB": "sqlite", - "SQLite3DataFile": "/home/sougou/dev/src/vitess.io/vitess/vtdataroot/orchestrator.sqlite3", - "RecoverMasterClusterFilters": ["*"], - "RecoveryPeriodBlockSeconds": 1, - "DelayMasterPromotionIfSQLThreadNotUpToDate": true + "RecoveryPeriodBlockSeconds": 5 } diff --git a/docker/lite/Dockerfile.alpine b/docker/lite/Dockerfile.alpine index 84b19c3a9bf..aac7caba428 100644 --- a/docker/lite/Dockerfile.alpine +++ b/docker/lite/Dockerfile.alpine @@ -19,9 +19,6 @@ # Use a temporary layer for the build stage. FROM vitess/bootstrap:mariadb103 AS builder -# Allows some docker builds to disable CGO -ARG CGO_ENABLED=0 - # Allows docker builds to set the BUILD_NUMBER ARG BUILD_NUMBER @@ -51,6 +48,7 @@ ENV MYSQL_FLAVOR MariaDB103 # Copy artifacts from builder layer. COPY --from=builder --chown=vitess:vitess /vt/install /vt +COPY --from=builder --chown=vitess:vitess /vt/src/vitess.io/vitess/web/orchestrator /vt/web/orchestrator # Create mount point for actual data (e.g. MySQL data dir) VOLUME /vt/vtdataroot diff --git a/docker/lite/Dockerfile.mariadb b/docker/lite/Dockerfile.mariadb index c99858aade5..2299fa0866f 100644 --- a/docker/lite/Dockerfile.mariadb +++ b/docker/lite/Dockerfile.mariadb @@ -19,9 +19,6 @@ # Use a temporary layer for the build stage. FROM vitess/bootstrap:mariadb AS builder -# Allows some docker builds to disable CGO -ARG CGO_ENABLED=0 - # Allows docker builds to set the BUILD_NUMBER ARG BUILD_NUMBER @@ -51,6 +48,7 @@ ENV MYSQL_FLAVOR MariaDB # Copy artifacts from builder layer. COPY --from=builder --chown=vitess:vitess /vt/install /vt +COPY --from=builder --chown=vitess:vitess /vt/src/vitess.io/vitess/web/orchestrator /vt/web/orchestrator # Create mount point for actual data (e.g. MySQL data dir) VOLUME /vt/vtdataroot diff --git a/docker/lite/Dockerfile.mariadb103 b/docker/lite/Dockerfile.mariadb103 index 2127b3423c9..825035a66a6 100644 --- a/docker/lite/Dockerfile.mariadb103 +++ b/docker/lite/Dockerfile.mariadb103 @@ -19,9 +19,6 @@ # Use a temporary layer for the build stage. FROM vitess/bootstrap:mariadb103 AS builder -# Allows some docker builds to disable CGO -ARG CGO_ENABLED=0 - # Allows docker builds to set the BUILD_NUMBER ARG BUILD_NUMBER @@ -51,6 +48,7 @@ ENV MYSQL_FLAVOR MariaDB103 # Copy artifacts from builder layer. COPY --from=builder --chown=vitess:vitess /vt/install /vt +COPY --from=builder --chown=vitess:vitess /vt/src/vitess.io/vitess/web/orchestrator /vt/web/orchestrator # Create mount point for actual data (e.g. MySQL data dir) VOLUME /vt/vtdataroot diff --git a/docker/lite/Dockerfile.mysql56 b/docker/lite/Dockerfile.mysql56 index 306c2e2ac1c..a8d5a76de68 100644 --- a/docker/lite/Dockerfile.mysql56 +++ b/docker/lite/Dockerfile.mysql56 @@ -19,9 +19,6 @@ # Use a temporary layer for the build stage. FROM vitess/bootstrap:mysql56 AS builder -# Allows some docker builds to disable CGO -ARG CGO_ENABLED=0 - # Allows docker builds to set the BUILD_NUMBER ARG BUILD_NUMBER @@ -50,6 +47,7 @@ ENV PATH $VTROOT/bin:$PATH # Copy artifacts from builder layer. COPY --from=builder --chown=vitess:vitess /vt/install /vt +COPY --from=builder --chown=vitess:vitess /vt/src/vitess.io/vitess/web/orchestrator /vt/web/orchestrator # Create mount point for actual data (e.g. MySQL data dir) VOLUME /vt/vtdataroot diff --git a/docker/lite/Dockerfile.mysql57 b/docker/lite/Dockerfile.mysql57 index a024cbcb6d5..11013292b49 100644 --- a/docker/lite/Dockerfile.mysql57 +++ b/docker/lite/Dockerfile.mysql57 @@ -19,9 +19,6 @@ # Use a temporary layer for the build stage. FROM vitess/bootstrap:mysql57 AS builder -# Allows some docker builds to disable CGO -ARG CGO_ENABLED=0 - # Allows docker builds to set the BUILD_NUMBER ARG BUILD_NUMBER @@ -50,6 +47,7 @@ ENV PATH $VTROOT/bin:$PATH # Copy artifacts from builder layer. COPY --from=builder --chown=vitess:vitess /vt/install /vt +COPY --from=builder --chown=vitess:vitess /vt/src/vitess.io/vitess/web/orchestrator /vt/web/orchestrator # Create mount point for actual data (e.g. MySQL data dir) VOLUME /vt/vtdataroot diff --git a/docker/lite/Dockerfile.mysql80 b/docker/lite/Dockerfile.mysql80 index 1d3a48d3220..99752bf11b7 100644 --- a/docker/lite/Dockerfile.mysql80 +++ b/docker/lite/Dockerfile.mysql80 @@ -19,9 +19,6 @@ # Use a temporary layer for the build stage. FROM vitess/bootstrap:mysql80 AS builder -# Allows some docker builds to disable CGO -ARG CGO_ENABLED=0 - # Allows docker builds to set the BUILD_NUMBER ARG BUILD_NUMBER @@ -51,6 +48,7 @@ ENV MYSQL_FLAVOR MySQL80 # Copy artifacts from builder layer. COPY --from=builder --chown=vitess:vitess /vt/install /vt +COPY --from=builder --chown=vitess:vitess /vt/src/vitess.io/vitess/web/orchestrator /vt/web/orchestrator # Create mount point for actual data (e.g. MySQL data dir) VOLUME /vt/vtdataroot diff --git a/docker/lite/Dockerfile.percona b/docker/lite/Dockerfile.percona index ba0de684220..b93f0dffe4a 100644 --- a/docker/lite/Dockerfile.percona +++ b/docker/lite/Dockerfile.percona @@ -19,9 +19,6 @@ # Use a temporary layer for the build stage. FROM vitess/bootstrap:percona AS builder -# Allows some docker builds to disable CGO -ARG CGO_ENABLED=0 - # Allows docker builds to set the BUILD_NUMBER ARG BUILD_NUMBER @@ -50,6 +47,7 @@ ENV PATH $VTROOT/bin:$PATH # Copy artifacts from builder layer. COPY --from=builder --chown=vitess:vitess /vt/install /vt +COPY --from=builder --chown=vitess:vitess /vt/src/vitess.io/vitess/web/orchestrator /vt/web/orchestrator # Create mount point for actual data (e.g. MySQL data dir) VOLUME /vt/vtdataroot diff --git a/docker/lite/Dockerfile.percona57 b/docker/lite/Dockerfile.percona57 index f4abff9a92a..a1091434a88 100644 --- a/docker/lite/Dockerfile.percona57 +++ b/docker/lite/Dockerfile.percona57 @@ -19,9 +19,6 @@ # Use a temporary layer for the build stage. FROM vitess/bootstrap:percona57 AS builder -# Allows some docker builds to disable CGO -ARG CGO_ENABLED=0 - # Allows docker builds to set the BUILD_NUMBER ARG BUILD_NUMBER @@ -50,6 +47,7 @@ ENV PATH $VTROOT/bin:$PATH # Copy artifacts from builder layer. COPY --from=builder --chown=vitess:vitess /vt/install /vt +COPY --from=builder --chown=vitess:vitess /vt/src/vitess.io/vitess/web/orchestrator /vt/web/orchestrator # Create mount point for actual data (e.g. MySQL data dir) VOLUME /vt/vtdataroot diff --git a/docker/lite/Dockerfile.percona80 b/docker/lite/Dockerfile.percona80 index add8229d3a7..dfa875a51db 100644 --- a/docker/lite/Dockerfile.percona80 +++ b/docker/lite/Dockerfile.percona80 @@ -19,9 +19,6 @@ # Use a temporary layer for the build stage. FROM vitess/bootstrap:percona80 AS builder -# Allows some docker builds to disable CGO -ARG CGO_ENABLED=0 - # Allows docker builds to set the BUILD_NUMBER ARG BUILD_NUMBER @@ -51,6 +48,7 @@ ENV MYSQL_FLAVOR MySQL80 # Copy artifacts from builder layer. COPY --from=builder --chown=vitess:vitess /vt/install /vt +COPY --from=builder --chown=vitess:vitess /vt/src/vitess.io/vitess/web/orchestrator /vt/web/orchestrator # Create mount point for actual data (e.g. MySQL data dir) VOLUME /vt/vtdataroot diff --git a/docker/lite/Dockerfile.testing b/docker/lite/Dockerfile.testing index 7634d0e5d96..19214800013 100644 --- a/docker/lite/Dockerfile.testing +++ b/docker/lite/Dockerfile.testing @@ -19,9 +19,6 @@ # Use a temporary layer for the build stage. FROM vitess/bootstrap:mysql57 AS builder -# Allows some docker builds to disable CGO -ARG CGO_ENABLED=0 - # Allows docker builds to set the BUILD_NUMBER ARG BUILD_NUMBER @@ -50,6 +47,7 @@ ENV PATH $VTROOT/bin:$PATH # Copy artifacts from builder layer. COPY --from=builder --chown=vitess:vitess /vt/install /vt +COPY --from=builder --chown=vitess:vitess /vt/src/vitess.io/vitess/web/orchestrator /vt/web/orchestrator # Create mount point for actual data (e.g. MySQL data dir) VOLUME /vt/vtdataroot diff --git a/docker/lite/Dockerfile.ubi7.mysql57 b/docker/lite/Dockerfile.ubi7.mysql57 index 7e8050dbfb0..c8675f48382 100644 --- a/docker/lite/Dockerfile.ubi7.mysql57 +++ b/docker/lite/Dockerfile.ubi7.mysql57 @@ -19,9 +19,6 @@ # Use a temporary layer for the build stage. FROM vitess/bootstrap:mysql57 AS builder -# Allows some docker builds to disable CGO -ARG CGO_ENABLED=0 - # Allows docker builds to set the BUILD_NUMBER ARG BUILD_NUMBER @@ -84,6 +81,7 @@ ENV PATH $VTROOT/bin:$PATH # Copy artifacts from builder layer. COPY --from=builder --chown=vitess:vitess /vt/install /vt +COPY --from=builder --chown=vitess:vitess /vt/src/vitess.io/vitess/web/orchestrator /vt/web/orchestrator RUN mkdir -p /licenses COPY LICENSE /licenses diff --git a/docker/lite/Dockerfile.ubi7.mysql80 b/docker/lite/Dockerfile.ubi7.mysql80 index ef01553d574..9c9be8c68f5 100644 --- a/docker/lite/Dockerfile.ubi7.mysql80 +++ b/docker/lite/Dockerfile.ubi7.mysql80 @@ -19,9 +19,6 @@ # Use a temporary layer for the build stage. FROM vitess/bootstrap:mysql80 AS builder -# Allows some docker builds to disable CGO -ARG CGO_ENABLED=0 - # Allows docker builds to set the BUILD_NUMBER ARG BUILD_NUMBER @@ -85,6 +82,7 @@ ENV MYSQL_FLAVOR MySQL80 # Copy artifacts from builder layer. COPY --from=builder --chown=vitess:vitess /vt/install /vt +COPY --from=builder --chown=vitess:vitess /vt/src/vitess.io/vitess/web/orchestrator /vt/web/orchestrator RUN mkdir -p /licenses COPY LICENSE /licenses diff --git a/docker/lite/Dockerfile.ubi7.percona57 b/docker/lite/Dockerfile.ubi7.percona57 index 87c812018df..a630262f006 100644 --- a/docker/lite/Dockerfile.ubi7.percona57 +++ b/docker/lite/Dockerfile.ubi7.percona57 @@ -19,9 +19,6 @@ # Use a temporary layer for the build stage. FROM vitess/bootstrap:percona57 AS builder -# Allows some docker builds to disable CGO -ARG CGO_ENABLED=0 - # Allows docker builds to set the BUILD_NUMBER ARG BUILD_NUMBER @@ -75,6 +72,7 @@ ENV PATH $VTROOT/bin:$PATH # Copy artifacts from builder layer. COPY --from=builder --chown=vitess:vitess /vt/install /vt +COPY --from=builder --chown=vitess:vitess /vt/src/vitess.io/vitess/web/orchestrator /vt/web/orchestrator RUN mkdir -p /licenses COPY LICENSE /licenses diff --git a/docker/lite/Dockerfile.ubi7.percona80 b/docker/lite/Dockerfile.ubi7.percona80 index e6fe2f07927..6e993a85ebd 100644 --- a/docker/lite/Dockerfile.ubi7.percona80 +++ b/docker/lite/Dockerfile.ubi7.percona80 @@ -19,9 +19,6 @@ # Use a temporary layer for the build stage. FROM vitess/bootstrap:percona80 AS builder -# Allows some docker builds to disable CGO -ARG CGO_ENABLED=0 - # Allows docker builds to set the BUILD_NUMBER ARG BUILD_NUMBER @@ -80,6 +77,7 @@ ENV MYSQL_FLAVOR MySQL80 # Copy artifacts from builder layer. COPY --from=builder --chown=vitess:vitess /vt/install /vt +COPY --from=builder --chown=vitess:vitess /vt/src/vitess.io/vitess/web/orchestrator /vt/web/orchestrator RUN mkdir -p /licenses COPY LICENSE /licenses diff --git a/examples/local/orc_test.sh b/examples/local/orc_test.sh new file mode 100755 index 00000000000..0817a470770 --- /dev/null +++ b/examples/local/orc_test.sh @@ -0,0 +1,57 @@ +#!/bin/bash + +# Copyright 2020 The Vitess Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# this script brings up zookeeper and all the vitess components +# required for a single shard deployment. + +source ./env.sh + +# start topo server +if [ "${TOPO}" = "zk2" ]; then + CELL=zone1 ./scripts/zk-up.sh +elif [ "${TOPO}" = "k8s" ]; then + CELL=zone1 ./scripts/k3s-up.sh +else + CELL=zone1 ./scripts/etcd-up.sh +fi + +# start vtctld +CELL=zone1 ./scripts/vtctld-up.sh + +vtctlclient AddCellInfo -root /vitess/zone2 -server_address "${ETCD_SERVER}" zone2 + +# start vttablets for keyspace commerce +for i in 100 101 102; do + CELL=zone1 TABLET_UID=$i ./scripts/mysqlctl-up.sh + CELL=zone1 KEYSPACE=commerce TABLET_UID=$i ./scripts/ovttablet-up.sh +done + +for i in 110 111; do + CELL=zone2 TABLET_UID=$i ./scripts/mysqlctl-up.sh + CELL=zone2 KEYSPACE=commerce TABLET_UID=$i ./scripts/ovttablet-up.sh +done + +# set one of the replicas to master +#vtctlclient InitShardMaster -force commerce/0 zone1-100 + +# create the schema +#vtctlclient ApplySchema -sql-file create_commerce_schema.sql commerce + +# create the vschema +#vtctlclient ApplyVSchema -vschema_file vschema_commerce_initial.json commerce + +# start vtgate +CELL=zone1 ./scripts/vtgate-up.sh diff --git a/examples/local/scripts/ovttablet-up.sh b/examples/local/scripts/ovttablet-up.sh new file mode 100755 index 00000000000..ad53b840034 --- /dev/null +++ b/examples/local/scripts/ovttablet-up.sh @@ -0,0 +1,69 @@ +#!/bin/bash + +# Copyright 2020 The Vitess Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +source ./env.sh + +cell=${CELL:-'test'} +keyspace=${KEYSPACE:-'test_keyspace'} +shard=${SHARD:-'0'} +uid=$TABLET_UID +mysql_port=$[17000 + $uid] +port=$[15000 + $uid] +grpc_port=$[16000 + $uid] +printf -v alias '%s-%010d' $cell $uid +printf -v tablet_dir 'vt_%010d' $uid +tablet_hostname='' +printf -v tablet_logfile 'vttablet_%010d_querylog.txt' $uid + +tablet_type=replica +if [[ "${uid: -1}" -gt 1 ]]; then + tablet_type=rdonly +fi + +echo "Starting vttablet for $alias..." +# shellcheck disable=SC2086 +vttablet \ + $TOPOLOGY_FLAGS \ + -log_dir $VTDATAROOT/tmp \ + -log_queries_to_file $VTDATAROOT/tmp/$tablet_logfile \ + -tablet-path $alias \ + -tablet_hostname "$tablet_hostname" \ + -init_keyspace $keyspace \ + -init_shard $shard \ + -init_tablet_type $tablet_type \ + -health_check_interval 5s \ + -enable_replication_reporter \ + -backup_storage_implementation file \ + -file_backup_storage_root $VTDATAROOT/backups \ + -restore_from_backup \ + -port $port \ + -grpc_port $grpc_port \ + -service_map 'grpc-queryservice,grpc-tabletmanager,grpc-updatestream' \ + -pid_file $VTDATAROOT/$tablet_dir/vttablet.pid \ + -vtctld_addr http://$hostname:$vtctld_web_port/ \ + -disable_active_reparents \ + > $VTDATAROOT/$tablet_dir/vttablet.out 2>&1 & + +# Block waiting for the tablet to be listening +# Not the same as healthy + +for i in $(seq 0 300); do + curl -I "http://$hostname:$port/debug/status" >/dev/null 2>&1 && break + sleep 0.1 +done + +# check one last time +curl -I "http://$hostname:$port/debug/status" || fail "tablet could not be started!" diff --git a/examples/operator/orc_cluster.yaml b/examples/operator/orc_cluster.yaml new file mode 100644 index 00000000000..1cff45c99d2 --- /dev/null +++ b/examples/operator/orc_cluster.yaml @@ -0,0 +1,212 @@ +# The following example is minimalist. The security policies +# and resource specifications are not meant to be used in production. +# Please refer to the operator documentation for recommendations on +# production settings. +apiVersion: planetscale.com/v2 +kind: VitessCluster +metadata: + name: example +spec: + images: + vtctld: vitess/orctest:v1 + orchestrator: vitess/orctest:v1 + vtgate: vitess/orctest:v1 + vttablet: vitess/orctest:v1 + vtbackup: vitess/orctest:v1 + mysqld: + mysql56Compatible: vitess/lite:v6.0.20-20200429 + mysqldExporter: prom/mysqld-exporter:v0.11.0 + cells: + - name: zone1 + gateway: + authentication: + static: + secret: + name: example-cluster-config + key: users.json + replicas: 1 + resources: + requests: + cpu: 100m + memory: 256Mi + limits: + memory: 256Mi + vitessDashboard: + cells: + - zone1 + extraFlags: + security_policy: read-only + replicas: 1 + resources: + limits: + memory: 128Mi + requests: + cpu: 100m + memory: 128Mi + vitessOrchestrator: + cells: + - zone1 + configSecret: + name: example-cluster-config + key: orc_config.json + replicas: 1 + resources: + limits: + memory: 128Mi + requests: + cpu: 100m + memory: 128Mi + + keyspaces: + - name: commerce + turndownPolicy: Immediate + partitionings: + - equal: + parts: 1 + shardTemplate: + databaseInitScriptSecret: + name: example-cluster-config + key: init_db.sql + replication: + enforceSemiSync: false + tabletPools: + - cell: zone1 + type: replica + replicas: 2 + vttablet: + extraFlags: + db_charset: utf8mb4 + disable_active_reparents: "true" + resources: + requests: + cpu: 100m + memory: 256Mi + mysqld: + resources: + requests: + cpu: 100m + memory: 256Mi + dataVolumeClaimTemplate: + accessModes: ["ReadWriteOnce"] + resources: + requests: + storage: 10Gi + updateStrategy: + type: Immediate +--- +apiVersion: v1 +kind: Secret +metadata: + name: example-cluster-config +type: Opaque +stringData: + users.json: | + { + "user": [{ + "UserData": "user", + "Password": "" + }] + } + init_db.sql: | + # This file is executed immediately after mysql_install_db, + # to initialize a fresh data directory. + + ############################################################################### + # Equivalent of mysql_secure_installation + ############################################################################### + + # Changes during the init db should not make it to the binlog. + # They could potentially create errant transactions on replicas. + SET sql_log_bin = 0; + # Remove anonymous users. + DELETE FROM mysql.user WHERE User = ''; + + # Disable remote root access (only allow UNIX socket). + DELETE FROM mysql.user WHERE User = 'root' AND Host != 'localhost'; + + # Remove test database. + DROP DATABASE IF EXISTS test; + + ############################################################################### + # Vitess defaults + ############################################################################### + + # Vitess-internal database. + CREATE DATABASE IF NOT EXISTS _vt; + # Note that definitions of local_metadata and shard_metadata should be the same + # as in production which is defined in go/vt/mysqlctl/metadata_tables.go. + CREATE TABLE IF NOT EXISTS _vt.local_metadata ( + name VARCHAR(255) NOT NULL, + value VARCHAR(255) NOT NULL, + db_name VARBINARY(255) NOT NULL, + PRIMARY KEY (db_name, name) + ) ENGINE=InnoDB; + CREATE TABLE IF NOT EXISTS _vt.shard_metadata ( + name VARCHAR(255) NOT NULL, + value MEDIUMBLOB NOT NULL, + db_name VARBINARY(255) NOT NULL, + PRIMARY KEY (db_name, name) + ) ENGINE=InnoDB; + + # Admin user with all privileges. + CREATE USER 'vt_dba'@'localhost'; + GRANT ALL ON *.* TO 'vt_dba'@'localhost'; + GRANT GRANT OPTION ON *.* TO 'vt_dba'@'localhost'; + + # User for app traffic, with global read-write access. + CREATE USER 'vt_app'@'localhost'; + GRANT SELECT, INSERT, UPDATE, DELETE, CREATE, DROP, RELOAD, PROCESS, FILE, + REFERENCES, INDEX, ALTER, SHOW DATABASES, CREATE TEMPORARY TABLES, + LOCK TABLES, EXECUTE, REPLICATION SLAVE, REPLICATION CLIENT, CREATE VIEW, + SHOW VIEW, CREATE ROUTINE, ALTER ROUTINE, CREATE USER, EVENT, TRIGGER + ON *.* TO 'vt_app'@'localhost'; + + # User for app debug traffic, with global read access. + CREATE USER 'vt_appdebug'@'localhost'; + GRANT SELECT, SHOW DATABASES, PROCESS ON *.* TO 'vt_appdebug'@'localhost'; + + # User for administrative operations that need to be executed as non-SUPER. + # Same permissions as vt_app here. + CREATE USER 'vt_allprivs'@'localhost'; + GRANT SELECT, INSERT, UPDATE, DELETE, CREATE, DROP, RELOAD, PROCESS, FILE, + REFERENCES, INDEX, ALTER, SHOW DATABASES, CREATE TEMPORARY TABLES, + LOCK TABLES, EXECUTE, REPLICATION SLAVE, REPLICATION CLIENT, CREATE VIEW, + SHOW VIEW, CREATE ROUTINE, ALTER ROUTINE, CREATE USER, EVENT, TRIGGER + ON *.* TO 'vt_allprivs'@'localhost'; + + # User for slave replication connections. + # TODO: Should we set a password on this since it allows remote connections? + CREATE USER 'vt_repl'@'%'; + GRANT REPLICATION SLAVE ON *.* TO 'vt_repl'@'%'; + + # User for Vitess filtered replication (binlog player). + # Same permissions as vt_app. + CREATE USER 'vt_filtered'@'localhost'; + GRANT SELECT, INSERT, UPDATE, DELETE, CREATE, DROP, RELOAD, PROCESS, FILE, + REFERENCES, INDEX, ALTER, SHOW DATABASES, CREATE TEMPORARY TABLES, + LOCK TABLES, EXECUTE, REPLICATION SLAVE, REPLICATION CLIENT, CREATE VIEW, + SHOW VIEW, CREATE ROUTINE, ALTER ROUTINE, CREATE USER, EVENT, TRIGGER + ON *.* TO 'vt_filtered'@'localhost'; + + # User for Orchestrator (https://github.com/openark/orchestrator). + # TODO: Reenable when the password is randomly generated. + CREATE USER 'orc_client_user'@'%' IDENTIFIED BY 'orc_client_user_password'; + GRANT SUPER, PROCESS, REPLICATION SLAVE, RELOAD + ON *.* TO 'orc_client_user'@'%'; + GRANT SELECT + ON _vt.* TO 'orc_client_user'@'%'; + + FLUSH PRIVILEGES; + + RESET SLAVE ALL; + RESET MASTER; + orc_config.json: | + { + "Debug": true, + "Durability": "none", + "MySQLTopologyUser": "orc_client_user", + "MySQLTopologyPassword": "orc_client_user_password", + "MySQLReplicaUser": "vt_repl", + "MySQLReplicaPassword": "", + "RecoveryPeriodBlockSeconds": 5 + } diff --git a/examples/operator/orcpf.sh b/examples/operator/orcpf.sh new file mode 100755 index 00000000000..692cab3e74e --- /dev/null +++ b/examples/operator/orcpf.sh @@ -0,0 +1,16 @@ +#!/bin/sh + +kubectl port-forward --address localhost "$(kubectl get service --selector="planetscale.com/component=vtctld" -o name | head -n1)" 15000 15999 & +process_id1=$! +kubectl port-forward --address localhost "$(kubectl get service --selector="planetscale.com/component=vtgate,!planetscale.com/cell" -o name | head -n1)" 15306:3306 & +process_id2=$! +kubectl port-forward --address localhost "$(kubectl get service --selector="planetscale.com/component=orchestrator" -o name | head -n1)" 3000 & +process_id3=$! +sleep 2 +echo "You may point your browser to http://localhost:15000 for vtctld, http://localhost:3000 for orchestrator, and use the following aliases as shortcuts:" +echo 'alias vtctlclient="vtctlclient -server=localhost:15999 -logtostderr"' +echo 'alias mysql="mysql -h 127.0.0.1 -P 15306 -u user"' +echo "Hit Ctrl-C to stop the port forwards" +wait $process_id1 +wait $process_id2 +wait $process_id3 diff --git a/go/cmd/orchestrator/main.go b/go/cmd/orchestrator/main.go index cd7e70c5831..19c7063f233 100644 --- a/go/cmd/orchestrator/main.go +++ b/go/cmd/orchestrator/main.go @@ -18,8 +18,6 @@ package main import ( "flag" - "fmt" - "os" "vitess.io/vitess/go/vt/orchestrator/app" "vitess.io/vitess/go/vt/orchestrator/config" @@ -32,19 +30,10 @@ var AppVersion, GitCommit string // main is the application's entry point. It will either spawn a CLI or HTTP itnerfaces. func main() { // TODO(sougou): change this to use vitess servenv framework + // TODO(sougou): remove cli code configFile := flag.String("config", "", "config file name") - command := flag.String("c", "", "command, required. See full list of commands via 'orchestrator -c help'") - strict := flag.Bool("strict", false, "strict mode (more checks, slower)") - instance := flag.String("i", "", "instance, host_fqdn[:port] (e.g. db.company.com:3306, db.company.com)") sibling := flag.String("s", "", "sibling instance, host_fqdn[:port]") destination := flag.String("d", "", "destination instance, host_fqdn[:port] (synonym to -s)") - owner := flag.String("owner", "", "operation owner") - reason := flag.String("reason", "", "operation reason") - duration := flag.String("duration", "", "maintenance duration (format: 59s, 59m, 23h, 6d, 4w)") - pattern := flag.String("pattern", "", "regular expression pattern") - clusterAlias := flag.String("alias", "", "cluster alias") - pool := flag.String("pool", "", "Pool logical name (applies for pool-related commands)") - hostnameFlag := flag.String("hostname", "", "Hostname/fqdn/CNAME/VIP (applies for hostname/resolve related commands)") discovery := flag.Bool("discovery", true, "auto discovery mode") quiet := flag.Bool("quiet", false, "quiet") verbose := flag.Bool("verbose", false, "verbose") @@ -126,41 +115,17 @@ func main() { config.RuntimeCLIFlags.ConfiguredVersion = AppVersion config.MarkConfigurationLoaded() - if len(flag.Args()) == 0 && *command == "" { - // No command, no argument: just prompt - fmt.Println(app.AppPrompt) - return - } - helpTopic := "" if flag.Arg(0) == "help" { if flag.Arg(1) != "" { helpTopic = flag.Arg(1) } - if helpTopic == "" { - helpTopic = *command - } - if helpTopic == "" { - // hacky way to make the CLI kick in as if the user typed `orchestrator -c help cli` - *command = "help" - flag.Args()[0] = "cli" - } } switch { case helpTopic != "": app.HelpCommand(helpTopic) - case len(flag.Args()) == 0 || flag.Arg(0) == "cli": - app.CliWrapper(*command, *strict, *instance, *destination, *owner, *reason, *duration, *pattern, *clusterAlias, *pool, *hostnameFlag) - case flag.Arg(0) == "http": - app.Http(*discovery) default: - fmt.Fprintln(os.Stderr, `Usage: - orchestrator --options... [cli|http] -See complete list of commands: - orchestrator -c help -Full blown documentation: - orchestrator`) - os.Exit(1) + app.Http(*discovery) } } diff --git a/go/vt/orchestrator/app/cli.go b/go/vt/orchestrator/app/cli.go index b6f42460d9d..4ea377c64f3 100644 --- a/go/vt/orchestrator/app/cli.go +++ b/go/vt/orchestrator/app/cli.go @@ -683,42 +683,6 @@ func Cli(command string, strict bool, instance string, destination string, owner } fmt.Println(instanceKey.DisplayString()) } - case registerCliCommand("enable-semi-sync-master", "Replication, general", `Enable semi-sync replication (master-side)`): - { - instanceKey, _ = inst.FigureInstanceKey(instanceKey, thisInstanceKey) - _, err := inst.SetSemiSyncMaster(instanceKey, true) - if err != nil { - log.Fatale(err) - } - fmt.Println(instanceKey.DisplayString()) - } - case registerCliCommand("disable-semi-sync-master", "Replication, general", `Disable semi-sync replication (master-side)`): - { - instanceKey, _ = inst.FigureInstanceKey(instanceKey, thisInstanceKey) - _, err := inst.SetSemiSyncMaster(instanceKey, false) - if err != nil { - log.Fatale(err) - } - fmt.Println(instanceKey.DisplayString()) - } - case registerCliCommand("enable-semi-sync-replica", "Replication, general", `Enable semi-sync replication (replica-side)`): - { - instanceKey, _ = inst.FigureInstanceKey(instanceKey, thisInstanceKey) - _, err := inst.SetSemiSyncReplica(instanceKey, true) - if err != nil { - log.Fatale(err) - } - fmt.Println(instanceKey.DisplayString()) - } - case registerCliCommand("disable-semi-sync-replica", "Replication, general", `Disable semi-sync replication (replica-side)`): - { - instanceKey, _ = inst.FigureInstanceKey(instanceKey, thisInstanceKey) - _, err := inst.SetSemiSyncReplica(instanceKey, false) - if err != nil { - log.Fatale(err) - } - fmt.Println(instanceKey.DisplayString()) - } case registerCliCommand("restart-slave-statements", "Replication, general", `Get a list of statements to execute to stop then restore replica to same execution state. Provide --statement for injected statement`): { instanceKey, _ = inst.FigureInstanceKey(instanceKey, thisInstanceKey) diff --git a/go/vt/orchestrator/app/http.go b/go/vt/orchestrator/app/http.go index 96cdf606057..27c9237da89 100644 --- a/go/vt/orchestrator/app/http.go +++ b/go/vt/orchestrator/app/http.go @@ -17,8 +17,10 @@ package app import ( + "flag" "net" nethttp "net/http" + "path" "strings" "time" @@ -40,6 +42,9 @@ import ( const discoveryMetricsName = "DISCOVERY_METRICS" +// TODO(sougou): see if this can be rice-boxed. +var webDir = flag.String("orc_web_dir", "web/orchestrator", "Orchestrator http file location") + var sslPEMPassword []byte var agentSSLPEMPassword []byte var discoveryMetrics *collection.Collection @@ -109,11 +114,11 @@ func standardHttp(continuousDiscovery bool) { m.Use(gzip.All()) // Render html templates from templates directory m.Use(render.Renderer(render.Options{ - Directory: "web/orchestrator", + Directory: *webDir, Layout: "templates/layout", HTMLContentType: "text/html", })) - m.Use(martini.Static("web/orchestrator/public", martini.StaticOptions{Prefix: config.Config.URLPrefix})) + m.Use(martini.Static(path.Join(*webDir, "public"), martini.StaticOptions{Prefix: config.Config.URLPrefix})) if config.Config.UseMutualTLS { m.Use(ssl.VerifyOUs(config.Config.SSLValidOUs)) } diff --git a/go/vt/orchestrator/config/config.go b/go/vt/orchestrator/config/config.go index ecf2838bf03..7d8c861ce87 100644 --- a/go/vt/orchestrator/config/config.go +++ b/go/vt/orchestrator/config/config.go @@ -73,6 +73,7 @@ type Configuration struct { ListenSocket string // Where orchestrator HTTP should listen for unix socket (default: empty; when given, TCP is disabled) HTTPAdvertise string // optional, for raft setups, what is the HTTP address this node will advertise to its peers (potentially use where behind NAT or when rerouting ports; example: "http://11.22.33.44:3030") AgentsServerPort string // port orchestrator agents talk back to + Durability string // The type of durability to enforce. Default is "semi_sync". Other values are dictated by registered plugins MySQLTopologyUser string MySQLTopologyPassword string MySQLReplicaUser string // If set, use this credential instead of discovering from mysql. TODO(sougou): deprecate this in favor of fetching from vttablet @@ -272,10 +273,11 @@ func newConfiguration() *Configuration { ListenSocket: "", HTTPAdvertise: "", AgentsServerPort: ":3001", + Durability: "none", StatusEndpoint: DefaultStatusAPIEndpoint, StatusOUVerify: false, - BackendDB: "mysql", - SQLite3DataFile: "", + BackendDB: "sqlite", + SQLite3DataFile: "file::memory:?mode=memory&cache=shared", SkipOrchestratorDatabaseUpdate: false, PanicIfDifferentDatabaseDeploy: false, RaftBind: "127.0.0.1:10008", @@ -313,7 +315,7 @@ func newConfiguration() *Configuration { DiscoverySeeds: []string{}, InstanceBulkOperationsWaitTimeoutSeconds: 10, HostnameResolveMethod: "default", - MySQLHostnameResolveMethod: "@@hostname", + MySQLHostnameResolveMethod: "none", SkipBinlogServerUnresolveCheck: true, ExpiryHostnameResolvesMinutes: 60, RejectHostnameResolvePattern: "", @@ -381,7 +383,7 @@ func newConfiguration() *Configuration { RecoveryPeriodBlockMinutes: 60, RecoveryPeriodBlockSeconds: 3600, RecoveryIgnoreHostnameFilters: []string{}, - RecoverMasterClusterFilters: []string{}, + RecoverMasterClusterFilters: []string{"*"}, RecoverIntermediateMasterClusterFilters: []string{}, ProcessesShellCommand: "bash", OnFailureDetectionProcesses: []string{}, @@ -402,7 +404,7 @@ func newConfiguration() *Configuration { MasterFailoverDetachSlaveMasterHost: false, FailMasterPromotionOnLagMinutes: 0, FailMasterPromotionIfSQLThreadNotUpToDate: false, - DelayMasterPromotionIfSQLThreadNotUpToDate: false, + DelayMasterPromotionIfSQLThreadNotUpToDate: true, PostponeSlaveRecoveryOnLagMinutes: 0, OSCIgnoreHostnameFilters: []string{}, GraphiteAddr: "", diff --git a/go/vt/orchestrator/http/api.go b/go/vt/orchestrator/http/api.go index 171ca1ecc26..889f482374e 100644 --- a/go/vt/orchestrator/http/api.go +++ b/go/vt/orchestrator/http/api.go @@ -1520,63 +1520,6 @@ func (this *HttpAPI) CanReplicateFromGTID(params martini.Params, r render.Render Respond(r, &APIResponse{Code: OK, Message: fmt.Sprintf("%t", canReplicate), Details: belowKey}) } -// setSemiSyncMaster -func (this *HttpAPI) setSemiSyncMaster(params martini.Params, r render.Render, req *http.Request, user auth.User, enable bool) { - if !isAuthorizedForAction(req, user) { - Respond(r, &APIResponse{Code: ERROR, Message: "Unauthorized"}) - return - } - instanceKey, err := this.getInstanceKey(params["host"], params["port"]) - - if err != nil { - Respond(r, &APIResponse{Code: ERROR, Message: err.Error()}) - return - } - instance, err := inst.SetSemiSyncMaster(&instanceKey, enable) - if err != nil { - Respond(r, &APIResponse{Code: ERROR, Message: err.Error()}) - return - } - - Respond(r, &APIResponse{Code: OK, Message: fmt.Sprintf("master semi-sync set to %t", enable), Details: instance}) -} - -func (this *HttpAPI) EnableSemiSyncMaster(params martini.Params, r render.Render, req *http.Request, user auth.User) { - this.setSemiSyncMaster(params, r, req, user, true) -} -func (this *HttpAPI) DisableSemiSyncMaster(params martini.Params, r render.Render, req *http.Request, user auth.User) { - this.setSemiSyncMaster(params, r, req, user, false) -} - -// setSemiSyncMaster -func (this *HttpAPI) setSemiSyncReplica(params martini.Params, r render.Render, req *http.Request, user auth.User, enable bool) { - if !isAuthorizedForAction(req, user) { - Respond(r, &APIResponse{Code: ERROR, Message: "Unauthorized"}) - return - } - instanceKey, err := this.getInstanceKey(params["host"], params["port"]) - - if err != nil { - Respond(r, &APIResponse{Code: ERROR, Message: err.Error()}) - return - } - instance, err := inst.SetSemiSyncReplica(&instanceKey, enable) - if err != nil { - Respond(r, &APIResponse{Code: ERROR, Message: err.Error()}) - return - } - - Respond(r, &APIResponse{Code: OK, Message: fmt.Sprintf("replica semi-sync set to %t", enable), Details: instance}) -} - -func (this *HttpAPI) EnableSemiSyncReplica(params martini.Params, r render.Render, req *http.Request, user auth.User) { - this.setSemiSyncReplica(params, r, req, user, true) -} - -func (this *HttpAPI) DisableSemiSyncReplica(params martini.Params, r render.Render, req *http.Request, user auth.User) { - this.setSemiSyncReplica(params, r, req, user, false) -} - // SetReadOnly sets the global read_only variable func (this *HttpAPI) SetReadOnly(params martini.Params, r render.Render, req *http.Request, user auth.User) { if !isAuthorizedForAction(req, user) { @@ -3719,10 +3662,6 @@ func (this *HttpAPI) RegisterRequests(m *martini.ClassicMartini) { this.registerAPIRequest(m, "flush-binary-logs/:host/:port", this.FlushBinaryLogs) this.registerAPIRequest(m, "purge-binary-logs/:host/:port/:logFile", this.PurgeBinaryLogs) this.registerAPIRequest(m, "restart-slave-statements/:host/:port", this.RestartReplicationStatements) - this.registerAPIRequest(m, "enable-semi-sync-master/:host/:port", this.EnableSemiSyncMaster) - this.registerAPIRequest(m, "disable-semi-sync-master/:host/:port", this.DisableSemiSyncMaster) - this.registerAPIRequest(m, "enable-semi-sync-replica/:host/:port", this.EnableSemiSyncReplica) - this.registerAPIRequest(m, "disable-semi-sync-replica/:host/:port", this.DisableSemiSyncReplica) // Replication information: this.registerAPIRequest(m, "can-replicate-from/:host/:port/:belowHost/:belowPort", this.CanReplicateFrom) diff --git a/go/vt/orchestrator/inst/analysis.go b/go/vt/orchestrator/inst/analysis.go index e0524660078..cfceecdfac6 100644 --- a/go/vt/orchestrator/inst/analysis.go +++ b/go/vt/orchestrator/inst/analysis.go @@ -38,10 +38,14 @@ const ( DeadMasterAndSomeReplicas AnalysisCode = "DeadMasterAndSomeReplicas" MasterHasMaster AnalysisCode = "MasterHasMaster" MasterIsReadOnly AnalysisCode = "MasterIsReadOnly" + MasterSemiSyncMustBeSet AnalysisCode = "MasterSemiSyncMustBeSet" + MasterSemiSyncMustNotBeSet AnalysisCode = "MasterSemiSyncMustNotBeSet" ReplicaIsWritable AnalysisCode = "ReplicaIsWritable" NotConnectedToMaster AnalysisCode = "NotConnectedToMaster" ConnectedToWrongMaster AnalysisCode = "ConnectedToWrongMaster" ReplicationStopped AnalysisCode = "ReplicationStopped" + ReplicaSemiSyncMustBeSet AnalysisCode = "ReplicaSemiSyncMustBeSet" + ReplicaSemiSyncMustNotBeSet AnalysisCode = "ReplicaSemiSyncMustNotBeSet" UnreachableMasterWithLaggingReplicas AnalysisCode = "UnreachableMasterWithLaggingReplicas" UnreachableMaster AnalysisCode = "UnreachableMaster" MasterSingleReplicaNotReplicating AnalysisCode = "MasterSingleReplicaNotReplicating" @@ -164,6 +168,7 @@ type ReplicationAnalysis struct { SemiSyncMasterStatus bool SemiSyncMasterWaitForReplicaCount uint SemiSyncMasterClients uint + SemiSyncReplicaEnabled bool CountSemiSyncReplicasEnabled uint CountLoggingReplicas uint CountStatementBasedLoggingReplicas uint diff --git a/go/vt/orchestrator/inst/analysis_dao.go b/go/vt/orchestrator/inst/analysis_dao.go index 42e6cbf1302..d4c7cdd0f52 100644 --- a/go/vt/orchestrator/inst/analysis_dao.go +++ b/go/vt/orchestrator/inst/analysis_dao.go @@ -81,6 +81,7 @@ func GetReplicationAnalysis(clusterName string, hints *ReplicationAnalysisHints) MIN(master_instance.binary_log_file) AS binary_log_file, MIN(master_instance.binary_log_pos) AS binary_log_pos, MIN(master_instance.suggested_cluster_alias) AS suggested_cluster_alias, + MIN(master_tablet.info) AS master_tablet_info, MIN( IFNULL( master_instance.binary_log_file = database_instance_stale_binlog_coordinates.binary_log_file @@ -201,6 +202,9 @@ func GetReplicationAnalysis(clusterName string, hints *ReplicationAnalysisHints) MIN( master_instance.semi_sync_master_status ) AS semi_sync_master_status, + MIN( + master_instance.semi_sync_replica_enabled + ) AS semi_sync_replica_enabled, SUM(replica_instance.is_co_master) AS count_co_master_replicas, SUM(replica_instance.oracle_gtid) AS count_oracle_gtid_replicas, IFNULL( @@ -306,6 +310,10 @@ func GetReplicationAnalysis(clusterName string, hints *ReplicationAnalysisHints) vitess_tablet.hostname = master_instance.hostname AND vitess_tablet.port = master_instance.port ) + LEFT JOIN vitess_tablet master_tablet ON ( + master_tablet.hostname = master_instance.master_host + AND master_tablet.port = master_instance.master_port + ) LEFT JOIN hostname_resolve ON ( master_instance.hostname = hostname_resolve.hostname ) @@ -348,8 +356,8 @@ func GetReplicationAnalysis(clusterName string, hints *ReplicationAnalysisHints) vitess_tablet.hostname, vitess_tablet.port ORDER BY - tablet_type ASC, - master_timestamp DESC + vitess_tablet.tablet_type ASC, + vitess_tablet.master_timestamp DESC ` clusters := make(map[string]*clusterAnalysis) @@ -366,6 +374,14 @@ func GetReplicationAnalysis(clusterName string, hints *ReplicationAnalysisHints) return nil } + masterTablet := &topodatapb.Tablet{} + if str := m.GetString("master_tablet_info"); str != "" { + if err := proto.UnmarshalText(str, masterTablet); err != nil { + log.Errorf("could not read tablet %v: %v", str, err) + return nil + } + } + a.TabletType = tablet.Type a.MasterTimeStamp = m.GetTime("master_timestamp") @@ -416,6 +432,7 @@ func GetReplicationAnalysis(clusterName string, hints *ReplicationAnalysisHints) a.PseudoGTIDImmediateTopology = m.GetBool("is_pseudo_gtid") a.SemiSyncMasterEnabled = m.GetBool("semi_sync_master_enabled") a.SemiSyncMasterStatus = m.GetBool("semi_sync_master_status") + a.SemiSyncReplicaEnabled = m.GetBool("semi_sync_replica_enabled") a.CountSemiSyncReplicasEnabled = m.GetUint("count_semi_sync_replicas") // countValidSemiSyncReplicasEnabled := m.GetUint("count_valid_semi_sync_replicas") a.SemiSyncMasterWaitForReplicaCount = m.GetUint("semi_sync_master_wait_for_slave_count") @@ -486,6 +503,14 @@ func GetReplicationAnalysis(clusterName string, hints *ReplicationAnalysisHints) a.Analysis = MasterIsReadOnly a.Description = "Master is read-only" // + } else if a.IsClusterMaster && MasterSemiSync(a.AnalyzedInstanceKey) != 0 && !a.SemiSyncMasterEnabled { + a.Analysis = MasterSemiSyncMustBeSet + a.Description = "Master semi-sync must be set" + // + } else if a.IsClusterMaster && MasterSemiSync(a.AnalyzedInstanceKey) == 0 && a.SemiSyncMasterEnabled { + a.Analysis = MasterSemiSyncMustNotBeSet + a.Description = "Master semi-sync must not be set" + // } else if topo.IsReplicaType(a.TabletType) && ca.masterKey == nil { a.Analysis = ClusterHasNoMaster a.Description = "Cluster has no master" @@ -506,6 +531,14 @@ func GetReplicationAnalysis(clusterName string, hints *ReplicationAnalysisHints) a.Analysis = ReplicationStopped a.Description = "Replication is stopped" // + } else if topo.IsReplicaType(a.TabletType) && !a.IsMaster && ReplicaSemiSyncFromTablet(masterTablet, tablet) && !a.SemiSyncReplicaEnabled { + a.Analysis = ReplicaSemiSyncMustBeSet + a.Description = "Replica semi-sync must be set" + // + } else if topo.IsReplicaType(a.TabletType) && !a.IsMaster && !ReplicaSemiSyncFromTablet(masterTablet, tablet) && a.SemiSyncReplicaEnabled { + a.Analysis = ReplicaSemiSyncMustNotBeSet + a.Description = "Replica semi-sync must not be set" + // // TODO(sougou): Events below here are either ignored or not possible. } else if a.IsMaster && !a.LastCheckValid && a.CountLaggingReplicas == a.CountReplicas && a.CountDelayedReplicas < a.CountReplicas && a.CountValidReplicatingReplicas > 0 { a.Analysis = UnreachableMasterWithLaggingReplicas diff --git a/go/vt/orchestrator/inst/durability.go b/go/vt/orchestrator/inst/durability.go new file mode 100644 index 00000000000..ce2ab963169 --- /dev/null +++ b/go/vt/orchestrator/inst/durability.go @@ -0,0 +1,163 @@ +/* +Copyright 2020 The Vitess Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package inst + +import ( + "fmt" + + "vitess.io/vitess/go/vt/orchestrator/external/golib/log" + topodatapb "vitess.io/vitess/go/vt/proto/topodata" +) + +//======================================================================= + +var ( + durabilityPolicies = make(map[string]durabler) + curDurabilityPolicy durabler +) + +func init() { + registerDurability("none", &durabilityNone{}) + registerDurability("semi_sync", &durabilitySemiSync{}) + registerDurability("cross_cell", &durabilityCrossCell{}) +} + +type durabler interface { + promotionRule(*topodatapb.Tablet) CandidatePromotionRule + masterSemiSync(InstanceKey) int + replicaSemiSync(master, replica *topodatapb.Tablet) bool +} + +func registerDurability(name string, d durabler) { + if durabilityPolicies[name] != nil { + log.Fatalf("durability policy %v already registered", name) + } + durabilityPolicies[name] = d +} + +//======================================================================= + +func SetDurabilityPolicy(name string) error { + curDurabilityPolicy = durabilityPolicies[name] + if curDurabilityPolicy == nil { + return fmt.Errorf("durability policy %v not found", name) + } + log.Infof("Durability setting: %v", name) + return nil +} + +// PromotionRule returns the promotion rule for the instance. +func PromotionRule(tablet *topodatapb.Tablet) CandidatePromotionRule { + return curDurabilityPolicy.promotionRule(tablet) +} + +// MasterSemiSync returns the master semi-sync setting for the instance. +// 0 means none. Non-zero specifies the number of required ackers. +func MasterSemiSync(instanceKey InstanceKey) int { + return curDurabilityPolicy.masterSemiSync(instanceKey) +} + +// ReplicaSemiSync returns the replica semi-sync setting for the instance. +func ReplicaSemiSync(masterKey, replicaKey InstanceKey) bool { + master, err := ReadTablet(masterKey) + if err != nil { + return false + } + replica, err := ReadTablet(replicaKey) + if err != nil { + return false + } + return curDurabilityPolicy.replicaSemiSync(master, replica) +} + +// ReplicaSemiSyncFromTablet returns the replica semi-sync setting from the tablet record. +// Prefer using this function if tablet record is available. +func ReplicaSemiSyncFromTablet(master, replica *topodatapb.Tablet) bool { + return curDurabilityPolicy.replicaSemiSync(master, replica) +} + +//======================================================================= + +type durabilityNone struct{} + +func (d *durabilityNone) promotionRule(tablet *topodatapb.Tablet) CandidatePromotionRule { + switch tablet.Type { + case topodatapb.TabletType_MASTER, topodatapb.TabletType_REPLICA: + return NeutralPromoteRule + } + return MustNotPromoteRule +} + +func (d *durabilityNone) masterSemiSync(instanceKey InstanceKey) int { + return 0 +} + +func (d *durabilityNone) replicaSemiSync(master, replica *topodatapb.Tablet) bool { + return false +} + +//======================================================================= + +type durabilitySemiSync struct{} + +func (d *durabilitySemiSync) promotionRule(tablet *topodatapb.Tablet) CandidatePromotionRule { + switch tablet.Type { + case topodatapb.TabletType_MASTER, topodatapb.TabletType_REPLICA: + return NeutralPromoteRule + } + return MustNotPromoteRule +} + +func (d *durabilitySemiSync) masterSemiSync(instanceKey InstanceKey) int { + return 1 +} + +func (d *durabilitySemiSync) replicaSemiSync(master, replica *topodatapb.Tablet) bool { + switch replica.Type { + case topodatapb.TabletType_MASTER, topodatapb.TabletType_REPLICA: + return true + } + return false +} + +//======================================================================= + +type durabilityCrossCell struct{} + +func (d *durabilityCrossCell) promotionRule(tablet *topodatapb.Tablet) CandidatePromotionRule { + switch tablet.Type { + case topodatapb.TabletType_MASTER, topodatapb.TabletType_REPLICA: + return NeutralPromoteRule + } + return MustNotPromoteRule +} + +func (d *durabilityCrossCell) masterSemiSync(instanceKey InstanceKey) int { + return 1 +} + +func (d *durabilityCrossCell) replicaSemiSync(master, replica *topodatapb.Tablet) bool { + // Prevent panics. + if master.Alias == nil || replica.Alias == nil { + return false + } + switch replica.Type { + case topodatapb.TabletType_MASTER, topodatapb.TabletType_REPLICA: + return master.Alias.Cell != replica.Alias.Cell + } + return false +} diff --git a/go/vt/orchestrator/inst/instance_dao.go b/go/vt/orchestrator/inst/instance_dao.go index eaae26cb869..850875381f4 100644 --- a/go/vt/orchestrator/inst/instance_dao.go +++ b/go/vt/orchestrator/inst/instance_dao.go @@ -355,7 +355,9 @@ func ReadTopologyInstanceBufferable(instanceKey *InstanceKey, bufferWrites bool, goto Cleanup } if tablet == nil { - log.Errorf("tablet alias is nil") + // This can happen because Orc rediscovers instances by alt hostnames, + // lit localhost, ip, etc. + // TODO(sougou): disable this ability. goto Cleanup } @@ -838,11 +840,7 @@ func ReadTopologyInstanceBufferable(instanceKey *InstanceKey, bufferWrites bool, // We need to update candidate_database_instance. // We register the rule even if it hasn't changed, // to bump the last_suggested time. - if tablet.Type == topodatapb.TabletType_MASTER || tablet.Type == topodatapb.TabletType_REPLICA { - instance.PromotionRule = NeutralPromoteRule - } else { - instance.PromotionRule = MustNotPromoteRule - } + instance.PromotionRule = PromotionRule(tablet) err = RegisterCandidateInstance(NewCandidateDatabaseInstance(instanceKey, instance.PromotionRule).WithCurrentTime()) logReadTopologyInstanceError(instanceKey, "RegisterCandidateInstance", err) diff --git a/go/vt/orchestrator/inst/instance_topology.go b/go/vt/orchestrator/inst/instance_topology.go index d608493cc67..9170fcc5363 100644 --- a/go/vt/orchestrator/inst/instance_topology.go +++ b/go/vt/orchestrator/inst/instance_topology.go @@ -2524,7 +2524,7 @@ func RegroupReplicasGTID( } } - if err := TabletSetMaster(candidateReplica.Key); err != nil { + if err := SwitchMaster(candidateReplica.Key, *masterKey); err != nil { return emptyReplicas, emptyReplicas, emptyReplicas, candidateReplica, err } diff --git a/go/vt/orchestrator/inst/instance_topology_dao.go b/go/vt/orchestrator/inst/instance_topology_dao.go index 0282512ef3a..b5d93d900eb 100644 --- a/go/vt/orchestrator/inst/instance_topology_dao.go +++ b/go/vt/orchestrator/inst/instance_topology_dao.go @@ -209,36 +209,25 @@ func purgeBinaryLogsTo(instanceKey *InstanceKey, logFile string) (*Instance, err return ReadTopologyInstance(instanceKey) } -func SetSemiSyncMaster(instanceKey *InstanceKey, enableMaster bool) (*Instance, error) { - instance, err := ReadTopologyInstance(instanceKey) - if err != nil { - return instance, err +// TODO(sougou): implement count +func SetSemiSyncMaster(instanceKey *InstanceKey, enableMaster bool) error { + if _, err := ExecInstance(instanceKey, `set global rpl_semi_sync_master_enabled = ?, global rpl_semi_sync_slave_enabled = ?`, enableMaster, false); err != nil { + return log.Errore(err) } - if _, err := ExecInstance(instanceKey, "set @@global.rpl_semi_sync_master_enabled=?", enableMaster); err != nil { - return instance, log.Errore(err) - } - return ReadTopologyInstance(instanceKey) + return nil } -func SetSemiSyncReplica(instanceKey *InstanceKey, enableReplica bool) (*Instance, error) { - instance, err := ReadTopologyInstance(instanceKey) - if err != nil { - return instance, err - } - if instance.SemiSyncReplicaEnabled == enableReplica { - return instance, nil - } - if _, err := ExecInstance(instanceKey, "set @@global.rpl_semi_sync_slave_enabled=?", enableReplica); err != nil { - return instance, log.Errore(err) +// TODO(sougou): This function may be used later for fixing semi-sync +func SetSemiSyncReplica(instanceKey *InstanceKey, enableReplica bool) error { + if _, err := ExecInstance(instanceKey, `set global rpl_semi_sync_master_enabled = ?, global rpl_semi_sync_slave_enabled = ?`, false, enableReplica); err != nil { + return log.Errore(err) } - if instance.ReplicationIOThreadRuning { - // Need to apply change by stopping starting IO thread - ExecInstance(instanceKey, "stop slave io_thread") - if _, err := ExecInstance(instanceKey, "start slave io_thread"); err != nil { - return instance, log.Errore(err) - } + // Need to apply change by stopping starting IO thread + ExecInstance(instanceKey, "stop slave io_thread") + if _, err := ExecInstance(instanceKey, "start slave io_thread"); err != nil { + return log.Errore(err) } - return ReadTopologyInstance(instanceKey) + return nil } func RestartReplicationQuick(instanceKey *InstanceKey) error { @@ -439,20 +428,6 @@ func StartReplication(instanceKey *InstanceKey) (*Instance, error) { return instance, fmt.Errorf("instance is not a replica: %+v", instanceKey) } - // If async fallback is disallowed, we'd better make sure to enable replicas to - // send ACKs before START SLAVE. Replica ACKing is off at mysqld startup because - // some replicas (those that must never be promoted) should never ACK. - // Note: We assume that replicas use 'skip-slave-start' so they won't - // START SLAVE on their own upon restart. - if instance.SemiSyncEnforced { - // Send ACK only from promotable instances. - sendACK := instance.PromotionRule != MustNotPromoteRule - // Always disable master setting, in case we're converting a former master. - if err := EnableSemiSync(instanceKey, false, sendACK); err != nil { - return instance, log.Errore(err) - } - } - _, err = ExecInstance(instanceKey, `start slave`) if err != nil { return instance, log.Errore(err) @@ -538,15 +513,6 @@ func StartReplicationUntilMasterCoordinates(instanceKey *InstanceKey, masterCoor log.Infof("Will start replication on %+v until coordinates: %+v", instanceKey, masterCoordinates) - if instance.SemiSyncEnforced { - // Send ACK only from promotable instances. - sendACK := instance.PromotionRule != MustNotPromoteRule - // Always disable master setting, in case we're converting a former master. - if err := EnableSemiSync(instanceKey, false, sendACK); err != nil { - return instance, log.Errore(err) - } - } - // MariaDB has a bug: a CHANGE MASTER TO statement does not work properly with prepared statement... :P // See https://mariadb.atlassian.net/browse/MDEV-7640 // This is the reason for ExecInstance @@ -572,16 +538,6 @@ func StartReplicationUntilMasterCoordinates(instanceKey *InstanceKey, masterCoor return instance, err } -// EnableSemiSync sets the rpl_semi_sync_(master|replica)_enabled variables -// on a given instance. -func EnableSemiSync(instanceKey *InstanceKey, master, replica bool) error { - log.Infof("instance %+v rpl_semi_sync_master_enabled: %t, rpl_semi_sync_slave_enabled: %t", instanceKey, master, replica) - _, err := ExecInstance(instanceKey, - `set global rpl_semi_sync_master_enabled = ?, global rpl_semi_sync_slave_enabled = ?`, - master, replica) - return err -} - // EnableMasterSSL issues CHANGE MASTER TO MASTER_SSL=1 func EnableMasterSSL(instanceKey *InstanceKey) (*Instance, error) { instance, err := ReadTopologyInstance(instanceKey) @@ -732,6 +688,12 @@ func ChangeMasterTo(instanceKey *InstanceKey, masterKey *InstanceKey, masterBinl if err != nil { return instance, log.Errore(err) } + + semiSync := ReplicaSemiSync(*masterKey, *instanceKey) + if _, err := ExecInstance(instanceKey, `set global rpl_semi_sync_master_enabled = ?, global rpl_semi_sync_slave_enabled = ?`, false, semiSync); err != nil { + return instance, log.Errore(err) + } + WriteMasterPositionEquivalence(&originalMasterKey, &originalExecBinlogCoordinates, changeToMasterKey, masterBinlogCoordinates) ResetInstanceRelaylogCoordinatesHistory(instanceKey) @@ -958,16 +920,6 @@ func SetReadOnly(instanceKey *InstanceKey, readOnly bool) (*Instance, error) { return instance, fmt.Errorf("noop: aborting set-read-only operation on %+v; signalling error but nothing went wrong.", *instanceKey) } - // If async fallback is disallowed, we're responsible for flipping the master - // semi-sync switch ON before accepting writes. The setting is off by default. - if instance.SemiSyncEnforced && !readOnly { - // Send ACK only from promotable instances. - sendACK := instance.PromotionRule != MustNotPromoteRule - if err := EnableSemiSync(instanceKey, true, sendACK); err != nil { - return instance, log.Errore(err) - } - } - if _, err := ExecInstance(instanceKey, "set global read_only = ?", readOnly); err != nil { return instance, log.Errore(err) } @@ -982,16 +934,6 @@ func SetReadOnly(instanceKey *InstanceKey, readOnly bool) (*Instance, error) { } instance, err = ReadTopologyInstance(instanceKey) - // If we just went read-only, it's safe to flip the master semi-sync switch - // OFF, which is the default value so that replicas can make progress. - if instance.SemiSyncEnforced && readOnly { - // Send ACK only from promotable instances. - sendACK := instance.PromotionRule != MustNotPromoteRule - if err := EnableSemiSync(instanceKey, false, sendACK); err != nil { - return instance, log.Errore(err) - } - } - log.Infof("instance %+v read_only: %t", instanceKey, readOnly) AuditOperation("read-only", instanceKey, fmt.Sprintf("set as %t", readOnly)) diff --git a/go/vt/orchestrator/inst/tablet_dao.go b/go/vt/orchestrator/inst/tablet_dao.go index 7aaeb153001..fc2959a003f 100644 --- a/go/vt/orchestrator/inst/tablet_dao.go +++ b/go/vt/orchestrator/inst/tablet_dao.go @@ -27,6 +27,7 @@ import ( "vitess.io/vitess/go/vt/orchestrator/external/golib/sqlutils" topodatapb "vitess.io/vitess/go/vt/proto/topodata" "vitess.io/vitess/go/vt/topo" + "vitess.io/vitess/go/vt/topo/topoproto" "vitess.io/vitess/go/vt/vttablet/tmclient" ) @@ -36,27 +37,72 @@ var TopoServ *topo.Server // ErrTabletAliasNil is a fixed error message. var ErrTabletAliasNil = errors.New("tablet alias is nil") -// TabletSetMaster designates the tablet that owns an instance as the master. -func TabletSetMaster(instanceKey InstanceKey) error { +// SwitchMaster makes the new tablet the master and proactively performs +// the necessary propagation to the old master. The propagation is best +// effort. If it fails, the tablet's shard sync will eventually converge. +// The proactive propagation allows a competing Orchestrator from discovering +// the successful action of a previous one, which reduces churn. +func SwitchMaster(newMasterKey, oldMasterKey InstanceKey) error { + newMasterTablet, err := ChangeTabletType(newMasterKey, topodatapb.TabletType_MASTER) + if err != nil { + return err + } + // The following operations are best effort. + if newMasterTablet.Type != topodatapb.TabletType_MASTER { + log.Errorf("Unexpected: tablet type did not change to master: %v", newMasterTablet.Type) + return nil + } + _, err = TopoServ.UpdateShardFields(context.TODO(), newMasterTablet.Keyspace, newMasterTablet.Shard, func(si *topo.ShardInfo) error { + if proto.Equal(si.MasterAlias, newMasterTablet.Alias) && proto.Equal(si.MasterTermStartTime, newMasterTablet.MasterTermStartTime) { + return topo.NewError(topo.NoUpdateNeeded, "") + } + + // We just successfully reparented. We should check timestamps, but always overwrite. + lastTerm := si.GetMasterTermStartTime() + newTerm := logutil.ProtoToTime(newMasterTablet.MasterTermStartTime) + if !newTerm.After(lastTerm) { + log.Errorf("Possible clock skew. New master start time is before previous one: %v vs %v", newTerm, lastTerm) + } + + aliasStr := topoproto.TabletAliasString(newMasterTablet.Alias) + log.Infof("Updating shard record: master_alias=%v, master_term_start_time=%v", aliasStr, newTerm) + si.MasterAlias = newMasterTablet.Alias + si.MasterTermStartTime = newMasterTablet.MasterTermStartTime + return nil + }) + // Don't proceed if shard record could not be updated. + if err != nil { + log.Errore(err) + return nil + } + if _, err := ChangeTabletType(oldMasterKey, topodatapb.TabletType_REPLICA); err != nil { + // This is best effort. + log.Errore(err) + } + return nil +} + +// ChangeTabletType designates the tablet that owns an instance as the master. +func ChangeTabletType(instanceKey InstanceKey, tabletType topodatapb.TabletType) (*topodatapb.Tablet, error) { if instanceKey.Hostname == "" { - return errors.New("can't set tablet to master: instance is unspecified") + return nil, errors.New("can't set tablet to master: instance is unspecified") } tablet, err := ReadTablet(instanceKey) if err != nil { - return err + return nil, err } tmc := tmclient.NewTabletManagerClient() - if err := tmc.ChangeType(context.TODO(), tablet, topodatapb.TabletType_MASTER); err != nil { - return err + if err := tmc.ChangeType(context.TODO(), tablet, tabletType); err != nil { + return nil, err } ti, err := TopoServ.GetTablet(context.TODO(), tablet.Alias) if err != nil { - return log.Errore(err) + return nil, log.Errore(err) } if err := SaveTablet(ti.Tablet); err != nil { log.Errore(err) } - return nil + return ti.Tablet, nil } // ReadTablet reads the vitess tablet record. diff --git a/go/vt/orchestrator/logic/orchestrator.go b/go/vt/orchestrator/logic/orchestrator.go index be70b17b04e..1339ce67546 100644 --- a/go/vt/orchestrator/logic/orchestrator.go +++ b/go/vt/orchestrator/logic/orchestrator.go @@ -272,32 +272,6 @@ func DiscoverInstance(instanceKey inst.InstanceKey) { InstanceLatency: instanceLatency, Err: nil, }) - - if !IsLeaderOrActive() { - // Maybe this node was elected before, but isn't elected anymore. - // If not elected, stop drilling up/down the topology - return - } - - // Investigate replicas and members of the same replication group: - for _, replicaKey := range append(instance.ReplicationGroupMembers.GetInstanceKeys(), instance.Replicas.GetInstanceKeys()...) { - replicaKey := replicaKey // not needed? no concurrency here? - - // Avoid noticing some hosts we would otherwise discover - if inst.RegexpMatchPatterns(replicaKey.StringCode(), config.Config.DiscoveryIgnoreReplicaHostnameFilters) { - continue - } - - if replicaKey.IsValid() { - discoveryQueue.Push(replicaKey) - } - } - // Investigate master: - if instance.MasterKey.IsValid() { - if !inst.RegexpMatchPatterns(instance.MasterKey.StringCode(), config.Config.DiscoveryIgnoreMasterHostnameFilters) { - discoveryQueue.Push(instance.MasterKey) - } - } } // onHealthTick handles the actions to take to discover/poll instances @@ -509,6 +483,7 @@ func ContinuousDiscovery() { go ometrics.InitGraphiteMetrics() go acceptSignals() go kv.InitKVStores() + inst.SetDurabilityPolicy(config.Config.Durability) if config.Config.RaftEnabled { if err := orcraft.Setup(NewCommandApplier(), NewSnapshotDataCreatorApplier(), process.ThisHostname); err != nil { log.Fatale(err) diff --git a/go/vt/orchestrator/logic/tablet_discovery.go b/go/vt/orchestrator/logic/tablet_discovery.go index 9bcdebd16f0..0b1f6ddd3c5 100644 --- a/go/vt/orchestrator/logic/tablet_discovery.go +++ b/go/vt/orchestrator/logic/tablet_discovery.go @@ -174,21 +174,26 @@ func LockShard(instanceKey inst.InstanceKey) (func(*error), error) { if err != nil { return nil, err } - _, unlock, err := ts.LockShard(context.TODO(), tablet.Keyspace, tablet.Shard, "Orc Recovery") + ctx, cancel := context.WithTimeout(context.TODO(), 1*time.Second) + defer cancel() + _, unlock, err := ts.LockShard(ctx, tablet.Keyspace, tablet.Shard, "Orc Recovery") return unlock, err } // TabletRefresh refreshes the tablet info. -func TabletRefresh(instanceKey inst.InstanceKey) error { +func TabletRefresh(instanceKey inst.InstanceKey) (*topodatapb.Tablet, error) { tablet, err := inst.ReadTablet(instanceKey) if err != nil { - return err + return nil, err } ti, err := ts.GetTablet(context.TODO(), tablet.Alias) if err != nil { - return err + return nil, err + } + if err := inst.SaveTablet(ti.Tablet); err != nil { + return nil, err } - return inst.SaveTablet(ti.Tablet) + return ti.Tablet, nil } // TabletDemoteMaster requests the master tablet to stop accepting transactions. @@ -210,10 +215,14 @@ func tabletDemoteMaster(instanceKey inst.InstanceKey, forward bool) error { return err } tmc := tmclient.NewTabletManagerClient() + // TODO(sougou): this should be controllable because we may want + // to give a longer timeout for a graceful takeover. + ctx, cancel := context.WithTimeout(context.TODO(), 1*time.Second) + defer cancel() if forward { - _, err = tmc.DemoteMaster(context.TODO(), tablet) + _, err = tmc.DemoteMaster(ctx, tablet) } else { - err = tmc.UndoDemoteMaster(context.TODO(), tablet) + err = tmc.UndoDemoteMaster(ctx, tablet) } return err } diff --git a/go/vt/orchestrator/logic/topology_recovery.go b/go/vt/orchestrator/logic/topology_recovery.go index 23800675dd8..785e531cfdf 100644 --- a/go/vt/orchestrator/logic/topology_recovery.go +++ b/go/vt/orchestrator/logic/topology_recovery.go @@ -38,6 +38,7 @@ import ( "vitess.io/vitess/go/vt/orchestrator/process" orcraft "vitess.io/vitess/go/vt/orchestrator/raft" "vitess.io/vitess/go/vt/orchestrator/util" + topodatapb "vitess.io/vitess/go/vt/proto/topodata" ) var countPendingRecoveries int64 @@ -506,6 +507,9 @@ func recoverDeadMaster(topologyRecovery *TopologyRecovery, candidateInstanceKey AuditTopologyRecovery(topologyRecovery, fmt.Sprintf("RecoverDeadMaster: will recover %+v", *failedInstanceKey)) + err = TabletDemoteMaster(*failedInstanceKey) + AuditTopologyRecovery(topologyRecovery, fmt.Sprintf("RecoverDeadMaster: TabletDemoteMaster: %v", err)) + topologyRecovery.RecoveryType = GetMasterRecoveryType(analysisEntry) AuditTopologyRecovery(topologyRecovery, fmt.Sprintf("RecoverDeadMaster: masterRecoveryType=%+v", topologyRecovery.RecoveryType)) @@ -582,14 +586,17 @@ func recoverDeadMaster(topologyRecovery *TopologyRecovery, candidateInstanceKey } if promotedReplica == nil { + err := TabletUndoDemoteMaster(*failedInstanceKey) + AuditTopologyRecovery(topologyRecovery, fmt.Sprintf("RecoverDeadMaster: TabletUndoDemoteMaster: %v", err)) message := "Failure: no replica promoted." AuditTopologyRecovery(topologyRecovery, message) inst.AuditOperation("recover-dead-master", failedInstanceKey, message) - } else { - message := fmt.Sprintf("promoted replica: %+v", promotedReplica.Key) - AuditTopologyRecovery(topologyRecovery, message) - inst.AuditOperation("recover-dead-master", failedInstanceKey, message) + return true, promotedReplica, lostReplicas, err } + + message := fmt.Sprintf("promoted replica: %+v", promotedReplica.Key) + AuditTopologyRecovery(topologyRecovery, message) + inst.AuditOperation("recover-dead-master", failedInstanceKey, message) return true, promotedReplica, lostReplicas, err } @@ -841,6 +848,16 @@ func checkAndRecoverDeadMaster(analysisEntry inst.ReplicationAnalysis, candidate } defer unlock(&err) + // Check if someone else fixed the problem. + tablet, err := TabletRefresh(analysisEntry.AnalyzedInstanceKey) + if err == nil && tablet.Type != topodatapb.TabletType_MASTER { + // TODO(sougou); use a version that only refreshes the current shard. + RefreshTablets() + AuditTopologyRecovery(topologyRecovery, "another agent seems to have fixed the problem") + // TODO(sougou): see if we have to reset the cluster as healthy. + return false, topologyRecovery, nil + } + AuditTopologyRecovery(topologyRecovery, fmt.Sprintf("will handle DeadMaster event on %+v", analysisEntry.ClusterDetails.ClusterName)) recoverDeadMasterCounter.Inc(1) recoveryAttempted, promotedReplica, lostReplicas, err := recoverDeadMaster(topologyRecovery, candidateInstanceKey, skipProcesses) @@ -904,8 +921,15 @@ func checkAndRecoverDeadMaster(analysisEntry inst.ReplicationAnalysis, candidate } } { - _, err := inst.SetReadOnly(&promotedReplica.Key, false) - AuditTopologyRecovery(topologyRecovery, fmt.Sprintf("- RecoverDeadMaster: applying read-only=0 on promoted master: success=%t", (err == nil))) + count := inst.MasterSemiSync(promotedReplica.Key) + err := inst.SetSemiSyncMaster(&promotedReplica.Key, count > 0) + AuditTopologyRecovery(topologyRecovery, fmt.Sprintf("- RecoverDeadMaster: applying semi-sync %v: success=%t", count > 0, (err == nil))) + + // Dont' allow writes if semi-sync settings fail. + if err == nil { + _, err := inst.SetReadOnly(&promotedReplica.Key, false) + AuditTopologyRecovery(topologyRecovery, fmt.Sprintf("- RecoverDeadMaster: applying read-only=0 on promoted master: success=%t", (err == nil))) + } } // Let's attempt, though we won't necessarily succeed, to set old master as read-only go func() { @@ -1535,9 +1559,10 @@ func getCheckAndRecoverFunction(analysisCode inst.AnalysisCode, analyzedInstance return electNewMaster, true case inst.MasterHasMaster: return fixClusterAndMaster, true - case inst.MasterIsReadOnly: + case inst.MasterIsReadOnly, inst.MasterSemiSyncMustBeSet, inst.MasterSemiSyncMustNotBeSet: return fixMaster, true - case inst.NotConnectedToMaster, inst.ConnectedToWrongMaster, inst.ReplicationStopped, inst.ReplicaIsWritable: + case inst.NotConnectedToMaster, inst.ConnectedToWrongMaster, inst.ReplicationStopped, inst.ReplicaIsWritable, + inst.ReplicaSemiSyncMustBeSet, inst.ReplicaSemiSyncMustNotBeSet: return fixReplica, false // intermediate master case inst.DeadIntermediateMaster: @@ -1975,11 +2000,6 @@ func GracefulMasterTakeover(clusterName string, designatedKey *inst.InstanceKey, if err := executeProcesses(config.Config.PreGracefulTakeoverProcesses, "PreGracefulTakeoverProcesses", preGracefulTakeoverTopologyRecovery, true); err != nil { return nil, nil, fmt.Errorf("Failed running PreGracefulTakeoverProcesses: %+v", err) } - - log.Infof("GracefulMasterTakeover: invoking TabletDemoteMaster on %+v", clusterMaster.Key) - if err := TabletDemoteMaster(clusterMaster.Key); err != nil { - return nil, nil, err - } demotedMasterSelfBinlogCoordinates := &clusterMaster.SelfBinlogCoordinates log.Infof("GracefulMasterTakeover: Will wait for %+v to reach master coordinates %+v", designatedInstance.Key, *demotedMasterSelfBinlogCoordinates) if designatedInstance, _, err = inst.WaitForExecBinlogCoordinatesToReach(&designatedInstance.Key, demotedMasterSelfBinlogCoordinates, time.Duration(config.Config.ReasonableMaintenanceReplicationLagSeconds)*time.Second); err != nil { @@ -1998,13 +2018,6 @@ func GracefulMasterTakeover(clusterName string, designatedKey *inst.InstanceKey, if topologyRecovery == nil { return nil, nil, fmt.Errorf("GracefulMasterTakeover: recovery attempted but with no results. This should not happen") } - if topologyRecovery.SuccessorKey == nil { - // Promotion failed. Undo. - log.Infof("GracefulMasterTakeover: Invoking tabletUndoDemoteMaster on %+v", clusterMaster.Key) - if err := TabletUndoDemoteMaster(clusterMaster.Key); err != nil { - log.Errore(err) - } - } var gtidHint inst.OperationGTIDHint = inst.GTIDHintNeutral if topologyRecovery.RecoveryType == MasterRecoveryGTID { gtidHint = inst.GTIDHintForce @@ -2030,6 +2043,7 @@ func GracefulMasterTakeover(clusterName string, designatedKey *inst.InstanceKey, } // electNewMaster elects a new master while none were present before. +// TODO(sougou): this should be mreged with recoverDeadMaster func electNewMaster(analysisEntry inst.ReplicationAnalysis, candidateInstanceKey *inst.InstanceKey, forceInstanceRecovery bool, skipProcesses bool) (recoveryAttempted bool, topologyRecovery *TopologyRecovery, err error) { topologyRecovery, err = AttemptRecoveryRegistration(&analysisEntry, false, true) if topologyRecovery == nil { @@ -2047,6 +2061,8 @@ func electNewMaster(analysisEntry inst.ReplicationAnalysis, candidateInstanceKey } defer unlock(&err) + // TODO(sougou): check if another Orc succeeded before fixing anything. + replicas, err := inst.ReadClusterAliasInstances(analysisEntry.SuggestedClusterAlias) if err != nil { return false, topologyRecovery, err @@ -2090,7 +2106,7 @@ func electNewMaster(analysisEntry inst.ReplicationAnalysis, candidateInstanceKey } } - if err := inst.TabletSetMaster(candidate.Key); err != nil { + if _, err := inst.ChangeTabletType(candidate.Key, topodatapb.TabletType_MASTER); err != nil { return true, topologyRecovery, err } // TODO(sougou): parallelize @@ -2102,7 +2118,15 @@ func electNewMaster(analysisEntry inst.ReplicationAnalysis, candidateInstanceKey return false, topologyRecovery, err } } - if _, err := inst.SetReadOnly(&candidate.Key, false); err != nil { + count := inst.MasterSemiSync(candidate.Key) + err = inst.SetSemiSyncMaster(&candidate.Key, count > 0) + AuditTopologyRecovery(topologyRecovery, fmt.Sprintf("- electNewMaster: applying semi-sync %v: success=%t", count > 0, (err == nil))) + if err != nil { + return false, topologyRecovery, err + } + _, err = inst.SetReadOnly(&candidate.Key, false) + AuditTopologyRecovery(topologyRecovery, fmt.Sprintf("- electNewMaster: set read-only false: success=%t", (err == nil))) + if err != nil { return false, topologyRecovery, err } return true, topologyRecovery, nil @@ -2132,7 +2156,7 @@ func fixClusterAndMaster(analysisEntry inst.ReplicationAnalysis, candidateInstan if err != nil { return recoveryAttempted, topologyRecovery, err } - if err := TabletRefresh(analysisEntry.AnalyzedInstanceKey); err != nil { + if _, err := TabletRefresh(analysisEntry.AnalyzedInstanceKey); err != nil { log.Errore(err) } return recoveryAttempted, topologyRecovery, err @@ -2156,6 +2180,14 @@ func fixMaster(analysisEntry inst.ReplicationAnalysis, candidateInstanceKey *ins } defer unlock(&err) + // TODO(sougou): this code pattern has reached DRY limits. Reuse. + count := inst.MasterSemiSync(analysisEntry.AnalyzedInstanceKey) + err = inst.SetSemiSyncMaster(&analysisEntry.AnalyzedInstanceKey, count > 0) + //AuditTopologyRecovery(topologyRecovery, fmt.Sprintf("- fixMaster: applying semi-sync %v: success=%t", count > 0, (err == nil))) + if err != nil { + return false, topologyRecovery, err + } + if err := TabletUndoDemoteMaster(analysisEntry.AnalyzedInstanceKey); err != nil { return false, topologyRecovery, err } @@ -2170,6 +2202,16 @@ func fixReplica(analysisEntry inst.ReplicationAnalysis, candidateInstanceKey *in return false, nil, err } log.Infof("Analysis: %v, will fix replica %+v", analysisEntry.Analysis, analysisEntry.AnalyzedInstanceKey) + + unlock, err := LockShard(analysisEntry.AnalyzedInstanceKey) + if err != nil { + log.Infof("CheckAndRecover: Analysis: %+v, InstanceKey: %+v, candidateInstanceKey: %+v, "+ + "skipProcesses: %v: NOT detecting/recovering host, could not obtain shard lock (%v)", + analysisEntry.Analysis, analysisEntry.AnalyzedInstanceKey, candidateInstanceKey, skipProcesses, err) + return false, topologyRecovery, err + } + defer unlock(&err) + if _, err := inst.SetReadOnly(&analysisEntry.AnalyzedInstanceKey, true); err != nil { return false, topologyRecovery, err }