Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion .github/actions/build-windows-artifacts/action.yml
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,6 @@ runs:
shell: pwsh
run: make test sqlness-test
env:
RUSTUP_WINDOWS_PATH_ADD_BIN: 1 # Workaround for https://github.com/nextest-rs/nextest/issues/1493
RUST_BACKTRACE: 1
SQLNESS_OPTS: "--preserve-state"

Expand Down
1 change: 0 additions & 1 deletion .github/workflows/nightly-ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,6 @@ jobs:
CARGO_BUILD_RUSTFLAGS: "-C linker=lld-link"
RUST_BACKTRACE: 1
CARGO_INCREMENTAL: 0
RUSTUP_WINDOWS_PATH_ADD_BIN: 1 # Workaround for https://github.com/nextest-rs/nextest/issues/1493
GT_S3_BUCKET: ${{ vars.AWS_CI_TEST_BUCKET }}
GT_S3_ACCESS_KEY_ID: ${{ secrets.AWS_CI_TEST_ACCESS_KEY_ID }}
GT_S3_ACCESS_KEY: ${{ secrets.AWS_CI_TEST_SECRET_ACCESS_KEY }}
Expand Down
3 changes: 2 additions & 1 deletion Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -129,7 +129,7 @@ etcd-client = "0.14"
fst = "0.4.7"
futures = "0.3"
futures-util = "0.3"
greptime-proto = { git = "https://github.com/GreptimeTeam/greptime-proto.git", rev = "c5419bbd20cb42e568ec325a4d71a3c94cc327e1" }
greptime-proto = { git = "https://github.com/GreptimeTeam/greptime-proto.git", rev = "2be0f36b3264e28ab0e1c22a980d0bb634eb3a77" }
hex = "0.4"
http = "1"
humantime = "2.1"
Expand Down
104 changes: 80 additions & 24 deletions src/common/meta/src/ddl/alter_table.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,30 +22,31 @@ use std::vec;
use api::v1::alter_table_expr::Kind;
use api::v1::RenameTable;
use async_trait::async_trait;
use common_error::ext::ErrorExt;
use common_error::status_code::StatusCode;
use common_error::ext::BoxedError;
use common_procedure::error::{FromJsonSnafu, Result as ProcedureResult, ToJsonSnafu};
use common_procedure::{
Context as ProcedureContext, Error as ProcedureError, LockKey, Procedure, Status, StringKey,
Context as ProcedureContext, ContextProvider, Error as ProcedureError, LockKey, PoisonKey,
PoisonKeys, Procedure, ProcedureId, Status, StringKey,
};
use common_telemetry::{debug, error, info};
use futures::future;
use serde::{Deserialize, Serialize};
use snafu::ResultExt;
use snafu::{ensure, ResultExt};
use store_api::storage::RegionId;
use strum::AsRefStr;
use table::metadata::{RawTableInfo, TableId, TableInfo};
use table::table_reference::TableReference;

use crate::cache_invalidator::Context;
use crate::ddl::utils::add_peer_context_if_needed;
use crate::ddl::utils::{add_peer_context_if_needed, handle_multiple_results, MultipleResults};
use crate::ddl::DdlContext;
use crate::error::{Error, Result};
use crate::error::{AbortProcedureSnafu, Error, NoLeaderSnafu, PutPoisonSnafu, Result};
use crate::instruction::CacheIdent;
use crate::key::table_info::TableInfoValue;
use crate::key::{DeserializedValueWithBytes, RegionDistribution};
use crate::lock_key::{CatalogLock, SchemaLock, TableLock, TableNameLock};
use crate::metrics;
use crate::poison_key::table_poison_key;
use crate::rpc::ddl::AlterTableTask;
use crate::rpc::router::{find_leader_regions, find_leaders, region_distribution};

Expand Down Expand Up @@ -104,7 +105,27 @@ impl AlterTableProcedure {
Ok(Status::executing(true))
}

pub async fn submit_alter_region_requests(&mut self) -> Result<Status> {
fn table_poison_key(&self) -> PoisonKey {
table_poison_key(self.data.table_id())
}

async fn put_poison(
&self,
ctx_provider: &dyn ContextProvider,
procedure_id: ProcedureId,
) -> Result<()> {
let poison_key = self.table_poison_key();
ctx_provider
.try_put_poison(&poison_key, procedure_id)
.await
.context(PutPoisonSnafu)
}

pub async fn submit_alter_region_requests(
&mut self,
procedure_id: ProcedureId,
ctx_provider: &dyn ContextProvider,
) -> Result<Status> {
let table_id = self.data.table_id();
let (_, physical_table_route) = self
.context
Expand All @@ -127,6 +148,9 @@ impl AlterTableProcedure {
alter_kind,
);

ensure!(!leaders.is_empty(), NoLeaderSnafu { table_id });
// Puts the poison before submitting alter region requests to datanodes.
self.put_poison(ctx_provider, procedure_id).await?;
for datanode in leaders {
let requester = self.context.node_manager.datanode(&datanode).await;
let regions = find_leader_regions(&physical_table_route.region_routes, &datanode);
Expand All @@ -140,28 +164,51 @@ impl AlterTableProcedure {
let requester = requester.clone();

alter_region_tasks.push(async move {
if let Err(err) = requester.handle(request).await {
if err.status_code() != StatusCode::RequestOutdated {
// Treat request outdated as success.
// The engine will throw this code when the schema version not match.
// As this procedure has locked the table, the only reason for this error
// is procedure is succeeded before and is retrying.
return Err(add_peer_context_if_needed(datanode)(err));
}
}
Ok(())
requester
.handle(request)
.await
.map_err(add_peer_context_if_needed(datanode))
});
}
}

future::join_all(alter_region_tasks)
let results = future::join_all(alter_region_tasks)
.await
.into_iter()
.collect::<Result<Vec<_>>>()?;

self.data.state = AlterTableState::UpdateMetadata;
.collect::<Vec<_>>();

Ok(Status::executing(true))
match handle_multiple_results(results) {
MultipleResults::PartialRetryable(error) => {
// Just returns the error, and wait for the next try.
Err(error)
}
MultipleResults::PartialNonRetryable(error) => {
error!(error; "Partial non-retryable errors occurred during alter table, table {}, table_id: {}", self.data.table_ref(), self.data.table_id());
// No retry will be done.
Ok(Status::poisoned(
Some(self.table_poison_key()),
ProcedureError::external(error),
))
}
MultipleResults::AllRetryable(error) => {
// Just returns the error, and wait for the next try.
Err(error)
}
MultipleResults::Ok => {
self.data.state = AlterTableState::UpdateMetadata;
Ok(Status::executing_with_clean_poisons(true))
}
MultipleResults::AllNonRetryable(error) => {
error!(error; "All alter requests returned non-retryable errors for table {}, table_id: {}", self.data.table_ref(), self.data.table_id());
// It assumes the metadata on datanode is not changed.
// Case: The alter region request is sent but not applied. (e.g., InvalidArgument)

let err = BoxedError::new(error);
Err(err).context(AbortProcedureSnafu {
clean_poisons: true,
})
}
}
}

/// Update table metadata.
Expand Down Expand Up @@ -250,10 +297,12 @@ impl Procedure for AlterTableProcedure {
Self::TYPE_NAME
}

async fn execute(&mut self, _ctx: &ProcedureContext) -> ProcedureResult<Status> {
async fn execute(&mut self, ctx: &ProcedureContext) -> ProcedureResult<Status> {
let error_handler = |e: Error| {
if e.is_retry_later() {
ProcedureError::retry_later(e)
} else if e.need_clean_poisons() {
ProcedureError::external_and_clean_poisons(e)
} else {
ProcedureError::external(e)
}
Expand All @@ -269,7 +318,10 @@ impl Procedure for AlterTableProcedure {

match state {
AlterTableState::Prepare => self.on_prepare().await,
AlterTableState::SubmitAlterRegionRequests => self.submit_alter_region_requests().await,
AlterTableState::SubmitAlterRegionRequests => {
self.submit_alter_region_requests(ctx.procedure_id, ctx.provider.as_ref())
.await
}
AlterTableState::UpdateMetadata => self.on_update_metadata().await,
AlterTableState::InvalidateTableCache => self.on_broadcast().await,
}
Expand All @@ -285,6 +337,10 @@ impl Procedure for AlterTableProcedure {

LockKey::new(key)
}

fn poison_keys(&self) -> PoisonKeys {
PoisonKeys::new(vec![self.table_poison_key()])
}
}

#[derive(Debug, Serialize, Deserialize, AsRefStr)]
Expand Down
4 changes: 3 additions & 1 deletion src/common/meta/src/ddl/create_table.rs
Original file line number Diff line number Diff line change
Expand Up @@ -299,7 +299,9 @@ impl Procedure for CreateTableProcedure {
.creator
.register_opening_regions(&self.context, &x.region_routes)
.map_err(BoxedError::new)
.context(ExternalSnafu)?;
.context(ExternalSnafu {
clean_poisons: false,
})?;
}

Ok(())
Expand Down
4 changes: 3 additions & 1 deletion src/common/meta/src/ddl/drop_database.rs
Original file line number Diff line number Diff line change
Expand Up @@ -130,7 +130,9 @@ impl Procedure for DropDatabaseProcedure {
self.state
.recover(&self.runtime_context)
.map_err(BoxedError::new)
.context(ExternalSnafu)
.context(ExternalSnafu {
clean_poisons: false,
})
}

async fn execute(&mut self, _ctx: &ProcedureContext) -> ProcedureResult<Status> {
Expand Down
4 changes: 3 additions & 1 deletion src/common/meta/src/ddl/drop_table.rs
Original file line number Diff line number Diff line change
Expand Up @@ -200,7 +200,9 @@ impl Procedure for DropTableProcedure {
if register_operating_regions {
self.register_dropping_regions()
.map_err(BoxedError::new)
.context(ExternalSnafu)?;
.context(ExternalSnafu {
clean_poisons: false,
})?;
}

Ok(())
Expand Down
8 changes: 7 additions & 1 deletion src/common/meta/src/ddl/test_util.rs
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,13 @@ pub async fn create_logical_table(
let tasks = vec![test_create_logical_table_task(table_name)];
let mut procedure = CreateLogicalTablesProcedure::new(tasks, physical_table_id, ddl_context);
let status = procedure.on_prepare().await.unwrap();
assert_matches!(status, Status::Executing { persist: true });
assert_matches!(
status,
Status::Executing {
persist: true,
clean_poisons: false
}
);
let status = procedure.on_create_metadata().await.unwrap();
assert_matches!(status, Status::Done { .. });

Expand Down
71 changes: 71 additions & 0 deletions src/common/meta/src/ddl/test_util/datanode_handler.rs
Original file line number Diff line number Diff line change
Expand Up @@ -171,3 +171,74 @@ impl MockDatanodeHandler for NaiveDatanodeHandler {
unreachable!()
}
}

#[derive(Clone)]
pub struct PartialSuccessDatanodeHandler {
pub retryable: bool,
}

#[async_trait::async_trait]
impl MockDatanodeHandler for PartialSuccessDatanodeHandler {
async fn handle(&self, peer: &Peer, _request: RegionRequest) -> Result<RegionResponse> {
let success = peer.id % 2 == 0;
if success {
Ok(RegionResponse::new(0))
} else if self.retryable {
Err(Error::RetryLater {
source: BoxedError::new(
error::UnexpectedSnafu {
err_msg: "retry later",
}
.build(),
),
})
} else {
error::UnexpectedSnafu {
err_msg: "mock error",
}
.fail()
}
}

async fn handle_query(
&self,
_peer: &Peer,
_request: QueryRequest,
) -> Result<SendableRecordBatchStream> {
unreachable!()
}
}

#[derive(Clone)]
pub struct AllFailureDatanodeHandler {
pub retryable: bool,
}

#[async_trait::async_trait]
impl MockDatanodeHandler for AllFailureDatanodeHandler {
async fn handle(&self, _peer: &Peer, _request: RegionRequest) -> Result<RegionResponse> {
if self.retryable {
Err(Error::RetryLater {
source: BoxedError::new(
error::UnexpectedSnafu {
err_msg: "retry later",
}
.build(),
),
})
} else {
error::UnexpectedSnafu {
err_msg: "mock error",
}
.fail()
}
}

async fn handle_query(
&self,
_peer: &Peer,
_request: QueryRequest,
) -> Result<SendableRecordBatchStream> {
unreachable!()
}
}
Loading
Loading