ref(async-inserts): allow async inserts as an option #5955

Merged (4 commits, May 23, 2024)
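
In short, this change adds an async inserts option to the Rust consumer: a new --async-inserts CLI flag is threaded through consumer() and consumer_impl() into ConsumerStrategyFactory and down to BatchFactory. When the flag is set and the async_inserts_allowed runtime config resolves to "1", the ClickHouse insert URL gains async_insert=1&wait_for_async_insert=1, which makes ClickHouse buffer incoming rows server-side while still holding each HTTP insert open until the buffered data is flushed.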
1 change: 1 addition & 0 deletions rust_snuba/benches/processors.rs
@@ -82,6 +82,7 @@ fn create_factory(
clickhouse_concurrency,
commitlog_concurrency,
replacements_concurrency,
async_inserts: false,
python_max_queue_depth: None,
use_rust_processor: true,
health_check_file: None,
4 changes: 4 additions & 0 deletions rust_snuba/src/consumer.rs
@@ -36,6 +36,7 @@ pub fn consumer(
use_rust_processor: bool,
enforce_schema: bool,
max_poll_interval_ms: usize,
async_inserts: bool,
python_max_queue_depth: Option<usize>,
health_check_file: Option<&str>,
stop_at_timestamp: Option<i64>,
@@ -50,6 +51,7 @@
use_rust_processor,
enforce_schema,
max_poll_interval_ms,
async_inserts,
python_max_queue_depth,
health_check_file,
stop_at_timestamp,
@@ -67,6 +69,7 @@ pub fn consumer_impl(
use_rust_processor: bool,
enforce_schema: bool,
max_poll_interval_ms: usize,
async_inserts: bool,
python_max_queue_depth: Option<usize>,
health_check_file: Option<&str>,
stop_at_timestamp: Option<i64>,
@@ -215,6 +218,7 @@ pub fn consumer_impl(
clickhouse_concurrency: ConcurrencyConfig::new(2),
commitlog_concurrency: ConcurrencyConfig::new(2),
replacements_concurrency: ConcurrencyConfig::new(4),
async_inserts,
python_max_queue_depth,
use_rust_processor,
health_check_file: health_check_file.map(ToOwned::to_owned),
2 changes: 2 additions & 0 deletions rust_snuba/src/factory.rs
@@ -45,6 +45,7 @@ pub struct ConsumerStrategyFactory {
pub clickhouse_concurrency: ConcurrencyConfig,
pub commitlog_concurrency: ConcurrencyConfig,
pub replacements_concurrency: ConcurrencyConfig,
pub async_inserts: bool,
pub python_max_queue_depth: Option<usize>,
pub use_rust_processor: bool,
pub health_check_file: Option<String>,
@@ -116,6 +117,7 @@ impl ProcessingStrategyFactory<KafkaPayload> for ConsumerStrategyFactory {
&self.clickhouse_concurrency,
&self.storage_config.clickhouse_cluster.user,
&self.storage_config.clickhouse_cluster.password,
self.async_inserts,
);

let accumulator = Arc::new(
50 changes: 49 additions & 1 deletion rust_snuba/src/strategies/clickhouse/batch.rs
@@ -9,6 +9,7 @@ use tokio::task::JoinHandle;
use tokio::time::{sleep, Duration};
use tokio_stream::wrappers::ReceiverStream;

use crate::runtime_config::get_str_config;
use crate::types::RowData;

const CLICKHOUSE_HTTP_CHUNK_SIZE: usize = 1_000_000;
@@ -31,6 +32,7 @@ impl BatchFactory {
concurrency: &ConcurrencyConfig,
clickhouse_user: &str,
clickhouse_password: &str,
async_inserts: bool,
) -> Self {
let mut headers = HeaderMap::with_capacity(5);
headers.insert(CONNECTION, HeaderValue::from_static("keep-alive"));
@@ -48,7 +50,16 @@
HeaderValue::from_str(database).unwrap(),
);

let query_params = "load_balancing=in_order&insert_distributed_sync=1".to_string();
let mut query_params = String::new();
query_params.push_str("load_balancing=in_order&insert_distributed_sync=1");

if async_inserts {
let async_inserts_allowed = get_str_config("async_inserts_allowed").ok().flatten();
if async_inserts_allowed == Some("1".to_string()) {
query_params.push_str("&async_insert=1&wait_for_async_insert=1");
}
}

let url = format!("http://{hostname}:{http_port}?{query_params}");
let query = format!("INSERT INTO {table} FORMAT JSONEachRow");

@@ -204,6 +215,41 @@ mod tests {
&concurrency,
"default",
"",
false,
);

let mut batch = factory.new_batch();

batch
.write_rows(&RowData::from_encoded_rows(vec![
br#"{"hello": "world"}"#.to_vec()
]))
.unwrap();

concurrency.handle().block_on(batch.finish()).unwrap();

mock.assert();
}

#[test]
fn test_write_async() {
crate::testutils::initialize_python();
let server = MockServer::start();
let mock = server.mock(|when, then| {
when.method(POST).path("/").body("{\"hello\": \"world\"}\n");
then.status(200).body("hi");
});

let concurrency = ConcurrencyConfig::new(1);
let factory = BatchFactory::new(
&server.host(),
server.port(),
"testtable",
"testdb",
&concurrency,
"default",
"",
true,
);

let mut batch = factory.new_batch();
@@ -236,6 +282,7 @@ mod tests {
&concurrency,
"default",
"",
false,
);

let mut batch = factory.new_batch();
@@ -266,6 +313,7 @@ mod tests {
&concurrency,
"default",
"",
false,
);

let mut batch = factory.new_batch();
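
The query-params change in BatchFactory::new above is the heart of the PR: the async-insert settings are only appended when both the per-consumer flag and the global async_inserts_allowed runtime config opt in. A minimal standalone sketch of that gating logic (build_query_params and runtime_flag are illustrative stand-ins, not names from this PR):

fn build_query_params(async_inserts: bool, runtime_flag: Option<&str>) -> String {
    let mut query_params = String::new();
    query_params.push_str("load_balancing=in_order&insert_distributed_sync=1");
    // Both the consumer flag and the runtime config must opt in.
    if async_inserts && runtime_flag == Some("1") {
        query_params.push_str("&async_insert=1&wait_for_async_insert=1");
    }
    query_params
}

fn main() {
    // Flag on and runtime config set to "1": async settings are appended.
    assert!(build_query_params(true, Some("1")).ends_with("&wait_for_async_insert=1"));
    // Flag on but runtime config unset: the insert URL is unchanged.
    assert_eq!(
        build_query_params(true, None),
        "load_balancing=in_order&insert_distributed_sync=1"
    );
}

Because wait_for_async_insert=1 holds the HTTP request open until the flush completes, batch.finish() keeps insert-acknowledged semantics even with server-side buffering enabled.
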
8 changes: 8 additions & 0 deletions snuba/cli/rust_consumer.py
@@ -129,6 +129,12 @@
type=int,
default=30000,
)
@click.option(
"--async-inserts",
is_flag=True,
default=False,
help="Enable async inserts for ClickHouse",
)
@click.option(
"--health-check-file",
default=None,
@@ -169,6 +175,7 @@ def rust_consumer(
use_rust_processor: bool,
group_instance_id: Optional[str],
max_poll_interval_ms: int,
async_inserts: bool,
python_max_queue_depth: Optional[int],
health_check_file: Optional[str],
enforce_schema: bool,
@@ -211,6 +218,7 @@ def rust_consumer(
use_rust_processor,
enforce_schema,
max_poll_interval_ms,
async_inserts,
python_max_queue_depth,
health_check_file,
stop_at_timestamp,
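
Note that --async-inserts defaults to off, and the flag alone is not sufficient: as the batch.rs change above shows, the async_inserts_allowed runtime config must also be "1" before the async-insert parameters are applied, so the behavior can be toggled at runtime without redeploying consumers. A consumer would be started with something roughly like snuba rust-consumer --storage <storage> --async-inserts (other required options omitted here).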