From a0d363a340ef6953cdf0860f6558dc8b459b586d Mon Sep 17 00:00:00 2001 From: Leonardo Alminana Date: Mon, 3 Feb 2025 13:55:22 +0100 Subject: [PATCH 01/18] blob_db: initial commit of the blob database component Signed-off-by: Leonardo Alminana --- include/fluent-bit/flb_blob_db.h | 454 +++++++++ src/flb_blob_db.c | 1550 ++++++++++++++++++++++++++++++ 2 files changed, 2004 insertions(+) create mode 100644 include/fluent-bit/flb_blob_db.h create mode 100644 src/flb_blob_db.c diff --git a/include/fluent-bit/flb_blob_db.h b/include/fluent-bit/flb_blob_db.h new file mode 100644 index 00000000000..f12a0cc39b4 --- /dev/null +++ b/include/fluent-bit/flb_blob_db.h @@ -0,0 +1,454 @@ +/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ + +/* Fluent Bit + * ========== + * Copyright (C) 2015-2024 The Fluent Bit Authors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef FLB_BLOB_DB_H +#define FLB_BLOB_DB_H + +#include + +#define SQL_PRAGMA_FOREIGN_KEYS "PRAGMA foreign_keys = ON;" + +#define SQL_CREATE_BLOB_FILES \ + "CREATE TABLE IF NOT EXISTS blob_files (" \ + " id INTEGER PRIMARY KEY," \ + " tag TEXT NOT NULL DEFAULT ''," \ + " source TEXT NOT NULL," \ + " destination TEXT NOT NULL," \ + " path TEXT NOT NULL," \ + " remote_id TEXT NOT NULL DEFAULT ''," \ + " size INTEGER," \ + " created INTEGER," \ + " delivery_attempts INTEGER DEFAULT 0," \ + " aborted INTEGER DEFAULT 0," \ + " last_delivery_attempt INTEGER DEFAULT 0" \ + ");" + +#define SQL_CREATE_BLOB_PARTS \ + "CREATE TABLE IF NOT EXISTS blob_parts (" \ + " id INTEGER PRIMARY KEY," \ + " file_id INTEGER NOT NULL," \ + " part_id INTEGER NOT NULL," \ + " remote_id TEXT NOT NULL DEFAULT ''," \ + " uploaded INTEGER DEFAULT 0," \ + " in_progress INTEGER DEFAULT 0," \ + " offset_start INTEGER," \ + " offset_end INTEGER," \ + " delivery_attempts INTEGER DEFAULT 0," \ + " FOREIGN KEY (file_id) REFERENCES blob_files(id) " \ + " ON DELETE CASCADE" \ + ");" + +#define SQL_INSERT_FILE \ + "INSERT INTO blob_files (tag, source, destination, path, size, created)" \ + " VALUES (@tag, @source, @destination, @path, @size, @created);" + +#define SQL_DELETE_FILE \ + "DELETE FROM blob_files WHERE id=@id;" + +#define SQL_SET_FILE_ABORTED_STATE \ + "UPDATE blob_files SET aborted=@state WHERE id=@id;" + +#define SQL_UPDATE_FILE_REMOTE_ID \ + "UPDATE blob_files SET remote_id=@remote_id WHERE id=@id;" + +#define SQL_UPDATE_FILE_DESTINATION \ + "UPDATE blob_files SET destination=@destination WHERE id=@id;" + +#define SQL_UPDATE_FILE_DELIVERY_ATTEMPT_COUNT \ + "UPDATE blob_files " \ + " SET delivery_attempts=@delivery_attempts, " \ + " last_delivery_attempt=UNIXEPOCH() " \ + " WHERE id=@id;" + +#define SQL_GET_FILE \ + "SELECT * FROM blob_files WHERE path=@path ORDER BY id DESC;" + +#define SQL_GET_FILE_PART_COUNT \ + "SELECT count(id) " \ + " FROM blob_parts " \ + " WHERE file_id=@id;" + +#define SQL_GET_NEXT_ABORTED_FILE \ + "SELECT id, bf.delivery_attempts, source, path, remote_id, " \ + " tag " \ + " FROM blob_files bf " \ + 
" WHERE aborted = 1 " \ + " AND (SELECT COUNT(*) " \ + " FROM blob_parts bp " \ + " WHERE bp.file_id = bf.id " \ + " AND in_progress = 1) = 0 " \ + "ORDER BY id DESC " \ + "LIMIT 1;" + +#define SQL_GET_NEXT_STALE_FILE \ + "SELECT id, path, remote_id, tag " \ + " FROM blob_files " \ + " WHERE aborted = 0 " \ + " AND last_delivery_attempt > 0 " \ + " AND last_delivery_attempt < @freshness_threshold " \ + "ORDER BY id DESC " \ + "LIMIT 1;" + +#define SQL_INSERT_FILE_PART \ + "INSERT INTO blob_parts (file_id, part_id, offset_start, offset_end)" \ + " VALUES (@file_id, @part_id, @offset_start, @offset_end);" + +#define SQL_UPDATE_FILE_PART_REMOTE_ID \ + "UPDATE blob_parts SET remote_id=@remote_id WHERE id=@id;" + +#define SQL_GET_FILE_PART_REMOTE_ID \ + "SELECT remote_id " \ + " FROM blob_parts " \ + " WHERE file_id=@id;" + +#define SQL_UPDATE_FILE_PART_UPLOADED \ + "UPDATE blob_parts SET uploaded=1, in_progress=0 WHERE id=@id;" + +#define SQL_UPDATE_FILE_PART_IN_PROGRESS \ + "UPDATE blob_parts SET in_progress=@status WHERE id=@id;" + +#define SQL_UPDATE_FILE_PART_DELIVERY_ATTEMPT_COUNT \ + "UPDATE blob_parts " \ + " SET delivery_attempts=@delivery_attempts " \ + " WHERE file_id=@file_id " \ + " AND part_id=@part_id;" + +#define SQL_RESET_FILE_UPLOAD_STATES \ + "UPDATE blob_files " \ + " SET last_delivery_attempt=0 " \ + " WHERE id=@id;" + +#define SQL_RESET_FILE_PART_UPLOAD_STATES \ + "UPDATE blob_parts " \ + " SET delivery_attempts=0, " \ + " uploaded=0, " \ + " in_progress=0 " \ + " WHERE file_id=@id;" + +#define SQL_GET_NEXT_FILE_PART \ + " SELECT p.id, " \ + " p.file_id, " \ + " p.part_id, " \ + " p.offset_start, " \ + " p.offset_end, " \ + " p.delivery_attempts, " \ + " f.path, " \ + " f.delivery_attempts, " \ + " f.last_delivery_attempt, " \ + " f.destination, " \ + " f.remote_id, " \ + " f.tag " \ + " FROM blob_parts p " \ + " JOIN blob_files f " \ + " ON p.file_id = f.id " \ + " WHERE p.uploaded = 0 " \ + " AND p.in_progress = 0 " \ + " AND f.aborted = 0 " \ + " AND (p.part_id = 0 OR " \ + " (SELECT sp.uploaded " \ + " FROM blob_parts sp " \ + " WHERE sp.part_id = 0 " \ + " AND sp.file_id = p.file_id) = 1) " \ + "ORDER BY f.created ASC, " \ + " p.part_id ASC " \ + " LIMIT 1;" + + +/* + * Query to retrieve the oldest file which all it parts are mark as uploaded, this + * query will group the results in a single record, e.g: + * +* path part_ids + * ---------------- ---------- ------------------------------------------------------------ + * /.../alice29.txt 1726423769 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18, + * 19,20,21,22,23,24,25,26,27,28,29,30 + * + * this query is used to compose + */ +#define SQL_GET_OLDEST_FILE_WITH_PARTS_CONCAT \ + "SELECT f.id, f.path, GROUP_CONCAT(p.part_id ORDER BY p.part_id ASC) AS part_ids, f.source, " \ + " f.remote_id, f.tag " \ + "FROM blob_files f " \ + "JOIN blob_parts p ON f.id = p.file_id " \ + "WHERE p.uploaded = 1 " \ + "GROUP BY f.id " \ + "HAVING COUNT(p.id) = (SELECT COUNT(p2.id) FROM blob_parts p2 WHERE p2.file_id = f.id) " \ + "ORDER BY f.created ASC " \ + "LIMIT 1;" + + +#define FLB_BLOB_DB_SUCCESS 0 +#define FLB_BLOB_DB_ERROR_NO_BACKEND_AVAILABLE -1 +#define FLB_BLOB_DB_ERROR_ALLOCATOR_FAILURE -2 +#define FLB_BLOB_DB_ERROR_INVALID_BLOB_DB_CONTEXT -3 +#define FLB_BLOB_DB_ERROR_INVALID_FLB_CONTEXT -4 +#define FLB_BLOB_DB_ERROR_INVALID_DATABASE_PATH -5 +#define FLB_BLOB_DB_ERROR_SQLDB_OPEN_FAILURE -6 +#define FLB_BLOB_DB_ERROR_FILE_TABLE_CREATION -7 +#define FLB_BLOB_DB_ERROR_PART_TABLE_CREATION -8 +#define 
FLB_BLOB_DB_ERROR_SQLDB_FK_INIT_FAILURE -9 +#define FLB_BLOB_DB_ERROR_LOCK_INIT -10 + +#define FLB_BLOB_DB_ERROR_EXECUTING_STATEMENT_BASE -200 + +#define FLB_BLOB_DB_ERROR_FILE_INSERT \ + FLB_BLOB_DB_ERROR_EXECUTING_STATEMENT_BASE -1 +#define FLB_BLOB_DB_ERROR_FILE_DELETE \ + FLB_BLOB_DB_ERROR_EXECUTING_STATEMENT_BASE -2 +#define FLB_BLOB_DB_ERROR_FILE_ABORT \ + FLB_BLOB_DB_ERROR_EXECUTING_STATEMENT_BASE -3 +#define FLB_BLOB_DB_ERROR_FILE_DESTINATION_CHANGE \ + FLB_BLOB_DB_ERROR_EXECUTING_STATEMENT_BASE -4 +#define FLB_BLOB_DB_ERROR_FILE_REMOTE_ID_UPDATE \ + FLB_BLOB_DB_ERROR_EXECUTING_STATEMENT_BASE -5 +#define FLB_BLOB_DB_ERROR_FILE_DELIVERY_ATTEMPT_UPDATE \ + FLB_BLOB_DB_ERROR_EXECUTING_STATEMENT_BASE -6 +#define FLB_BLOB_DB_ERROR_PART_UPLOAD_STATE_RESET \ + FLB_BLOB_DB_ERROR_EXECUTING_STATEMENT_BASE -7 +#define FLB_BLOB_DB_ERROR_FILE_UPLOAD_STATE_RESET \ + FLB_BLOB_DB_ERROR_EXECUTING_STATEMENT_BASE -8 +#define FLB_BLOB_DB_ERROR_FILE_PART_INSERT \ + FLB_BLOB_DB_ERROR_EXECUTING_STATEMENT_BASE -9 +#define FLB_BLOB_DB_ERROR_FILE_PART_IN_PROGRESS_UPDATE \ + FLB_BLOB_DB_ERROR_EXECUTING_STATEMENT_BASE -10 +#define FLB_BLOB_DB_ERROR_PART_UPLOAD_STATE_UPDATE \ + FLB_BLOB_DB_ERROR_EXECUTING_STATEMENT_BASE -11 +#define FLB_BLOB_DB_ERROR_PART_DELIVERY_ATTEMPT_COUNTER_UPDATE \ + FLB_BLOB_DB_ERROR_EXECUTING_STATEMENT_BASE -12 +#define FLB_BLOB_DB_ERROR_PART_REMOTE_ID_UPDATE \ + FLB_BLOB_DB_ERROR_EXECUTING_STATEMENT_BASE -13 +#define FLB_BLOB_DB_ERROR_PART_REMOTE_ID_FETCH \ + FLB_BLOB_DB_ERROR_EXECUTING_STATEMENT_BASE -14 + +#define FLB_BLOB_DB_ERROR_EXECUTING_STATEMENT_TOP \ + FLB_BLOB_DB_ERROR_PART_REMOTE_ID_UPDATE + +/* These errors are highly speciifc and thus client code should be able to + * range check them. + */ + +#define FLB_BLOB_DB_ERROR_PREPARING_STATEMENT_BASE -100 + +#define FLB_BLOB_DB_ERROR_PREPARING_STATEMENT_INSERT_FILE \ + FLB_BLOB_DB_ERROR_PREPARING_STATEMENT_BASE - 0 +#define FLB_BLOB_DB_ERROR_PREPARING_STATEMENT_DELETE_FILE \ + FLB_BLOB_DB_ERROR_PREPARING_STATEMENT_BASE - 1 +#define FLB_BLOB_DB_ERROR_PREPARING_STATEMENT_ABORT_FILE \ + FLB_BLOB_DB_ERROR_PREPARING_STATEMENT_BASE - 2 +#define FLB_BLOB_DB_ERROR_PREPARING_STATEMENT_GET_FILE \ + FLB_BLOB_DB_ERROR_PREPARING_STATEMENT_BASE - 3 +#define FLB_BLOB_DB_ERROR_PREPARING_STATEMENT_UPDATE_FILE_DESTINATION \ + FLB_BLOB_DB_ERROR_PREPARING_STATEMENT_BASE - 4 +#define FLB_BLOB_DB_ERROR_PREPARING_STATEMENT_UPDATE_FILE_REMOTE_ID \ + FLB_BLOB_DB_ERROR_PREPARING_STATEMENT_BASE - 5 +#define FLB_BLOB_DB_ERROR_PREPARING_STATEMENT_UPDATE_FILE_DELIVERY_ATTEMPT_COUNT \ + FLB_BLOB_DB_ERROR_PREPARING_STATEMENT_BASE - 6 +#define FLB_BLOB_DB_ERROR_PREPARING_STATEMENT_SET_FILE_ABORTED_STATE \ + FLB_BLOB_DB_ERROR_PREPARING_STATEMENT_BASE - 7 +#define FLB_BLOB_DB_ERROR_PREPARING_STATEMENT_GET_NEXT_ABORTED_FILE \ + FLB_BLOB_DB_ERROR_PREPARING_STATEMENT_BASE - 8 +#define FLB_BLOB_DB_ERROR_PREPARING_STATEMENT_GET_NEXT_STALE_FILE \ + FLB_BLOB_DB_ERROR_PREPARING_STATEMENT_BASE - 9 +#define FLB_BLOB_DB_ERROR_PREPARING_STATEMENT_RESET_FILE_UPLOAD_STATES \ + FLB_BLOB_DB_ERROR_PREPARING_STATEMENT_BASE - 10 +#define FLB_BLOB_DB_ERROR_PREPARING_STATEMENT_RESET_FILE_PART_UPLOAD_STATES \ + FLB_BLOB_DB_ERROR_PREPARING_STATEMENT_BASE - 11 +#define FLB_BLOB_DB_ERROR_PREPARING_STATEMENT_INSERT_FILE_PART \ + FLB_BLOB_DB_ERROR_PREPARING_STATEMENT_BASE - 12 +#define FLB_BLOB_DB_ERROR_PREPARING_STATEMENT_UPDATE_FILE_PART_UPLOADED \ + FLB_BLOB_DB_ERROR_PREPARING_STATEMENT_BASE - 13 +#define FLB_BLOB_DB_ERROR_PREPARING_STATEMENT_UPDATE_FILE_PART_REMOTE_ID \ + 
FLB_BLOB_DB_ERROR_PREPARING_STATEMENT_BASE - 14 +#define FLB_BLOB_DB_ERROR_PREPARING_STATEMENT_FETCH_FILE_PART_REMOTE_ID \ + FLB_BLOB_DB_ERROR_PREPARING_STATEMENT_BASE - 15 +#define FLB_BLOB_DB_ERROR_PREPARING_STATEMENT_UPDATE_FILE_PART_DELIVERY_ATTEMPT_COUNT \ + FLB_BLOB_DB_ERROR_PREPARING_STATEMENT_BASE - 16 +#define FLB_BLOB_DB_ERROR_PREPARING_STATEMENT_GET_NEXT_FILE_PART \ + FLB_BLOB_DB_ERROR_PREPARING_STATEMENT_BASE - 17 +#define FLB_BLOB_DB_ERROR_PREPARING_STATEMENT_UPDATE_FILE_PART_IN_PROGRESS \ + FLB_BLOB_DB_ERROR_PREPARING_STATEMENT_BASE - 18 +#define FLB_BLOB_DB_ERROR_PREPARING_STATEMENT_GET_OLDEST_FILE_WITH_PARTS \ + FLB_BLOB_DB_ERROR_PREPARING_STATEMENT_BASE - 19 +#define FLB_BLOB_DB_ERROR_PREPARING_STATEMENT_GET_FILE_PART_COUNT \ + FLB_BLOB_DB_ERROR_PREPARING_STATEMENT_BASE - 20 + +#define FLB_BLOB_DB_ERROR_PREPARING_STATEMENT_TOP \ + FLB_BLOB_DB_ERROR_PREPARING_STATEMENT_GET_OLDEST_FILE_WITH_PARTS + +#ifdef FLB_HAVE_SQLDB +#include + +typedef struct flb_sqldb __internal_flb_sqldb; +typedef sqlite3_stmt __internal_sqlite3_stmt; +#else +typedef void __internal_flb_sqldb; +typedef void __internal_sqlite3_stmt; +#endif + +struct flb_blob_db { + /* database context */ + __internal_flb_sqldb *db; + int last_error; + flb_lock_t global_lock; + + /* prepared statements: files */ + __internal_sqlite3_stmt *stmt_insert_file; + __internal_sqlite3_stmt *stmt_delete_file; + __internal_sqlite3_stmt *stmt_abort_file; + __internal_sqlite3_stmt *stmt_get_file; + __internal_sqlite3_stmt *stmt_get_file_part_count; + __internal_sqlite3_stmt *stmt_update_file_remote_id; + __internal_sqlite3_stmt *stmt_update_file_destination; + __internal_sqlite3_stmt *stmt_update_file_delivery_attempt_count; + __internal_sqlite3_stmt *stmt_set_file_aborted_state; + __internal_sqlite3_stmt *stmt_get_next_aborted_file; + __internal_sqlite3_stmt *stmt_get_next_stale_file; + __internal_sqlite3_stmt *stmt_reset_file_upload_states; + + /* prepared statement: file parts */ + __internal_sqlite3_stmt *stmt_insert_file_part; + __internal_sqlite3_stmt *stmt_fetch_file_part_remote_id; + __internal_sqlite3_stmt *stmt_update_file_part_remote_id; + __internal_sqlite3_stmt *stmt_update_file_part_uploaded; + __internal_sqlite3_stmt *stmt_reset_file_part_upload_states; + __internal_sqlite3_stmt *stmt_update_file_part_delivery_attempt_count; + __internal_sqlite3_stmt *stmt_get_next_file_part; + __internal_sqlite3_stmt *stmt_update_file_part_in_progress; + + __internal_sqlite3_stmt *stmt_get_oldest_file_with_parts; +}; + +int flb_blob_db_open(struct flb_blob_db *context, + struct flb_config *config, + char *path); + +int flb_blob_db_close(struct flb_blob_db *context); + +int flb_blob_db_lock(struct flb_blob_db *context); + +int flb_blob_db_unlock(struct flb_blob_db *context); + +int flb_blob_db_file_exists(struct flb_blob_db *context, + char *path, + uint64_t *id); + +int64_t flb_blob_db_file_insert(struct flb_blob_db *context, + char *tag, + char *source, + char *destination, + char *path, + size_t size); + +int flb_blob_db_file_delete(struct flb_blob_db *context, + uint64_t id, + char *path); + +int flb_blob_db_file_set_aborted_state(struct flb_blob_db *context, + uint64_t id, + char *path, + uint64_t state); + +int flb_blob_file_change_destination(struct flb_blob_db *context, + uint64_t id, + cfl_sds_t destination); + +int flb_blob_db_file_delivery_attempts(struct flb_blob_db *context, + uint64_t id, + uint64_t attempts); + +int flb_blob_file_update_remote_id(struct flb_blob_db *context, + uint64_t id, + cfl_sds_t remote_id); + 
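+/* + * Usage sketch (illustrative only, not part of this patch): a typical + * caller opens the database, registers a file and its parts, marks each + * part as uploaded once delivered, and closes the handle on shutdown. + * It assumes a valid struct flb_config *config is in scope; the path, + * tag and sizes below are placeholder values. + * + * struct flb_blob_db db = {0}; + * int64_t file_id; + * int64_t part_row_id = 0; + * + * if (flb_blob_db_open(&db, config, "blobs.db") != FLB_BLOB_DB_SUCCESS) { + * return -1; + * } + * + * file_id = flb_blob_db_file_insert(&db, "tag", "source", "destination", + * "/data/sample.bin", 1048576); + * + * flb_blob_db_file_part_insert(&db, file_id, 0, 0, 1048576, &part_row_id); + * flb_blob_db_file_part_uploaded(&db, part_row_id); + * + * flb_blob_db_close(&db); + */ + 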
+int flb_blob_db_file_get_next_aborted(struct flb_blob_db *context, + uint64_t *id, + uint64_t *delivery_attempts, + cfl_sds_t *path, + cfl_sds_t *source, + cfl_sds_t *remote_id, + cfl_sds_t *file_tag, + int *part_count); + +int flb_blob_db_file_get_next_stale(struct flb_blob_db *context, + uint64_t *id, + cfl_sds_t *path, + uint64_t upload_parts_freshness_threshold, + cfl_sds_t *remote_id, + cfl_sds_t *tag, + int *part_count); + +int flb_blob_db_file_reset_upload_states(struct flb_blob_db *context, + uint64_t id, + char *path); + +int flb_blob_db_file_part_insert(struct flb_blob_db *context, + uint64_t file_id, + uint64_t part_id, + size_t offset_start, + size_t offset_end, + int64_t *out_id); + +int flb_blob_db_file_part_in_progress(struct flb_blob_db *context, + int in_progress, + uint64_t id); + +int flb_blob_db_file_part_get_next(struct flb_blob_db *context, + uint64_t *id, + uint64_t *file_id, + uint64_t *part_id, + off_t *offset_start, + off_t *offset_end, + uint64_t *part_delivery_attempts, + uint64_t *file_delivery_attempts, + cfl_sds_t *file_path, + cfl_sds_t *destination, + cfl_sds_t *remote_file_id, + cfl_sds_t *tag, + int *part_count); + +int flb_blob_db_file_part_update_remote_id(struct flb_blob_db *context, + uint64_t id, + cfl_sds_t remote_id); + +int flb_blob_db_file_part_uploaded(struct flb_blob_db *context, uint64_t id); + +int flb_blob_db_file_part_update_delivery_attempt_counter( + struct flb_blob_db *context, + uint64_t file_id, + uint64_t part_id, + uint64_t attempts); + +int flb_blob_db_file_fetch_oldest_ready(struct flb_blob_db *context, + uint64_t *file_id, + cfl_sds_t *path, + cfl_sds_t *part_ids, + cfl_sds_t *source, + cfl_sds_t *file_remote_id, + cfl_sds_t *file_tag, + int *part_count); + +int flb_blob_db_file_fetch_part_ids(struct flb_blob_db *context, + uint64_t file_id, + cfl_sds_t *remote_id_list, + size_t remote_id_list_size, + int *remote_id_count); + +int flb_blob_db_file_fetch_part_count(struct flb_blob_db *context, + uint64_t file_id); +#endif \ No newline at end of file diff --git a/src/flb_blob_db.c b/src/flb_blob_db.c new file mode 100644 index 00000000000..0dd715f38f4 --- /dev/null +++ b/src/flb_blob_db.c @@ -0,0 +1,1550 @@ +/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ + +/* Fluent Bit + * ========== + * Copyright (C) 2015-2024 The Fluent Bit Authors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifdef FLB_HAVE_SQLDB + +#include +#include + +static int prepare_stmts(struct flb_blob_db *context) +{ + int result; + + /* insert */ + result = sqlite3_prepare_v2(context->db->handler, + SQL_INSERT_FILE, -1, + &context->stmt_insert_file, + NULL); + if (result != SQLITE_OK) { + return FLB_BLOB_DB_ERROR_PREPARING_STATEMENT_INSERT_FILE; + } + + /* delete */ + result = sqlite3_prepare_v2(context->db->handler, + SQL_DELETE_FILE, -1, + &context->stmt_delete_file, + NULL); + if (result != SQLITE_OK) { + return FLB_BLOB_DB_ERROR_PREPARING_STATEMENT_DELETE_FILE; + } + + /* abort */ + result = sqlite3_prepare_v2(context->db->handler, + SQL_SET_FILE_ABORTED_STATE, -1, + &context->stmt_set_file_aborted_state, + NULL); + if (result != SQLITE_OK) { + return FLB_BLOB_DB_ERROR_PREPARING_STATEMENT_ABORT_FILE; + } + + + /* file destination update */ + result = sqlite3_prepare_v2(context->db->handler, + SQL_UPDATE_FILE_REMOTE_ID, -1, + &context->stmt_update_file_remote_id, + NULL); + if (result != SQLITE_OK) { + return FLB_BLOB_DB_ERROR_PREPARING_STATEMENT_UPDATE_FILE_REMOTE_ID; + } + + /* file destination update */ + result = sqlite3_prepare_v2(context->db->handler, + SQL_UPDATE_FILE_DESTINATION, -1, + &context->stmt_update_file_destination, + NULL); + if (result != SQLITE_OK) { + return FLB_BLOB_DB_ERROR_PREPARING_STATEMENT_UPDATE_FILE_DESTINATION; + } + + /* delivery attempt counter update */ + result = sqlite3_prepare_v2(context->db->handler, + SQL_UPDATE_FILE_DELIVERY_ATTEMPT_COUNT, -1, + &context->stmt_update_file_delivery_attempt_count, + NULL); + if (result != SQLITE_OK) { + return FLB_BLOB_DB_ERROR_PREPARING_STATEMENT_UPDATE_FILE_DELIVERY_ATTEMPT_COUNT; + } + + /* get */ + result = sqlite3_prepare_v2(context->db->handler, + SQL_GET_FILE, -1, + &context->stmt_get_file, + NULL); + if (result != SQLITE_OK) { + return FLB_BLOB_DB_ERROR_PREPARING_STATEMENT_GET_FILE; + } + + /* get part count */ + result = sqlite3_prepare_v2(context->db->handler, + SQL_GET_FILE_PART_COUNT, -1, + &context->stmt_get_file_part_count, + NULL); + if (result != SQLITE_OK) { + return FLB_BLOB_DB_ERROR_PREPARING_STATEMENT_GET_FILE_PART_COUNT; + } + + /* get next aborted file */ + result = sqlite3_prepare_v2(context->db->handler, + SQL_GET_NEXT_ABORTED_FILE, -1, + &context->stmt_get_next_aborted_file, + NULL); + if (result != SQLITE_OK) { + return FLB_BLOB_DB_ERROR_PREPARING_STATEMENT_GET_NEXT_ABORTED_FILE; + } + + /* get next stale file */ + result = sqlite3_prepare_v2(context->db->handler, + SQL_GET_NEXT_STALE_FILE, -1, + &context->stmt_get_next_stale_file, + NULL); + if (result != SQLITE_OK) { + return FLB_BLOB_DB_ERROR_PREPARING_STATEMENT_GET_NEXT_STALE_FILE; + } + + /* reset file upload progress */ + result = sqlite3_prepare_v2(context->db->handler, + SQL_RESET_FILE_UPLOAD_STATES, -1, + &context->stmt_reset_file_upload_states, + NULL); + if (result != SQLITE_OK) { + return FLB_BLOB_DB_ERROR_PREPARING_STATEMENT_RESET_FILE_UPLOAD_STATES; + } + + /* reset file part upload progress */ + result = sqlite3_prepare_v2(context->db->handler, + SQL_RESET_FILE_PART_UPLOAD_STATES, -1, + &context->stmt_reset_file_part_upload_states, + NULL); + if (result != SQLITE_OK) { + return FLB_BLOB_DB_ERROR_PREPARING_STATEMENT_RESET_FILE_PART_UPLOAD_STATES; + } + + /* insert blob file part */ + result = sqlite3_prepare_v2(context->db->handler, + SQL_INSERT_FILE_PART, -1, + &context->stmt_insert_file_part, + NULL); + if (result != SQLITE_OK) { + return FLB_BLOB_DB_ERROR_PREPARING_STATEMENT_INSERT_FILE_PART; + } + + /* update blob part remote 
id */ + result = sqlite3_prepare_v2(context->db->handler, + SQL_UPDATE_FILE_PART_REMOTE_ID, -1, + &context->stmt_update_file_part_remote_id, + NULL); + if (result != SQLITE_OK) { + return FLB_BLOB_DB_ERROR_PREPARING_STATEMENT_UPDATE_FILE_PART_REMOTE_ID; + } + + /* fetch blob part remote id */ + result = sqlite3_prepare_v2(context->db->handler, + SQL_GET_FILE_PART_REMOTE_ID, -1, + &context->stmt_fetch_file_part_remote_id, + NULL); + if (result != SQLITE_OK) { + return FLB_BLOB_DB_ERROR_PREPARING_STATEMENT_FETCH_FILE_PART_REMOTE_ID; + } + + /* update blob part uploaded */ + result = sqlite3_prepare_v2(context->db->handler, + SQL_UPDATE_FILE_PART_UPLOADED, -1, + &context->stmt_update_file_part_uploaded, + NULL); + if (result != SQLITE_OK) { + return FLB_BLOB_DB_ERROR_PREPARING_STATEMENT_UPDATE_FILE_PART_UPLOADED; + } + + result = sqlite3_prepare_v2(context->db->handler, + SQL_GET_NEXT_FILE_PART, -1, + &context->stmt_get_next_file_part, + NULL); + if (result != SQLITE_OK) { + return FLB_BLOB_DB_ERROR_PREPARING_STATEMENT_GET_NEXT_FILE_PART; + } + + result = sqlite3_prepare_v2(context->db->handler, + SQL_UPDATE_FILE_PART_IN_PROGRESS, -1, + &context->stmt_update_file_part_in_progress, + NULL); + if (result != SQLITE_OK) { + return FLB_BLOB_DB_ERROR_PREPARING_STATEMENT_UPDATE_FILE_PART_IN_PROGRESS; + } + + result = sqlite3_prepare_v2(context->db->handler, + SQL_UPDATE_FILE_PART_DELIVERY_ATTEMPT_COUNT, -1, + &context->stmt_update_file_part_delivery_attempt_count, + NULL); + if (result != SQLITE_OK) { + return FLB_BLOB_DB_ERROR_PREPARING_STATEMENT_UPDATE_FILE_PART_DELIVERY_ATTEMPT_COUNT; + } + + result = sqlite3_prepare_v2(context->db->handler, + SQL_GET_OLDEST_FILE_WITH_PARTS_CONCAT, -1, + &context->stmt_get_oldest_file_with_parts, + NULL); + if (result != SQLITE_OK) { + return FLB_BLOB_DB_ERROR_PREPARING_STATEMENT_GET_OLDEST_FILE_WITH_PARTS; + } + + return FLB_BLOB_DB_SUCCESS; +} + +int flb_blob_db_open(struct flb_blob_db *context, + struct flb_config *config, + char *path) +{ + int result; + struct flb_sqldb *db; + + if (context == NULL) { + return FLB_BLOB_DB_ERROR_INVALID_BLOB_DB_CONTEXT; + } + + if (config == NULL) { + return FLB_BLOB_DB_ERROR_INVALID_FLB_CONTEXT; + } + + if (path == NULL) { + return FLB_BLOB_DB_ERROR_INVALID_DATABASE_PATH; + } + + db = flb_sqldb_open(path, "", config); + + if (db == NULL) { + return FLB_BLOB_DB_ERROR_SQLDB_OPEN_FAILURE; + } + + result = flb_sqldb_query(db, SQL_CREATE_BLOB_FILES, NULL, NULL); + + if (result != FLB_OK) { + flb_sqldb_close(db); + + return FLB_BLOB_DB_ERROR_FILE_TABLE_CREATION; + } + + result = flb_sqldb_query(db, SQL_CREATE_BLOB_PARTS, NULL, NULL); + + if (result != FLB_OK) { + flb_sqldb_close(db); + + return FLB_BLOB_DB_ERROR_PART_TABLE_CREATION; + } + + result = flb_sqldb_query(db, SQL_PRAGMA_FOREIGN_KEYS, NULL, NULL); + + if (result != FLB_OK) { + flb_sqldb_close(db); + + return FLB_BLOB_DB_ERROR_SQLDB_FK_INIT_FAILURE; + } + + result = flb_lock_init(&context->global_lock); + + if (result != 0) { + flb_sqldb_close(db); + + return FLB_BLOB_DB_ERROR_LOCK_INIT; + } + + context->db = db; + + result = prepare_stmts(context); + + if (result != FLB_BLOB_DB_SUCCESS) { + flb_lock_destroy(&context->global_lock); + flb_sqldb_close(db); + + context->db = NULL; + } + + return result; +} + +int flb_blob_db_close(struct flb_blob_db *context) +{ + if (context == NULL) { + return FLB_BLOB_DB_ERROR_INVALID_BLOB_DB_CONTEXT; + } + + if (context->db == NULL) { + return FLB_BLOB_DB_SUCCESS; + } + + /* finalize prepared statements */ + 
sqlite3_finalize(context->stmt_insert_file); + sqlite3_finalize(context->stmt_delete_file); + sqlite3_finalize(context->stmt_set_file_aborted_state); + sqlite3_finalize(context->stmt_get_file); + sqlite3_finalize(context->stmt_get_file_part_count); + sqlite3_finalize(context->stmt_update_file_remote_id); + sqlite3_finalize(context->stmt_update_file_destination); + sqlite3_finalize(context->stmt_update_file_delivery_attempt_count); + sqlite3_finalize(context->stmt_get_next_aborted_file); + sqlite3_finalize(context->stmt_get_next_stale_file); + sqlite3_finalize(context->stmt_reset_file_upload_states); + sqlite3_finalize(context->stmt_reset_file_part_upload_states); + + sqlite3_finalize(context->stmt_insert_file_part); + sqlite3_finalize(context->stmt_update_file_part_remote_id); + sqlite3_finalize(context->stmt_fetch_file_part_remote_id); + sqlite3_finalize(context->stmt_update_file_part_uploaded); + sqlite3_finalize(context->stmt_update_file_part_in_progress); + sqlite3_finalize(context->stmt_update_file_part_delivery_attempt_count); + + sqlite3_finalize(context->stmt_get_next_file_part); + sqlite3_finalize(context->stmt_get_oldest_file_with_parts); + + flb_lock_destroy(&context->global_lock); + + return flb_sqldb_close(context->db); +} + +int flb_blob_db_lock(struct flb_blob_db *context) +{ + return flb_lock_acquire(&context->global_lock, + FLB_LOCK_INFINITE_RETRY_LIMIT, + FLB_LOCK_DEFAULT_RETRY_DELAY); +} + +int flb_blob_db_unlock(struct flb_blob_db *context) +{ + return flb_lock_release(&context->global_lock, + FLB_LOCK_INFINITE_RETRY_LIMIT, + FLB_LOCK_DEFAULT_RETRY_DELAY); +} + + +int flb_blob_db_file_exists(struct flb_blob_db *context, + char *path, + uint64_t *id) +{ + sqlite3_stmt *statement; + int result; + int exists; + + statement = context->stmt_get_file; + + flb_sqldb_lock(context->db); + + /* Bind parameters */ + sqlite3_bind_text(statement, 1, path, -1, 0); + + result = sqlite3_step(statement); + + if (result == SQLITE_ROW) { + exists = FLB_TRUE; + + /* id: column 0 */ + *id = sqlite3_column_int64(statement, 0); + } + else if (result == SQLITE_DONE) { + exists = FLB_FALSE; + } + else { + exists = -1; + } + + sqlite3_clear_bindings(statement); + + sqlite3_reset(statement); + + flb_sqldb_unlock(context->db); + + return exists; +} + +int64_t flb_blob_db_file_insert(struct flb_blob_db *context, + char *tag, + char *source, + char *destination, + char *path, + size_t size) +{ + sqlite3_stmt *statement; + time_t created; + int result; + int64_t id; + + statement = context->stmt_insert_file; + + flb_sqldb_lock(context->db); + + created = time(NULL); + + sqlite3_bind_text(statement, 1, tag, -1, 0); + sqlite3_bind_text(statement, 2, source, -1, 0); + sqlite3_bind_text(statement, 3, destination, -1, 0); + sqlite3_bind_text(statement, 4, path, -1, 0); + sqlite3_bind_int64(statement, 5, size); + sqlite3_bind_int64(statement, 6, created); + + result = sqlite3_step(statement); + + sqlite3_clear_bindings(statement); + sqlite3_reset(statement); + + if (result == SQLITE_DONE) { + /* Get the database ID for this file */ + id = flb_sqldb_last_id(context->db); + } + else { + context->last_error = result; + + id = FLB_BLOB_DB_ERROR_FILE_INSERT; + } + + flb_sqldb_unlock(context->db); + + return id; +} + +int flb_blob_db_file_delete(struct flb_blob_db *context, + uint64_t id, + char *path) +{ + sqlite3_stmt *statement; + int result; + + statement = context->stmt_delete_file; + + flb_sqldb_lock(context->db); + + sqlite3_bind_int64(statement, 1, id); + + result = sqlite3_step(statement); + + if 
(result != SQLITE_DONE) { + context->last_error = result; + + result = FLB_BLOB_DB_ERROR_FILE_DELETE; + } + else { + result = FLB_BLOB_DB_SUCCESS; + } + + sqlite3_clear_bindings(statement); + sqlite3_reset(statement); + + flb_sqldb_unlock(context->db); + + return result; +} + +int flb_blob_db_file_set_aborted_state(struct flb_blob_db *context, + uint64_t id, + char *path, + uint64_t state) +{ + sqlite3_stmt *statement; + int result; + + statement = context->stmt_set_file_aborted_state; + + flb_sqldb_lock(context->db); + + sqlite3_bind_int64(statement, 1, state); + sqlite3_bind_int64(statement, 2, id); + + result = sqlite3_step(statement); + + if (result != SQLITE_DONE) { + context->last_error = result; + + result = FLB_BLOB_DB_ERROR_FILE_ABORT; + } + else { + result = FLB_BLOB_DB_SUCCESS; + } + + sqlite3_clear_bindings(statement); + sqlite3_reset(statement); + + flb_sqldb_unlock(context->db); + + return result; +} + +int flb_blob_file_update_remote_id(struct flb_blob_db *context, + uint64_t id, + cfl_sds_t remote_id) +{ + sqlite3_stmt *statement; + int result; + + statement = context->stmt_update_file_remote_id; + + flb_sqldb_lock(context->db); + + sqlite3_bind_text(statement, 1, remote_id, -1, 0); + sqlite3_bind_int64(statement, 2, id); + + result = sqlite3_step(statement); + + if (result != SQLITE_DONE) { + context->last_error = result; + + result = FLB_BLOB_DB_ERROR_FILE_REMOTE_ID_UPDATE; + } + else { + result = FLB_BLOB_DB_SUCCESS; + } + + sqlite3_clear_bindings(statement); + sqlite3_reset(statement); + + flb_sqldb_unlock(context->db); + + return result; +} + +int flb_blob_file_change_destination(struct flb_blob_db *context, + uint64_t id, + cfl_sds_t destination) +{ + sqlite3_stmt *statement; + int result; + + statement = context->stmt_update_file_destination; + + flb_sqldb_lock(context->db); + + sqlite3_bind_text(statement, 1, destination, -1, 0); + sqlite3_bind_int64(statement, 2, id); + + result = sqlite3_step(statement); + + if (result != SQLITE_DONE) { + context->last_error = result; + + result = FLB_BLOB_DB_ERROR_FILE_DESTINATION_CHANGE; + } + else { + result = FLB_BLOB_DB_SUCCESS; + } + + sqlite3_clear_bindings(statement); + sqlite3_reset(statement); + + flb_sqldb_unlock(context->db); + + return result; +} + +int flb_blob_db_file_delivery_attempts(struct flb_blob_db *context, + uint64_t id, + uint64_t attempts) +{ + sqlite3_stmt *statement; + int result; + + statement = context->stmt_update_file_delivery_attempt_count; + + flb_sqldb_lock(context->db); + + sqlite3_bind_int64(statement, 1, attempts); + sqlite3_bind_int64(statement, 2, id); + + result = sqlite3_step(statement); + + if (result != SQLITE_DONE) { + context->last_error = result; + + result = FLB_BLOB_DB_ERROR_FILE_DELIVERY_ATTEMPT_UPDATE; + } + else { + result = FLB_BLOB_DB_SUCCESS; + } + + sqlite3_clear_bindings(statement); + sqlite3_reset(statement); + + flb_sqldb_unlock(context->db); + + return result; +} + +int flb_blob_db_file_get_next_stale(struct flb_blob_db *context, + uint64_t *id, + cfl_sds_t *path, + uint64_t upload_parts_freshness_threshold, + cfl_sds_t *remote_id, + cfl_sds_t *tag, + int *part_count) +{ + time_t freshness_threshold; + sqlite3_stmt *statement; + char *tmp_remote_id; + char *tmp_path; + char *tmp_tag; + int exists; + int result; + + statement = context->stmt_get_next_stale_file; + + flb_sqldb_lock(context->db); + + freshness_threshold = time(NULL) - upload_parts_freshness_threshold; + + sqlite3_bind_int64(statement, 1, freshness_threshold); + + result = sqlite3_step(statement); + + if 
(result == SQLITE_ROW) { + exists = FLB_TRUE; + + *id = sqlite3_column_int64(statement, 0); + tmp_path = (char *) sqlite3_column_text(statement, 1); + tmp_remote_id = (char *) sqlite3_column_text(statement, 2); + tmp_tag = (char *) sqlite3_column_text(statement, 3); + + *path = cfl_sds_create(tmp_path); + + if (*path == NULL) { + exists = -1; + } + else { + *remote_id = cfl_sds_create(tmp_remote_id); + + if (*remote_id == NULL) { + exists = -1; + } + else { + *tag = cfl_sds_create(tmp_tag); + + if (*tag == NULL) { + exists = -1; + } + else { + *part_count = flb_blob_db_file_fetch_part_count(context, *id); + + if (*part_count <= 0) { + exists = -1; + } + else { + exists = 1; + } + + } + } + } + } + else if (result == SQLITE_DONE) { + exists = FLB_FALSE; + } + else { + context->last_error = result; + + exists = -1; + } + + sqlite3_clear_bindings(statement); + sqlite3_reset(statement); + + if (exists == -1) { + if (*path != NULL) { + cfl_sds_destroy(*path); + + *path = NULL; + } + + if (*remote_id != NULL) { + cfl_sds_destroy(*remote_id); + + *remote_id = NULL; + } + + if (*tag != NULL) { + cfl_sds_destroy(*tag); + + *tag = NULL; + } + + *id = 0; + } + + flb_sqldb_unlock(context->db); + + return exists; +} + +int flb_blob_db_file_get_next_aborted(struct flb_blob_db *context, + uint64_t *id, + uint64_t *delivery_attempts, + cfl_sds_t *path, + cfl_sds_t *source, + cfl_sds_t *remote_id, + cfl_sds_t *file_tag, + int *part_count) +{ + char *tmp_remote_id; + char *tmp_source; + sqlite3_stmt *statement; + char *tmp_path; + char *tmp_tag; + int result; + int exists; + + *path = NULL; + *source = NULL; + *remote_id = NULL; + *file_tag = NULL; + + statement = context->stmt_get_next_aborted_file; + + flb_sqldb_lock(context->db); + + result = sqlite3_step(statement); + + if (result == SQLITE_ROW) { + exists = FLB_TRUE; + + *id = sqlite3_column_int64(statement, 0); + *delivery_attempts = sqlite3_column_int64(statement, 1); + tmp_source = (char *) sqlite3_column_text(statement, 2); + tmp_path = (char *) sqlite3_column_text(statement, 3); + tmp_remote_id = (char *) sqlite3_column_text(statement, 4); + tmp_tag = (char *) sqlite3_column_text(statement, 5); + + *path = cfl_sds_create(tmp_path); + + if (*path == NULL) { + exists = -1; + } + else { + *source = cfl_sds_create(tmp_source); + + if (*source == NULL) { + exists = -1; + } + else { + *remote_id = cfl_sds_create(tmp_remote_id); + + if (*remote_id == NULL) { + exists = -1; + } + else { + *file_tag = cfl_sds_create(tmp_tag); + + if (*file_tag == NULL) { + exists = -1; + } + else { + *part_count = flb_blob_db_file_fetch_part_count(context, *id); + + if (*part_count <= 0) { + exists = -1; + } + else { + exists = 1; + } + } + } + } + } + } + else if (result == SQLITE_DONE) { + exists = FLB_FALSE; + } + else { + context->last_error = result; + + exists = -1; + } + + sqlite3_clear_bindings(statement); + sqlite3_reset(statement); + + flb_sqldb_unlock(context->db); + + if (exists == -1) { + *id = 0; + *delivery_attempts = 0; + + if (*path != NULL) { + cfl_sds_destroy(*path); + *path = NULL; + } + + if (*source != NULL) { + cfl_sds_destroy(*source); + *source = NULL; + } + + if (*remote_id != NULL) { + cfl_sds_destroy(*remote_id); + *remote_id = NULL; + } + + if (*file_tag != NULL) { + cfl_sds_destroy(*file_tag); + *file_tag = NULL; + } + } + + return exists; +} + + +static int flb_blob_db_file_reset_part_upload_states(struct flb_blob_db *context, + uint64_t id, + char *path) +{ + sqlite3_stmt *statement; + int result; + + statement = 
context->stmt_reset_file_part_upload_states; + + flb_sqldb_lock(context->db); + + sqlite3_bind_int64(statement, 1, id); + + result = sqlite3_step(statement); + + if (result != SQLITE_DONE) { + context->last_error = result; + + result = FLB_BLOB_DB_ERROR_PART_UPLOAD_STATE_RESET; + } + else { + result = FLB_BLOB_DB_SUCCESS; + } + + sqlite3_clear_bindings(statement); + + sqlite3_reset(statement); + + flb_sqldb_unlock(context->db); + + return result; +} + +int flb_blob_db_file_reset_upload_states(struct flb_blob_db *context, + uint64_t id, + char *path) +{ + sqlite3_stmt *statement; + int result; + + statement = context->stmt_reset_file_upload_states; + + flb_sqldb_lock(context->db); + + sqlite3_bind_int64(statement, 1, id); + + result = sqlite3_step(statement); + + sqlite3_clear_bindings(statement); + sqlite3_reset(statement); + + flb_sqldb_unlock(context->db); + + if (result != SQLITE_DONE) { + context->last_error = result; + + result = FLB_BLOB_DB_ERROR_FILE_UPLOAD_STATE_RESET; + } + else { + result = flb_blob_db_file_reset_part_upload_states(context, id, path); + } + + return result; +} + +int flb_blob_db_file_part_insert(struct flb_blob_db *context, + uint64_t file_id, + uint64_t part_id, + size_t offset_start, + size_t offset_end, + int64_t *out_id) +{ + sqlite3_stmt *statement; + int result; + + statement = context->stmt_insert_file_part; + + flb_sqldb_lock(context->db); + + sqlite3_bind_int64(statement, 1, file_id); + sqlite3_bind_int64(statement, 2, part_id); + sqlite3_bind_int64(statement, 3, offset_start); + sqlite3_bind_int64(statement, 4, offset_end); + + result = sqlite3_step(statement); + + if (result != SQLITE_DONE) { + context->last_error = result; + + result = FLB_BLOB_DB_ERROR_FILE_PART_INSERT; + } + else { + result = FLB_BLOB_DB_SUCCESS; + } + + sqlite3_clear_bindings(statement); + sqlite3_reset(statement); + + flb_sqldb_unlock(context->db); + + return result; +} + +int flb_blob_db_file_part_in_progress(struct flb_blob_db *context, + int in_progress, + uint64_t id) +{ + sqlite3_stmt *statement; + int result; + + statement = context->stmt_update_file_part_in_progress; + + flb_sqldb_lock(context->db); + + sqlite3_bind_int(statement, 1, in_progress); + sqlite3_bind_int64(statement, 2, id); + + result = sqlite3_step(statement); + + if (result != SQLITE_DONE) { + context->last_error = result; + + result = FLB_BLOB_DB_ERROR_FILE_PART_IN_PROGRESS_UPDATE; + } + else { + result = FLB_BLOB_DB_SUCCESS; + } + + sqlite3_clear_bindings(statement); + sqlite3_reset(statement); + + flb_sqldb_unlock(context->db); + + return result; +} + +int flb_blob_db_file_part_get_next(struct flb_blob_db *context, + uint64_t *id, + uint64_t *file_id, + uint64_t *part_id, + off_t *offset_start, + off_t *offset_end, + uint64_t *part_delivery_attempts, + uint64_t *file_delivery_attempts, + cfl_sds_t *file_path, + cfl_sds_t *destination, + cfl_sds_t *remote_file_id, + cfl_sds_t *tag, + int *part_count) +{ + cfl_sds_t local_remote_file_id; + char *tmp_remote_file_id; + cfl_sds_t local_destination; + char *tmp_destination; + int inner_result; + sqlite3_stmt *statement; + cfl_sds_t local_tag; + cfl_sds_t tmp_tag; + int result; + cfl_sds_t path; + char *tmp; + + local_remote_file_id = NULL; + tmp_remote_file_id = NULL; + local_destination = NULL; + tmp_destination = NULL; + local_tag = NULL; + tmp_tag = NULL; + path = NULL; + + tmp_destination = NULL; + tmp = NULL; + + statement = context->stmt_get_next_file_part; + + flb_sqldb_lock(context->db); + + *file_path = NULL; + + result = sqlite3_step(statement); + + if 
(result == SQLITE_ROW) { + *id = sqlite3_column_int64(statement, 0); + *file_id = sqlite3_column_int64(statement, 1); + *part_id = sqlite3_column_int64(statement, 2); + *offset_start = sqlite3_column_int64(statement, 3); + *offset_end = sqlite3_column_int64(statement, 4); + *part_delivery_attempts = sqlite3_column_int64(statement, 5); + tmp = (char *) sqlite3_column_text(statement, 6); + *file_delivery_attempts = sqlite3_column_int64(statement, 7); + tmp_destination = (char *) sqlite3_column_text(statement, 9); + tmp_remote_file_id = (char *) sqlite3_column_text(statement, 10); + tmp_tag = (char *) sqlite3_column_text(statement, 11); + + path = cfl_sds_create(tmp); + local_tag = cfl_sds_create(tmp_tag); + local_destination = cfl_sds_create(tmp_destination); + local_remote_file_id = cfl_sds_create(tmp_remote_file_id); + + *part_count = flb_blob_db_file_fetch_part_count(context, *file_id); + } + else if (result == SQLITE_DONE) { + /* no records */ + result = 0; + } + else { + context->last_error = result; + + result = -1; + } + + sqlite3_clear_bindings(statement); + sqlite3_reset(statement); + + flb_sqldb_unlock(context->db); + + inner_result = -1; + + if (result == SQLITE_ROW) { + if (path == NULL || + local_tag == NULL || + local_destination == NULL || + local_remote_file_id == NULL) { + result = FLB_BLOB_DB_ERROR_ALLOCATOR_FAILURE; + } + else{ + inner_result = flb_blob_db_file_part_in_progress(context, 1, *id); + + if (inner_result == FLB_BLOB_DB_SUCCESS) { + *tag = local_tag; + *file_path = path; + *destination = local_destination; + *remote_file_id = local_remote_file_id; + } + } + } + + if (inner_result != FLB_BLOB_DB_SUCCESS || + result != SQLITE_ROW) { + if (path != NULL) { + cfl_sds_destroy(path); + } + + if (local_tag != NULL) { + cfl_sds_destroy(local_tag); + } + + if (local_destination != NULL) { + cfl_sds_destroy(local_destination); + } + + if (local_remote_file_id != NULL) { + cfl_sds_destroy(local_remote_file_id); + } + } + + return result; +} + +int flb_blob_db_file_part_update_remote_id(struct flb_blob_db *context, + uint64_t id, + cfl_sds_t remote_id) +{ + sqlite3_stmt *statement; + int result; + + statement = context->stmt_update_file_part_remote_id; + + flb_sqldb_lock(context->db); + + sqlite3_bind_text(statement, 1, remote_id, -1, 0); + sqlite3_bind_int64(statement, 2, id); + + result = sqlite3_step(statement); + + if (result != SQLITE_DONE) { + context->last_error = result; + + result = FLB_BLOB_DB_ERROR_PART_REMOTE_ID_UPDATE; + } + else { + result = FLB_BLOB_DB_SUCCESS; + } + + sqlite3_clear_bindings(statement); + sqlite3_reset(statement); + + flb_sqldb_unlock(context->db); + + return result; +} + +int flb_blob_db_file_part_uploaded(struct flb_blob_db *context, + uint64_t id) +{ + sqlite3_stmt *statement; + int result; + + statement = context->stmt_update_file_part_uploaded; + + flb_sqldb_lock(context->db); + + sqlite3_bind_int64(statement, 1, id); + + result = sqlite3_step(statement); + + if (result != SQLITE_DONE) { + context->last_error = result; + + result = FLB_BLOB_DB_ERROR_PART_UPLOAD_STATE_UPDATE; + } + else { + result = FLB_BLOB_DB_SUCCESS; + } + + sqlite3_clear_bindings(statement); + sqlite3_reset(statement); + + flb_sqldb_unlock(context->db); + + return result; +} + +int flb_blob_db_file_part_update_delivery_attempt_counter( + struct flb_blob_db *context, + uint64_t file_id, + uint64_t part_id, + uint64_t attempts) +{ + sqlite3_stmt *statement; + int result; + + statement = context->stmt_update_file_part_delivery_attempt_count; + + 
flb_sqldb_lock(context->db); + + sqlite3_bind_int64(statement, 1, attempts); + sqlite3_bind_int64(statement, 2, file_id); + sqlite3_bind_int64(statement, 3, part_id); + + result = sqlite3_step(statement); + + sqlite3_clear_bindings(statement); + sqlite3_reset(statement); + + flb_sqldb_unlock(context->db); + + if (result != SQLITE_DONE) { + result = FLB_BLOB_DB_ERROR_PART_DELIVERY_ATTEMPT_COUNTER_UPDATE; + } + else { + result = FLB_BLOB_DB_SUCCESS; + + } + + return result; +} + +int flb_blob_db_file_fetch_oldest_ready(struct flb_blob_db *context, + uint64_t *file_id, + cfl_sds_t *path, + cfl_sds_t *part_ids, + cfl_sds_t *source, + cfl_sds_t *file_remote_id, + cfl_sds_t *file_tag, + int *part_count) +{ + sqlite3_stmt *statement; + int result; + int ret; + char *tmp; + + tmp = NULL; + *path = NULL; + *part_ids = NULL; + *source = NULL; + *file_remote_id = NULL; + *file_tag = NULL; + + statement = context->stmt_get_oldest_file_with_parts; + + flb_sqldb_lock(context->db); + + ret = sqlite3_step(statement); + + if (ret == SQLITE_ROW) { + /* file_id */ + *file_id = sqlite3_column_int64(statement, 0); + + /* path */ + tmp = (char *) sqlite3_column_text(statement, 1); + *path = cfl_sds_create(tmp); + + /* part_ids */ + tmp = (char *) sqlite3_column_text(statement, 2); + *part_ids = cfl_sds_create(tmp); + + /* source */ + tmp = (char *) sqlite3_column_text(statement, 3); + *source = cfl_sds_create(tmp); + + tmp = (char *) sqlite3_column_text(statement, 4); + *file_remote_id = cfl_sds_create(tmp); + + tmp = (char *) sqlite3_column_text(statement, 5); + *file_tag = cfl_sds_create(tmp); + + if (*path == NULL || + *part_ids == NULL || + *source == NULL || + *file_remote_id == NULL || + *file_tag == NULL) { + result = -1; + } + else{ + *part_count = flb_blob_db_file_fetch_part_count(context, *file_id); + + if (*part_count <= 0) { + result = -1; + } + else { + result = 1; + } + } + } + else if (ret == SQLITE_DONE) { + /* no records */ + result = 0; + } + else { + result = -1; + } + + sqlite3_clear_bindings(statement); + sqlite3_reset(statement); + + if (result == -1) { + if (*path != NULL) { + cfl_sds_destroy(*path); + + *path = NULL; + } + + if (*part_ids != NULL) { + cfl_sds_destroy(*part_ids); + + *part_ids = NULL; + } + + if (*source != NULL) { + cfl_sds_destroy(*source); + + *source = NULL; + } + + if (*file_remote_id != NULL) { + cfl_sds_destroy(*file_remote_id); + + *file_remote_id = NULL; + } + + if (*file_tag != NULL) { + cfl_sds_destroy(*file_tag); + + *file_tag = NULL; + } + } + + flb_sqldb_unlock(context->db); + + return result; +} + +int flb_blob_db_file_fetch_part_ids(struct flb_blob_db *context, + uint64_t file_id, + cfl_sds_t *remote_id_list, + size_t remote_id_list_size, + int *remote_id_count) +{ + size_t remote_id_index; + sqlite3_stmt *statement; + int result; + char *tmp; + + statement = context->stmt_fetch_file_part_remote_id; + + flb_sqldb_lock(context->db); + + memset(remote_id_list, 0, sizeof(cfl_sds_t) * remote_id_list_size); + + sqlite3_bind_int64(statement, 1, file_id); + + result = -1; + + for (remote_id_index = 0 ; remote_id_index < remote_id_list_size ; remote_id_index++) { + result = sqlite3_step(statement); + + if (result == SQLITE_ROW) { + tmp = (char *) sqlite3_column_text(statement, 0); + + remote_id_list[remote_id_index] = flb_sds_create(tmp); + + if (remote_id_list[remote_id_index] == NULL) { + context->last_error = result; + + result = -1; + + break; + } + } + else if (result == SQLITE_DONE) { + break; + } + else { + context->last_error = result; + + result = -1; + + 
break; + } + } + + if (result == -1) { + while (remote_id_index > 0) { + if (remote_id_list[remote_id_index] != NULL) { + flb_sds_destroy(remote_id_list[remote_id_index]); + } + + remote_id_index--; + } + + if (remote_id_list[remote_id_index] != NULL) { + flb_sds_destroy(remote_id_list[remote_id_index]); + } + + memset(remote_id_list, 0, sizeof(cfl_sds_t) * remote_id_list_size); + } + else { + *remote_id_count = (int) remote_id_index; + } + + sqlite3_clear_bindings(statement); + sqlite3_reset(statement); + + flb_sqldb_unlock(context->db); + + return result; +} + +int flb_blob_db_file_fetch_part_count(struct flb_blob_db *context, + uint64_t file_id) +{ + sqlite3_stmt *statement; + int result; + + statement = context->stmt_get_file_part_count; + + flb_sqldb_lock(context->db); + + sqlite3_bind_int64(statement, 1, file_id); + + result = sqlite3_step(statement); + + if (result == SQLITE_ROW) { + result = sqlite3_column_int64(statement, 0); + } + else { + context->last_error = result; + + result = -1; + } + + sqlite3_clear_bindings(statement); + sqlite3_reset(statement); + + flb_sqldb_unlock(context->db); + + return result; +} + +#else + +int flb_blob_db_open(struct flb_blob_db *context, + struct flb_config *config, + char *path) +{ + return FLB_BLOB_DB_ERROR_NO_BACKEND_AVAILABLE; +} + +int flb_blob_db_close(struct flb_blob_db *context) +{ + return FLB_BLOB_DB_ERROR_NO_BACKEND_AVAILABLE; +} + +int flb_blob_db_file_exists(struct flb_blob_db *context, + char *path, + uint64_t *id) +{ + return FLB_BLOB_DB_ERROR_NO_BACKEND_AVAILABLE; +} + +int64_t flb_blob_db_file_insert(struct flb_blob_db *context, + char *tag, + char *source, + char *destination, + char *path, + size_t size) +{ + return FLB_BLOB_DB_ERROR_NO_BACKEND_AVAILABLE; +} + +int flb_blob_db_file_delete(struct flb_blob_db *context, + uint64_t id, + char *path) +{ + return FLB_BLOB_DB_ERROR_NO_BACKEND_AVAILABLE; +} + +int flb_blob_db_file_set_aborted_state(struct flb_blob_db *context, + uint64_t id, + char *path, + uint64_t state) +{ + return FLB_BLOB_DB_ERROR_NO_BACKEND_AVAILABLE; +} + +int flb_blob_file_change_destination(struct flb_blob_db *context, + uint64_t id, + cfl_sds_t destination) +{ + return FLB_BLOB_DB_ERROR_NO_BACKEND_AVAILABLE; +} + +int flb_blob_db_file_delivery_attempts(struct flb_blob_db *context, + uint64_t id, + uint64_t attempts) +{ + return FLB_BLOB_DB_ERROR_NO_BACKEND_AVAILABLE; +} + +int flb_blob_db_file_get_next_aborted(struct flb_blob_db *context, + uint64_t *id, + uint64_t *delivery_attempts, + cfl_sds_t *path, + cfl_sds_t *source, + cfl_sds_t *remote_id, + cfl_sds_t *file_tag, + int *part_count) +{ + return FLB_BLOB_DB_ERROR_NO_BACKEND_AVAILABLE; +} + +int flb_blob_db_file_get_next_stale(struct flb_blob_db *context, + uint64_t *id, + cfl_sds_t *path, + uint64_t upload_parts_freshness_threshold, + cfl_sds_t *remote_id, + cfl_sds_t *tag, + int *part_count) +{ + return FLB_BLOB_DB_ERROR_NO_BACKEND_AVAILABLE; +} + +int flb_blob_db_file_reset_upload_states(struct flb_blob_db *context, + uint64_t id, + char *path) +{ + return FLB_BLOB_DB_ERROR_NO_BACKEND_AVAILABLE; +} + +int flb_blob_db_file_part_insert(struct flb_blob_db *context, + uint64_t file_id, + uint64_t part_id, + size_t offset_start, + size_t offset_end, + int64_t *out_id) +{ + return FLB_BLOB_DB_ERROR_NO_BACKEND_AVAILABLE; +} + +int flb_blob_db_file_part_in_progress(struct flb_blob_db *context, + int in_progress, + uint64_t id) +{ + return FLB_BLOB_DB_ERROR_NO_BACKEND_AVAILABLE; +} + +int flb_blob_db_file_part_get_next(struct flb_blob_db *context, + uint64_t 
*id, + uint64_t *file_id, + uint64_t *part_id, + off_t *offset_start, + off_t *offset_end, + uint64_t *part_delivery_attempts, + uint64_t *file_delivery_attempts, + cfl_sds_t *file_path, + cfl_sds_t *destination, + cfl_sds_t *remote_file_id, + cfl_sds_t *tag, + int *part_count) +{ + return FLB_BLOB_DB_ERROR_NO_BACKEND_AVAILABLE; +} + + +int flb_blob_db_file_part_uploaded(struct flb_blob_db *context, uint64_t id) +{ + return FLB_BLOB_DB_ERROR_NO_BACKEND_AVAILABLE; +} + +int flb_blob_db_file_part_update_delivery_attempt_counter( + struct flb_blob_db *context, + uint64_t file_id, + uint64_t part_id, + uint64_t attempts) +{ + return FLB_BLOB_DB_ERROR_NO_BACKEND_AVAILABLE; +} + +int flb_blob_db_file_fetch_oldest_ready(struct flb_blob_db *context, + uint64_t *file_id, + cfl_sds_t *path, + cfl_sds_t *part_ids, + cfl_sds_t *source, + cfl_sds_t *file_remote_id, + cfl_sds_t *file_tag, + int *part_count) +{ + return FLB_BLOB_DB_ERROR_NO_BACKEND_AVAILABLE; +} + +int flb_blob_db_file_fetch_part_ids(struct flb_blob_db *context, + uint64_t file_id, + cfl_sds_t *remote_id_list, + size_t remote_id_list_size, + int *remote_id_count) +{ + return FLB_BLOB_DB_ERROR_NO_BACKEND_AVAILABLE; +} + +int flb_blob_db_file_fetch_part_count(struct flb_blob_db *context, + uint64_t file_id) +{ + return FLB_BLOB_DB_ERROR_NO_BACKEND_AVAILABLE; +} + +#endif \ No newline at end of file From 34808582599a5bea47048719295efa217d27570d Mon Sep 17 00:00:00 2001 From: Leonardo Alminana Date: Mon, 3 Feb 2025 13:55:54 +0100 Subject: [PATCH 02/18] signv4: added missing method support Signed-off-by: Leonardo Alminana --- src/flb_signv4.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/flb_signv4.c b/src/flb_signv4.c index e3337208104..14f9b04a076 100644 --- a/src/flb_signv4.c +++ b/src/flb_signv4.c @@ -434,7 +434,7 @@ static flb_sds_t url_params_format(char *params) tmp = flb_sds_printf(&buf, "%s=%s&", kv->key, kv->val); } - } + } else { if (kv->val == NULL) { tmp = flb_sds_printf(&buf, "%s=", @@ -622,6 +622,9 @@ static flb_sds_t flb_signv4_canonical_request(struct flb_http_client *c, case FLB_HTTP_HEAD: tmp = flb_sds_cat(cr, "HEAD\n", 5); break; + case FLB_HTTP_DELETE: + tmp = flb_sds_cat(cr, "DELETE\n", 7); + break; }; if (!tmp) { From 51f05eae42f9ee58ea48225910eb34d94fb62a77 Mon Sep 17 00:00:00 2001 From: Leonardo Alminana Date: Mon, 3 Feb 2025 13:56:41 +0100 Subject: [PATCH 03/18] sqldb: added locking mechanics Signed-off-by: Leonardo Alminana --- include/fluent-bit/flb_sqldb.h | 7 +++++ src/flb_sqldb.c | 56 ++++++++++++++++++++++++++++++++-- 2 files changed, 61 insertions(+), 2 deletions(-) diff --git a/include/fluent-bit/flb_sqldb.h b/include/fluent-bit/flb_sqldb.h index 67b243c38c8..6f28b361916 100644 --- a/include/fluent-bit/flb_sqldb.h +++ b/include/fluent-bit/flb_sqldb.h @@ -22,6 +22,7 @@ #include #include +#include struct flb_sqldb { char *path; /* physical path of the database */ @@ -30,6 +31,7 @@ struct flb_sqldb { int users; /* number of active users */ void *parent; /* if shared, ref to parent */ sqlite3 *handler; /* SQLite3 handler */ + flb_lock_t lock; /* thread safety mechanism */ struct mk_list _head; /* Link to config->sqldb_list */ }; @@ -40,6 +42,11 @@ int flb_sqldb_close(struct flb_sqldb *db); int flb_sqldb_query(struct flb_sqldb *db, const char *sql, int (*callback) (void *, int, char **, char **), void *data); + int64_t flb_sqldb_last_id(struct flb_sqldb *db); +int flb_sqldb_lock(struct flb_sqldb *db); + +int flb_sqldb_unlock(struct flb_sqldb *db); + #endif diff --git a/src/flb_sqldb.c b/src/flb_sqldb.c
index a4a5df2f490..fa75ac934c1 100644 --- a/src/flb_sqldb.c +++ b/src/flb_sqldb.c @@ -35,15 +35,26 @@ struct flb_sqldb *flb_sqldb_open(const char *path, const char *desc, struct flb_sqldb *db; sqlite3 *sdb = NULL; - db = flb_malloc(sizeof(struct flb_sqldb)); - if (!db) { + db = flb_calloc(1, sizeof(struct flb_sqldb)); + + if (db == NULL) { flb_errno(); + return NULL; } + db->parent = NULL; db->shared = FLB_FALSE; db->users = 0; + ret = flb_lock_init(&db->lock); + + if (ret != 0) { + flb_free(db); + + return NULL; + } + /* * The database handler can be shared across different instances of * Fluent Bit. Before to open a new one, try to find a database that @@ -75,16 +86,41 @@ struct flb_sqldb *flb_sqldb_open(const char *path, const char *desc, } else { ret = sqlite3_open(path, &sdb); + if (ret) { flb_error("[sqldb] cannot open database %s", path); + + flb_lock_destroy(&db->lock); flb_free(db); + return NULL; + } db->handler = sdb; } db->path = flb_strdup(path); + + if (db->path == NULL) { + flb_lock_destroy(&db->lock); + sqlite3_close(sdb); + flb_free(db); + + return NULL; + } + + db->desc = flb_strdup(desc); + + if (db->desc == NULL) { + flb_lock_destroy(&db->lock); + flb_free(db->path); + sqlite3_close(sdb); + flb_free(db); + + return NULL; + } + mk_list_add(&db->_head, &config->sqldb_list); return db; @@ -102,9 +138,11 @@ int flb_sqldb_close(struct flb_sqldb *db) sqlite3_exec(db->handler, "COMMIT;", NULL, NULL, NULL); sqlite3_close(db->handler); } + mk_list_del(&db->_head); flb_free(db->path); flb_free(db->desc); + flb_lock_destroy(&db->lock); flb_free(db); return 0; @@ -131,3 +169,17 @@ int64_t flb_sqldb_last_id(struct flb_sqldb *db) { return sqlite3_last_insert_rowid(db->handler); } + +int flb_sqldb_lock(struct flb_sqldb *db) +{ + return flb_lock_acquire(&db->lock, + FLB_LOCK_INFINITE_RETRY_LIMIT, + FLB_LOCK_DEFAULT_RETRY_DELAY); +} + +int flb_sqldb_unlock(struct flb_sqldb *db) +{ + return flb_lock_release(&db->lock, + FLB_LOCK_INFINITE_RETRY_LIMIT, + FLB_LOCK_DEFAULT_RETRY_DELAY); +} From 7e89f56d687980bf4c4588edf1bdebfb1ff0484f Mon Sep 17 00:00:00 2001 From: Leonardo Alminana Date: Mon, 3 Feb 2025 13:57:23 +0100 Subject: [PATCH 04/18] aws_util: added blob specific s3 key generation Signed-off-by: Leonardo Alminana --- include/fluent-bit/flb_aws_util.h | 6 +- src/aws/flb_aws_util.c | 195 ++++++++++++++++++++++++++++++ 2 files changed, 200 insertions(+), 1 deletion(-) diff --git a/include/fluent-bit/flb_aws_util.h b/include/fluent-bit/flb_aws_util.h index 0de6223d986..339735ca1fc 100644 --- a/include/fluent-bit/flb_aws_util.h +++ b/include/fluent-bit/flb_aws_util.h @@ -192,10 +192,14 @@ int flb_aws_is_auth_error(char *payload, size_t payload_size); int flb_read_file(const char *path, char **out_buf, size_t *out_size); -//* Constructs S3 object key as per the format. */ +/* Constructs S3 object key as per the format. */ flb_sds_t flb_get_s3_key(const char *format, time_t time, const char *tag, char *tag_delimiter, uint64_t seq_index); +/* Constructs S3 object key as per the blob format. */ +flb_sds_t flb_get_s3_blob_key(const char *format, const char *tag, + char *tag_delimiter, const char *blob_path); + /* * This function is an extension to strftime which can support milliseconds with %3N, * support nanoseconds with %9N or %L. 
The return value is the length of formatted diff --git a/src/aws/flb_aws_util.c b/src/aws/flb_aws_util.c index 33d3d088212..9c2acd89aa3 100644 --- a/src/aws/flb_aws_util.c +++ b/src/aws/flb_aws_util.c @@ -810,6 +810,201 @@ char* strtok_concurrent( #endif } +/* Constructs S3 object key as per the blob format. */ +flb_sds_t flb_get_s3_blob_key(const char *format, + const char *tag, + char *tag_delimiter, + const char *blob_path) +{ + int i = 0; + int ret = 0; + char *tag_token = NULL; + char *random_alphanumeric; + /* concurrent safe strtok_r requires a tracking ptr */ + char *strtok_saveptr; + flb_sds_t tmp = NULL; + flb_sds_t buf = NULL; + flb_sds_t s3_key = NULL; + flb_sds_t tmp_key = NULL; + flb_sds_t tmp_tag = NULL; + flb_sds_t sds_result = NULL; + char *valid_blob_path = NULL; + + if (strlen(format) > S3_KEY_SIZE){ + flb_warn("[s3_key] Object key length is longer than the 1024 character limit."); + } + + tmp_tag = flb_sds_create_len(tag, strlen(tag)); + if(!tmp_tag){ + goto error; + } + + s3_key = flb_sds_create_len(format, strlen(format)); + if (!s3_key) { + goto error; + } + + /* Check if delimiter(s) specifed exists in the tag. */ + for (i = 0; i < strlen(tag_delimiter); i++){ + if (strchr(tag, tag_delimiter[i])){ + ret = 1; + break; + } + } + + tmp = flb_sds_create_len(TAG_PART_DESCRIPTOR, 5); + if (!tmp) { + goto error; + } + if (strstr(s3_key, tmp)){ + if(ret == 0){ + flb_warn("[s3_key] Invalid Tag delimiter: does not exist in tag. " + "tag=%s, format=%s", tag, format); + } + } + + flb_sds_destroy(tmp); + tmp = NULL; + + /* Split the string on the delimiters */ + tag_token = strtok_concurrent(tmp_tag, tag_delimiter, &strtok_saveptr); + + /* Find all occurences of $TAG[*] and + * replaces it with the right token from tag. + */ + i = 0; + while(tag_token != NULL && i < MAX_TAG_PARTS) { + buf = flb_sds_create_size(10); + if (!buf) { + goto error; + } + tmp = flb_sds_printf(&buf, TAG_PART_DESCRIPTOR, i); + if (!tmp) { + goto error; + } + + tmp_key = replace_uri_tokens(s3_key, tmp, tag_token); + if (!tmp_key) { + goto error; + } + + if(strlen(tmp_key) > S3_KEY_SIZE){ + flb_warn("[s3_key] Object key length is longer than the 1024 character limit."); + } + + if (buf != tmp) { + flb_sds_destroy(buf); + } + flb_sds_destroy(tmp); + tmp = NULL; + buf = NULL; + flb_sds_destroy(s3_key); + s3_key = tmp_key; + tmp_key = NULL; + + tag_token = strtok_concurrent(NULL, tag_delimiter, &strtok_saveptr); + i++; + } + + tmp = flb_sds_create_len(TAG_PART_DESCRIPTOR, 5); + if (!tmp) { + goto error; + } + + /* A match against "$TAG[" indicates an invalid or out of bounds tag part. */ + if (strstr(s3_key, tmp)){ + flb_warn("[s3_key] Invalid / Out of bounds tag part: At most 10 tag parts " + "($TAG[0] - $TAG[9]) can be processed. tag=%s, format=%s, delimiters=%s", + tag, format, tag_delimiter); + } + + /* Find all occurences of $TAG and replace with the entire tag. */ + tmp_key = replace_uri_tokens(s3_key, TAG_DESCRIPTOR, tag); + if (!tmp_key) { + goto error; + } + + if(strlen(tmp_key) > S3_KEY_SIZE){ + flb_warn("[s3_key] Object key length is longer than the 1024 character limit."); + } + + flb_sds_destroy(s3_key); + s3_key = tmp_key; + tmp_key = NULL; + + flb_sds_len_set(s3_key, strlen(s3_key)); + + valid_blob_path = (char *) blob_path; + + while (*valid_blob_path == '.' || + *valid_blob_path == '/') { + valid_blob_path++; + } + + /* Append the blob path. 
*/ + sds_result = flb_sds_cat(s3_key, valid_blob_path, strlen(valid_blob_path)); + + if (!sds_result) { + goto error; + } + + s3_key = sds_result; + + if(strlen(s3_key) > S3_KEY_SIZE){ + flb_warn("[s3_key] Object key length is longer than the 1024 character limit."); + } + + /* Find all occurences of $UUID and replace with a random string. */ + random_alphanumeric = flb_sts_session_name(); + if (!random_alphanumeric) { + goto error; + } + /* only use 8 chars of the random string */ + random_alphanumeric[8] = '\0'; + tmp_key = replace_uri_tokens(s3_key, RANDOM_STRING, random_alphanumeric); + if (!tmp_key) { + flb_free(random_alphanumeric); + goto error; + } + + if(strlen(tmp_key) > S3_KEY_SIZE){ + flb_warn("[s3_key] Object key length is longer than the 1024 character limit."); + } + + flb_sds_destroy(s3_key); + s3_key = tmp_key; + tmp_key = NULL; + + flb_free(random_alphanumeric); + + flb_sds_destroy(tmp); + tmp = NULL; + + flb_sds_destroy(tmp_tag); + tmp_tag = NULL; + + return s3_key; + + error: + flb_errno(); + if (tmp_tag){ + flb_sds_destroy(tmp_tag); + } + if (s3_key){ + flb_sds_destroy(s3_key); + } + if (buf && buf != tmp){ + flb_sds_destroy(buf); + } + if (tmp){ + flb_sds_destroy(tmp); + } + if (tmp_key){ + flb_sds_destroy(tmp_key); + } + return NULL; +} + /* Constructs S3 object key as per the format. */ flb_sds_t flb_get_s3_key(const char *format, time_t time, const char *tag, char *tag_delimiter, uint64_t seq_index) From 27b16231c733673d14d38621f9f4c776dc0b18d3 Mon Sep 17 00:00:00 2001 From: Leonardo Alminana Date: Mon, 3 Feb 2025 13:58:11 +0100 Subject: [PATCH 05/18] out_s3: added blob support Signed-off-by: Leonardo Alminana --- plugins/out_s3/s3.c | 1602 ++++++++++++++++++++++++++++++++- plugins/out_s3/s3.h | 18 +- plugins/out_s3/s3_multipart.c | 68 +- 3 files changed, 1656 insertions(+), 32 deletions(-) diff --git a/plugins/out_s3/s3.c b/plugins/out_s3/s3.c index 3b1edafbdab..e7491394569 100644 --- a/plugins/out_s3/s3.c +++ b/plugins/out_s3/s3.c @@ -29,9 +29,11 @@ #include #include #include +#include #include #include #include +#include #include #include @@ -43,6 +45,16 @@ #define DEFAULT_S3_PORT 443 #define DEFAULT_S3_INSECURE_PORT 80 +/* thread_local_storage for workers */ + +struct worker_info { + int active_upload; +}; + +FLB_TLS_DEFINE(struct worker_info, s3_worker_info); + +static int s3_timer_create(struct flb_s3 *ctx); + static int construct_request_buffer(struct flb_s3 *ctx, flb_sds_t new_data, struct s3_file *chunk, char **out_buf, size_t *out_size); @@ -510,6 +522,456 @@ static void s3_context_destroy(struct flb_s3 *ctx) flb_free(ctx); } +static int find_map_entry_by_key(msgpack_object_map *map, + char *key, + size_t match_index, + int case_insensitive) +{ + size_t match_count; + int result; + int index; + + match_count = 0; + + for (index = 0 ; index < (int) map->size ; index++) { + if (map->ptr[index].key.type == MSGPACK_OBJECT_STR) { + if (case_insensitive) { + result = strncasecmp(map->ptr[index].key.via.str.ptr, + key, + map->ptr[index].key.via.str.size); + } + else { + result = strncmp(map->ptr[index].key.via.str.ptr, + key, + map->ptr[index].key.via.str.size); + } + + if (result == 0) { + if (match_count == match_index) { + return index; + } + + match_count++; + } + } + } + + return -1; +} + +static int extract_map_string_entry_by_key(flb_sds_t *output, + msgpack_object_map *map, + char *key, + size_t match_index, + int case_insensitive) +{ + int index; + int result; + + index = find_map_entry_by_key(map, + key, + match_index, + case_insensitive); + + if 
(index == -1) {
+        return -1;
+    }
+
+    if (map->ptr[index].val.type != MSGPACK_OBJECT_STR) {
+        return -2;
+    }
+
+    if (*output == NULL) {
+        *output = flb_sds_create_len(map->ptr[index].val.via.str.ptr,
+                                     map->ptr[index].val.via.str.size);
+
+        if (*output == NULL) {
+            return -3;
+        }
+    }
+    else {
+        (*output)[0] = '\0';
+
+        flb_sds_len_set(*output, 0);
+
+        result = flb_sds_cat_safe(output,
+                                  map->ptr[index].val.via.str.ptr,
+                                  map->ptr[index].val.via.str.size);
+
+        if (result != 0) {
+            return -4;
+        }
+    }
+
+    return 0;
+}
+
+static int process_remote_configuration_payload(
+            struct flb_s3 *context,
+            char *payload,
+            size_t payload_size)
+{
+    size_t msgpack_body_length;
+    msgpack_object_map *configuration_map;
+    flb_sds_t secret_access_key = NULL;
+    flb_sds_t access_key_id = NULL;
+    flb_sds_t session_token = NULL;
+    msgpack_unpacked unpacked_root;
+    char *msgpack_body;
+    int root_type;
+    size_t offset;
+    int result;
+
+    result = flb_pack_json(payload,
+                           payload_size,
+                           &msgpack_body,
+                           &msgpack_body_length,
+                           &root_type,
+                           NULL);
+
+    if (result != 0) {
+        flb_plg_error(context->ins,
+                      "JSON to msgpack conversion error");
+
+        result = -1;
+    }
+    else {
+        msgpack_unpacked_init(&unpacked_root);
+
+        offset = 0;
+        result = msgpack_unpack_next(&unpacked_root,
+                                     msgpack_body,
+                                     msgpack_body_length,
+                                     &offset);
+
+        if (result != MSGPACK_UNPACK_SUCCESS) {
+            flb_plg_error(context->ins, "corrupted msgpack data");
+
+            result = -1;
+
+            goto cleanup;
+        }
+
+        if (unpacked_root.data.type != MSGPACK_OBJECT_MAP) {
+            flb_plg_error(context->ins, "unexpected root object type");
+
+            result = -1;
+
+            goto cleanup;
+        }
+
+        configuration_map = &unpacked_root.data.via.map;
+
+        result = extract_map_string_entry_by_key(&access_key_id,
+                                                 configuration_map,
+                                                 "access_key_id", 0, FLB_TRUE);
+
+        if (result != 0) {
+            flb_plg_error(context->ins,
+                          "access_key_id could not be extracted : %d", result);
+
+            goto cleanup;
+        }
+
+        result = extract_map_string_entry_by_key(&secret_access_key,
+                                                 configuration_map,
+                                                 "secret_access_key", 0, FLB_TRUE);
+
+        if (result != 0) {
+            flb_plg_error(context->ins,
+                          "secret_access_key extraction error : %d", result);
+
+            goto cleanup;
+        }
+
+        result = extract_map_string_entry_by_key(&session_token,
+                                                 configuration_map,
+                                                 "session_token", 0, FLB_TRUE);
+
+        if (result != 0) {
+            flb_plg_error(context->ins,
+                          "session_token extraction error : %d", result);
+
+            goto cleanup;
+        }
+
+        setenv("aws_secret_access_key", secret_access_key, 1);
+        setenv("aws_access_key_id", access_key_id, 1);
+        setenv("aws_session_token", session_token, 1);
+
+cleanup:
+        if (secret_access_key != NULL) {
+            flb_sds_destroy(secret_access_key);
+        }
+
+        if (access_key_id != NULL) {
+            flb_sds_destroy(access_key_id);
+        }
+
+        if (session_token != NULL) {
+            flb_sds_destroy(session_token);
+        }
+
+        if (result != 0) {
+            result = -1;
+        }
+
+        msgpack_unpacked_destroy(&unpacked_root);
+
+        flb_free(msgpack_body);
+    }
+
+    return result;
+}
+
+static int apply_remote_configuration(struct flb_s3 *context)
+{
+    int ret;
+    size_t b_sent;
+    struct flb_http_client *http_client;
+    struct flb_connection *connection;
+    struct flb_upstream *upstream;
+    struct flb_tls *tls_context;
+    char *scheme = NULL;
+    char *host = NULL;
+    char *port = NULL;
+    char *uri = NULL;
+    uint16_t port_as_short;
+
+    /* Parse and split URL */
+    ret = flb_utils_url_split(context->configuration_endpoint_url,
+                              &scheme, &host, &port, &uri);
+    if (ret == -1) {
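+        /* the configuration endpoint is expected to be a full URL of
+         * the form "https://host:port/path"; anything that
+         * flb_utils_url_split() cannot parse aborts initialization */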
flb_plg_error(context->ins,
+                      "Invalid URL: %s",
+                      context->configuration_endpoint_url);
+
+        return -1;
+    }
+
+    if (port != NULL) {
+        port_as_short = (uint16_t) strtoul(port, NULL, 10);
+    }
+    else {
+        if (scheme != NULL) {
+            if (strcasecmp(scheme, "https") == 0) {
+                port_as_short = 443;
+            }
+            else {
+                port_as_short = 80;
+            }
+        }
+    }
+
+    if (scheme != NULL) {
+        flb_free(scheme);
+        scheme = NULL;
+    }
+
+    if (port != NULL) {
+        flb_free(port);
+        port = NULL;
+    }
+
+    if (host == NULL || uri == NULL) {
+        flb_plg_error(context->ins,
+                      "Invalid URL: %s",
+                      context->configuration_endpoint_url);
+
+        if (host != NULL) {
+            flb_free(host);
+        }
+
+        if (uri != NULL) {
+            flb_free(uri);
+        }
+
+        return -2;
+    }
+
+    tls_context = flb_tls_create(FLB_TLS_CLIENT_MODE,
+                                 FLB_FALSE,
+                                 FLB_FALSE,
+                                 host,
+                                 NULL,
+                                 NULL,
+                                 NULL,
+                                 NULL,
+                                 NULL);
+
+    if (tls_context == NULL) {
+        flb_free(host);
+        flb_free(uri);
+
+        flb_plg_error(context->ins,
+                      "TLS context creation error");
+
+        return -2;
+    }
+
+    upstream = flb_upstream_create_url(context->ins->config,
+                                       context->configuration_endpoint_url,
+                                       FLB_IO_TCP,
+                                       tls_context);
+
+    if (upstream == NULL) {
+        flb_tls_destroy(tls_context);
+        flb_free(host);
+        flb_free(uri);
+
+        flb_plg_error(context->ins,
+                      "Upstream creation error");
+
+        return -3;
+    }
+
+    flb_stream_disable_async_mode(&upstream->base);
+
+    /* Get upstream connection */
+    connection = flb_upstream_conn_get(upstream);
+    if (connection == NULL) {
+        flb_upstream_destroy(upstream);
+        flb_tls_destroy(tls_context);
+        flb_free(host);
+        flb_free(uri);
+
+        flb_plg_error(context->ins,
+                      "cannot create connection");
+
+        return -3;
+    }
+
+    /* Create HTTP client context */
+    http_client = flb_http_client(connection,
+                                  FLB_HTTP_GET,
+                                  uri,
+                                  NULL, 0,
+                                  host,
+                                  (int) port_as_short,
+                                  NULL, 0);
+    if (http_client == NULL) {
+        flb_upstream_conn_release(connection);
+        flb_upstream_destroy(upstream);
+        flb_tls_destroy(tls_context);
+        flb_free(host);
+        flb_free(uri);
+
+        flb_plg_error(context->ins,
+                      "cannot create HTTP client");
+
+        return -4;
+    }
+
+    flb_http_add_header(http_client,
+                        "Accept",
+                        strlen("Accept"),
+                        "application/json",
+                        16);
+
+    /* User Agent */
+    flb_http_add_header(http_client,
+                        "User-Agent", 10,
+                        "Fluent-Bit", 10);
+
+    if (context->configuration_endpoint_username != NULL &&
+        context->configuration_endpoint_password != NULL) {
+        flb_http_basic_auth(http_client,
+                            context->configuration_endpoint_username,
+                            context->configuration_endpoint_password);
+    }
+    else if (context->configuration_endpoint_bearer_token != NULL) {
+        flb_http_bearer_auth(http_client,
+                             context->configuration_endpoint_bearer_token);
+    }
+
+    /* Send HTTP request */
+    ret = flb_http_do(http_client, &b_sent);
+
+    if (ret == -1) {
+        flb_http_client_destroy(http_client);
+        flb_upstream_conn_release(connection);
+        flb_upstream_destroy(upstream);
+        flb_tls_destroy(tls_context);
+        flb_free(host);
+        flb_free(uri);
+
+        flb_plg_error(context->ins,
+                      "Error sending configuration request");
+
+        return -5;
+    }
+
+    if (http_client->resp.status == 200) {
+        flb_plg_info(context->ins,
+                     "Configuration retrieved successfully");
+
+        ret = process_remote_configuration_payload(
+                context,
+                http_client->resp.payload,
+                http_client->resp.payload_size);
+
+        if (ret != 0) {
+            flb_plg_error(context->ins,
+                          "Configuration payload processing error %d",
+                          ret);
+
+            flb_http_client_destroy(http_client);
+            flb_upstream_conn_release(connection);
+            flb_upstream_destroy(upstream);
+            flb_tls_destroy(tls_context);
+            flb_free(host);
+            flb_free(uri);
+
+            return -7;
+        }
+
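+        /* at this point the credentials from the payload have been
+         * exported as process environment variables so that the AWS
+         * credential lookup performed later can pick them up */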
+        flb_plg_info(context->ins,
+                     "Configuration applied successfully");
+    }
+    else {
+        if (http_client->resp.payload_size > 0) {
+            flb_plg_error(context->ins,
+                          "Configuration retrieval failed with status %i\n%s",
+                          http_client->resp.status,
+                          http_client->resp.payload);
+        }
+        else {
+            flb_plg_error(context->ins,
+                          "Configuration retrieval failed with status %i",
+                          http_client->resp.status);
+        }
+
+        flb_http_client_destroy(http_client);
+        flb_upstream_conn_release(connection);
+        flb_upstream_destroy(upstream);
+        flb_tls_destroy(tls_context);
+        flb_free(host);
+        flb_free(uri);
+
+        return -6;
+    }
+
+    flb_http_client_destroy(http_client);
+    flb_upstream_conn_release(connection);
+    flb_upstream_destroy(upstream);
+    flb_tls_destroy(tls_context);
+    flb_free(host);
+    flb_free(uri);
+
+    return 0;
+}
+
 static int cb_s3_init(struct flb_output_instance *ins,
                       struct flb_config *config, void *data)
 {
@@ -868,6 +1330,18 @@ static int cb_s3_init(struct flb_output_instance *ins,
         }
     }
 
+    if (ctx->configuration_endpoint_url != NULL) {
+        ret = apply_remote_configuration(ctx);
+
+        if (ret != 0) {
+            flb_plg_error(ctx->ins, "Failed to retrieve configuration "
+                          "from endpoint");
+            flb_errno();
+
+            return -1;
+        }
+    }
+
     /* read any remaining buffers from previous (failed) executions */
     ctx->has_old_buffers = s3_store_has_data(ctx);
     ctx->has_old_uploads = s3_store_has_uploads(ctx);
@@ -969,6 +1443,64 @@ static int cb_s3_init(struct flb_output_instance *ins,
     /* this is done last since in the previous block we make calls to AWS */
     ctx->provider->provider_vtable->upstream_set(ctx->provider, ctx->ins);
 
+    /* database file for blob signal handling */
+    if (ctx->blob_database_file != NULL) {
+        ret = flb_blob_db_open(&ctx->blob_db,
+                               config,
+                               ctx->blob_database_file);
+
+        if (ret != FLB_BLOB_DB_SUCCESS) {
+            return -1;
+        }
+    }
+
+    return 0;
+}
+
+/* worker initialization, used for our internal timers */
+static int cb_s3_worker_init(void *data, struct flb_config *config)
+{
+    int ret;
+    struct worker_info *info;
+    struct flb_s3 *ctx = data;
+
+    flb_plg_info(ctx->ins, "initializing worker");
+
+    info = FLB_TLS_GET(s3_worker_info);
+    if (!info) {
+        /* initialize worker global info */
+        info = flb_calloc(1, sizeof(struct worker_info));
+        if (!info) {
+            flb_errno();
+            return -1;
+        }
+        info->active_upload = FLB_FALSE;
+        FLB_TLS_SET(s3_worker_info, info);
+    }
+
+    ret = s3_timer_create(ctx);
+    if (ret == -1) {
+        flb_plg_error(ctx->ins, "failed to create upload timer");
+        return -1;
+    }
+
+    return 0;
+}
+
+/* worker teardown */
+static int cb_s3_worker_exit(void *data, struct flb_config *config)
+{
+    struct worker_info *info;
+    struct flb_s3 *ctx = data;
+
+    flb_plg_info(ctx->ins, "exiting worker");
+
+    info = FLB_TLS_GET(s3_worker_info);
+    if (info != NULL) {
+        flb_free(info);
+        FLB_TLS_SET(s3_worker_info, NULL);
+    }
+
     return 0;
 }
@@ -1536,6 +2068,7 @@ static struct multipart_upload *create_upload(struct flb_s3 *ctx, const char *ta
     tmp_sds = flb_sds_create_len(tag, tag_len);
     if (!tmp_sds) {
         flb_errno();
+        flb_sds_destroy(s3_key);
         flb_free(m_upload);
         return NULL;
     }
@@ -1552,8 +2085,16 @@ static struct multipart_upload *create_upload(struct flb_s3 *ctx, const char *ta
         ret = write_seq_index(ctx->seq_index_file, ctx->seq_index);
         if (ret < 0) {
             ctx->seq_index--;
+
+            mk_list_del(&m_upload->_head);
+
+            flb_sds_destroy(tmp_sds);
             flb_sds_destroy(s3_key);
+
+            flb_free(m_upload);
+
             flb_plg_error(ctx->ins, "Failed to write to sequential index metadata file");
+
             return NULL;
         }
     }
@@ -1755,34 +2296,814 @@ static void s3_upload_queue(struct
flb_config *config, void *out_context) return; } -static void cb_s3_upload(struct flb_config *config, void *data) + +static struct multipart_upload *create_blob_upload(struct flb_s3 *ctx, const char *tag, + int tag_len, + const char *path) { - struct flb_s3 *ctx = data; - struct s3_file *chunk = NULL; - struct multipart_upload *m_upload = NULL; - struct flb_fstore_file *fsf; - char *buffer = NULL; - size_t buffer_size = 0; - struct mk_list *tmp; - struct mk_list *head; - int complete; int ret; - time_t now; - - flb_plg_debug(ctx->ins, "Running upload timer callback (cb_s3_upload).."); + struct multipart_upload *m_upload = NULL; + flb_sds_t s3_key = NULL; + flb_sds_t tmp_sds = NULL; - now = time(NULL); + /* create new upload for this key */ + m_upload = flb_calloc(1, sizeof(struct multipart_upload)); + if (!m_upload) { + flb_errno(); + return NULL; + } - /* Check all chunks and see if any have timed out */ - mk_list_foreach_safe(head, tmp, &ctx->stream_active->files) { - fsf = mk_list_entry(head, struct flb_fstore_file, _head); - chunk = fsf->data; + s3_key = flb_get_s3_blob_key("/$TAG/", + tag, + ctx->tag_delimiters, + path); - if (now < (chunk->create_time + ctx->upload_timeout + ctx->retry_time)) { - continue; /* Only send chunks which have timed out */ - } + if (!s3_key) { + flb_plg_error(ctx->ins, "Failed to construct S3 Object Key for %s", tag); + flb_free(m_upload); + return NULL; + } + m_upload->s3_key = s3_key; + tmp_sds = flb_sds_create_len(tag, tag_len); + if (!tmp_sds) { + flb_errno(); + flb_sds_destroy(s3_key); + flb_free(m_upload); + return NULL; + } + m_upload->tag = tmp_sds; + m_upload->upload_state = MULTIPART_UPLOAD_STATE_NOT_CREATED; + m_upload->part_number = 1; + m_upload->init_time = time(NULL); + mk_list_add(&m_upload->_head, &ctx->uploads); - /* Locked chunks are being processed, skip */ + /* Update file and increment index value right before request */ + if (ctx->key_fmt_has_seq_index) { + ctx->seq_index++; + + ret = write_seq_index(ctx->seq_index_file, ctx->seq_index); + if (ret < 0) { + ctx->seq_index--; + + mk_list_del(&m_upload->_head); + + flb_sds_destroy(tmp_sds); + flb_sds_destroy(s3_key); + + flb_free(m_upload); + + flb_plg_error(ctx->ins, "Failed to write to sequential index metadata file"); + + return NULL; + } + } + + return m_upload; +} + +static int put_blob_object(struct flb_s3 *ctx, + const char *tag, + const char *path, + char *body, size_t body_size) +{ + flb_sds_t s3_key = NULL; + struct flb_http_client *c = NULL; + struct flb_aws_client *s3_client; + struct flb_aws_header *headers = NULL; + int len; + int ret; + int num_headers = 0; + char *final_key; + flb_sds_t uri; + flb_sds_t tmp; + char final_body_md5[25]; + + s3_key = flb_get_s3_blob_key("/$TAG/", + tag, + ctx->tag_delimiters, + path); + + if (!s3_key) { + flb_plg_error(ctx->ins, "Failed to construct S3 Object Key for %s", tag); + return -1; + } + + len = strlen(s3_key); + len += strlen(ctx->bucket + 1); + + uri = flb_sds_create_size(len); + + tmp = flb_sds_printf(&uri, "/%s%s", ctx->bucket, s3_key); + + if (!tmp) { + flb_sds_destroy(s3_key); + flb_plg_error(ctx->ins, "Failed to create PutObject URI"); + return -1; + } + + flb_sds_destroy(s3_key); + uri = tmp; + + memset(final_body_md5, 0, sizeof(final_body_md5)); + if (ctx->send_content_md5 == FLB_TRUE) { + ret = get_md5_base64(body, body_size, + final_body_md5, sizeof(final_body_md5)); + if (ret != 0) { + flb_plg_error(ctx->ins, "Failed to create Content-MD5 header"); + flb_sds_destroy(uri); + return -1; + } + } + + s3_client = 
ctx->s3_client; + if (s3_plugin_under_test() == FLB_TRUE) { + c = mock_s3_call("TEST_PUT_OBJECT_ERROR", "PutObject"); + } + else { + ret = create_headers(ctx, final_body_md5, &headers, &num_headers, FLB_FALSE); + if (ret == -1) { + flb_plg_error(ctx->ins, "Failed to create headers"); + flb_sds_destroy(uri); + return -1; + } + c = s3_client->client_vtable->request(s3_client, FLB_HTTP_PUT, + uri, body, body_size, + headers, num_headers); + flb_free(headers); + } + if (c) { + flb_plg_debug(ctx->ins, "PutObject http status=%d", c->resp.status); + if (c->resp.status == 200) { + /* + * URI contains bucket name, so we must advance over it + * to print the object key + */ + final_key = uri + strlen(ctx->bucket) + 1; + flb_plg_info(ctx->ins, "Successfully uploaded object %s", final_key); + flb_sds_destroy(uri); + flb_http_client_destroy(c); + + return 0; + } + flb_aws_print_xml_error(c->resp.payload, c->resp.payload_size, + "PutObject", ctx->ins); + if (c->resp.data != NULL) { + flb_plg_error(ctx->ins, "Raw PutObject response: %s", c->resp.data); + } + flb_http_client_destroy(c); + } + + flb_plg_error(ctx->ins, "PutObject request failed"); + flb_sds_destroy(uri); + + return -1; +} + +static int cb_s3_upload_blob(struct flb_config *config, void *data) +{ + int ret; + char *out_buf = NULL; + size_t out_size; + uint64_t id; + uint64_t file_id; + uint64_t part_id; + uint64_t part_delivery_attempts; + uint64_t file_delivery_attempts; + off_t offset_start; + off_t offset_end; + cfl_sds_t file_remote_id = NULL; + cfl_sds_t file_destination = NULL; + cfl_sds_t file_path = NULL; + cfl_sds_t file_tag = NULL; + cfl_sds_t part_ids = NULL; + cfl_sds_t source = NULL; + struct flb_s3 *ctx = data; + struct worker_info *info; + struct flb_blob_delivery_notification *notification; + struct multipart_upload *m_upload; + int part_count; + int put_object_required; + + info = FLB_TLS_GET(s3_worker_info); + + if (info->active_upload) { + flb_plg_trace(ctx->ins, "[worker: file upload] upload already in progress..."); + + return 0; + } + + if (ctx->blob_db.db == NULL) { + return 0; + } + + info->active_upload = FLB_TRUE; + + /* + * Check if is there any file which has been fully uploaded and we need to commit it with + * the Put Block List operation + */ + + flb_blob_db_lock(&ctx->blob_db); + + while (1) { + ret = flb_blob_db_file_get_next_stale(&ctx->blob_db, + &file_id, + &file_path, + ctx->upload_parts_freshness_threshold, + &file_remote_id, + &file_tag, + &part_count); + + if (ret == 1) { + if (part_count > 1) { + m_upload = create_blob_upload(ctx, file_tag, cfl_sds_len(file_tag), file_path); + + if (m_upload == NULL) { + cfl_sds_destroy(file_tag); + cfl_sds_destroy(file_path); + cfl_sds_destroy(file_remote_id); + cfl_sds_destroy(file_destination); + + flb_blob_db_unlock(&ctx->blob_db); + + return -1; + } + + mk_list_del(&m_upload->_head); + + m_upload->upload_id = flb_sds_create(file_remote_id); + + if (m_upload->upload_id == NULL) { + cfl_sds_destroy(file_tag); + cfl_sds_destroy(file_path); + cfl_sds_destroy(file_remote_id); + cfl_sds_destroy(file_destination); + + m_upload->part_number = 0; + multipart_upload_destroy(m_upload); + + flb_plg_error(ctx->ins, "Could not allocate upload id copy"); + + flb_blob_db_unlock(&ctx->blob_db); + + return -4; + } + + ret = abort_multipart_upload(ctx, m_upload); + } + + if (ctx->file_delivery_attempt_limit != FLB_OUT_RETRY_UNLIMITED && + file_delivery_attempts < ctx->file_delivery_attempt_limit) { + flb_blob_db_file_reset_upload_states(&ctx->blob_db, file_id, file_path); + 
flb_blob_db_file_set_aborted_state(&ctx->blob_db, file_id, file_path, 0); + } + else { + ret = flb_blob_db_file_delete(&ctx->blob_db, file_id, file_path); + + notification = flb_calloc(1, + sizeof( + struct flb_blob_delivery_notification)); + + if (notification != NULL) { + notification->base.dynamically_allocated = FLB_TRUE; + notification->base.notification_type = FLB_NOTIFICATION_TYPE_BLOB_DELIVERY; + notification->base.destructor = flb_input_blob_delivery_notification_destroy; + notification->success = FLB_FALSE; + notification->path = cfl_sds_create(file_path); + + ret = flb_notification_enqueue(FLB_PLUGIN_INPUT, + source, + ¬ification->base, + config); + + if (ret != 0) { + flb_plg_error(ctx->ins, + "blob file '%s' (id=%" PRIu64 ") notification " \ + "delivery error %d", file_path, file_id, ret); + + flb_notification_cleanup(¬ification->base); + } + } + } + + flb_blob_file_update_remote_id(&ctx->blob_db, file_id, ""); + flb_blob_db_file_reset_upload_states(&ctx->blob_db, file_id, file_path); + flb_blob_db_file_set_aborted_state(&ctx->blob_db, file_id, file_path, 0); + + cfl_sds_destroy(file_remote_id); + cfl_sds_destroy(file_path); + cfl_sds_destroy(source); + + file_remote_id = NULL; + file_path = NULL; + source = NULL; + } + else { + break; + } + } + + while (1) { + ret = flb_blob_db_file_get_next_aborted(&ctx->blob_db, + &file_id, + &file_delivery_attempts, + &file_path, + &source, + &file_remote_id, + &file_tag, + &part_count); + + if (ret == 1) { + if (part_count > 1) { + m_upload = create_blob_upload(ctx, file_tag, cfl_sds_len(file_tag), file_path); + + if (m_upload == NULL) { + cfl_sds_destroy(file_tag); + cfl_sds_destroy(file_path); + cfl_sds_destroy(file_remote_id); + cfl_sds_destroy(file_destination); + + flb_blob_db_unlock(&ctx->blob_db); + + return -1; + } + + mk_list_del(&m_upload->_head); + + m_upload->upload_id = flb_sds_create(file_remote_id); + + if (m_upload->upload_id == NULL) { + cfl_sds_destroy(file_tag); + cfl_sds_destroy(file_path); + cfl_sds_destroy(file_remote_id); + cfl_sds_destroy(file_destination); + + m_upload->part_number = 0; + multipart_upload_destroy(m_upload); + + flb_plg_error(ctx->ins, "Could not allocate upload id copy"); + + flb_blob_db_unlock(&ctx->blob_db); + + return -4; + } + + ret = abort_multipart_upload(ctx, m_upload); + } + + if (ctx->file_delivery_attempt_limit != FLB_OUT_RETRY_UNLIMITED && + file_delivery_attempts < ctx->file_delivery_attempt_limit) { + + flb_blob_file_update_remote_id(&ctx->blob_db, file_id, ""); + flb_blob_db_file_reset_upload_states(&ctx->blob_db, file_id, file_path); + flb_blob_db_file_set_aborted_state(&ctx->blob_db, file_id, file_path, 0); + } + else { + ret = flb_blob_db_file_delete(&ctx->blob_db, file_id, file_path); + + notification = flb_calloc(1, + sizeof( + struct flb_blob_delivery_notification)); + + if (notification != NULL) { + notification->base.dynamically_allocated = FLB_TRUE; + notification->base.notification_type = FLB_NOTIFICATION_TYPE_BLOB_DELIVERY; + notification->base.destructor = flb_input_blob_delivery_notification_destroy; + notification->success = FLB_FALSE; + notification->path = cfl_sds_create(file_path); + + ret = flb_notification_enqueue(FLB_PLUGIN_INPUT, + source, + ¬ification->base, + config); + + if (ret != 0) { + flb_plg_error(ctx->ins, + "blob file '%s' (id=%" PRIu64 ") notification " \ + "delivery error %d", file_path, file_id, ret); + + flb_notification_cleanup(¬ification->base); + } + } + } + + cfl_sds_destroy(file_remote_id); + cfl_sds_destroy(file_path); + 
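           /* reset the per-row strings before the next aborted-file iteration */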
cfl_sds_destroy(source); + + file_remote_id = NULL; + file_path = NULL; + source = NULL; + } + else { + break; + } + } + + ret = flb_blob_db_file_fetch_oldest_ready(&ctx->blob_db, + &file_id, + &file_path, + &part_ids, + &source, + &file_remote_id, + &file_tag, + &part_count); + + if (ret == 0) { + flb_plg_trace(ctx->ins, "no blob files ready to commit"); + } + else if (ret == -1) { + flb_plg_error(ctx->ins, "cannot get oldest blob file ready to upload"); + } + else if (ret == 1) { + /* one file is ready to be committed */ + flb_plg_debug(ctx->ins, "blob file '%s' (id=%" PRIu64 ") ready to upload", file_path, file_id); + + if (part_count > 1) { + m_upload = create_blob_upload(ctx, file_tag, cfl_sds_len(file_tag), file_path); + + if (m_upload == NULL) { + flb_blob_db_unlock(&ctx->blob_db); + + return -1; + } + + mk_list_del(&m_upload->_head); + + m_upload->upload_id = flb_sds_create(file_remote_id); + + if (m_upload->upload_id == NULL) { + m_upload->part_number = 0; + multipart_upload_destroy(m_upload); + + flb_plg_error(ctx->ins, "Could not allocate upload id copy"); + + flb_blob_db_unlock(&ctx->blob_db); + + return -4; + } + + ret = flb_blob_db_file_fetch_part_ids(&ctx->blob_db, + file_id, + m_upload->etags, + 1000, + &part_count); + + if (ret == -1) { + m_upload->part_number = 0; + multipart_upload_destroy(m_upload); + + flb_plg_error(ctx->ins, "Could not retrieve part ids"); + + flb_blob_db_unlock(&ctx->blob_db); + + return -5; + } + + m_upload->part_number = part_count; + + ret = complete_multipart_upload(ctx, m_upload); + + if (ret < 0) { + multipart_upload_destroy(m_upload); + + flb_plg_error(ctx->ins, "Could not initiate multipart upload"); + + flb_blob_db_unlock(&ctx->blob_db); + + return -6; + } + + multipart_upload_destroy(m_upload); + } + else { + ret = 0; + } + + if (ret == -1) { + flb_plg_error(ctx->ins, "cannot commit blob file parts for file id=%" PRIu64 " path=%s", + file_id, file_path); + } + else { + flb_plg_info(ctx->ins, "blob file '%s' (id=%" PRIu64 ") committed successfully", file_path, file_id); + /* notify the engine the blob file has been processed */ + + notification = flb_calloc(1, + sizeof( + struct flb_blob_delivery_notification)); + + if (notification != NULL) { + notification->base.dynamically_allocated = FLB_TRUE; + notification->base.notification_type = FLB_NOTIFICATION_TYPE_BLOB_DELIVERY; + notification->base.destructor = flb_input_blob_delivery_notification_destroy; + notification->success = FLB_TRUE; + notification->path = cfl_sds_create(file_path); + + ret = flb_notification_enqueue(FLB_PLUGIN_INPUT, + source, + ¬ification->base, + config); + + if (ret != 0) { + flb_plg_error(ctx->ins, + "blob file '%s' (id=%" PRIu64 ") notification " \ + "delivery error %d", file_path, file_id, ret); + + flb_notification_cleanup(¬ification->base); + } + } + + /* remove the file entry from the database */ + ret = flb_blob_db_file_delete(&ctx->blob_db, file_id, file_path); + if (ret == -1) { + flb_plg_error(ctx->ins, "cannot delete blob file '%s' (id=%" PRIu64 ") from the database", + file_path, file_id); + } + } + } + + flb_blob_db_unlock(&ctx->blob_db); + + if (file_tag) { + cfl_sds_destroy(file_tag); + file_tag = NULL; + } + + if (file_path) { + cfl_sds_destroy(file_path); + file_path = NULL; + } + + if (part_ids) { + cfl_sds_destroy(part_ids); + part_ids = NULL; + } + + if (source) { + cfl_sds_destroy(source); + source = NULL; + } + + if (file_remote_id) { + cfl_sds_destroy(file_remote_id); + file_remote_id = NULL; + } + + /* check for a next part file and lock it */ 
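+    /*
+     * flb_blob_db_file_part_get_next() returns 1 when a part row was
+     * fetched and locked for this worker, 0 when there are no parts
+     * left to upload and -1 on a database error; only the first case
+     * proceeds below.
+     */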
+ ret = flb_blob_db_file_part_get_next(&ctx->blob_db, &id, &file_id, &part_id, + &offset_start, &offset_end, + &part_delivery_attempts, + &file_delivery_attempts, + &file_path, + &file_destination, + &file_remote_id, + &file_tag, + &part_count); + + if (ret == -1) { + flb_plg_error(ctx->ins, "cannot get next blob file part"); + info->active_upload = FLB_FALSE; + + return -1; + } + else if (ret == 0) { + flb_plg_trace(ctx->ins, "no more blob file parts to process"); + info->active_upload = FLB_FALSE; + + return -1; + } + else if (ret == 1) { + /* just continue, the row info was retrieved */ + } + + if (strcmp(file_destination, ctx->endpoint) != 0) { + flb_plg_info(ctx->ins, + "endpoint change detected, restarting file : %s\n%s\n%s", + file_path, + file_destination, + ctx->endpoint); + + info->active_upload = FLB_FALSE; + + /* we need to set the aborted state flag to wait for existing uploads + * to finish and then wipe the slate and start again but we don't want + * to increment the failure count in this case. + */ + flb_blob_db_file_set_aborted_state(&ctx->blob_db, file_id, file_path, 1); + + cfl_sds_destroy(file_tag); + cfl_sds_destroy(file_path); + cfl_sds_destroy(file_remote_id); + cfl_sds_destroy(file_destination); + + return 0; + } + + /* since this is the first part we want to increment the files + * delivery attempt counter. + */ + if (part_id == 0) { + ret = flb_blob_db_file_delivery_attempts(&ctx->blob_db, file_id, ++file_delivery_attempts); + } + + /* read the file content */ + ret = flb_utils_read_file_offset(file_path, offset_start, offset_end, &out_buf, &out_size); + if (ret == -1) { + flb_plg_error(ctx->ins, "cannot read file part %s", file_path); + + info->active_upload = FLB_FALSE; + + cfl_sds_destroy(file_tag); + cfl_sds_destroy(file_path); + cfl_sds_destroy(file_remote_id); + cfl_sds_destroy(file_destination); + + return 0; + } + + flb_blob_db_file_part_update_delivery_attempt_counter(&ctx->blob_db, file_id, part_id, ++part_delivery_attempts); + + flb_plg_debug(ctx->ins, "sending part file %s (id=%" PRIu64 " part_id=%" PRIu64 ")", file_path, id, part_id); + + put_object_required = FLB_FALSE; + + + if (part_id == 0) { + if (part_count == 1) { + if (out_size <= MIN_CHUNKED_UPLOAD_SIZE) { + put_object_required = FLB_TRUE; + } + } + } + + if (put_object_required == FLB_TRUE) { + ret = put_blob_object(ctx, + file_tag, + file_path, + out_buf, + out_size); + + if (ret != 0) { + cfl_sds_destroy(file_tag); + cfl_sds_destroy(file_path); + cfl_sds_destroy(file_remote_id); + cfl_sds_destroy(file_destination); + + return -1; + } + } + else { + m_upload = create_blob_upload(ctx, file_tag, cfl_sds_len(file_tag), file_path); + + if (m_upload == NULL) { + cfl_sds_destroy(file_tag); + cfl_sds_destroy(file_path); + cfl_sds_destroy(file_remote_id); + cfl_sds_destroy(file_destination); + + return -1; + } + + mk_list_del(&m_upload->_head); + + if (part_id == 0) { + ret = create_multipart_upload(ctx, m_upload); + + if (ret < 0) { + cfl_sds_destroy(file_tag); + cfl_sds_destroy(file_path); + cfl_sds_destroy(file_remote_id); + cfl_sds_destroy(file_destination); + + + m_upload->part_number = 0; + multipart_upload_destroy(m_upload); + + flb_plg_error(ctx->ins, "Could not initiate multipart upload"); + + return -2; + } + + ret = flb_blob_file_update_remote_id(&ctx->blob_db, file_id, m_upload->upload_id); + + if (ret != FLB_BLOB_DB_SUCCESS) { + cfl_sds_destroy(file_tag); + cfl_sds_destroy(file_path); + cfl_sds_destroy(file_remote_id); + cfl_sds_destroy(file_destination); + + m_upload->part_number = 0; 
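+                /* zeroing part_number keeps multipart_upload_destroy()
+                 * from releasing etag entries that were never set */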
+ multipart_upload_destroy(m_upload); + + flb_plg_error(ctx->ins, "Could not save upload id"); + + return -3; + } + } + else { + m_upload->upload_id = flb_sds_create(file_remote_id); + + if (m_upload->upload_id == NULL) { + cfl_sds_destroy(file_tag); + cfl_sds_destroy(file_path); + cfl_sds_destroy(file_remote_id); + cfl_sds_destroy(file_destination); + + m_upload->part_number = 0; + multipart_upload_destroy(m_upload); + + flb_plg_error(ctx->ins, "Could not allocate upload id copy"); + + return -4; + } + } + + m_upload->part_number = part_id + 1; + + ret = upload_part(ctx, m_upload, out_buf, out_size); + + if (ret == 0) { + ret = flb_blob_db_file_part_update_remote_id(&ctx->blob_db, + id, + m_upload->etags[m_upload->part_number - 1]); + + flb_sds_destroy(m_upload->etags[m_upload->part_number - 1]); + } + + + m_upload->part_number = 0; + multipart_upload_destroy(m_upload); + } + + if (ret == 0) { + ret = flb_blob_db_file_part_uploaded(&ctx->blob_db, id); + } + else { + ret = flb_blob_db_file_part_in_progress(&ctx->blob_db, 0, id); + + if (ctx->part_delivery_attempt_limit != FLB_OUT_RETRY_UNLIMITED && + part_delivery_attempts >= ctx->part_delivery_attempt_limit) { + flb_blob_db_file_set_aborted_state(&ctx->blob_db, file_id, file_path, 1); + } + } + + if (ret == -1) { + info->active_upload = FLB_FALSE; + + cfl_sds_destroy(file_tag); + cfl_sds_destroy(file_path); + cfl_sds_destroy(file_remote_id); + cfl_sds_destroy(file_destination); + + return 0; + } + + info->active_upload = FLB_FALSE; + + if (out_buf) { + flb_free(out_buf); + } + + cfl_sds_destroy(file_tag); + cfl_sds_destroy(file_path); + cfl_sds_destroy(file_remote_id); + cfl_sds_destroy(file_destination); + + return 0; +} + + + + + +static void cb_s3_upload(struct flb_config *config, void *data) +{ + struct flb_s3 *ctx = data; + struct s3_file *chunk = NULL; + struct multipart_upload *m_upload = NULL; + struct flb_fstore_file *fsf; + char *buffer = NULL; + size_t buffer_size = 0; + struct mk_list *tmp; + struct mk_list *head; + int complete; + int ret; + time_t now; + + flb_plg_info(ctx->ins, "Running upload timer callback (cb_s3_upload).."); + + now = time(NULL); + + /* Check all chunks and see if any have timed out */ + mk_list_foreach_safe(head, tmp, &ctx->stream_active->files) { + fsf = mk_list_entry(head, struct flb_fstore_file, _head); + chunk = fsf->data; + + if (now < (chunk->create_time + ctx->upload_timeout + ctx->retry_time)) { + continue; /* Only send chunks which have timed out */ + } + + /* Locked chunks are being processed, skip */ if (chunk->locked == FLB_TRUE) { continue; } @@ -2081,6 +3402,145 @@ static void flush_init(void *out_context) } } +static int blob_chunk_register_parts(struct flb_s3 *ctx, uint64_t file_id, size_t total_size) +{ + int ret; + int64_t parts = 0; + int64_t id; + size_t offset_start = 0; + size_t offset_end = 0; + + /* generate file parts */ + while (offset_start < total_size) { + offset_end = offset_start + ctx->part_size; + + /* do not exceed maximum size */ + if (offset_end > total_size) { + offset_end = total_size; + } + + /* insert part */ + ret = flb_blob_db_file_part_insert(&ctx->blob_db, file_id, parts, offset_start, offset_end, &id); + if (ret == -1) { + flb_plg_error(ctx->ins, "cannot insert blob file part into database"); + return -1; + } + + offset_start = offset_end; + parts++; + } + + return parts; +} + +static int process_blob_chunk(struct flb_s3 *ctx, struct flb_event_chunk *event_chunk) +{ + int64_t ret; + int64_t file_id; + cfl_sds_t file_path = NULL; + cfl_sds_t source = NULL; + 
size_t file_size;
+    msgpack_object map;
+
+    struct flb_log_event_decoder log_decoder;
+    struct flb_log_event log_event;
+
+    if (ctx->blob_db.db == NULL) {
+        flb_plg_error(ctx->ins, "Cannot process blob because this operation requires a database.");
+
+        return -1;
+    }
+
+    ret = flb_log_event_decoder_init(&log_decoder,
+                                     (char *) event_chunk->data,
+                                     event_chunk->size);
+
+    if (ret != FLB_EVENT_DECODER_SUCCESS) {
+        flb_plg_error(ctx->ins,
+                      "Log event decoder initialization error : %i", (int) ret);
+        return -1;
+
+    }
+
+    while (flb_log_event_decoder_next(&log_decoder, &log_event) == FLB_EVENT_DECODER_SUCCESS) {
+        map = *log_event.body;
+        ret = flb_input_blob_file_get_info(map, &source, &file_path, &file_size);
+        if (ret == -1) {
+            flb_plg_error(ctx->ins, "cannot get file info from blob record, skipping");
+            continue;
+        }
+
+        ret = flb_blob_db_file_insert(&ctx->blob_db,
+                                      event_chunk->tag,
+                                      source,
+                                      ctx->endpoint,
+                                      file_path,
+                                      file_size);
+
+        if (ret == -1) {
+            flb_plg_error(ctx->ins, "cannot insert blob file into database: %s (size=%zu)",
+                          file_path, file_size);
+
+            cfl_sds_destroy(file_path);
+            cfl_sds_destroy(source);
+
+            continue;
+        }
+
+        /* generate the parts by using the newest id created (ret) */
+        file_id = ret;
+        ret = blob_chunk_register_parts(ctx, file_id, file_size);
+        if (ret == -1) {
+            flb_plg_error(ctx->ins, "cannot register blob file '%s' parts into database",
+                          file_path);
+
+            cfl_sds_destroy(file_path);
+            cfl_sds_destroy(source);
+
+            return -1;
+        }
+
+        flb_plg_debug(ctx->ins, "blob file '%s' (id=%" PRId64 ") registered with %" PRId64 " parts",
+                      file_path, file_id, ret);
+
+        cfl_sds_destroy(file_path);
+        cfl_sds_destroy(source);
+    }
+
+    flb_log_event_decoder_destroy(&log_decoder);
+
+    return 0;
+}
+
+static void cb_s3_blob_file_upload(struct flb_config *config, void *out_context)
+{
+    cb_s3_upload_blob(config, out_context);
+
+    flb_sched_timer_cb_coro_return();
+}
+
+static int s3_timer_create(struct flb_s3 *ctx)
+{
+    int ret;
+    int64_t ms;
+    struct flb_sched *sched;
+
+    sched = flb_sched_ctx_get();
+
+    /* convert from seconds to milliseconds (scheduler needs ms) */
+    ms = ctx->upload_parts_timeout * 1000;
+
+    ret = flb_sched_timer_coro_cb_create(sched, FLB_SCHED_TIMER_CB_PERM, ms,
+                                         cb_s3_blob_file_upload, ctx, NULL);
+    if (ret == -1) {
+        flb_plg_error(ctx->ins, "failed to create upload timer");
+        return -1;
+    }
+
+    return 0;
+}
+
 static void cb_s3_flush(struct flb_event_chunk *event_chunk,
                         struct flb_output_flush *out_flush,
                         struct flb_input_instance *i_ins,
@@ -2099,6 +3559,19 @@ static void cb_s3_flush(struct flb_event_chunk *event_chunk,
     struct flb_log_event_decoder log_decoder;
     struct flb_log_event log_event;
 
+    if (event_chunk->type == FLB_EVENT_TYPE_BLOBS) {
+        /*
+         * For Blob types, we use the flush callback to enqueue the file, then
+         * cb_s3_blob_file_upload() takes care of the rest like reading the
+         * file and uploading it to S3.
+         */
+        ret = process_blob_chunk(ctx, event_chunk);
+        if (ret == -1) {
+            FLB_OUTPUT_RETURN(FLB_RETRY);
+        }
+
+        FLB_OUTPUT_RETURN(FLB_OK);
+    }
+
     /* Cleanup old buffers and initialize upload timer */
     flush_init(ctx);
 
@@ -2294,6 +3767,12 @@ static int cb_s3_exit(void *data, struct flb_config *config)
         }
     }
 
+    if (ctx->blob_database_file != NULL &&
+        ctx->blob_db.db != NULL) {
+
+        flb_blob_db_close(&ctx->blob_db);
+    }
+
     s3_store_exit(ctx);
     s3_context_destroy(ctx);
 
@@ -2489,18 +3968,81 @@ static struct flb_config_map config_map[] = {
         "$HOME/.aws/ directory."
}, + { + FLB_CONFIG_MAP_STR, "blob_database_file", NULL, + 0, FLB_TRUE, offsetof(struct flb_s3, blob_database_file), + "Absolute path to a database file to be used to store blob files contexts" + }, + + { + FLB_CONFIG_MAP_SIZE, "part_size", "25M", + 0, FLB_TRUE, offsetof(struct flb_s3, part_size), + "Size of each part when uploading blob files" + }, + + { + FLB_CONFIG_MAP_INT, "file_delivery_attempt_limit", "1", + 0, FLB_TRUE, offsetof(struct flb_s3, file_delivery_attempt_limit), + "File delivery attempt limit" + }, + + { + FLB_CONFIG_MAP_INT, "part_delivery_attempt_limit", "1", + 0, FLB_TRUE, offsetof(struct flb_s3, part_delivery_attempt_limit), + "File part delivery attempt limit" + }, + + { + FLB_CONFIG_MAP_TIME, "upload_parts_timeout", "10M", + 0, FLB_TRUE, offsetof(struct flb_s3, upload_parts_timeout), + "Timeout to upload parts of a blob file" + }, + + { + FLB_CONFIG_MAP_TIME, "upload_part_freshness_limit", "6D", + 0, FLB_TRUE, offsetof(struct flb_s3, upload_parts_freshness_threshold), + "Maximum lifespan of an uncommitted file part" + }, + + { + FLB_CONFIG_MAP_STR, "configuration_endpoint_url", NULL, + 0, FLB_TRUE, offsetof(struct flb_s3, configuration_endpoint_url), + "Configuration endpoint URL" + }, + + { + FLB_CONFIG_MAP_STR, "configuration_endpoint_username", NULL, + 0, FLB_TRUE, offsetof(struct flb_s3, configuration_endpoint_username), + "Configuration endpoint basic authentication username" + }, + + { + FLB_CONFIG_MAP_STR, "configuration_endpoint_password", NULL, + 0, FLB_TRUE, offsetof(struct flb_s3, configuration_endpoint_password), + "Configuration endpoint basic authentication password" + }, + + { + FLB_CONFIG_MAP_STR, "configuration_endpoint_bearer_token", NULL, + 0, FLB_TRUE, offsetof(struct flb_s3, configuration_endpoint_bearer_token), + "Configuration endpoint bearer token" + }, + /* EOF */ {0} }; /* Plugin registration */ struct flb_output_plugin out_s3_plugin = { - .name = "s3", - .description = "Send to S3", - .cb_init = cb_s3_init, - .cb_flush = cb_s3_flush, - .cb_exit = cb_s3_exit, - .workers = 1, - .flags = FLB_OUTPUT_NET | FLB_IO_TLS, - .config_map = config_map + .name = "s3", + .description = "Send to S3", + .cb_init = cb_s3_init, + .cb_flush = cb_s3_flush, + .cb_exit = cb_s3_exit, + .cb_worker_init = cb_s3_worker_init, + .cb_worker_exit = cb_s3_worker_exit, + .workers = 1, + .event_type = FLB_OUTPUT_LOGS | FLB_OUTPUT_BLOBS, + .flags = FLB_OUTPUT_NET | FLB_IO_TLS, + .config_map = config_map }; diff --git a/plugins/out_s3/s3.h b/plugins/out_s3/s3.h index e51d39f2419..b21dd9a1696 100644 --- a/plugins/out_s3/s3.h +++ b/plugins/out_s3/s3.h @@ -25,6 +25,7 @@ #include #include #include +#include /* Upload data to S3 in 5MB chunks */ #define MIN_CHUNKED_UPLOAD_SIZE 5242880 @@ -43,7 +44,7 @@ #define MAX_FILE_SIZE_STR "50,000,000,000" /* Allowed max file size 1 GB for publishing to S3 */ -#define MAX_FILE_SIZE_PUT_OBJECT 1000000000 +#define MAX_FILE_SIZE_PUT_OBJECT 1000000000 #define DEFAULT_UPLOAD_TIMEOUT 3600 @@ -123,6 +124,18 @@ struct flb_s3 { int insecure; size_t store_dir_limit_size; + struct flb_blob_db blob_db; + flb_sds_t blob_database_file; + size_t part_size; + time_t upload_parts_timeout; + time_t upload_parts_freshness_threshold; + int file_delivery_attempt_limit; + int part_delivery_attempt_limit; + flb_sds_t configuration_endpoint_url; + flb_sds_t configuration_endpoint_username; + flb_sds_t configuration_endpoint_password; + flb_sds_t configuration_endpoint_bearer_token; + /* track the total amount of buffered data */ size_t current_buffer_size; @@ 
-187,6 +200,9 @@ int create_multipart_upload(struct flb_s3 *ctx,
 int complete_multipart_upload(struct flb_s3 *ctx,
                               struct multipart_upload *m_upload);
 
+int abort_multipart_upload(struct flb_s3 *ctx,
+                           struct multipart_upload *m_upload);
+
 void multipart_read_uploads_from_fs(struct flb_s3 *ctx);
 
 void multipart_upload_destroy(struct multipart_upload *m_upload);
diff --git a/plugins/out_s3/s3_multipart.c b/plugins/out_s3/s3_multipart.c
index 967a27420f2..e634d0287fe 100644
--- a/plugins/out_s3/s3_multipart.c
+++ b/plugins/out_s3/s3_multipart.c
@@ -334,7 +334,7 @@ static int complete_multipart_upload_payload(struct flb_s3 *ctx,
     int offset = 0;
     flb_sds_t etag;
     size_t size = COMPLETE_MULTIPART_UPLOAD_BASE_LEN;
-    char part_num[7];
+    char part_num[11];
 
     size = size + (COMPLETE_MULTIPART_UPLOAD_PART_LEN * m_upload->part_number);
 
@@ -476,6 +476,72 @@ int complete_multipart_upload(struct flb_s3 *ctx,
     return -1;
 }
 
+int abort_multipart_upload(struct flb_s3 *ctx,
+                           struct multipart_upload *m_upload)
+{
+    flb_sds_t uri = NULL;
+    flb_sds_t tmp;
+    int ret;
+    struct flb_http_client *c = NULL;
+    struct flb_aws_client *s3_client;
+
+    if (!m_upload->upload_id) {
+        flb_plg_error(ctx->ins, "Cannot abort multipart upload for key %s: "
+                      "upload ID is unset ", m_upload->s3_key);
+        return -1;
+    }
+
+    uri = flb_sds_create_size(flb_sds_len(m_upload->s3_key) + 11 +
+                              flb_sds_len(m_upload->upload_id));
+    if (!uri) {
+        flb_errno();
+        return -1;
+    }
+
+    tmp = flb_sds_printf(&uri, "/%s%s?uploadId=%s", ctx->bucket,
+                         m_upload->s3_key, m_upload->upload_id);
+    if (!tmp) {
+        flb_sds_destroy(uri);
+        return -1;
+    }
+    uri = tmp;
+
+    s3_client = ctx->s3_client;
+    if (s3_plugin_under_test() == FLB_TRUE) {
+        /* c = mock_s3_call("TEST_ABORT_MULTIPART_UPLOAD_ERROR", "AbortMultipartUpload"); */
+        c = NULL;
+    }
+    else {
+        c = s3_client->client_vtable->request(s3_client, FLB_HTTP_DELETE,
+                                              uri, NULL, 0,
+                                              NULL, 0);
+    }
+    flb_sds_destroy(uri);
+
+    if (c) {
+        flb_plg_debug(ctx->ins, "AbortMultipartUpload http status=%d",
+                      c->resp.status);
+        if (c->resp.status == 204) {
+            flb_plg_info(ctx->ins, "Successfully aborted multipart upload "
+                         "for %s, UploadId=%s", m_upload->s3_key,
+                         m_upload->upload_id);
+            flb_http_client_destroy(c);
+            /* remove this upload from the file system */
+            remove_upload_from_fs(ctx, m_upload);
+            return 0;
+        }
+        flb_aws_print_xml_error(c->resp.payload, c->resp.payload_size,
+                                "AbortMultipartUpload", ctx->ins);
+        if (c->resp.payload != NULL) {
+            flb_plg_debug(ctx->ins, "Raw AbortMultipartUpload response: %s",
+                          c->resp.payload);
+        }
+        flb_http_client_destroy(c);
+    }
+
+    flb_plg_error(ctx->ins, "AbortMultipartUpload request failed");
+    return -1;
+}
 
 int create_multipart_upload(struct flb_s3 *ctx,
                             struct multipart_upload *m_upload)

From ac975c29246b0c7df23a9c0a7cf935bb60dd554e Mon Sep 17 00:00:00 2001
From: Leonardo Alminana
Date: Mon, 3 Feb 2025 13:58:26 +0100
Subject: [PATCH 06/18] build: added blob database component

Signed-off-by: Leonardo Alminana
---
 src/CMakeLists.txt | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index 45bc41b63bb..3df8b8616c5 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -92,6 +92,7 @@ set(src
   flb_cfl_record_accessor.c
   flb_conditionals.c
   flb_mem.c
+  flb_blob_db.c
   )
 
 # Config format

From 4e3392c86eeedee1bea3beb51185936c7ef72879 Mon Sep 17 00:00:00 2001
From: Leonardo Alminana
Date: Mon, 3 Feb 2025 15:17:38 +0100
Subject: [PATCH 07/18] out_s3: added missing windows header

Signed-off-by: Leonardo Alminana
---
 plugins/out_s3/s3.c | 7
+++++++ 1 file changed, 7 insertions(+) diff --git a/plugins/out_s3/s3.c b/plugins/out_s3/s3.c index e7491394569..1413c634a06 100644 --- a/plugins/out_s3/s3.c +++ b/plugins/out_s3/s3.c @@ -53,6 +53,13 @@ struct worker_info { FLB_TLS_DEFINE(struct worker_info, s3_worker_info); +#ifdef FLB_SYSTEM_WINDOWS +static int setenv(const char *name, const char *value, int overwrite) +{ + return SetEnvironmentVariableA(name, value); +} +#endif + static int s3_timer_create(struct flb_s3 *ctx); static int construct_request_buffer(struct flb_s3 *ctx, flb_sds_t new_data, From 33cafc09b90f37f46ff11b6dd4c835c000e2cd98 Mon Sep 17 00:00:00 2001 From: Leonardo Alminana Date: Mon, 3 Feb 2025 15:39:37 +0100 Subject: [PATCH 08/18] out_s3: added missing TLS initializer Signed-off-by: Leonardo Alminana --- plugins/out_s3/s3.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/plugins/out_s3/s3.c b/plugins/out_s3/s3.c index 1413c634a06..f7b3f13ed76 100644 --- a/plugins/out_s3/s3.c +++ b/plugins/out_s3/s3.c @@ -996,6 +996,8 @@ static int cb_s3_init(struct flb_output_instance *ins, struct mk_list *split; int list_size; + FLB_TLS_INIT(s3_worker_info); + ctx = flb_calloc(1, sizeof(struct flb_s3)); if (!ctx) { flb_errno(); From 22d14c2261eb1e2628fff01d753b19d1fd093fa4 Mon Sep 17 00:00:00 2001 From: Leonardo Alminana Date: Wed, 5 Feb 2025 12:15:18 +0100 Subject: [PATCH 09/18] out_s3: removed erroneously included code (CID 532473) Signed-off-by: Leonardo Alminana --- plugins/out_s3/s3.c | 34 ---------------------------------- 1 file changed, 34 deletions(-) diff --git a/plugins/out_s3/s3.c b/plugins/out_s3/s3.c index f7b3f13ed76..7efbae4c3bb 100644 --- a/plugins/out_s3/s3.c +++ b/plugins/out_s3/s3.c @@ -2561,40 +2561,6 @@ static int cb_s3_upload_blob(struct flb_config *config, void *data) ret = abort_multipart_upload(ctx, m_upload); } - if (ctx->file_delivery_attempt_limit != FLB_OUT_RETRY_UNLIMITED && - file_delivery_attempts < ctx->file_delivery_attempt_limit) { - flb_blob_db_file_reset_upload_states(&ctx->blob_db, file_id, file_path); - flb_blob_db_file_set_aborted_state(&ctx->blob_db, file_id, file_path, 0); - } - else { - ret = flb_blob_db_file_delete(&ctx->blob_db, file_id, file_path); - - notification = flb_calloc(1, - sizeof( - struct flb_blob_delivery_notification)); - - if (notification != NULL) { - notification->base.dynamically_allocated = FLB_TRUE; - notification->base.notification_type = FLB_NOTIFICATION_TYPE_BLOB_DELIVERY; - notification->base.destructor = flb_input_blob_delivery_notification_destroy; - notification->success = FLB_FALSE; - notification->path = cfl_sds_create(file_path); - - ret = flb_notification_enqueue(FLB_PLUGIN_INPUT, - source, - ¬ification->base, - config); - - if (ret != 0) { - flb_plg_error(ctx->ins, - "blob file '%s' (id=%" PRIu64 ") notification " \ - "delivery error %d", file_path, file_id, ret); - - flb_notification_cleanup(¬ification->base); - } - } - } - flb_blob_file_update_remote_id(&ctx->blob_db, file_id, ""); flb_blob_db_file_reset_upload_states(&ctx->blob_db, file_id, file_path); flb_blob_db_file_set_aborted_state(&ctx->blob_db, file_id, file_path, 0); From 0e54c208e7c1b2b0e6c003dbe9e2202347a629aa Mon Sep 17 00:00:00 2001 From: Leonardo Alminana Date: Wed, 5 Feb 2025 12:50:36 +0100 Subject: [PATCH 10/18] out_s3: fixed potential memory leak (CID 532470) Signed-off-by: Leonardo Alminana --- plugins/out_s3/s3.c | 25 +++++++++++-------------- 1 file changed, 11 insertions(+), 14 deletions(-) diff --git a/plugins/out_s3/s3.c b/plugins/out_s3/s3.c index 7efbae4c3bb..b4c85616150 
100644 --- a/plugins/out_s3/s3.c +++ b/plugins/out_s3/s3.c @@ -2919,6 +2919,8 @@ static int cb_s3_upload_blob(struct flb_config *config, void *data) out_size); if (ret != 0) { + flb_free(out_buf); + cfl_sds_destroy(file_tag); cfl_sds_destroy(file_path); cfl_sds_destroy(file_remote_id); @@ -2931,6 +2933,8 @@ static int cb_s3_upload_blob(struct flb_config *config, void *data) m_upload = create_blob_upload(ctx, file_tag, cfl_sds_len(file_tag), file_path); if (m_upload == NULL) { + flb_free(out_buf); + cfl_sds_destroy(file_tag); cfl_sds_destroy(file_path); cfl_sds_destroy(file_remote_id); @@ -2945,6 +2949,8 @@ static int cb_s3_upload_blob(struct flb_config *config, void *data) ret = create_multipart_upload(ctx, m_upload); if (ret < 0) { + flb_free(out_buf); + cfl_sds_destroy(file_tag); cfl_sds_destroy(file_path); cfl_sds_destroy(file_remote_id); @@ -2962,6 +2968,8 @@ static int cb_s3_upload_blob(struct flb_config *config, void *data) ret = flb_blob_file_update_remote_id(&ctx->blob_db, file_id, m_upload->upload_id); if (ret != FLB_BLOB_DB_SUCCESS) { + flb_free(out_buf); + cfl_sds_destroy(file_tag); cfl_sds_destroy(file_path); cfl_sds_destroy(file_remote_id); @@ -2979,6 +2987,8 @@ static int cb_s3_upload_blob(struct flb_config *config, void *data) m_upload->upload_id = flb_sds_create(file_remote_id); if (m_upload->upload_id == NULL) { + flb_free(out_buf); + cfl_sds_destroy(file_tag); cfl_sds_destroy(file_path); cfl_sds_destroy(file_remote_id); @@ -3022,22 +3032,9 @@ static int cb_s3_upload_blob(struct flb_config *config, void *data) } } - if (ret == -1) { - info->active_upload = FLB_FALSE; - - cfl_sds_destroy(file_tag); - cfl_sds_destroy(file_path); - cfl_sds_destroy(file_remote_id); - cfl_sds_destroy(file_destination); - - return 0; - } - info->active_upload = FLB_FALSE; - if (out_buf) { - flb_free(out_buf); - } + flb_free(out_buf); cfl_sds_destroy(file_tag); cfl_sds_destroy(file_path); From 7ab7a76f9c06f4b132f37038ec73a10fa1c2a1bc Mon Sep 17 00:00:00 2001 From: Leonardo Alminana Date: Wed, 5 Feb 2025 12:53:14 +0100 Subject: [PATCH 11/18] out_s3: fixed compiler warning (CID 532472) Signed-off-by: Leonardo Alminana --- plugins/out_s3/s3.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/plugins/out_s3/s3.c b/plugins/out_s3/s3.c index b4c85616150..36682455bcc 100644 --- a/plugins/out_s3/s3.c +++ b/plugins/out_s3/s3.c @@ -2878,7 +2878,7 @@ static int cb_s3_upload_blob(struct flb_config *config, void *data) * delivery attempt counter. 
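      * The update is treated as best effort: a failed counter write
      * should not abort the part upload itself.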
*/ if (part_id == 0) { - ret = flb_blob_db_file_delivery_attempts(&ctx->blob_db, file_id, ++file_delivery_attempts); + flb_blob_db_file_delivery_attempts(&ctx->blob_db, file_id, ++file_delivery_attempts); } /* read the file content */ From 9cd7fe78a02f0dc0920cfdb67409ad7a6a78d619 Mon Sep 17 00:00:00 2001 From: Leonardo Alminana Date: Wed, 5 Feb 2025 12:55:16 +0100 Subject: [PATCH 12/18] out_s3: fixed compiler warning (CID 532471) Signed-off-by: Leonardo Alminana --- src/flb_blob_db.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/flb_blob_db.c b/src/flb_blob_db.c index 0dd715f38f4..65c02f52a78 100644 --- a/src/flb_blob_db.c +++ b/src/flb_blob_db.c @@ -1366,7 +1366,7 @@ int flb_blob_db_file_fetch_part_count(struct flb_blob_db *context, result = sqlite3_step(statement); if (result == SQLITE_ROW) { - result = sqlite3_column_int64(statement, 0); + result = (int) sqlite3_column_int64(statement, 0); } else { context->last_error = result; From 60cd922d134686bb96a0057650171cc648158d12 Mon Sep 17 00:00:00 2001 From: Leonardo Alminana Date: Wed, 5 Feb 2025 12:57:34 +0100 Subject: [PATCH 13/18] out_s3: fixed NULL dereferences (CID 532469) Signed-off-by: Leonardo Alminana --- src/flb_blob_db.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/flb_blob_db.c b/src/flb_blob_db.c index 65c02f52a78..c3b2b6af5fb 100644 --- a/src/flb_blob_db.c +++ b/src/flb_blob_db.c @@ -701,10 +701,10 @@ int flb_blob_db_file_get_next_aborted(struct flb_blob_db *context, int result; int exists; - path = NULL; - source = NULL; - remote_id = NULL; - file_tag = NULL; + *path = NULL; + *source = NULL; + *remote_id = NULL; + *file_tag = NULL; statement = context->stmt_get_next_aborted_file; From a1e453783a2b57e3ca2d3a90b31e8a6e4243f0e6 Mon Sep 17 00:00:00 2001 From: Leonardo Alminana Date: Wed, 5 Feb 2025 13:01:03 +0100 Subject: [PATCH 14/18] out_s3: fixed compiler warning (CID 532468) Signed-off-by: Leonardo Alminana --- src/aws/flb_aws_util.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/src/aws/flb_aws_util.c b/src/aws/flb_aws_util.c index 9c2acd89aa3..6ba8b28cf2d 100644 --- a/src/aws/flb_aws_util.c +++ b/src/aws/flb_aws_util.c @@ -987,21 +987,23 @@ flb_sds_t flb_get_s3_blob_key(const char *format, error: flb_errno(); + if (tmp_tag){ flb_sds_destroy(tmp_tag); } + if (s3_key){ flb_sds_destroy(s3_key); } + if (buf && buf != tmp){ flb_sds_destroy(buf); } + if (tmp){ flb_sds_destroy(tmp); } - if (tmp_key){ - flb_sds_destroy(tmp_key); - } + return NULL; } From 89982380ccfded43e8994cf3b526afc40a18442b Mon Sep 17 00:00:00 2001 From: Leonardo Alminana Date: Tue, 18 Feb 2025 15:43:50 +0100 Subject: [PATCH 15/18] out_s3: pre-signed url support addition Signed-off-by: Leonardo Alminana --- plugins/out_s3/s3.c | 3117 ++++++++++++++++++--------------- plugins/out_s3/s3.h | 21 +- plugins/out_s3/s3_multipart.c | 52 +- 3 files changed, 1710 insertions(+), 1480 deletions(-) diff --git a/plugins/out_s3/s3.c b/plugins/out_s3/s3.c index 36682455bcc..7ebb71391f3 100644 --- a/plugins/out_s3/s3.c +++ b/plugins/out_s3/s3.c @@ -82,6 +82,8 @@ static struct multipart_upload *create_upload(struct flb_s3 *ctx, static void remove_from_queue(struct upload_queue *entry); +static int blob_initialize_authorization_endpoint_upstream(struct flb_s3 *context); + static struct flb_aws_header content_encoding_header = { .key = "Content-Encoding", .key_len = 16, @@ -512,6 +514,14 @@ static void s3_context_destroy(struct flb_s3 *ctx) flb_sds_destroy(ctx->seq_index_file); } + if 
(ctx->authorization_endpoint_upstream != NULL) { + flb_upstream_destroy(ctx->authorization_endpoint_upstream); + } + + if (ctx->authorization_endpoint_tls_context != NULL) { + flb_tls_destroy(ctx->authorization_endpoint_tls_context); + } + /* Remove uploads */ mk_list_foreach_safe(head, tmp, &ctx->uploads) { m_upload = mk_list_entry(head, struct multipart_upload, _head); @@ -529,1782 +539,1814 @@ static void s3_context_destroy(struct flb_s3 *ctx) flb_free(ctx); } -static int find_map_entry_by_key(msgpack_object_map *map, - char *key, - size_t match_index, - int case_insensitive) +static int cb_s3_init(struct flb_output_instance *ins, + struct flb_config *config, void *data) { - size_t match_count; - int result; - int index; - - match_count = 0; - - for (index = 0 ; index < (int) map->size ; index++) { - if (map->ptr[index].key.type == MSGPACK_OBJECT_STR) { - if (case_insensitive) { - result = strncasecmp(map->ptr[index].key.via.str.ptr, - key, - map->ptr[index].key.via.str.size); - } - else { - result = strncmp(map->ptr[index].key.via.str.ptr, - key, - map->ptr[index].key.via.str.size); - } + int ret; + flb_sds_t tmp_sds; + char *role_arn = NULL; + char *session_name; + const char *tmp; + struct flb_s3 *ctx = NULL; + struct flb_aws_client_generator *generator; + (void) config; + (void) data; + char *ep; + struct flb_split_entry *tok; + struct mk_list *split; + int list_size; - if (result == 0) { - if (match_count == match_index) { - return index; - } + FLB_TLS_INIT(s3_worker_info); - match_count++; - } - } + ctx = flb_calloc(1, sizeof(struct flb_s3)); + if (!ctx) { + flb_errno(); + return -1; } + ctx->ins = ins; + mk_list_init(&ctx->uploads); + mk_list_init(&ctx->upload_queue); - return -1; -} - -static int extract_map_string_entry_by_key(flb_sds_t *output, - msgpack_object_map *map, - char *key, - size_t match_index, - int case_insensitive) -{ - int index; - int result; + ctx->retry_time = 0; + ctx->upload_queue_success = FLB_FALSE; - index = find_map_entry_by_key(map, - key, - match_index, - case_insensitive); + /* Export context */ + flb_output_set_context(ins, ctx); - if (index == -1) { + /* initialize config map */ + ret = flb_output_config_map_set(ins, (void *) ctx); + if (ret == -1) { return -1; } - if (map->ptr[index].val.type != MSGPACK_OBJECT_STR) { - return -2; + /* the check against -1 is works here because size_t is unsigned + * and (int) -1 == unsigned max value + * Fluent Bit uses -1 (which becomes max value) to indicate undefined + */ + if (ctx->ins->total_limit_size != -1) { + flb_plg_warn(ctx->ins, "Please use 'store_dir_limit_size' with s3 output instead of 'storage.total_limit_size'. 
" + "S3 has its own buffer files located in the store_dir."); } - if (*output == NULL) { - *output = flb_sds_create_len(map->ptr[index].val.via.str.ptr, - map->ptr[index].val.via.str.size); - - if (*output == NULL) { - return -3; + /* Date key */ + ctx->date_key = ctx->json_date_key; + tmp = flb_output_get_property("json_date_key", ins); + if (tmp) { + /* Just check if we have to disable it */ + if (flb_utils_bool(tmp) == FLB_FALSE) { + ctx->date_key = NULL; } } - else { - (*output)[0] = '\0'; - - flb_sds_len_set(*output, 0); - - result = flb_sds_cat_safe(output, - map->ptr[index].val.via.str.ptr, - map->ptr[index].val.via.str.size); - if (result != 0) { - return -4; + /* Date format for JSON output */ + ctx->json_date_format = FLB_PACK_JSON_DATE_ISO8601; + tmp = flb_output_get_property("json_date_format", ins); + if (tmp) { + ret = flb_pack_to_json_date_type(tmp); + if (ret == -1) { + flb_plg_error(ctx->ins, "invalid json_date_format '%s'. ", tmp); + return -1; + } + else { + ctx->json_date_format = ret; } } - return 0; -} - -static int process_remote_configuration_payload( - struct flb_s3 *context, - char *payload, - size_t payload_size) -{ - size_t msgpack_body_length; - msgpack_object_map *configuration_map; - flb_sds_t secret_access_key; - flb_sds_t access_key_id; - flb_sds_t session_token; - msgpack_unpacked unpacked_root; - char *msgpack_body; - int root_type; - size_t offset; - int result; - - result = flb_pack_json(payload, - payload_size, - &msgpack_body, - &msgpack_body_length, - &root_type, - NULL); - - if (result != 0) { - flb_plg_error(context->ins, - "JSON to msgpack conversion error"); - - result = -1; + tmp = flb_output_get_property("bucket", ins); + if (!tmp) { + flb_plg_error(ctx->ins, "'bucket' is a required parameter"); + return -1; } - else { - msgpack_unpacked_init(&unpacked_root); - offset = 0; - result = msgpack_unpack_next(&unpacked_root, - msgpack_body, - msgpack_body_length, - &offset); - - if (result != MSGPACK_UNPACK_SUCCESS) { - flb_plg_error(context->ins, "corrupted msgpack data"); + /* + * store_dir is the user input, buffer_dir is what the code uses + * We append the bucket name to the dir, to support multiple instances + * of this plugin using the same buffer dir + */ + tmp_sds = concat_path(ctx->store_dir, ctx->bucket); + if (!tmp_sds) { + flb_plg_error(ctx->ins, "Could not construct buffer path"); + return -1; + } + ctx->buffer_dir = tmp_sds; - result = -1; + /* Initialize local storage */ + ret = s3_store_init(ctx); + if (ret == -1) { + flb_plg_error(ctx->ins, "Failed to initialize S3 storage: %s", + ctx->store_dir); + return -1; + } - goto cleanup; + tmp = flb_output_get_property("s3_key_format", ins); + if (tmp) { + if (tmp[0] != '/') { + flb_plg_error(ctx->ins, "'s3_key_format' must start with a '/'"); + return -1; } - - if (unpacked_root.data.type != MSGPACK_OBJECT_MAP) { - flb_plg_error(context->ins, "unexpected root object type"); - - result = -1; - - goto cleanup; + if (strstr((char *) tmp, "$INDEX")) { + ret = init_seq_index(ctx); + if (ret < 0) { + return -1; + } } - - configuration_map = &unpacked_root.data.via.map; - - secret_access_key = NULL; - access_key_id = NULL; - session_token = NULL; - - result = extract_map_string_entry_by_key(&access_key_id, - configuration_map, - "access_key_id", 0, FLB_TRUE); - - if (result != 0) { - flb_plg_error(context->ins, - "access_key_id could be extracted : %d", result); - - goto cleanup; + if (strstr((char *) tmp, "$UUID")) { + ctx->key_fmt_has_uuid = FLB_TRUE; } + } - result = 
extract_map_string_entry_by_key(&secret_access_key, - configuration_map, - "secret_access_key", 0, FLB_TRUE); + /* validate 'total_file_size' */ + if (ctx->file_size <= 0) { + flb_plg_error(ctx->ins, "Failed to parse total_file_size %s", tmp); + return -1; + } + if (ctx->file_size < 1000000) { + flb_plg_error(ctx->ins, "total_file_size must be at least 1MB"); + return -1; + } + if (ctx->file_size > MAX_FILE_SIZE) { + flb_plg_error(ctx->ins, "Max total_file_size is %s bytes", MAX_FILE_SIZE_STR); + return -1; + } + flb_plg_info(ctx->ins, "Using upload size %lu bytes", ctx->file_size); - if (result != 0) { - flb_plg_error(context->ins, - "secret_access_key extraction error : %d", result); + if (ctx->use_put_object == FLB_FALSE && ctx->file_size < 2 * MIN_CHUNKED_UPLOAD_SIZE) { + flb_plg_info(ctx->ins, + "total_file_size is less than 10 MB, will use PutObject API"); + ctx->use_put_object = FLB_TRUE; + } - goto cleanup; + tmp = flb_output_get_property("compression", ins); + if (tmp) { + ret = flb_aws_compression_get_type(tmp); + if (ret == -1) { + flb_plg_error(ctx->ins, "unknown compression: %s", tmp); + return -1; } - - result = extract_map_string_entry_by_key(&session_token, - configuration_map, - "secret_access_key", 0, FLB_TRUE); - - if (result != 0) { - flb_plg_error(context->ins, - "secret_access_key extraction error : %d", result); - - goto cleanup; + if (ctx->use_put_object == FLB_FALSE && ctx->compression == FLB_AWS_COMPRESS_ARROW) { + flb_plg_error(ctx->ins, + "use_put_object must be enabled when Apache Arrow is enabled"); + return -1; } + ctx->compression = ret; + } - setenv("aws_secret_access_key", secret_access_key, 1); - setenv("aws_access_key_id", access_key_id, 1); - setenv("aws_session_token", session_token, 1); - -cleanup: - if (result != 0) { - if (secret_access_key != NULL) { - free(secret_access_key); - - secret_access_key = NULL; - } - - if (access_key_id != NULL) { - free(access_key_id); - - access_key_id = NULL; + tmp = flb_output_get_property("content_type", ins); + if (tmp) { + ctx->content_type = (char *) tmp; + } + if (ctx->use_put_object == FLB_FALSE) { + /* upload_chunk_size */ + if (ctx->upload_chunk_size <= 0) { + flb_plg_error(ctx->ins, "Failed to parse upload_chunk_size %s", tmp); + return -1; + } + if (ctx->upload_chunk_size > ctx->file_size) { + flb_plg_error(ctx->ins, + "upload_chunk_size can not be larger than total_file_size"); + return -1; + } + if (ctx->upload_chunk_size < MIN_CHUNKED_UPLOAD_SIZE) { + flb_plg_error(ctx->ins, "upload_chunk_size must be at least 5,242,880 bytes"); + return -1; + } + if (ctx->compression == FLB_AWS_COMPRESS_GZIP) { + if(ctx->upload_chunk_size > MAX_CHUNKED_UPLOAD_COMPRESS_SIZE) { + flb_plg_error(ctx->ins, "upload_chunk_size in compressed multipart upload cannot exceed 5GB"); + return -1; } - - if (session_token != NULL) { - free(session_token); - - session_token = NULL; + } else { + if (ctx->upload_chunk_size > MAX_CHUNKED_UPLOAD_SIZE) { + flb_plg_error(ctx->ins, "Max upload_chunk_size is 50MB"); + return -1; } - - - result = -1; } - - msgpack_unpacked_destroy(&unpacked_root); - - flb_free(msgpack_body); } - return result; -} - -static int apply_remote_configuration(struct flb_s3 *context) -{ - int ret; - size_t b_sent; - struct flb_http_client *http_client; - struct flb_connection *connection; - struct flb_upstream *upstream; - struct flb_tls *tls_context; - char *scheme = NULL; - char *host = NULL; - char *port = NULL; - char *uri = NULL; - uint16_t port_as_short; - - /* Parse and split URL */ - ret = 
flb_utils_url_split(context->configuration_endpoint_url, - &scheme, &host, &port, &uri); - if (ret == -1) { - flb_plg_error(context->ins, - "Invalid URL: %s", - context->configuration_endpoint_url); - + if (ctx->upload_chunk_size != MIN_CHUNKED_UPLOAD_SIZE && + (ctx->upload_chunk_size * 2) > ctx->file_size) { + flb_plg_error(ctx->ins, "total_file_size is less than 2x upload_chunk_size"); return -1; } - if (port != NULL) { - port_as_short = (uint16_t) strtoul(port, NULL, 10); - } - else { - if (scheme != NULL) { - if (strcasecmp(scheme, "https") == 0) { - port_as_short = 443; - } - else { - port_as_short = 80; - } + if (ctx->use_put_object == FLB_TRUE) { + /* + * code internally uses 'upload_chunk_size' as the unit for each Put, + * regardless of which API is used to send data + */ + ctx->upload_chunk_size = ctx->file_size; + if (ctx->file_size > MAX_FILE_SIZE_PUT_OBJECT) { + flb_plg_error(ctx->ins, "Max total_file_size is 50M when use_put_object is enabled"); + return -1; } } - if (scheme != NULL) { - flb_free(scheme); - scheme = NULL; - } + tmp = flb_output_get_property("endpoint", ins); + if (tmp) { + ctx->insecure = strncmp(tmp, "http://", 7) == 0 ? FLB_TRUE : FLB_FALSE; + if (ctx->insecure == FLB_TRUE) { + ep = removeProtocol((char *) tmp, "http://"); + } + else { + ep = removeProtocol((char *) tmp, "https://"); + } - if (port != NULL) { - flb_free(port); - port = NULL; - } - - if (host == NULL || uri == NULL) { - flb_plg_error(context->ins, - "Invalid URL: %s", - context->configuration_endpoint_url); - - if (host != NULL) { - flb_free(host); + split = flb_utils_split((const char *)ep, ':', 1); + if (!split) { + flb_errno(); + return -1; } - - if (uri != NULL) { - flb_free(uri); + list_size = mk_list_size(split); + if (list_size > 2) { + flb_plg_error(ctx->ins, "Failed to split endpoint"); + flb_utils_split_free(split); + return -1; } - return -2; + tok = mk_list_entry_first(split, struct flb_split_entry, _head); + ctx->endpoint = flb_strndup(tok->value, tok->len); + if (!ctx->endpoint) { + flb_errno(); + flb_utils_split_free(split); + return -1; + } + ctx->free_endpoint = FLB_TRUE; + if (list_size == 2) { + tok = mk_list_entry_next(&tok->_head, struct flb_split_entry, _head, split); + ctx->port = atoi(tok->value); + } + else { + ctx->port = ctx->insecure == FLB_TRUE ? 
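Taken together, the file-size checks that patch 15 carries over into the reshuffled cb_s3_init() enforce a small constraint matrix. A condensed sketch using the constants from this file (the standalone helper form is illustrative only; the real checks are inlined and additionally auto-switch to PutObject when total_file_size is below 2 x MIN_CHUNKED_UPLOAD_SIZE):

    /* returns -1 when the configured sizes are inconsistent */
    static int validate_sizes(struct flb_s3 *ctx)
    {
        if (ctx->file_size < 1000000 || ctx->file_size > MAX_FILE_SIZE) {
            return -1;                 /* total_file_size out of range */
        }
        if (ctx->use_put_object == FLB_FALSE) {
            /* MIN_CHUNKED_UPLOAD_SIZE is 5,242,880 bytes (5 MiB) */
            if (ctx->upload_chunk_size < MIN_CHUNKED_UPLOAD_SIZE ||
                ctx->upload_chunk_size > ctx->file_size) {
                return -1;
            }
            if (ctx->upload_chunk_size != MIN_CHUNKED_UPLOAD_SIZE &&
                ctx->upload_chunk_size * 2 > ctx->file_size) {
                return -1;             /* need room for at least two parts */
            }
        }
        return 0;
    }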
DEFAULT_S3_INSECURE_PORT : DEFAULT_S3_PORT; + } + flb_utils_split_free(split); + } + else { + /* default endpoint for the given region */ + ctx->endpoint = flb_aws_endpoint("s3", ctx->region); + ctx->insecure = FLB_FALSE; + ctx->port = DEFAULT_S3_PORT; + ctx->free_endpoint = FLB_TRUE; + if (!ctx->endpoint) { + flb_plg_error(ctx->ins, "Could not construct S3 endpoint"); + return -1; + } } - tls_context = flb_tls_create(FLB_TLS_CLIENT_MODE, - FLB_FALSE, - FLB_FALSE, - host, - NULL, - NULL, - NULL, - NULL, - NULL); - - if (tls_context == NULL) { - flb_free(host); - flb_free(uri); + tmp = flb_output_get_property("sts_endpoint", ins); + if (tmp) { + ctx->sts_endpoint = (char *) tmp; + } - flb_plg_error(context->ins, - "TLS context creation errror"); + tmp = flb_output_get_property("canned_acl", ins); + if (tmp) { + ctx->canned_acl = (char *) tmp; + } - return -2; + tmp = flb_output_get_property("storage_class", ins); + if (tmp) { + ctx->storage_class = (char *) tmp; } - upstream = flb_upstream_create_url(context->ins->config, - context->configuration_endpoint_url, - FLB_IO_TCP, - tls_context); + if (ctx->insecure == FLB_FALSE) { + ctx->client_tls = flb_tls_create(FLB_TLS_CLIENT_MODE, + ins->tls_verify, + ins->tls_debug, + ins->tls_vhost, + ins->tls_ca_path, + ins->tls_ca_file, + ins->tls_crt_file, + ins->tls_key_file, + ins->tls_key_passwd); + if (!ctx->client_tls) { + flb_plg_error(ctx->ins, "Failed to create tls context"); + return -1; + } + } - if (upstream == NULL) { - flb_tls_destroy(tls_context); - flb_free(host); - flb_free(uri); + /* AWS provider needs a separate TLS instance */ + ctx->provider_tls = flb_tls_create(FLB_TLS_CLIENT_MODE, + FLB_TRUE, + ins->tls_debug, + ins->tls_vhost, + ins->tls_ca_path, + ins->tls_ca_file, + ins->tls_crt_file, + ins->tls_key_file, + ins->tls_key_passwd); + if (!ctx->provider_tls) { + flb_errno(); + return -1; + } - flb_plg_error(context->ins, - "Upstream creation errror"); + ctx->provider = flb_standard_chain_provider_create(config, + ctx->provider_tls, + ctx->region, + ctx->sts_endpoint, + NULL, + flb_aws_client_generator(), + ctx->profile); - return -3; + if (!ctx->provider) { + flb_plg_error(ctx->ins, "Failed to create AWS Credential Provider"); + return -1; } - flb_stream_disable_async_mode(&upstream->base); + tmp = flb_output_get_property("role_arn", ins); + if (tmp) { + /* Use the STS Provider */ + ctx->base_provider = ctx->provider; + role_arn = (char *) tmp; + + /* STS provider needs yet another separate TLS instance */ + ctx->sts_provider_tls = flb_tls_create(FLB_TLS_CLIENT_MODE, + FLB_TRUE, + ins->tls_debug, + ins->tls_vhost, + ins->tls_ca_path, + ins->tls_ca_file, + ins->tls_crt_file, + ins->tls_key_file, + ins->tls_key_passwd); - /* Get upstream connection */ - connection = flb_upstream_conn_get(upstream); - if (connection == NULL) { - flb_upstream_destroy(upstream); - flb_tls_destroy(tls_context); - flb_free(host); - flb_free(uri); + if (!ctx->sts_provider_tls) { + flb_errno(); + return -1; + } - flb_plg_error(context->ins, - "cannot create connection"); + session_name = flb_sts_session_name(); + if (!session_name) { + flb_plg_error(ctx->ins, "Failed to create aws iam role " + "session name"); + flb_errno(); + return -1; + } - return -3; + ctx->provider = flb_sts_provider_create(config, + ctx->sts_provider_tls, + ctx->base_provider, + ctx->external_id, + role_arn, + session_name, + ctx->region, + ctx->sts_endpoint, + NULL, + flb_aws_client_generator()); + flb_free(session_name); + if (!ctx->provider) { + flb_plg_error(ctx->ins, "Failed to 
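When role_arn is set, the relocated init code layers credential providers rather than replacing them: the standard chain provider is kept as base_provider and an STS assume-role provider is stacked on top of it, each with its own TLS context. Condensed from the hunk above (error handling elided for brevity):

    ctx->provider = flb_standard_chain_provider_create(config, ctx->provider_tls,
                                                       ctx->region, ctx->sts_endpoint,
                                                       NULL, flb_aws_client_generator(),
                                                       ctx->profile);
    if (role_arn != NULL) {
        ctx->base_provider = ctx->provider;        /* chain becomes the base */
        session_name = flb_sts_session_name();
        ctx->provider = flb_sts_provider_create(config, ctx->sts_provider_tls,
                                                ctx->base_provider, ctx->external_id,
                                                role_arn, session_name, ctx->region,
                                                ctx->sts_endpoint, NULL,
                                                flb_aws_client_generator());
        flb_free(session_name);
    }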
create AWS STS Credential " + "Provider"); + return -1; + } } - /* Create HTTP client context */ - http_client = flb_http_client(connection, - FLB_HTTP_GET, - uri, - NULL, 0, - host, - (int) port_as_short, - NULL, 0); - if (http_client == NULL) { - flb_upstream_conn_release(connection); - flb_upstream_destroy(upstream); - flb_tls_destroy(tls_context); - flb_free(host); - flb_free(uri); + /* read any remaining buffers from previous (failed) executions */ + ctx->has_old_buffers = s3_store_has_data(ctx); + ctx->has_old_uploads = s3_store_has_uploads(ctx); - flb_plg_error(context->ins, - "cannot create HTTP client"); + /* Multipart */ + multipart_read_uploads_from_fs(ctx); - return -4; + if (mk_list_size(&ctx->uploads) > 0) { + /* note that these should be sent */ + ctx->has_old_uploads = FLB_TRUE; } - flb_http_add_header(http_client, - "Accept", - strlen("Accept"), - "application/json", - 16); - - /* User Agent */ - flb_http_add_header(http_client, - "User-Agent", 10, - "Fluent-Bit", 10); + /* create S3 client */ + generator = flb_aws_client_generator(); + ctx->s3_client = generator->create(); + if (!ctx->s3_client) { + return -1; + } + ctx->s3_client->name = "s3_client"; + ctx->s3_client->has_auth = FLB_TRUE; + ctx->s3_client->provider = ctx->provider; + ctx->s3_client->region = ctx->region; + ctx->s3_client->service = "s3"; + ctx->s3_client->port = ctx->port; + ctx->s3_client->flags = 0; + ctx->s3_client->proxy = NULL; + ctx->s3_client->s3_mode = S3_MODE_SIGNED_PAYLOAD; + ctx->s3_client->retry_requests = ctx->retry_requests; - if (context->configuration_endpoint_username != NULL && - context->configuration_endpoint_password != NULL) { - flb_http_basic_auth(http_client, - context->configuration_endpoint_username, - context->configuration_endpoint_password); + if (ctx->insecure == FLB_TRUE) { + ctx->s3_client->upstream = flb_upstream_create(config, ctx->endpoint, ctx->port, + FLB_IO_TCP, NULL); + } else { + ctx->s3_client->upstream = flb_upstream_create(config, ctx->endpoint, ctx->port, + FLB_IO_TLS, ctx->client_tls); } - else if (context->configuration_endpoint_bearer_token != NULL) { - flb_http_bearer_auth(http_client, - context->configuration_endpoint_bearer_token); + if (!ctx->s3_client->upstream) { + flb_plg_error(ctx->ins, "Connection initialization error"); + return -1; } - /* Send HTTP request */ - ret = flb_http_do(http_client, &b_sent); + flb_output_upstream_set(ctx->s3_client->upstream, ctx->ins); - if (ret == -1) { - flb_http_client_destroy(http_client); - flb_upstream_conn_release(connection); - flb_upstream_destroy(upstream); - flb_tls_destroy(tls_context); - flb_free(host); - flb_free(uri); + ctx->s3_client->host = ctx->endpoint; - flb_plg_error(context->ins, - "Error sending configuration request"); + /* set to sync mode and initialize credentials */ + ctx->provider->provider_vtable->sync(ctx->provider); + ctx->provider->provider_vtable->init(ctx->provider); - return -5; + ctx->timer_created = FLB_FALSE; + ctx->timer_ms = (int) (ctx->upload_timeout / 6) * 1000; + if (ctx->timer_ms > UPLOAD_TIMER_MAX_WAIT) { + ctx->timer_ms = UPLOAD_TIMER_MAX_WAIT; } - - if (http_client->resp.status == 200) { - flb_plg_info(context->ins, - "Configuration retrieved successfully"); - - ret = process_remote_configuration_payload( - context, - http_client->resp.payload, - http_client->resp.payload_size); + else if (ctx->timer_ms < UPLOAD_TIMER_MIN_WAIT) { + ctx->timer_ms = UPLOAD_TIMER_MIN_WAIT; + } + + /* + * S3 must ALWAYS use sync mode + * In the timer thread we do a mk_list_foreach_safe on the 
queue of uploads and chunks
+     * Iterating over those lists is not concurrency-safe. If a flush call ran at the same time
+     * and deleted an item from the list, this could cause a crash/corruption.
+     */
+    flb_stream_disable_async_mode(&ctx->s3_client->upstream->base);
+
+    if (ctx->authorization_endpoint_url != NULL) {
+        ret = blob_initialize_authorization_endpoint_upstream(ctx);
 
         if (ret != 0) {
-            flb_plg_error(context->ins,
-                          "Configuration payload processing error %d",
-                          ret);
-
-            flb_http_client_destroy(http_client);
-            flb_upstream_conn_release(connection);
-            flb_upstream_destroy(upstream);
-            flb_tls_destroy(tls_context);
-            flb_free(host);
-            flb_free(uri);
+            flb_plg_error(ctx->ins,
+                          "Failed to initialize authorization endpoint upstream");
 
-            return -7;
+            return -1;
         }
 
-        flb_plg_info(context->ins,
-                     "Configuration applied successfully");
+        ctx->s3_client->has_auth = FLB_FALSE;
     }
-    else {
-        if (http_client->resp.payload_size > 0) {
-            flb_plg_error(context->ins,
-                          "Configuration retrieval failed with status %i\n%s",
-                          http_client->resp.status,
-                          http_client->resp.payload);
-        }
-        else {
-            flb_plg_error(context->ins,
-                          "Configuration retrieval failed with status %i",
-                          http_client->resp.status);
+
+    /* clean up any old buffers found on startup */
+    if (ctx->has_old_buffers == FLB_TRUE) {
+        flb_plg_info(ctx->ins,
+                     "Sending locally buffered data from previous "
+                     "executions to S3; buffer=%s",
+                     ctx->fs->root_path);
+        ctx->has_old_buffers = FLB_FALSE;
+        ret = put_all_chunks(ctx);
+        if (ret < 0) {
+            ctx->has_old_buffers = FLB_TRUE;
+            flb_plg_error(ctx->ins,
+                          "Failed to send locally buffered data left over "
+                          "from previous executions; will retry. Buffer=%s",
+                          ctx->fs->root_path);
         }
+    }
 
-        flb_http_client_destroy(http_client);
-        flb_upstream_conn_release(connection);
-        flb_upstream_destroy(upstream);
-        flb_tls_destroy(tls_context);
-        flb_free(host);
-        flb_free(uri);
+    /* clean up any old uploads found on start up */
+    if (ctx->has_old_uploads == FLB_TRUE) {
+        flb_plg_info(ctx->ins,
+                     "Completing multipart uploads from previous "
+                     "executions to S3; buffer=%s",
+                     ctx->stream_upload->path);
+        ctx->has_old_uploads = FLB_FALSE;
 
-        return -6;
+        /*
+         * we don't need to worry if this fails; it will retry each
+         * time the upload callback is called
+         */
+        cb_s3_upload(config, ctx);
     }
 
-    flb_http_client_destroy(http_client);
-    flb_upstream_conn_release(connection);
-    flb_upstream_destroy(upstream);
-    flb_tls_destroy(tls_context);
-    flb_free(host);
-    flb_free(uri);
+    /* this is done last since in the previous block we make calls to AWS */
+    ctx->provider->provider_vtable->upstream_set(ctx->provider, ctx->ins);
+
+    /* database file for blob signal handling */
+    if (ctx->blob_database_file != NULL) {
+        ret = flb_blob_db_open(&ctx->blob_db,
+                               config,
+                               ctx->blob_database_file);
+
+        if (ret != FLB_BLOB_DB_SUCCESS) {
+            return -1;
+        }
+    }
 
     return 0;
 }
 
-static int cb_s3_init(struct flb_output_instance *ins,
-                      struct flb_config *config, void *data)
+/* worker initialization, used for our internal timers */
+static int cb_s3_worker_init(void *data, struct flb_config *config)
 {
     int ret;
-    flb_sds_t tmp_sds;
-    char *role_arn = NULL;
-    char *session_name;
-    const char *tmp;
-    struct flb_s3 *ctx = NULL;
-    struct flb_aws_client_generator *generator;
-    (void) config;
-    (void) data;
-    char *ep;
-    struct flb_split_entry *tok;
-    struct mk_list *split;
-    int list_size;
+    struct worker_info *info;
+    struct flb_s3 *ctx = data;
 
-    FLB_TLS_INIT(s3_worker_info);
+    flb_plg_info(ctx->ins, "initializing worker");
 
-    ctx = flb_calloc(1, 
sizeof(struct flb_s3)); - if (!ctx) { - flb_errno(); + info = FLB_TLS_GET(s3_worker_info); + if (!info) { + /* initialize worker global info */ + info = flb_calloc(1, sizeof(struct worker_info)); + if (!info) { + flb_errno(); + return -1; + } + info->active_upload = FLB_FALSE; + FLB_TLS_SET(s3_worker_info, info); + } + + ret = s3_timer_create(ctx); + if (ret == -1) { + flb_plg_error(ctx->ins, "failed to create upload timer"); return -1; } - ctx->ins = ins; - mk_list_init(&ctx->uploads); - mk_list_init(&ctx->upload_queue); - ctx->retry_time = 0; - ctx->upload_queue_success = FLB_FALSE; + return 0; +} - /* Export context */ - flb_output_set_context(ins, ctx); +/* worker teardown */ +static int cb_s3_worker_exit(void *data, struct flb_config *config) +{ + struct worker_info *info; + struct flb_s3 *ctx = data; - /* initialize config map */ - ret = flb_output_config_map_set(ins, (void *) ctx); - if (ret == -1) { - return -1; + flb_plg_info(ctx->ins, "initializing worker"); + + info = FLB_TLS_GET(s3_worker_info); + if (info != NULL) { + flb_free(info); + FLB_TLS_SET(s3_worker_info, NULL); } - /* the check against -1 is works here because size_t is unsigned - * and (int) -1 == unsigned max value - * Fluent Bit uses -1 (which becomes max value) to indicate undefined + return 0; +} + +/* + * return value is one of FLB_OK, FLB_RETRY, FLB_ERROR + * + * Chunk is allowed to be NULL + */ +static int upload_data(struct flb_s3 *ctx, struct s3_file *chunk, + struct multipart_upload *m_upload, + char *body, size_t body_size, + const char *tag, int tag_len) +{ + int init_upload = FLB_FALSE; + int complete_upload = FLB_FALSE; + int size_check = FLB_FALSE; + int part_num_check = FLB_FALSE; + int timeout_check = FLB_FALSE; + int ret; + void *payload_buf = NULL; + size_t payload_size = 0; + size_t preCompress_size = 0; + time_t file_first_log_time = time(NULL); + + /* + * When chunk does not exist, file_first_log_time will be the current time. + * This is only for unit tests and prevents unit tests from segfaulting when chunk is + * NULL because if so chunk->first_log_time will be NULl either and will cause + * segfault during the process of put_object upload or mutipart upload. */ - if (ctx->ins->total_limit_size != -1) { - flb_plg_warn(ctx->ins, "Please use 'store_dir_limit_size' with s3 output instead of 'storage.total_limit_size'. " - "S3 has its own buffer files located in the store_dir."); + if (chunk != NULL) { + file_first_log_time = chunk->first_log_time; } - /* Date key */ - ctx->date_key = ctx->json_date_key; - tmp = flb_output_get_property("json_date_key", ins); - if (tmp) { - /* Just check if we have to disable it */ - if (flb_utils_bool(tmp) == FLB_FALSE) { - ctx->date_key = NULL; + if (ctx->compression == FLB_AWS_COMPRESS_GZIP) { + /* Map payload */ + ret = flb_aws_compression_compress(ctx->compression, body, body_size, &payload_buf, &payload_size); + if (ret == -1) { + flb_plg_error(ctx->ins, "Failed to compress data"); + return FLB_RETRY; + } else { + preCompress_size = body_size; + body = (void *) payload_buf; + body_size = payload_size; } } - /* Date format for JSON output */ - ctx->json_date_format = FLB_PACK_JSON_DATE_ISO8601; - tmp = flb_output_get_property("json_date_format", ins); - if (tmp) { - ret = flb_pack_to_json_date_type(tmp); - if (ret == -1) { - flb_plg_error(ctx->ins, "invalid json_date_format '%s'. 
", tmp); - return -1; + if (ctx->use_put_object == FLB_TRUE) { + goto put_object; + } + + if (s3_plugin_under_test() == FLB_TRUE) { + init_upload = FLB_TRUE; + complete_upload = FLB_TRUE; + if (ctx->use_put_object == FLB_TRUE) { + goto put_object; } else { - ctx->json_date_format = ret; + goto multipart; } } - tmp = flb_output_get_property("bucket", ins); - if (!tmp) { - flb_plg_error(ctx->ins, "'bucket' is a required parameter"); - return -1; + if (m_upload == NULL) { + if (chunk != NULL && time(NULL) > + (chunk->create_time + ctx->upload_timeout + ctx->retry_time)) { + /* timeout already reached, just PutObject */ + goto put_object; + } + else if (body_size >= ctx->file_size) { + /* already big enough, just use PutObject API */ + goto put_object; + } + else if(body_size > MIN_CHUNKED_UPLOAD_SIZE) { + init_upload = FLB_TRUE; + goto multipart; + } + else { + if (ctx->use_put_object == FLB_FALSE && ctx->compression == FLB_AWS_COMPRESS_GZIP) { + flb_plg_info(ctx->ins, "Pre-compression upload_chunk_size= %zu, After compression, chunk is only %zu bytes, " + "the chunk was too small, using PutObject to upload", preCompress_size, body_size); + } + goto put_object; + } } + else { + /* existing upload */ + if (body_size < MIN_CHUNKED_UPLOAD_SIZE) { + complete_upload = FLB_TRUE; + } - /* - * store_dir is the user input, buffer_dir is what the code uses - * We append the bucket name to the dir, to support multiple instances - * of this plugin using the same buffer dir - */ - tmp_sds = concat_path(ctx->store_dir, ctx->bucket); - if (!tmp_sds) { - flb_plg_error(ctx->ins, "Could not construct buffer path"); - return -1; + goto multipart; } - ctx->buffer_dir = tmp_sds; - /* Initialize local storage */ - ret = s3_store_init(ctx); - if (ret == -1) { - flb_plg_error(ctx->ins, "Failed to initialize S3 storage: %s", - ctx->store_dir); - return -1; - } +put_object: - tmp = flb_output_get_property("s3_key_format", ins); - if (tmp) { - if (tmp[0] != '/') { - flb_plg_error(ctx->ins, "'s3_key_format' must start with a '/'"); - return -1; - } - if (strstr((char *) tmp, "$INDEX")) { - ret = init_seq_index(ctx); - if (ret < 0) { - return -1; - } - } - if (strstr((char *) tmp, "$UUID")) { - ctx->key_fmt_has_uuid = FLB_TRUE; - } - } - - /* validate 'total_file_size' */ - if (ctx->file_size <= 0) { - flb_plg_error(ctx->ins, "Failed to parse total_file_size %s", tmp); - return -1; - } - if (ctx->file_size < 1000000) { - flb_plg_error(ctx->ins, "total_file_size must be at least 1MB"); - return -1; + /* + * remove chunk from buffer list + */ + ret = s3_put_object(ctx, tag, file_first_log_time, body, body_size); + if (ctx->compression == FLB_AWS_COMPRESS_GZIP) { + flb_free(payload_buf); } - if (ctx->file_size > MAX_FILE_SIZE) { - flb_plg_error(ctx->ins, "Max total_file_size is %s bytes", MAX_FILE_SIZE_STR); - return -1; + if (ret < 0) { + /* re-add chunk to list */ + if (chunk) { + s3_store_file_unlock(chunk); + chunk->failures += 1; + } + return FLB_RETRY; } - flb_plg_info(ctx->ins, "Using upload size %lu bytes", ctx->file_size); - if (ctx->use_put_object == FLB_FALSE && ctx->file_size < 2 * MIN_CHUNKED_UPLOAD_SIZE) { - flb_plg_info(ctx->ins, - "total_file_size is less than 10 MB, will use PutObject API"); - ctx->use_put_object = FLB_TRUE; + /* data was sent successfully- delete the local buffer */ + if (chunk) { + s3_store_file_delete(ctx, chunk); } + return FLB_OK; - tmp = flb_output_get_property("compression", ins); - if (tmp) { - ret = flb_aws_compression_get_type(tmp); - if (ret == -1) { - flb_plg_error(ctx->ins, 
"unknown compression: %s", tmp); - return -1; - } - if (ctx->use_put_object == FLB_FALSE && ctx->compression == FLB_AWS_COMPRESS_ARROW) { - flb_plg_error(ctx->ins, - "use_put_object must be enabled when Apache Arrow is enabled"); - return -1; - } - ctx->compression = ret; - } +multipart: - tmp = flb_output_get_property("content_type", ins); - if (tmp) { - ctx->content_type = (char *) tmp; - } - if (ctx->use_put_object == FLB_FALSE) { - /* upload_chunk_size */ - if (ctx->upload_chunk_size <= 0) { - flb_plg_error(ctx->ins, "Failed to parse upload_chunk_size %s", tmp); - return -1; - } - if (ctx->upload_chunk_size > ctx->file_size) { - flb_plg_error(ctx->ins, - "upload_chunk_size can not be larger than total_file_size"); - return -1; - } - if (ctx->upload_chunk_size < MIN_CHUNKED_UPLOAD_SIZE) { - flb_plg_error(ctx->ins, "upload_chunk_size must be at least 5,242,880 bytes"); - return -1; - } - if (ctx->compression == FLB_AWS_COMPRESS_GZIP) { - if(ctx->upload_chunk_size > MAX_CHUNKED_UPLOAD_COMPRESS_SIZE) { - flb_plg_error(ctx->ins, "upload_chunk_size in compressed multipart upload cannot exceed 5GB"); - return -1; + if (init_upload == FLB_TRUE) { + m_upload = create_upload(ctx, tag, tag_len, file_first_log_time); + if (!m_upload) { + flb_plg_error(ctx->ins, "Could not find or create upload for tag %s", tag); + if (chunk) { + s3_store_file_unlock(chunk); } - } else { - if (ctx->upload_chunk_size > MAX_CHUNKED_UPLOAD_SIZE) { - flb_plg_error(ctx->ins, "Max upload_chunk_size is 50MB"); - return -1; + if (ctx->compression == FLB_AWS_COMPRESS_GZIP) { + flb_free(payload_buf); } + return FLB_RETRY; } } - if (ctx->upload_chunk_size != MIN_CHUNKED_UPLOAD_SIZE && - (ctx->upload_chunk_size * 2) > ctx->file_size) { - flb_plg_error(ctx->ins, "total_file_size is less than 2x upload_chunk_size"); - return -1; - } - - if (ctx->use_put_object == FLB_TRUE) { - /* - * code internally uses 'upload_chunk_size' as the unit for each Put, - * regardless of which API is used to send data - */ - ctx->upload_chunk_size = ctx->file_size; - if (ctx->file_size > MAX_FILE_SIZE_PUT_OBJECT) { - flb_plg_error(ctx->ins, "Max total_file_size is 50M when use_put_object is enabled"); - return -1; + if (m_upload->upload_state == MULTIPART_UPLOAD_STATE_NOT_CREATED) { + ret = create_multipart_upload(ctx, m_upload, NULL); + if (ret < 0) { + flb_plg_error(ctx->ins, "Could not initiate multipart upload"); + if (chunk) { + s3_store_file_unlock(chunk); + } + if (ctx->compression == FLB_AWS_COMPRESS_GZIP) { + flb_free(payload_buf); + } + return FLB_RETRY; } + m_upload->upload_state = MULTIPART_UPLOAD_STATE_CREATED; } - tmp = flb_output_get_property("endpoint", ins); - if (tmp) { - ctx->insecure = strncmp(tmp, "http://", 7) == 0 ? 
FLB_TRUE : FLB_FALSE; - if (ctx->insecure == FLB_TRUE) { - ep = removeProtocol((char *) tmp, "http://"); - } - else { - ep = removeProtocol((char *) tmp, "https://"); - } - - split = flb_utils_split((const char *)ep, ':', 1); - if (!split) { - flb_errno(); - return -1; - } - list_size = mk_list_size(split); - if (list_size > 2) { - flb_plg_error(ctx->ins, "Failed to split endpoint"); - flb_utils_split_free(split); - return -1; - } - - tok = mk_list_entry_first(split, struct flb_split_entry, _head); - ctx->endpoint = flb_strndup(tok->value, tok->len); - if (!ctx->endpoint) { - flb_errno(); - flb_utils_split_free(split); - return -1; - } - ctx->free_endpoint = FLB_TRUE; - if (list_size == 2) { - tok = mk_list_entry_next(&tok->_head, struct flb_split_entry, _head, split); - ctx->port = atoi(tok->value); + ret = upload_part(ctx, m_upload, body, body_size, NULL); + if (ret < 0) { + if (ctx->compression == FLB_AWS_COMPRESS_GZIP) { + flb_free(payload_buf); } - else { - ctx->port = ctx->insecure == FLB_TRUE ? DEFAULT_S3_INSECURE_PORT : DEFAULT_S3_PORT; + m_upload->upload_errors += 1; + /* re-add chunk to list */ + if (chunk) { + s3_store_file_unlock(chunk); + chunk->failures += 1; } - flb_utils_split_free(split); + return FLB_RETRY; } - else { - /* default endpoint for the given region */ - ctx->endpoint = flb_aws_endpoint("s3", ctx->region); - ctx->insecure = FLB_FALSE; - ctx->port = DEFAULT_S3_PORT; - ctx->free_endpoint = FLB_TRUE; - if (!ctx->endpoint) { - flb_plg_error(ctx->ins, "Could not construct S3 endpoint"); - return -1; - } + m_upload->part_number += 1; + /* data was sent successfully- delete the local buffer */ + if (chunk) { + s3_store_file_delete(ctx, chunk); + chunk = NULL; } - - tmp = flb_output_get_property("sts_endpoint", ins); - if (tmp) { - ctx->sts_endpoint = (char *) tmp; + if (ctx->compression == FLB_AWS_COMPRESS_GZIP) { + flb_free(payload_buf); } - - tmp = flb_output_get_property("canned_acl", ins); - if (tmp) { - ctx->canned_acl = (char *) tmp; + if (m_upload->bytes >= ctx->file_size) { + size_check = FLB_TRUE; + flb_plg_info(ctx->ins, "Will complete upload for %s because uploaded data is greater" + " than size set by total_file_size", m_upload->s3_key); } - - tmp = flb_output_get_property("storage_class", ins); - if (tmp) { - ctx->storage_class = (char *) tmp; + if (m_upload->part_number >= 10000) { + part_num_check = FLB_TRUE; + flb_plg_info(ctx->ins, "Will complete upload for %s because 10,000 chunks " + "(the API limit) have been uploaded", m_upload->s3_key); } - - if (ctx->insecure == FLB_FALSE) { - ctx->client_tls = flb_tls_create(FLB_TLS_CLIENT_MODE, - ins->tls_verify, - ins->tls_debug, - ins->tls_vhost, - ins->tls_ca_path, - ins->tls_ca_file, - ins->tls_crt_file, - ins->tls_key_file, - ins->tls_key_passwd); - if (!ctx->client_tls) { - flb_plg_error(ctx->ins, "Failed to create tls context"); - return -1; - } + if (time(NULL) > + (m_upload->init_time + ctx->upload_timeout + ctx->retry_time)) { + timeout_check = FLB_TRUE; + flb_plg_info(ctx->ins, "Will complete upload for %s because upload_timeout" + " has elapsed", m_upload->s3_key); } - - /* AWS provider needs a separate TLS instance */ - ctx->provider_tls = flb_tls_create(FLB_TLS_CLIENT_MODE, - FLB_TRUE, - ins->tls_debug, - ins->tls_vhost, - ins->tls_ca_path, - ins->tls_ca_file, - ins->tls_crt_file, - ins->tls_key_file, - ins->tls_key_passwd); - if (!ctx->provider_tls) { - flb_errno(); - return -1; + if (size_check || part_num_check || timeout_check) { + complete_upload = FLB_TRUE; } - ctx->provider = 
flb_standard_chain_provider_create(config, - ctx->provider_tls, - ctx->region, - ctx->sts_endpoint, - NULL, - flb_aws_client_generator(), - ctx->profile); - - if (!ctx->provider) { - flb_plg_error(ctx->ins, "Failed to create AWS Credential Provider"); - return -1; + if (complete_upload == FLB_TRUE) { + /* mark for completion- the upload timer will handle actual completion */ + m_upload->upload_state = MULTIPART_UPLOAD_STATE_COMPLETE_IN_PROGRESS; } - tmp = flb_output_get_property("role_arn", ins); - if (tmp) { - /* Use the STS Provider */ - ctx->base_provider = ctx->provider; - role_arn = (char *) tmp; + return FLB_OK; +} - /* STS provider needs yet another separate TLS instance */ - ctx->sts_provider_tls = flb_tls_create(FLB_TLS_CLIENT_MODE, - FLB_TRUE, - ins->tls_debug, - ins->tls_vhost, - ins->tls_ca_path, - ins->tls_ca_file, - ins->tls_crt_file, - ins->tls_key_file, - ins->tls_key_passwd); - if (!ctx->sts_provider_tls) { - flb_errno(); - return -1; - } +/* + * Attempts to send all chunks to S3 using PutObject + * Used on shut down to try to send all buffered data + * Used on start up to try to send any leftover buffers from previous executions + */ +static int put_all_chunks(struct flb_s3 *ctx) +{ + struct s3_file *chunk; + struct mk_list *tmp; + struct mk_list *head; + struct mk_list *f_head; + struct flb_fstore_file *fsf; + struct flb_fstore_stream *fs_stream; + void *payload_buf = NULL; + size_t payload_size = 0; + char *buffer = NULL; + size_t buffer_size; + int ret; - session_name = flb_sts_session_name(); - if (!session_name) { - flb_plg_error(ctx->ins, "Failed to create aws iam role " - "session name"); - flb_errno(); - return -1; + mk_list_foreach(head, &ctx->fs->streams) { + /* skip multi upload stream */ + fs_stream = mk_list_entry(head, struct flb_fstore_stream, _head); + if (fs_stream == ctx->stream_upload) { + continue; } - - ctx->provider = flb_sts_provider_create(config, - ctx->sts_provider_tls, - ctx->base_provider, - ctx->external_id, - role_arn, - session_name, - ctx->region, - ctx->sts_endpoint, - NULL, - flb_aws_client_generator()); - flb_free(session_name); - if (!ctx->provider) { - flb_plg_error(ctx->ins, "Failed to create AWS STS Credential " - "Provider"); - return -1; + /* skip metadata stream */ + if (fs_stream == ctx->stream_metadata) { + continue; } - } - if (ctx->configuration_endpoint_url != NULL) { - ret = apply_remote_configuration(ctx); + mk_list_foreach_safe(f_head, tmp, &fs_stream->files) { + fsf = mk_list_entry(f_head, struct flb_fstore_file, _head); + chunk = fsf->data; - if (ret != 0) { - flb_plg_error(ctx->ins, "Failed to retrieve configuration " - "from endpoint"); - flb_errno(); + /* Locked chunks are being processed, skip */ + if (chunk->locked == FLB_TRUE) { + continue; + } - return -1; - } - } + if (chunk->failures >= MAX_UPLOAD_ERRORS) { + flb_plg_warn(ctx->ins, + "Chunk for tag %s failed to send %i times, " + "will not retry", + (char *) fsf->meta_buf, MAX_UPLOAD_ERRORS); + flb_fstore_file_inactive(ctx->fs, fsf); + continue; + } - /* read any remaining buffers from previous (failed) executions */ - ctx->has_old_buffers = s3_store_has_data(ctx); - ctx->has_old_uploads = s3_store_has_uploads(ctx); + ret = construct_request_buffer(ctx, NULL, chunk, + &buffer, &buffer_size); + if (ret < 0) { + flb_plg_error(ctx->ins, + "Could not construct request buffer for %s", + chunk->file_path); + return -1; + } - /* Multipart */ - multipart_read_uploads_from_fs(ctx); + if (ctx->compression != FLB_AWS_COMPRESS_NONE) { + /* Map payload */ + ret = 
flb_aws_compression_compress(ctx->compression, buffer, buffer_size, &payload_buf, &payload_size); + if (ret == -1) { + flb_plg_error(ctx->ins, "Failed to compress data, uploading uncompressed data instead to prevent data loss"); + } else { + flb_plg_info(ctx->ins, "Pre-compression chunk size is %zu, After compression, chunk is %zu bytes", buffer_size, payload_size); + flb_free(buffer); - if (mk_list_size(&ctx->uploads) > 0) { - /* note that these should be sent */ - ctx->has_old_uploads = FLB_TRUE; - } + buffer = (void *) payload_buf; + buffer_size = payload_size; + } + } - /* create S3 client */ - generator = flb_aws_client_generator(); - ctx->s3_client = generator->create(); - if (!ctx->s3_client) { - return -1; - } - ctx->s3_client->name = "s3_client"; - ctx->s3_client->has_auth = FLB_TRUE; - ctx->s3_client->provider = ctx->provider; - ctx->s3_client->region = ctx->region; - ctx->s3_client->service = "s3"; - ctx->s3_client->port = ctx->port; - ctx->s3_client->flags = 0; - ctx->s3_client->proxy = NULL; - ctx->s3_client->s3_mode = S3_MODE_SIGNED_PAYLOAD; - ctx->s3_client->retry_requests = ctx->retry_requests; + ret = s3_put_object(ctx, (const char *) + fsf->meta_buf, + chunk->create_time, buffer, buffer_size); + flb_free(buffer); + if (ret < 0) { + s3_store_file_unlock(chunk); + chunk->failures += 1; + return -1; + } - if (ctx->insecure == FLB_TRUE) { - ctx->s3_client->upstream = flb_upstream_create(config, ctx->endpoint, ctx->port, - FLB_IO_TCP, NULL); - } else { - ctx->s3_client->upstream = flb_upstream_create(config, ctx->endpoint, ctx->port, - FLB_IO_TLS, ctx->client_tls); - } - if (!ctx->s3_client->upstream) { - flb_plg_error(ctx->ins, "Connection initialization error"); - return -1; + /* data was sent successfully- delete the local buffer */ + s3_store_file_delete(ctx, chunk); + } } - flb_output_upstream_set(ctx->s3_client->upstream, ctx->ins); - - ctx->s3_client->host = ctx->endpoint; + return 0; +} - /* set to sync mode and initialize credentials */ - ctx->provider->provider_vtable->sync(ctx->provider); - ctx->provider->provider_vtable->init(ctx->provider); +/* + * Either new_data or chunk can be NULL, but not both + */ +static int construct_request_buffer(struct flb_s3 *ctx, flb_sds_t new_data, + struct s3_file *chunk, + char **out_buf, size_t *out_size) +{ + char *body; + char *tmp; + size_t body_size = 0; + char *buffered_data = NULL; + size_t buffer_size = 0; + int ret; - ctx->timer_created = FLB_FALSE; - ctx->timer_ms = (int) (ctx->upload_timeout / 6) * 1000; - if (ctx->timer_ms > UPLOAD_TIMER_MAX_WAIT) { - ctx->timer_ms = UPLOAD_TIMER_MAX_WAIT; - } - else if (ctx->timer_ms < UPLOAD_TIMER_MIN_WAIT) { - ctx->timer_ms = UPLOAD_TIMER_MIN_WAIT; + if (new_data == NULL && chunk == NULL) { + flb_plg_error(ctx->ins, "[construct_request_buffer] Something went wrong" + " both chunk and new_data are NULL"); + return -1; } - /* - * S3 must ALWAYS use sync mode - * In the timer thread we do a mk_list_foreach_safe on the queue of uplaods and chunks - * Iterating over those lists is not concurrent safe. If a flush call ran at the same time - * And deleted an item from the list, this could cause a crash/corruption. 
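One behavior of the relocated put_all_chunks() worth highlighting: unlike the flush path, it treats compression failure as non-fatal and falls back to uploading the raw buffer, so a transient failure degrades to an uncompressed object instead of data loss. The pattern, reduced to a sketch of the lines above:

    ret = flb_aws_compression_compress(ctx->compression, buffer, buffer_size,
                                       &payload_buf, &payload_size);
    if (ret == -1) {
        /* keep buffer/buffer_size untouched and upload uncompressed */
    }
    else {
        flb_free(buffer);                  /* original no longer needed */
        buffer = (char *) payload_buf;
        buffer_size = payload_size;
    }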
- */ - flb_stream_disable_async_mode(&ctx->s3_client->upstream->base); - - /* clean up any old buffers found on startup */ - if (ctx->has_old_buffers == FLB_TRUE) { - flb_plg_info(ctx->ins, - "Sending locally buffered data from previous " - "executions to S3; buffer=%s", - ctx->fs->root_path); - ctx->has_old_buffers = FLB_FALSE; - ret = put_all_chunks(ctx); + if (chunk) { + ret = s3_store_file_read(ctx, chunk, &buffered_data, &buffer_size); if (ret < 0) { - ctx->has_old_buffers = FLB_TRUE; - flb_plg_error(ctx->ins, - "Failed to send locally buffered data left over " - "from previous executions; will retry. Buffer=%s", - ctx->fs->root_path); + flb_plg_error(ctx->ins, "Could not read locally buffered data %s", + chunk->file_path); + return -1; } - } - - /* clean up any old uploads found on start up */ - if (ctx->has_old_uploads == FLB_TRUE) { - flb_plg_info(ctx->ins, - "Completing multipart uploads from previous " - "executions to S3; buffer=%s", - ctx->stream_upload->path); - ctx->has_old_uploads = FLB_FALSE; /* - * we don't need to worry if this fails; it will retry each - * time the upload callback is called + * lock the chunk from buffer list */ - cb_s3_upload(config, ctx); + s3_store_file_lock(chunk); + body = buffered_data; + body_size = buffer_size; } - /* this is done last since in the previous block we make calls to AWS */ - ctx->provider->provider_vtable->upstream_set(ctx->provider, ctx->ins); - - /* database file for blob signal handling */ - if (ctx->blob_database_file != NULL) { - ret = flb_blob_db_open(&ctx->blob_db, - config, - ctx->blob_database_file); + /* + * If new data is arriving, increase the original 'buffered_data' size + * to append the new one. + */ + if (new_data) { + body_size += flb_sds_len(new_data); - if (ret != FLB_BLOB_DB_SUCCESS) { + tmp = flb_realloc(buffered_data, body_size + 1); + if (!tmp) { + flb_errno(); + flb_free(buffered_data); + if (chunk) { + s3_store_file_unlock(chunk); + } return -1; } + body = buffered_data = tmp; + memcpy(body + buffer_size, new_data, flb_sds_len(new_data)); + body[body_size] = '\0'; } - return 0; + *out_buf = body; + *out_size = body_size; + + return 0; } -/* worker initialization, used for our internal timers */ -static int cb_s3_worker_init(void *data, struct flb_config *config) +static int s3_put_object(struct flb_s3 *ctx, const char *tag, time_t file_first_log_time, + char *body, size_t body_size) { + flb_sds_t s3_key = NULL; + struct flb_http_client *c = NULL; + struct flb_aws_client *s3_client; + struct flb_aws_header *headers = NULL; + char *random_alphanumeric; + int append_random = FLB_FALSE; + int len; int ret; - struct worker_info *info; - struct flb_s3 *ctx = data; - - flb_plg_info(ctx->ins, "initializing worker"); - - info = FLB_TLS_GET(s3_worker_info); - if (!info) { - /* initialize worker global info */ - info = flb_calloc(1, sizeof(struct worker_info)); - if (!info) { - flb_errno(); - return -1; - } - info->active_upload = FLB_FALSE; - FLB_TLS_SET(s3_worker_info, info); - } + int num_headers = 0; + char *final_key; + flb_sds_t uri; + flb_sds_t tmp; + char final_body_md5[25]; - ret = s3_timer_create(ctx); - if (ret == -1) { - flb_plg_error(ctx->ins, "failed to create upload timer"); + s3_key = flb_get_s3_key(ctx->s3_key_format, file_first_log_time, tag, + ctx->tag_delimiters, ctx->seq_index); + if (!s3_key) { + flb_plg_error(ctx->ins, "Failed to construct S3 Object Key for %s", tag); return -1; } - return 0; -} + len = strlen(s3_key); + if ((len + 16) <= 1024 && !ctx->key_fmt_has_uuid && !ctx->static_file_path 
&& + !ctx->key_fmt_has_seq_index) { + append_random = FLB_TRUE; + len += 16; + } + len += strlen(ctx->bucket + 1); -/* worker teardown */ -static int cb_s3_worker_exit(void *data, struct flb_config *config) -{ - struct worker_info *info; - struct flb_s3 *ctx = data; + uri = flb_sds_create_size(len); - flb_plg_info(ctx->ins, "initializing worker"); + if (append_random == FLB_TRUE) { + random_alphanumeric = flb_sts_session_name(); + if (!random_alphanumeric) { + flb_sds_destroy(s3_key); + flb_sds_destroy(uri); + flb_plg_error(ctx->ins, "Failed to create randomness for S3 key %s", tag); + return -1; + } + /* only use 8 chars of the random string */ + random_alphanumeric[8] = '\0'; - info = FLB_TLS_GET(s3_worker_info); - if (info != NULL) { - flb_free(info); - FLB_TLS_SET(s3_worker_info, NULL); + tmp = flb_sds_printf(&uri, "/%s%s-object%s", ctx->bucket, s3_key, + random_alphanumeric); + flb_free(random_alphanumeric); + } + else { + tmp = flb_sds_printf(&uri, "/%s%s", ctx->bucket, s3_key); } - return 0; -} - -/* - * return value is one of FLB_OK, FLB_RETRY, FLB_ERROR - * - * Chunk is allowed to be NULL - */ -static int upload_data(struct flb_s3 *ctx, struct s3_file *chunk, - struct multipart_upload *m_upload, - char *body, size_t body_size, - const char *tag, int tag_len) -{ - int init_upload = FLB_FALSE; - int complete_upload = FLB_FALSE; - int size_check = FLB_FALSE; - int part_num_check = FLB_FALSE; - int timeout_check = FLB_FALSE; - int ret; - void *payload_buf = NULL; - size_t payload_size = 0; - size_t preCompress_size = 0; - time_t file_first_log_time = time(NULL); - - /* - * When chunk does not exist, file_first_log_time will be the current time. - * This is only for unit tests and prevents unit tests from segfaulting when chunk is - * NULL because if so chunk->first_log_time will be NULl either and will cause - * segfault during the process of put_object upload or mutipart upload. 
- */ - if (chunk != NULL) { - file_first_log_time = chunk->first_log_time; + if (!tmp) { + flb_sds_destroy(s3_key); + flb_plg_error(ctx->ins, "Failed to create PutObject URI"); + return -1; } + flb_sds_destroy(s3_key); + uri = tmp; - if (ctx->compression == FLB_AWS_COMPRESS_GZIP) { - /* Map payload */ - ret = flb_aws_compression_compress(ctx->compression, body, body_size, &payload_buf, &payload_size); - if (ret == -1) { - flb_plg_error(ctx->ins, "Failed to compress data"); - return FLB_RETRY; - } else { - preCompress_size = body_size; - body = (void *) payload_buf; - body_size = payload_size; + memset(final_body_md5, 0, sizeof(final_body_md5)); + if (ctx->send_content_md5 == FLB_TRUE) { + ret = get_md5_base64(body, body_size, + final_body_md5, sizeof(final_body_md5)); + if (ret != 0) { + flb_plg_error(ctx->ins, "Failed to create Content-MD5 header"); + flb_sds_destroy(uri); + return -1; } } - if (ctx->use_put_object == FLB_TRUE) { - goto put_object; - } + /* Update file and increment index value right before request */ + if (ctx->key_fmt_has_seq_index) { + ctx->seq_index++; - if (s3_plugin_under_test() == FLB_TRUE) { - init_upload = FLB_TRUE; - complete_upload = FLB_TRUE; - if (ctx->use_put_object == FLB_TRUE) { - goto put_object; - } - else { - goto multipart; + ret = write_seq_index(ctx->seq_index_file, ctx->seq_index); + if (ret < 0 && access(ctx->seq_index_file, F_OK) == 0) { + ctx->seq_index--; + flb_plg_error(ctx->ins, "Failed to update sequential index metadata file"); + return -1; } } - if (m_upload == NULL) { - if (chunk != NULL && time(NULL) > - (chunk->create_time + ctx->upload_timeout + ctx->retry_time)) { - /* timeout already reached, just PutObject */ - goto put_object; - } - else if (body_size >= ctx->file_size) { - /* already big enough, just use PutObject API */ - goto put_object; - } - else if(body_size > MIN_CHUNKED_UPLOAD_SIZE) { - init_upload = FLB_TRUE; - goto multipart; - } - else { - if (ctx->use_put_object == FLB_FALSE && ctx->compression == FLB_AWS_COMPRESS_GZIP) { - flb_plg_info(ctx->ins, "Pre-compression upload_chunk_size= %zu, After compression, chunk is only %zu bytes, " - "the chunk was too small, using PutObject to upload", preCompress_size, body_size); - } - goto put_object; - } + s3_client = ctx->s3_client; + if (s3_plugin_under_test() == FLB_TRUE) { + c = mock_s3_call("TEST_PUT_OBJECT_ERROR", "PutObject"); } else { - /* existing upload */ - if (body_size < MIN_CHUNKED_UPLOAD_SIZE) { - complete_upload = FLB_TRUE; + ret = create_headers(ctx, final_body_md5, &headers, &num_headers, FLB_FALSE); + if (ret == -1) { + flb_plg_error(ctx->ins, "Failed to create headers"); + flb_sds_destroy(uri); + goto decrement_index; } - - goto multipart; + c = s3_client->client_vtable->request(s3_client, FLB_HTTP_PUT, + uri, body, body_size, + headers, num_headers); + flb_free(headers); } + if (c) { + flb_plg_debug(ctx->ins, "PutObject http status=%d", c->resp.status); + if (c->resp.status == 200) { + /* + * URI contains bucket name, so we must advance over it + * to print the object key + */ + final_key = uri + strlen(ctx->bucket) + 1; + flb_plg_info(ctx->ins, "Successfully uploaded object %s", final_key); + flb_sds_destroy(uri); + flb_http_client_destroy(c); -put_object: - - /* - * remove chunk from buffer list - */ - ret = s3_put_object(ctx, tag, file_first_log_time, body, body_size); - if (ctx->compression == FLB_AWS_COMPRESS_GZIP) { - flb_free(payload_buf); - } - if (ret < 0) { - /* re-add chunk to list */ - if (chunk) { - s3_store_file_unlock(chunk); - chunk->failures += 
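The $INDEX bookkeeping in s3_put_object() uses an optimistic update: the sequence counter is incremented and persisted right before the request, then decremented and re-persisted in the decrement_index error path below. A slightly simplified sketch (the real code additionally checks with access() that the metadata file exists before rolling back a failed write):

    ctx->seq_index++;
    if (write_seq_index(ctx->seq_index_file, ctx->seq_index) < 0) {
        ctx->seq_index--;                  /* persisting failed, undo */
        return -1;
    }

    /* ... issue the PutObject request; if it fails: ... */
    ctx->seq_index--;
    write_seq_index(ctx->seq_index_file, ctx->seq_index);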
1; + return 0; } - return FLB_RETRY; + flb_aws_print_xml_error(c->resp.payload, c->resp.payload_size, + "PutObject", ctx->ins); + if (c->resp.data != NULL) { + flb_plg_error(ctx->ins, "Raw PutObject response: %s", c->resp.data); + } + flb_http_client_destroy(c); } - /* data was sent successfully- delete the local buffer */ - if (chunk) { - s3_store_file_delete(ctx, chunk); - } - return FLB_OK; + flb_plg_error(ctx->ins, "PutObject request failed"); + flb_sds_destroy(uri); + goto decrement_index; -multipart: +decrement_index: + if (ctx->key_fmt_has_seq_index) { + ctx->seq_index--; - if (init_upload == FLB_TRUE) { - m_upload = create_upload(ctx, tag, tag_len, file_first_log_time); - if (!m_upload) { - flb_plg_error(ctx->ins, "Could not find or create upload for tag %s", tag); - if (chunk) { - s3_store_file_unlock(chunk); - } - if (ctx->compression == FLB_AWS_COMPRESS_GZIP) { - flb_free(payload_buf); - } - return FLB_RETRY; + ret = write_seq_index(ctx->seq_index_file, ctx->seq_index); + if (ret < 0) { + flb_plg_error(ctx->ins, "Failed to decrement index after request error"); + return -1; } } + return -1; +} - if (m_upload->upload_state == MULTIPART_UPLOAD_STATE_NOT_CREATED) { - ret = create_multipart_upload(ctx, m_upload); - if (ret < 0) { - flb_plg_error(ctx->ins, "Could not initiate multipart upload"); - if (chunk) { - s3_store_file_unlock(chunk); - } - if (ctx->compression == FLB_AWS_COMPRESS_GZIP) { - flb_free(payload_buf); - } - return FLB_RETRY; - } - m_upload->upload_state = MULTIPART_UPLOAD_STATE_CREATED; - } +int get_md5_base64(char *buf, size_t buf_size, char *md5_str, size_t md5_str_size) +{ + unsigned char md5_bin[16]; + size_t olen; + int ret; - ret = upload_part(ctx, m_upload, body, body_size); - if (ret < 0) { - if (ctx->compression == FLB_AWS_COMPRESS_GZIP) { - flb_free(payload_buf); - } - m_upload->upload_errors += 1; - /* re-add chunk to list */ - if (chunk) { - s3_store_file_unlock(chunk); - chunk->failures += 1; - } - return FLB_RETRY; - } - m_upload->part_number += 1; - /* data was sent successfully- delete the local buffer */ - if (chunk) { - s3_store_file_delete(ctx, chunk); - chunk = NULL; - } - if (ctx->compression == FLB_AWS_COMPRESS_GZIP) { - flb_free(payload_buf); - } - if (m_upload->bytes >= ctx->file_size) { - size_check = FLB_TRUE; - flb_plg_info(ctx->ins, "Will complete upload for %s because uploaded data is greater" - " than size set by total_file_size", m_upload->s3_key); - } - if (m_upload->part_number >= 10000) { - part_num_check = FLB_TRUE; - flb_plg_info(ctx->ins, "Will complete upload for %s because 10,000 chunks " - "(the API limit) have been uploaded", m_upload->s3_key); - } - if (time(NULL) > - (m_upload->init_time + ctx->upload_timeout + ctx->retry_time)) { - timeout_check = FLB_TRUE; - flb_plg_info(ctx->ins, "Will complete upload for %s because upload_timeout" - " has elapsed", m_upload->s3_key); - } - if (size_check || part_num_check || timeout_check) { - complete_upload = FLB_TRUE; + ret = flb_hash_simple(FLB_HASH_MD5, + (unsigned char *) buf, buf_size, + md5_bin, sizeof(md5_bin)); + + if (ret != FLB_CRYPTO_SUCCESS) { + return -1; } - if (complete_upload == FLB_TRUE) { - /* mark for completion- the upload timer will handle actual completion */ - m_upload->upload_state = MULTIPART_UPLOAD_STATE_COMPLETE_IN_PROGRESS; + ret = flb_base64_encode((unsigned char*) md5_str, md5_str_size, + &olen, md5_bin, sizeof(md5_bin)); + if (ret != 0) { + return ret; } - return FLB_OK; + return 0; } - -/* - * Attempts to send all chunks to S3 using PutObject - * Used 
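get_md5_base64() also explains the otherwise magic final_body_md5[25] buffer in s3_put_object(): a 16-byte MD5 digest base64-encodes to exactly 24 characters, plus one byte for the terminating NUL. A usage sketch (body and body_size stand for the request payload):

    char md5[25];    /* 24 base64 chars + NUL for a 16-byte digest */

    memset(md5, 0, sizeof(md5));
    if (get_md5_base64(body, body_size, md5, sizeof(md5)) == 0) {
        /* md5 now holds the Content-MD5 header value */
    }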
on shut down to try to send all buffered data - * Used on start up to try to send any leftover buffers from previous executions - */ -static int put_all_chunks(struct flb_s3 *ctx) +static struct multipart_upload *get_upload(struct flb_s3 *ctx, + const char *tag, int tag_len) { - struct s3_file *chunk; + struct multipart_upload *m_upload = NULL; + struct multipart_upload *tmp_upload = NULL; struct mk_list *tmp; struct mk_list *head; - struct mk_list *f_head; - struct flb_fstore_file *fsf; - struct flb_fstore_stream *fs_stream; - void *payload_buf = NULL; - size_t payload_size = 0; - char *buffer = NULL; - size_t buffer_size; - int ret; - mk_list_foreach(head, &ctx->fs->streams) { - /* skip multi upload stream */ - fs_stream = mk_list_entry(head, struct flb_fstore_stream, _head); - if (fs_stream == ctx->stream_upload) { + mk_list_foreach_safe(head, tmp, &ctx->uploads) { + tmp_upload = mk_list_entry(head, struct multipart_upload, _head); + + if (tmp_upload->upload_state == MULTIPART_UPLOAD_STATE_COMPLETE_IN_PROGRESS) { continue; } - /* skip metadata stream */ - if (fs_stream == ctx->stream_metadata) { + if (tmp_upload->upload_errors >= MAX_UPLOAD_ERRORS) { + tmp_upload->upload_state = MULTIPART_UPLOAD_STATE_COMPLETE_IN_PROGRESS; + flb_plg_error(ctx->ins, "Upload for %s has reached max upload errors", + tmp_upload->s3_key); continue; } + if (strcmp(tmp_upload->tag, tag) == 0) { + m_upload = tmp_upload; + break; + } + } - mk_list_foreach_safe(f_head, tmp, &fs_stream->files) { - fsf = mk_list_entry(f_head, struct flb_fstore_file, _head); - chunk = fsf->data; + return m_upload; +} - /* Locked chunks are being processed, skip */ - if (chunk->locked == FLB_TRUE) { - continue; - } +static struct multipart_upload *create_upload(struct flb_s3 *ctx, const char *tag, + int tag_len, time_t file_first_log_time) +{ + int ret; + struct multipart_upload *m_upload = NULL; + flb_sds_t s3_key = NULL; + flb_sds_t tmp_sds = NULL; - if (chunk->failures >= MAX_UPLOAD_ERRORS) { - flb_plg_warn(ctx->ins, - "Chunk for tag %s failed to send %i times, " - "will not retry", - (char *) fsf->meta_buf, MAX_UPLOAD_ERRORS); - flb_fstore_file_inactive(ctx->fs, fsf); - continue; - } + /* create new upload for this key */ + m_upload = flb_calloc(1, sizeof(struct multipart_upload)); + if (!m_upload) { + flb_errno(); + return NULL; + } + s3_key = flb_get_s3_key(ctx->s3_key_format, file_first_log_time, tag, + ctx->tag_delimiters, ctx->seq_index); + if (!s3_key) { + flb_plg_error(ctx->ins, "Failed to construct S3 Object Key for %s", tag); + flb_free(m_upload); + return NULL; + } + m_upload->s3_key = s3_key; + tmp_sds = flb_sds_create_len(tag, tag_len); + if (!tmp_sds) { + flb_errno(); + flb_sds_destroy(s3_key); + flb_free(m_upload); + return NULL; + } + m_upload->tag = tmp_sds; + m_upload->upload_state = MULTIPART_UPLOAD_STATE_NOT_CREATED; + m_upload->part_number = 1; + m_upload->init_time = time(NULL); + mk_list_add(&m_upload->_head, &ctx->uploads); - ret = construct_request_buffer(ctx, NULL, chunk, - &buffer, &buffer_size); - if (ret < 0) { - flb_plg_error(ctx->ins, - "Could not construct request buffer for %s", - chunk->file_path); - return -1; - } + /* Update file and increment index value right before request */ + if (ctx->key_fmt_has_seq_index) { + ctx->seq_index++; - if (ctx->compression != FLB_AWS_COMPRESS_NONE) { - /* Map payload */ - ret = flb_aws_compression_compress(ctx->compression, buffer, buffer_size, &payload_buf, &payload_size); - if (ret == -1) { - flb_plg_error(ctx->ins, "Failed to compress data, uploading 
uncompressed data instead to prevent data loss"); - } else { - flb_plg_info(ctx->ins, "Pre-compression chunk size is %zu, After compression, chunk is %zu bytes", buffer_size, payload_size); - flb_free(buffer); + ret = write_seq_index(ctx->seq_index_file, ctx->seq_index); + if (ret < 0) { + ctx->seq_index--; - buffer = (void *) payload_buf; - buffer_size = payload_size; - } - } + mk_list_del(&m_upload->_head); - ret = s3_put_object(ctx, (const char *) - fsf->meta_buf, - chunk->create_time, buffer, buffer_size); - flb_free(buffer); - if (ret < 0) { - s3_store_file_unlock(chunk); - chunk->failures += 1; - return -1; - } + flb_sds_destroy(tmp_sds); + flb_sds_destroy(s3_key); - /* data was sent successfully- delete the local buffer */ - s3_store_file_delete(ctx, chunk); + flb_free(m_upload); + + flb_plg_error(ctx->ins, "Failed to write to sequential index metadata file"); + + return NULL; } } + return m_upload; +} + +/* Adds an entry to upload queue */ +static int add_to_queue(struct flb_s3 *ctx, struct s3_file *upload_file, + struct multipart_upload *m_upload_file, const char *tag, int tag_len) +{ + struct upload_queue *upload_contents; + flb_sds_t tag_cpy; + + /* Create upload contents object and add to upload queue */ + upload_contents = flb_calloc(1, sizeof(struct upload_queue)); + if (upload_contents == NULL) { + flb_plg_error(ctx->ins, "Error allocating memory for upload_queue entry"); + flb_errno(); + return -1; + } + upload_contents->upload_file = upload_file; + upload_contents->m_upload_file = m_upload_file; + upload_contents->tag_len = tag_len; + upload_contents->retry_counter = 0; + upload_contents->upload_time = -1; + + /* Necessary to create separate string for tag to prevent corruption */ + tag_cpy = flb_sds_create_len(tag, tag_len); + if (!tag_cpy) { + flb_errno(); + flb_free(upload_contents); + return -1; + } + upload_contents->tag = tag_cpy; + + + /* Add entry to upload queue */ + mk_list_add(&upload_contents->_head, &ctx->upload_queue); return 0; } -/* - * Either new_data or chunk can be NULL, but not both - */ -static int construct_request_buffer(struct flb_s3 *ctx, flb_sds_t new_data, - struct s3_file *chunk, - char **out_buf, size_t *out_size) +/* Removes an entry from upload_queue */ +void remove_from_queue(struct upload_queue *entry) +{ + mk_list_del(&entry->_head); + flb_sds_destroy(entry->tag); + flb_free(entry); + return; +} + +/* Validity check for upload queue object */ +static int upload_queue_valid(struct upload_queue *upload_contents, time_t now, + void *out_context) +{ + struct flb_s3 *ctx = out_context; + + if (upload_contents == NULL) { + flb_plg_error(ctx->ins, "Error getting entry from upload_queue"); + return -1; + } + if (upload_contents->_head.next == NULL || upload_contents->_head.prev == NULL) { + flb_plg_debug(ctx->ins, "Encountered previously deleted entry in " + "upload_queue. Deleting invalid entry"); + mk_list_del(&upload_contents->_head); + return -1; + } + if (upload_contents->upload_file->locked == FLB_FALSE) { + flb_plg_debug(ctx->ins, "Encountered unlocked file in upload_queue. " + "Exiting"); + return -1; + } + if (upload_contents->upload_file->size <= 0) { + flb_plg_debug(ctx->ins, "Encountered empty chunk file in upload_queue. 
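The comment in add_to_queue() about needing a separate string for the tag encodes an ownership rule: the tag pointer handed to the output callback is, presumably, not guaranteed to outlive the queue entry (the queue is drained later from the timer thread), so each entry keeps its own copy and remove_from_queue() releases it. Sketch of the paired calls (entry stands for a struct upload_queue pointer):

    entry->tag = flb_sds_create_len(tag, tag_len);   /* entry owns a copy */

    /* ... later, when the entry is retired ... */
    mk_list_del(&entry->_head);
    flb_sds_destroy(entry->tag);                     /* freed with the entry */
    flb_free(entry);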
" + "Deleting empty chunk file"); + remove_from_queue(upload_contents); + return -1; + } + if (now < upload_contents->upload_time) { + flb_plg_debug(ctx->ins, "Found valid chunk file but not ready to upload"); + return -1; + } + return 0; +} + +static int send_upload_request(void *out_context, flb_sds_t chunk, + struct s3_file *upload_file, + struct multipart_upload *m_upload_file, + const char *tag, int tag_len) +{ + int ret; + char *buffer; + size_t buffer_size; + struct flb_s3 *ctx = out_context; + + /* Create buffer to upload to S3 */ + ret = construct_request_buffer(ctx, chunk, upload_file, &buffer, &buffer_size); + flb_sds_destroy(chunk); + if (ret < 0) { + flb_plg_error(ctx->ins, "Could not construct request buffer for %s", + upload_file->file_path); + return -1; + } + + /* Upload to S3 */ + ret = upload_data(ctx, upload_file, m_upload_file, buffer, buffer_size, tag, tag_len); + flb_free(buffer); + + return ret; +} + +static int buffer_chunk(void *out_context, struct s3_file *upload_file, + flb_sds_t chunk, int chunk_size, + const char *tag, int tag_len, + time_t file_first_log_time) +{ + int ret; + struct flb_s3 *ctx = out_context; + + ret = s3_store_buffer_put(ctx, upload_file, tag, + tag_len, chunk, (size_t) chunk_size, file_first_log_time); + flb_sds_destroy(chunk); + if (ret < 0) { + flb_plg_warn(ctx->ins, "Could not buffer chunk. Data order preservation " + "will be compromised"); + return -1; + } + return 0; +} + +/* Uploads all chunk files in queue synchronously */ +static void s3_upload_queue(struct flb_config *config, void *out_context) { - char *body; - char *tmp; - size_t body_size = 0; - char *buffered_data = NULL; - size_t buffer_size = 0; int ret; + time_t now; + struct upload_queue *upload_contents; + struct flb_s3 *ctx = out_context; + struct mk_list *tmp; + struct mk_list *head; - if (new_data == NULL && chunk == NULL) { - flb_plg_error(ctx->ins, "[construct_request_buffer] Something went wrong" - " both chunk and new_data are NULL"); - return -1; + flb_plg_debug(ctx->ins, "Running upload timer callback (upload_queue).."); + + /* No chunks in upload queue. Scan for timed out chunks. */ + if (mk_list_size(&ctx->upload_queue) == 0) { + flb_plg_debug(ctx->ins, "No files found in upload_queue. Scanning for timed " + "out chunks"); + cb_s3_upload(config, out_context); } - if (chunk) { - ret = s3_store_file_read(ctx, chunk, &buffered_data, &buffer_size); + /* Iterate through each file in upload queue */ + mk_list_foreach_safe(head, tmp, &ctx->upload_queue) { + upload_contents = mk_list_entry(head, struct upload_queue, _head); + + now = time(NULL); + + /* Checks if upload_contents is valid */ + ret = upload_queue_valid(upload_contents, now, ctx); if (ret < 0) { - flb_plg_error(ctx->ins, "Could not read locally buffered data %s", - chunk->file_path); - return -1; + goto exit; } - /* - * lock the chunk from buffer list - */ - s3_store_file_lock(chunk); - body = buffered_data; - body_size = buffer_size; - } - - /* - * If new data is arriving, increase the original 'buffered_data' size - * to append the new one. - */ - if (new_data) { - body_size += flb_sds_len(new_data); + /* Try to upload file. Return value can be -1, FLB_OK, FLB_ERROR, FLB_RETRY. 
*/ + ret = send_upload_request(ctx, NULL, upload_contents->upload_file, + upload_contents->m_upload_file, + upload_contents->tag, upload_contents->tag_len); + if (ret < 0) { + goto exit; + } + else if (ret == FLB_OK) { + remove_from_queue(upload_contents); + ctx->retry_time = 0; + ctx->upload_queue_success = FLB_TRUE; + } + else { + s3_store_file_lock(upload_contents->upload_file); + ctx->upload_queue_success = FLB_FALSE; - tmp = flb_realloc(buffered_data, body_size + 1); - if (!tmp) { - flb_errno(); - flb_free(buffered_data); - if (chunk) { - s3_store_file_unlock(chunk); + /* If retry limit was reached, discard file and remove file from queue */ + upload_contents->retry_counter++; + if (upload_contents->retry_counter >= MAX_UPLOAD_ERRORS) { + flb_plg_warn(ctx->ins, "Chunk file failed to send %d times, will not " + "retry", upload_contents->retry_counter); + s3_store_file_inactive(ctx, upload_contents->upload_file); + multipart_upload_destroy(upload_contents->m_upload_file); + remove_from_queue(upload_contents); + continue; } - return -1; + + /* Retry in N seconds */ + upload_contents->upload_time = now + 2 * upload_contents->retry_counter; + ctx->retry_time += 2 * upload_contents->retry_counter; + flb_plg_debug(ctx->ins, "Failed to upload file in upload_queue. Will not " + "retry for %d seconds", 2 * upload_contents->retry_counter); + break; } - body = buffered_data = tmp; - memcpy(body + buffer_size, new_data, flb_sds_len(new_data)); - body[body_size] = '\0'; } - *out_buf = body; - *out_size = body_size; - - return 0; +exit: + return; } -static int s3_put_object(struct flb_s3 *ctx, const char *tag, time_t file_first_log_time, - char *body, size_t body_size) +static int blob_initialize_authorization_endpoint_upstream(struct flb_s3 *context) { - flb_sds_t s3_key = NULL; - struct flb_http_client *c = NULL; - struct flb_aws_client *s3_client; - struct flb_aws_header *headers = NULL; - char *random_alphanumeric; - int append_random = FLB_FALSE; - int len; int ret; - int num_headers = 0; - char *final_key; - flb_sds_t uri; - flb_sds_t tmp; - char final_body_md5[25]; + struct flb_upstream *upstream; + struct flb_tls *tls_context; + char *scheme = NULL; + char *host = NULL; + char *port = NULL; + char *uri = NULL; + int upstream_flags; + + context->authorization_endpoint_upstream = NULL; + context->authorization_endpoint_tls_context = NULL; + + /* Parse and split URL */ + ret = flb_utils_url_split(context->authorization_endpoint_url, + &scheme, &host, &port, &uri); + + if (ret == -1) { + flb_plg_error(context->ins, + "Invalid URL: %s", + context->authorization_endpoint_url); - s3_key = flb_get_s3_key(ctx->s3_key_format, file_first_log_time, tag, - ctx->tag_delimiters, ctx->seq_index); - if (!s3_key) { - flb_plg_error(ctx->ins, "Failed to construct S3 Object Key for %s", tag); return -1; } - len = strlen(s3_key); - if ((len + 16) <= 1024 && !ctx->key_fmt_has_uuid && !ctx->static_file_path && - !ctx->key_fmt_has_seq_index) { - append_random = FLB_TRUE; - len += 16; - } - len += strlen(ctx->bucket + 1); + if (scheme != NULL) { + flb_free(scheme); - uri = flb_sds_create_size(len); + scheme = NULL; + } - if (append_random == FLB_TRUE) { - random_alphanumeric = flb_sts_session_name(); - if (!random_alphanumeric) { - flb_sds_destroy(s3_key); - flb_sds_destroy(uri); - flb_plg_error(ctx->ins, "Failed to create randomness for S3 key %s", tag); - return -1; - } - /* only use 8 chars of the random string */ - random_alphanumeric[8] = '\0'; + if (port != NULL) { + flb_free(port); - tmp = flb_sds_printf(&uri, 
"/%s%s-object%s", ctx->bucket, s3_key, - random_alphanumeric); - flb_free(random_alphanumeric); - } - else { - tmp = flb_sds_printf(&uri, "/%s%s", ctx->bucket, s3_key); + port = NULL; } - if (!tmp) { - flb_sds_destroy(s3_key); - flb_plg_error(ctx->ins, "Failed to create PutObject URI"); - return -1; - } - flb_sds_destroy(s3_key); - uri = tmp; + if (host == NULL || uri == NULL) { + flb_plg_error(context->ins, + "Invalid URL: %s", + context->authorization_endpoint_url); - memset(final_body_md5, 0, sizeof(final_body_md5)); - if (ctx->send_content_md5 == FLB_TRUE) { - ret = get_md5_base64(body, body_size, - final_body_md5, sizeof(final_body_md5)); - if (ret != 0) { - flb_plg_error(ctx->ins, "Failed to create Content-MD5 header"); - flb_sds_destroy(uri); - return -1; + if (host != NULL) { + flb_free(host); } - } - - /* Update file and increment index value right before request */ - if (ctx->key_fmt_has_seq_index) { - ctx->seq_index++; - ret = write_seq_index(ctx->seq_index_file, ctx->seq_index); - if (ret < 0 && access(ctx->seq_index_file, F_OK) == 0) { - ctx->seq_index--; - flb_plg_error(ctx->ins, "Failed to update sequential index metadata file"); - return -1; + if (uri != NULL) { + flb_free(uri); } - } - s3_client = ctx->s3_client; - if (s3_plugin_under_test() == FLB_TRUE) { - c = mock_s3_call("TEST_PUT_OBJECT_ERROR", "PutObject"); + return -2; } - else { - ret = create_headers(ctx, final_body_md5, &headers, &num_headers, FLB_FALSE); - if (ret == -1) { - flb_plg_error(ctx->ins, "Failed to create headers"); - flb_sds_destroy(uri); - goto decrement_index; - } - c = s3_client->client_vtable->request(s3_client, FLB_HTTP_PUT, - uri, body, body_size, - headers, num_headers); - flb_free(headers); + + tls_context = flb_tls_create(FLB_TLS_CLIENT_MODE, + FLB_FALSE, + FLB_FALSE, + host, + NULL, + NULL, + NULL, + NULL, + NULL); + + flb_free(host); + flb_free(uri); + + if (tls_context == NULL) { + flb_plg_error(context->ins, + "TLS context creation errror"); + + return -2; } - if (c) { - flb_plg_debug(ctx->ins, "PutObject http status=%d", c->resp.status); - if (c->resp.status == 200) { - /* - * URI contains bucket name, so we must advance over it - * to print the object key - */ - final_key = uri + strlen(ctx->bucket) + 1; - flb_plg_info(ctx->ins, "Successfully uploaded object %s", final_key); - flb_sds_destroy(uri); - flb_http_client_destroy(c); - return 0; - } - flb_aws_print_xml_error(c->resp.payload, c->resp.payload_size, - "PutObject", ctx->ins); - if (c->resp.data != NULL) { - flb_plg_error(ctx->ins, "Raw PutObject response: %s", c->resp.data); - } - flb_http_client_destroy(c); + upstream = flb_upstream_create_url(context->ins->config, + context->authorization_endpoint_url, + FLB_IO_TCP, + tls_context); + + if (upstream == NULL) { + flb_tls_destroy(tls_context); + + flb_plg_error(context->ins, + "Upstream creation errror"); + + return -3; } - flb_plg_error(ctx->ins, "PutObject request failed"); - flb_sds_destroy(uri); - goto decrement_index; + upstream_flags = flb_stream_get_flags(&upstream->base); -decrement_index: - if (ctx->key_fmt_has_seq_index) { - ctx->seq_index--; + flb_output_upstream_set(upstream, context->ins); - ret = write_seq_index(ctx->seq_index_file, ctx->seq_index); - if (ret < 0) { - flb_plg_error(ctx->ins, "Failed to decrement index after request error"); - return -1; - } - } - return -1; + flb_stream_set_flags(&upstream->base, upstream_flags); + + context->authorization_endpoint_upstream = upstream; + context->authorization_endpoint_tls_context = tls_context; + + return 0; } -int 
get_md5_base64(char *buf, size_t buf_size, char *md5_str, size_t md5_str_size) +static int blob_fetch_pre_signed_url(struct flb_s3 *context, + flb_sds_t *result_url, + char *url) { - unsigned char md5_bin[16]; - size_t olen; int ret; + size_t b_sent; + struct flb_http_client *http_client; + struct flb_connection *connection; + char *scheme = NULL; + char *host = NULL; + char *port = NULL; + char *uri = NULL; + uint16_t port_as_short; + flb_sds_t tmp; - ret = flb_hash_simple(FLB_HASH_MD5, - (unsigned char *) buf, buf_size, - md5_bin, sizeof(md5_bin)); + /* Parse and split URL */ + ret = flb_utils_url_split(url, + &scheme, &host, &port, &uri); + if (ret == -1) { + flb_plg_error(context->ins, + "Invalid URL: %s", + url); - if (ret != FLB_CRYPTO_SUCCESS) { return -1; } - ret = flb_base64_encode((unsigned char*) md5_str, md5_str_size, - &olen, md5_bin, sizeof(md5_bin)); - if (ret != 0) { - return ret; + if (port != NULL) { + port_as_short = (uint16_t) strtoul(port, NULL, 10); + } + else { + if (scheme != NULL) { + if (strcasecmp(scheme, "https") == 0) { + port_as_short = 443; + } + else { + port_as_short = 80; + } + } } - return 0; -} + if (scheme != NULL) { + flb_free(scheme); + scheme = NULL; + } -static struct multipart_upload *get_upload(struct flb_s3 *ctx, - const char *tag, int tag_len) -{ - struct multipart_upload *m_upload = NULL; - struct multipart_upload *tmp_upload = NULL; - struct mk_list *tmp; - struct mk_list *head; + if (port != NULL) { + flb_free(port); + port = NULL; + } - mk_list_foreach_safe(head, tmp, &ctx->uploads) { - tmp_upload = mk_list_entry(head, struct multipart_upload, _head); + if (host == NULL || uri == NULL) { + flb_plg_error(context->ins, + "Invalid URL: %s", + context->authorization_endpoint_url); - if (tmp_upload->upload_state == MULTIPART_UPLOAD_STATE_COMPLETE_IN_PROGRESS) { - continue; - } - if (tmp_upload->upload_errors >= MAX_UPLOAD_ERRORS) { - tmp_upload->upload_state = MULTIPART_UPLOAD_STATE_COMPLETE_IN_PROGRESS; - flb_plg_error(ctx->ins, "Upload for %s has reached max upload errors", - tmp_upload->s3_key); - continue; + if (host != NULL) { + flb_free(host); } - if (strcmp(tmp_upload->tag, tag) == 0) { - m_upload = tmp_upload; - break; + + if (uri != NULL) { + flb_free(uri); } + + return -2; } - return m_upload; -} + /* Get upstream connection */ + connection = flb_upstream_conn_get(context->authorization_endpoint_upstream); + if (connection == NULL) { + flb_free(host); + flb_free(uri); -static struct multipart_upload *create_upload(struct flb_s3 *ctx, const char *tag, - int tag_len, time_t file_first_log_time) -{ - int ret; - struct multipart_upload *m_upload = NULL; - flb_sds_t s3_key = NULL; - flb_sds_t tmp_sds = NULL; + flb_plg_error(context->ins, + "cannot create connection"); - /* create new upload for this key */ - m_upload = flb_calloc(1, sizeof(struct multipart_upload)); - if (!m_upload) { - flb_errno(); - return NULL; - } - s3_key = flb_get_s3_key(ctx->s3_key_format, file_first_log_time, tag, - ctx->tag_delimiters, ctx->seq_index); - if (!s3_key) { - flb_plg_error(ctx->ins, "Failed to construct S3 Object Key for %s", tag); - flb_free(m_upload); - return NULL; + return -3; } - m_upload->s3_key = s3_key; - tmp_sds = flb_sds_create_len(tag, tag_len); - if (!tmp_sds) { - flb_errno(); - flb_sds_destroy(s3_key); - flb_free(m_upload); - return NULL; + + /* Create HTTP client context */ + http_client = flb_http_client(connection, + FLB_HTTP_GET, + uri, + NULL, 0, + host, + (int) port_as_short, + NULL, 0); + if (http_client == NULL) { + 
flb_upstream_conn_release(connection); + flb_free(host); + flb_free(uri); + + flb_plg_error(context->ins, + "cannot create HTTP client"); + + return -4; } - m_upload->tag = tmp_sds; - m_upload->upload_state = MULTIPART_UPLOAD_STATE_NOT_CREATED; - m_upload->part_number = 1; - m_upload->init_time = time(NULL); - mk_list_add(&m_upload->_head, &ctx->uploads); - /* Update file and increment index value right before request */ - if (ctx->key_fmt_has_seq_index) { - ctx->seq_index++; + flb_http_add_header(http_client, + "Accept", + strlen("Accept"), + "text/plain", + 10); - ret = write_seq_index(ctx->seq_index_file, ctx->seq_index); - if (ret < 0) { - ctx->seq_index--; + /* User Agent */ + flb_http_add_header(http_client, + "User-Agent", 10, + "Fluent-Bit", 10); - mk_list_del(&m_upload->_head); + if (context->authorization_endpoint_username != NULL && + context->authorization_endpoint_password != NULL) { + flb_http_basic_auth(http_client, + context->authorization_endpoint_username, + context->authorization_endpoint_password); + } + else if (context->authorization_endpoint_bearer_token != NULL) { + flb_http_bearer_auth(http_client, + context->authorization_endpoint_bearer_token); + } - flb_sds_destroy(tmp_sds); - flb_sds_destroy(s3_key); + /* Send HTTP request */ + ret = flb_http_do(http_client, &b_sent); - flb_free(m_upload); + if (ret == -1) { + flb_plg_error(context->ins, + "Error sending configuration request"); - flb_plg_error(ctx->ins, "Failed to write to sequential index metadata file"); + ret = -5; + } + else { + if (http_client->resp.status == 200) { + flb_plg_info(context->ins, + "Pre signed url retrieved successfully"); + + if (*result_url != NULL) { + tmp = flb_sds_copy(*result_url, + http_client->resp.payload, + http_client->resp.payload_size); + } + else { + tmp = flb_sds_create_len(http_client->resp.payload, + http_client->resp.payload_size); + } - return NULL; + if (tmp == NULL) { + flb_plg_error(context->ins, + "Pre signed url duplication error"); + + ret = -7; + } + else { + *result_url = tmp; + } + } + else { + if (http_client->resp.payload_size > 0) { + flb_plg_error(context->ins, + "Pre signed url retrieval failed with status %i\n%s", + http_client->resp.status, + http_client->resp.payload); + } + else { + flb_plg_error(context->ins, + "Pre signed url retrieval failed with status %i", + http_client->resp.status); + } + + ret = -6; } } - return m_upload; + flb_http_client_destroy(http_client); + flb_upstream_conn_release(connection); + flb_free(host); + flb_free(uri); + + return ret; } -/* Adds an entry to upload queue */ -static int add_to_queue(struct flb_s3 *ctx, struct s3_file *upload_file, - struct multipart_upload *m_upload_file, const char *tag, int tag_len) +static int blob_fetch_put_object_pre_signed_url(struct flb_s3 *context, + flb_sds_t *result_url, + char *tag, + char *bucket, + char *path) { - struct upload_queue *upload_contents; - flb_sds_t tag_cpy; + char *valid_path; + int ret; + flb_sds_t url; + flb_sds_t tmp; - /* Create upload contents object and add to upload queue */ - upload_contents = flb_calloc(1, sizeof(struct upload_queue)); - if (upload_contents == NULL) { - flb_plg_error(ctx->ins, "Error allocating memory for upload_queue entry"); - flb_errno(); - return -1; - } - upload_contents->upload_file = upload_file; - upload_contents->m_upload_file = m_upload_file; - upload_contents->tag_len = tag_len; - upload_contents->retry_counter = 0; - upload_contents->upload_time = -1; + valid_path = (char *) path; - /* Necessary to create separate string for tag to 
prevent corruption */ - tag_cpy = flb_sds_create_len(tag, tag_len); - if (!tag_cpy) { - flb_errno(); - flb_free(upload_contents); - return -1; + while (*valid_path == '.' || + *valid_path == '/') { + valid_path++; } - upload_contents->tag = tag_cpy; + url = flb_sds_create(context->authorization_endpoint_url); - /* Add entry to upload queue */ - mk_list_add(&upload_contents->_head, &ctx->upload_queue); - return 0; -} + if (url == NULL) { + return -1; + } -/* Removes an entry from upload_queue */ -void remove_from_queue(struct upload_queue *entry) -{ - mk_list_del(&entry->_head); - flb_sds_destroy(entry->tag); - flb_free(entry); - return; -} + tmp = flb_sds_printf(&url, "/put_object_presigned_url/%s/%s/%s", bucket, tag, valid_path); -/* Validity check for upload queue object */ -static int upload_queue_valid(struct upload_queue *upload_contents, time_t now, - void *out_context) -{ - struct flb_s3 *ctx = out_context; + if (tmp != NULL) { + url = tmp; - if (upload_contents == NULL) { - flb_plg_error(ctx->ins, "Error getting entry from upload_queue"); - return -1; + ret = blob_fetch_pre_signed_url(context, result_url, (char *) url); } - if (upload_contents->_head.next == NULL || upload_contents->_head.prev == NULL) { - flb_plg_debug(ctx->ins, "Encountered previously deleted entry in " - "upload_queue. Deleting invalid entry"); - mk_list_del(&upload_contents->_head); - return -1; + else { + ret = -1; + } + + flb_sds_destroy(url); + + return ret; +} + +static int blob_fetch_create_multipart_upload_pre_signed_url(struct flb_s3 *context, + flb_sds_t *result_url, + char *tag, + char *bucket, + char *path) +{ + char *valid_path; + int ret; + flb_sds_t url; + flb_sds_t tmp; + + valid_path = (char *) path; + + while (*valid_path == '.' || + *valid_path == '/') { + valid_path++; } - if (upload_contents->upload_file->locked == FLB_FALSE) { - flb_plg_debug(ctx->ins, "Encountered unlocked file in upload_queue. " - "Exiting"); + + url = flb_sds_create(context->authorization_endpoint_url); + + if (url == NULL) { return -1; } - if (upload_contents->upload_file->size <= 0) { - flb_plg_debug(ctx->ins, "Encountered empty chunk file in upload_queue. " - "Deleting empty chunk file"); - remove_from_queue(upload_contents); - return -1; + + tmp = flb_sds_printf(&url, "/multipart_creation_presigned_url/%s/%s/%s", bucket, tag, valid_path); + + if (tmp != NULL) { + url = tmp; + + ret = blob_fetch_pre_signed_url(context, result_url, (char *) url); } - if (now < upload_contents->upload_time) { - flb_plg_debug(ctx->ins, "Found valid chunk file but not ready to upload"); - return -1; + else { + ret = -1; } - return 0; + + flb_sds_destroy(url); + + return ret; } -static int send_upload_request(void *out_context, flb_sds_t chunk, - struct s3_file *upload_file, - struct multipart_upload *m_upload_file, - const char *tag, int tag_len) +static int blob_fetch_multipart_upload_pre_signed_url(struct flb_s3 *context, + flb_sds_t *result_url, + char *tag, + char *bucket, + char *path, + char *upload_id, + int part_number) { - int ret; - char *buffer; - size_t buffer_size; - struct flb_s3 *ctx = out_context; + char *valid_path; + int ret; + flb_sds_t url; + flb_sds_t tmp; - /* Create buffer to upload to S3 */ - ret = construct_request_buffer(ctx, chunk, upload_file, &buffer, &buffer_size); - flb_sds_destroy(chunk); - if (ret < 0) { - flb_plg_error(ctx->ins, "Could not construct request buffer for %s", - upload_file->file_path); + valid_path = (char *) path; + + while (*valid_path == '.' 
|| + *valid_path == '/') { + valid_path++; + } + + url = flb_sds_create(context->authorization_endpoint_url); + + if (url == NULL) { return -1; } - /* Upload to S3 */ - ret = upload_data(ctx, upload_file, m_upload_file, buffer, buffer_size, tag, tag_len); - flb_free(buffer); + tmp = flb_sds_printf(&url, "/multipart_upload_presigned_url/%s/%s/%s/%s/%d", bucket, tag, valid_path, upload_id, part_number); + + if (tmp != NULL) { + url = tmp; + + ret = blob_fetch_pre_signed_url(context, result_url, (char *) url); + } + else { + ret = -1; + } + + flb_sds_destroy(url); return ret; } -static int buffer_chunk(void *out_context, struct s3_file *upload_file, - flb_sds_t chunk, int chunk_size, - const char *tag, int tag_len, - time_t file_first_log_time) +static int blob_fetch_multipart_complete_pre_signed_url(struct flb_s3 *context, + flb_sds_t *result_url, + char *tag, + char *bucket, + char *path, + char *upload_id) { - int ret; - struct flb_s3 *ctx = out_context; + char *valid_path; + int ret; + flb_sds_t url; + flb_sds_t tmp; - ret = s3_store_buffer_put(ctx, upload_file, tag, - tag_len, chunk, (size_t) chunk_size, file_first_log_time); - flb_sds_destroy(chunk); - if (ret < 0) { - flb_plg_warn(ctx->ins, "Could not buffer chunk. Data order preservation " - "will be compromised"); + valid_path = (char *) path; + + while (*valid_path == '.' || + *valid_path == '/') { + valid_path++; + } + + url = flb_sds_create(context->authorization_endpoint_url); + + if (url == NULL) { return -1; } - return 0; + + tmp = flb_sds_printf(&url, "/multipart_complete_presigned_url/%s/%s/%s/%s", bucket, tag, valid_path, upload_id); + + if (tmp != NULL) { + url = tmp; + + ret = blob_fetch_pre_signed_url(context, result_url, (char *) url); + } + else { + ret = -1; + } + + flb_sds_destroy(url); + + return ret; } -/* Uploads all chunk files in queue synchronously */ -static void s3_upload_queue(struct flb_config *config, void *out_context) +static int blob_fetch_multipart_abort_pre_signed_url(struct flb_s3 *context, + flb_sds_t *result_url, + char *tag, + char *bucket, + char *path, + char *upload_id) { - int ret; - time_t now; - struct upload_queue *upload_contents; - struct flb_s3 *ctx = out_context; - struct mk_list *tmp; - struct mk_list *head; + char *valid_path; + int ret; + flb_sds_t url; + flb_sds_t tmp; - flb_plg_debug(ctx->ins, "Running upload timer callback (upload_queue).."); + valid_path = (char *) path; - /* No chunks in upload queue. Scan for timed out chunks. */ - if (mk_list_size(&ctx->upload_queue) == 0) { - flb_plg_debug(ctx->ins, "No files found in upload_queue. Scanning for timed " - "out chunks"); - cb_s3_upload(config, out_context); + while (*valid_path == '.' || + *valid_path == '/') { + valid_path++; } - /* Iterate through each file in upload queue */ - mk_list_foreach_safe(head, tmp, &ctx->upload_queue) { - upload_contents = mk_list_entry(head, struct upload_queue, _head); - - now = time(NULL); + url = flb_sds_create(context->authorization_endpoint_url); - /* Checks if upload_contents is valid */ - ret = upload_queue_valid(upload_contents, now, ctx); - if (ret < 0) { - goto exit; - } + if (url == NULL) { + return -1; + } - /* Try to upload file. Return value can be -1, FLB_OK, FLB_ERROR, FLB_RETRY. 
*/ - ret = send_upload_request(ctx, NULL, upload_contents->upload_file, - upload_contents->m_upload_file, - upload_contents->tag, upload_contents->tag_len); - if (ret < 0) { - goto exit; - } - else if (ret == FLB_OK) { - remove_from_queue(upload_contents); - ctx->retry_time = 0; - ctx->upload_queue_success = FLB_TRUE; - } - else { - s3_store_file_lock(upload_contents->upload_file); - ctx->upload_queue_success = FLB_FALSE; + tmp = flb_sds_printf(&url, "/multipart_upload_presigned_url/%s/%s/%s/%s", bucket, tag, valid_path, upload_id); - /* If retry limit was reached, discard file and remove file from queue */ - upload_contents->retry_counter++; - if (upload_contents->retry_counter >= MAX_UPLOAD_ERRORS) { - flb_plg_warn(ctx->ins, "Chunk file failed to send %d times, will not " - "retry", upload_contents->retry_counter); - s3_store_file_inactive(ctx, upload_contents->upload_file); - multipart_upload_destroy(upload_contents->m_upload_file); - remove_from_queue(upload_contents); - continue; - } + if (tmp != NULL) { + url = tmp; - /* Retry in N seconds */ - upload_contents->upload_time = now + 2 * upload_contents->retry_counter; - ctx->retry_time += 2 * upload_contents->retry_counter; - flb_plg_debug(ctx->ins, "Failed to upload file in upload_queue. Will not " - "retry for %d seconds", 2 * upload_contents->retry_counter); - break; - } + ret = blob_fetch_pre_signed_url(context, result_url, (char *) url); + } + else { + ret = -1; } -exit: - return; -} + flb_sds_destroy(url); + return ret; +} static struct multipart_upload *create_blob_upload(struct flb_s3 *ctx, const char *tag, int tag_len, @@ -2387,31 +2429,42 @@ static int put_blob_object(struct flb_s3 *ctx, flb_sds_t tmp; char final_body_md5[25]; - s3_key = flb_get_s3_blob_key("/$TAG/", - tag, - ctx->tag_delimiters, - path); + if (ctx->authorization_endpoint_url == NULL) { + s3_key = flb_get_s3_blob_key("/$TAG/", + tag, + ctx->tag_delimiters, + path); - if (!s3_key) { - flb_plg_error(ctx->ins, "Failed to construct S3 Object Key for %s", tag); - return -1; - } + if (!s3_key) { + flb_plg_error(ctx->ins, "Failed to construct S3 Object Key for %s", tag); + return -1; + } - len = strlen(s3_key); - len += strlen(ctx->bucket + 1); + len = strlen(s3_key); + len += strlen(ctx->bucket + 1); - uri = flb_sds_create_size(len); + uri = flb_sds_create_size(len); + + tmp = flb_sds_printf(&uri, "/%s%s", ctx->bucket, s3_key); - tmp = flb_sds_printf(&uri, "/%s%s", ctx->bucket, s3_key); + if (!tmp) { + flb_sds_destroy(s3_key); + flb_plg_error(ctx->ins, "Failed to create PutObject URI"); + return -1; + } - if (!tmp) { flb_sds_destroy(s3_key); - flb_plg_error(ctx->ins, "Failed to create PutObject URI"); - return -1; + uri = tmp; } + else { + uri = NULL; - flb_sds_destroy(s3_key); - uri = tmp; + ret = blob_fetch_put_object_pre_signed_url(ctx, &uri, (char *) tag, ctx->bucket, (char *) path); + + if (ret != 0) { + return -1; + } + } memset(final_body_md5, 0, sizeof(final_body_md5)); if (ctx->send_content_md5 == FLB_TRUE) { @@ -2435,6 +2488,7 @@ static int put_blob_object(struct flb_s3 *ctx, flb_sds_destroy(uri); return -1; } + c = s3_client->client_vtable->request(s3_client, FLB_HTTP_PUT, uri, body, body_size, headers, num_headers); @@ -2492,6 +2546,7 @@ static int cb_s3_upload_blob(struct flb_config *config, void *data) struct multipart_upload *m_upload; int part_count; int put_object_required; + flb_sds_t pre_signed_url; info = FLB_TLS_GET(s3_worker_info); @@ -2506,6 +2561,7 @@ static int cb_s3_upload_blob(struct flb_config *config, void *data) } info->active_upload = 
FLB_TRUE; + pre_signed_url = NULL; /* * Check if is there any file which has been fully uploaded and we need to commit it with @@ -2558,7 +2614,34 @@ static int cb_s3_upload_blob(struct flb_config *config, void *data) return -4; } - ret = abort_multipart_upload(ctx, m_upload); + + if (ctx->authorization_endpoint_url != NULL) { + ret = blob_fetch_multipart_abort_pre_signed_url(ctx, + &pre_signed_url, + file_tag, + ctx->bucket, + file_path, + m_upload->upload_id); + + if (ret != 0) { + multipart_upload_destroy(m_upload); + + flb_blob_db_unlock(&ctx->blob_db); + + return -5; + } + } + else { + pre_signed_url = NULL; + } + + ret = abort_multipart_upload(ctx, m_upload, pre_signed_url); + + if (pre_signed_url != NULL) { + flb_sds_destroy(pre_signed_url); + + pre_signed_url = NULL; + } } flb_blob_file_update_remote_id(&ctx->blob_db, file_id, ""); @@ -2623,7 +2706,33 @@ static int cb_s3_upload_blob(struct flb_config *config, void *data) return -4; } - ret = abort_multipart_upload(ctx, m_upload); + if (ctx->authorization_endpoint_url != NULL) { + ret = blob_fetch_multipart_abort_pre_signed_url(ctx, + &pre_signed_url, + file_tag, + ctx->bucket, + file_path, + m_upload->upload_id); + + if (ret != 0) { + multipart_upload_destroy(m_upload); + + flb_blob_db_unlock(&ctx->blob_db); + + return -5; + } + } + else { + pre_signed_url = NULL; + } + + ret = abort_multipart_upload(ctx, m_upload, pre_signed_url); + + if (pre_signed_url != NULL) { + flb_sds_destroy(pre_signed_url); + + pre_signed_url = NULL; + } } if (ctx->file_delivery_attempt_limit != FLB_OUT_RETRY_UNLIMITED && @@ -2737,7 +2846,33 @@ static int cb_s3_upload_blob(struct flb_config *config, void *data) m_upload->part_number = part_count; - ret = complete_multipart_upload(ctx, m_upload); + if (ctx->authorization_endpoint_url != NULL) { + ret = blob_fetch_multipart_complete_pre_signed_url(ctx, + &pre_signed_url, + file_tag, + ctx->bucket, + file_path, + m_upload->upload_id); + + if (ret != 0) { + multipart_upload_destroy(m_upload); + + flb_blob_db_unlock(&ctx->blob_db); + + return -5; + } + } + else { + pre_signed_url = NULL; + } + + ret = complete_multipart_upload(ctx, m_upload, pre_signed_url); + + if (pre_signed_url != NULL) { + flb_sds_destroy(pre_signed_url); + + pre_signed_url = NULL; + } if (ret < 0) { multipart_upload_destroy(m_upload); @@ -2946,7 +3081,38 @@ static int cb_s3_upload_blob(struct flb_config *config, void *data) mk_list_del(&m_upload->_head); if (part_id == 0) { - ret = create_multipart_upload(ctx, m_upload); + if (ctx->authorization_endpoint_url != NULL) { + ret = blob_fetch_create_multipart_upload_pre_signed_url(ctx, + &pre_signed_url, + file_tag, + ctx->bucket, + file_path); + + if (ret != 0) { + flb_free(out_buf); + + cfl_sds_destroy(file_tag); + cfl_sds_destroy(file_path); + cfl_sds_destroy(file_remote_id); + cfl_sds_destroy(file_destination); + + m_upload->part_number = 0; + multipart_upload_destroy(m_upload); + + return -1; + } + } + else { + pre_signed_url = NULL; + } + + ret = create_multipart_upload(ctx, m_upload, pre_signed_url); + + if (pre_signed_url != NULL) { + flb_sds_destroy(pre_signed_url); + + pre_signed_url = NULL; + } if (ret < 0) { flb_free(out_buf); @@ -3005,7 +3171,40 @@ static int cb_s3_upload_blob(struct flb_config *config, void *data) m_upload->part_number = part_id + 1; - ret = upload_part(ctx, m_upload, out_buf, out_size); + if (ctx->authorization_endpoint_url != NULL) { + ret = blob_fetch_multipart_upload_pre_signed_url(ctx, + &pre_signed_url, + file_tag, + ctx->bucket, + file_path, + 
m_upload->upload_id, + m_upload->part_number); + + if (ret != 0) { + flb_free(out_buf); + + cfl_sds_destroy(file_tag); + cfl_sds_destroy(file_path); + cfl_sds_destroy(file_remote_id); + cfl_sds_destroy(file_destination); + + m_upload->part_number = 0; + multipart_upload_destroy(m_upload); + + return -1; + } + } + else { + pre_signed_url = NULL; + } + + ret = upload_part(ctx, m_upload, out_buf, out_size, pre_signed_url); + + if (pre_signed_url != NULL) { + flb_sds_destroy(pre_signed_url); + + pre_signed_url = NULL; + } if (ret == 0) { ret = flb_blob_db_file_part_update_remote_id(&ctx->blob_db, @@ -3127,7 +3326,7 @@ static void cb_s3_upload(struct flb_config *config, void *data) if (complete == FLB_TRUE) { m_upload->upload_state = MULTIPART_UPLOAD_STATE_COMPLETE_IN_PROGRESS; mk_list_del(&m_upload->_head); - ret = complete_multipart_upload(ctx, m_upload); + ret = complete_multipart_upload(ctx, m_upload, NULL); if (ret == 0) { multipart_upload_destroy(m_upload); } @@ -3726,7 +3925,7 @@ static int cb_s3_exit(void *data, struct flb_config *config) if (m_upload->bytes > 0) { m_upload->upload_state = MULTIPART_UPLOAD_STATE_COMPLETE_IN_PROGRESS; mk_list_del(&m_upload->_head); - ret = complete_multipart_upload(ctx, m_upload); + ret = complete_multipart_upload(ctx, m_upload, NULL); if (ret == 0) { multipart_upload_destroy(m_upload); } @@ -3977,27 +4176,27 @@ static struct flb_config_map config_map[] = { }, { - FLB_CONFIG_MAP_STR, "configuration_endpoint_url", NULL, - 0, FLB_TRUE, offsetof(struct flb_s3, configuration_endpoint_url), - "Configuration endpoint URL" + FLB_CONFIG_MAP_STR, "authorization_endpoint_url", NULL, + 0, FLB_TRUE, offsetof(struct flb_s3, authorization_endpoint_url), + "Authorization endpoint URL" }, { - FLB_CONFIG_MAP_STR, "configuration_endpoint_username", NULL, - 0, FLB_TRUE, offsetof(struct flb_s3, configuration_endpoint_username), - "Configuration endpoint basic authentication username" + FLB_CONFIG_MAP_STR, "authorization_endpoint_username", NULL, + 0, FLB_TRUE, offsetof(struct flb_s3, authorization_endpoint_username), + "Authorization endpoint basic authentication username" }, { - FLB_CONFIG_MAP_STR, "configuration_endpoint_password", NULL, - 0, FLB_TRUE, offsetof(struct flb_s3, configuration_endpoint_password), - "Configuration endpoint basic authentication password" + FLB_CONFIG_MAP_STR, "authorization_endpoint_password", NULL, + 0, FLB_TRUE, offsetof(struct flb_s3, authorization_endpoint_password), + "Authorization endpoint basic authentication password" }, { - FLB_CONFIG_MAP_STR, "configuration_endpoint_bearer_token", NULL, - 0, FLB_TRUE, offsetof(struct flb_s3, configuration_endpoint_bearer_token), - "Configuration endpoint bearer token" + FLB_CONFIG_MAP_STR, "authorization_endpoint_bearer_token", NULL, + 0, FLB_TRUE, offsetof(struct flb_s3, authorization_endpoint_bearer_token), + "Authorization endpoint bearer token" }, /* EOF */ diff --git a/plugins/out_s3/s3.h b/plugins/out_s3/s3.h index b21dd9a1696..d1004fcfec2 100644 --- a/plugins/out_s3/s3.h +++ b/plugins/out_s3/s3.h @@ -131,10 +131,12 @@ struct flb_s3 { time_t upload_parts_freshness_threshold; int file_delivery_attempt_limit; int part_delivery_attempt_limit; - flb_sds_t configuration_endpoint_url; - flb_sds_t configuration_endpoint_username; - flb_sds_t configuration_endpoint_password; - flb_sds_t configuration_endpoint_bearer_token; + flb_sds_t authorization_endpoint_url; + flb_sds_t authorization_endpoint_username; + flb_sds_t authorization_endpoint_password; + flb_sds_t authorization_endpoint_bearer_token; + 
struct flb_upstream *authorization_endpoint_upstream; + struct flb_tls *authorization_endpoint_tls_context; /* track the total amount of buffered data */ size_t current_buffer_size; @@ -192,16 +194,19 @@ struct flb_s3 { }; int upload_part(struct flb_s3 *ctx, struct multipart_upload *m_upload, - char *body, size_t body_size); + char *body, size_t body_size, char *pre_signed_url); int create_multipart_upload(struct flb_s3 *ctx, - struct multipart_upload *m_upload); + struct multipart_upload *m_upload, + char *pre_signed_url); int complete_multipart_upload(struct flb_s3 *ctx, - struct multipart_upload *m_upload); + struct multipart_upload *m_upload, + char *pre_signed_url); int abort_multipart_upload(struct flb_s3 *ctx, - struct multipart_upload *m_upload); + struct multipart_upload *m_upload, + char *pre_signed_url); void multipart_read_uploads_from_fs(struct flb_s3 *ctx); diff --git a/plugins/out_s3/s3_multipart.c b/plugins/out_s3/s3_multipart.c index e634d0287fe..a3a4a7b3f21 100644 --- a/plugins/out_s3/s3_multipart.c +++ b/plugins/out_s3/s3_multipart.c @@ -403,7 +403,8 @@ static int complete_multipart_upload_payload(struct flb_s3 *ctx, } int complete_multipart_upload(struct flb_s3 *ctx, - struct multipart_upload *m_upload) + struct multipart_upload *m_upload, + char *pre_signed_url) { char *body; size_t size; @@ -426,8 +427,14 @@ int complete_multipart_upload(struct flb_s3 *ctx, return -1; } - tmp = flb_sds_printf(&uri, "/%s%s?uploadId=%s", ctx->bucket, - m_upload->s3_key, m_upload->upload_id); + if (pre_signed_url != NULL) { + tmp = flb_sds_copy(uri, pre_signed_url, strlen(pre_signed_url)); + } + else { + tmp = flb_sds_printf(&uri, "/%s%s?uploadId=%s", ctx->bucket, + m_upload->s3_key, m_upload->upload_id); + } + if (!tmp) { flb_sds_destroy(uri); return -1; @@ -477,11 +484,11 @@ int complete_multipart_upload(struct flb_s3 *ctx, } int abort_multipart_upload(struct flb_s3 *ctx, - struct multipart_upload *m_upload) + struct multipart_upload *m_upload, + char *pre_signed_url) { flb_sds_t uri = NULL; flb_sds_t tmp; - int ret; struct flb_http_client *c = NULL; struct flb_aws_client *s3_client; @@ -498,8 +505,14 @@ int abort_multipart_upload(struct flb_s3 *ctx, return -1; } - tmp = flb_sds_printf(&uri, "/%s%s?uploadId=%s", ctx->bucket, - m_upload->s3_key, m_upload->upload_id); + if (pre_signed_url != NULL) { + tmp = flb_sds_copy(uri, pre_signed_url, strlen(pre_signed_url)); + } + else { + tmp = flb_sds_printf(&uri, "/%s%s?uploadId=%s", ctx->bucket, + m_upload->s3_key, m_upload->upload_id); + } + if (!tmp) { flb_sds_destroy(uri); return -1; @@ -544,7 +557,8 @@ int abort_multipart_upload(struct flb_s3 *ctx, } int create_multipart_upload(struct flb_s3 *ctx, - struct multipart_upload *m_upload) + struct multipart_upload *m_upload, + char *pre_signed_url) { flb_sds_t uri = NULL; flb_sds_t tmp; @@ -560,7 +574,13 @@ int create_multipart_upload(struct flb_s3 *ctx, return -1; } - tmp = flb_sds_printf(&uri, "/%s%s?uploads=", ctx->bucket, m_upload->s3_key); + if (pre_signed_url != NULL) { + tmp = flb_sds_copy(uri, pre_signed_url, strlen(pre_signed_url)); + } + else { + tmp = flb_sds_printf(&uri, "/%s%s?uploads=", ctx->bucket, m_upload->s3_key); + } + if (!tmp) { flb_sds_destroy(uri); return -1; @@ -664,7 +684,7 @@ flb_sds_t get_etag(char *response, size_t size) } int upload_part(struct flb_s3 *ctx, struct multipart_upload *m_upload, - char *body, size_t body_size) + char *body, size_t body_size, char *pre_signed_url) { flb_sds_t uri = NULL; flb_sds_t tmp; @@ -681,9 +701,15 @@ int upload_part(struct flb_s3 
*ctx, struct multipart_upload *m_upload, return -1; } - tmp = flb_sds_printf(&uri, "/%s%s?partNumber=%d&uploadId=%s", - ctx->bucket, m_upload->s3_key, m_upload->part_number, - m_upload->upload_id); + if (pre_signed_url != NULL) { + tmp = flb_sds_copy(uri, pre_signed_url, strlen(pre_signed_url)); + } + else { + tmp = flb_sds_printf(&uri, "/%s%s?partNumber=%d&uploadId=%s", + ctx->bucket, m_upload->s3_key, m_upload->part_number, + m_upload->upload_id); + } + if (!tmp) { flb_errno(); flb_sds_destroy(uri); From d1e01220a6e266feb9bb41ee1276061b31c83c4a Mon Sep 17 00:00:00 2001 From: Leonardo Alminana Date: Thu, 24 Apr 2025 10:22:35 +0200 Subject: [PATCH 16/18] blob_db: code quality improvements Signed-off-by: Leonardo Alminana --- include/fluent-bit/flb_blob_db.h | 7 ++---- src/flb_blob_db.c | 42 ++++++++++++++++++++------------ 2 files changed, 29 insertions(+), 20 deletions(-) diff --git a/include/fluent-bit/flb_blob_db.h b/include/fluent-bit/flb_blob_db.h index f12a0cc39b4..d201c3689ed 100644 --- a/include/fluent-bit/flb_blob_db.h +++ b/include/fluent-bit/flb_blob_db.h @@ -356,12 +356,10 @@ int64_t flb_blob_db_file_insert(struct flb_blob_db *context, size_t size); int flb_blob_db_file_delete(struct flb_blob_db *context, - uint64_t id, - char *path); + uint64_t id); int flb_blob_db_file_set_aborted_state(struct flb_blob_db *context, uint64_t id, - char *path, uint64_t state); int flb_blob_file_change_destination(struct flb_blob_db *context, @@ -394,8 +392,7 @@ int flb_blob_db_file_get_next_stale(struct flb_blob_db *context, int *part_count); int flb_blob_db_file_reset_upload_states(struct flb_blob_db *context, - uint64_t id, - char *path); + uint64_t id); int flb_blob_db_file_part_insert(struct flb_blob_db *context, uint64_t file_id, diff --git a/src/flb_blob_db.c b/src/flb_blob_db.c index c3b2b6af5fb..27a156001a0 100644 --- a/src/flb_blob_db.c +++ b/src/flb_blob_db.c @@ -54,7 +54,7 @@ static int prepare_stmts(struct flb_blob_db *context) } - /* file destination update */ + /* file remote id update */ result = sqlite3_prepare_v2(context->db->handler, SQL_UPDATE_FILE_REMOTE_ID, -1, &context->stmt_update_file_remote_id, @@ -171,6 +171,8 @@ static int prepare_stmts(struct flb_blob_db *context) return FLB_BLOB_DB_ERROR_PREPARING_STATEMENT_UPDATE_FILE_PART_UPLOADED; } + /* get next file part to upload */ + result = sqlite3_prepare_v2(context->db->handler, SQL_GET_NEXT_FILE_PART, -1, &context->stmt_get_next_file_part, @@ -179,6 +181,8 @@ static int prepare_stmts(struct flb_blob_db *context) return FLB_BLOB_DB_ERROR_PREPARING_STATEMENT_GET_NEXT_FILE_PART; } + /* update file part upload in progress flag */ + result = sqlite3_prepare_v2(context->db->handler, SQL_UPDATE_FILE_PART_IN_PROGRESS, -1, &context->stmt_update_file_part_in_progress, @@ -187,6 +191,8 @@ static int prepare_stmts(struct flb_blob_db *context) return FLB_BLOB_DB_ERROR_PREPARING_STATEMENT_UPDATE_FILE_PART_IN_PROGRESS; } + /* update file part delivery attempt counter */ + result = sqlite3_prepare_v2(context->db->handler, SQL_UPDATE_FILE_PART_DELIVERY_ATTEMPT_COUNT, -1, &context->stmt_update_file_part_delivery_attempt_count, @@ -195,10 +201,13 @@ static int prepare_stmts(struct flb_blob_db *context) return FLB_BLOB_DB_ERROR_PREPARING_STATEMENT_UPDATE_FILE_DELIVERY_ATTEMPT_COUNT; } + /* get the oldest (fifo) file available to commit */ + result = sqlite3_prepare_v2(context->db->handler, SQL_GET_OLDEST_FILE_WITH_PARTS_CONCAT, -1, &context->stmt_get_oldest_file_with_parts, NULL); + if (result != SQLITE_OK) { return 
FLB_BLOB_DB_ERROR_PREPARING_STATEMENT_GET_OLDEST_FILE_WITH_PARTS;
     }
 
@@ -279,6 +288,8 @@ int flb_blob_db_open(struct flb_blob_db *context,
 
 int flb_blob_db_close(struct flb_blob_db *context)
 {
+    int result;
+
     if (context == NULL) {
         return FLB_BLOB_DB_ERROR_INVALID_BLOB_DB_CONTEXT;
     }
@@ -313,7 +324,11 @@ int flb_blob_db_close(struct flb_blob_db *context)
 
     flb_lock_destroy(&context->global_lock);
 
-    return flb_sqldb_close(context->db);
+    result = flb_sqldb_close(context->db);
+
+    context->db = NULL;
+
+    return result;
 }
 
 int flb_blob_db_lock(struct flb_blob_db *context)
@@ -416,8 +431,7 @@ int64_t flb_blob_db_file_insert(struct flb_blob_db *context,
 }
 
 int flb_blob_db_file_delete(struct flb_blob_db *context,
-                            uint64_t id,
-                            char *path)
+                            uint64_t id)
 {
     sqlite3_stmt *statement;
     int result;
@@ -449,7 +463,6 @@ int flb_blob_db_file_delete(struct flb_blob_db *context,
 
 int flb_blob_db_file_set_aborted_state(struct flb_blob_db *context,
                                        uint64_t id,
-                                       char *path,
                                        uint64_t state)
 {
     sqlite3_stmt *statement;
@@ -803,8 +816,7 @@ int flb_blob_db_file_get_next_aborted(struct flb_blob_db *context,
 
 static int flb_blob_db_file_reset_part_upload_states(struct flb_blob_db *context,
-                                                     uint64_t id,
-                                                     char *path)
+                                                     uint64_t id)
 {
     sqlite3_stmt *statement;
     int result;
@@ -836,8 +848,7 @@ static int flb_blob_db_file_reset_part_upload_states(struct flb_blob_db *context
 }
 
 int flb_blob_db_file_reset_upload_states(struct flb_blob_db *context,
-                                         uint64_t id,
-                                         char *path)
+                                         uint64_t id)
 {
     sqlite3_stmt *statement;
     int result;
@@ -861,7 +872,7 @@ int flb_blob_db_file_reset_upload_states(struct flb_blob_db *context,
         result = FLB_BLOB_DB_ERROR_FILE_UPLOAD_STATE_RESET;
     }
     else {
-        result = flb_blob_db_file_reset_part_upload_states(context, id, path);
+        result = flb_blob_db_file_reset_part_upload_states(context, id);
    }
 
     return result;
@@ -892,9 +903,13 @@ int flb_blob_db_file_part_insert(struct flb_blob_db *context,
         context->last_error = result;
 
         result = FLB_BLOB_DB_ERROR_FILE_PART_INSERT;
+
+        *out_id = -1;
     }
     else {
         result = FLB_BLOB_DB_SUCCESS;
+
+        *out_id = sqlite3_last_insert_rowid(context->db->handler);
     }
 
     sqlite3_clear_bindings(statement);
@@ -1414,15 +1429,13 @@ int64_t flb_blob_db_file_insert(struct flb_blob_db *context,
 }
 
 int flb_blob_db_file_delete(struct flb_blob_db *context,
-                            uint64_t id,
-                            char *path)
+                            uint64_t id)
 {
     return FLB_BLOB_DB_ERROR_NO_BACKEND_AVAILABLE;
 }
 
 int flb_blob_db_file_set_aborted_state(struct flb_blob_db *context,
                                        uint64_t id,
-                                       char *path,
                                        uint64_t state)
 {
     return FLB_BLOB_DB_ERROR_NO_BACKEND_AVAILABLE;
 }
@@ -1466,8 +1479,7 @@ int flb_blob_db_file_get_next_stale(struct flb_blob_db *context,
 }
 
 int flb_blob_db_file_reset_upload_states(struct flb_blob_db *context,
-                                         uint64_t id,
-                                         char *path)
+                                         uint64_t id)
 {
     return FLB_BLOB_DB_ERROR_NO_BACKEND_AVAILABLE;
 }

From 4fc7cd6f25e388dcf1c176acacc0b86224af0a10 Mon Sep 17 00:00:00 2001
From: Leonardo Alminana
Date: Thu, 24 Apr 2025 10:22:50 +0200
Subject: [PATCH 17/18] in_blob: updated date

Signed-off-by: Leonardo Alminana
---
 plugins/in_blob/blob_db.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/plugins/in_blob/blob_db.c b/plugins/in_blob/blob_db.c
index df96769212c..973df3b6d79 100644
--- a/plugins/in_blob/blob_db.c
+++ b/plugins/in_blob/blob_db.c
@@ -2,7 +2,7 @@
 
 /* Fluent Bit
  * ==========
- * Copyright (C) 2015-2024 The Fluent Bit Authors
+ * Copyright (C) 2015-2025 The Fluent Bit Authors
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License. 
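
Usage note for the id-only blob_db API from PATCH 16/18 above: a minimal
caller-side sketch in C. This is illustrative only; `db` is assumed to come
from flb_blob_db_open(), `file_id` from a prior flb_blob_db_file_insert()
call, and the helper name is hypothetical, not part of this series.

    #include <fluent-bit/flb_blob_db.h>

    /* Sketch: discard a tracked blob file using the id-only calls
     * introduced above; error handling is elided for brevity. */
    static int blob_db_discard_file(struct flb_blob_db *db, uint64_t file_id)
    {
        /* clear per-part upload progress and the aborted flag */
        flb_blob_db_file_reset_upload_states(db, file_id);
        flb_blob_db_file_set_aborted_state(db, file_id, 0);

        /* drop the blob_files row; its blob_parts rows go with it
         * through the ON DELETE CASCADE foreign key */
        return flb_blob_db_file_delete(db, file_id);
    }

No per-part cleanup call is needed after the delete, since the blob_parts
schema cascades on file removal.
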
From 528011a560197f28bf316fb6b5dd95d37983f5cb Mon Sep 17 00:00:00 2001 From: Leonardo Alminana Date: Thu, 24 Apr 2025 10:33:07 +0200 Subject: [PATCH 18/18] out_s3: addressed review requests Signed-off-by: Leonardo Alminana --- plugins/out_s3/s3.c | 432 ++++++++++++++++------------------ plugins/out_s3/s3_multipart.c | 3 +- 2 files changed, 202 insertions(+), 233 deletions(-) diff --git a/plugins/out_s3/s3.c b/plugins/out_s3/s3.c index 7ebb71391f3..af3c9c37bea 100644 --- a/plugins/out_s3/s3.c +++ b/plugins/out_s3/s3.c @@ -256,6 +256,26 @@ struct flb_http_client *mock_s3_call(char *error_env_var, char *api) ""; c->resp.payload_size = strlen(c->resp.payload); } + if (strcmp(api, "AbortMultipartUpload") == 0) { + /* mocked success response */ + c->resp.status = 204; + resp = "Date: Mon, 1 Nov 2010 20:34:56 GMT\n" + "ETag: \"b54357faf0632cce46e942fa68356b38\"\n" + "Content-Length: 0\n" + "Connection: keep-alive\n" + "Server: AmazonS3"; + /* since etag is in the headers, this code uses resp.data */ + len = strlen(resp); + c->resp.data = flb_calloc(len + 1, sizeof(char)); + if (!c->resp.data) { + flb_errno(); + flb_free(c); + return NULL; + } + memcpy(c->resp.data, resp, len); + c->resp.data[len] = '\0'; + c->resp.data_size = len; + } else if (strcmp(api, "UploadPart") == 0) { /* mocked success response */ resp = "Date: Mon, 1 Nov 2010 20:34:56 GMT\n" @@ -1966,9 +1986,9 @@ static int blob_initialize_authorization_endpoint_upstream(struct flb_s3 *contex return 0; } -static int blob_fetch_pre_signed_url(struct flb_s3 *context, - flb_sds_t *result_url, - char *url) +static int blob_request_pre_signed_url(struct flb_s3 *context, + flb_sds_t *result_url, + char *url) { int ret; size_t b_sent; @@ -2144,56 +2164,75 @@ static int blob_fetch_pre_signed_url(struct flb_s3 *context, return ret; } -static int blob_fetch_put_object_pre_signed_url(struct flb_s3 *context, - flb_sds_t *result_url, - char *tag, - char *bucket, - char *path) +static int blob_fetch_pre_signed_url(struct flb_s3 *context, + flb_sds_t *result_url, + char *format, + ...) { - char *valid_path; + va_list arguments[2]; + int url_length; int ret; flb_sds_t url; flb_sds_t tmp; - valid_path = (char *) path; + va_start(arguments[0], format); + va_copy(arguments[1], arguments[0]); - while (*valid_path == '.' 
|| - *valid_path == '/') { - valid_path++; - } + url_length = vsnprintf(NULL, 0, format, arguments[0]); - url = flb_sds_create(context->authorization_endpoint_url); + va_end(arguments[0]); + + if (url_length <= 0) { + va_end(arguments[1]); - if (url == NULL) { return -1; } - tmp = flb_sds_printf(&url, "/put_object_presigned_url/%s/%s/%s", bucket, tag, valid_path); + url = flb_sds_create_size( + flb_sds_len(context->authorization_endpoint_url) + url_length + 2); - if (tmp != NULL) { - url = tmp; + if (url == NULL) { + va_end(arguments[1]); - ret = blob_fetch_pre_signed_url(context, result_url, (char *) url); + return -2; } - else { - ret = -1; + + tmp = flb_sds_cat(url, + context->authorization_endpoint_url, + flb_sds_len(context->authorization_endpoint_url)); + + url_length = vsnprintf( + &tmp[flb_sds_len(tmp)], + flb_sds_avail(tmp), + format, + arguments[1]); + + va_end(arguments[1]); + + if (url_length <= 0) { + flb_sds_destroy(tmp); + + return -3; } + url = tmp; + + flb_sds_len_set(url, flb_sds_len(url) + url_length); + + ret = blob_request_pre_signed_url(context, result_url, (char *) url); + flb_sds_destroy(url); return ret; } -static int blob_fetch_create_multipart_upload_pre_signed_url(struct flb_s3 *context, - flb_sds_t *result_url, - char *tag, - char *bucket, - char *path) +static int blob_fetch_put_object_pre_signed_url(struct flb_s3 *context, + flb_sds_t *result_url, + char *tag, + char *bucket, + char *path) { - char *valid_path; - int ret; - flb_sds_t url; - flb_sds_t tmp; + char *valid_path; valid_path = (char *) path; @@ -2202,40 +2241,21 @@ static int blob_fetch_create_multipart_upload_pre_signed_url(struct flb_s3 *cont valid_path++; } - url = flb_sds_create(context->authorization_endpoint_url); - - if (url == NULL) { - return -1; - } - - tmp = flb_sds_printf(&url, "/multipart_creation_presigned_url/%s/%s/%s", bucket, tag, valid_path); - - if (tmp != NULL) { - url = tmp; - - ret = blob_fetch_pre_signed_url(context, result_url, (char *) url); - } - else { - ret = -1; - } - - flb_sds_destroy(url); - - return ret; + return blob_fetch_pre_signed_url(context, + result_url, + "/put_object_presigned_url/%s/%s/%s", + bucket, + tag, + valid_path); } -static int blob_fetch_multipart_upload_pre_signed_url(struct flb_s3 *context, +static int blob_fetch_create_multipart_upload_pre_signed_url(struct flb_s3 *context, flb_sds_t *result_url, char *tag, char *bucket, - char *path, - char *upload_id, - int part_number) + char *path) { - char *valid_path; - int ret; - flb_sds_t url; - flb_sds_t tmp; + char *valid_path; valid_path = (char *) path; @@ -2244,26 +2264,39 @@ static int blob_fetch_multipart_upload_pre_signed_url(struct flb_s3 *context, valid_path++; } - url = flb_sds_create(context->authorization_endpoint_url); - - if (url == NULL) { - return -1; - } + return blob_fetch_pre_signed_url(context, + result_url, + "/multipart_creation_presigned_url/%s/%s/%s", + bucket, + tag, + valid_path); +} - tmp = flb_sds_printf(&url, "/multipart_upload_presigned_url/%s/%s/%s/%s/%d", bucket, tag, valid_path, upload_id, part_number); +static int blob_fetch_multipart_upload_pre_signed_url(struct flb_s3 *context, + flb_sds_t *result_url, + char *tag, + char *bucket, + char *path, + char *upload_id, + int part_number) +{ + char *valid_path; - if (tmp != NULL) { - url = tmp; + valid_path = (char *) path; - ret = blob_fetch_pre_signed_url(context, result_url, (char *) url); - } - else { - ret = -1; + while (*valid_path == '.' 
|| + *valid_path == '/') { + valid_path++; } - flb_sds_destroy(url); - - return ret; + return blob_fetch_pre_signed_url(context, + result_url, + "/multipart_upload_presigned_url/%s/%s/%s/%s/%d", + bucket, + tag, + valid_path, + upload_id, + part_number); } static int blob_fetch_multipart_complete_pre_signed_url(struct flb_s3 *context, @@ -2273,10 +2306,7 @@ static int blob_fetch_multipart_complete_pre_signed_url(struct flb_s3 *context, char *path, char *upload_id) { - char *valid_path; - int ret; - flb_sds_t url; - flb_sds_t tmp; + char *valid_path; valid_path = (char *) path; @@ -2285,26 +2315,13 @@ static int blob_fetch_multipart_complete_pre_signed_url(struct flb_s3 *context, valid_path++; } - url = flb_sds_create(context->authorization_endpoint_url); - - if (url == NULL) { - return -1; - } - - tmp = flb_sds_printf(&url, "/multipart_complete_presigned_url/%s/%s/%s/%s", bucket, tag, valid_path, upload_id); - - if (tmp != NULL) { - url = tmp; - - ret = blob_fetch_pre_signed_url(context, result_url, (char *) url); - } - else { - ret = -1; - } - - flb_sds_destroy(url); - - return ret; + return blob_fetch_pre_signed_url(context, + result_url, + "/multipart_complete_presigned_url/%s/%s/%s/%s", + bucket, + tag, + valid_path, + upload_id); } static int blob_fetch_multipart_abort_pre_signed_url(struct flb_s3 *context, @@ -2314,10 +2331,7 @@ static int blob_fetch_multipart_abort_pre_signed_url(struct flb_s3 *context, char *path, char *upload_id) { - char *valid_path; - int ret; - flb_sds_t url; - flb_sds_t tmp; + char *valid_path; valid_path = (char *) path; @@ -2326,26 +2340,13 @@ static int blob_fetch_multipart_abort_pre_signed_url(struct flb_s3 *context, valid_path++; } - url = flb_sds_create(context->authorization_endpoint_url); - - if (url == NULL) { - return -1; - } - - tmp = flb_sds_printf(&url, "/multipart_upload_presigned_url/%s/%s/%s/%s", bucket, tag, valid_path, upload_id); - - if (tmp != NULL) { - url = tmp; - - ret = blob_fetch_pre_signed_url(context, result_url, (char *) url); - } - else { - ret = -1; - } - - flb_sds_destroy(url); - - return ret; + return blob_fetch_pre_signed_url(context, + result_url, + "/multipart_upload_presigned_url/%s/%s/%s/%s", + bucket, + tag, + valid_path, + upload_id); } static struct multipart_upload *create_blob_upload(struct flb_s3 *ctx, const char *tag, @@ -2522,6 +2523,72 @@ static int put_blob_object(struct flb_s3 *ctx, return -1; } +static int abort_blob_upload(struct flb_s3 *ctx, + cfl_sds_t file_tag, + cfl_sds_t file_path, + cfl_sds_t file_remote_id) +{ + struct multipart_upload *m_upload; + flb_sds_t pre_signed_url; + int ret; + + pre_signed_url = NULL; + + m_upload = create_blob_upload(ctx, file_tag, cfl_sds_len(file_tag), file_path); + + if (m_upload == NULL) { + return -1; + } + + mk_list_del(&m_upload->_head); + + m_upload->upload_id = flb_sds_create(file_remote_id); + + if (m_upload->upload_id == NULL) { + m_upload->part_number = 0; + + multipart_upload_destroy(m_upload); + + flb_plg_error(ctx->ins, "Could not allocate upload id copy"); + + return -2; + } + + if (ctx->authorization_endpoint_url != NULL) { + ret = blob_fetch_multipart_abort_pre_signed_url(ctx, + &pre_signed_url, + file_tag, + ctx->bucket, + file_path, + m_upload->upload_id); + + if (ret != 0) { + m_upload->part_number = 0; + + multipart_upload_destroy(m_upload); + + return -3; + } + } + else { + pre_signed_url = NULL; + } + + ret = abort_multipart_upload(ctx, m_upload, pre_signed_url); + + if (pre_signed_url != NULL) { + flb_sds_destroy(pre_signed_url); + + pre_signed_url = 
NULL; + } + + m_upload->part_number = 0; + + multipart_upload_destroy(m_upload); + + return 0; +} + static int cb_s3_upload_blob(struct flb_config *config, void *data) { int ret; @@ -2581,9 +2648,9 @@ static int cb_s3_upload_blob(struct flb_config *config, void *data) if (ret == 1) { if (part_count > 1) { - m_upload = create_blob_upload(ctx, file_tag, cfl_sds_len(file_tag), file_path); + ret = abort_blob_upload(ctx, file_tag, file_path, file_remote_id); - if (m_upload == NULL) { + if (ret != 0) { cfl_sds_destroy(file_tag); cfl_sds_destroy(file_path); cfl_sds_destroy(file_remote_id); @@ -2593,60 +2660,11 @@ static int cb_s3_upload_blob(struct flb_config *config, void *data) return -1; } - - mk_list_del(&m_upload->_head); - - m_upload->upload_id = flb_sds_create(file_remote_id); - - if (m_upload->upload_id == NULL) { - cfl_sds_destroy(file_tag); - cfl_sds_destroy(file_path); - cfl_sds_destroy(file_remote_id); - cfl_sds_destroy(file_destination); - - m_upload->part_number = 0; - multipart_upload_destroy(m_upload); - - flb_plg_error(ctx->ins, "Could not allocate upload id copy"); - - flb_blob_db_unlock(&ctx->blob_db); - - return -4; - } - - - if (ctx->authorization_endpoint_url != NULL) { - ret = blob_fetch_multipart_abort_pre_signed_url(ctx, - &pre_signed_url, - file_tag, - ctx->bucket, - file_path, - m_upload->upload_id); - - if (ret != 0) { - multipart_upload_destroy(m_upload); - - flb_blob_db_unlock(&ctx->blob_db); - - return -5; - } - } - else { - pre_signed_url = NULL; - } - - ret = abort_multipart_upload(ctx, m_upload, pre_signed_url); - - if (pre_signed_url != NULL) { - flb_sds_destroy(pre_signed_url); - - pre_signed_url = NULL; - } } flb_blob_file_update_remote_id(&ctx->blob_db, file_id, ""); - flb_blob_db_file_reset_upload_states(&ctx->blob_db, file_id, file_path); - flb_blob_db_file_set_aborted_state(&ctx->blob_db, file_id, file_path, 0); + flb_blob_db_file_reset_upload_states(&ctx->blob_db, file_id); + flb_blob_db_file_set_aborted_state(&ctx->blob_db, file_id, 0); cfl_sds_destroy(file_remote_id); cfl_sds_destroy(file_path); @@ -2673,9 +2691,9 @@ static int cb_s3_upload_blob(struct flb_config *config, void *data) if (ret == 1) { if (part_count > 1) { - m_upload = create_blob_upload(ctx, file_tag, cfl_sds_len(file_tag), file_path); + ret = abort_blob_upload(ctx, file_tag, file_path, file_remote_id); - if (m_upload == NULL) { + if (ret != 0) { cfl_sds_destroy(file_tag); cfl_sds_destroy(file_path); cfl_sds_destroy(file_remote_id); @@ -2685,65 +2703,17 @@ static int cb_s3_upload_blob(struct flb_config *config, void *data) return -1; } - - mk_list_del(&m_upload->_head); - - m_upload->upload_id = flb_sds_create(file_remote_id); - - if (m_upload->upload_id == NULL) { - cfl_sds_destroy(file_tag); - cfl_sds_destroy(file_path); - cfl_sds_destroy(file_remote_id); - cfl_sds_destroy(file_destination); - - m_upload->part_number = 0; - multipart_upload_destroy(m_upload); - - flb_plg_error(ctx->ins, "Could not allocate upload id copy"); - - flb_blob_db_unlock(&ctx->blob_db); - - return -4; - } - - if (ctx->authorization_endpoint_url != NULL) { - ret = blob_fetch_multipart_abort_pre_signed_url(ctx, - &pre_signed_url, - file_tag, - ctx->bucket, - file_path, - m_upload->upload_id); - - if (ret != 0) { - multipart_upload_destroy(m_upload); - - flb_blob_db_unlock(&ctx->blob_db); - - return -5; - } - } - else { - pre_signed_url = NULL; - } - - ret = abort_multipart_upload(ctx, m_upload, pre_signed_url); - - if (pre_signed_url != NULL) { - flb_sds_destroy(pre_signed_url); - - pre_signed_url = NULL; - } } 
if (ctx->file_delivery_attempt_limit != FLB_OUT_RETRY_UNLIMITED && file_delivery_attempts < ctx->file_delivery_attempt_limit) { flb_blob_file_update_remote_id(&ctx->blob_db, file_id, ""); - flb_blob_db_file_reset_upload_states(&ctx->blob_db, file_id, file_path); - flb_blob_db_file_set_aborted_state(&ctx->blob_db, file_id, file_path, 0); + flb_blob_db_file_reset_upload_states(&ctx->blob_db, file_id); + flb_blob_db_file_set_aborted_state(&ctx->blob_db, file_id, 0); } else { - ret = flb_blob_db_file_delete(&ctx->blob_db, file_id, file_path); + ret = flb_blob_db_file_delete(&ctx->blob_db, file_id); notification = flb_calloc(1, sizeof( @@ -2924,7 +2894,7 @@ static int cb_s3_upload_blob(struct flb_config *config, void *data) } /* remove the file entry from the database */ - ret = flb_blob_db_file_delete(&ctx->blob_db, file_id, file_path); + ret = flb_blob_db_file_delete(&ctx->blob_db, file_id); if (ret == -1) { flb_plg_error(ctx->ins, "cannot delete blob file '%s' (id=%" PRIu64 ") from the database", file_path, file_id); @@ -2999,7 +2969,7 @@ static int cb_s3_upload_blob(struct flb_config *config, void *data) * to finish and then wipe the slate and start again but we don't want * to increment the failure count in this case. */ - flb_blob_db_file_set_aborted_state(&ctx->blob_db, file_id, file_path, 1); + flb_blob_db_file_set_aborted_state(&ctx->blob_db, file_id, 1); cfl_sds_destroy(file_tag); cfl_sds_destroy(file_path); @@ -3227,7 +3197,7 @@ static int cb_s3_upload_blob(struct flb_config *config, void *data) if (ctx->part_delivery_attempt_limit != FLB_OUT_RETRY_UNLIMITED && part_delivery_attempts >= ctx->part_delivery_attempt_limit) { - flb_blob_db_file_set_aborted_state(&ctx->blob_db, file_id, file_path, 1); + flb_blob_db_file_set_aborted_state(&ctx->blob_db, file_id, 1); } } diff --git a/plugins/out_s3/s3_multipart.c b/plugins/out_s3/s3_multipart.c index a3a4a7b3f21..7ad7b2095b1 100644 --- a/plugins/out_s3/s3_multipart.c +++ b/plugins/out_s3/s3_multipart.c @@ -521,8 +521,7 @@ int abort_multipart_upload(struct flb_s3 *ctx, s3_client = ctx->s3_client; if (s3_plugin_under_test() == FLB_TRUE) { - /* c = mock_s3_call("TEST_ABORT_MULTIPART_UPLOAD_ERROR", "AbortMultipartUpload"); */ - c = NULL; + c = mock_s3_call("TEST_ABORT_MULTIPART_UPLOAD_ERROR", "AbortMultipartUpload"); } else { c = s3_client->client_vtable->request(s3_client, FLB_HTTP_DELETE,