-
Notifications
You must be signed in to change notification settings - Fork 2.5k
[HUDI-2795] Add mechanism to safely update,delete and recover table properties #4038
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -28,6 +28,7 @@ | |
| import org.apache.hudi.common.model.HoodieTableType; | ||
| import org.apache.hudi.common.model.OverwriteWithLatestAvroPayload; | ||
| import org.apache.hudi.common.table.timeline.versioning.TimelineLayoutVersion; | ||
| import org.apache.hudi.common.util.FileIOUtils; | ||
| import org.apache.hudi.common.util.Option; | ||
| import org.apache.hudi.common.util.ValidationUtils; | ||
| import org.apache.hudi.exception.HoodieIOException; | ||
|
|
@@ -46,6 +47,8 @@ | |
| import java.util.Date; | ||
| import java.util.Map; | ||
| import java.util.Properties; | ||
| import java.util.Set; | ||
| import java.util.function.BiConsumer; | ||
| import java.util.stream.Collectors; | ||
|
|
||
| /** | ||
|
|
@@ -68,6 +71,7 @@ public class HoodieTableConfig extends HoodieConfig { | |
| private static final Logger LOG = LogManager.getLogger(HoodieTableConfig.class); | ||
|
|
||
| public static final String HOODIE_PROPERTIES_FILE = "hoodie.properties"; | ||
| public static final String HOODIE_PROPERTIES_FILE_BACKUP = "hoodie.properties.backup"; | ||
|
|
||
| public static final ConfigProperty<String> NAME = ConfigProperty | ||
| .key("hoodie.table.name") | ||
|
|
@@ -172,12 +176,11 @@ public HoodieTableConfig(FileSystem fs, String metaPath, String payloadClassName | |
| Path propertyPath = new Path(metaPath, HOODIE_PROPERTIES_FILE); | ||
| LOG.info("Loading table properties from " + propertyPath); | ||
| try { | ||
| try (FSDataInputStream inputStream = fs.open(propertyPath)) { | ||
| props.load(inputStream); | ||
| } | ||
| fetchConfigs(fs, metaPath); | ||
| if (contains(PAYLOAD_CLASS_NAME) && payloadClassName != null | ||
| && !getString(PAYLOAD_CLASS_NAME).equals(payloadClassName)) { | ||
| setValue(PAYLOAD_CLASS_NAME, payloadClassName); | ||
| // FIXME(vc): wonder if this can be removed. Need to look into history. | ||
| try (FSDataOutputStream outputStream = fs.create(propertyPath)) { | ||
| props.store(outputStream, "Properties saved on " + new Date(System.currentTimeMillis())); | ||
| } | ||
|
|
@@ -191,16 +194,103 @@ public HoodieTableConfig(FileSystem fs, String metaPath, String payloadClassName | |
|
|
||
| /** | ||
| * For serializing and de-serializing. | ||
| * | ||
| */ | ||
| public HoodieTableConfig() { | ||
| super(); | ||
| } | ||
|
|
||
| private void fetchConfigs(FileSystem fs, String metaPath) throws IOException { | ||
| Path cfgPath = new Path(metaPath, HOODIE_PROPERTIES_FILE); | ||
| try (FSDataInputStream is = fs.open(cfgPath)) { | ||
| props.load(is); | ||
| } catch (IOException ioe) { | ||
| if (!fs.exists(cfgPath)) { | ||
vinothchandar marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| LOG.warn("Run `table recover-configs` if config update/delete failed midway. Falling back to backed up configs."); | ||
| // try the backup. this way no query ever fails if update fails midway. | ||
| Path backupCfgPath = new Path(metaPath, HOODIE_PROPERTIES_FILE_BACKUP); | ||
| try (FSDataInputStream is = fs.open(backupCfgPath)) { | ||
| props.load(is); | ||
| } | ||
| } else { | ||
| throw ioe; | ||
| } | ||
| } | ||
| } | ||
|
|
||
| public static void recover(FileSystem fs, Path metadataFolder) throws IOException { | ||
| Path cfgPath = new Path(metadataFolder, HOODIE_PROPERTIES_FILE); | ||
| Path backupCfgPath = new Path(metadataFolder, HOODIE_PROPERTIES_FILE_BACKUP); | ||
| recoverIfNeeded(fs, cfgPath, backupCfgPath); | ||
| } | ||
|
|
||
| static void recoverIfNeeded(FileSystem fs, Path cfgPath, Path backupCfgPath) throws IOException { | ||
| if (!fs.exists(cfgPath)) { | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. are there chances of partial/corrupted HOODIE_PROPERTIES_FILE during updates? If yes, can we do something like: irrespective of whether HOODIE_PROPERTIES_FILE exists or not, if HOODIE_PROPERTIES_FILE_BACKUP exists, we override the HOODIE_PROPERTIES_FILE with contents from HOODIE_PROPERTIES_FILE_BACKUP.
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. there is not in cloud storage. its atomic. On hdfs there is a small window for this - but very rare since the write will fit within the block size. It's solvable by adding a checksum entry to the file and validating. I have a note in the description around this.
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. cc @prashantwason I filed HUDI-2809 for this. if you have time, please help! :) |
||
| // copy over from backup | ||
| try (FSDataInputStream in = fs.open(backupCfgPath); | ||
| FSDataOutputStream out = fs.create(cfgPath, false)) { | ||
| FileIOUtils.copy(in, out); | ||
| } | ||
| } | ||
| // regardless, we don't need the backup anymore. | ||
| fs.delete(backupCfgPath, false); | ||
| } | ||
|
|
||
| private static void upsertProperties(Properties current, Properties updated) { | ||
| updated.forEach((k, v) -> current.setProperty(k.toString(), v.toString())); | ||
| } | ||
|
|
||
| private static void deleteProperties(Properties current, Properties deleted) { | ||
| deleted.forEach((k, v) -> current.remove(k.toString())); | ||
| } | ||
|
|
||
| private static void modify(FileSystem fs, Path metadataFolder, Properties modifyProps, BiConsumer<Properties, Properties> modifyFn) { | ||
| Path cfgPath = new Path(metadataFolder, HOODIE_PROPERTIES_FILE); | ||
| Path backupCfgPath = new Path(metadataFolder, HOODIE_PROPERTIES_FILE_BACKUP); | ||
| try { | ||
| // 0. do any recovery from prior attempts. | ||
| recoverIfNeeded(fs, cfgPath, backupCfgPath); | ||
|
|
||
| // 1. backup the existing properties. | ||
| try (FSDataInputStream in = fs.open(cfgPath); | ||
| FSDataOutputStream out = fs.create(backupCfgPath, false)) { | ||
| FileIOUtils.copy(in, out); | ||
| } | ||
| /// 2. delete the properties file, reads will go to the backup, until we are done. | ||
| fs.delete(cfgPath, false); | ||
| // 3. read current props, upsert and save back. | ||
| try (FSDataInputStream in = fs.open(backupCfgPath); | ||
| FSDataOutputStream out = fs.create(cfgPath, true)) { | ||
| Properties props = new Properties(); | ||
| props.load(in); | ||
| modifyFn.accept(props, modifyProps); | ||
| props.store(out, "Updated at " + System.currentTimeMillis()); | ||
| } | ||
| // 4. verify and remove backup. | ||
| // FIXME(vc): generate a hash for verification. | ||
| fs.delete(backupCfgPath, false); | ||
| } catch (IOException e) { | ||
| throw new HoodieIOException("Error updating table configs.", e); | ||
| } | ||
| } | ||
|
|
||
| /** | ||
| * Upserts the table config with the set of properties passed in. We implement a fail-safe backup protocol | ||
| * here for safely updating with recovery and also ensuring the table config continues to be readable. | ||
| */ | ||
| public static void update(FileSystem fs, Path metadataFolder, Properties updatedProps) { | ||
| modify(fs, metadataFolder, updatedProps, HoodieTableConfig::upsertProperties); | ||
| } | ||
|
|
||
| public static void delete(FileSystem fs, Path metadataFolder, Set<String> deletedProps) { | ||
| Properties props = new Properties(); | ||
| deletedProps.forEach(p -> props.setProperty(p, "")); | ||
| modify(fs, metadataFolder, props, HoodieTableConfig::deleteProperties); | ||
| } | ||
|
|
||
| /** | ||
| * Initialize the hoodie meta directory and any necessary files inside the meta (including the hoodie.properties). | ||
| */ | ||
| public static void createHoodieProperties(FileSystem fs, Path metadataFolder, Properties properties) | ||
| public static void create(FileSystem fs, Path metadataFolder, Properties properties) | ||
| throws IOException { | ||
| if (!fs.exists(metadataFolder)) { | ||
| fs.mkdirs(metadataFolder); | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
IMO, this is more like replace-configs rather than updating existing ones.