Skip to content

Commit

Permalink
migration-helpers: back-compat: remove datastore keys removed in migration
Browse files Browse the repository at this point in the history

The old migrator interface used a single data store as source and target.
After a migration changes the MigrationData, the resulting values are written
out to the data store.  Since this data store also served as the source, it had
all of the old keys on disk already, and nothing explicitly removed them; we'd
only see new keys overwrite old.

To allow downgrading to versions with the old migrator interface, we need
migrations to explicitly remove keys from the data store.  This change to
migration-helpers detects keys that were removed from MigrationData by a
migration and tells the data store to explicitly remove them.  As long as
migrations are rebuilt against this library, they'll work in the old or new
migrator.

This workaround can be removed when we no longer support versions with the old
migrator interface.
  • Loading branch information
tjkirch committed Feb 1, 2020
1 parent 918b815 commit 4fed84a
Show file tree
Hide file tree
Showing 6 changed files with 229 additions and 16 deletions.
2 changes: 2 additions & 0 deletions workspaces/Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 4 additions & 0 deletions workspaces/api/migration/migration-helpers/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -9,3 +9,7 @@ publish = false
apiserver = { path = "../../apiserver" }
snafu = "0.6"
toml = "0.5"

[dev-dependencies]
maplit = "1.0"
serde_json = "1.0"
25 changes: 25 additions & 0 deletions workspaces/api/migration/migration-helpers/src/args.rs
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ pub(crate) fn parse_args(args: env::Args) -> Result<Args> {
let mut migration_type = None;
let mut source_datastore = None;
let mut target_datastore = None;
let mut datastore_path = None;

let mut iter = args.skip(1);
while let Some(arg) = iter.next() {
Expand All @@ -54,13 +55,37 @@ pub(crate) fn parse_args(args: env::Args) -> Result<Args> {
}))
}

// Support the argument of the old migrator interface, with some caveats
"--datastore-path" => {
datastore_path = Some(
iter.next()
.unwrap_or_else(|| usage_msg("Did not give argument to --datastore-path")),
);
}

"--forward" => migration_type = Some(MigrationType::Forward),
"--backward" => migration_type = Some(MigrationType::Backward),

_ => usage(),
}
}

if let Some(datastore_path) = datastore_path {
// For compatibility with old migration interface that had single source+target; other code
// in migration-helpers checks if source==target to see if it needs to do a workaround.
if source_datastore.is_some() || target_datastore.is_some() {
usage_msg("--datastore-path is only for backward compatibility and \
cannot be used with --source-datastore / --target-datastore");
}
source_datastore = Some(datastore_path.clone());
target_datastore = Some(datastore_path);
} else {
// In no other case should they be the same; we use it for compatibility checks.
if source_datastore == target_datastore {
usage_msg("--source-datastore and --target-datastore cannot be the same");
}
}

Ok(Args {
source_datastore: source_datastore.unwrap_or_else(|| usage()),
target_datastore: target_datastore.unwrap_or_else(|| usage()),
Expand Down
6 changes: 6 additions & 0 deletions workspaces/api/migration/migration-helpers/src/error.rs
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,12 @@ pub enum Error {
#[snafu(display("Unable to write to data store: {}", source))]
DataStoreWrite { source: datastore::Error },

#[snafu(display("Unable to remove key '{}' from data store: {}", key, source))]
DataStoreRemove {
key: String,
source: datastore::Error,
},

#[snafu(display("Migrated data failed validation: {}", msg))]
Validation { msg: String },

Expand Down
40 changes: 24 additions & 16 deletions workspaces/api/migration/migration-helpers/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
mod args;
mod datastore;
pub mod error;
mod workarounds;

use std::collections::HashMap;
use std::env;
Expand All @@ -22,9 +23,10 @@ use std::fmt;
use apiserver::datastore::{Committed, Value};
pub use apiserver::datastore::{DataStore, FilesystemDataStore};

use args::parse_args;
use args::{parse_args, Args};
use datastore::{get_input_data, set_output_data};
pub use error::Result;
use workarounds::fix_migrated_data;

/// The data store implementation currently in use. Used by the simpler `migrate` interface; can
/// be overridden by using the `run_migration` interface.
Expand Down Expand Up @@ -57,7 +59,7 @@ pub type Metadata = HashMap<String, Value>;
/// MigrationData holds all data that can be migrated in a migration, and serves as the input and
/// output format of migrations. A serde Value type is used to hold the arbitrary data of each
/// key because we can't represent types when they could change in the migration.
#[derive(Debug)]
#[derive(Debug, Clone)]
pub struct MigrationData {
/// Mapping of data key names to their arbitrary values.
pub data: HashMap<String, Value>,
Expand All @@ -83,23 +85,31 @@ fn validate_migrated_data(_migrated: &MigrationData) -> Result<()> {
/// If you need a little more control over a migration than with migrate, or you're using this
/// module as a library, you can call run_migration directly with the arguments that would
/// normally be parsed from the migration binary's command line.
pub fn run_migration<D: DataStore>(
mut migration: impl Migration,
source: &D,
target: &mut D,
migration_type: MigrationType,
) -> Result<()> {
pub fn run_migration(mut migration: impl Migration, args: &Args) -> Result<()> {
let source = DataStoreImplementation::new(&args.source_datastore);
let mut target = DataStoreImplementation::new(&args.target_datastore);

for committed in &[Committed::Live, Committed::Pending] {
let input = get_input_data(source, *committed)?;
let input = get_input_data(&source, *committed)?;

let migrated = match migration_type {
MigrationType::Forward => migration.forward(input),
MigrationType::Backward => migration.backward(input),
let mut migrated = input.clone();
migrated = match args.migration_type {
MigrationType::Forward => migration.forward(migrated),
MigrationType::Backward => migration.backward(migrated),
}?;

fix_migrated_data(
&input,
&mut migrated,
&source,
&mut target,
*committed,
&args,
)?;

validate_migrated_data(&migrated)?;

set_output_data(target, &migrated, *committed)?;
set_output_data(&mut target, &migrated, *committed)?;
}
Ok(())
}
Expand All @@ -126,7 +136,5 @@ impl fmt::Display for MigrationType {
/// migration type.
pub fn migrate(migration: impl Migration) -> Result<()> {
    // Parse the migration binary's command-line arguments, then hand them straight to the
    // full-control entry point; any parse or migration error propagates to the caller.
    parse_args(env::args()).and_then(|args| run_migration(migration, &args))
}
168 changes: 168 additions & 0 deletions workspaces/api/migration/migration-helpers/src/workarounds.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,168 @@
use crate::args::Args;
use crate::{error, MigrationData, Result};
use apiserver::datastore::{Committed, DataStore, Key, KeyType};
use snafu::ResultExt;
use std::collections::HashSet;

/// Here we can fix known issues with migrated data, for example issues related to changes
/// in migration interface that we don't want the migrations to have to deal with.
///
/// Currently handles one workaround: when the source and target data store paths are the
/// same (the old migrator interface), any key a migration removed from `MigrationData` is
/// explicitly removed from the target data store, since rewriting only the surviving keys
/// would otherwise leave the removed keys behind on disk.
pub(crate) fn fix_migrated_data<D: DataStore>(
    input: &MigrationData,
    output: &MigrationData,
    _source_datastore: &D,
    target_datastore: &mut D,
    committed: Committed,
    args: &Args,
) -> Result<()> {
    // If the source and target data store path are the same, we're using the old migrator
    // interface, and have to use a workaround to be able to delete keys.  They can't just be
    // removed from the MigrationData struct, because the old interface used the same data store
    // for input and output, and removing from MigrationData just means we won't write it out
    // again - but the file will still be there from the input.  We need to tell the data store
    // to remove it.
    if args.source_datastore == args.target_datastore {
        // Data keys first
        let old_keys: HashSet<_> = input.data.keys().collect();
        let new_keys: HashSet<_> = output.data.keys().collect();
        // `.copied()` turns the `&&String` items from difference() back into `&String`,
        // avoiding a manual `.map(|&s| s)` / deref at each use site.
        for removed_key_str in old_keys.difference(&new_keys).copied() {
            // We need to make a Key from the key's name to fit the data store interface; we
            // don't use Key in MigrationData for the convenience of migration authors.
            let removed_key =
                Key::new(KeyType::Data, removed_key_str).context(error::InvalidKey {
                    key_type: KeyType::Data,
                    key: removed_key_str,
                })?;
            target_datastore
                .unset_key(&removed_key, committed)
                .context(error::DataStoreRemove {
                    key: removed_key_str,
                })?;
        }

        // Now the same thing for metadata
        for (data_key_str, old_metadata) in &input.metadata {
            let removed: HashSet<_> = if let Some(new_metadata) = output.metadata.get(data_key_str)
            {
                // Find which metadata keys the migration removed, for this data key
                let old_keys: HashSet<_> = old_metadata.keys().collect();
                let new_keys: HashSet<_> = new_metadata.keys().collect();
                old_keys.difference(&new_keys).copied().collect()
            } else {
                // Migration output has no metadata for this data key, so it was all removed
                old_metadata.keys().collect()
            };

            for removed_meta_str in removed {
                let removed_meta =
                    Key::new(KeyType::Meta, removed_meta_str).context(error::InvalidKey {
                        key_type: KeyType::Meta,
                        key: removed_meta_str,
                    })?;
                let removed_data =
                    Key::new(KeyType::Data, data_key_str).context(error::InvalidKey {
                        key_type: KeyType::Data,
                        key: data_key_str,
                    })?;
                target_datastore
                    .unset_metadata(&removed_meta, &removed_data)
                    .context(error::DataStoreRemove {
                        key: removed_meta_str,
                    })?;
            }
        }
    }

    Ok(())
}

#[cfg(test)]
mod test {
    use super::fix_migrated_data;
    use crate::datastore::set_output_data;
    use crate::{Args, MigrationData, MigrationType};
    use apiserver::datastore::memory::MemoryDataStore;
    use apiserver::datastore::{Committed, DataStore, Key, KeyType};
    use maplit::hashmap;
    use serde_json::json;

    // Verifies the old-interface workaround: when source and target data stores are the
    // same path, keys missing from the migration's output must be unset in the data store.
    #[test]
    fn test_fix_migrated_data() {
        // Data/metadata starting with "remove" should be removed
        let input = MigrationData {
            data: hashmap!(
                "keepdata".into() => json!("hi"),
                "removedata".into() => json!("sup"),
            ),
            metadata: hashmap!(
                "keepdata".into() => hashmap!(
                    "keepmeta".into() => json!("howdy"),
                    "removemeta".into() => json!("yo"),
                ),
                "removedata".into() => hashmap!(
                    "keepmeta".into() => json!("hello"),
                    "removemeta".into() => json!("hiya"),
                ),
            ),
        };
        // This represents 'input' after a migration removes some data, so it should match the
        // data store after we call fix_migrated_data
        let expected = MigrationData {
            data: hashmap!(
                "keepdata".into() => json!("hi"),
            ),
            metadata: hashmap!(
                "keepdata".into() => hashmap!(
                    "keepmeta".into() => json!("howdy"),
                ),
                "removedata".into() => hashmap!(
                    "keepmeta".into() => json!("hello"),
                ),
            ),
        };

        // The point of the workaround is affecting the data store directly, so make test stores
        let mut source = MemoryDataStore::new();
        set_output_data(&mut source, &input, Committed::Live).unwrap();
        // To replicate old interface, the target data store starts with the input data, and
        // we're going to confirm that removed values are actually removed
        let mut target = MemoryDataStore::new();
        set_output_data(&mut target, &input, Committed::Live).unwrap();

        // Ensure values are there at the start
        // (values come back in serialized JSON form, e.g. "\"hi\"", per the asserts below)
        let kept_data = Key::new(KeyType::Data, "keepdata").unwrap();
        let removed_data = Key::new(KeyType::Data, "removedata").unwrap();
        let kept_meta = Key::new(KeyType::Meta, "keepmeta").unwrap();
        let removed_meta = Key::new(KeyType::Meta, "removemeta").unwrap();
        assert_eq!(target.get_key(&kept_data, Committed::Live).unwrap(), Some("\"hi\"".into()));
        assert_eq!(target.get_key(&removed_data, Committed::Live).unwrap(), Some("\"sup\"".into()));
        assert_eq!(target.get_metadata(&kept_meta, &kept_data).unwrap(), Some("\"howdy\"".into()));
        assert_eq!(target.get_metadata(&kept_meta, &removed_data).unwrap(), Some("\"hello\"".into()));
        assert_eq!(target.get_metadata(&removed_meta, &kept_data).unwrap(), Some("\"yo\"".into()));
        assert_eq!(target.get_metadata(&removed_meta, &removed_data).unwrap(), Some("\"hiya\"".into()));

        // Same source and target, i.e. using old interface, so we should do our fix
        let args = Args {
            source_datastore: "same".into(),
            target_datastore: "same".into(),
            migration_type: MigrationType::Forward,
        };
        fix_migrated_data(
            &input,
            &expected,
            &source,
            &mut target,
            Committed::Live,
            &args,
        )
        .unwrap();

        // Ensure unaffected values were kept
        assert_eq!(target.get_key(&kept_data, Committed::Live).unwrap(), Some("\"hi\"".into()));
        assert_eq!(target.get_metadata(&kept_meta, &kept_data).unwrap(), Some("\"howdy\"".into()));
        assert_eq!(target.get_metadata(&kept_meta, &removed_data).unwrap(), Some("\"hello\"".into()));
        // Ensure removed values were removed
        assert_eq!(target.get_key(&removed_data, Committed::Live).unwrap(), None);
        assert_eq!(target.get_metadata(&removed_meta, &kept_data).unwrap(), None);
        assert_eq!(target.get_metadata(&removed_meta, &removed_data).unwrap(), None);
    }
}

0 comments on commit 4fed84a

Please sign in to comment.