Skip to content

Commit

Permalink
[FLASH-480] Fix bug: rename column (#227)
Browse files Browse the repository at this point in the history
* fix bug: rename column
* log info instead of error when column rename is detected
  • Loading branch information
JaySon-Huang authored Sep 10, 2019
1 parent 96b4f0e commit 1b40aba
Show file tree
Hide file tree
Showing 5 changed files with 244 additions and 83 deletions.
118 changes: 118 additions & 0 deletions dbms/src/Storages/Transaction/SchemaBuilder-internal.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,118 @@
#pragma once

#include <map>
#include <set>
#include <utility>
#include <vector>

#include <Core/Types.h>
#include <Storages/Transaction/Types.h>

/// === Private structs / methods for SchemaBuilder
/// Note that this file should only be included by SchemaBuilder.cpp and by the unit tests for this file.

namespace DB
{

// Prefix that the Tmp*Generator functors below prepend to a name to build a temporary name.
constexpr char tmpNamePrefix[] = "_tiflash_tmp_";

/// Builds the temporary name for a table name given as a pair of strings.
/// Only the second element of the pair receives `tmpNamePrefix`; the first element is kept unchanged
/// (presumably the pair is (database, table) -- confirm against the caller).
struct TmpTableNameGenerator
{
    using TableName = std::pair<String, String>;

    /// Stateless, hence const: the functor can be invoked through a const object as well.
    TableName operator()(const TableName & name) const
    {
        return std::make_pair(name.first, String(tmpNamePrefix) + name.second);
    }
};

/// Builds the temporary name for a column: `tmpNamePrefix` followed by the given name.
struct TmpColNameGenerator
{
    /// Stateless, hence const: the functor can be invoked through a const object as well.
    String operator()(const String & name) const
    {
        return String(tmpNamePrefix) + name;
    }
};


/// A column name bundled with a column id.
/// Equality and ordering look at `name` only; `id` is carried along but never compared,
/// so two values with the same name and different ids are equivalent as map / set keys.
struct ColumnNameWithID
{
    String name;
    ColumnID id;

    explicit ColumnNameWithID(String name_ = "", ColumnID id_ = 0)
        : name(std::move(name_)), id(id_)
    {
    }

    /// Name-only equality (ids are ignored).
    bool operator==(const ColumnNameWithID & other) const
    {
        return name == other.name;
    }

    /// Name-only ordering (ids are ignored).
    bool operator<(const ColumnNameWithID & other) const
    {
        return name < other.name;
    }
};

/// Builds the temporary counterpart of a ColumnNameWithID:
/// the name gets `tmpNamePrefix` prepended, the id is kept unchanged.
struct TmpColNameWithIDGenerator
{
    /// Stateless, hence const: the functor can be invoked through a const object as well.
    ColumnNameWithID operator()(const ColumnNameWithID & name_with_id) const
    {
        return ColumnNameWithID{String(tmpNamePrefix) + name_with_id.name, name_with_id.id};
    }
};


// CyclicRenameResolver turns a set of simultaneous renames (origin name -> target name) into an
// ordered list of single renames that can be applied one after another.
//  - A chain (a -> b, b -> c) is emitted tail first (b -> c, then a -> b).
//  - A cycle (a -> b, b -> a) is broken by first moving one name to a temporary name.
//
// Name_ must be default-constructible and copyable and must provide operator< and operator==.
// TmpNameGenerator renames a name to a temp name that will not conflict with other names.
// NOTE(review): the algorithm looks like it assumes that no two origins share the same target -- confirm.
template <typename Name_, typename TmpNameGenerator>
struct CyclicRenameResolver
{
    using Name = Name_;
    using NamePair = std::pair<Name, Name>;
    using NamePairs = std::vector<NamePair>;
    using NameSet = std::set<Name>;
    using NameMap = std::map<Name, Name>;

    // Origin names that have already been processed.
    NameSet visited;
    TmpNameGenerator name_gen;

    // Resolves `rename_map` into an ordered list of (from, to) renames.
    // `rename_map` is consumed: every entry is erased while resolving.
    // We do not ensure correctness if it is called multiple times, so it is an rvalue-only call.
    NamePairs resolve(NameMap && rename_map) &&
    {
        NamePairs result;
        for (auto it = rename_map.begin(); it != rename_map.end(); /* advanced by erase */)
        {
            if (!visited.count(it->first))
                resolveImpl(rename_map, it, result);

            // The rename of `it` has been emitted, so its origin name is free from now on.
            // Dropping the entry lets a later rename that targets this name be emitted directly
            // instead of being mistaken for a cycle.
            it = rename_map.erase(it);
        }
        return result;
    }

private:
    // Emits the renames needed for the entry `it` (following the chain of targets first).
    // Returns (name, temp name) when a cycle was broken by moving `name` to `temp name`,
    // or a default-constructed pair when no cycle was found.
    NamePair resolveImpl(NameMap & rename_map, typename NameMap::iterator it, NamePairs & result)
    {
        Name origin_name = it->first;
        const Name target_name = it->second;
        visited.insert(origin_name);

        auto next_it = rename_map.find(target_name);
        if (next_it == rename_map.end())
        {
            // The target name is not going to be renamed itself, so we can rename directly.
            result.emplace_back(origin_name, target_name);
            return NamePair();
        }

        if (visited.find(target_name) != visited.end())
        {
            // The target name is visited but still pending, so this is a cyclic rename.
            // Take the name to move away from the map key instead of from `target_name`:
            // both compare equal, but the key is the entry that really owns the name, so any
            // payload the comparison ignores (e.g. a column id) stays attached to the right name.
            const Name & occupied_name = next_it->first;
            Name tmp_name = name_gen(occupied_name);
            result.emplace_back(occupied_name, tmp_name);
            result.emplace_back(origin_name, target_name);
            return NamePair(occupied_name, std::move(tmp_name));
        }

        // The target name is itself pending a rename, so resolve it first.
        NamePair cycle_break = resolveImpl(rename_map, next_it, result);
        if (cycle_break.first == origin_name)
        {
            // Our origin has been moved to the temp name while breaking the cycle.
            origin_name = cycle_break.second;
        }
        result.emplace_back(origin_name, target_name);
        return cycle_break;
    }
};


} // namespace DB
90 changes: 7 additions & 83 deletions dbms/src/Storages/Transaction/SchemaBuilder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
#include <Parsers/parseQuery.h>
#include <Storages/MutableSupport.h>
#include <Storages/Transaction/SchemaBuilder.h>
#include <Storages/Transaction/SchemaBuilder-internal.h>
#include <Storages/Transaction/TMTContext.h>
#include <Storages/Transaction/TypeMapping.h>

Expand All @@ -26,84 +27,6 @@ namespace ErrorCodes
extern const int DDL_ERROR;
}


// Prefix that the Tmp*Generator functors below prepend to a name to build a temporary name.
constexpr char tmpNamePrefix[] = "_tiflash_tmp_";

/// Builds the temporary name for a table name given as a pair of strings.
/// Only the second element of the pair receives `tmpNamePrefix`; the first element is kept unchanged
/// (presumably the pair is (database, table) -- confirm against the caller).
struct TmpTableNameGenerator
{
    using TableName = std::pair<String, String>;

    /// Stateless, hence const: the functor can be invoked through a const object as well.
    TableName operator()(const TableName & name) const
    {
        return std::make_pair(name.first, String(tmpNamePrefix) + name.second);
    }
};

/// Builds the temporary name for a column: `tmpNamePrefix` followed by the given name.
struct TmpColNameGenerator
{
    /// Stateless, hence const: the functor can be invoked through a const object as well.
    String operator()(const String & name) const
    {
        return String(tmpNamePrefix) + name;
    }
};

// CyclicRenameResolver turns a set of simultaneous renames (origin name -> target name) into an
// ordered list of single renames that can be applied one after another.
//  - A chain (a -> b, b -> c) is emitted tail first (b -> c, then a -> b).
//  - A cycle (a -> b, b -> a) is broken by first moving one name to a temporary name.
//
// Name_ must be default-constructible and copyable and must provide operator< and operator==.
// TmpNameGenerator renames a name to a temp name that will not conflict with other names.
template <typename Name_, typename TmpNameGenerator>
struct CyclicRenameResolver
{
    using Name = Name_;
    using NamePair = std::pair<Name, Name>;
    using NameMap = std::map<Name, Name>;
    using NameSet = std::set<Name>;

    // Origin names that have already been processed.
    NameSet visited;
    TmpNameGenerator name_gen;

    // Resolves `rename_map` into an ordered list of (from, to) renames.
    // We do not ensure correctness if it is called multiple times, so it is an rvalue-only call.
    std::vector<NamePair> resolve(const NameMap & rename_map) &&
    {
        // Work on a private copy so that finished renames can be dropped. Without that, a later
        // rename targeting an already renamed (hence free) name, e.g. {a -> c, b -> a}, would be
        // mistaken for a cycle and produce a bogus "a -> tmp" step.
        NameMap pending(rename_map);

        std::vector<NamePair> result;
        for (auto it = pending.begin(); it != pending.end(); /* advanced by erase */)
        {
            if (!visited.count(it->first))
                resolveImpl(pending, it, result);

            // The rename of `it` has been emitted, so its origin name is free from now on.
            it = pending.erase(it);
        }
        return result;
    }

private:
    // Emits the renames needed for the entry `it` (following the chain of targets first).
    // Returns (name, temp name) when a cycle was broken by moving `name` to `temp name`,
    // or a default-constructed pair when no cycle was found.
    NamePair resolveImpl(const NameMap & rename_map, typename NameMap::const_iterator it, std::vector<NamePair> & result)
    {
        Name origin_name = it->first;
        const Name target_name = it->second;
        visited.insert(origin_name);

        const auto next_it = rename_map.find(target_name);
        if (next_it == rename_map.end())
        {
            // The target name is not going to be renamed itself, so we can rename directly.
            result.emplace_back(origin_name, target_name);
            return NamePair();
        }

        if (visited.find(target_name) != visited.end())
        {
            // The target name is visited but still pending, so this is a cyclic rename.
            // The name to move away is taken from the map key, i.e. the entry that owns it.
            const Name & occupied_name = next_it->first;
            Name tmp_name = name_gen(occupied_name);
            result.emplace_back(occupied_name, tmp_name);
            result.emplace_back(origin_name, target_name);
            return NamePair(occupied_name, std::move(tmp_name));
        }

        // The target name is itself pending a rename, so resolve it first.
        NamePair cycle_break = resolveImpl(rename_map, next_it, result);
        if (cycle_break.first == origin_name)
        {
            // Our origin has been moved to the temp name while breaking the cycle.
            origin_name = cycle_break.second;
        }
        result.emplace_back(origin_name, target_name);
        return cycle_break;
    }
};


inline void setAlterCommandColumn(Logger * log, AlterCommand & command, const ColumnInfo & column_info)
{
command.column_name = column_info.name;
Expand Down Expand Up @@ -152,7 +75,8 @@ inline std::vector<AlterCommands> detectSchemaChanges(Logger * log, const TableI
}

{
std::map<String, String> rename_map;
using Resolver = CyclicRenameResolver<String, TmpColNameGenerator>;
typename Resolver::NameMap rename_map;
/// rename columns.
for (const auto & orig_column_info : orig_table_info.columns)
{
Expand All @@ -167,7 +91,7 @@ inline std::vector<AlterCommands> detectSchemaChanges(Logger * log, const TableI
}
}

auto rename_result = CyclicRenameResolver<String, TmpColNameGenerator>().resolve(rename_map);
typename Resolver::NamePairs rename_result = Resolver().resolve(std::move(rename_map));
for (const auto & rename_pair : rename_result)
{
AlterCommands rename_commands;
Expand All @@ -189,7 +113,7 @@ inline std::vector<AlterCommands> detectSchemaChanges(Logger * log, const TableI
const auto & column_info
= std::find_if(table_info.columns.begin(), table_info.columns.end(), [&](const ColumnInfo & column_info_) {
if (column_info_.id == orig_column_info.id && column_info_.name != orig_column_info.name)
LOG_ERROR(log, "detect column " << orig_column_info.name << " rename to " << column_info_.name);
LOG_INFO(log, "detect column " << orig_column_info.name << " rename to " << column_info_.name);

return column_info_.id == orig_column_info.id
&& (column_info_.tp != orig_column_info.tp || column_info_.hasNotNullFlag() != orig_column_info.hasNotNullFlag());
Expand All @@ -200,7 +124,7 @@ inline std::vector<AlterCommands> detectSchemaChanges(Logger * log, const TableI
AlterCommand command;
// Type changed column.
command.type = AlterCommand::MODIFY_COLUMN;
// Alter column with old name.
// Alter column with new column info
setAlterCommandColumn(log, command, *column_info);
alter_commands.emplace_back(std::move(command));
}
Expand Down Expand Up @@ -798,7 +722,7 @@ void SchemaBuilder<Getter>::alterAndRenameTables(std::vector<std::pair<TableInfo
}
}

auto result = Resolver().resolve(rename_map);
typename Resolver::NamePairs result = Resolver().resolve(std::move(rename_map));
for (const auto & rename_pair : result)
{
applyRenameTableImpl(rename_pair.first.first, rename_pair.second.first, rename_pair.first.second, rename_pair.second.second);
Expand Down
1 change: 1 addition & 0 deletions dbms/src/Storages/Transaction/SchemaBuilder.h
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
#pragma once

#include <Interpreters/Context.h>
#include <Storages/StorageMergeTree.h>
#include <Storages/Transaction/SchemaGetter.h>

Expand Down
91 changes: 91 additions & 0 deletions dbms/src/Storages/Transaction/tests/gtest_rename_resolver.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
#include <test_utils/TiflashTestBasic.h>

#include <Storages/Transaction/SchemaBuilder-internal.h>
#include <Storages/Transaction/SchemaBuilder.h>

namespace DB::tests
{

// Independent renames: every rename is emitted unchanged, in key order.
TEST(CyclicRenameResolver_test, resolve_normal)
{
    using Resolver = CyclicRenameResolver<String, TmpColNameGenerator>;

    std::map<String, String> rename_map;
    rename_map.emplace("a", "aa");
    rename_map.emplace("b", "bb");

    const auto rename_result = Resolver().resolve(std::move(rename_map));
    ASSERT_EQ(rename_result.size(), 2UL);

    // a -> aa
    const auto & first_rename = rename_result[0];
    ASSERT_EQ(first_rename.first, "a");
    ASSERT_EQ(first_rename.second, "aa");

    // b -> bb
    const auto & second_rename = rename_result[1];
    ASSERT_EQ(second_rename.first, "b");
    ASSERT_EQ(second_rename.second, "bb");
}

// Linked renames (b takes the name that a gives up): no temp name is needed.
TEST(CyclicRenameResolver_test, resolve_linked)
{
    using Resolver = CyclicRenameResolver<String, TmpColNameGenerator>;

    std::map<String, String> rename_map;
    rename_map.emplace("a", "c");
    rename_map.emplace("b", "a");

    const auto rename_result = Resolver().resolve(std::move(rename_map));
    ASSERT_EQ(rename_result.size(), 2UL);

    // a -> c
    const auto & first_rename = rename_result[0];
    ASSERT_EQ(first_rename.first, "a");
    ASSERT_EQ(first_rename.second, "c");

    // b -> a
    const auto & second_rename = rename_result[1];
    ASSERT_EQ(second_rename.first, "b");
    ASSERT_EQ(second_rename.second, "a");
}

// Two names swapped with each other: the cycle is broken through a temp name.
TEST(CyclicRenameResolver_test, resolve_simple_cycle)
{
    using Resolver = CyclicRenameResolver<String, TmpColNameGenerator>;

    std::map<String, String> rename_map;
    rename_map.emplace("a", "b");
    rename_map.emplace("b", "a");

    const auto rename_result = Resolver().resolve(std::move(rename_map));

    // Temp name that "a" is expected to be parked under.
    const String tmp_a = TmpColNameGenerator()("a");

    ASSERT_EQ(rename_result.size(), 3UL);

    // a -> tmp_a
    const auto & park = rename_result[0];
    ASSERT_EQ(park.first, "a");
    ASSERT_EQ(park.second, tmp_a);

    // b -> a
    const auto & direct = rename_result[1];
    ASSERT_EQ(direct.first, "b");
    ASSERT_EQ(direct.second, "a");

    // tmp_a -> b
    const auto & unpark = rename_result[2];
    ASSERT_EQ(unpark.first, tmp_a);
    ASSERT_EQ(unpark.second, "b");
}

// Same swap as the simple-cycle test, but with names that carry a column id.
// Note that ColumnNameWithID compares by name only, so the checks below do not verify ids.
TEST(CyclicRenameResolver_test, resolve_id_simple_cycle)
{
    using Resolver = CyclicRenameResolver<ColumnNameWithID, TmpColNameWithIDGenerator>;

    std::map<ColumnNameWithID, ColumnNameWithID> rename_map;
    rename_map.emplace(ColumnNameWithID{"a", 1}, ColumnNameWithID{"b", 1});
    rename_map.emplace(ColumnNameWithID{"b", 2}, ColumnNameWithID{"a", 2});

    const auto rename_result = Resolver().resolve(std::move(rename_map));

    // Temp name that "a" is expected to be parked under.
    const ColumnNameWithID tmp_a = TmpColNameWithIDGenerator()(ColumnNameWithID{"a", 1});

    ASSERT_EQ(rename_result.size(), 3UL);

    // a -> tmp_a
    const auto & park = rename_result[0];
    ASSERT_EQ(park.first, ColumnNameWithID("a", 1L));
    ASSERT_EQ(park.second, tmp_a);

    // b -> a
    const auto & direct = rename_result[1];
    ASSERT_EQ(direct.first, ColumnNameWithID("b", 2L));
    ASSERT_EQ(direct.second, ColumnNameWithID("a", 2L));

    // tmp_a -> b
    const auto & unpark = rename_result[2];
    ASSERT_EQ(unpark.first, tmp_a);
    ASSERT_EQ(unpark.second, ColumnNameWithID("b", 1));
}

} // namespace DB
27 changes: 27 additions & 0 deletions tests/mutable-test/txn_schema/rename_column.test
Original file line number Diff line number Diff line change
Expand Up @@ -155,3 +155,30 @@
=> DBGInvoke __drop_tidb_table(default, test)
=> drop table if exists default.test
=> DBGInvoke __refresh_schemas()

## test for partial-linked rename
=> DBGInvoke __mock_tidb_table(default, test, 'a String, b Int8')
=> DBGInvoke __refresh_schemas()
=> DBGInvoke __put_region(4, 0, 100, default, test)
=> DBGInvoke __raft_insert_row(default, test, 4, 50, 'test', 1)
=> DBGInvoke __raft_insert_row(default, test, 4, 51, 'test', 2)
=> DBGInvoke __try_flush_region(4)
=> select a, b from default.test order by _tidb_rowid
┌─a────┬─b─┐
│ test │ 1 │
│ test │ 2 │
└──────┴───┘

# rename a -> c, and b -> a
=> DBGInvoke __rename_column_in_tidb_table(default, test, a, c)
=> DBGInvoke __rename_column_in_tidb_table(default, test, b, a)
=> DBGInvoke __refresh_schemas()
=> select a, c from default.test order by _tidb_rowid
┌─a─┬─c────┐
│ 1 │ test │
│ 2 │ test │
└───┴──────┘

=> DBGInvoke __drop_tidb_table(default, test)
=> drop table if exists default.test
=> DBGInvoke __refresh_schemas()

0 comments on commit 1b40aba

Please sign in to comment.