Skip to content

Commit

Permalink
introduce --fixFENsource (#92)
Browse files Browse the repository at this point in the history
  • Loading branch information
robertnurnberg authored Oct 24, 2023
1 parent 22059f1 commit 62abcbe
Show file tree
Hide file tree
Showing 3 changed files with 111 additions and 60 deletions.
8 changes: 5 additions & 3 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,10 +1,12 @@
*.png
*.json
*.pgn
*.pgn.gz
*.gz
*.log
*.exe

pgns*
scoreWDLstat
*.exe

pgns*
Stockfish
books
129 changes: 84 additions & 45 deletions scoreWDLstat.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,19 @@ map_t pos_map = {};
// map to collect metadata for tests
using map_meta = std::unordered_map<std::string, TestMetaData>;

// class (and map) to hold data that cutechess-cli changed from original FENs
class FixFenData {
public:
std::string ep; // possibly changed to '-' by cutechess-cli
int halfmove; // for .epd books changed to 0 by cutechess-cli
int fullmove; // for .epd books changed to 1 by cutechess-cli

FixFenData() {}
FixFenData(const std::string &ep, int halfmove, int fullmove)
: ep(ep), halfmove(halfmove), fullmove(fullmove) {}
};
using map_fens = std::unordered_map<std::string, FixFenData>;

std::atomic<std::size_t> total_chunks = 0;
std::atomic<std::size_t> total_games = 0;

Expand All @@ -46,8 +59,8 @@ static constexpr int map_size = 1200000;
/// @brief Analyze a file with pgn games and update the position map, apply filter if present
class Analyze : public pgn::Visitor {
public:
Analyze(const std::string &regex_engine, const std::string &move_counter)
: regex_engine(regex_engine), move_counter(move_counter) {}
Analyze(const std::string &regex_engine, const map_fens &fixfen_map)
: regex_engine(regex_engine), fixfen_map(fixfen_map) {}

virtual ~Analyze() {}

Expand Down Expand Up @@ -82,11 +95,23 @@ class Analyze : public pgn::Visitor {

void header(std::string_view key, std::string_view value) override {
if (key == "FEN") {
std::regex p("0 1$");

// revert change by cutechess-cli of move counters in .epd books to "0 1"
if (!move_counter.empty() && std::regex_search(value.data(), p)) {
board.setFen(std::regex_replace(value.data(), p, "0 " + move_counter));
std::regex p("^(.+) (.+) 0 1$");
std::smatch match;
std::string value_str(value);

// revert changes by cutechess-cli to move counters, but trust it on ep square
if (!fixfen_map.empty() && std::regex_search(value_str, match, p) && match.size() > 2) {
std::string fen = match[1];
auto it = fixfen_map.find(fen);
if (it == fixfen_map.end()) {
std::cerr << "Could not find FEN " << fen << " in fixFENsource." << std::endl;
std::exit(1);
}
const auto &fix = it->second;
std::string ep = match[2]; // trust cutechess-cli on this one
std::string fixed_value = fen + " " + ep + " " + std::to_string(fix.halfmove) +
" " + std::to_string(fix.fullmove);
board.setFen(fixed_value);
} else {
board.setFen(value);
}
Expand Down Expand Up @@ -208,7 +233,7 @@ class Analyze : public pgn::Visitor {

private:
const std::string &regex_engine;
const std::string &move_counter;
const map_fens &fixfen_map;

Board board;
Movelist moves;
Expand All @@ -229,39 +254,10 @@ class Analyze : public pgn::Visitor {
};

void ana_files(const std::vector<std::string> &files, const std::string &regex_engine,
const map_meta &meta_map, bool fix_fens) {
const map_fens &fixfen_map) {
for (const auto &file : files) {
std::string move_counter;
if (fix_fens) {
std::string test_filename = file.substr(0, file.find_last_of('-'));

if (meta_map.find(test_filename) == meta_map.end()) {
std::cout << "Error: No metadata for test " << test_filename << std::endl;
std::exit(1);
}

if (meta_map.at(test_filename).book_depth.has_value()) {
move_counter = std::to_string(meta_map.at(test_filename).book_depth.value() + 1);
} else {
if (!meta_map.at(test_filename).book.has_value()) {
std::cout << "Error: Missing \"book\" key in metadata for test "
<< test_filename << std::endl;
std::exit(1);
}

std::regex p(".epd");

if (std::regex_search(meta_map.at(test_filename).book.value(), p)) {
std::cout << "Error: Missing \"book_depth\" key in metadata for .epd book "
"for test "
<< test_filename << std::endl;
std::exit(1);
}
}
}

const auto pgn_iterator = [&](std::istream &iss) {
auto vis = std::make_unique<Analyze>(regex_engine, move_counter);
auto vis = std::make_unique<Analyze>(regex_engine, fixfen_map);

pgn::StreamParser parser(iss);

Expand All @@ -286,6 +282,45 @@ void ana_files(const std::vector<std::string> &files, const std::string &regex_e

} // namespace analysis

[[nodiscard]] map_fens get_fixfen(std::string file) {
map_fens fixfen_map;
if (file.empty()) {
return fixfen_map;
}

const auto fen_iterator = [&](std::istream &iss) {
std::string line;
while (std::getline(iss, line)) {
std::istringstream iss(line);
std::string key, f1, f2, f3, ep;
int halfmove, fullmove = 0;

iss >> f1 >> f2 >> f3 >> ep >> halfmove >> fullmove;
key = f1 + ' ' + f2 + ' ' + f3;
FixFenData fixfen_data(ep, halfmove, fullmove);

if (fixfen_map.find(key) != fixfen_map.end()) {
// for duplicate FENs, prefer the one with lower full move counter
if (fullmove && fullmove < fixfen_map[key].fullmove) {
fixfen_map[key] = fixfen_data;
}
} else {
fixfen_map[key] = fixfen_data;
}
}
};

if (file.size() >= 3 && file.substr(file.size() - 3) == ".gz") {
igzstream input(file.c_str());
fen_iterator(input);
} else {
std::ifstream input(file);
fen_iterator(input);
}

return fixfen_map;
}

[[nodiscard]] map_meta get_metadata(const std::vector<std::string> &file_list,
bool allow_duplicates) {
map_meta meta_map;
Expand Down Expand Up @@ -387,7 +422,7 @@ void filter_files_sprt(std::vector<std::string> &file_list, const map_meta &meta
}

void process(const std::vector<std::string> &files_pgn, const std::string &regex_engine,
const map_meta &meta_map, bool fix_fens, int concurrency) {
const map_meta &meta_map, const map_fens &fixfen_map, int concurrency) {
// Create more chunks than threads to prevent threads from idling.
int target_chunks = 4 * concurrency;

Expand All @@ -407,8 +442,8 @@ void process(const std::vector<std::string> &files_pgn, const std::string &regex

for (const auto &files : files_chunked) {
pool.enqueue(
[&files, &regex_engine, &meta_map, &fix_fens, &progress_mutex, &files_chunked]() {
analysis::ana_files(files, regex_engine, meta_map, fix_fens);
[&files, &regex_engine, &meta_map, &fixfen_map, &progress_mutex, &files_chunked]() {
analysis::ana_files(files, regex_engine, fixfen_map);

total_chunks++;

Expand Down Expand Up @@ -465,7 +500,7 @@ void print_usage(char const *program_name) {
ss << " --matchBook <regex> Filter data based on book name in metadata" << "\n";
ss << " --matchBookInvert Invert the filter" << "\n";
ss << " --SPRTonly Analyse only pgns from SPRT tests" << "\n";
ss << " --fixFEN Patch move counters lost by cutechess-cli" << "\n";
ss << " --fixFENsource Patch move counters lost by cutechess-cli based on FENs in this file" << "\n";
ss << " -o <path> Path to output json file (default: scoreWDLstat.json)" << "\n";
ss << " --help Print this help message" << "\n";
// clang-format on
Expand Down Expand Up @@ -555,7 +590,11 @@ int main(int argc, char const *argv[]) {
regex_engine = regex_rev;
}

bool fix_fens = find_argument(args, pos, "--fixFEN", true);
std::string fixfen_source;
if (find_argument(args, pos, "--fixFENsource")) {
fixfen_source = *std::next(pos);
}
auto fixfen_map = get_fixfen(fixfen_source);

if (find_argument(args, pos, "--matchEngine")) {
regex_engine = *std::next(pos);
Expand All @@ -569,7 +608,7 @@ int main(int argc, char const *argv[]) {

const auto t0 = std::chrono::high_resolution_clock::now();

process(files_pgn, regex_engine, meta_map, fix_fens, concurrency);
process(files_pgn, regex_engine, meta_map, fixfen_map, concurrency);

const auto t1 = std::chrono::high_resolution_clock::now();

Expand Down
34 changes: 22 additions & 12 deletions updateWDL.sh
Original file line number Diff line number Diff line change
Expand Up @@ -9,25 +9,32 @@ echo "started at: " $(date)
firstrev=70ba9de85cddc5460b1ec53e0a99bee271e26ece
lastrev=HEAD

# regex for book name
# "regex" for book name, TODO: only works for single book name for now!
bookname=UHO_4060_v3.epd

# path for PGN files
pgnpath=pgns

# clone SF if needed
if [[ ! -e Stockfish ]]; then
git clone https://github.com/official-stockfish/Stockfish.git >&clone.log
fi
# clone repos if needed, and pull latest revisions
for repo in "Stockfish" "books"; do
if [[ ! -e "$repo" ]]; then
git clone https://github.com/official-stockfish/"$repo".git >&clone.log
fi
cd "$repo"
git checkout master >&checkout.log
git fetch origin >&fetch.log
git pull >&pull.log
cd ..
done

# compile scoreWDLstat if needed
make >&make.log
# get the book necessary for the move counter fixing, TODO: deal with several books
if [[ ! -e "$bookname.gz" ]]; then
unzip "books/$bookname.zip"
gzip "$bookname"
fi

# update SF and get a revision list
# get a SF revision list
cd Stockfish
git checkout master >&checkout.log
git fetch origin >&fetch.log
git pull >&pull.log
revs=$(git rev-list $firstrev^..$lastrev)

# get the currently valid value of NormalizeToPawnValue
Expand All @@ -51,12 +58,15 @@ regex_pattern="${regex_pattern%|}"

cd ..

# compile scoreWDLstat if needed
make >&make.log

echo "Look recursively in directory $pgnpath for games from SPRT tests using" \
"books matching \"$bookname\" for SF revisions between $firstrev (from" \
"$oldepoch) and $lastrev (from $newepoch)."

# obtain the WDL data from games of SPRT tests of the SF revisions of interest
./scoreWDLstat --dir $pgnpath -r --matchRev $regex_pattern --matchBook "$bookname" --fixFEN --SPRTonly -o updateWDL.json >&scoreWDLstat.log
./scoreWDLstat --dir $pgnpath -r --matchRev $regex_pattern --matchBook "$bookname" --fixFENsource "$bookname.gz" --SPRTonly -o updateWDL.json >&scoreWDLstat.log

# fit the new WDL model, keeping anchor at move 32
# we ignore the first 2 full moves out of book for fitting (11=8+1+2), and the first 9 for (contour) plotting (18=8+1+9)
Expand Down

0 comments on commit 62abcbe

Please sign in to comment.