Skip to content

Commit 3e3a5af

Browse files
dhoekwater and copybara-github
authored and committed
Keep CMake files sorted
PiperOrigin-RevId: 700423079
1 parent 9827311 commit 3e3a5af

File tree

4 files changed

+362
-6
lines changed

4 files changed

+362
-6
lines changed

propeller/BUILD

+22
Original file line number | Diff line number | Diff line change
@@ -1103,3 +1103,25 @@ cc_test(
11031103
"@com_google_googletest//:gtest_main",
11041104
],
11051105
)
1106+
1107+
# Library target for the Propeller profile writer, built from
# profile_writer.cc / profile_writer.h.
cc_library(
    name = "profile_writer",
    srcs = ["profile_writer.cc"],
    hdrs = ["profile_writer.h"],
    deps = [
        ":cfg",
        ":cfg_edge",
        ":cfg_id",
        ":cfg_node",
        ":function_chain_info",
        ":profile",
        ":propeller_options_cc_proto",
        "@abseil-cpp//absl/algorithm:container",
        "@abseil-cpp//absl/container:flat_hash_map",
        "@abseil-cpp//absl/log",
        "@abseil-cpp//absl/log:check",
        "@abseil-cpp//absl/strings",
        "@abseil-cpp//absl/strings:str_format",
        "@llvm-project//llvm:Support",
    ],
)

propeller/CMakeLists.txt

+11-6
Original file line number | Diff line number | Diff line change
@@ -25,31 +25,34 @@ target_link_libraries(propeller_protos PUBLIC ${Protobuf_LIBRARIES})
2525

2626
# Build all CXX targets into a unified library.
2727
add_library(propeller_lib OBJECT
28+
# go/keep-sorted start
2829
addr2cu.cc
2930
binary_address_mapper.cc
3031
binary_content.cc
3132
branch_aggregation.cc
3233
branch_frequencies.cc
34+
cfg.cc
3335
cfg_edge_kind.cc
3436
cfg_node.cc
35-
cfg.cc
3637
chain_cluster_builder.cc
37-
code_layout_scorer.cc
3838
code_layout.cc
39+
code_layout_scorer.cc
3940
file_perf_data_provider.cc
4041
frequencies_branch_aggregator.cc
4142
lbr_branch_aggregator.cc
43+
node_chain.cc
4244
node_chain_assembly.cc
4345
node_chain_builder.cc
44-
node_chain.cc
45-
perfdata_reader.cc
4646
perf_branch_frequencies_aggregator.cc
47-
program_cfg_builder.cc
47+
perfdata_reader.cc
48+
profile_writer.cc
4849
program_cfg.cc
50+
program_cfg_builder.cc
4951
propeller_statistics.cc
5052
proto_branch_frequencies_aggregator.cc
5153
resolve_mmap_name.cc
5254
spe_tid_pid_provider.cc
55+
# go/keep-sorted end
5356
)
5457
target_link_libraries(propeller_lib
5558
absl::base
@@ -76,20 +79,22 @@ target_link_libraries(propeller_test_lib
7679
include(${CMAKE_HOME_DIRECTORY}/CMake/GenerateTests.cmake)
7780
propeller_generate_tests(
7881
SRCS
82+
# go/keep-sorted start
7983
branch_aggregation_test.cc
8084
branch_frequencies_test.cc
8185
cfg_test.cc
8286
file_perf_data_provider_test.cc
8387
frequencies_branch_aggregator_test.cc
8488
lazy_evaluator_test.cc
8589
lbr_branch_aggregator_test.cc
86-
perfdata_reader_test.cc
8790
perf_branch_frequencies_aggregator_test.cc
91+
perfdata_reader_test.cc
8892
propeller_statistics_test.cc
8993
proto_branch_frequencies_aggregator_test.cc
9094
spe_tid_pid_provider_test.cc
9195
status_macros_test.cc
9296
status_testing_macros_test.cc
97+
# go/keep-sorted end
9398
DEPS
9499
propeller_lib
95100
propeller_test_lib

propeller/profile_writer.cc

+268
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,268 @@
1+
#include "propeller/profile_writer.h"
2+
3+
#include <fstream>
4+
#include <memory>
5+
#include <optional>
6+
#include <ostream>
7+
#include <string>
8+
#include <utility>
9+
#include <vector>
10+
11+
#include "absl/algorithm/container.h"
12+
#include "absl/container/flat_hash_map.h"
13+
#include "absl/log/check.h"
14+
#include "absl/strings/str_cat.h"
15+
#include "absl/strings/str_format.h"
16+
#include "absl/strings/str_join.h"
17+
#include "absl/strings/string_view.h"
18+
#include "llvm/ADT/SmallString.h"
19+
#include "llvm/ADT/SmallVector.h"
20+
#include "llvm/ADT/StringExtras.h"
21+
#include "llvm/ADT/StringRef.h"
22+
#include "llvm/Support/FileSystem.h"
23+
#include "llvm/Support/Path.h"
24+
#include "propeller/cfg.h"
25+
#include "propeller/cfg_edge.h"
26+
#include "propeller/cfg_id.h"
27+
#include "propeller/cfg_node.h"
28+
#include "propeller/function_chain_info.h"
29+
#include "propeller/profile.h"
30+
#include "propeller/propeller_options.pb.h"
31+
32+
namespace propeller {
33+
namespace {
34+
void DumpCfgs(const PropellerProfile &profile,
35+
absl::string_view cfg_dump_dir_name) {
36+
// Create the cfg dump directory and the cfg index file.
37+
llvm::sys::fs::create_directory(cfg_dump_dir_name);
38+
llvm::SmallString<100> cfg_index_file_vec(cfg_dump_dir_name.begin(),
39+
cfg_dump_dir_name.end());
40+
llvm::sys::path::append(cfg_index_file_vec, "cfg-index.txt");
41+
std::string cfg_index_file(cfg_index_file_vec.str());
42+
std::ofstream cfg_index_os(cfg_index_file, std::ofstream::out);
43+
CHECK(cfg_index_os.good())
44+
<< "Failed to open " << cfg_index_file << " for writing.";
45+
cfg_index_os << absl::StrJoin({"Function.Name", "Function.Address", "N_Nodes",
46+
"N_Clusters", "Original.ExtTSP.Score",
47+
"Optimized.ExtTSP.Score"},
48+
" ")
49+
<< "\n";
50+
51+
for (const auto &[section_name, section_function_chain_info] :
52+
profile.functions_chain_info_by_section_name) {
53+
for (const FunctionChainInfo &func_chain_info :
54+
section_function_chain_info) {
55+
const ControlFlowGraph *cfg =
56+
profile.program_cfg->GetCfgByIndex(func_chain_info.function_index);
57+
CHECK_NE(cfg, nullptr);
58+
// Dump hot cfgs into the given directory.
59+
auto func_addr_str =
60+
absl::StrCat("0x", absl::Hex(cfg->GetEntryNode()->addr()));
61+
cfg_index_os << cfg->GetPrimaryName().str() << " " << func_addr_str << " "
62+
<< cfg->nodes().size() << " "
63+
<< func_chain_info.bb_chains.size() << " "
64+
<< func_chain_info.original_score.intra_score << " "
65+
<< func_chain_info.optimized_score.intra_score << "\n";
66+
67+
// Use the address of the function as the CFG filename for uniqueness.
68+
llvm::SmallString<100> cfg_dump_file_vec(cfg_dump_dir_name.begin(),
69+
cfg_dump_dir_name.end());
70+
llvm::sys::path::append(cfg_dump_file_vec,
71+
absl::StrCat(func_addr_str, ".dot"));
72+
std::string cfg_dump_file(cfg_dump_file_vec.str());
73+
std::ofstream cfg_dump_os(cfg_dump_file, std::ofstream::out);
74+
CHECK(cfg_dump_os.good())
75+
<< "Failed to open " << cfg_dump_file << " for writing.";
76+
77+
absl::flat_hash_map<IntraCfgId, int> layout_index_map;
78+
for (auto &bb_chain : func_chain_info.bb_chains) {
79+
int bbs = 0;
80+
for (auto &bb_bundle : bb_chain.bb_bundles) {
81+
for (int bbi = 0; bbi < bb_bundle.full_bb_ids.size(); ++bbi) {
82+
layout_index_map.insert({bb_bundle.full_bb_ids[bbi].intra_cfg_id,
83+
bb_chain.layout_index + bbs + bbi});
84+
}
85+
bbs += bb_bundle.full_bb_ids.size();
86+
}
87+
}
88+
89+
cfg->WriteDotFormat(cfg_dump_os, layout_index_map);
90+
}
91+
}
92+
}
93+
94+
// Dumps the intra-function edge profile of `cfg` into `out`.
95+
// For each CFGNode, it prints out a line in the format of
96+
// "#<bb>:<bb_freq> <succ_bb_1>:<edge_freq_1> <succ_bb_2>:<edge_freq_2> ..."
97+
// which starts first with the bb id and frequency of that node, followed by the
98+
// successors and their edge frequencies. Please note that the edge weights
99+
// may not precisely add up to the node frequency.
100+
void DumpCfgProfile(const ControlFlowGraph &cfg, std::ofstream &out) {
101+
cfg.ForEachNodeRef([&](const CFGNode &node) {
102+
int node_frequency = node.CalculateFrequency();
103+
out << "#cfg-prof " << node.bb_id() << ":" << node_frequency;
104+
node.ForEachOutEdgeRef([&](const CFGEdge &edge) {
105+
if (!edge.IsBranchOrFallthrough()) return;
106+
out << " " << edge.sink()->bb_id() << ":" << edge.weight();
107+
});
108+
out << "\n";
109+
});
110+
}
111+
} // namespace
112+
113+
void PropellerProfileWriter::Write(const PropellerProfile &profile) const {
114+
std::ofstream cc_profile_os(options_.cluster_out_name());
115+
std::ofstream ld_profile_os(options_.symbol_order_out_name());
116+
if (profile_encoding_.version != ClusterEncodingVersion::VERSION_0) {
117+
cc_profile_os << profile_encoding_.version_specifier << "\n";
118+
}
119+
// TODO(b/160339651): Remove this in favour of structured format in LLVM code.
120+
for (const auto &[section_name, section_function_chain_info] :
121+
profile.functions_chain_info_by_section_name) {
122+
if (options_.verbose_cluster_output())
123+
cc_profile_os << "#section " << section_name.str() << "\n";
124+
// Find total number of chains.
125+
unsigned total_chains = 0;
126+
for (const auto &func_chain_info : section_function_chain_info)
127+
total_chains += func_chain_info.bb_chains.size();
128+
129+
// Allocate the symbol order vector
130+
std::vector<std::pair<llvm::SmallVector<llvm::StringRef, 3>,
131+
std::optional<unsigned>>>
132+
symbol_order(total_chains);
133+
// Allocate the cold symbol order vector equally sized as
134+
// function_chain_info, as there is (at most) one cold cluster per
135+
// function.
136+
std::vector<const FunctionChainInfo *> cold_symbol_order(
137+
section_function_chain_info.size());
138+
for (const FunctionChainInfo &func_layout_info :
139+
section_function_chain_info) {
140+
const ControlFlowGraph *cfg =
141+
profile.program_cfg->GetCfgByIndex(func_layout_info.function_index);
142+
CHECK_NE(cfg, nullptr);
143+
if (cfg->module_name().has_value() &&
144+
profile_encoding_.version == ClusterEncodingVersion::VERSION_1) {
145+
// For version 1, print the module name before the function name
146+
// specifier on a separate line.
147+
cc_profile_os << profile_encoding_.module_name_specifier
148+
<< cfg->module_name().value().str() << "\n";
149+
}
150+
// Print all alias names of the function.
151+
cc_profile_os << profile_encoding_.function_name_specifier
152+
<< llvm::join(cfg->names(),
153+
profile_encoding_.function_name_separator);
154+
if (cfg->module_name().has_value() &&
155+
profile_encoding_.version == ClusterEncodingVersion::VERSION_0) {
156+
// For version 0, print the module name after the function names and on
157+
// the same line.
158+
cc_profile_os << profile_encoding_.module_name_specifier
159+
<< cfg->module_name().value().str();
160+
}
161+
cc_profile_os << "\n";
162+
// Print cloning paths.
163+
if (!cfg->clone_paths().empty()) {
164+
CHECK_EQ(profile_encoding_.version, ClusterEncodingVersion::VERSION_1)
165+
<< "cloning is not supported for version: "
166+
<< profile_encoding_.version;
167+
}
168+
for (const std::vector<int> &clone_path : cfg->clone_paths()) {
169+
cc_profile_os << profile_encoding_.clone_path_specifier
170+
<< absl::StrJoin(
171+
clone_path, " ",
172+
[&](std::string *out, const int bb_index) {
173+
absl::StrAppend(out,
174+
cfg->nodes()[bb_index]->bb_id());
175+
})
176+
<< "\n";
177+
}
178+
if (options_.verbose_cluster_output()) {
179+
// Print the layout score for intra-function and inter-function edges
180+
// involving this function. This information allows us to study the
181+
// impact on layout score on each individual function.
182+
cc_profile_os << absl::StreamFormat(
183+
"#ext-tsp score: [intra: %f -> %f] [inter: %f -> %f]\n",
184+
func_layout_info.original_score.intra_score,
185+
func_layout_info.optimized_score.intra_score,
186+
func_layout_info.original_score.inter_out_score,
187+
func_layout_info.optimized_score.inter_out_score);
188+
// Print out the frequency of the function entry node.
189+
cc_profile_os << absl::StreamFormat(
190+
"#entry-freq %llu\n", cfg->GetEntryNode()->CalculateFrequency());
191+
}
192+
const std::vector<FunctionChainInfo::BbChain> &chains =
193+
func_layout_info.bb_chains;
194+
for (unsigned chain_id = 0; chain_id < chains.size(); ++chain_id) {
195+
auto &chain = chains[chain_id];
196+
std::vector<FullIntraCfgId> bb_ids_in_chain =
197+
chains[chain_id].GetAllBbs();
198+
// If a chain starts with zero BB index (function entry basic block),
199+
// the function name is sufficient for section ordering. Otherwise,
200+
// the chain number is required.
201+
symbol_order[chain.layout_index] =
202+
std::pair<llvm::SmallVector<llvm::StringRef, 3>,
203+
std::optional<unsigned>>(
204+
cfg->names(), bb_ids_in_chain.front().intra_cfg_id.bb_index == 0
205+
? std::optional<unsigned>()
206+
: chain_id);
207+
for (int bbi = 0; bbi < bb_ids_in_chain.size(); ++bbi) {
208+
const auto &full_bb_id = bb_ids_in_chain[bbi];
209+
cc_profile_os << (bbi != 0 ? " "
210+
: profile_encoding_.cluster_specifier)
211+
<< full_bb_id.bb_id;
212+
if (full_bb_id.intra_cfg_id.clone_number != 0)
213+
cc_profile_os << "." << full_bb_id.intra_cfg_id.clone_number;
214+
}
215+
cc_profile_os << "\n";
216+
}
217+
218+
// Dump the edge profile for this CFG if requested.
219+
if (options_.verbose_cluster_output())
220+
DumpCfgProfile(*cfg, cc_profile_os);
221+
222+
cold_symbol_order[func_layout_info.cold_chain_layout_index] =
223+
&func_layout_info;
224+
}
225+
226+
for (const auto &[func_names, chain_id] : symbol_order) {
227+
// Print the symbol names corresponding to every function name alias. This
228+
// guarantees we get the right order regardless of which function name is
229+
// picked by the compiler.
230+
for (auto &func_name : func_names) {
231+
ld_profile_os << func_name.str();
232+
if (chain_id.has_value())
233+
ld_profile_os << ".__part." << chain_id.value();
234+
ld_profile_os << "\n";
235+
}
236+
}
237+
238+
// Insert the .cold symbols for cold parts of hot functions.
239+
for (const FunctionChainInfo *chain_info : cold_symbol_order) {
240+
const ControlFlowGraph *cfg =
241+
profile.program_cfg->GetCfgByIndex(chain_info->function_index);
242+
CHECK_NE(cfg, nullptr);
243+
// The cold node should not be emitted if all basic blocks appear in the
244+
// chains.
245+
int num_bbs_in_chains = 0;
246+
for (const FunctionChainInfo::BbChain &chain : chain_info->bb_chains)
247+
num_bbs_in_chains += chain.GetNumBbs();
248+
if (num_bbs_in_chains == cfg->nodes().size()) continue;
249+
// Check if the function entry is in the chains. The entry node always
250+
// begins its chain. So this simply checks the first node in every
251+
// chain.
252+
bool entry_is_in_chains = absl::c_any_of(
253+
chain_info->bb_chains, [](const FunctionChainInfo::BbChain &chain) {
254+
return chain.GetFirstBb().intra_cfg_id.bb_index == 0;
255+
});
256+
for (auto &func_name : cfg->names()) {
257+
ld_profile_os << func_name.str();
258+
// If the entry node is not in chains, function name can serve as the
259+
// cold symbol name. So we don't need the ".cold" suffix.
260+
if (entry_is_in_chains) ld_profile_os << ".cold";
261+
ld_profile_os << "\n";
262+
}
263+
}
264+
}
265+
if (options_.has_cfg_dump_dir_name())
266+
DumpCfgs(profile, options_.cfg_dump_dir_name());
267+
}
268+
} // namespace propeller

0 commit comments

Comments
 (0)