Skip to content

Commit

Permalink
Add RNTuple benchmark for ATLAS file
Browse files Browse the repository at this point in the history
  • Loading branch information
kamahori committed Jul 30, 2020
1 parent 9d751ab commit b5ee814
Show file tree
Hide file tree
Showing 3 changed files with 331 additions and 0 deletions.
17 changes: 17 additions & 0 deletions root/tree/tree/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,12 @@ if(ROOT_root7_FOUND)
ROOTDataFrame ROOTVecOps ROOTNTuple
DEPENDS libh1event
)

RB_ADD_TOOL(gen_atlas ${CMAKE_CURRENT_SOURCE_DIR}/gen_atlas.cxx
LIBRARIES Core Hist Imt RIO Tree TreePlayer
ROOTDataFrame ROOTVecOps ROOTNTuple
DEPENDS libh1event
)
endif()

if(ROOT_root7_FOUND AND rootbench-datafiles)
Expand All @@ -28,4 +34,15 @@ if(ROOT_root7_FOUND AND rootbench-datafiles)
SETUP "${CMAKE_CURRENT_BINARY_DIR}/gen_h1 -o ${RB_DATASETDIR} -c lz4 ${RB_DATASETDIR}/h1dst-lz4.root"
SETUP "${CMAKE_CURRENT_BINARY_DIR}/gen_h1 -o ${RB_DATASETDIR} -c zstd ${RB_DATASETDIR}/h1dst-zstd.root"
SETUP "${CMAKE_CURRENT_BINARY_DIR}/gen_h1 -o ${RB_DATASETDIR} -c none ${RB_DATASETDIR}/h1dst-none.root")

RB_ADD_GBENCHMARK(RNTupleATLASBenchmarks
RNTupleATLASBenchmarks.cxx
LABEL short
LIBRARIES Core Hist MathCore RIO Tree ROOTNTuple
DOWNLOAD_DATAFILES gg_data-zstd.root
SETUP "${CMAKE_CURRENT_BINARY_DIR}/gen_atlas -i ${RB_DATASETDIR}/gg_data-zstd.root -o ${RB_DATASETDIR} -c zlib"
SETUP "${CMAKE_CURRENT_BINARY_DIR}/gen_atlas -i ${RB_DATASETDIR}/gg_data-zstd.root -o ${RB_DATASETDIR} -c lzma"
SETUP "${CMAKE_CURRENT_BINARY_DIR}/gen_atlas -i ${RB_DATASETDIR}/gg_data-zstd.root -o ${RB_DATASETDIR} -c lz4"
SETUP "${CMAKE_CURRENT_BINARY_DIR}/gen_atlas -i ${RB_DATASETDIR}/gg_data-zstd.root -o ${RB_DATASETDIR} -c zstd"
SETUP "${CMAKE_CURRENT_BINARY_DIR}/gen_atlas -i ${RB_DATASETDIR}/gg_data-zstd.root -o ${RB_DATASETDIR} -c none")
endif()
149 changes: 149 additions & 0 deletions root/tree/tree/RNTupleATLASBenchmarks.cxx
Original file line number Diff line number Diff line change
@@ -0,0 +1,149 @@
#include <benchmark/benchmark.h>

#include <fcntl.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <unistd.h>

#include <algorithm>
#include <cassert>
#include <cstdio>
#include <iostream>
#include <future>
#include <memory>
#include <string>
#include <vector>

#include <ROOT/RDataFrame.hxx>
#include <ROOT/RNTuple.hxx>
#include <ROOT/RNTupleDS.hxx>
#include <ROOT/RNTupleOptions.hxx>
#include <Compression.h>
#include <TApplication.h>
#include <TBranch.h>
#include <TCanvas.h>
#include <TClassTable.h>
#include <TF1.h>
#include <TFile.h>
#include <TH1D.h>
#include <TLatex.h>
#include <TLegend.h>
#include <TLorentzVector.h>
#include <TMath.h>
#include <TStyle.h>
#include <TSystem.h>
#include <TTree.h>
#include <TTreeReader.h>
#include <TTreePerfStats.h>

#include <Math/Vector4D.h>

#include <rootbench/RBConfig.h>


static float ComputeInvariantMass(float pt0, float pt1, float eta0, float eta1, float phi0, float phi1, float e0, float e1)
{
ROOT::Math::PtEtaPhiEVector p1(pt0, eta0, phi0, e0);
ROOT::Math::PtEtaPhiEVector p2(pt1, eta1, phi1, e1);
return (p1 + p2).mass() / 1000.0;
}


static void ProcessNTuple(ROOT::Experimental::RNTupleReader *ntuple, TH1D *hMass, bool isMC)
{

auto viewTrigP = ntuple->GetView<bool>("trigP");
auto viewPhotonN = ntuple->GetView<std::uint32_t>("photon_n");
auto viewPhotonIsTightId = ntuple->GetView<std::vector<bool>>("photon_isTightID");
auto viewPhotonPt = ntuple->GetView<std::vector<float>>("photon_pt");
auto viewPhotonEta = ntuple->GetView<std::vector<float>>("photon_eta");
auto viewPhotonPhi = ntuple->GetView<std::vector<float>>("photon_phi");
auto viewPhotonE = ntuple->GetView<std::vector<float>>("photon_E");
auto viewPhotonPtCone30 = ntuple->GetView<std::vector<float>>("photon_ptcone30");
auto viewPhotonEtCone20 = ntuple->GetView<std::vector<float>>("photon_etcone20");

auto viewScaleFactorPhoton = ntuple->GetView<float>("scaleFactor_PHOTON");
auto viewScaleFactorPhotonTrigger = ntuple->GetView<float>("scaleFactor_PhotonTRIGGER");
auto viewScaleFactorPileUp = ntuple->GetView<float>("scaleFactor_PILEUP");
auto viewMcWeight = ntuple->GetView<float>("mcWeight");

unsigned nevents = 0;
for (auto e : ntuple->GetEntryRange()) {
nevents++;

if (!viewTrigP(e)) continue;

std::vector<size_t> idxGood;
auto isTightId = viewPhotonIsTightId(e);
auto pt = viewPhotonPt(e);
auto eta = viewPhotonEta(e);

for (size_t i = 0; i < viewPhotonN(e); ++i) {
if (!isTightId[i]) continue;
if (pt[i] <= 25000.) continue;
if (abs(eta[i]) >= 2.37) continue;
if (abs(eta[i]) >= 1.37 && abs(eta[i]) <= 1.52) continue;
idxGood.push_back(i);
}
if (idxGood.size() != 2) continue;

auto ptCone30 = viewPhotonPtCone30(e);
auto etCone20 = viewPhotonEtCone20(e);

bool isIsolatedPhotons = true;
for (int i = 0; i < 2; ++i) {
if ((ptCone30[idxGood[i]] / pt[idxGood[i]] >= 0.065) ||
(etCone20[idxGood[i]] / pt[idxGood[i]] >= 0.065))
{
isIsolatedPhotons = false;
break;
}
}
if (!isIsolatedPhotons) continue;

auto phi = viewPhotonPhi(e);
auto E = viewPhotonE(e);

float myy = ComputeInvariantMass(
pt[idxGood[0]], pt[idxGood[1]],
eta[idxGood[0]], eta[idxGood[1]],
phi[idxGood[0]], phi[idxGood[1]],
E[idxGood[0]], E[idxGood[1]]);

if (pt[idxGood[0]] / 1000. / myy <= 0.35) continue;
if (pt[idxGood[1]] / 1000. / myy <= 0.25) continue;
if (myy <= 105) continue;
if (myy >= 160) continue;

if (isMC) {
auto weight = viewScaleFactorPhoton(e) * viewScaleFactorPhotonTrigger(e) *
viewScaleFactorPileUp(e) * viewMcWeight(e);
hMass->Fill(myy, weight);
} else {
hMass->Fill(myy);
}
}
}


static void BM_RNTuple_ATLAS(benchmark::State &state, const std::string &comprAlgorithm)
{
using RNTupleReader = ROOT::Experimental::RNTupleReader;
std::string path = RB::GetDataDir() + "/atlas-" + comprAlgorithm + ".ntuple";

for (auto _ : state) {
auto hData = new TH1D("", "Diphoton invariant mass; m_{#gamma#gamma} [GeV];Events", 30, 105, 160);

auto ntuple = RNTupleReader::Open("mini", path);
ProcessNTuple(ntuple.get(), hData, false /* isMC */);

delete hData;
}
}
BENCHMARK_CAPTURE(BM_RNTuple_ATLAS, BM_RNTuple_ATLAS_LZ4, "lz4")->Unit(benchmark::kMicrosecond)->Iterations(5);
BENCHMARK_CAPTURE(BM_RNTuple_ATLAS, BM_RNTuple_ATLAS_ZLIB, "zlib")->Unit(benchmark::kMicrosecond)->Iterations(5);
BENCHMARK_CAPTURE(BM_RNTuple_ATLAS, BM_RNTuple_ATLAS_LZMA, "lzma")->Unit(benchmark::kMicrosecond)->Iterations(5);
BENCHMARK_CAPTURE(BM_RNTuple_ATLAS, BM_RNTuple_ATLAS_ZSTD, "zstd")->Unit(benchmark::kMicrosecond)->Iterations(5);
BENCHMARK_CAPTURE(BM_RNTuple_ATLAS, BM_RNTuple_ATLAS_None, "none")->Unit(benchmark::kMicrosecond)->Iterations(5);

BENCHMARK_MAIN();
165 changes: 165 additions & 0 deletions root/tree/tree/gen_atlas.cxx
Original file line number Diff line number Diff line change
@@ -0,0 +1,165 @@
#include <ROOT/RField.hxx>
#include <ROOT/RNTuple.hxx>
#include <ROOT/RNTupleModel.hxx>
#include <ROOT/RNTupleOptions.hxx>

#include <TBranch.h>
#include <TBranchElement.h>
#include <TBranchSTL.h>
#include <TCanvas.h>
#include <TFile.h>
#include <TH1F.h>
#include <TLeaf.h>
#include <TTree.h>

#include <cassert>
#include <iostream>
#include <memory>
#include <string>
#include <vector>

#include <unistd.h>

using RNTupleModel = ROOT::Experimental::RNTupleModel;
using RFieldBase = ROOT::Experimental::Detail::RFieldBase;
using RNTupleWriter = ROOT::Experimental::RNTupleWriter;
using RNTupleWriteOptions = ROOT::Experimental::RNTupleWriteOptions;

int GetCompressionSettings(std::string shorthand)
{
if (shorthand == "zlib")
return 101;
if (shorthand == "lz4")
return 404;
if (shorthand == "lzma")
return 207;
if (shorthand == "zstd")
return 505;
if (shorthand == "none")
return 0;
abort();
}


static void SplitPath(const std::string &path, std::string *basename, std::string *suffix)
{
size_t idx_dot = path.find_last_of(".");
if (idx_dot == std::string::npos) {
*basename = path;
suffix->clear();
} else {
*basename = path.substr(0, idx_dot);
*suffix = path.substr(idx_dot + 1);
}
}


std::string StripSuffix(const std::string &path) {
std::string basename;
std::string suffix;
SplitPath(path, &basename, &suffix);
return basename;
}


std::string GetFileName(const std::string &path) {
const std::string::size_type idx = path.find_last_of('/');
if (idx != std::string::npos)
return path.substr(idx+1);
else
return path;
}


void Usage(char *progname) {
std::cout << "Usage: " << progname << " -i <gg_*.root> -o <ntuple-path> -c <compression>" << std::endl;
}


int main(int argc, char **argv) {
std::string inputFile = "gg_data.root";
std::string outputPath = ".";
int compressionSettings = 0;
std::string compressionShorthand = "none";

int c;
while ((c = getopt(argc, argv, "hvi:o:c:")) != -1) {
switch (c) {
case 'h':
case 'v':
Usage(argv[0]);
return 0;
case 'i':
inputFile = optarg;
break;
case 'o':
outputPath = optarg;
break;
case 'c':
compressionSettings = GetCompressionSettings(optarg);
compressionShorthand = optarg;
break;
default:
fprintf(stderr, "Unknown option: -%c\n", c);
Usage(argv[0]);
return 1;
}
}
std::string outputFile = outputPath + "/" + "atlas" + "-" + compressionShorthand + ".ntuple";

std::unique_ptr<TFile> f(TFile::Open(inputFile.c_str()));
assert(f && ! f->IsZombie());

auto model = RNTupleModel::Create();

auto tree = f->Get<TTree>("mini");
for (auto b : TRangeDynCast<TBranch>(*tree->GetListOfBranches())) {
assert(b);

TLeaf *l = static_cast<TLeaf*>(b->GetListOfLeaves()->First());

auto field = RFieldBase::Create(l->GetName(), l->GetTypeName());

if (typeid(*b) == typeid(TBranchSTL) || typeid(*b) == typeid(TBranchElement)) {
if (field->GetType() == "std::vector<bool>") {
std::vector<bool> **v = new std::vector<bool> *();
tree->SetBranchAddress(b->GetName(), v);
model->GetDefaultEntry()->CaptureValue(field->CaptureValue(*v));
model->GetFieldZero()->Attach(std::unique_ptr<RFieldBase>(field));
} else if (field->GetType() == "std::vector<float>") {
std::vector<float> **v = new std::vector<float> *();
tree->SetBranchAddress(b->GetName(), v);
model->GetDefaultEntry()->CaptureValue(field->CaptureValue(*v));
model->GetFieldZero()->Attach(std::unique_ptr<RFieldBase>(field));
} else if (field->GetType() == "std::vector<std::int32_t>") {
std::vector<std::int32_t> **v = new std::vector<std::int32_t> *();
tree->SetBranchAddress(b->GetName(), v);
model->GetDefaultEntry()->CaptureValue(field->CaptureValue(*v));
model->GetFieldZero()->Attach(std::unique_ptr<RFieldBase>(field));
} else if (field->GetType() == "std::vector<std::uint32_t>") {
std::vector<std::uint32_t> **v = new std::vector<std::uint32_t> *();
tree->SetBranchAddress(b->GetName(), v);
model->GetDefaultEntry()->CaptureValue(field->CaptureValue(*v));
model->GetFieldZero()->Attach(std::unique_ptr<RFieldBase>(field));
} else {
assert(false);
}
} else {
model->AddField(std::unique_ptr<RFieldBase>(field));
void *fieldDataPtr = model->GetDefaultEntry()->GetValue(l->GetName()).GetRawPtr();
tree->SetBranchAddress(b->GetName(), fieldDataPtr);
}
}

RNTupleWriteOptions options;
options.SetCompression(compressionSettings);
auto ntuple = RNTupleWriter::Recreate(std::move(model), "mini", outputFile, options);

auto nEntries = tree->GetEntries();
for (decltype(nEntries) i = 0; i < nEntries; ++i) {
tree->GetEntry(i);
ntuple->Fill();
}

tree->ResetBranchAddresses();
}

0 comments on commit b5ee814

Please sign in to comment.