Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add RNTuple benchmark for ATLAS file #170

Open
wants to merge 2 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 18 additions & 1 deletion root/tree/tree/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -15,10 +15,16 @@ if(ROOT_root7_FOUND)
ROOTDataFrame ROOTVecOps ROOTNTuple
DEPENDS libh1event
)

RB_ADD_TOOL(gen_atlas ${CMAKE_CURRENT_SOURCE_DIR}/gen_atlas.cxx
LIBRARIES Core Hist Imt RIO Tree TreePlayer
ROOTDataFrame ROOTVecOps ROOTNTuple
DEPENDS libh1event
)
endif()

if(ROOT_root7_FOUND AND rootbench-datafiles)
RB_ADD_GBENCHMARK(RNTupleAnalysisBenchmarks
RB_ADD_GBENCHMARK(RNTupleH1Benchmarks
RNTupleH1Benchmarks.cxx
LABEL short
LIBRARIES Core Hist MathCore RIO Tree ROOTNTuple
Expand All @@ -28,4 +34,15 @@ if(ROOT_root7_FOUND AND rootbench-datafiles)
SETUP "${CMAKE_CURRENT_BINARY_DIR}/gen_h1 -o ${RB_DATASETDIR} -c lz4 ${RB_DATASETDIR}/h1dst-lz4.root"
SETUP "${CMAKE_CURRENT_BINARY_DIR}/gen_h1 -o ${RB_DATASETDIR} -c zstd ${RB_DATASETDIR}/h1dst-zstd.root"
SETUP "${CMAKE_CURRENT_BINARY_DIR}/gen_h1 -o ${RB_DATASETDIR} -c none ${RB_DATASETDIR}/h1dst-none.root")

RB_ADD_GBENCHMARK(RNTupleATLASBenchmarks
RNTupleATLASBenchmarks.cxx
LABEL short
LIBRARIES Core Hist MathCore RIO Tree ROOTNTuple
DOWNLOAD_DATAFILES gg_data-zstd.root
SETUP "${CMAKE_CURRENT_BINARY_DIR}/gen_atlas -i ${RB_DATASETDIR}/gg_data-zstd.root -o ${RB_DATASETDIR} -c zlib"
SETUP "${CMAKE_CURRENT_BINARY_DIR}/gen_atlas -i ${RB_DATASETDIR}/gg_data-zstd.root -o ${RB_DATASETDIR} -c lzma"
SETUP "${CMAKE_CURRENT_BINARY_DIR}/gen_atlas -i ${RB_DATASETDIR}/gg_data-zstd.root -o ${RB_DATASETDIR} -c lz4"
SETUP "${CMAKE_CURRENT_BINARY_DIR}/gen_atlas -i ${RB_DATASETDIR}/gg_data-zstd.root -o ${RB_DATASETDIR} -c zstd"
SETUP "${CMAKE_CURRENT_BINARY_DIR}/gen_atlas -i ${RB_DATASETDIR}/gg_data-zstd.root -o ${RB_DATASETDIR} -c none")
endif()
149 changes: 149 additions & 0 deletions root/tree/tree/RNTupleATLASBenchmarks.cxx
Original file line number Diff line number Diff line change
@@ -0,0 +1,149 @@
#include <benchmark/benchmark.h>

#include <fcntl.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <unistd.h>

#include <algorithm>
#include <cassert>
#include <cstdio>
#include <iostream>
#include <future>
#include <memory>
#include <string>
#include <vector>

#include <ROOT/RDataFrame.hxx>
#include <ROOT/RNTuple.hxx>
#include <ROOT/RNTupleDS.hxx>
#include <ROOT/RNTupleOptions.hxx>
#include <Compression.h>
#include <TApplication.h>
#include <TBranch.h>
#include <TCanvas.h>
#include <TClassTable.h>
#include <TF1.h>
#include <TFile.h>
#include <TH1D.h>
#include <TLatex.h>
#include <TLegend.h>
#include <TLorentzVector.h>
#include <TMath.h>
#include <TStyle.h>
#include <TSystem.h>
#include <TTree.h>
#include <TTreeReader.h>
#include <TTreePerfStats.h>

#include <Math/Vector4D.h>

#include <rootbench/RBConfig.h>


static float ComputeInvariantMass(float pt0, float pt1, float eta0, float eta1, float phi0, float phi1, float e0, float e1)
{
ROOT::Math::PtEtaPhiEVector p1(pt0, eta0, phi0, e0);
ROOT::Math::PtEtaPhiEVector p2(pt1, eta1, phi1, e1);
return (p1 + p2).mass() / 1000.0;
}


static void ProcessNTuple(ROOT::Experimental::RNTupleReader *ntuple, TH1D *hMass, bool isMC)
{

auto viewTrigP = ntuple->GetView<bool>("trigP");
auto viewPhotonN = ntuple->GetView<std::uint32_t>("photon_n");
auto viewPhotonIsTightId = ntuple->GetView<std::vector<bool>>("photon_isTightID");
auto viewPhotonPt = ntuple->GetView<std::vector<float>>("photon_pt");
auto viewPhotonEta = ntuple->GetView<std::vector<float>>("photon_eta");
auto viewPhotonPhi = ntuple->GetView<std::vector<float>>("photon_phi");
auto viewPhotonE = ntuple->GetView<std::vector<float>>("photon_E");
auto viewPhotonPtCone30 = ntuple->GetView<std::vector<float>>("photon_ptcone30");
auto viewPhotonEtCone20 = ntuple->GetView<std::vector<float>>("photon_etcone20");

auto viewScaleFactorPhoton = ntuple->GetView<float>("scaleFactor_PHOTON");
auto viewScaleFactorPhotonTrigger = ntuple->GetView<float>("scaleFactor_PhotonTRIGGER");
auto viewScaleFactorPileUp = ntuple->GetView<float>("scaleFactor_PILEUP");
auto viewMcWeight = ntuple->GetView<float>("mcWeight");

unsigned nevents = 0;
for (auto e : ntuple->GetEntryRange()) {
nevents++;

if (!viewTrigP(e)) continue;

std::vector<size_t> idxGood;
auto isTightId = viewPhotonIsTightId(e);
auto pt = viewPhotonPt(e);
auto eta = viewPhotonEta(e);

for (size_t i = 0; i < viewPhotonN(e); ++i) {
if (!isTightId[i]) continue;
if (pt[i] <= 25000.) continue;
if (abs(eta[i]) >= 2.37) continue;
if (abs(eta[i]) >= 1.37 && abs(eta[i]) <= 1.52) continue;
idxGood.push_back(i);
}
if (idxGood.size() != 2) continue;

auto ptCone30 = viewPhotonPtCone30(e);
auto etCone20 = viewPhotonEtCone20(e);

bool isIsolatedPhotons = true;
for (int i = 0; i < 2; ++i) {
if ((ptCone30[idxGood[i]] / pt[idxGood[i]] >= 0.065) ||
(etCone20[idxGood[i]] / pt[idxGood[i]] >= 0.065))
{
isIsolatedPhotons = false;
break;
}
}
if (!isIsolatedPhotons) continue;

auto phi = viewPhotonPhi(e);
auto E = viewPhotonE(e);

float myy = ComputeInvariantMass(
pt[idxGood[0]], pt[idxGood[1]],
eta[idxGood[0]], eta[idxGood[1]],
phi[idxGood[0]], phi[idxGood[1]],
E[idxGood[0]], E[idxGood[1]]);

if (pt[idxGood[0]] / 1000. / myy <= 0.35) continue;
if (pt[idxGood[1]] / 1000. / myy <= 0.25) continue;
if (myy <= 105) continue;
if (myy >= 160) continue;

if (isMC) {
auto weight = viewScaleFactorPhoton(e) * viewScaleFactorPhotonTrigger(e) *
viewScaleFactorPileUp(e) * viewMcWeight(e);
hMass->Fill(myy, weight);
} else {
hMass->Fill(myy);
}
}
}


static void BM_RNTuple_ATLAS(benchmark::State &state, const std::string &comprAlgorithm)
{
using RNTupleReader = ROOT::Experimental::RNTupleReader;
std::string path = RB::GetDataDir() + "/atlas-" + comprAlgorithm + ".ntuple";

for (auto _ : state) {
auto hData = new TH1D("", "Diphoton invariant mass; m_{#gamma#gamma} [GeV];Events", 30, 105, 160);

auto ntuple = RNTupleReader::Open("mini", path);
ProcessNTuple(ntuple.get(), hData, false /* isMC */);

delete hData;
}
}
BENCHMARK_CAPTURE(BM_RNTuple_ATLAS, BM_RNTuple_ATLAS_LZ4, "lz4")->Unit(benchmark::kMicrosecond)->Iterations(5);
BENCHMARK_CAPTURE(BM_RNTuple_ATLAS, BM_RNTuple_ATLAS_ZLIB, "zlib")->Unit(benchmark::kMicrosecond)->Iterations(5);
BENCHMARK_CAPTURE(BM_RNTuple_ATLAS, BM_RNTuple_ATLAS_LZMA, "lzma")->Unit(benchmark::kMicrosecond)->Iterations(5);
BENCHMARK_CAPTURE(BM_RNTuple_ATLAS, BM_RNTuple_ATLAS_ZSTD, "zstd")->Unit(benchmark::kMicrosecond)->Iterations(5);
BENCHMARK_CAPTURE(BM_RNTuple_ATLAS, BM_RNTuple_ATLAS_None, "none")->Unit(benchmark::kMicrosecond)->Iterations(5);

BENCHMARK_MAIN();
165 changes: 165 additions & 0 deletions root/tree/tree/gen_atlas.cxx
Original file line number Diff line number Diff line change
@@ -0,0 +1,165 @@
#include <ROOT/RField.hxx>
#include <ROOT/RNTuple.hxx>
#include <ROOT/RNTupleModel.hxx>
#include <ROOT/RNTupleOptions.hxx>

#include <TBranch.h>
#include <TBranchElement.h>
#include <TBranchSTL.h>
#include <TCanvas.h>
#include <TFile.h>
#include <TH1F.h>
#include <TLeaf.h>
#include <TTree.h>

#include <cassert>
#include <iostream>
#include <memory>
#include <string>
#include <vector>

#include <unistd.h>

using RNTupleModel = ROOT::Experimental::RNTupleModel;
using RFieldBase = ROOT::Experimental::Detail::RFieldBase;
using RNTupleWriter = ROOT::Experimental::RNTupleWriter;
using RNTupleWriteOptions = ROOT::Experimental::RNTupleWriteOptions;

int GetCompressionSettings(std::string shorthand)
{
if (shorthand == "zlib")
return 101;
if (shorthand == "lz4")
return 404;
if (shorthand == "lzma")
return 207;
if (shorthand == "zstd")
return 505;
if (shorthand == "none")
return 0;
abort();
}


static void SplitPath(const std::string &path, std::string *basename, std::string *suffix)
{
size_t idx_dot = path.find_last_of(".");
if (idx_dot == std::string::npos) {
*basename = path;
suffix->clear();
} else {
*basename = path.substr(0, idx_dot);
*suffix = path.substr(idx_dot + 1);
}
}


std::string StripSuffix(const std::string &path) {
std::string basename;
std::string suffix;
SplitPath(path, &basename, &suffix);
return basename;
}


std::string GetFileName(const std::string &path) {
const std::string::size_type idx = path.find_last_of('/');
if (idx != std::string::npos)
return path.substr(idx+1);
else
return path;
}


void Usage(char *progname) {
std::cout << "Usage: " << progname << " -i <gg_*.root> -o <ntuple-path> -c <compression>" << std::endl;
}


int main(int argc, char **argv) {
std::string inputFile = "gg_data.root";
std::string outputPath = ".";
int compressionSettings = 0;
std::string compressionShorthand = "none";

int c;
while ((c = getopt(argc, argv, "hvi:o:c:")) != -1) {
switch (c) {
case 'h':
case 'v':
Usage(argv[0]);
return 0;
case 'i':
inputFile = optarg;
break;
case 'o':
outputPath = optarg;
break;
case 'c':
compressionSettings = GetCompressionSettings(optarg);
compressionShorthand = optarg;
break;
default:
fprintf(stderr, "Unknown option: -%c\n", c);
Usage(argv[0]);
return 1;
}
}
std::string outputFile = outputPath + "/" + "atlas" + "-" + compressionShorthand + ".ntuple";

std::unique_ptr<TFile> f(TFile::Open(inputFile.c_str()));
assert(f && ! f->IsZombie());

auto model = RNTupleModel::Create();

auto tree = f->Get<TTree>("mini");
for (auto b : TRangeDynCast<TBranch>(*tree->GetListOfBranches())) {
assert(b);

TLeaf *l = static_cast<TLeaf*>(b->GetListOfLeaves()->First());

auto field = RFieldBase::Create(l->GetName(), l->GetTypeName());

if (typeid(*b) == typeid(TBranchSTL) || typeid(*b) == typeid(TBranchElement)) {
if (field->GetType() == "std::vector<bool>") {
std::vector<bool> **v = new std::vector<bool> *();
tree->SetBranchAddress(b->GetName(), v);
model->GetDefaultEntry()->CaptureValue(field->CaptureValue(*v));
model->GetRootField()->Attach(std::unique_ptr<RFieldBase>(field));
} else if (field->GetType() == "std::vector<float>") {
std::vector<float> **v = new std::vector<float> *();
tree->SetBranchAddress(b->GetName(), v);
model->GetDefaultEntry()->CaptureValue(field->CaptureValue(*v));
model->GetRootField()->Attach(std::unique_ptr<RFieldBase>(field));
} else if (field->GetType() == "std::vector<std::int32_t>") {
std::vector<std::int32_t> **v = new std::vector<std::int32_t> *();
tree->SetBranchAddress(b->GetName(), v);
model->GetDefaultEntry()->CaptureValue(field->CaptureValue(*v));
model->GetRootField()->Attach(std::unique_ptr<RFieldBase>(field));
} else if (field->GetType() == "std::vector<std::uint32_t>") {
std::vector<std::uint32_t> **v = new std::vector<std::uint32_t> *();
tree->SetBranchAddress(b->GetName(), v);
model->GetDefaultEntry()->CaptureValue(field->CaptureValue(*v));
model->GetRootField()->Attach(std::unique_ptr<RFieldBase>(field));
} else {
assert(false);
}
} else {
model->AddField(std::unique_ptr<RFieldBase>(field));
void *fieldDataPtr = model->GetDefaultEntry()->GetValue(l->GetName()).GetRawPtr();
tree->SetBranchAddress(b->GetName(), fieldDataPtr);
}
}

RNTupleWriteOptions options;
options.SetCompression(compressionSettings);
auto ntuple = RNTupleWriter::Recreate(std::move(model), "mini", outputFile, options);

auto nEntries = tree->GetEntries();
for (decltype(nEntries) i = 0; i < nEntries; ++i) {
tree->GetEntry(i);
ntuple->Fill();
}

tree->ResetBranchAddresses();
}