Commit 3589078

Add final touches (#50)
* Add final touches
* Fix typos
* Update examples/heterogeneous_contraction.cu (Co-authored-by: Mikhail Andrenkov <[email protected]>)
* Update include/jet/CudaTensor.hpp (Co-authored-by: Mikhail Andrenkov <[email protected]>)
* Update include/jet/TaskBasedContractor.hpp (Co-authored-by: Mikhail Andrenkov <[email protected]>)
* Add PR corrections
* Fix two minor issues

Co-authored-by: Trevor Vincent <[email protected]>
Co-authored-by: Mikhail Andrenkov <[email protected]>
1 parent d74502b commit 3589078

File tree

4 files changed (+366, -205 lines)

examples/heterogeneous_contraction.cu

+261
@@ -0,0 +1,261 @@
/**
 * @file heterogeneous_contraction.cu
 *
 * @brief Contracts three tensor network files on two GPUs
 * and one CPU simultaneously.
 *
 */

#include <complex>
#include <fstream>
#include <iostream>
#include <memory>
#include <string>
#include <vector>

#include "CudaTensor.hpp"
#include "PathInfo.hpp"
#include "TaskBasedContractor.hpp"
#include "Tensor.hpp"
#include "TensorNetwork.hpp"
#include "TensorNetworkIO.hpp"

#include <cuComplex.h>
#include <taskflow/cudaflow.hpp>

using namespace Jet;

template <typename T, int device = 0> struct CudaflowContractionTask {

    std::vector<std::unique_ptr<CudaTensor<T, device>>> tensors;
    std::vector<typename CudaTensor<T, device>::CudaContractionPlan> plans;
    std::vector<tf::cudaTask> kernel_tasks;
    std::vector<T> result;
};

template <typename T, int device = 0>
void AddCudaContractionToTaskflow(
    const TensorNetwork<CudaTensor<T, device>> &tn,
    const PathInfo &path_info, tf::Taskflow &taskflow,
    CudaflowContractionTask<T, device> &gpu_task)
{
    auto &tensors = gpu_task.tensors;
    auto &plans = gpu_task.plans;
    auto &result = gpu_task.result;
    auto &kernel_tasks = gpu_task.kernel_tasks;

    const auto &path_node_info = path_info.GetSteps();
    const auto &path = path_info.GetPath();
    const auto &nodes = tn.GetNodes();
    size_t num_leafs = nodes.size();
    tensors.resize(path_node_info.size());
    plans.resize(path.size());

    for (size_t i = 0; i < path.size(); i++) {

        const PathStepInfo &pnia = path_node_info[path[i].first];
        const PathStepInfo &pnib = path_node_info[path[i].second];
        const PathStepInfo &pnic = path_node_info[num_leafs + i];

        if (pnia.id >= num_leafs) {
            tensors[path[i].first] =
                std::make_unique<CudaTensor<cuComplex, device>>(
                    CudaTensor<cuComplex, device>(pnia.tensor_indices,
                                                  pnia.shape));
        }
        else {
            tensors[path[i].first] =
                std::make_unique<CudaTensor<cuComplex, device>>(
                    CudaTensor<cuComplex, device>(
                        tn.GetNodes()[pnia.id].tensor));
        }

        if (pnib.id >= num_leafs) {
            tensors[path[i].second] =
                std::make_unique<CudaTensor<cuComplex, device>>(
                    CudaTensor<cuComplex, device>(pnib.tensor_indices,
                                                  pnib.shape));
        }
        else {
            tensors[path[i].second] =
                std::make_unique<CudaTensor<cuComplex, device>>(
                    CudaTensor<cuComplex, device>(
                        tn.GetNodes()[pnib.id].tensor));
        }

        tensors[num_leafs + i] =
            std::make_unique<CudaTensor<cuComplex, device>>(
                CudaTensor<cuComplex, device>(pnic.tensor_indices, pnic.shape));

        CudaTensor<cuComplex, device>::GetCudaContractionPlan(
            plans[i], *tensors[path[i].first], *tensors[path[i].second],
            *tensors[num_leafs + i]);
    }

    tf::Task task = taskflow.emplace_on(
        [&, path, path_node_info, num_leafs](tf::cudaFlowCapturer &capturer) {
            for (size_t i = 0; i < path.size(); i++) {

                const PathStepInfo &pnia = path_node_info[path[i].first];
                const PathStepInfo &pnib = path_node_info[path[i].second];
                const PathStepInfo &pnic = path_node_info[num_leafs + i];

                auto tensor_a = tensors[path[i].first]->GetData();
                auto tensor_b = tensors[path[i].second]->GetData();
                auto tensor_c = tensors[num_leafs + i]->GetData();

                auto &c_plan = plans[i];
                tf::cudaTask kernel =
                    capturer.on([&, c_plan, tensor_a, tensor_b,
                                 tensor_c](cudaStream_t stream) {
                        cuComplex alpha;
                        alpha.x = 1.;
                        alpha.y = 0.;

                        cuComplex beta;
                        beta.x = 0.;
                        beta.y = 0.;

                        cutensorContraction(&c_plan.handle, &c_plan.plan,
                                            &alpha, tensor_a, tensor_b, &beta,
                                            tensor_c, tensor_c, c_plan.work,
                                            c_plan.work_size, stream);
                    });

                kernel_tasks.push_back(kernel);

                if (pnia.id >= num_leafs) {
                    kernel_tasks[pnia.id - num_leafs].precede(kernel);
                }

                if (pnib.id >= num_leafs) {
                    kernel_tasks[pnib.id - num_leafs].precede(kernel);
                }

                // Copy the final result from device memory to host memory
                if (i == path.size() - 1) {
                    result.resize(tensors[pnic.id]->GetSize());
                    tf::cudaTask d2h = capturer.memcpy(
                        result.data(), tensors[pnic.id]->GetData(),
                        tensors[pnic.id]->GetSize() * sizeof(cuComplex));

                    kernel.precede(d2h);
                }
            }
        },
        device);
}

int main(int argc, char *argv[])
{

    if (argc != 4) {
        std::cout << "heterogeneous_contraction.cu <tensor network file 1 on GPU 0> "
                     "<tensor network file 2 on GPU 1> <tensor network file 3 on CPU>"
                  << std::endl;
        std::cout << "Contracts three circuits on two GPUs and one CPU"
                  << std::endl;
        return 1;
    }

    std::string file_name_0 = argv[1];
    std::string file_name_1 = argv[2];
    std::string file_name_2 = argv[3];

    /*
     * Load first tensor network file onto GPU 0
     */

    TensorNetworkFile<CudaTensor<cuComplex, 0>> tensor_file_0;
    try {
        std::ifstream tn_data(file_name_0);
        std::string circuit_str{std::istreambuf_iterator<char>(tn_data),
                                std::istreambuf_iterator<char>()};
        // Load data into TensorNetwork and PathInfo objects
        TensorNetworkSerializer<CudaTensor<cuComplex, 0>> serializer;
        tensor_file_0 = serializer(circuit_str, true);
    }
    catch (...) {
        std::cerr << "Please specify a valid first JSON file to contract"
                  << std::endl;
        exit(1);
    }

    TensorNetwork<CudaTensor<cuComplex, 0>> tn_0 = tensor_file_0.tensors;
    PathInfo path_0 = tensor_file_0.path.value();

    /*
     * Load second tensor network file onto GPU 1
     */

    TensorNetworkFile<CudaTensor<cuComplex, 1>> tensor_file_1;
    try {
        std::ifstream tn_data(file_name_1);
        std::string circuit_str{std::istreambuf_iterator<char>(tn_data),
                                std::istreambuf_iterator<char>()};
        // Load data into TensorNetwork and PathInfo objects
        TensorNetworkSerializer<CudaTensor<cuComplex, 1>> serializer;
        tensor_file_1 = serializer(circuit_str, true);
    }
    catch (...) {
        std::cerr << "Please specify a valid second JSON file to contract"
                  << std::endl;
        exit(1);
    }

    TensorNetwork<CudaTensor<cuComplex, 1>> tn_1 = tensor_file_1.tensors;
    PathInfo path_1 = tensor_file_1.path.value();

    /*
     * Load third tensor network file onto the CPU
     */

    TensorNetworkFile<Tensor<std::complex<float>>> tensor_file_2;
    try {
        std::ifstream tn_data(file_name_2);
        std::string circuit_str{std::istreambuf_iterator<char>(tn_data),
                                std::istreambuf_iterator<char>()};
        // Load data into TensorNetwork and PathInfo objects
        TensorNetworkSerializer<Tensor<std::complex<float>>> serializer;
        tensor_file_2 = serializer(circuit_str, true);
    }
    catch (...) {
        std::cerr << "Please specify a valid third JSON file to contract"
                  << std::endl;
        exit(1);
    }
    TensorNetwork<Tensor<std::complex<float>>> tn_2 = tensor_file_2.tensors;
    PathInfo path_2 = tensor_file_2.path.value();

    tf::Taskflow taskflow;

    /* Set up the GPU 0 contraction task */
    CudaflowContractionTask<cuComplex, 0> gpu_task_0;
    AddCudaContractionToTaskflow<cuComplex, 0>(tn_0, path_0, taskflow,
                                               gpu_task_0);

    /* Set up the GPU 1 contraction task */
    CudaflowContractionTask<cuComplex, 1> gpu_task_1;
    AddCudaContractionToTaskflow<cuComplex, 1>(tn_1, path_1, taskflow,
                                               gpu_task_1);

    /* Set up the CPU contraction task */
    TaskBasedContractor<Tensor<std::complex<float>>> contractor;
    contractor.AddContractionTasks(tn_2, path_2);

    // Add the GPU task graph to the CPU task graph
    contractor.AddTaskflow(taskflow);

    /* Contract on all devices */
    contractor.Contract().wait();

    /* Display results */
    auto result0 = gpu_task_0.result;
    std::cout << "GPU 0 result = " << result0[0].x << " " << result0[0].y
              << std::endl;

    auto result1 = gpu_task_1.result;
    std::cout << "GPU 1 result = " << result1[0].x << " " << result1[0].y
              << std::endl;

    auto result2 = contractor.GetResults()[0];
    std::cout << "CPU result = " << result2 << std::endl;

    return 0;
}
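Note: the cutensorContraction call inside the capturer above discards the cutensorStatus_t it returns. A minimal sketch of a status check that could wrap that call is shown below; the CheckCutensorStatus helper is hypothetical and not part of Jet or of this commit.

#include <cstdlib>
#include <iostream>

#include <cutensor.h>

// Hypothetical helper (not part of this commit): abort with a readable
// message when a cuTENSOR call does not return CUTENSOR_STATUS_SUCCESS.
inline void CheckCutensorStatus(cutensorStatus_t status, const char *what)
{
    if (status != CUTENSOR_STATUS_SUCCESS) {
        std::cerr << what << " failed: " << cutensorGetErrorString(status)
                  << std::endl;
        std::exit(EXIT_FAILURE);
    }
}

// Example use inside the capturer lambda above:
//     CheckCutensorStatus(
//         cutensorContraction(&c_plan.handle, &c_plan.plan, &alpha, tensor_a,
//                             tensor_b, &beta, tensor_c, tensor_c, c_plan.work,
//                             c_plan.work_size, stream),
//         "cutensorContraction");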
