examples: Update expected output

devitocodes · Feb 1, 2022 · 6636d53
1 parent c5c20b8
commit 6636d53
Showing 1 changed file with 21 additions and 21 deletions.
diff --git a/examples/performance/00_overview.ipynb b/examples/performance/00_overview.ipynb
@@ -533,7 +533,7 @@
       "      {\n",
       "        for (int z = z_m; z <= z_M; z += 1)\n",
       "        {\n",
-      "          u[t1][x + 4][y + 4][z + 4] = ((-6.66666667e-1F*r1)*(8.33333333e-2F*r1*u[t0][x + 4][y + 1][z + 4] - 6.66666667e-1F*r1*u[t0][x + 4][y + 2][z + 4] + 6.66666667e-1F*r1*u[t0][x + 4][y + 4][z + 4] - 8.33333333e-2F*r1*u[t0][x + 4][y + 5][z + 4]) + (-8.33333333e-2F*r1)*(8.33333333e-2F*r1*u[t0][x + 4][y + 4][z + 4] - 6.66666667e-1F*r1*u[t0][x + 4][y + 5][z + 4] + 6.66666667e-1F*r1*u[t0][x + 4][y + 7][z + 4] - 8.33333333e-2F*r1*u[t0][x + 4][y + 8][z + 4]) + (8.33333333e-2F*r1)*(8.33333333e-2F*r1*u[t0][x + 4][y][z + 4] - 6.66666667e-1F*r1*u[t0][x + 4][y + 1][z + 4] + 6.66666667e-1F*r1*u[t0][x + 4][y + 3][z + 4] - 8.33333333e-2F*r1*u[t0][x + 4][y + 4][z + 4]) + (6.66666667e-1F*r1)*(8.33333333e-2F*r1*u[t0][x + 4][y + 3][z + 4] - 6.66666667e-1F*r1*u[t0][x + 4][y + 4][z + 4] + 6.66666667e-1F*r1*u[t0][x + 4][y + 6][z + 4] - 8.33333333e-2F*r1*u[t0][x + 4][y + 7][z + 4]))*pow(f[x + 1][y + 1][z + 1], 2)*r0[x][y][z];\n",
+      "          u[t1][x + 4][y + 4][z + 4] = ((-6.66666667e-1F*r1)*(8.33333333e-2F*r1*u[t0][x + 4][y + 1][z + 4] - 6.66666667e-1F*r1*u[t0][x + 4][y + 2][z + 4] + 6.66666667e-1F*r1*u[t0][x + 4][y + 4][z + 4] - 8.33333333e-2F*r1*u[t0][x + 4][y + 5][z + 4]) + (-8.33333333e-2F*r1)*(8.33333333e-2F*r1*u[t0][x + 4][y + 4][z + 4] - 6.66666667e-1F*r1*u[t0][x + 4][y + 5][z + 4] + 6.66666667e-1F*r1*u[t0][x + 4][y + 7][z + 4] - 8.33333333e-2F*r1*u[t0][x + 4][y + 8][z + 4]) + (8.33333333e-2F*r1)*(8.33333333e-2F*r1*u[t0][x + 4][y][z + 4] - 6.66666667e-1F*r1*u[t0][x + 4][y + 1][z + 4] + 6.66666667e-1F*r1*u[t0][x + 4][y + 3][z + 4] - 8.33333333e-2F*r1*u[t0][x + 4][y + 4][z + 4]) + (6.66666667e-1F*r1)*(8.33333333e-2F*r1*u[t0][x + 4][y + 3][z + 4] - 6.66666667e-1F*r1*u[t0][x + 4][y + 4][z + 4] + 6.66666667e-1F*r1*u[t0][x + 4][y + 6][z + 4] - 8.33333333e-2F*r1*u[t0][x + 4][y + 7][z + 4]))*r0[x][y][z]*pow(f[x + 1][y + 1][z + 1], 2);\n",
       "        }\n",
       "      }\n",
       "    }\n",
@@ -713,7 +713,7 @@
       "  START_TIMER(section0)\n",
       "  #pragma omp parallel num_threads(nthreads)\n",
       "  {\n",
-      "    const int tid = omp_get_thread_num();\n",
+      "    const int tid = omp_get_thread_num();;\n",
       "    float (*restrict r0)[z_size] __attribute__ ((aligned (64))) = (float (*)[z_size]) pr0[tid];\n",
       "\n",
       "    #pragma omp for collapse(1) schedule(dynamic,1)\n",
@@ -852,7 +852,7 @@
       "  }\n",
       "  #pragma omp parallel num_threads(nthreads)\n",
       "  {\n",
-      "    const int tid = omp_get_thread_num();\n",
+      "    const int tid = omp_get_thread_num();;\n",
       "    float (*restrict r1)[z_size] __attribute__ ((aligned (64))) = (float (*)[z_size]) pr1[tid];\n",
       "\n",
       "    #pragma omp for collapse(1) schedule(dynamic,1)\n",
@@ -988,7 +988,7 @@
       "  START_TIMER(section0)\n",
       "  #pragma omp parallel num_threads(nthreads)\n",
       "  {\n",
-      "    const int tid = omp_get_thread_num();\n",
+      "    const int tid = omp_get_thread_num();;\n",
       "    float (*restrict r0)[z_size] __attribute__ ((aligned (64))) = (float (*)[z_size]) pr0[tid];\n",
       "\n",
       "    #pragma omp for collapse(1) schedule(dynamic,1)\n",
@@ -1216,14 +1216,14 @@
       "\n",
       "int Kernel(struct dataobj *restrict f_vec, const float h_y, struct dataobj *restrict u_vec, const int x_size, const int y0_blk0_size, const int y_size, const int z_size, const int time_M, const int time_m, const int x0_blk0_size, const int x_M, const int x_m, const int y_M, const int y_m, const int z_M, const int z_m, const int nthreads, struct profiler * timers)\n",
       "{\n",
-      "  float *r0_vec;\n",
-      "  posix_memalign((void**)&r0_vec, 64, sizeof(float[x_size][y_size][z_size]));\n",
       "  float **pr2_vec;\n",
-      "  posix_memalign((void**)&pr2_vec, 64, sizeof(float*)*nthreads);\n",
+      "  posix_memalign((void**)(&pr2_vec),64,nthreads*sizeof(float*));\n",
+      "  float *r0_vec;\n",
+      "  posix_memalign((void**)(&r0_vec),64,x_size*y_size*z_size*sizeof(float));\n",
       "  #pragma omp parallel num_threads(nthreads)\n",
       "  {\n",
       "    const int tid = omp_get_thread_num();\n",
-      "    posix_memalign((void**)&pr2_vec[tid], 64, sizeof(float[y0_blk0_size + 4][z_size]));\n",
+      "    posix_memalign((void**)(&(pr2_vec[tid])),64,z_size*(y0_blk0_size + 4)*sizeof(float));\n",
       "  }\n",
       "\n",
       "  float (*restrict f)[f_vec->size[1]][f_vec->size[2]] __attribute__ ((aligned (64))) = (float (*)[f_vec->size[1]][f_vec->size[2]]) f_vec->data;\n",
@@ -1263,7 +1263,7 @@
       "    START_TIMER(section1)\n",
       "    #pragma omp parallel num_threads(nthreads)\n",
       "    {\n",
-      "      const int tid = omp_get_thread_num();\n",
+      "      const int tid = omp_get_thread_num();;\n",
       "      float (*restrict r2)[z_size] __attribute__ ((aligned (64))) = (float (*)[z_size]) pr2[tid];\n",
       "\n",
       "      #pragma omp for collapse(2) schedule(dynamic,1)\n",
@@ -1302,8 +1302,8 @@
       "    const int tid = omp_get_thread_num();\n",
       "    free(pr2_vec[tid]);\n",
       "  }\n",
-      "  free(r0_vec);\n",
       "  free(pr2_vec);\n",
+      "  free(r0_vec);\n",
       "\n",
       "  return 0;\n",
       "}\n",
@@ -1381,7 +1381,7 @@
       "  START_TIMER(section1)\n",
       "  #pragma omp parallel num_threads(nthreads)\n",
       "  {\n",
-      "    const int tid = omp_get_thread_num();\n",
+      "    const int tid = omp_get_thread_num();;\n",
       "    float (*restrict r2)[z_size] __attribute__ ((aligned (64))) = (float (*)[z_size]) pr2[tid];\n",
       "\n",
       "    #pragma omp for collapse(2) schedule(dynamic,1)\n",
@@ -1443,7 +1443,7 @@
       "#pragma omp parallel num_threads(nthreads)\n",
       "{\n",
       "  const int tid = omp_get_thread_num();\n",
-      "  posix_memalign((void**)&pr2_vec[tid], 64, sizeof(float[5][z_size]));\n",
+      "  posix_memalign((void**)(&(pr2_vec[tid])),64,5*z_size*sizeof(float));\n",
       "}\n"
      ]
     }
@@ -1512,14 +1512,14 @@
       "\n",
       "int Kernel(struct dataobj *restrict f_vec, const float h_y, struct dataobj *restrict u_vec, const int x_size, const int y_size, const int z_size, const int time_M, const int time_m, const int x0_blk0_size, const int x_M, const int x_m, const int y_M, const int y_m, const int z_M, const int z_m, const int nthreads, struct profiler * timers)\n",
       "{\n",
-      "  float *r0_vec;\n",
-      "  posix_memalign((void**)&r0_vec, 64, sizeof(float[x_size][y_size][z_size]));\n",
       "  float **pr2_vec;\n",
-      "  posix_memalign((void**)&pr2_vec, 64, sizeof(float*)*nthreads);\n",
+      "  posix_memalign((void**)(&pr2_vec),64,nthreads*sizeof(float*));\n",
+      "  float *r0_vec;\n",
+      "  posix_memalign((void**)(&r0_vec),64,x_size*y_size*z_size*sizeof(float));\n",
       "  #pragma omp parallel num_threads(nthreads)\n",
       "  {\n",
       "    const int tid = omp_get_thread_num();\n",
-      "    posix_memalign((void**)&pr2_vec[tid], 64, sizeof(float[y_size + 4][z_size]));\n",
+      "    posix_memalign((void**)(&(pr2_vec[tid])),64,z_size*(y_size + 4)*sizeof(float));\n",
       "  }\n",
       "\n",
       "  float (*restrict f)[f_vec->size[1]][f_vec->size[2]] __attribute__ ((aligned (64))) = (float (*)[f_vec->size[1]][f_vec->size[2]]) f_vec->data;\n",
@@ -1559,7 +1559,7 @@
       "    START_TIMER(section1)\n",
       "    #pragma omp parallel num_threads(nthreads)\n",
       "    {\n",
-      "      const int tid = omp_get_thread_num();\n",
+      "      const int tid = omp_get_thread_num();;\n",
       "      float (*restrict r2)[z_size] __attribute__ ((aligned (64))) = (float (*)[z_size]) pr2[tid];\n",
       "\n",
       "      #pragma omp for collapse(1) schedule(dynamic,1)\n",
@@ -1595,8 +1595,8 @@
       "    const int tid = omp_get_thread_num();\n",
       "    free(pr2_vec[tid]);\n",
       "  }\n",
-      "  free(r0_vec);\n",
       "  free(pr2_vec);\n",
+      "  free(r0_vec);\n",
       "\n",
       "  return 0;\n",
       "}\n",
@@ -1662,11 +1662,11 @@
       "int Kernel(struct dataobj *restrict f_vec, const float h_x, const float h_y, struct dataobj *restrict u_vec, const int x_size, const int y_size, const int z_size, const int time_M, const int time_m, const int x0_blk0_size, const int x1_blk0_size, const int x_M, const int x_m, const int y0_blk0_size, const int y1_blk0_size, const int y_M, const int y_m, const int z_M, const int z_m, const int nthreads, struct profiler * timers)\n",
       "{\n",
       "  float *r0_vec;\n",
-      "  posix_memalign((void**)&r0_vec, 64, sizeof(float[x_size][y_size][z_size]));\n",
+      "  posix_memalign((void**)(&r0_vec),64,x_size*y_size*z_size*sizeof(float));\n",
       "  float *r3_vec;\n",
-      "  posix_memalign((void**)&r3_vec, 64, sizeof(float[x_size + 4][y_size + 4][z_size]));\n",
+      "  posix_memalign((void**)(&r3_vec),64,z_size*(x_size + 4)*(y_size + 4)*sizeof(float));\n",
       "  float *r4_vec;\n",
-      "  posix_memalign((void**)&r4_vec, 64, sizeof(float[x_size + 4][y_size + 4][z_size]));\n",
+      "  posix_memalign((void**)(&r4_vec),64,z_size*(x_size + 4)*(y_size + 4)*sizeof(float));\n",
       "\n",
       "  float (*restrict f)[f_vec->size[1]][f_vec->size[2]] __attribute__ ((aligned (64))) = (float (*)[f_vec->size[1]][f_vec->size[2]]) f_vec->data;\n",
       "  float (*restrict r0)[y_size][z_size] __attribute__ ((aligned (64))) = (float (*)[y_size][z_size]) r0_vec;\n",