Merge branch 'release-2.0.x'

RenderKit · Oct 2, 2023 · 712bdb8 · 712bdb8
2 parents fac6c96 + ef69fd3
commit 712bdb8
Show file tree

Hide file tree

Showing 385 changed files with 23,874 additions and 6,535 deletions.
diff --git a/.github/deps/dpcpp-sycl-nightly.env b/.github/deps/dpcpp-sycl-nightly.env
@@ -0,0 +1 @@
+DPCPP_VERSION=intel-llvm/nightly-2023-09-22-rk
diff --git a/.github/deps/gfx-linux-public.env b/.github/deps/gfx-linux-public.env
@@ -0,0 +1 @@
+GFX_DRIVER_VERSION=linux-latest
diff --git a/.github/deps/gfx-windows-public.env b/.github/deps/gfx-windows-public.env
@@ -0,0 +1 @@
+GFX_DRIVER_VERSION=windows-101.4826
diff --git a/.github/env/pvc-runtime-options.env b/.github/env/pvc-runtime-options.env
@@ -0,0 +1,11 @@
+## Copyright 2023 Intel Corporation
+## SPDX-License-Identifier: Apache-2.0
+
+PrintDebugSettings=1
+NEOReadDebugKeys=1
+
+# Run on a single tile (disable implicit scaling)
+EnableImplicitScaling=0
+
+# Force native SIMD width for PVC
+IGC_ForceOCLSIMDWidth=16
diff --git a/.github/scripts/run-benchmarks.sh b/.github/scripts/run-benchmarks.sh
@@ -0,0 +1,63 @@
+#!/bin/bash -xe
+
+## Copyright 2023 Intel Corporation
+## SPDX-License-Identifier: Apache-2.0
+
+# Device to benchmark, taken from the first positional argument. Besides being
+# validated here, it selects the benchmark binary (./bin/vklBenchmark<DEVICE>)
+# that is run further below.
+BENCHMARK_DEVICE=$1
+case "$BENCHMARK_DEVICE" in
+  "CPU" | "GPU")
+    echo "Going to run benchmarks on $BENCHMARK_DEVICE device"
+    ;;
+
+  *)
+    # Report the usage error on stderr so it is not mixed into regular output.
+    echo "$BENCHMARK_DEVICE is not valid device, please use CPU or GPU" >&2
+    exit 1
+    ;;
+esac
+
+SOURCE_ROOT=$GITHUB_WORKSPACE
+PROJECT_NAME="Open VKL"
+# Holds two separate flags; it has to be expanded unquoted where it is used so
+# that the shell splits it into two arguments.
+BENCHMARK_FLAGS="--benchmark_repetitions=5 --benchmark_min_time=10"
+
+
+
+################################# PLEASE READ ##################################
+#
+# Note that suites and subsuites must exist in the database _before_ attempting
+# insertion of results. This is intentional! You should think carefully about
+# your [suite -> subsuite -> benchmark] hierarchy and definitions. These should
+# be stable over time (especially for suites and subsuites) to facilitate
+# long-term comparisons.
+#
+# These can be inserted using the benchmark client, through the "ls"
+# and "insert subsuite" commands. Ask for help if you have questions.
+#
+################################# PLEASE READ ###################################
+
+# Registers the code context and the run context through the `benny` client,
+# at most once per script run (guarded by HAVE_CONTEXT). Writes
+# code_context.json and run_context.json into the current directory.
+initContext() {
+  if [ -z "$HAVE_CONTEXT" ]; then
+    HAVE_CONTEXT=1
+    # Expansions are quoted so that a workspace path or token containing
+    # whitespace or glob characters reaches benny as exactly one argument.
+    benny insert code_context "${PROJECT_NAME}" "${SOURCE_ROOT}" --save-json code_context.json
+    benny insert run_context "${BENNY_SYSTEM_TOKEN}" ./code_context.json --save-json run_context.json
+  fi
+}
+
+
+SUITE_NAME="ExampleRenderers"
+
+initContext
+
+# Runs one subsuite: executes the benchmarks selected by the filter regex and
+# inserts the resulting JSON file under ${SUITE_NAME} / <subsuite name>.
+#   $1 - subsuite name
+#   $2 - regex passed to --benchmark_filter
+runSubsuite() {
+  local subsuite_name=$1
+  local subsuite_regex=$2
+  local results_file="results-${SUITE_NAME}-${subsuite_name}.json"
+
+  # BENCHMARK_FLAGS is intentionally left unquoted: it holds several flags
+  # which must be split into separate arguments.
+  "./bin/vklBenchmark${BENCHMARK_DEVICE}" ${BENCHMARK_FLAGS} --benchmark_filter="${subsuite_regex}" --benchmark_out="${results_file}"
+  benny insert googlebenchmark ./run_context.json "${SUITE_NAME}" "${subsuite_name}" "./${results_file}"
+}
+
+runSubsuite "StructuredVolume" "structured_regular"
+runSubsuite "VDBVolume" "vdb"
+runSubsuite "UnstructuredVolume" "unstructured"
diff --git a/.github/scripts/run-examples-big-volume-tests.sh b/.github/scripts/run-examples-big-volume-tests.sh
@@ -0,0 +1,32 @@
+#!/bin/bash -xe
+## Copyright 2023 Intel Corporation
+## SPDX-License-Identifier: Apache-2.0
+
+# These tests verify that the rendered image is correct for a big volume size on PVC.
+# The `time` command is used here only to show the total execution time (as a rough estimate).
+
+IMG_DIFF_TOOL="$STORAGE_PATH/tools/img_diff/img_diff"
+
+# Enable persistent JIT cache
+export SYCL_CACHE_PERSISTENT=1
+export SYCL_CACHE_DIR=./jit_cache
+
+# dim = 2048: 2048^3 voxels * sizeof(float) gives a volume size of 32 GB
+dim=2048
+
+# Runs one example binary in batch mode on the big volume and reports how long
+# it took. The image comparisons below expect each run to leave a
+# <renderer>.pfm file in the current directory.
+#   $1 - example binary
+#   $2 - renderer name
+#   $3 - value passed to -spp
+runExample() {
+  local binary=$1
+  local renderer=$2
+  local spp=$3
+  time "$binary" -renderer "$renderer" -batch -printStats -spp "$spp" -framebufferSize 1024 1024 -gridDimensions "$dim" "$dim" "$dim"
+}
+
+runExample ./bin/vklExamplesGPU density_pathtracer_gpu 50
+runExample ./bin/vklExamplesGPU ray_march_iterator_gpu 2
+runExample ./bin/vklExamplesGPU hit_iterator_renderer_gpu 2
+runExample ./bin/vklExamplesGPU interval_iterator_debug_gpu 2
+
+# Run cpu examples to get reference images
+runExample ./bin/vklExamplesCPU density_pathtracer 50
+runExample ./bin/vklExamplesCPU ray_march_iterator 2
+runExample ./bin/vklExamplesCPU hit_iterator_renderer 2
+runExample ./bin/vklExamplesCPU interval_iterator_debug 2
+
+# Compare images generated by GPU examples vs CPU examples.
+# The tool path is quoted so a STORAGE_PATH containing spaces still works.
+"$IMG_DIFF_TOOL" density_pathtracer.pfm density_pathtracer_gpu.pfm
+"$IMG_DIFF_TOOL" ray_march_iterator.pfm ray_march_iterator_gpu.pfm 0.0001
+"$IMG_DIFF_TOOL" hit_iterator_renderer.pfm hit_iterator_renderer_gpu.pfm 0.00005
+"$IMG_DIFF_TOOL" interval_iterator_debug.pfm interval_iterator_debug_gpu.pfm 0.00003
diff --git a/.github/scripts/run-examples-tests.py b/.github/scripts/run-examples-tests.py
@@ -0,0 +1,242 @@
+## Copyright 2023 Intel Corporation
+## SPDX-License-Identifier: Apache-2.0
+
+import subprocess
+import os
+import platform
+import sys
+import signal
+from threading import Timer
+
+# Flag for enabling additional verbose debug info.
+# It is read by the private debug-print helper of OpenVKLTestCase, which
+# prints its message only when this flag is True.
+debug_enabled = True
+
+# Representation of the output which goes to the console.
+# It consists of stdout & stderr.
+class Output(object):
+    """Captured console output of a command, kept as two separate streams."""
+
+    stdout: str
+    stderr: str
+
+    def __init__(self, stdout:str, stderr:str):
+        self.stdout, self.stderr = stdout, stderr
+
+    def __str__(self):
+        # Converting an instance to a string yields stdout
+        # immediately followed by stderr.
+        merged = self.stdout
+        merged += self.stderr
+        return merged
+
+class TestCommandTool:
+    """Runs a shell command with a timeout and captures its console output."""
+
+    def run(self, cmd, timeout, cwd = None, test_env = None, print_output = True):
+        """Run `cmd` through the shell and wait until it exits or is killed.
+
+        Args:
+            cmd: Command line, executed with shell=True.
+            timeout: Number of seconds after which the process is killed.
+            cwd: Working directory for the command. None (default) means
+                the current working directory at the time of the call.
+            test_env: Environment for the command. None (default) means
+                a copy of os.environ taken at the time of the call.
+            print_output: If True, print the captured stdout + stderr.
+
+        Returns:
+            Tuple (exit_code, output) where output is an Output instance.
+            exit_code is 124 if the timeout expired.
+        """
+        # Defaults are resolved here and not in the signature: default values
+        # in a signature are evaluated only once, when the function is
+        # defined, so they would miss any later chdir / environment change.
+        if cwd is None:
+            cwd = os.getcwd()
+        if test_env is None:
+            test_env = os.environ.copy()
+
+        proc = subprocess.Popen(cmd, shell=True, cwd=cwd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=None, universal_newlines=True, env=test_env)
+
+        # This variable we use to pass value back from
+        # Timer callback function - _kill.
+        timeout_flag_wrapper = {'timeout_occured': False}
+
+        # We use timer to not hang forever on
+        # proc.communicate(), in case of timeout
+        # _kill method will kill process and
+        # it will return from proc.communicate()
+        # immediately.
+        timer = Timer(timeout, self._kill, [proc, timeout_flag_wrapper], {})
+        timer.start()
+        try:
+            stdout, stderr = proc.communicate()
+        finally:
+            # Never leave the timer armed, even if communicate() raised.
+            timer.cancel()
+        exit_code = proc.returncode
+
+        if timeout_flag_wrapper["timeout_occured"]:
+            exit_code = 124
+            stderr += "=K=> Timeout expired, process was killed."
+
+        output = Output(stdout, stderr)
+        if print_output:
+            print(output, flush=True)
+
+        return (exit_code, output)
+
+    def _kill(self, proc_to_kill, timeout_flag_wrapper):
+        """Timer callback: record the timeout and kill the process tree.
+
+        Args:
+            proc_to_kill: The subprocess.Popen object started by run().
+            timeout_flag_wrapper: Dict whose "timeout_occured" entry is set
+                to True so that run() can tell a timeout from a normal exit.
+        """
+        timeout_flag_wrapper["timeout_occured"] = True
+        pid = proc_to_kill.pid
+        # Windows
+        if platform.system() == 'Windows':
+            proc = subprocess.Popen(['taskkill', '/F', '/T', '/PID', str(pid)], shell=True)
+            proc.wait()
+        # Linux
+        else:
+            # First ask the children of the started process to terminate.
+            try:
+                subprocess.call(['pkill', '-TERM', '-P', str(pid)])
+            except OSError:
+                # pkill is not available; still kill the process itself below.
+                pass
+            # pkill -P only signals children. If the shell replaced itself
+            # with the command (exec), the command runs under `pid` and has
+            # no children to signal, so the process itself must be killed
+            # as well or communicate() would never return.
+            try:
+                proc_to_kill.kill()
+            except OSError:
+                # The process has already exited.
+                pass
+
+class OpenVKLTestCase:
+    """One GPU-vs-CPU image comparison for a renderer / volume type pair.
+
+    execute() runs the GPU example, the CPU example and the image diff tool;
+    the exit code and console output of each command are kept so that the
+    result can be summarized afterwards.
+    """
+
+    renderer:str = None
+    volume_type:str = None
+    max_mse:float = None
+    spp:int = None
+    extra_gpu_args:str = None
+
+    __gpu_exit_code:int = None
+    __gpu_output:Output = None
+
+    __cpu_exit_code:int = None
+    __cpu_output:Output = None
+
+    __diff_exit_code:int = None
+    __diff_output:Output = None
+
+    def __init__(self, renderer : str, volume_type : str, extra_gpu_args : str = ''):
+        self.renderer = renderer
+        self.volume_type = volume_type
+        self.max_mse = 0.000001
+        self.spp = 2
+        self.extra_gpu_args = extra_gpu_args
+
+        # For this particular case we need to set higher MSE threshold
+        if renderer == "hit_iterator_renderer" and volume_type == "structuredRegular":
+            self.max_mse = 0.000015
+
+        # For density pathtracer we want more spp to get picture closer to final image
+        if renderer == "density_pathtracer":
+            self.spp = 50
+
+    def __print_debug(self, msg:str):
+        if debug_enabled:
+            print(msg)
+
+    def __get_example_cpu_binary_string(self) -> str:
+        if platform.system() == 'Windows':
+            return "vklExamplesCPU.exe"
+        else:
+            return "./vklExamplesCPU"
+
+    def __get_example_gpu_binary_string(self) -> str:
+        if platform.system() == 'Windows':
+            return "vklExamplesGPU.exe"
+        else:
+            return "./vklExamplesGPU"
+
+    def __get_common_params(self) -> str:
+        return "-batch -framebufferSize 1024 1024"
+
+    def __move_image(self, src_name:str, dst_name:str):
+        """Rename an image in the current directory.
+
+        Returns the destination path, or None when the source image does not
+        exist (e.g. because the example crashed or was killed on timeout).
+        """
+        src_path = os.path.join(os.getcwd(), src_name)
+        dst_path = os.path.join(os.getcwd(), dst_name)
+        if not os.path.isfile(src_path):
+            return None
+        # os.replace overwrites an image left over from a previous run on
+        # every platform, while os.rename fails on Windows in that case.
+        os.replace(src_path, dst_path)
+        return dst_path
+
+    def get_name(self) -> str:
+        """Return the test case name: renderer, volume type and extra GPU args."""
+        return "%s-%s%s" % (self.renderer, self.volume_type, self.extra_gpu_args)
+
+    def get_result(self) -> int:
+        """Return 0 if all three commands exited with 0, otherwise 1.
+
+        The exit codes are deliberately not summed: a negative code (process
+        terminated by a signal) and a positive one could cancel each other
+        out and make a failed test case look like a passed one. A command
+        that has not been run yet counts as a failure.
+        """
+        exit_codes = (self.__gpu_exit_code, self.__cpu_exit_code, self.__diff_exit_code)
+        return 0 if all(code == 0 for code in exit_codes) else 1
+
+    def print_error_outputs(self):
+        """Print the captured output of every command whose exit code != 0."""
+        if self.__gpu_exit_code != 0:
+            print("#!# GPU cmd output:")
+            print(self.__gpu_output)
+
+        if self.__cpu_exit_code != 0:
+            print("#!# CPU cmd output:")
+            print(self.__cpu_output)
+
+        if self.__diff_exit_code != 0:
+            print("#!# DIFF cmd output:")
+            print(self.__diff_output)
+
+    def get_MSE(self) -> float:
+        """Return the MSE parsed from the first line of the diff tool stdout.
+
+        The value is expected after ": " on the first line. MSE can't be
+        negative, so -1.0 is returned when no value can be parsed (diff tool
+        not run, empty output or unexpected format).
+        """
+        if self.__diff_output is None:
+            return -1.0
+        lines = self.__diff_output.stdout.splitlines()
+        if not lines:
+            return -1.0
+        try:
+            return float(lines[0].split(": ")[1])
+        except (IndexError, ValueError):
+            return -1.0
+
+    def execute(self, img_diff_tool_path:str, timeout:int = 60):
+        """Run the GPU example, the CPU example and the image comparison.
+
+        Args:
+            img_diff_tool_path: Path of the image diff tool executable.
+            timeout: Timeout in seconds applied to each command separately
+                (default: 60).
+        """
+        # Execute GPU example
+        gpu_run_cmd = "%s -renderer %s_gpu %s -volumeType %s -spp %d %s" % (self.__get_example_gpu_binary_string(), self.renderer, self.__get_common_params(), self.volume_type, self.spp, self.extra_gpu_args)
+        self.__print_debug("## Executing: '%s', with timeout: %d" % (gpu_run_cmd, timeout))
+        self.__gpu_exit_code, self.__gpu_output = TestCommandTool().run(gpu_run_cmd, timeout)
+
+        # Execute CPU example
+        cpu_run_cmd = "%s -renderer %s %s -volumeType %s -spp %d" % (self.__get_example_cpu_binary_string(), self.renderer, self.__get_common_params(), self.volume_type, self.spp)
+        self.__print_debug("## Executing: '%s', with timeout: %d" % (cpu_run_cmd, timeout))
+        self.__cpu_exit_code, self.__cpu_output = TestCommandTool().run(cpu_run_cmd, timeout)
+
+        # Rename generated images to new name pattern "renderer-volume_type" instead of "renderer"
+        # so all images can be stored in the same directory. That way we can avoid overriding output image
+        # by different volume types executions.
+        dst_gpu_file_path = self.__move_image("%s_gpu.pfm" % self.renderer, "%s-gpu.pfm" % (self.get_name()))
+        dst_cpu_file_path = self.__move_image("%s.pfm" % self.renderer, "%s-cpu.pfm" % (self.get_name()))
+
+        if dst_gpu_file_path is None or dst_cpu_file_path is None:
+            # An example produced no image. Record that as a failed comparison
+            # instead of raising, so the remaining test cases still run and
+            # the summary is still printed.
+            self.__diff_exit_code = 1
+            self.__diff_output = Output("", "=K=> Image comparison skipped, rendered image is missing.")
+        else:
+            # Calculate difference between GPU & CPU generated image
+            img_diff_cmd = "%s %s %s %.10f" % (img_diff_tool_path, dst_gpu_file_path, dst_cpu_file_path, self.max_mse)
+            self.__print_debug("## Executing: '%s', with timeout: %d" % (img_diff_cmd, timeout))
+            self.__diff_exit_code, self.__diff_output = TestCommandTool().run(img_diff_cmd, timeout)
+
+        self.__print_debug("## MSE: %.10f" % self.get_MSE())
+        self.__print_debug("## Exit codes: %d %d %d" % (self.__gpu_exit_code, self.__cpu_exit_code, self.__diff_exit_code))
+
+def main():
+    """Run every renderer / volume type test case and print a summary.
+
+    Returns the process exit code: 2 when the diff tool path argument is
+    missing, 0 when all test cases passed and 1 otherwise.
+    """
+    if len(sys.argv) <= 1:
+        print("#!## [ERROR] First argument must contain path to diff_tool")
+        return 2
+
+    img_diff_tool_path = sys.argv[1]
+
+    # generate test cases
+    renderers = ("density_pathtracer", "ray_march_iterator", "interval_iterator_debug", "hit_iterator_renderer")
+    volume_types = ("structuredRegular", "structuredSpherical", "unstructured", "particle", "amr", "vdb")
+    test_cases = []
+    for renderer in renderers:
+        for volume_type in volume_types:
+            # structuredRegular is additionally tested with device-only shared buffers
+            gpu_arg_variants = [""]
+            if volume_type == "structuredRegular":
+                gpu_arg_variants.append("-deviceOnlySharedBuffers")
+            test_cases.extend(OpenVKLTestCase(renderer, volume_type, gpu_args) for gpu_args in gpu_arg_variants)
+
+    # execute test cases
+    for case in test_cases:
+        case.execute(img_diff_tool_path)
+
+    # print summary & analyze results
+    print()
+    print("######################################### SUMMARY ##########################################")
+    print()
+
+    # For any more advanced table formatting external library should be used
+    # or external class should be created.
+    fixed_width_row_format = "%-5s %-7s %-65s %s"
+    print(fixed_width_row_format % ("####", "Result", "Test case name", "MSE value"))
+    print("--------------------------------------------------------------------------------------------")
+
+    failed_cases = []
+    for case in test_cases:
+        passed = case.get_result() == 0
+        if not passed:
+            failed_cases.append(case)
+        log_prefix, verdict = ("####", "[PASS]") if passed else ("#!##", "[FAIL]")
+        print(fixed_width_row_format % (log_prefix, verdict, case.get_name(), "%.10f" % case.get_MSE()))
+
+    total = len(test_cases)
+    passed_total = total - len(failed_cases)
+
+    print()
+    if not failed_cases:
+        print("#### All test cases PASSED (passrate: %d/%d)" % (passed_total, total))
+        return 0
+
+    print("#!## Some test cases FAILED (passrate: %d/%d)" % (passed_total, total))
+    print()
+    # Print output from failed tests
+    for case in failed_cases:
+        print("#!## '%s' failure details:" % case.get_name())
+        case.print_error_outputs()
+    return 1
+
+if __name__ == "__main__":
+    sys.exit(main())