diff --git a/CMakeLists.txt b/CMakeLists.txt index c14dd0c2..7270b4fe 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -13,7 +13,7 @@ set (APEX_VERSION_MINOR 6 CACHE STRING "APEX Minor Version") set (APEX_VERSION_PATCH 5 CACHE STRING "APEX Patch Version") set (APEX_HOMEPAGE_URL "http://github.com/UO-OACISS/apex" CACHE STRING "APEX homepage URL") -cmake_policy(VERSION 2.8.12) +# cmake_policy(VERSION 3.20.1) - implicitly called by cmake_mimium_required if (${CMAKE_MAJOR_VERSION} GREATER 2) cmake_policy(SET CMP0042 NEW) if (${CMAKE_MINOR_VERSION} GREATER 11) @@ -342,12 +342,7 @@ if (APEX_USE_PEDANTIC) endif(APEX_WITH_CUDA) endif (APEX_USE_PEDANTIC) -if ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "IntelLLVM") - set(APEX_STDCXX_LIB "" CACHE STRING "C++ library for linking") - message("IntelLLVM compiler detected, no stdc++ library needed") -else() - set(APEX_STDCXX_LIB stdc++ CACHE STRING "C++ library for linking") -endif() +set(APEX_STDCXX_LIB stdc++ CACHE STRING "C++ library for linking") # PGI and Intel don't like the concurrentqueue code. if (NOT "${CMAKE_CXX_COMPILER_ID}" STREQUAL "PGI") @@ -365,35 +360,6 @@ set(CMAKE_CXX_EXTENSIONS OFF) set(CMAKE_CXX_STANDARD 17) set(CMAKE_CXX_STANDARD_REQUIRED ON) -# add_definitions(-std=c++11) -#include(CheckCXXCompilerFlag) -#CHECK_CXX_COMPILER_FLAG("-std=c++17" COMPILER_SUPPORTS_CXX17) -#if(COMPILER_SUPPORTS_CXX17) -# set(CMAKE_CXX_SUPPORT_FLAG "-std=c++17" CACHE STRING "CXX Support Flag" FORCE) -#else() -# CHECK_CXX_COMPILER_FLAG("-std=c++14" COMPILER_SUPPORTS_CXX14) -# if(COMPILER_SUPPORTS_CXX14) -# set(CMAKE_CXX_SUPPORT_FLAG "-std=c++14" CACHE STRING "CXX Support Flag" FORCE) -# else() -# CHECK_CXX_COMPILER_FLAG("-std=c++1y" COMPILER_SUPPORTS_CXX1Y) -# if(COMPILER_SUPPORTS_CXX1Y) -# set(CMAKE_CXX_SUPPORT_FLAG "-std=c++1y" CACHE STRING "CXX Support Flag" FORCE) -# else() -# CHECK_CXX_COMPILER_FLAG("-std=c++11" COMPILER_SUPPORTS_CXX11) -# if(COMPILER_SUPPORTS_CXX11) -# set(CMAKE_CXX_SUPPORT_FLAG "-std=c++11" CACHE STRING "CXX Support Flag" FORCE) -# else() -# CHECK_CXX_COMPILER_FLAG("-std=c++0x" COMPILER_SUPPORTS_CXX0X) -# if(COMPILER_SUPPORTS_CXX0X) -# set(CMAKE_CXX_SUPPORT_FLAG "-std=c++0x" CACHE STRING "CXX Support Flag" FORCE) -# else() -# message(FATAL_ERROR " Compiler ${CMAKE_CXX_COMPILER} has no C++11 support.") -# endif() -# endif() -# endif() -# endif() -#endif() - # By the way, GCC lies. It accepts the flag, but doesn't have the support. if ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU") if(CMAKE_CXX_COMPILER_VERSION VERSION_LESS 5.1) @@ -688,7 +654,9 @@ if(APEX_WITH_KOKKOS) if(APEX_BUILD_TESTS) # Just for testing SET(Kokkos_LIBRARY kokkoscore) - set(Kokkos_ENABLE_OPENMP ON CACHE BOOL "" FORCE) + if(NOT APPLE) + set(Kokkos_ENABLE_OPENMP ON CACHE BOOL "" FORCE) + endif(NOT APPLE) set(Kokkos_ENABLE_SERIAL ON CACHE BOOL "" FORCE) set(Kokkos_ARCH_NATIVE ON CACHE BOOL "" FORCE) set(Kokkos_ENABLE_TUNING ON CACHE BOOL "" FORCE) diff --git a/src/apex/CMakeLists_standalone.cmake b/src/apex/CMakeLists_standalone.cmake index 20285b56..f8fac7e2 100644 --- a/src/apex/CMakeLists_standalone.cmake +++ b/src/apex/CMakeLists_standalone.cmake @@ -206,6 +206,7 @@ endif (APEX_WITH_HIP) if (APEX_WITH_LEVEL0) SET(LEVEL0_SOURCE apex_level0.cpp) add_definitions(-DAPEX_WITH_LEVEL0) + add_definitions(-DPTI_LEVEL_ZERO=1) add_library (apex_level0 ${LEVEL0_SOURCE}) target_link_libraries (apex_level0 apex ${LIBS} diff --git a/src/apex/L0/utils.h b/src/apex/L0/utils.h index 4b6c3787..837c51a4 100644 --- a/src/apex/L0/utils.h +++ b/src/apex/L0/utils.h @@ -21,6 +21,12 @@ #include "pti_assert.h" +#ifdef _WIN32 +#define PTI_EXPORT __declspec(dllexport) +#else +#define PTI_EXPORT __attribute__ ((visibility ("default"))) +#endif + #define STRINGIFY(x) #x #define TOSTRING(x) STRINGIFY(x) @@ -35,6 +41,18 @@ namespace utils { +struct DeviceUUID { + uint16_t vendorID; + uint16_t deviceID; + uint16_t revisionID; + uint16_t pciDomain; + uint8_t pciBus; + uint8_t pciDevice; + uint8_t pciFunction; + uint8_t reserved[4]; + uint8_t subDeviceId; +}; + struct Comparator { template bool operator()(const T& left, const T& right) const { @@ -45,6 +63,19 @@ namespace utils { } }; +template +struct ComparatorPciAddress { + bool operator()(const T& left, const T& right) const { + if (left.BusNumber != right.BusNumber) { + return (left.BusNumber < right.BusNumber); + } + if (left.DeviceNumber != right.DeviceNumber) { + return (left.DeviceNumber < right.DeviceNumber); + } + return left.FunctionNumber < right.FunctionNumber; + } +}; + #if defined(__gnu_linux__) inline uint64_t GetTime(clockid_t id) { @@ -167,7 +198,7 @@ namespace utils { return GetCurrentThreadId(); #else #ifdef SYS_gettid - return syscall(SYS_gettid); + return (uint32_t)syscall(SYS_gettid); #else #error "SYS_gettid is unavailable on this system" #endif diff --git a/src/apex/L0/ze_kernel_collector.h b/src/apex/L0/ze_kernel_collector.h index 48991e14..c36e02b9 100644 --- a/src/apex/L0/ze_kernel_collector.h +++ b/src/apex/L0/ze_kernel_collector.h @@ -190,6 +190,7 @@ class ZeKernelCollector { prologue_callbacks.EventPool.pfnCreateCb = OnEnterEventPoolCreate; epilogue_callbacks.EventPool.pfnCreateCb = OnExitEventPoolCreate; + epilogue_callbacks.Event.pfnHostSynchronizeCb = OnExitEventHostSynchronize; prologue_callbacks.CommandList.pfnAppendLaunchKernelCb = OnEnterCommandListAppendLaunchKernel; @@ -496,6 +497,19 @@ class ZeKernelCollector { } } + static void OnExitEventHostSynchronize(ze_event_host_synchronize_params_t *params, + ze_result_t result, + void *global_data, + void **instance_data) { + if (*(params->phEvent) != nullptr) { + ZeKernelCollector* collector = + reinterpret_cast(global_data); + PTI_ASSERT(collector != nullptr); + collector->ProcessCall(*(params->phEvent)); + collector->ProcessCalls(); + } + } + static void CreateEvent(ze_context_handle_t context, ze_event_pool_handle_t& event_pool, ze_event_handle_t& event) { diff --git a/src/apex/L0/ze_utils.h b/src/apex/L0/ze_utils.h index 4615592b..c5a25ab0 100644 --- a/src/apex/L0/ze_utils.h +++ b/src/apex/L0/ze_utils.h @@ -355,7 +355,9 @@ namespace utils { ze_device_properties_t props{ZE_STRUCTURE_TYPE_DEVICE_PROPERTIES_1_2, nullptr}; ze_result_t status = zeDeviceGetProperties(device, &props); PTI_ASSERT(status == ZE_RESULT_SUCCESS); - return (1ull << props.kernelTimestampValidBits) - 1ull; + //return (1ull << props.kernelTimestampValidBits) - 1ull; + return ((props.kernelTimestampValidBits == 64) ? std::numeric_limits::max() + : ((1ull << props.kernelTimestampValidBits) - 1ull)); } inline uint64_t GetMetricTimestampMask(ze_device_handle_t device) { @@ -365,7 +367,15 @@ namespace utils { ze_device_properties_t props{ZE_STRUCTURE_TYPE_DEVICE_PROPERTIES_1_2, nullptr}; ze_result_t status = zeDeviceGetProperties(device, &props); PTI_ASSERT(status == ZE_RESULT_SUCCESS); - return (1ull << props.kernelTimestampValidBits) - 1ull; + //return (1ull << props.kernelTimestampValidBits) - 1ull; + uint32_t devicemask = (props.deviceId & 0xFF00); + if ((devicemask == 0x5600) || (devicemask == 0x4F00) || (devicemask == 0x0B00)) { + return (1ull << (props.kernelTimestampValidBits - 1)) - 1ull; + } + else { + return ((props.kernelTimestampValidBits == 64) ? std::numeric_limits::max() + : ((1ull << props.kernelTimestampValidBits) - 1ull)); + } #endif } diff --git a/src/apex/apex.cpp b/src/apex/apex.cpp index eacd1f72..04c53811 100644 --- a/src/apex/apex.cpp +++ b/src/apex/apex.cpp @@ -247,11 +247,8 @@ void apex::_initialize() tmp << " (Debug)"; #endif tmp << "\nC++ Language Standard version : " << __cplusplus; -#if defined(__clang__) - /* Clang/LLVM. ---------------------------------------------- */ - tmp << "\nClang Compiler version : " << __VERSION__; -#elif defined(__ICC) || defined(__INTEL_COMPILER) - /* Intel ICC/ICPC. ------------------------------------------ */ +#if defined(__ICC) || defined(__INTEL_COMPILER) || defined(__INTEL_CLANG_COMPILER) || defined(__INTEL_LLVM_COMPILER) + /* Intel ICC/ICPC/ICX/ICPX. --------------------------------- */ tmp << "\nIntel Compiler version : " << __VERSION__; #elif defined(__GNUC__) || defined(__GNUG__) /* GNU GCC/G++. --------------------------------------------- */ @@ -271,6 +268,9 @@ void apex::_initialize() #elif defined(__SUNPRO_CC) /* Oracle Solaris Studio. ----------------------------------- */ tmp << "\nOracle Compiler version : " << __SUNPRO_CC; +#elif defined(__clang__) + /* Clang/LLVM. ---------------------------------------------- */ + tmp << "\nClang Compiler version : " << __VERSION__; #endif tmp << "\nConfigured features: Pthread"; #if defined(APEX_WITH_ACTIVEHARMONY) || defined(APEX_HAVE_ACTIVEHARMONY) @@ -605,9 +605,22 @@ uint64_t init(const char * thread_name, uint64_t comm_rank, unsetenv("LD_PRELOAD"); } if (comm_rank == 0) { - printf("%s", apex_banner); - printf("APEX Version: %s\n", instance->version_string.c_str()); - printf("Executing command line: %s\n", getCommandLine().c_str()); + //printf("%s", apex_banner); + //printf("APEX Version: %s\n", instance->version_string.c_str()); + //printf("Executing command line: %s\n", getCommandLine().c_str()); + std::stringstream ss; + //ss << apex_banner << "\n"; + ss << " ___ ______ _______ __\n"; + ss << " / _ \\ | ___ \\ ___\\ \\ / /\n"; + ss << "/ /_\\ \\| |_/ / |__ \\ V /\n"; + ss << "| _ || __/| __| / \\\n"; + ss << "| | | || | | |___/ /^\\ \\\n"; + ss << "\\_| |_/\\_| \\____/\\/ \\/\n"; + ss << "APEX Version: " << instance->version_string << "\n"; + ss << "Executing command line: " << getCommandLine() << "\n" << std::endl; + std::string tmp{ss.str()}; + fputs(tmp.c_str(), stdout); + } FUNCTION_EXIT return APEX_NOERROR; diff --git a/src/apex/apex_kokkos_tuning.cpp b/src/apex/apex_kokkos_tuning.cpp index 880ab9df..5b034dfa 100644 --- a/src/apex/apex_kokkos_tuning.cpp +++ b/src/apex/apex_kokkos_tuning.cpp @@ -186,11 +186,11 @@ class Variable { double dmin; double dmax; double dstep; - uint64_t lmin; - uint64_t lmax; - uint64_t lstep; - uint64_t lvar; - uint64_t numValues; + int64_t lmin; + int64_t lmax; + int64_t lstep; + int64_t lvar; + int64_t numValues; void makeSpace(void); std::vector bins; std::string getBin(double value) { @@ -990,9 +990,10 @@ bool handle_start(const std::string & name, const size_t vars, } double result = profile->minimum; if (result == 0.0) result = profile->accumulated/profile->calls; + result = result * 1.0e-9; // convert to seconds to help search math if(verbose) { std::cout << std::string(getDepth(), ' '); - std::cout << "querying time per call: " << (double)(result)/1000000000.0 << "s" << std::endl; + std::cout << "querying time per call: " << result << "s" << std::endl; } return result; }; @@ -1064,24 +1065,36 @@ bool handle_start(const std::string & name, const size_t vars, } else if (var->info.type == kokkos_value_int64) { front = std::string(values[i].value.string_value); } - //printf("Initial value: %s\n", front.c_str()); fflush(stdout); + //printf("Initial string value: %s\n", front.c_str()); fflush(stdout); auto tmp = request->add_param_enum( session.outputs[id]->name, front, space); } else { if (var->info.type == kokkos_value_double) { + double tval = values[i].value.double_value; + if (tval < session.outputs[id]->dmin || + tval > session.outputs[id]->dmax) { + tval = session.outputs[id]->dmin; + } auto tmp = request->add_param_double( session.outputs[id]->name, values[i].value.double_value, session.outputs[id]->dmin, session.outputs[id]->dmax, session.outputs[id]->dstep); + //printf("Initial double value: %f\n", tval); fflush(stdout); } else if (var->info.type == kokkos_value_int64) { + int64_t tval = values[i].value.int_value; + if (tval < session.outputs[id]->lmin || + tval > session.outputs[id]->lmax) { + tval = session.outputs[id]->lmin; + } auto tmp = request->add_param_long( session.outputs[id]->name, - values[i].value.int_value, + tval, session.outputs[id]->lmin, session.outputs[id]->lmax, session.outputs[id]->lstep); + //printf("Initial long value: %ld\n", tval); fflush(stdout); } } } @@ -1302,7 +1315,7 @@ void kokkosp_end_context(const size_t contextId) { start != session.context_starts.end()) { if (session.verbose) { std::cout << std::string(getDepth(), ' '); - std::cout << name->second << "\t" << (end-(start->second)) << std::endl; + std::cout << name->second << "\t" << (end-(start->second)) << " sec." << std::endl; } if (session.used_history.count(contextId) == 0) { apex::sample_value(name->second, (double)(end-(start->second))); diff --git a/src/apex/apex_policies.cpp b/src/apex/apex_policies.cpp index 284ea7f5..7a486761 100644 --- a/src/apex/apex_policies.cpp +++ b/src/apex/apex_policies.cpp @@ -1566,7 +1566,7 @@ inline int __sa_setup(shared_ptr v.lvalues.push_back(lvalue); lvalue = lvalue + param_long->step; } while (lvalue <= param_long->max); - v.set_init(); + v.set_init(param_long->init); tuning_session->sa_session.add_var(param_name, std::move(v)); } break; @@ -1579,7 +1579,7 @@ inline int __sa_setup(shared_ptr v.dvalues.push_back(dvalue); dvalue = dvalue + param_double->step; } while (dvalue <= param_double->max); - v.set_init(); + v.set_init(param_double->init); tuning_session->sa_session.add_var(param_name, std::move(v)); } break; @@ -1591,7 +1591,7 @@ inline int __sa_setup(shared_ptr param_enum->possible_values) { v.svalues.push_back(possible_value); } - v.set_init(); + v.set_init(param_enum->init_value); tuning_session->sa_session.add_var(param_name, std::move(v)); } break; @@ -1619,6 +1619,7 @@ inline int __nelder_mead_setup(shared_ptr const char * param_name = param->get_name().c_str(); switch(param->get_type()) { case apex_param_type::LONG: { + //std::cout << "Type long" << std::endl; auto param_long = std::static_pointer_cast(param); Variable v(VariableType::longtype, param_long->value.get()); @@ -1627,11 +1628,12 @@ inline int __nelder_mead_setup(shared_ptr v.lvalues.push_back(lvalue); lvalue = lvalue + param_long->step; } while (lvalue <= param_long->max); - v.set_init(); + v.set_init(param_long->init); tuning_session->nelder_mead_session.add_var(param_name, std::move(v)); } break; case apex_param_type::DOUBLE: { + //std::cout << "Type double" << std::endl; auto param_double = std::static_pointer_cast(param); Variable v(VariableType::doubletype, param_double->value.get()); @@ -1640,11 +1642,12 @@ inline int __nelder_mead_setup(shared_ptr v.dvalues.push_back(dvalue); dvalue = dvalue + param_double->step; } while (dvalue <= param_double->max); - v.set_init(); + v.set_init(param_double->init); tuning_session->nelder_mead_session.add_var(param_name, std::move(v)); } break; case apex_param_type::ENUM: { + //std::cout << "Type enum" << std::endl; auto param_enum = std::static_pointer_cast(param); Variable v(VariableType::stringtype, param_enum->value.get()); @@ -1652,7 +1655,7 @@ inline int __nelder_mead_setup(shared_ptr param_enum->possible_values) { v.svalues.push_back(possible_value); } - v.set_init(); + v.set_init(param_enum->init_value); tuning_session->nelder_mead_session.add_var(param_name, std::move(v)); } break; @@ -1688,7 +1691,7 @@ inline int __genetic_setup(shared_ptr v.lvalues.push_back(lvalue); lvalue = lvalue + param_long->step; } while (lvalue <= param_long->max); - v.set_init(); + v.set_init(param_long->init); tuning_session->genetic_session.add_var(param_name, std::move(v)); } break; @@ -1701,7 +1704,7 @@ inline int __genetic_setup(shared_ptr v.dvalues.push_back(dvalue); dvalue = dvalue + param_double->step; } while (dvalue <= param_double->max); - v.set_init(); + v.set_init(param_double->init); tuning_session->genetic_session.add_var(param_name, std::move(v)); } break; @@ -1713,7 +1716,7 @@ inline int __genetic_setup(shared_ptr param_enum->possible_values) { v.svalues.push_back(possible_value); } - v.set_init(); + v.set_init(param_enum->init_value); tuning_session->genetic_session.add_var(param_name, std::move(v)); } break; @@ -1810,7 +1813,7 @@ inline int __random_setup(shared_ptr v.lvalues.push_back(lvalue); lvalue = lvalue + param_long->step; } while (lvalue <= param_long->max); - v.set_init(); + v.set_init(param_long->init); tuning_session->random_session.add_var(param_name, std::move(v)); } break; @@ -1823,7 +1826,7 @@ inline int __random_setup(shared_ptr v.dvalues.push_back(dvalue); dvalue = dvalue + param_double->step; } while (dvalue <= param_double->max); - v.set_init(); + v.set_init(param_double->init); tuning_session->random_session.add_var(param_name, std::move(v)); } break; @@ -1835,7 +1838,7 @@ inline int __random_setup(shared_ptr param_enum->possible_values) { v.svalues.push_back(possible_value); } - v.set_init(); + v.set_init(param_enum->init_value); tuning_session->random_session.add_var(param_name, std::move(v)); } break; diff --git a/src/apex/genetic_search.hpp b/src/apex/genetic_search.hpp index 0637baa9..314e0726 100644 --- a/src/apex/genetic_search.hpp +++ b/src/apex/genetic_search.hpp @@ -49,10 +49,34 @@ class Variable { return current_index; } void save_best() { best_index = current_index; } - void set_init() { - maxlen = (std::max(std::max(dvalues.size(), - lvalues.size()), svalues.size())); - current_index = 0; + void set_init(double init_value) { + maxlen = dvalues.size(); + auto it = std::find(dvalues.begin(), dvalues.end(), init_value); + if (it == dvalues.end()) { + current_index = 0; + } else { + current_index = distance(dvalues.begin(), it); + } + set_current_value(); + } + void set_init(long init_value) { + maxlen = lvalues.size(); + auto it = std::find(lvalues.begin(), lvalues.end(), init_value); + if (it == lvalues.end()) { + current_index = 0; + } else { + current_index = distance(lvalues.begin(), it); + } + set_current_value(); + } + void set_init(std::string init_value) { + maxlen = svalues.size(); + auto it = std::find(svalues.begin(), svalues.end(), init_value); + if (it == svalues.end()) { + current_index = 0; + } else { + current_index = distance(svalues.begin(), it); + } set_current_value(); } std::string getBest() { diff --git a/src/apex/nelder_mead.cpp b/src/apex/nelder_mead.cpp index 602a3149..1f05f367 100644 --- a/src/apex/nelder_mead.cpp +++ b/src/apex/nelder_mead.cpp @@ -10,36 +10,58 @@ namespace apex { namespace nelder_mead { void NelderMead::start(void) { - // create a starting point - std::vector init_point; - for (auto& v : vars) { - init_point.push_back(v.second.get_init()); - } - // create a lower limit + // find a lower and upper limit, and create a starting point std::vector lower_limit; std::vector upper_limit; + std::vector init_point; for (auto& v : vars) { auto& limits = v.second.get_limits(); lower_limit.push_back(limits[0]); upper_limit.push_back(limits[1]); + //init_point.push_back(v.second.get_init()); + //std::cout << "NM: init_point: " << v.second.get_init() << std::endl; + auto tmp = (limits[0] + limits[1]) * 0.5; + //std::cout << "NM: init_point: " << tmp << std::endl; + init_point.push_back(tmp); } // create a starting simplex - random values in the space, nvars+1 of them std::vector> init_simplex; - for (size_t i = 0 ; i < (vars.size() + 1) ; i++) { - std::vector tmp; + // first, create a point that is 0.25 of the range for all variables. + std::vector tmp; + for (auto& v : vars) { + // get the limits + auto& limits = v.second.get_limits(); + // get the range + double range = (limits[1] - limits[0]); + // get a point either 1/4 less than or 1/4 greater than the midpoint + double sample_in_range = range * 0.25; + tmp.push_back(limits[0] + sample_in_range); + } + init_simplex.push_back(tmp); + for (size_t i = 0 ; i < vars.size() ; i++) { + std::vector tmp2; + size_t j = 0; for (auto& v : vars) { - double r = ((double) std::rand() / (RAND_MAX)); + // get the limits auto& limits = v.second.get_limits(); - tmp.push_back(r * ((limits[1] + limits[0]) / 2.0)); + // get the range + double range = (limits[1] - limits[0]); + // get a point either 1/4 less than or 1/4 greater than the midpoint + double sample_in_range = range * (j == i ? 0.75 : 0.25); + tmp2.push_back(limits[0] + sample_in_range); + j++; } - init_simplex.push_back(tmp); + //std::cout << "range: [" << lower_limit[i] << "," << upper_limit[i] << "] value: [" + //<< tmp2[0] << "," << tmp2[1] << "]" << std::endl; + init_simplex.push_back(tmp2); } - searcher = new apex::internal::nelder_mead::Searcher(init_point, init_simplex, lower_limit, upper_limit, true); - searcher->function_tolerance(10000); + searcher = new apex::internal::nelder_mead::Searcher(init_point, init_simplex, lower_limit, upper_limit, false); if (hasDiscrete) { - searcher->point_tolerance(1.0); + searcher->point_tolerance(1.0e-4); + searcher->function_tolerance(1.0e-5); } else { - searcher->point_tolerance(0.01); + searcher->point_tolerance(1.0e-4); + searcher->function_tolerance(1.0e-4); } } @@ -89,9 +111,9 @@ void NelderMead::evaluate(double new_cost) { std::cout << std::endl; } cost = new_cost; - // if the function evaluation takes a long time (in nanoseconds, remember), increase our tolerance. - auto tmp = std::max((new_cost / 50.0), 1000.0); - std::cout << "new function tolerance: " << tmp << std::endl; + // if the function evaluation takes a long time (in seconds, remember), increase our tolerance. + auto tmp = std::max((new_cost / 50.0), 1.0e-6); // no smaller than 1 microsecond + //std::cout << "new function tolerance: " << tmp << std::endl; searcher->function_tolerance(tmp); } k++; diff --git a/src/apex/nelder_mead.hpp b/src/apex/nelder_mead.hpp index 82e2bb80..a14b0308 100644 --- a/src/apex/nelder_mead.hpp +++ b/src/apex/nelder_mead.hpp @@ -36,17 +36,22 @@ class Variable { Variable (VariableType vtype, void * ptr) : vtype(vtype), current_index(0), best_index(0), current_value(0), best_value(0), value(ptr), maxlen(0) { } void set_current_value() { + //std::cout << "Current index: " << current_index << std::endl; if (vtype == VariableType::continuous) { *((double*)(value)) = current_value; + //std::cout << "Current value: " << current_value << std::endl; } else if (vtype == VariableType::doubletype) { *((double*)(value)) = dvalues[current_index]; + //std::cout << "Current value: " << dvalues[current_index] << std::endl; } else if (vtype == VariableType::longtype) { *((long*)(value)) = lvalues[current_index]; + //std::cout << "Current value: " << lvalues[current_index] << std::endl; } else { *((const char**)(value)) = svalues[current_index].c_str(); + //std::cout << "Current value: " << svalues[current_index] << std::endl; } } void save_best() { @@ -56,10 +61,34 @@ class Variable { best_index = current_index; } } - void set_init() { - maxlen = (std::max(std::max(dvalues.size(), - lvalues.size()), svalues.size())); - current_index = 0; + void set_init(double init_value) { + maxlen = dvalues.size(); + auto it = std::find(dvalues.begin(), dvalues.end(), init_value); + if (it == dvalues.end()) { + current_index = 0; + } else { + current_index = distance(dvalues.begin(), it); + } + set_current_value(); + } + void set_init(long init_value) { + maxlen = lvalues.size(); + auto it = std::find(lvalues.begin(), lvalues.end(), init_value); + if (it == lvalues.end()) { + current_index = 0; + } else { + current_index = distance(lvalues.begin(), it); + } + set_current_value(); + } + void set_init(std::string init_value) { + maxlen = svalues.size(); + auto it = std::find(svalues.begin(), svalues.end(), init_value); + if (it == svalues.end()) { + current_index = 0; + } else { + current_index = distance(svalues.begin(), it); + } set_current_value(); } std::string getBest() { @@ -100,19 +129,25 @@ class Variable { } // otherwise, choose an index somewhere in the middle //return ((double)maxlen / 2.0); - return 0.5; + //return 0.5; + // otherwise, choose the "index" of the initial value + double tmp = ((double)(current_index)) / ((double)(maxlen)); + //std::cout << current_index << " / " << maxlen << " = " << tmp << std::endl; + return tmp; } const std::vector& get_limits(void) { limits.reserve(2); // if our variable is continuous, we have been initialized with // two values, the min and the max if (vtype == VariableType::continuous) { + //std::cout << "Not continuous" << std::endl; limits[0] = dvalues[0]; limits[1] = dvalues[1]; // if our variable is discrete, we will use the range from 0 to 1, // and scale that value to the number of descrete values we have to get // an index. } else { + //std::cout << "Continuous" << std::endl; limits[0] = 0.0; limits[1] = 1.0; } diff --git a/src/apex/nelder_mead_internal.h b/src/apex/nelder_mead_internal.h index a21d832d..7d5f595a 100644 --- a/src/apex/nelder_mead_internal.h +++ b/src/apex/nelder_mead_internal.h @@ -5,6 +5,7 @@ #include #include #include +#include "apex_options.hpp" namespace apex { namespace internal { @@ -211,7 +212,7 @@ template class Searcher { : adaptive(_adaptive), tol_fun(1e-8), tol_x(1e-8), max_iter(1000), max_fun_evals(2000), current_simplex_index(0), minimum_limits(_minimum_limits), maximum_limits(_maximum_limits), - _converged(false), verbose(false) { + _converged(false), verbose(apex_options::use_verbose()) { initialize(initial_point, initial_simplex); } void function_tolerance(T tol) { @@ -227,18 +228,18 @@ template class Searcher { // Setting parameters if (adaptive) { // Using the results of doi:10.1007/s10589-010-9329-3 - alpha = 1; - beta = 1 + 2 / dimension; - gamma = 0.75 - 1 / (2 * dimension); - delta = 1 - 1 / dimension; + alpha = 1.0; + beta = 1.0 + (2.0 / dimension); + gamma = 0.75 - (1.0 / (2.0 * dimension)); + delta = 1 - (1.0 / dimension); } else { - alpha = 1; - beta = 2; + alpha = 1.0; + beta = 2.0; gamma = 0.5; delta = 0.5; } - std::cout << alpha << " " << beta << " " << gamma << " " << delta - << std::endl; + //std::cout << alpha << " " << beta << " " << gamma << " " << delta + //<< std::endl; simplex.resize(dimension + 1); if (initial_simplex.empty()) { // Generate initial simplex @@ -434,19 +435,21 @@ template class Searcher { if ((max_val_diff <= tol_fun and max_point_diff <= tol_x) or (func_evals_count >= max_fun_evals) or (niter >= max_iter)) { res = simplex[smallest_idx].vec(); + std::cout << "Converged: " << max_val_diff << " value difference." + << std::endl; + std::cout << "Converged: " << max_point_diff << " point difference." + << std::endl; std::cout << "Converged after " << niter << " iterations." << std::endl; std::cout << "Total func evaluations: " << func_evals_count << std::endl; _converged = true; return; - /* - } else { + } else if (verbose) { std::cout << "Not converged: " << max_val_diff << " value difference." << std::endl; std::cout << "Not converged: " << max_point_diff << " point difference." << std::endl; - */ } // not converged? @@ -534,7 +537,7 @@ template class Searcher { } else { // Shrinking if (verbose) { - std::cout << "shrinking" << std::endl; + std::cout << "shrinking, smallest index = " << smallest_idx << std::endl; } // we take the whole simplex, and move every point towards the // current best candidate diff --git a/src/apex/random.hpp b/src/apex/random.hpp index 8a8941a5..c6427ff5 100644 --- a/src/apex/random.hpp +++ b/src/apex/random.hpp @@ -49,10 +49,34 @@ class Variable { return current_index; } void save_best() { best_index = current_index; } - void set_init() { - maxlen = (std::max(std::max(dvalues.size(), - lvalues.size()), svalues.size())); - current_index = 0; + void set_init(double init_value) { + maxlen = dvalues.size(); + auto it = std::find(dvalues.begin(), dvalues.end(), init_value); + if (it == dvalues.end()) { + current_index = 0; + } else { + current_index = distance(dvalues.begin(), it); + } + set_current_value(); + } + void set_init(long init_value) { + maxlen = lvalues.size(); + auto it = std::find(lvalues.begin(), lvalues.end(), init_value); + if (it == lvalues.end()) { + current_index = 0; + } else { + current_index = distance(lvalues.begin(), it); + } + set_current_value(); + } + void set_init(std::string init_value) { + maxlen = svalues.size(); + auto it = std::find(svalues.begin(), svalues.end(), init_value); + if (it == svalues.end()) { + current_index = 0; + } else { + current_index = distance(svalues.begin(), it); + } set_current_value(); } std::string getBest() { diff --git a/src/apex/simulated_annealing.cpp b/src/apex/simulated_annealing.cpp index aa7f9095..bcf07cf7 100644 --- a/src/apex/simulated_annealing.cpp +++ b/src/apex/simulated_annealing.cpp @@ -51,7 +51,7 @@ double SimulatedAnnealing::acceptance_probability(double new_cost) { void SimulatedAnnealing::evaluate(double new_cost) { /* T <- temperature( (k+1)/kmax ) */ - temp = (double)(k)/(double)(kmax); + temp = std::min((double)(k)/(double)(kmax), 1.0); /* If P(E(s), E(snew), T) ≥ random(0, 1): */ /* s <- snew */ if (new_cost < cost) { diff --git a/src/apex/simulated_annealing.hpp b/src/apex/simulated_annealing.hpp index 7af8939e..4f3f5210 100644 --- a/src/apex/simulated_annealing.hpp +++ b/src/apex/simulated_annealing.hpp @@ -66,40 +66,70 @@ class Variable { if (delta < 0 && (current_index < (size_t)(abs(delta)))) { // do nothing //neighbor_index = 0; - } else if (delta > 0 && ((current_index + delta) > maxlen)) { + } else if (delta > 0 && ((current_index + delta) >= maxlen)) { // do nothing //neighbor_index = maxlen; } else { neighbor_index = current_index + delta; } + /* + std::cout << "scope: " << scope + << " quarter: " << quarter + << " delta: " << delta + << " current_index: " << delta + << " neighbor_index: " << delta + << std::endl; + */ if (vtype == VariableType::doubletype) { *((double*)(value)) = dvalues[neighbor_index]; + //std::cout << "next value : " << *((double*)(value)) << std::endl; } else if (vtype == VariableType::longtype) { *((long*)(value)) = lvalues[neighbor_index]; + //std::cout << "next value : " << *((long*)(value)) << std::endl; } else { *((const char**)(value)) = svalues[neighbor_index].c_str(); + //std::cout << "next value : " << *((const char**)(value)) << std::endl; } -/* std::cout << "scope: " << scope - << " quarter: " << quarter - << " delta: " << delta - << " current_index: " << delta - << " neighbor_index: " << delta - << std::endl; */ } void choose_neighbor() { current_index = neighbor_index; } void save_best() { best_index = current_index; } void restore_best() { current_index = best_index; } /* For initializing in the center of the space */ - void set_init() { - maxlen = (std::max(std::max(dvalues.size(), - lvalues.size()), svalues.size())) - 1; + void set_init(void) { half = maxlen/2; - // we need a minimum value of 2 to get movement at all - quarter = (double)(std::max((half/2), size_t(2))); - current_index = neighbor_index = best_index = half; - //std::cout << "Initialized to " << current_index << std::endl; + quarter = (double)half/2; + } + void set_init(double init_value) { + maxlen = dvalues.size(); + set_init(); + auto it = std::find(dvalues.begin(), dvalues.end(), init_value); + if (it == dvalues.end()) { + current_index = 0; + } else { + current_index = distance(dvalues.begin(), it); + } + } + void set_init(long init_value) { + maxlen = lvalues.size(); + set_init(); + auto it = std::find(lvalues.begin(), lvalues.end(), init_value); + if (it == lvalues.end()) { + current_index = 0; + } else { + current_index = distance(lvalues.begin(), it); + } + } + void set_init(std::string init_value) { + maxlen = svalues.size(); + set_init(); + auto it = std::find(svalues.begin(), svalues.end(), init_value); + if (it == svalues.end()) { + current_index = 0; + } else { + current_index = distance(svalues.begin(), it); + } } std::string getBest() { if (vtype == VariableType::doubletype) { diff --git a/src/unit_tests/Kokkos/CMakeLists.txt b/src/unit_tests/Kokkos/CMakeLists.txt index 7ec3c7aa..077057df 100644 --- a/src/unit_tests/Kokkos/CMakeLists.txt +++ b/src/unit_tests/Kokkos/CMakeLists.txt @@ -7,10 +7,14 @@ include_directories (${APEX_SOURCE_DIR}/src/unit_tests/Kokkos) link_directories (${APEX_BINARY_DIR}/src/apex) #link_directories (${APEX_BINARY_DIR}/src/apex_pthread_wrapper) +if(NOT APPLE) + set(openmp_programs mm2d_tiling) +endif(NOT APPLE) + set(example_programs simple two_var - mm2d_tiling + ${openmp_programs} ) foreach(example_program ${example_programs}) diff --git a/src/unit_tests/Kokkos/mm2d_tiling.cpp b/src/unit_tests/Kokkos/mm2d_tiling.cpp index 42aada15..563eb794 100644 --- a/src/unit_tests/Kokkos/mm2d_tiling.cpp +++ b/src/unit_tests/Kokkos/mm2d_tiling.cpp @@ -1,7 +1,5 @@ #include -#include - #include #include // cbrt #include diff --git a/src/utils/CMakeLists.txt b/src/utils/CMakeLists.txt index cd3a75f8..703536a7 100644 --- a/src/utils/CMakeLists.txt +++ b/src/utils/CMakeLists.txt @@ -15,7 +15,7 @@ foreach(util_program ${util_programs}) set(sources ${util_program}.cpp) source_group("Source Files" FILES ${sources}) add_executable("${util_program}" ${sources}) - target_link_libraries ("${util_program}" apex ${APEX_STDCXX_LIB} ${LIBS}) + target_link_libraries ("${util_program}" apex ${LIBS} ${APEX_STDCXX_LIB}) if (BUILD_STATIC_EXECUTABLES) set_target_properties("${util_program}" PROPERTIES LINK_SEARCH_START_STATIC 1 LINK_SEARCH_END_STATIC 1) endif()