diff --git a/cmake/modules/VTA.cmake b/cmake/modules/VTA.cmake index b8be98a53a74..45eea57a53ad 100644 --- a/cmake/modules/VTA.cmake +++ b/cmake/modules/VTA.cmake @@ -43,6 +43,7 @@ elseif(PYTHON) file(GLOB FSIM_RUNTIME_SRCS vta/src/*.cc) list(APPEND FSIM_RUNTIME_SRCS vta/src/sim/sim_driver.cc) list(APPEND FSIM_RUNTIME_SRCS vta/src/vmem/virtual_memory.cc vta/src/vmem/virtual_memory.h) + list(APPEND FSIM_RUNTIME_SRCS vta/src/sim/sim_tlpp.cc) # Target lib: vta_fsim add_library(vta_fsim SHARED ${FSIM_RUNTIME_SRCS}) target_include_directories(vta_fsim PUBLIC vta/include) @@ -54,6 +55,7 @@ elseif(PYTHON) if(APPLE) set_target_properties(vta_fsim PROPERTIES LINK_FLAGS "-undefined dynamic_lookup") endif(APPLE) + target_compile_definitions(vta_fsim PUBLIC USE_FSIM_TLPP) endif() # Cycle accurate simulator driver build diff --git a/vta/include/vta/sim_tlpp.h b/vta/include/vta/sim_tlpp.h new file mode 100644 index 000000000000..ead07f18028f --- /dev/null +++ b/vta/include/vta/sim_tlpp.h @@ -0,0 +1,162 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/*! + * Copyright (c) 2019 by Contributors + * \file sim_tlpp.h + * \brief TVM VTA multiple thread simulator header file. 
+ */ +#ifndef VTA_SIM_TLPP_H_ +#define VTA_SIM_TLPP_H_ +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define SCOREGEMM "gemm" +#define SCORELOAD "load" +#define SCORESTORE "store" +#define SCOREUNKNOWN "unknown" +typedef void (*Run_Function)(const VTAGenericInsn *, void *); +typedef enum {COREGEMM = 0, CORELOAD, CORESTORE, COREMAX} CORE_TYPE; +typedef std::queue Insn_q_t; +typedef std::queue Dep_q_t; +/*! + * \brief Simulates core-level pipeline parallelism: instructions are queued + * per core (gemm/load/store) and executed only once the dependency + * notifications they wait for are available. + */ +class TlppVerify { + public: + /*! \brief Return the single shared TlppVerify instance (a function-local static). */ + static TlppVerify *Global() { static TlppVerify Cls; return &Cls;} + + /*! + * \brief Loop over the queued instructions, executing them while verifying the tlpp logic. + * \param run_function function pointer used to execute one instruction. + * \param fsim_handle pointer to the function simulator Device object; handed back to run_function. + * \param debug enable/disable debug output. + */ + void TlppSynchronization(Run_Function run_function, + void *fsim_handle, + bool debug = false); + /*! + * \brief Push an instruction into the queue of its core for later execution. + * \param insn instruction to queue. + */ + void TlppPushInsn(const VTAGenericInsn *insn); + /*! \brief Event pump that handles dependency (wake-up) events. */ + void EventProcess(void); + /*! \brief Schedule a particular core to run. */ + void CoreRun(CORE_TYPE core_type); + + private: + /*! \brief TlppVerify constructor. */ + TlppVerify(); + /*! + * \brief Clear class variables. + */ + void Clear(); + /*! + * \brief Check whether the dependency conditions of an instruction are satisfied, or send its notifications. + * \param insn instruction to check. + * \param before_run true when the check happens before the instruction + * executes (verify that the dependencies it waits on are satisfied); + * false when it happens after execution (send the notifications the + * instruction requests). + * \return false when a dependency the instruction waits on is not available yet, true otherwise. + */ + bool InsnDependencyCheck(const VTAGenericInsn *insn, bool before_run); + /*! 
+ * \ brief get operation code from insn + * \ param insn instructions + */ + uint64_t GetOperationCode(const VTAGenericInsn *insn); + /*! + * \brief Find which core should run this instruction. + * \param operation_code operation type such as load/gemm etc. + * \param insn instruction. + */ + CORE_TYPE GetCoreType(uint64_t operation_code, const VTAGenericInsn *insn); + /*! + * \brief Pick up the first instruction of the specified core. + * \param core_type core type + * \return the front instruction, or nullptr when that core's queue is empty. + */ + const VTAGenericInsn *PickFrontInsn(uint64_t core_type); + /*! + * \brief Consume one instruction after it has passed its dependency conditions. + * \param core_type core type + */ + void ConsumeFrontInsn(uint64_t core_type); + /*! + * \brief Process the dependency logic of one instruction. + * \param before_run true if this call happens before the instruction runs. + * \param pop_prev whether the instruction waits on the previous core. + * \param pop_next whether the instruction waits on the next core. + * \param push_prev whether the instruction notifies the previous core. + * \param push_next whether the instruction notifies the next core. + * \param pop_prev_q queue holding notifications from the previous core. + * \param pop_next_q queue holding notifications from the next core. + * \param push_prev_q queue used to send a notification to the + * previous core. + * \param push_next_q queue used to send a notification to the + * next core. + * \param push_to_prev_q_indx core to wake up after a notification + * was sent to the previous core. + * \param push_to_next_q_indx core to wake up after a notification + * was sent to the next core. + * \return false if a notification the instruction waits on is missing, true otherwise. + */ + bool DependencyProcess(bool before_run, + bool pop_prev, bool pop_next, + bool push_prev, bool push_next, + Dep_q_t *pop_prev_q, Dep_q_t *pop_next_q, + Dep_q_t *push_prev_q, Dep_q_t *push_next_q, + CORE_TYPE push_to_prev_q_indx, CORE_TYPE push_to_next_q_indx); + /*! + * \brief Return the name string of a core type ("unknown" for any value other than gemm/load/store). + * \param core_type core type + */ + inline const char * GetCoreTypeName(CORE_TYPE core_type) { + return (core_type == COREGEMM) ? SCOREGEMM : + (core_type == CORELOAD) ? SCORELOAD : + (core_type == CORESTORE) ? SCORESTORE : + SCOREUNKNOWN; + } + /*! debug flag*/ + bool debug_; + /*! 
function simulator device class pointer*/ + void *fsim_handle_; + /*! function pointer used to execute one instruction on the function simulator*/ + Run_Function run_fsim_function_; + /*! instruction queue for each core*/ + Insn_q_t insnq_array_[COREMAX]; + /*! dependency queue from load to gemm*/ + Dep_q_t l2g_q_; + /*! dependency queue from store to gemm*/ + Dep_q_t s2g_q_; + /*! dependency queue from gemm to load*/ + Dep_q_t g2l_q_; + /*! dependency queue from gemm to store*/ + Dep_q_t g2s_q_; + /*! set when a FINISH instruction has been executed; TlppSynchronization loops until it is set*/ + int done_; + /*! event queue of cores to wake up after a dependency notification was pushed*/ + std::queue dep_push_event_; +}; +#endif // VTA_SIM_TLPP_H_ diff --git a/vta/src/sim/sim_driver.cc b/vta/src/sim/sim_driver.cc index eb497125b144..871097f2ef3f 100644 --- a/vta/src/sim/sim_driver.cc +++ b/vta/src/sim/sim_driver.cc @@ -25,6 +25,7 @@ #include #include #include +#include #include #include #include @@ -275,6 +276,7 @@ class Device { Device() { prof_ = Profiler::ThreadLocal(); dram_ = DRAM::Global(); + ptlpp = TlppVerify::Global(); } int Run(vta_phy_addr_t insn_phy_addr, @@ -286,26 +288,37 @@ class Device { for (uint32_t i = 0; i < insn_count; ++i) { this->Run(insn + i); } + this->TlppSynchronization(); return 0; } private: - void Run(const VTAGenericInsn* insn) { + static void Run_Insn(const VTAGenericInsn* insn, void * dev) { + Device * device = reinterpret_cast (dev); const VTAMemInsn* mem = reinterpret_cast(insn); const VTAGemInsn* gem = reinterpret_cast(insn); const VTAAluInsn* alu = reinterpret_cast(insn); switch (mem->opcode) { - case VTA_OPCODE_LOAD: RunLoad(mem); break; - case VTA_OPCODE_STORE: RunStore(mem); break; - case VTA_OPCODE_GEMM: RunGEMM(gem); break; - case VTA_OPCODE_ALU: RunALU(alu); break; - case VTA_OPCODE_FINISH: ++finish_counter_; break; + case VTA_OPCODE_LOAD: device->RunLoad(mem); break; + case VTA_OPCODE_STORE: device->RunStore(mem); break; + case VTA_OPCODE_GEMM: device->RunGEMM(gem); break; + case VTA_OPCODE_ALU: device->RunALU(alu); break; + case VTA_OPCODE_FINISH: 
++(device->finish_counter_); break; default: { LOG(FATAL) << "Unknown op_code" << mem->opcode; } } } + private: + /*! \brief Queue one instruction in the TlppVerify simulator; it is not executed here but later, from TlppSynchronization. */ + void Run(const VTAGenericInsn* insn) { + ptlpp->TlppPushInsn(insn); + } + + /*! \brief Run every queued instruction through TlppVerify, which calls Run_Insn with this Device as the handle to execute each one. */ + void TlppSynchronization(void) { + ptlpp->TlppSynchronization(Run_Insn, reinterpret_cast (this)); + } + void RunLoad(const VTAMemInsn* op) { if (op->x_size == 0) return; if (op->memory_type == VTA_MEM_ID_INP) { @@ -466,6 +479,7 @@ class Device { Profiler* prof_; // The DRAM interface DRAM* dram_; + TlppVerify* ptlpp; // The SRAM SRAM inp_; SRAM wgt_; diff --git a/vta/src/sim/sim_tlpp.cc b/vta/src/sim/sim_tlpp.cc new file mode 100644 index 000000000000..5a97b93b65dd --- /dev/null +++ b/vta/src/sim/sim_tlpp.cc @@ -0,0 +1,214 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/*! + * Copyright (c) 2019 by Contributors + * \file sim_tlpp.cc + * \brief Simulates core-level pipeline parallelism logic. 
+ */ +#include +/*! \brief Construct the verifier with the done flag cleared. */ +TlppVerify::TlppVerify() { + done_ = 0; +} + +/*! + * \brief Reset the simulator handle, the run callback, every per-core + * instruction queue and the done flag. + * NOTE(review): the four dependency queues and dep_push_event_ are not + * emptied here - confirm that leftover tokens are meant to survive a run. + */ +void TlppVerify::Clear() { + fsim_handle_ = nullptr; + run_fsim_function_ = nullptr; + for (int i = 0; i < COREMAX; i++) { + while (insnq_array_[i].size()) { + insnq_array_[i].pop(); + } + } + done_ = 0; +} + +/*! \brief Read the opcode field of an instruction, viewing it as a VTAMemInsn. */ +uint64_t TlppVerify::GetOperationCode(const VTAGenericInsn *insn) { + const VTAMemInsn* mem = reinterpret_cast(insn); + return mem->opcode; +} + +/*! + * \brief Map an instruction to the core that runs it. + * GEMM and ALU go to the gemm core, LOAD of VTA_MEM_ID_INP or + * VTA_MEM_ID_WGT to the load core and STORE to the store core. Every + * other case (LOAD of another memory type, any other opcode) keeps the + * default, the gemm core. + */ +CORE_TYPE TlppVerify::GetCoreType(uint64_t operation_code, + const VTAGenericInsn *insn) { + CORE_TYPE core_type = COREGEMM; + const VTAMemInsn* mem = reinterpret_cast(insn); + switch (operation_code) { + case VTA_OPCODE_GEMM: + case VTA_OPCODE_ALU: + core_type = COREGEMM; + break; + case VTA_OPCODE_LOAD: + if (mem->memory_type == VTA_MEM_ID_INP|| + mem->memory_type == VTA_MEM_ID_WGT) { + core_type = CORELOAD; + } + break; + case VTA_OPCODE_STORE: + core_type = CORESTORE; + break; + default: + break; + } + return core_type; +} + +/*! + * \brief Before a run: return false if a requested pop queue is empty, + * otherwise pop one token from each requested pop queue. After a run: + * push one token to each requested push queue and record the core to + * wake up in dep_push_event_. + * NOTE(review): the queue pointers are dereferenced without a null check, + * while InsnDependencyCheck passes nullptr for the directions a load or + * store core does not have; an instruction with such a flag set would + * dereference nullptr - confirm those flags can never be set. + */ +bool TlppVerify::DependencyProcess(bool before_run, + bool pop_prev, bool pop_next, + bool push_prev, bool push_next, + Dep_q_t *pop_prev_q, Dep_q_t *pop_next_q, + Dep_q_t *push_prev_q, Dep_q_t *push_next_q, + CORE_TYPE push_to_prev_q_indx, CORE_TYPE push_to_next_q_indx) { + + int val = 1; + if (before_run) { + if (pop_prev && pop_prev_q->size() == 0) { + return false; + } + if (pop_next && pop_next_q->size() == 0) { + return false; + } + if (pop_next) pop_next_q->pop(); + if (pop_prev) pop_prev_q->pop(); + } else { + if (push_prev) { + push_prev_q->push(val); + dep_push_event_.push(push_to_prev_q_indx); + } + if (push_next) { + push_next_q->push(val); + dep_push_event_.push(push_to_next_q_indx); + } + } + return true; +} + +/*! + * \brief Read the four dependency flags of the instruction and apply + * DependencyProcess with the queues that belong to the instruction's core. + */ +bool TlppVerify::InsnDependencyCheck(const VTAGenericInsn *insn, + bool before_run) { + const VTAMemInsn* mem = reinterpret_cast(insn); + bool pop_prev = mem->pop_prev_dep; + bool pop_next = mem->pop_next_dep; + bool push_prev = mem->push_prev_dep; + bool push_next = mem->push_next_dep; + CORE_TYPE core_type 
= GetCoreType(GetOperationCode(insn), insn); + bool bcheck = false; + switch (core_type) { + case COREGEMM: + bcheck = DependencyProcess(before_run, pop_prev, + pop_next, push_prev, push_next, + &l2g_q_, &s2g_q_, &g2l_q_, &g2s_q_, CORELOAD, CORESTORE); + break; + case CORELOAD: + bcheck = DependencyProcess(before_run, pop_prev, + pop_next, push_prev, push_next, + nullptr, &g2l_q_, nullptr, &l2g_q_, COREMAX, COREGEMM); + break; + case CORESTORE: + bcheck = DependencyProcess(before_run, pop_prev, + pop_next, push_prev, push_next, + &g2s_q_, nullptr, &s2g_q_, nullptr, COREGEMM, COREMAX); + break; + case COREMAX: + assert(0); + break; + } + + return bcheck; +} + +/*! + * \brief Run the instructions queued for one core, in order, until the + * queue is empty or the front instruction's dependencies are not yet + * satisfied. + * \param core_type core whose instruction queue is processed. + */ +void TlppVerify::CoreRun(CORE_TYPE core_type) { + const VTAGenericInsn *insn = PickFrontInsn(core_type); + while (insn) { + /*! + * Stop if a dependency this instruction waits on is not available yet. + */ + if (!InsnDependencyCheck(insn, true)) { + break; + } + /*! + * Execute the instruction. + */ + run_fsim_function_(insn, fsim_handle_); + /*! + * Send the notifications the instruction requests. + */ + InsnDependencyCheck(insn, false); + /*! + * Latch the done flag once a FINISH instruction has run. The flag is + * only ever set here, never cleared: a plain assignment would let any + * instruction executed after FINISH reset it, and the + * TlppSynchronization loop would then spin forever on drained queues. + */ + if (GetOperationCode(insn) == VTA_OPCODE_FINISH) { + done_ = 1; + } + + if (debug_) { + printf("this is thread for %s\n", GetCoreTypeName(core_type)); + } + ConsumeFrontInsn(core_type); + insn = PickFrontInsn(core_type); + } + return; +} + +/*! + * \brief Drain dep_push_event_, running each core that was notified. + * CoreRun may push further events; they are handled by the same loop. + */ +void TlppVerify::EventProcess(void) { + while (dep_push_event_.size()) { + CORE_TYPE core_type = dep_push_event_.front(); + dep_push_event_.pop(); + CoreRun(core_type); + } +} + +void TlppVerify::TlppSynchronization(Run_Function run_function, + void *fsim_handle, + bool debug) { + fsim_handle_ = fsim_handle; + run_fsim_function_ = run_function; + debug_ = debug; + done_ = 0; + do { + /* + * Pick a random core to run first. 
+ */ + /* NOTE(review): the seed is re-read from time() on every iteration, so iterations that see the same time() value start from the same core. */ + unsigned int seed = time(NULL); + uint8_t core_start = rand_r(&seed)%COREMAX; + for (int i = 0; i < COREMAX; i++) { + CoreRun(static_cast((core_start + i) % COREMAX)); + } + /* Run the cores that were notified during the pass above. */ + EventProcess(); + /* NOTE(review): the loop ends only when done_ is set; if no FINISH instruction ever becomes runnable this never returns - confirm callers always queue one. */ + }while (!done_); + Clear(); + return; +} + +/*! + * \brief Queue an instruction on the core selected by GetCoreType. + * The pointer itself is queued, so the instruction is presumably expected + * to stay valid until TlppSynchronization has run - verify against caller. + */ +void TlppVerify::TlppPushInsn(const VTAGenericInsn *insn) { + uint64_t operation_code = GetOperationCode(insn); + CORE_TYPE core_type = GetCoreType(operation_code, insn); + insnq_array_[core_type].push(static_cast(insn)); + return; +} + +/*! + * \brief Return the front instruction of a core's queue without removing + * it, or nullptr when the queue is empty. + */ +const VTAGenericInsn *TlppVerify::PickFrontInsn(uint64_t core_type) { + const void *return_value = nullptr; + if (insnq_array_[core_type].size()) { + return_value = insnq_array_[core_type].front(); + } + return reinterpret_cast (return_value); +} + +/*! + * \brief Remove the front instruction of a core's queue; does nothing when + * the queue is empty. + */ +void TlppVerify::ConsumeFrontInsn(uint64_t core_type) { + if (insnq_array_[core_type].size()) { + insnq_array_[core_type].pop(); + } +}