Skip to content

Commit

Permalink
[VTA] Support TLPP in function simulator. (apache#3555)
Browse files Browse the repository at this point in the history
* [VTA] Support TLPP in function simulator.
Issue:
Currently the VTA function simulator only executes instructions serially,
so the dependency logic of the runtime ISA, which is used for task-level
pipeline parallelism (TLPP), cannot be verified by the function simulator.

Solution:
Make the simulator driver multi-threaded and add TLPP support.

Benefit:
TLPP support in the VTA function simulator makes testing, debugging, and
changing VTA logic easier.

replace boost lockfree queue

add configure control for simulator tlpp enable or disable.

Change code style to Google style.

Wrap queue read/write and sync logic to make function calls simpler.

Add some comments.

Remove MT logic, change into Single thread mode.

address review comments.

code style change to match google code style and add comments.

add cmake macro to enable/disable simulator tlpp logic.

submodule update.

correct file name mentioned in comments.

* remove USE_VTA_FSIM_TLPP.
  • Loading branch information
huajsj authored and wweic committed Sep 16, 2019
1 parent 0a01098 commit 479b5c9
Show file tree
Hide file tree
Showing 4 changed files with 398 additions and 6 deletions.
2 changes: 2 additions & 0 deletions cmake/modules/VTA.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ elseif(PYTHON)
file(GLOB FSIM_RUNTIME_SRCS vta/src/*.cc)
list(APPEND FSIM_RUNTIME_SRCS vta/src/sim/sim_driver.cc)
list(APPEND FSIM_RUNTIME_SRCS vta/src/vmem/virtual_memory.cc vta/src/vmem/virtual_memory.h)
list(APPEND FSIM_RUNTIME_SRCS vta/src/sim/sim_tlpp.cc)
# Target lib: vta_fsim
add_library(vta_fsim SHARED ${FSIM_RUNTIME_SRCS})
target_include_directories(vta_fsim PUBLIC vta/include)
Expand All @@ -54,6 +55,7 @@ elseif(PYTHON)
if(APPLE)
set_target_properties(vta_fsim PROPERTIES LINK_FLAGS "-undefined dynamic_lookup")
endif(APPLE)
target_compile_definitions(vta_fsim PUBLIC USE_FSIM_TLPP)
endif()

# Cycle accurate simulator driver build
Expand Down
162 changes: 162 additions & 0 deletions vta/include/vta/sim_tlpp.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,162 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

/*!
* Copyright (c) 2019 by Contributors
* \file sim_tlpp.h
* \brief TVM VTA multiple thread simulator header file.
*/
#ifndef VTA_SIM_TLPP_H_
#define VTA_SIM_TLPP_H_
#include <vta/hw_spec.h>
#include <stdint.h>
#include <stdlib.h>
#include <stdio.h>
#include <unistd.h>
#include <vector>
#include <ctime>
#include <cassert>
#include <queue>

/*! Printable core names; TlppVerify::GetCoreTypeName returns one of these. */
#define SCOREGEMM "gemm"
#define SCORELOAD "load"
#define SCORESTORE "store"
#define SCOREUNKNOWN "unknown"

/*!
 * Signature of the callback that executes a single instruction. The second
 * argument is an opaque, caller-supplied handle.
 */
using Run_Function = void (*)(const VTAGenericInsn *, void *);

/*!
 * Identifies a simulated core. COREMAX is not a core: it is the number of
 * enumerators before it and is used to size per-core arrays.
 */
enum CORE_TYPE {
  COREGEMM = 0,
  CORELOAD,
  CORESTORE,
  COREMAX
};

/*! Queue type used for the per-core instruction queues. */
using Insn_q_t = std::queue<const void *>;
/*! Queue type used for the inter-core dependency queues. */
using Dep_q_t = std::queue<int>;
/*!
 * \brief Simulates core-level pipeline parallelism logic.
 */
class TlppVerify {
 public:
  /*!
   * \brief Return the shared TlppVerify instance.
   *
   * The instance is a function-local static, so it is constructed on the
   * first call and the same pointer is returned on every call.
   */
  static TlppVerify *Global() {
    static TlppVerify instance;
    return &instance;
  }

  /*!
   * \brief Loop to process instructions and verify the TLPP logic.
   * \param run_function Function pointer used to execute an instruction.
   * \param fsim_handle Pointer to the function simulator's Device class.
   * \param debug Enable or disable debug.
   */
  void TlppSynchronization(Run_Function run_function,
                           void *fsim_handle,
                           bool debug = false);

  /*!
   * \brief Push an instruction into a queue for later execution.
   * \param insn The instruction.
   */
  void TlppPushInsn(const VTAGenericInsn *insn);

  /*! \brief Event pump that handles dependency events. */
  void EventProcess();

  /*!
   * \brief Schedule a particular core to run.
   * \param core_type The core to run.
   */
  void CoreRun(CORE_TYPE core_type);

 private:
  /*! \brief Private constructor; Global() creates the instance. */
  TlppVerify();

  /*! \brief Clear class variables. */
  void Clear();

  /*!
   * \brief Check whether the instruction's dependency condition is satisfied
   *  and do the notification.
   * \param insn The instruction.
   * \param before_run Whether this check happens before or after the
   *  instruction executes. Before execution, it checks whether the
   *  dependency condition is satisfied; after execution, it checks whether
   *  a notification needs to be sent.
   * \return NOTE(review): meaning of the result is not stated in this
   *  header; presumably true when the check passes -- confirm in
   *  sim_tlpp.cc.
   */
  bool InsnDependencyCheck(const VTAGenericInsn *insn, bool before_run);

  /*!
   * \brief Get the operation code from an instruction.
   * \param insn The instruction.
   */
  uint64_t GetOperationCode(const VTAGenericInsn *insn);

  /*!
   * \brief Find which core should run this instruction.
   * \param operation_code Operation type, such as load or gemm.
   * \param insn The instruction.
   */
  CORE_TYPE GetCoreType(uint64_t operation_code, const VTAGenericInsn *insn);

  /*!
   * \brief Pick up the first instruction for the specified core.
   * \param core_type Core type.
   */
  const VTAGenericInsn *PickFrontInsn(uint64_t core_type);

  /*!
   * \brief Consume one instruction after it passes the dependency condition.
   * \param core_type Core type.
   */
  void ConsumeFrontInsn(uint64_t core_type);

  /*!
   * \brief Process the dependency logic.
   * \param before_run Whether this call happens before the instruction runs.
   * \param pop_prev Whether the instruction has a dependency on the
   *  previous core.
   * \param pop_next Whether the instruction has a dependency on the next
   *  core.
   * \param push_prev NOTE(review): not described by the original author;
   *  presumably whether the previous core must be notified -- confirm.
   * \param push_next NOTE(review): not described by the original author;
   *  presumably whether the next core must be notified -- confirm.
   * \param pop_prev_q Queue holding notifications from the previous core.
   * \param pop_next_q Queue holding notifications from the next core.
   * \param push_prev_q Queue used to send a notification to the previous
   *  core.
   * \param push_next_q Queue used to send a notification to the next core
   *  (the original comment said "from next core"; presumably a typo --
   *  confirm).
   * \param push_to_prev_q_indx Which core needs to be woken up when there is
   *  a notification for the previous core (original wording was ambiguous
   *  between "for" and "from").
   * \param push_to_next_q_indx Which core needs to be woken up when there is
   *  a notification for the next core (same ambiguity as above).
   * \return NOTE(review): meaning of the result is not stated in this
   *  header -- confirm in sim_tlpp.cc.
   */
  bool DependencyProcess(bool before_run,
                         bool pop_prev, bool pop_next,
                         bool push_prev, bool push_next,
                         Dep_q_t *pop_prev_q, Dep_q_t *pop_next_q,
                         Dep_q_t *push_prev_q, Dep_q_t *push_next_q,
                         CORE_TYPE push_to_prev_q_indx,
                         CORE_TYPE push_to_next_q_indx);

  /*!
   * \brief Return the printable name for a core type.
   * \param core_type Core type.
   * \return SCOREGEMM, SCORELOAD or SCORESTORE for the matching core, and
   *  SCOREUNKNOWN for any other value (including COREMAX).
   */
  inline const char *GetCoreTypeName(CORE_TYPE core_type) {
    switch (core_type) {
      case COREGEMM:
        return SCOREGEMM;
      case CORELOAD:
        return SCORELOAD;
      case CORESTORE:
        return SCORESTORE;
      default:
        return SCOREUNKNOWN;
    }
  }

  /*! Debug flag. */
  bool debug_;
  /*! Pointer to the function simulator's Device class. */
  void *fsim_handle_;
  /*! Function pointer the function simulator uses to execute instructions. */
  Run_Function run_fsim_function_;
  /*! Instruction queue for each core, indexed by core type. */
  Insn_q_t insnq_array_[COREMAX];
  /*! Dependency queue from load to gemm. */
  Dep_q_t l2g_q_;
  /*! Dependency queue from store to gemm. */
  Dep_q_t s2g_q_;
  /*! Dependency queue from gemm to load. */
  Dep_q_t g2l_q_;
  /*! Dependency queue from gemm to store. */
  Dep_q_t g2s_q_;
  /*! Computation done. */
  int done_;
  /*! Event queue for core wake-up. */
  std::queue<CORE_TYPE> dep_push_event_;
};
#endif // VTA_SIM_TLPP_H_
26 changes: 20 additions & 6 deletions vta/src/sim/sim_driver.cc
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
#include <vta/driver.h>
#include <vta/hw_spec.h>
#include <tvm/runtime/registry.h>
#include <vta/sim_tlpp.h>
#include <type_traits>
#include <mutex>
#include <map>
Expand Down Expand Up @@ -275,6 +276,7 @@ class Device {
Device() {
prof_ = Profiler::ThreadLocal();
dram_ = DRAM::Global();
ptlpp = TlppVerify::Global();
}

int Run(vta_phy_addr_t insn_phy_addr,
Expand All @@ -286,26 +288,37 @@ class Device {
for (uint32_t i = 0; i < insn_count; ++i) {
this->Run(insn + i);
}
this->TlppSynchronization();
return 0;
}

private:
void Run(const VTAGenericInsn* insn) {
static void Run_Insn(const VTAGenericInsn* insn, void * dev) {
Device * device = reinterpret_cast<Device *> (dev);
const VTAMemInsn* mem = reinterpret_cast<const VTAMemInsn*>(insn);
const VTAGemInsn* gem = reinterpret_cast<const VTAGemInsn*>(insn);
const VTAAluInsn* alu = reinterpret_cast<const VTAAluInsn*>(insn);
switch (mem->opcode) {
case VTA_OPCODE_LOAD: RunLoad(mem); break;
case VTA_OPCODE_STORE: RunStore(mem); break;
case VTA_OPCODE_GEMM: RunGEMM(gem); break;
case VTA_OPCODE_ALU: RunALU(alu); break;
case VTA_OPCODE_FINISH: ++finish_counter_; break;
case VTA_OPCODE_LOAD: device->RunLoad(mem); break;
case VTA_OPCODE_STORE: device->RunStore(mem); break;
case VTA_OPCODE_GEMM: device->RunGEMM(gem); break;
case VTA_OPCODE_ALU: device->RunALU(alu); break;
case VTA_OPCODE_FINISH: ++(device->finish_counter_); break;
default: {
LOG(FATAL) << "Unknown op_code" << mem->opcode;
}
}
}

private:
void Run(const VTAGenericInsn* insn) {
ptlpp->TlppPushInsn(insn);
}

void TlppSynchronization(void) {
ptlpp->TlppSynchronization(Run_Insn, reinterpret_cast<void *> (this));
}

void RunLoad(const VTAMemInsn* op) {
if (op->x_size == 0) return;
if (op->memory_type == VTA_MEM_ID_INP) {
Expand Down Expand Up @@ -466,6 +479,7 @@ class Device {
Profiler* prof_;
// The DRAM interface
DRAM* dram_;
TlppVerify* ptlpp;
// The SRAM
SRAM<VTA_INP_WIDTH, VTA_BATCH * VTA_BLOCK_IN, VTA_INP_BUFF_DEPTH> inp_;
SRAM<VTA_WGT_WIDTH, VTA_BLOCK_IN * VTA_BLOCK_OUT, VTA_WGT_BUFF_DEPTH> wgt_;
Expand Down
Loading

0 comments on commit 479b5c9

Please sign in to comment.