Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[VTA] Support TLPP in function simulator. #3555

Merged
merged 2 commits into from
Sep 7, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions cmake/modules/VTA.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ elseif(PYTHON)
file(GLOB FSIM_RUNTIME_SRCS vta/src/*.cc)
list(APPEND FSIM_RUNTIME_SRCS vta/src/sim/sim_driver.cc)
list(APPEND FSIM_RUNTIME_SRCS vta/src/vmem/virtual_memory.cc vta/src/vmem/virtual_memory.h)
list(APPEND FSIM_RUNTIME_SRCS vta/src/sim/sim_tlpp.cc)
# Target lib: vta_fsim
add_library(vta_fsim SHARED ${FSIM_RUNTIME_SRCS})
target_include_directories(vta_fsim PUBLIC vta/include)
Expand All @@ -54,6 +55,7 @@ elseif(PYTHON)
if(APPLE)
set_target_properties(vta_fsim PROPERTIES LINK_FLAGS "-undefined dynamic_lookup")
endif(APPLE)
target_compile_definitions(vta_fsim PUBLIC USE_FSIM_TLPP)
endif()

# Cycle accurate simulator driver build
Expand Down
162 changes: 162 additions & 0 deletions vta/include/vta/sim_tlpp.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,162 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

As a side note, since this interface is not part of the public interface exposed to users, let us move it to an internal header rather than keeping it in the include folder.

* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

/*!
* Copyright (c) 2019 by Contributors
* \file sim_tlpp.h
* \brief TVM VTA multiple thread simulator header file.
*/
#ifndef VTA_SIM_TLPP_H_
#define VTA_SIM_TLPP_H_
#include <vta/hw_spec.h>
#include <stdint.h>
#include <stdlib.h>
#include <stdio.h>
#include <unistd.h>
#include <vector>
#include <ctime>
#include <cassert>
#include <queue>

/*! \brief Printable name of the gemm core (returned by TlppVerify::GetCoreTypeName for COREGEMM). */
#define SCOREGEMM "gemm"
huajsj marked this conversation as resolved.
Show resolved Hide resolved
/*! \brief Printable name of the load core (returned by TlppVerify::GetCoreTypeName for CORELOAD). */
#define SCORELOAD "load"
/*! \brief Printable name of the store core (returned by TlppVerify::GetCoreTypeName for CORESTORE). */
#define SCORESTORE "store"
/*! \brief Printable name returned by TlppVerify::GetCoreTypeName for any other core type value. */
#define SCOREUNKNOWN "unknown"
/*!
 * \brief Callback type used to execute a single instruction.
 *  The first argument is the instruction; the second is an opaque handle
 *  passed through from the caller (TlppVerify stores it as fsim_handle_).
 */
typedef void (*Run_Function)(const VTAGenericInsn *, void *);
/*!
 * \brief Core identifiers. COREMAX is not a core itself; it is the number of
 *  core types and is used to size the per-core instruction queue array.
 */
typedef enum {COREGEMM = 0, CORELOAD, CORESTORE, COREMAX} CORE_TYPE;
tmoreau89 marked this conversation as resolved.
Show resolved Hide resolved
/*! \brief FIFO of pending instructions, held as untyped const pointers. */
using Insn_q_t = std::queue<const void*>;
/*! \brief FIFO of integers, used for the inter-core dependency queues. */
using Dep_q_t = std::queue<int>;
/*!
* \brief simulate core level pipe line parallism logic.
*/
class TlppVerify {
 public:
  /*! \brief Return the single shared TlppVerify instance. */
  static TlppVerify *Global() {
    static TlppVerify instance;
    return &instance;
  }

  /*!
   * \brief Loop over the instructions, processing them and verifying the
   *  TLPP logic.
   * \param run_function Function pointer used to execute an instruction.
   * \param fsim_handle Pointer to the function simulator's Device object.
   * \param debug Enables or disables debug; off by default.
   */
  void TlppSynchronization(Run_Function run_function,
                           void *fsim_handle,
                           bool debug = false);

  /*!
   * \brief Push an instruction into a queue for later execution.
   * \param insn The instruction.
   */
  void TlppPushInsn(const VTAGenericInsn *insn);

  /*! \brief Event pump that handles dependency events. */
  void EventProcess();

  /*!
   * \brief Schedule a particular core to run.
   * \param core_type The core to schedule.
   */
  void CoreRun(CORE_TYPE core_type);

 private:
  /*! \brief Constructor; private, instances are obtained through Global(). */
  TlppVerify();

  /*! \brief Clear the class variables. */
  void Clear();

  /*!
   * \brief Check whether the dependency condition of an instruction is
   *  satisfied, and send notifications.
   * \param insn The instruction.
   * \param before_run Whether the check happens before or after the
   *  instruction executes. Before execution the dependency condition is
   *  checked; after execution the check decides whether a notification
   *  needs to be sent.
   */
  bool InsnDependencyCheck(const VTAGenericInsn *insn, bool before_run);

  /*!
   * \brief Get the operation code of an instruction.
   * \param insn The instruction.
   */
  uint64_t GetOperationCode(const VTAGenericInsn *insn);

  /*!
   * \brief Find which core should run an instruction.
   * \param operation_code Operation type, such as load or gemm.
   * \param insn The instruction.
   */
  CORE_TYPE GetCoreType(uint64_t operation_code, const VTAGenericInsn *insn);

  /*!
   * \brief Pick up the first instruction of the specified core.
   * \param core_type Core type.
   */
  const VTAGenericInsn *PickFrontInsn(uint64_t core_type);

  /*!
   * \brief Consume one instruction once its dependency condition has passed.
   * \param core_type Core type.
   */
  void ConsumeFrontInsn(uint64_t core_type);

  /*!
   * \brief Process the dependency logic.
   * \param before_run Whether this call happens before the instruction runs.
   * \param pop_prev Whether the instruction has a dependency on the previous core.
   * \param pop_next Whether the instruction has a dependency on the next core.
   * \param push_prev Presumably whether the previous core must be notified
   *  (undocumented in the original; confirm against the implementation).
   * \param push_next Presumably whether the next core must be notified
   *  (undocumented in the original; confirm against the implementation).
   * \param pop_prev_q Queue of notifications from the previous core.
   * \param pop_next_q Queue of notifications from the next core.
   * \param push_prev_q Queue used to send a notification for the previous core.
   * \param push_next_q Queue used to send a notification for the next core
   *  (direction inferred by symmetry with push_prev_q; confirm).
   * \param push_to_prev_q_indx Core to wake up when there is a notification
   *  for the previous core.
   * \param push_to_next_q_indx Core to wake up when there is a notification
   *  for the next core.
   */
  bool DependencyProcess(bool before_run,
                         bool pop_prev, bool pop_next,
                         bool push_prev, bool push_next,
                         Dep_q_t *pop_prev_q, Dep_q_t *pop_next_q,
                         Dep_q_t *push_prev_q, Dep_q_t *push_next_q,
                         CORE_TYPE push_to_prev_q_indx, CORE_TYPE push_to_next_q_indx);

  /*!
   * \brief Return the printable name of a core type.
   * \param core_type Core type.
   * \return SCOREGEMM, SCORELOAD or SCORESTORE for the matching core, and
   *  SCOREUNKNOWN for any other value.
   */
  inline const char * GetCoreTypeName(CORE_TYPE core_type) {
    switch (core_type) {
      case COREGEMM:
        return SCOREGEMM;
      case CORELOAD:
        return SCORELOAD;
      case CORESTORE:
        return SCORESTORE;
      default:
        return SCOREUNKNOWN;
    }
  }

  /*! \brief Debug flag. */
  bool debug_;
  /*! \brief Pointer to the function simulator's Device object. */
  void *fsim_handle_;
  /*! \brief Function pointer used by the function simulator to execute an instruction. */
  Run_Function run_fsim_function_;
  /*! \brief Instruction queue for each core. */
  Insn_q_t insnq_array_[COREMAX];
  /*! \brief Dependency queue from load to gemm. */
  Dep_q_t l2g_q_;
  /*! \brief Dependency queue from store to gemm. */
  Dep_q_t s2g_q_;
  /*! \brief Dependency queue from gemm to load. */
  Dep_q_t g2l_q_;
  /*! \brief Dependency queue from gemm to store. */
  Dep_q_t g2s_q_;
  /*! \brief Computation-done marker. */
  int done_;
  /*! \brief Event queue used to wake up cores. */
  std::queue<CORE_TYPE> dep_push_event_;
};
#endif // VTA_SIM_TLPP_H_
26 changes: 20 additions & 6 deletions vta/src/sim/sim_driver.cc
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
#include <vta/driver.h>
#include <vta/hw_spec.h>
#include <tvm/runtime/registry.h>
#include <vta/sim_tlpp.h>
#include <type_traits>
#include <mutex>
#include <map>
Expand Down Expand Up @@ -275,6 +276,7 @@ class Device {
Device() {
prof_ = Profiler::ThreadLocal();
dram_ = DRAM::Global();
ptlpp = TlppVerify::Global();
}

int Run(vta_phy_addr_t insn_phy_addr,
Expand All @@ -286,26 +288,37 @@ class Device {
for (uint32_t i = 0; i < insn_count; ++i) {
this->Run(insn + i);
}
this->TlppSynchronization();
return 0;
}

private:
void Run(const VTAGenericInsn* insn) {
static void Run_Insn(const VTAGenericInsn* insn, void * dev) {
Device * device = reinterpret_cast<Device *> (dev);
const VTAMemInsn* mem = reinterpret_cast<const VTAMemInsn*>(insn);
const VTAGemInsn* gem = reinterpret_cast<const VTAGemInsn*>(insn);
const VTAAluInsn* alu = reinterpret_cast<const VTAAluInsn*>(insn);
switch (mem->opcode) {
case VTA_OPCODE_LOAD: RunLoad(mem); break;
case VTA_OPCODE_STORE: RunStore(mem); break;
case VTA_OPCODE_GEMM: RunGEMM(gem); break;
case VTA_OPCODE_ALU: RunALU(alu); break;
case VTA_OPCODE_FINISH: ++finish_counter_; break;
case VTA_OPCODE_LOAD: device->RunLoad(mem); break;
case VTA_OPCODE_STORE: device->RunStore(mem); break;
case VTA_OPCODE_GEMM: device->RunGEMM(gem); break;
case VTA_OPCODE_ALU: device->RunALU(alu); break;
case VTA_OPCODE_FINISH: ++(device->finish_counter_); break;
default: {
LOG(FATAL) << "Unknown op_code" << mem->opcode;
}
}
}

private:
void Run(const VTAGenericInsn* insn) {
ptlpp->TlppPushInsn(insn);
}

void TlppSynchronization(void) {
ptlpp->TlppSynchronization(Run_Insn, reinterpret_cast<void *> (this));
}

void RunLoad(const VTAMemInsn* op) {
if (op->x_size == 0) return;
if (op->memory_type == VTA_MEM_ID_INP) {
Expand Down Expand Up @@ -466,6 +479,7 @@ class Device {
Profiler* prof_;
// The DRAM interface
DRAM* dram_;
TlppVerify* ptlpp;
// The SRAM
SRAM<VTA_INP_WIDTH, VTA_BATCH * VTA_BLOCK_IN, VTA_INP_BUFF_DEPTH> inp_;
SRAM<VTA_WGT_WIDTH, VTA_BLOCK_IN * VTA_BLOCK_OUT, VTA_WGT_BUFF_DEPTH> wgt_;
Expand Down
Loading