-
Notifications
You must be signed in to change notification settings - Fork 123
performance optimization #15
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -2,12 +2,9 @@ | |
// Created by 理 傅 on 2017/1/2. | ||
// | ||
|
||
#include <err.h> | ||
#include <sys/time.h> | ||
#include <iostream> | ||
#include <stdexcept> | ||
#include "fec.h" | ||
#include "sess.h" | ||
#include "encoding.h" | ||
|
||
FEC::FEC(ReedSolomon enc) :enc(enc) {} | ||
|
@@ -33,13 +30,11 @@ FEC::New(int rxlimit, int dataShards, int parityShards) { | |
} | ||
|
||
fecPacket | ||
FEC::Decode(byte *data, size_t sz) { | ||
FEC::Decode(byte *data, size_t sz, uint32_t ts) { | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. fec 更贴近协议栈,通过业务传入时间更合理,避免了协议栈内部调用时间函数,效率更高。 There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. 可以增加一个函数,而不仅是替换,这样可以保持兼容性。 |
||
fecPacket pkt; | ||
data = decode32u(data, &pkt.seqid); | ||
data = decode16u(data, &pkt.flag); | ||
struct timeval time; | ||
gettimeofday(&time, NULL); | ||
pkt.ts = uint32_t(time.tv_sec * 1000 + time.tv_usec/1000); | ||
pkt.ts = ts; | ||
pkt.data = std::make_shared<std::vector<byte>>(data, data+sz - fecHeaderSize); | ||
return pkt; | ||
} | ||
|
@@ -62,11 +57,8 @@ FEC::MarkFEC(byte *data) { | |
} | ||
} | ||
|
||
std::vector<row_type> | ||
FEC::Input(fecPacket &pkt) { | ||
std::vector<row_type> recovered; | ||
|
||
uint32_t now = currentMs(); | ||
void | ||
FEC::Input(fecPacket &pkt, uint32_t now, std::vector<row_type>& recovered) { | ||
if (now-lastCheck >= fecExpire) { | ||
for (auto it = rx.begin();it !=rx.end();) { | ||
if (now - it->ts > fecExpire) { | ||
|
@@ -78,13 +70,12 @@ FEC::Input(fecPacket &pkt) { | |
lastCheck = now; | ||
} | ||
|
||
|
||
// insertion | ||
auto n = this->rx.size() -1; | ||
int insertIdx = 0; | ||
for (int i=n;i>=0;i--) { | ||
if (pkt.seqid == rx[i].seqid) { | ||
return recovered; | ||
return; | ||
} else if (pkt.seqid > rx[i].seqid) { | ||
insertIdx = i + 1; | ||
break; | ||
|
@@ -113,10 +104,10 @@ FEC::Input(fecPacket &pkt) { | |
int numDataShard = 0; | ||
int first = 0; | ||
size_t maxlen = 0; | ||
|
||
std::vector<row_type> shardVec(totalShards); | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. 避免重复申请内存,对移动端更友好。 |
||
std::vector<bool> shardflag(totalShards, false); | ||
|
||
static thread_local std::vector<row_type> shardVec(totalShards); | ||
static thread_local std::vector<bool> shardflag(totalShards, false); | ||
std::fill(shardVec.begin(), shardVec.end(), nullptr); | ||
std::fill(shardflag.begin(), shardflag.end(), false); | ||
for (auto i = searchBegin; i <= searchEnd; i++) { | ||
auto seqid = rx[i].seqid; | ||
if (seqid > shardEnd) { | ||
|
@@ -163,7 +154,7 @@ FEC::Input(fecPacket &pkt) { | |
rx.erase(rx.begin()); | ||
} | ||
|
||
return recovered; | ||
return; | ||
} | ||
|
||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -3,18 +3,17 @@ | |
// | ||
|
||
#include "galois_noasm.h" | ||
#include "matrix.h" | ||
|
||
extern const byte mulTable[256][256]; | ||
|
||
void galMulSlice(byte c, row_type in, row_type out) { | ||
for (int n=0;n<in->size();n++) { | ||
(*out)[n] = mulTable[c][(*in)[n]]; | ||
} | ||
void galMulSlice(byte c, byte* in, byte* out, int size) { | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. 矩阵计算相关:删除了 vector 和 shared_ptr 的用法,改为数组操作,效率提升十分明显;仅修改这一处,压测(编译 -O2)的耗时就能减少一半。 |
||
for (int n = 0; n < size; n++) { | ||
out[n] = mulTable[c][in[n]]; | ||
} | ||
} | ||
|
||
void galMulSliceXor(byte c, row_type in, row_type out) { | ||
for (int n=0;n<in->size();n++) { | ||
(*out)[n] ^= mulTable[c][(*in)[n]]; | ||
} | ||
} | ||
void galMulSliceXor(byte c, byte* in, byte* out, int size) { | ||
for (int n = 0; n < size; n++) { | ||
out[n] ^= mulTable[c][in[n]]; | ||
} | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -8,65 +8,65 @@ | |
inversionTree inversionTree::newInversionTree(int dataShards, int parityShards) { | ||
inversionTree tree; | ||
tree.m_root.m_children.resize(dataShards + parityShards, nullptr); | ||
tree.m_root.m_matrix = matrix::identityMatrix(dataShards); | ||
tree.m_root.m_matrix = std::make_shared<IdentityMatrix>(dataShards); | ||
return tree; | ||
} | ||
|
||
|
||
matrix | ||
MatrixPtr | ||
inversionTree::GetInvertedMatrix(std::vector<int> &invalidIndices) { | ||
if (invalidIndices.size() == 0) { | ||
return m_root.m_matrix; | ||
} | ||
|
||
return m_root.getInvertedMatrix(invalidIndices, 0); | ||
return m_root.getInvertedMatrix(invalidIndices, 0, 0); | ||
} | ||
|
||
int | ||
inversionTree::InsertInvertedMatrix(std::vector<int> &invalidIndices, matrix &matrix, int shards) { | ||
inversionTree::InsertInvertedMatrix(std::vector<int> &invalidIndices, MatrixPtr &matrix, int shards) { | ||
// If no invalid indices were given then we are done because the | ||
// m_root node is already set with the identity matrix. | ||
if (invalidIndices.size() == 0) { | ||
return -1; | ||
} | ||
|
||
if (!matrix.IsSquare()) { | ||
if (!matrix->IsSquare()) { | ||
return -2; | ||
} | ||
|
||
// Recursively create nodes for the inverted matrix in the tree until | ||
// we reach the node to insert the matrix to. We start by passing in | ||
// 0 as the parent index as we start at the m_root of the tree. | ||
m_root.insertInvertedMatrix(invalidIndices, matrix, shards, 0); | ||
m_root.insertInvertedMatrix(invalidIndices, 0, matrix, shards, 0); | ||
|
||
return 0; | ||
} | ||
|
||
matrix | ||
inversionNode::getInvertedMatrix(std::vector<int> &invalidIndices, int parent) { | ||
MatrixPtr | ||
inversionNode::getInvertedMatrix(const std::vector<int> &invalidIndices, int index, int parent) { | ||
// Get the child node to search next from the list of m_children. The | ||
// list of m_children starts relative to the parent index passed in | ||
// because the indices of invalid rows is sorted (by default). As we | ||
// search recursively, the first invalid index gets popped off the list, | ||
// so when searching through the list of m_children, use that first invalid | ||
// index to find the child node. | ||
int firstIndex = invalidIndices[0]; | ||
int firstIndex = invalidIndices[index]; | ||
auto node = m_children[firstIndex - parent]; | ||
|
||
// If the child node doesn't exist in the list yet, fail fast by | ||
// returning, so we can construct and insert the proper inverted matrix. | ||
if (node == nullptr) { | ||
return matrix{}; | ||
return nullptr; | ||
} | ||
|
||
// If there's more than one invalid index left in the list we should | ||
// keep searching recursively. | ||
if (invalidIndices.size() > 1) { | ||
if (invalidIndices.size() - index> 1) { | ||
// Search recursively on the child node by passing in the invalid indices | ||
// with the first index popped off the front. Also the parent index to | ||
// pass down is the first index plus one. | ||
std::vector<int> v(invalidIndices.begin() + 1, invalidIndices.end()); | ||
return node->getInvertedMatrix(v, firstIndex + 1); | ||
// no copy std::vector<int> v(invalidIndices.begin() + 1, invalidIndices.end()); | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. 避免临时 vector 申请内存和拷贝。 |
||
return node->getInvertedMatrix(invalidIndices, index + 1, firstIndex + 1); | ||
} | ||
|
||
// If there aren't any more invalid indices to search, we've found our | ||
|
@@ -79,8 +79,9 @@ inversionNode::getInvertedMatrix(std::vector<int> &invalidIndices, int parent) { | |
|
||
void | ||
inversionNode::insertInvertedMatrix( | ||
std::vector<int> &invalidIndices, | ||
struct matrix &matrix, | ||
const std::vector<int> &invalidIndices, | ||
int index, | ||
MatrixPtr &matrix, | ||
int shards, | ||
int parent) { | ||
// As above, get the child node to search next from the list of m_children. | ||
|
@@ -89,7 +90,7 @@ inversionNode::insertInvertedMatrix( | |
// search recursively, the first invalid index gets popped off the list, | ||
// so when searching through the list of m_children, use that first invalid | ||
// index to find the child node. | ||
int firstIndex = invalidIndices[0]; | ||
int firstIndex = invalidIndices[index]; | ||
auto node = m_children[firstIndex - parent]; | ||
|
||
// If the child node doesn't exist in the list yet, create a new | ||
|
@@ -109,13 +110,13 @@ inversionNode::insertInvertedMatrix( | |
// If there's more than one invalid index left in the list we should | ||
// keep searching recursively in order to find the node to add our | ||
// matrix. | ||
if (invalidIndices.size() > 1) { | ||
if (invalidIndices.size() - index > 1) { | ||
// As above, search recursively on the child node by passing in | ||
// the invalid indices with the first index popped off the front. | ||
// Also the total number of shards and parent index are passed down | ||
// which is equal to the first index plus one. | ||
std::vector<int> v(invalidIndices.begin() + 1, invalidIndices.end()); | ||
node->insertInvertedMatrix(v, matrix, shards, firstIndex + 1); | ||
// no copy std::vector<int> v(invalidIndices.begin() + 1, invalidIndices.end()); | ||
node->insertInvertedMatrix(invalidIndices, index + 1, matrix, shards, firstIndex + 1); | ||
} else { | ||
node->m_matrix = matrix; | ||
} | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
这个依赖有点奇怪,fec依赖sess,只是为了用其时间函数。
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
通过sess把时间传递到fec协议栈内更合理。