-
Notifications
You must be signed in to change notification settings - Fork 526
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
curvefs client : fix bug of getleader always fails causes stack overflow #1070
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -586,8 +586,15 @@ void MetaServerClientImpl::UpdateInodeAsync(const Inode &inode, | |
MetaServerOpType::UpdateInode, task, inode.fsid(), inode.inodeid()); | ||
auto excutor = std::make_shared<UpdateInodeExcutor>(opt_, | ||
metaCache_, channelManager_, taskCtx); | ||
TaskExecutorDone *taskDone = new TaskExecutorDone(excutor, done); | ||
excutor->DoAsyncRPCTask(taskDone); | ||
TaskExecutorDone *taskDone = new TaskExecutorDone( | ||
excutor, done); | ||
brpc::ClosureGuard taskDone_guard(taskDone); | ||
int ret = excutor->DoAsyncRPCTask(taskDone); | ||
if (ret < 0) { | ||
taskDone->SetRetCode(ret); | ||
return; | ||
} | ||
taskDone_guard.release(); | ||
} | ||
|
||
MetaStatusCode MetaServerClientImpl::GetOrModifyS3ChunkInfo( | ||
|
@@ -741,8 +748,15 @@ void MetaServerClientImpl::GetOrModifyS3ChunkInfoAsync( | |
MetaServerOpType::GetOrModifyS3ChunkInfo, task, fsId, inodeId); | ||
auto excutor = std::make_shared<GetOrModifyS3ChunkInfoExcutor>(opt_, | ||
metaCache_, channelManager_, taskCtx); | ||
TaskExecutorDone *taskDone = new TaskExecutorDone(excutor, done); | ||
excutor->DoAsyncRPCTask(taskDone); | ||
TaskExecutorDone *taskDone = new TaskExecutorDone( | ||
excutor, done); | ||
brpc::ClosureGuard taskDone_guard(taskDone); | ||
int ret = excutor->DoAsyncRPCTask(taskDone); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. If getleader fails, the cluster is abnormal, so it is reasonable to block all the async tasks here. |
||
if (ret < 0) { | ||
taskDone->SetRetCode(ret); | ||
return; | ||
} | ||
taskDone_guard.release(); | ||
} | ||
|
||
MetaStatusCode MetaServerClientImpl::CreateInode(const InodeParam ¶m, | ||
|
Original file line number | Diff line number | Diff line change | ||||
---|---|---|---|---|---|---|
|
@@ -79,37 +79,38 @@ int TaskExecutor::DoRPCTask() { | |||||
return retCode; | ||||||
} | ||||||
|
||||||
void TaskExecutor::DoAsyncRPCTask(TaskExecutorDone *done) { | ||||||
brpc::ClosureGuard done_guard(done); | ||||||
int TaskExecutor::DoAsyncRPCTask(TaskExecutorDone *done) { | ||||||
task_->rpcTimeoutMs = opt_.rpcTimeoutMS; | ||||||
|
||||||
int retCode = -1; | ||||||
|
||||||
if (task_->retryTimes++ > opt_.maxRetry) { | ||||||
LOG(ERROR) << task_->TaskContextStr() | ||||||
<< " retry times exceeds the limit"; | ||||||
done->SetRetCode(retCode); | ||||||
return; | ||||||
} | ||||||
do { | ||||||
if (task_->retryTimes++ > opt_.maxRetry) { | ||||||
LOG(ERROR) << task_->TaskContextStr() | ||||||
<< " retry times exceeds the limit"; | ||||||
break; | ||||||
} | ||||||
|
||||||
if (!HasValidTarget() && !GetTarget()) { | ||||||
LOG(WARNING) << "get target fail for " << task_->TaskContextStr() | ||||||
<< ", sleep and retry"; | ||||||
done->SetRetCode(retCode); | ||||||
return; | ||||||
} | ||||||
if (!HasValidTarget() && !GetTarget()) { | ||||||
LOG(WARNING) << "get target fail for " << task_->TaskContextStr() | ||||||
<< ", sleep and retry"; | ||||||
bthread_usleep(opt_.retryIntervalUS); | ||||||
continue; | ||||||
} | ||||||
|
||||||
auto channel = channelManager_->GetOrCreateChannel( | ||||||
task_->target.metaServerID, task_->target.endPoint); | ||||||
if (!channel) { | ||||||
LOG(WARNING) << "GetOrCreateChannel fail for " | ||||||
<< task_->TaskContextStr() << ", sleep and retry"; | ||||||
done->SetRetCode(retCode); | ||||||
return; | ||||||
} | ||||||
auto channel = channelManager_->GetOrCreateChannel( | ||||||
task_->target.metaServerID, task_->target.endPoint); | ||||||
if (!channel) { | ||||||
LOG(WARNING) << "GetOrCreateChannel fail for " | ||||||
<< task_->TaskContextStr() << ", sleep and retry"; | ||||||
bthread_usleep(opt_.retryIntervalUS); | ||||||
continue; | ||||||
} | ||||||
retCode = ExcuteTask(channel.get(), done); | ||||||
break; | ||||||
} while (true); | ||||||
|
||||||
ExcuteTask(channel.get(), done); | ||||||
done_guard.release(); | ||||||
return; | ||||||
return retCode; | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. done |
||||||
} | ||||||
|
||||||
bool TaskExecutor::OnReturn(int retCode) { | ||||||
|
@@ -303,7 +304,11 @@ void TaskExecutorDone::Run() { | |||||
needRetry = excutor_->OnReturn(code_); | ||||||
if (needRetry) { | ||||||
excutor_->PreProcessBeforeRetry(code_); | ||||||
excutor_->DoAsyncRPCTask(this); | ||||||
code_ = excutor_->DoAsyncRPCTask(this); | ||||||
if (code_ < 0) { | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. AsyncTask always returns MetaStatusCode::OK
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. AsyncTask will return -1 when retryTimes runs out |
||||||
done_->SetMetaStatusCode(ConvertToMetaStatusCode(code_)); | ||||||
return; | ||||||
} | ||||||
self_guard.release(); | ||||||
done_guard.release(); | ||||||
} else { | ||||||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Please state the reason for stack overflow in issue or commit message.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
When getleader always fails, the function 'TaskExecutor::DoAsyncRPCTask' and the function 'TaskExecutorDone::Run' call each other cyclically, resulting in a stack overflow.