Skip to content

Commit d6a5d20

Browse files
committed
Merge branch 'hotfix/SDK-4753_Gfxproc_causes_a_hang_on_desktop_app' into 'release/v8.0.2'
SDK-4753 Gfxproc causes a hang on desktop app [hotfix 8.0.2] See merge request sdk/sdk!6162
2 parents 0a90f22 + 7236016 commit d6a5d20

File tree

5 files changed

+147
-99
lines changed

5 files changed

+147
-99
lines changed

include/mega/gfx/isolatedprocess.h

+7-7
Original file line numberDiff line numberDiff line change
@@ -28,22 +28,20 @@ class CancellableSleeper
2828
bool mCancelled = false;
2929
};
3030

31-
class AutoStartLauncher
31+
class AutoStartLauncher: public std::enable_shared_from_this<AutoStartLauncher>
3232
{
3333
public:
3434
AutoStartLauncher(const std::vector<std::string>& argv, std::function<void()> shutdowner);
3535

36-
~AutoStartLauncher();
36+
bool start();
3737

38-
void shutDownOnce();
38+
void stop();
3939

4040
private:
4141

4242
bool startUntilSuccess(Process& process);
4343

44-
bool startLaunchLoopThread();
45-
46-
void exitLaunchLoopThread();
44+
bool exitLaunchLoopThread();
4745

4846
std::vector<std::string> mArgv;
4947

@@ -137,12 +135,14 @@ class GfxIsolatedProcess
137135

138136
GfxIsolatedProcess(const Params& params);
139137

138+
~GfxIsolatedProcess();
139+
140140
const std::string& endpointName() const { return mEndpointName; }
141141
private:
142142

143143
std::string mEndpointName;
144144

145-
AutoStartLauncher mLauncher;
145+
std::shared_ptr<AutoStartLauncher> mLauncher;
146146

147147
HelloBeater mBeater;
148148
};

src/gfx/isolatedprocess.cpp

+70-41
Original file line numberDiff line numberDiff line change
@@ -161,17 +161,6 @@ AutoStartLauncher::AutoStartLauncher(const std::vector<std::string>& argv, std::
161161
mShutdowner(std::move(shutdowner))
162162
{
163163
assert(!mArgv.empty());
164-
165-
// preventive check: at least one element (executable)
166-
if (!mArgv.empty())
167-
{
168-
// launch loop thread
169-
startLaunchLoopThread();
170-
}
171-
else
172-
{
173-
LOG_fatal << "AutoStartLauncher argv is empty";
174-
}
175164
}
176165

177166
bool AutoStartLauncher::startUntilSuccess(Process& process)
@@ -201,11 +190,14 @@ bool AutoStartLauncher::startUntilSuccess(Process& process)
201190
return false;
202191
}
203192

204-
bool AutoStartLauncher::startLaunchLoopThread()
193+
bool AutoStartLauncher::start()
205194
{
206-
static const milliseconds maxBackoff(400);
195+
static const milliseconds maxBackoff(3000);
207196
static const milliseconds fastFailureThreshold(1000);
208197

198+
if (mArgv.empty())
199+
return false;
200+
209201
// There are permanent startup failure such as missing DLL. This is not likey to happen
210202
// at customer's side as it will be installed properly. It is more likely during development
211203
// and testing phases. We want to implement some backOff to reduce CPU usage if it does happen
@@ -222,7 +214,8 @@ bool AutoStartLauncher::startLaunchLoopThread()
222214
// if less than threshhold, it fails right after startup.
223215
if ((used < fastFailureThreshold) && !mShuttingDown)
224216
{
225-
LOG_err << "process existed too fast: " << used.count() << " backoff" << backOff.count() << "ms";
217+
// LOG_verbose << "process existed too fast: " << used.count() << " backoff "
218+
// << backOff.count() << "ms";
226219
mSleeper.sleep(backOff);
227220
backOff = std::min(backOff * 2, maxBackoff); // double it and maxBackoff at most
228221
}
@@ -233,19 +226,25 @@ bool AutoStartLauncher::startLaunchLoopThread()
233226
}
234227
};
235228

236-
auto launcher = [this, backoffForFastFailure]() {
229+
auto launcher = [this, backoffForFastFailure]()
230+
{
231+
// Keep a copy, so the object is always live while the code is running
232+
auto keepRef = shared_from_this();
233+
237234
mThreadIsRunning = true;
238235

239236
backoffForFastFailure([this](){
240237
Process process;
241238
if (startUntilSuccess(process))
242239
{
243240
bool ret = process.wait();
244-
LOG_debug << "wait: " << ret
245-
<< " hasSignal: " << process.hasTerminateBySignal()
246-
<< " " << (process.hasTerminateBySignal() ? std::to_string(process.getTerminatingSignal()) : "")
247-
<< " hasExited: " << process.hasExited()
248-
<< " " << (process.hasExited() ? std::to_string(process.getExitCode()) : "");
241+
LOG_verbose << "wait: " << ret << " hasSignal: " << process.hasTerminateBySignal()
242+
<< " "
243+
<< (process.hasTerminateBySignal() ?
244+
std::to_string(process.getTerminatingSignal()) :
245+
"")
246+
<< " hasExited: " << process.hasExited() << " "
247+
<< (process.hasExited() ? std::to_string(process.getExitCode()) : "");
249248
}
250249
});
251250

@@ -262,19 +261,33 @@ bool AutoStartLauncher::startLaunchLoopThread()
262261
// is just starting. so we'll retry in the loop, but there is no reason it
263262
// couldn't be shut down in 15 seconds
264263
//
265-
void AutoStartLauncher::exitLaunchLoopThread()
264+
// @return true if thread exits, otherwise false
265+
bool AutoStartLauncher::exitLaunchLoopThread()
266266
{
267-
milliseconds backOff(10);
268-
while (mThreadIsRunning && backOff < 15s)
267+
milliseconds interval{10};
268+
milliseconds totalWaitTime{0};
269+
while (mThreadIsRunning && totalWaitTime < 15s)
269270
{
271+
LOG_verbose << "interval " << interval.count() << " totalWaitTime "
272+
<< totalWaitTime.count();
273+
270274
// shutdown the started process
271275
if (mShutdowner) mShutdowner();
272-
std::this_thread::sleep_for(backOff);
273-
backOff += 10ms;
276+
277+
// wait
278+
std::this_thread::sleep_for(interval);
279+
280+
// Update total wait time
281+
totalWaitTime += interval;
282+
283+
// backoff
284+
interval += 10ms;
274285
}
286+
287+
return !mThreadIsRunning;
275288
}
276289

277-
void AutoStartLauncher::shutDownOnce()
290+
void AutoStartLauncher::stop()
278291
{
279292
bool wasShuttingdown = mShuttingDown.exchange(true);
280293
if (wasShuttingdown)
@@ -287,17 +300,23 @@ void AutoStartLauncher::shutDownOnce()
287300
// cancel sleeper, thread in sleep is woken up if it is
288301
mSleeper.cancel();
289302

290-
exitLaunchLoopThread();
291-
if (mThread.joinable()) mThread.join();
303+
if (exitLaunchLoopThread())
304+
{
305+
if (mThread.joinable())
306+
mThread.join();
307+
}
308+
else
309+
{
310+
// Defensive: the thread doesn't exit, detach the thread
311+
// We had such bug and it is usually a bug
312+
assert(false && "AutoStartLauncher detaching loop thread");
313+
LOG_warn << "AutoStartLauncher detaching loop thread";
314+
mThread.detach();
315+
}
292316

293317
LOG_info << "AutoStartLauncher is down";
294318
}
295319

296-
AutoStartLauncher::~AutoStartLauncher()
297-
{
298-
shutDownOnce();
299-
}
300-
301320
bool CancellableSleeper::sleep(const milliseconds& period)
302321
{
303322
std::unique_lock<std::mutex> l(mMutex);
@@ -345,12 +364,22 @@ std::vector<std::string> GfxIsolatedProcess::Params::toArgs() const
345364
// We divide keepAliveInSeconds by three to set up mBeater so that it allows at least two
346365
// beats within the keep-alive period.
347366
GfxIsolatedProcess::GfxIsolatedProcess(const Params& params):
348-
mEndpointName(params.endpointName),
349-
mLauncher(params.toArgs(),
350-
[endpointName = params.endpointName]()
351-
{
352-
shutdown(endpointName);
353-
}),
354-
mBeater(seconds(params.keepAliveInSeconds / 3), params.endpointName)
355-
{}
367+
mEndpointName{
368+
params.endpointName
369+
},
370+
mLauncher{new AutoStartLauncher{params.toArgs(),
371+
[endpointName = params.endpointName]()
372+
{
373+
shutdown(endpointName);
374+
}}},
375+
mBeater{seconds(params.keepAliveInSeconds / 3), params.endpointName}
376+
{
377+
mLauncher->start();
356378
}
379+
380+
GfxIsolatedProcess::~GfxIsolatedProcess()
381+
{
382+
mLauncher->stop();
383+
}
384+
385+
} // Namespace

src/process.cpp

+4-4
Original file line numberDiff line numberDiff line change
@@ -205,21 +205,21 @@ bool Process::run(const vector<string>& args, const unordered_map<string, string
205205
if (dup2(childReadFd, STDOUT_FILENO) == -1)
206206
{
207207
reportError("Could not redirect stdout");
208-
exit(1);
208+
_exit(EXIT_FAILURE);
209209
}
210210
// stderr
211211
::close(STDERR_FILENO);
212212
if (dup2(childReadErrorFd, STDERR_FILENO) == -1)
213213
{
214214
reportError("Could not redirect stderr");
215-
exit(1);
215+
_exit(EXIT_FAILURE);
216216
}
217217

218218
// Prepare command-line arguments for child process
219219
if (args.empty())
220220
{
221221
cerr << "Process: Can not execute, no arguments given" << endl;
222-
exit(1);
222+
_exit(EXIT_FAILURE);
223223
}
224224
vector<char*> argv;
225225
for (vector<string>::const_iterator i = args.begin(); i != args.end(); ++i)
@@ -237,7 +237,7 @@ bool Process::run(const vector<string>& args, const unordered_map<string, string
237237
// cerr so parent process sees this
238238
cerr << "Could not execute '" + string(argv[0]) + "'" << ": " << savedErrno << ": " << strerror(savedErrno) << endl;
239239
reportError("Could not execute '" + string(argv[0]) + "'", savedErrno);
240-
exit(1);
240+
_exit(EXIT_FAILURE);
241241
}
242242
// else --> parent process
243243

0 commit comments

Comments
 (0)