Skip to content
This repository has been archived by the owner on Apr 24, 2022. It is now read-only.

Commit

Permalink
Add --tstop and --tstart option preventing GPU overheating.
Browse files Browse the repository at this point in the history
To avoid overheating the GPUs, the miner stops mining on a GPU
once that GPU reaches a given temperature (--tstop).
After the GPU has cooled down to the --tstart temperature (default 40),
mining is (re)started on that GPU.
  • Loading branch information
StefanOberhumer committed May 25, 2018
1 parent d38f6c9 commit b0eb122
Show file tree
Hide file tree
Showing 6 changed files with 144 additions and 3 deletions.
60 changes: 60 additions & 0 deletions ethminer/MinerAux.h
Original file line number Diff line number Diff line change
Expand Up @@ -525,8 +525,57 @@ class MinerCLI
BOOST_THROW_EXCEPTION(BadArgument());
}
}
else if ((arg == "--tstop") && i + 1 < argc)
{
try
{
m_tstop = stoul(argv[++i]);
if (m_tstop != 0 && (m_tstop < 30 || m_tstop > 100))
{
cerr << "Bad " << arg << " option: " << argv[i] << endl;
BOOST_THROW_EXCEPTION(BadArgument());
}
}
catch (...)
{
cerr << "Bad " << arg << " option: " << argv[i] << endl;
BOOST_THROW_EXCEPTION(BadArgument());
}
}
else if ((arg == "--tstart") && i + 1 < argc)
{
try
{
m_tstart = stoul(argv[++i]);
if (m_tstart < 30 || m_tstart > 100)
{
cerr << "Bad " << arg << " option: " << argv[i] << endl;
BOOST_THROW_EXCEPTION(BadArgument());
}
}
catch (...)
{
cerr << "Bad " << arg << " option: " << argv[i] << endl;
BOOST_THROW_EXCEPTION(BadArgument());
}
}
else
return false;


// Sanity check --tstart/--tstop
if (m_tstop && m_tstop <= m_tstart)
{
cerr << "--tstop must be greater than --tstart !" << endl;
BOOST_THROW_EXCEPTION(BadArgument());
}

if (m_tstop && !m_show_hwmonitors)
{
// if we want stop mining at a specific temperature, we have to
// monitor the temperature ==> so auto enable HWMON.
m_show_hwmonitors = true;
}
return true;
}

Expand Down Expand Up @@ -690,6 +739,9 @@ class MinerCLI
<< " Use at your own risk! If GPU generates errored results they WILL be forwarded to the pool" << endl
<< " Not recommended at high overclock." << endl
#endif
<< " Temperature management: (implies -HWMON=0|1)" << endl
// Ranges below mirror the option parser: --tstop accepts 0 (feature off) or 30..100,
// --tstart accepts 30..100 only.
<< " --tstop stop mining on a GPU if temperature equals or greater than option (default 0 = disabled, valid range 0 or 30...100)." << endl
<< " --tstart restart mining on a GPU if --tstop stopped the GPU if the temperature is equal or less than option (default 40, valid range 30...100)." << endl
#if API_CORE
<< " API core configuration:" << endl
<< " --api-port Set the api port, the miner should listen to. Use 0 to disable. Default=0, use negative numbers to run in readonly mode. for example -3333." << endl
Expand Down Expand Up @@ -721,6 +773,8 @@ class MinerCLI
f.setSealers(sealers);
f.onSolutionFound([&](Solution) { return false; });

f.setTStartTStop(m_tstart, m_tstop);

string platformInfo = _m == MinerType::CL ? "CL" : "CUDA";
cout << "Benchmarking on platform: " << platformInfo << endl;

Expand Down Expand Up @@ -812,6 +866,8 @@ class MinerCLI
PoolManager mgr(client, f, m_minerType);
mgr.setReconnectTries(m_maxFarmRetries);

f.setTStartTStop(m_tstart, m_tstop);

// If we are in simulation mode we add a fake connection
if (m_mode == OperationMode::Simulation) {
URI con(URI("http://-:0"));
Expand Down Expand Up @@ -901,6 +957,10 @@ class MinerCLI

bool m_show_hwmonitors = false;
bool m_show_power = false;

// Value of --tstop: temperature at or above which mining on a GPU is paused.
// 0 (the default) leaves the feature switched off.
unsigned m_tstop = 0;
// Value of --tstart: temperature at or below which a paused GPU resumes mining.
unsigned m_tstart = 40;

#if API_CORE
int m_api_port = 0;
int m_http_port = 0;
Expand Down
7 changes: 7 additions & 0 deletions libethash-cl/CLMiner.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -296,6 +296,13 @@ void CLMiner::workLoop()
try {
while (!shouldStop())
{
if (is_mining_paused())
{
// cnote << "Paused for 3 s due temperature -tstop.";
std::this_thread::sleep_for(std::chrono::seconds(3));
continue;
}

const WorkPackage w = work();

if (current.header != w.header)
Expand Down
7 changes: 7 additions & 0 deletions libethash-cuda/CUDAMiner.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,13 @@ void CUDAMiner::workLoop()
{
while(!shouldStop())
{
if (is_mining_paused())
{
// cnote << "Paused for 3 s due temperature -tstop.";
std::this_thread::sleep_for(std::chrono::seconds(3));
continue;
}

// take local copy of work since it may end up being overwritten.
const WorkPackage w = work();

Expand Down
26 changes: 25 additions & 1 deletion libethcore/Farm.h
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,8 @@ namespace eth
class Farm: public FarmFace
{
public:
// NOTE(review): these public members look unused in the visible code --
// setTStartTStop() stores into the private m_tstart/m_tstop and its parameters
// shadow these names. They are also never initialized. Confirm and consider removing.
unsigned tstart, tstop;

struct SealerDescriptor
{
std::function<unsigned()> instances;
Expand Down Expand Up @@ -252,7 +254,8 @@ class Farm: public FarmFace
p.hashes = 0;
for (auto const& i : m_miners)
{
p.minersHashes.push_back(0);
p.miningIsPaused.push_back(i->is_mining_paused());
p.minersHashes.push_back(0);
if (hwmon) {
HwMonitorInfo hwInfo = i->hwmonInfo();
HwMonitor hw;
Expand Down Expand Up @@ -310,6 +313,9 @@ class Farm: public FarmFace
}
#endif
}

i->update_temperature(tempC);

hw.tempC = tempC;
hw.fanP = fanpcnt;
hw.powerW = powerW/((double)1000.0);
Expand Down Expand Up @@ -407,6 +413,22 @@ class Farm: public FarmFace
return m_nonce_scrambler;
}

/**
 * @brief Record the temperature thresholds later handed out by get_tstart() / get_tstop().
 * @param tstart Temperature at or below which a paused miner resumes.
 * @param tstop  Temperature at or above which a miner pauses; 0 disables pausing.
 */
void setTStartTStop(unsigned tstart, unsigned tstop)
{
    m_tstop = tstop;
    m_tstart = tstart;
}

/// @return The resume threshold last stored through setTStartTStop().
unsigned get_tstart() override { return m_tstart; }

/// @return The pause threshold last stored through setTStartTStop().
unsigned get_tstop() override { return m_tstop; }

private:
/**
* @brief Called from a Miner to note a WorkPackage has a solution.
Expand Down Expand Up @@ -448,6 +470,8 @@ class Farm: public FarmFace
string m_pool_addresses;
uint64_t m_nonce_scrambler;

// Temperature thresholds served by get_tstart() / get_tstop().
// Initialized in-class so the getters never return an indeterminate value when
// setTStartTStop() has not been called; the values match the MinerCLI defaults
// (resume at 40, pausing disabled).
unsigned m_tstart = 40;
unsigned m_tstop = 0;

wrap_nvml_handle *nvmlh = NULL;
wrap_adl_handle *adlh = NULL;
#if defined(__linux)
Expand Down
1 change: 0 additions & 1 deletion libethcore/Miner.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,4 +15,3 @@ uint8_t* dev::eth::Miner::s_dagInHostMemory = NULL;
bool dev::eth::Miner::s_exit = false;

bool dev::eth::Miner::s_noeval = false;

46 changes: 45 additions & 1 deletion libethcore/Miner.h
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,7 @@ struct WorkingProgress
uint64_t rate() const { return ms == 0 ? 0 : hashes * 1000 / ms; }

std::vector<uint64_t> minersHashes;
std::vector<bool> miningIsPaused;
std::vector<HwMonitor> minerMonitors;
uint64_t minerRate(const uint64_t hashCount) const { return ms == 0 ? 0 : hashCount * 1000 / ms; }
};
Expand All @@ -114,6 +115,14 @@ inline std::ostream& operator<<(std::ostream& _out, WorkingProgress _p)
for (size_t i = 0; i < _p.minersHashes.size(); ++i)
{
mh = _p.minerRate(_p.minersHashes[i]) / 1000000.0f;

if (_p.miningIsPaused.size() == _p.minersHashes.size()) {
// red color if mining is paused on this gpu
if (_p.miningIsPaused[i]) {
_out << EthRed;
}
}

_out << "gpu/" << i << " " << EthTeal << std::fixed << std::setw(5) << std::setprecision(2) << mh << EthReset;
if (_p.minerMonitors.size() == _p.minersHashes.size())
_out << " " << EthTeal << _p.minerMonitors[i] << EthReset;
Expand Down Expand Up @@ -165,7 +174,8 @@ class FarmFace
{
public:
virtual ~FarmFace() = default;

/**
 * @brief Temperature threshold at which a paused Miner resumes mining.
 * Miner::update_temperature() clears its pause flag when the reported
 * temperature is <= this value (a value of 0 never triggers a resume).
 */
virtual unsigned get_tstart() = 0;
/**
 * @brief Temperature threshold at which a Miner pauses mining.
 * Miner::update_temperature() sets its pause flag when the reported
 * temperature is >= this value; 0 means no pausing.
 */
virtual unsigned get_tstop() = 0;
/**
* @brief Called from a Miner to note a WorkPackage has a solution.
* @param _p The solution.
Expand Down Expand Up @@ -218,6 +228,39 @@ class Miner: public Worker
return farm.get_nonce_scrambler() + ((uint64_t) index << 40);
}

void update_temperature(unsigned temperature)
{
/*
cnote << "Setting temp" << temperature << " for gpu" << index <<
" tstop=" << farm.get_tstart() << " tstart=" << farm.get_tstop();
*/
bool _wait_for_tstart_temp = m_wait_for_tstart_temp.load(std::memory_order_relaxed);
if(!_wait_for_tstart_temp)
{
unsigned tstop = farm.get_tstop();
if (tstop && temperature >= tstop)
{
// cnote << "Pause mining due -tstop";
m_wait_for_tstart_temp.store(true, std::memory_order_relaxed);
}
} else {
unsigned tstart = farm.get_tstart();
if (tstart && temperature <= tstart)
{
// cnote << "(Re)starting mining due -tstart";
m_wait_for_tstart_temp.store(false, std::memory_order_relaxed);
}
}
}

bool is_mining_paused()
{
bool _wait_for_tstart_temp = m_wait_for_tstart_temp.load(std::memory_order_relaxed);
if (_wait_for_tstart_temp)
return true;
/* Add here some other reasons why mining on the GPU is paused */
return false;
}
protected:

/**
Expand All @@ -241,6 +284,7 @@ class Miner: public Worker
std::chrono::high_resolution_clock::time_point workSwitchStart;
HwMonitorInfo m_hwmoninfo;
private:
// True while mining is paused because the tstop threshold was reached and the
// temperature has not yet dropped back to tstart. Written by update_temperature(),
// read by is_mining_paused().
// NOTE(review): presumably atomic because those two run on different threads -- confirm.
std::atomic<bool> m_wait_for_tstart_temp = { false };
std::atomic<uint64_t> m_hashCount = {0};

WorkPackage m_work;
Expand Down

0 comments on commit b0eb122

Please sign in to comment.