Skip to content

Commit e99096e

Browse files
committed
Merge branch 'versions/0.10' into dev
Conflicts: cmd/hekad/main.go docs/source/conf.py
2 parents 1a5e095 + c7906e5 commit e99096e

File tree

11 files changed

+195
-58
lines changed

11 files changed

+195
-58
lines changed

CHANGES.txt

+40
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,32 @@ Features
1717

1818
* Added decoder support to StatAccumInput.
1919

20+
* Added `git_clone_to_path` to the cmake build to allow git repos to be cloned
21+
into alternate locations; useful for relocating forks of Go packages into
22+
their original import paths.
23+
24+
0.10.0 (2015-??-??)
25+
===================
26+
27+
0.10.0b2 (2015-11-20)
28+
=====================
29+
30+
Backwards Incompatibilities
31+
---------------------------
32+
33+
* StatAccumInput: `percent_threshold` param type converted to a slice.
34+
35+
Bug Handling
36+
------------
37+
38+
* Updated Sarama dependency from pre-1.0 release fork to fork (with only test
39+
code changes) of Sarama 1.5.0 release.
40+
41+
Features
42+
--------
43+
44+
* Added decoder support to StatAccumInput.
45+
2046
* Added `git_clone_to_path` to the cmake build to allow git repos to be cloned
2147
into alternate locations; useful for relocating forks of Go packages into
2248
their original import paths.
@@ -49,6 +75,17 @@ Features
4975
Bug Handling
5076
------------
5177

78+
* Fixed issue where ElasticSearchOutput was trying to send zero length requests
79+
(#1783).
80+
81+
* Fixed race condition in ElasticSearchOutput (#1786).
82+
83+
* AMQPInput `Run` method now returns an error when the input channel closes but
84+
`Stop` hasn't been called, so that restarts are triggered successfully (#1757).
85+
86+
* Fixed error where restarting plugins were losing specified configuration
87+
(#1756).
88+
5289
* Fixed config error where global `max_pack_idle` setting was the wrong type
5390
and was being ignored (#1778).
5491

@@ -87,6 +124,9 @@ Bug Handling
87124
* Fixed panic that was occurring when loading a config file or directory that
88125
exists but which registers no plugins (#1597).
89126

127+
* Delay start up when a buffered plugin's buffer is at capacity to give the
128+
back-pressure time to resolve (#1738).
129+
90130
* Fixed bug where LogStreamerInput would sometimes loop infinitely reading the
91131
same file over and over when reading gzipped log files.
92132

cmd/hekad/config.go

+2
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,7 @@ type HekadConfig struct {
4747
Hostname string
4848
MaxMessageSize uint32 `toml:"max_message_size"`
4949
LogFlags int `toml:"log_flags"`
50+
FullBufferMaxRetries uint32 `toml:"full_buffer_max_retries"`
5051
}
5152

5253
func LoadHekadConfig(configPath string) (config *HekadConfig, err error) {
@@ -71,6 +72,7 @@ func LoadHekadConfig(configPath string) (config *HekadConfig, err error) {
7172
PidFile: "",
7273
Hostname: hostname,
7374
LogFlags: log.LstdFlags,
75+
FullBufferMaxRetries: 10,
7476
}
7577

7678
var configFile map[string]toml.Primitive

cmd/hekad/main.go

+1
Original file line numberDiff line numberDiff line change
@@ -87,6 +87,7 @@ func setGlobalConfigs(config *HekadConfig) (*pipeline.GlobalConfigStruct, string
8787
globals.ShareDir = config.ShareDir
8888
globals.SampleDenominator = config.SampleDenominator
8989
globals.Hostname = config.Hostname
90+
globals.FullBufferMaxRetries = uint(config.FullBufferMaxRetries)
9091

9192
return globals, cpuProfName, memProfName
9293
}

docs/source/config/index.rst

+8
Original file line numberDiff line numberDiff line change
@@ -171,6 +171,14 @@ Config:
171171
and time, the default) or 0 (no prefix). See
172172
`Go documentation <https://golang.org/pkg/log/#pkg-constants>`_ for details.
173173

174+
- full_buffer_max_retries (uint32):
175+
When Heka shuts down due to a buffer filling to capacity, the next time
176+
Heka starts it will delay startup briefly to give the buffer a chance to
177+
drain, to alleviate the back-pressure. This setting specifies the maximum
178+
number of intervals (max 1s in duration) Heka should wait for the buffer
179+
size to get below 90% of capacity before deciding that the issue is not
180+
resolved and continuing startup (or shutting down, if the buffer's `full_action` is set to "shutdown"). Defaults to 10.
181+
174182
Example hekad.toml file
175183
=======================
176184

pipeline/pipeline_runner.go

+18-11
Original file line numberDiff line numberDiff line change
@@ -51,12 +51,15 @@ type GlobalConfigStruct struct {
5151
MaxPackIdle time.Duration
5252
stopping bool
5353
stoppingMutex sync.RWMutex
54+
shutdownOnce sync.Once
5455
BaseDir string
5556
ShareDir string
5657
SampleDenominator int
5758
sigChan chan os.Signal
5859
Hostname string
5960
abortChan chan struct{}
61+
FullBufferMaxRetries uint
62+
exitCode int
6063
}
6164

6265
// Creates a GlobalConfigStruct object populated w/ default values.
@@ -87,10 +90,13 @@ func (g *GlobalConfigStruct) SigChan() chan os.Signal {
8790
// This method returns immediately by spawning a goroutine to do the
8891
// work so that the caller won't end up blocking part of the shutdown
8992
// sequence
90-
func (g *GlobalConfigStruct) ShutDown() {
91-
go func() {
92-
g.sigChan <- syscall.SIGINT
93-
}()
93+
func (g *GlobalConfigStruct) ShutDown(exitCode int) {
94+
g.shutdownOnce.Do(func() {
95+
g.exitCode = exitCode
96+
go func() {
97+
g.sigChan <- syscall.SIGINT
98+
}()
99+
})
94100
}
95101

96102
func (g *GlobalConfigStruct) IsShuttingDown() (stopping bool) {
@@ -251,9 +257,9 @@ func (p *PipelinePack) EncodeMsgBytes() error {
251257
return err
252258
}
253259

254-
// Main function driving Heka execution. Loads config, initializes
255-
// PipelinePack pools, and starts all the runners. Then it listens for signals
256-
// and drives the shutdown process when that is triggered.
260+
// Main function driving Heka execution. Loads config, initializes PipelinePack
261+
// pools, and starts all the runners. Then it listens for signals and drives
262+
// the shutdown process when that is triggered.
257263
func Run(config *PipelineConfig) {
258264
LogInfo.Println("Starting hekad...")
259265

@@ -268,7 +274,7 @@ func Run(config *PipelineConfig) {
268274
LogError.Printf("Output '%s' failed to start: %s", name, err)
269275
outputsWg.Done()
270276
if !output.IsStoppable() {
271-
globals.ShutDown()
277+
globals.ShutDown(1)
272278
}
273279
continue
274280
}
@@ -281,7 +287,7 @@ func Run(config *PipelineConfig) {
281287
LogError.Printf("Filter '%s' failed to start: %s", name, err)
282288
config.filtersWg.Done()
283289
if !filter.IsStoppable() {
284-
globals.ShutDown()
290+
globals.ShutDown(1)
285291
}
286292
continue
287293
}
@@ -319,7 +325,7 @@ func Run(config *PipelineConfig) {
319325
LogError.Printf("Input '%s' failed to start: %s", name, err)
320326
config.inputsWg.Done()
321327
if !input.IsStoppable() {
322-
globals.ShutDown()
328+
globals.ShutDown(1)
323329
}
324330
continue
325331
}
@@ -398,6 +404,7 @@ func Run(config *PipelineConfig) {
398404
}
399405

400406
LogInfo.Println("Shutdown complete.")
407+
os.Exit(globals.exitCode)
401408
}
402409

403410
func sandboxAbort(config *PipelineConfig) {
@@ -442,6 +449,6 @@ func sandboxAbort(config *PipelineConfig) {
442449
// and abort any sandboxes that are wedged inside process_message or
443450
// timer_event.
444451
config.allReportsStdout()
445-
config.Globals.ShutDown()
452+
config.Globals.ShutDown(1)
446453
close(config.Globals.abortChan)
447454
}

pipeline/plugin_runners.go

+85-28
Original file line numberDiff line numberDiff line change
@@ -199,21 +199,21 @@ type InputRunner interface {
199199

200200
type iRunner struct {
201201
pRunnerBase
202-
input Input
203-
config CommonInputConfig
204-
pConfig *PipelineConfig
205-
inChan chan *PipelinePack
206-
ticker <-chan time.Time
207-
transient bool
208-
syncDecode bool
209-
sendDecodeFailures bool
210-
logDecodeFailures bool
211-
deliver DeliverFunc
212-
delivererOnce sync.Once
213-
delivererLock sync.Mutex
214-
canExit bool
215-
shutdownWanters []WantsDecoderRunnerShutdown
216-
shutdownLock sync.Mutex
202+
input Input
203+
config CommonInputConfig
204+
pConfig *PipelineConfig
205+
inChan chan *PipelinePack
206+
ticker <-chan time.Time
207+
transient bool
208+
syncDecode bool
209+
sendDecodeFailures bool
210+
logDecodeFailures bool
211+
deliver DeliverFunc
212+
delivererOnce sync.Once
213+
delivererLock sync.Mutex
214+
canExit bool
215+
shutdownWanters []WantsDecoderRunnerShutdown
216+
shutdownLock sync.Mutex
217217
}
218218

219219
func (ir *iRunner) Ticker() (ticker <-chan time.Time) {
@@ -314,7 +314,7 @@ func (ir *iRunner) Starter(h PluginHelper, wg *sync.WaitGroup) {
314314
if err != nil {
315315
ir.LogError(err)
316316
if !ir.IsStoppable() {
317-
globals.ShutDown()
317+
globals.ShutDown(1)
318318
}
319319
return
320320
}
@@ -344,7 +344,7 @@ func (ir *iRunner) Starter(h PluginHelper, wg *sync.WaitGroup) {
344344
break
345345
}
346346

347-
// Otherwise we'll execute the Retry config
347+
// Otherwise we'll execute the Retry config.
348348
recon.CleanupForRestart()
349349
if ir.maker == nil {
350350
ir.pConfig.makersLock.RLock()
@@ -364,10 +364,16 @@ func (ir *iRunner) Starter(h PluginHelper, wg *sync.WaitGroup) {
364364
ir.LogMessage(fmt.Sprintf("Restarting (attempt %d/%d)\n",
365365
rh.times, rh.retries))
366366

367-
// If we've not been created elsewhere, call the plugin's Init()
367+
// If we've not been created elsewhere, call the plugin's Init().
368368
if !ir.transient {
369-
if err = ir.plugin.Init(ir.maker.Config()); err != nil {
370-
// We couldn't reInit the plugin, do a mini-retry loop
369+
var config interface{}
370+
if config, err = ir.maker.PrepConfig(); err != nil {
371+
// We couldn't reInit the plugin, do a mini-retry loop.
372+
ir.LogError(err)
373+
goto initLoop
374+
}
375+
if err = ir.plugin.Init(config); err != nil {
376+
// We couldn't reInit the plugin, do a mini-retry loop.
371377
ir.LogError(err)
372378
goto initLoop
373379
}
@@ -380,7 +386,7 @@ func (ir *iRunner) Starter(h PluginHelper, wg *sync.WaitGroup) {
380386

381387
// If we're not a stoppable input, trigger Heka shutdown.
382388
if !ir.IsStoppable() {
383-
globals.ShutDown()
389+
globals.ShutDown(1)
384390
}
385391
}
386392

@@ -957,10 +963,43 @@ func (foRunner *foRunner) BackPressured() bool {
957963
return len(foRunner.inChan) >= foRunner.capacity ||
958964
foRunner.matcher.InChanLen() >= foRunner.capacity
959965
}
960-
961966
return foRunner.capacity > 0 && foRunner.bufReader.queueSize.Get() >= uint64(foRunner.capacity)
962967
}
963968

969+
func (foRunner *foRunner) waitForBackPressure() error {
970+
globals := foRunner.pConfig.Globals
971+
retryOptions := getDefaultRetryOptions()
972+
retryOptions.MaxDelay = "1s"
973+
retryOptions.MaxRetries = int(globals.FullBufferMaxRetries)
974+
// NewRetryHelper will only return an error if the duration strings don't
975+
// parse. Ours are hard-coded, so this error shouldn't happen.
976+
retry, err := NewRetryHelper(retryOptions)
977+
if err != nil {
978+
return fmt.Errorf("can't create retry helper: %s", err.Error())
979+
}
980+
for !globals.IsShuttingDown() {
981+
bp := foRunner.BackPressured()
982+
fmt.Println("back-pressured?: ", bp)
983+
if !bp {
984+
return nil
985+
}
986+
err = retry.Wait()
987+
if err != nil {
988+
// We've exhausted our max allowed retries, so we honor the
989+
// buffer's 'full_action' setting and trigger a shutdown if
990+
// necessary.
991+
if foRunner.bufReader.config.FullAction == "shutdown" {
992+
globals.ShutDown(1)
993+
foRunner.LogError(errors.New("back-pressure not resolving: triggering shutdown"))
994+
}
995+
// But we always return `nil` so that regular start up sequence can
996+
// continue.
997+
return nil
998+
}
999+
}
1000+
return nil
1001+
}
1002+
9641003
func (foRunner *foRunner) Start(h PluginHelper, wg *sync.WaitGroup) (err error) {
9651004
foRunner.h = h
9661005
foRunner.pConfig = h.PipelineConfig()
@@ -1034,6 +1073,14 @@ func (foRunner *foRunner) Start(h PluginHelper, wg *sync.WaitGroup) (err error)
10341073
} else {
10351074
go foRunner.OldStarter(h, wg)
10361075
}
1076+
1077+
if foRunner.useBuffering && foRunner.BackPressured() {
1078+
foRunner.LogMessage("Delaying start while trying to relieve back-pressure...")
1079+
if err = foRunner.waitForBackPressure(); err != nil {
1080+
return err
1081+
}
1082+
}
1083+
10371084
return
10381085
}
10391086

@@ -1145,7 +1192,7 @@ func (foRunner *foRunner) Starter(plugin MessageProcessor, h PluginHelper,
11451192
if err != nil {
11461193
foRunner.LogError(err)
11471194
if !foRunner.IsStoppable() {
1148-
globals.ShutDown()
1195+
globals.ShutDown(1)
11491196
}
11501197
return
11511198
}
@@ -1183,7 +1230,7 @@ func (foRunner *foRunner) Starter(plugin MessageProcessor, h PluginHelper,
11831230
// No more retries.
11841231
foRunner.lastErr = err
11851232
if !foRunner.IsStoppable() {
1186-
globals.ShutDown()
1233+
globals.ShutDown(1)
11871234
}
11881235
return
11891236
}
@@ -1251,7 +1298,12 @@ func (foRunner *foRunner) Starter(plugin MessageProcessor, h PluginHelper,
12511298
break
12521299
}
12531300
foRunner.LogMessage("now restarting")
1254-
if err = foRunner.plugin.Init(foRunner.maker.Config()); err != nil {
1301+
var config interface{}
1302+
if config, err = foRunner.maker.PrepConfig(); err != nil {
1303+
foRunner.LogError(err)
1304+
goto initLoop
1305+
}
1306+
if err = foRunner.plugin.Init(config); err != nil {
12551307
foRunner.LogError(err)
12561308
goto initLoop
12571309
}
@@ -1308,7 +1360,7 @@ func (foRunner *foRunner) exit() {
13081360
// Also, if this isn't a "stoppable" plugin we shut everything down.
13091361
if !foRunner.IsStoppable() {
13101362
foRunner.LogMessage("has stopped, shutting down.")
1311-
foRunner.pConfig.Globals.ShutDown()
1363+
foRunner.pConfig.Globals.ShutDown(1)
13121364
return
13131365
}
13141366

@@ -1424,7 +1476,7 @@ func (foRunner *foRunner) OldStarter(helper PluginHelper, wg *sync.WaitGroup) {
14241476
if err != nil {
14251477
foRunner.LogError(err)
14261478
if !foRunner.IsStoppable() {
1427-
globals.ShutDown()
1479+
globals.ShutDown(1)
14281480
}
14291481
return
14301482
}
@@ -1497,7 +1549,12 @@ func (foRunner *foRunner) OldStarter(helper PluginHelper, wg *sync.WaitGroup) {
14971549
break
14981550
}
14991551
foRunner.LogMessage("now restarting")
1500-
if err = foRunner.plugin.Init(foRunner.maker.Config()); err != nil {
1552+
var config interface{}
1553+
if config, err = foRunner.maker.PrepConfig(); err != nil {
1554+
foRunner.LogError(err)
1555+
goto initLoop
1556+
}
1557+
if err = foRunner.plugin.Init(config); err != nil {
15011558
foRunner.LogError(err)
15021559
goto initLoop
15031560
}

0 commit comments

Comments
 (0)