-
Notifications
You must be signed in to change notification settings - Fork 107
set deadline before stats write/read #918
Changes from 2 commits
74adc1a
1fa6aaa
3c01e69
108f75e
1c43fa1
5eb5ad1
287aaea
113554f
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -2,6 +2,7 @@ package stats | |
|
||
import ( | ||
"bytes" | ||
"io" | ||
"net" | ||
"time" | ||
|
||
|
@@ -25,10 +26,11 @@ type Graphite struct { | |
prefix []byte | ||
addr string | ||
|
||
timeout time.Duration | ||
toGraphite chan []byte | ||
} | ||
|
||
func NewGraphite(prefix, addr string, interval int, bufferSize int) { | ||
func NewGraphite(prefix, addr string, interval, bufferSize int, timeout time.Duration) { | ||
if len(prefix) != 0 && prefix[len(prefix)-1] != '.' { | ||
prefix = prefix + "." | ||
} | ||
|
@@ -44,6 +46,7 @@ func NewGraphite(prefix, addr string, interval int, bufferSize int) { | |
prefix: []byte(prefix), | ||
addr: addr, | ||
toGraphite: make(chan []byte, bufferSize), | ||
timeout: timeout, | ||
} | ||
go g.writer() | ||
go g.reporter(interval) | ||
|
@@ -80,7 +83,6 @@ func (g *Graphite) reporter(interval int) { | |
} | ||
|
||
// writer connects to graphite and submits all pending data to it | ||
// TODO: conn.Write() returns no error for a while when the remote endpoint is down, the reconnect happens with a delay. this can also cause lost data for a second or two. | ||
func (g *Graphite) writer() { | ||
var conn net.Conn | ||
var err error | ||
|
@@ -92,6 +94,7 @@ func (g *Graphite) writer() { | |
conn, err = net.Dial("tcp", g.addr) | ||
if err == nil { | ||
log.Info("stats now connected to %s", g.addr) | ||
go g.checkEOF(conn) | ||
} else { | ||
log.Warn("stats dialing %s failed: %s. will retry", g.addr, err.Error()) | ||
} | ||
|
@@ -105,6 +108,7 @@ func (g *Graphite) writer() { | |
var ok bool | ||
for !ok { | ||
conn = assureConn() | ||
conn.SetDeadline(time.Now().Add(g.timeout)) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think this needs to just be There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. right, updated |
||
pre := time.Now() | ||
_, err = conn.Write(buf) | ||
if err == nil { | ||
|
@@ -118,3 +122,31 @@ func (g *Graphite) writer() { | |
} | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think there's a race condition here. note how the write routine can set conn to nil, but checkEOF requires it to be non-nil. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. i think you're right... that means i'll need to put a lock around There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. that should do it: 113554f There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. looks good but why do we need the changes to how the conn variable is being set? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. having The assureConn func is working with the exact same |
||
} | ||
} | ||
|
||
// normally the remote end should never write anything back | ||
// but we know when we get EOF that the other end closed the conn | ||
// if not for this, we can happily write and flush without getting errors (in Go) but getting RST tcp packets back (!) | ||
// props to Tv` for this trick. | ||
func (g *Graphite) checkEOF(conn net.Conn) { | ||
b := make([]byte, 1024) | ||
for { | ||
num, err := conn.Read(b) | ||
if err == io.EOF { | ||
log.Info("checkEOF conn.Read returned EOF -> conn is closed. closing conn explicitly") | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I know I wrote this, but looking at it now, it's too cryptic / implementation-detailed. we can just say "remote closed conn. closing conn" or something |
||
conn.Close() | ||
return | ||
} | ||
|
||
// just in case i misunderstand something or the remote behaves badly | ||
if num != 0 { | ||
log.Info("checkEOF conn.Read data? did not expect that. data: %s\n", b[:num]) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. can be simplified. maybe log.Warn "read unexpected data from peer: %s" |
||
continue | ||
} | ||
|
||
if err != io.EOF { | ||
log.Warn("checkEOF conn.Read returned err != EOF, which is unexpected. closing conn. error: %s\n", err) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. this whole "checkEOF conn.Read returned err != EOF, which is unexpected" stuff isn't appropriate. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. so just print the error and close the conn |
||
conn.Close() | ||
return | ||
} | ||
} | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
can we remove the TODO for this function? looks like we can