@@ -5,9 +5,17 @@ package debug
5
5
6
6
import (
7
7
"context"
8
+ "net"
9
+ "reflect"
8
10
"runtime"
9
11
"runtime/trace"
12
+ "sync"
13
+ "syscall"
14
+ "time"
10
15
16
+ "github.com/google/gopacket"
17
+ "github.com/google/gopacket/layers"
18
+ "github.com/google/gopacket/pcapgo"
11
19
"github.com/zeebo/errs"
12
20
13
21
"storj.io/common/pb"
@@ -18,6 +26,8 @@ import (
18
26
type Endpoint struct {
19
27
pb.DRPCDebugUnimplementedServer
20
28
29
+ mu sync.Mutex
30
+
21
31
Auth func (ctx context.Context ) error
22
32
}
23
33
@@ -33,11 +43,13 @@ func (f *Endpoint) CollectRuntimeTraces(_ *pb.CollectRuntimeTracesRequest, strea
33
43
if err := f .Auth (stream .Context ()); err != nil {
34
44
return rpcstatus .Wrap (rpcstatus .Unauthenticated , err )
35
45
}
36
-
37
46
if ! traceEnabled {
38
47
return rpcstatus .Wrap (rpcstatus .FailedPrecondition , errs .New ("trace is not enabled: %v" , runtime .Version ()))
39
48
}
40
49
50
+ f .mu .Lock ()
51
+ defer f .mu .Unlock ()
52
+
41
53
if err := trace .Start (& streamWriter {stream : stream }); err != nil {
42
54
return rpcstatus .Wrap (rpcstatus .FailedPrecondition , errs .New ("trace failed to start: %w" , err ))
43
55
}
@@ -48,6 +60,164 @@ func (f *Endpoint) CollectRuntimeTraces(_ *pb.CollectRuntimeTracesRequest, strea
48
60
return nil
49
61
}
50
62
63
+ // CollectRuntimeTraces2 will stream trace data to the client until the client sends a done message
64
+ // some error happens, and it then flushes the trace data and captured packet data.
65
+ func (f * Endpoint ) CollectRuntimeTraces2 (stream pb.DRPCDebug_CollectRuntimeTraces2Stream ) error {
66
+ if err := f .Auth (stream .Context ()); err != nil {
67
+ return rpcstatus .Wrap (rpcstatus .Unauthenticated , err )
68
+ }
69
+ if ! traceEnabled {
70
+ return rpcstatus .Wrap (rpcstatus .FailedPrecondition , errs .New ("trace is not enabled: %v" , runtime .Version ()))
71
+ }
72
+
73
+ f .mu .Lock ()
74
+ defer f .mu .Unlock ()
75
+
76
+ var done sync.WaitGroup
77
+ defer done .Wait ()
78
+
79
+ type Handle struct {
80
+ eh * pcapgo.EthernetHandle
81
+ iface net.Interface
82
+ }
83
+
84
+ if err := trace .Start (& streamWriter {stream : stream }); err != nil {
85
+ return rpcstatus .Wrap (rpcstatus .FailedPrecondition , errs .New ("trace failed to start: %w" , err ))
86
+ }
87
+ stopped := false
88
+ defer func () {
89
+ if ! stopped {
90
+ trace .Stop ()
91
+ }
92
+ }()
93
+
94
+ // start the packet capture
95
+ var handles []Handle
96
+ ifaces , err := net .Interfaces () // ignore errors because pcap is supplemental
97
+ trace .Logf (stream .Context (), "trace-debug" , "found %d interfaces (err:%v)" , len (ifaces ), err )
98
+ for _ , iface := range ifaces {
99
+ trace .Logf (stream .Context (), "trace-debug" , "checking interface %q" , iface .Name )
100
+
101
+ if iface .Flags & net .FlagLoopback != 0 {
102
+ continue
103
+ }
104
+ if iface .Flags & (net .FlagUp | net .FlagRunning ) != net .FlagUp | net .FlagRunning {
105
+ continue
106
+ }
107
+ if len (iface .HardwareAddr ) == 0 {
108
+ continue
109
+ }
110
+ addrs , err := iface .Addrs ()
111
+ if err != nil {
112
+ continue
113
+ }
114
+ hasIPv4 := false
115
+ for _ , addr := range addrs {
116
+ ip , _ := addr .(* net.IPNet )
117
+ hasIPv4 = hasIPv4 || len (ip .IP .To4 ()) == net .IPv4len
118
+ }
119
+ if ! hasIPv4 {
120
+ continue
121
+ }
122
+
123
+ eh , err := pcapgo .NewEthernetHandle (iface .Name )
124
+ if err != nil {
125
+ trace .Logf (stream .Context (), "trace-debug" , "could not open handle for %q: %v" , iface .Name , err )
126
+ continue
127
+ }
128
+ trace .Logf (stream .Context (), "trace-debug" , "opened handle for %q" , iface .Name )
129
+ defer eh .Close ()
130
+
131
+ handles = append (handles , Handle {
132
+ eh : eh ,
133
+ iface : iface ,
134
+ })
135
+ }
136
+
137
+ for _ , handle := range handles {
138
+ handle := handle // avoid loop capture bug
139
+
140
+ done .Add (1 )
141
+ go func () {
142
+ defer done .Done ()
143
+
144
+ src := gopacket .NewPacketSource (handle .eh , layers .LinkTypeEthernet )
145
+ for {
146
+ packet , err := src .NextPacket ()
147
+ if err != nil || ! trace .IsEnabled () {
148
+ return
149
+ }
150
+
151
+ tcp , _ := packet .Layer (layers .LayerTypeTCP ).(* layers.TCP )
152
+ ip , _ := packet .Layer (layers .LayerTypeIPv4 ).(* layers.IPv4 )
153
+ if tcp == nil || ip == nil {
154
+ continue
155
+ }
156
+
157
+ trace .Logf (stream .Context (), "tcp-packet" ,
158
+ "if:%s local:%s:%d remote:%s:%d seq:%d ack:%d flags:%d window:%d payload:%d fragoff:%d ts:%d" ,
159
+ handle .iface .Name ,
160
+ ip .SrcIP ,
161
+ tcp .SrcPort ,
162
+ ip .DstIP ,
163
+ tcp .DstPort ,
164
+ tcp .Seq ,
165
+ tcp .Ack ,
166
+ makeTCPFlags (tcp ),
167
+ tcp .Window ,
168
+ len (tcp .Payload ),
169
+ ip .FragOffset ,
170
+ packet .Metadata ().Timestamp .UnixNano (),
171
+ )
172
+ }
173
+ }()
174
+ }
175
+
176
+ // wait for a done message or error from caller
177
+ for {
178
+ msg , err := stream .Recv ()
179
+ if err != nil {
180
+ return err
181
+ }
182
+ if msg .Done {
183
+ break
184
+ }
185
+ }
186
+
187
+ stopped = true
188
+ trace .Stop ()
189
+
190
+ // wait for all of the handles to be done and send a signal when they are
191
+ doneWaiting := make (chan struct {})
192
+ go func () {
193
+ done .Wait ()
194
+ close (doneWaiting )
195
+ }()
196
+
197
+ select {
198
+ case <- doneWaiting :
199
+ // all of the handles are not being used anymore, so we can do clean
200
+ // close calls
201
+ for _ , handle := range handles {
202
+ handle .eh .Close ()
203
+ }
204
+
205
+ case <- time .After (10 * time .Second ):
206
+ // at least one handle is still being used and so after 10 seconds is
207
+ // almost certainly blocked in the syscall, so we can safely interrupt
208
+ // it with a close call on the fd and be reasonably sure that no reads
209
+ // will happen on a new socket that got the same fd.
210
+ for _ , handle := range handles {
211
+ fd := reflect .ValueOf (handle .eh ).Elem ().FieldByName ("fd" ).Int ()
212
+ _ = syscall .Close (int (fd ))
213
+ }
214
+
215
+ <- doneWaiting
216
+ }
217
+
218
+ return nil
219
+ }
220
+
51
221
type streamWriter struct {
52
222
stream pb.DRPCDebug_CollectRuntimeTracesStream
53
223
}
@@ -59,3 +229,34 @@ func (s *streamWriter) Write(p []byte) (int, error) {
59
229
}
60
230
return len (p ), nil
61
231
}
232
+
233
+ func makeTCPFlags (t * layers.TCP ) (f uint32 ) {
234
+ if t .FIN {
235
+ f |= 0x0001
236
+ }
237
+ if t .SYN {
238
+ f |= 0x0002
239
+ }
240
+ if t .RST {
241
+ f |= 0x0004
242
+ }
243
+ if t .PSH {
244
+ f |= 0x0008
245
+ }
246
+ if t .ACK {
247
+ f |= 0x0010
248
+ }
249
+ if t .URG {
250
+ f |= 0x0020
251
+ }
252
+ if t .ECE {
253
+ f |= 0x0040
254
+ }
255
+ if t .CWR {
256
+ f |= 0x0080
257
+ }
258
+ if t .NS {
259
+ f |= 0x0100
260
+ }
261
+ return f
262
+ }
0 commit comments