diff --git a/.gitignore b/.gitignore index b6e4761..673bba4 100644 --- a/.gitignore +++ b/.gitignore @@ -1,129 +1,8 @@ -# Byte-compiled / optimized / DLL files -__pycache__/ -*.py[cod] -*$py.class +# IDEA +**/.idea/** -# C extensions -*.so -# Distribution / packaging -.Python -build/ -develop-eggs/ -dist/ -downloads/ -eggs/ -.eggs/ -lib/ -lib64/ -parts/ -sdist/ -var/ -wheels/ -pip-wheel-metadata/ -share/python-wheels/ -*.egg-info/ -.installed.cfg -*.egg -MANIFEST - -# PyInstaller -# Usually these files are written by a python script from a template -# before PyInstaller builds the exe, so as to inject date/other infos into it. -*.manifest -*.spec - -# Installer logs -pip-log.txt -pip-delete-this-directory.txt - -# Unit test / coverage reports -htmlcov/ -.tox/ -.nox/ -.coverage -.coverage.* -.cache -nosetests.xml -coverage.xml -*.cover -*.py,cover -.hypothesis/ -.pytest_cache/ - -# Translations -*.mo -*.pot - -# Django stuff: -*.log -local_settings.py -db.sqlite3 -db.sqlite3-journal - -# Flask stuff: -instance/ -.webassets-cache - -# Scrapy stuff: -.scrapy - -# Sphinx documentation -docs/_build/ - -# PyBuilder -target/ - -# Jupyter Notebook -.ipynb_checkpoints - -# IPython -profile_default/ -ipython_config.py - -# pyenv -.python-version - -# pipenv -# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. -# However, in case of collaboration, if having platform-specific dependencies or dependencies -# having no cross-platform support, pipenv may install dependencies that don't work, or not -# install all needed dependencies. -#Pipfile.lock - -# PEP 582; used by e.g. github.com/David-OConnor/pyflow -__pypackages__/ - -# Celery stuff -celerybeat-schedule -celerybeat.pid - -# SageMath parsed files -*.sage.py - -# Environments -.env -.venv -env/ -venv/ -ENV/ -env.bak/ -venv.bak/ - -# Spyder project settings -.spyderproject -.spyproject - -# Rope project settings -.ropeproject - -# mkdocs documentation -/site - -# mypy -.mypy_cache/ -.dmypy.json -dmypy.json - -# Pyre type checker -.pyre/ +# Project Proposal +Project Proposal/* +!Project Proposal/*.tex +!Project Proposal/*.bib \ No newline at end of file diff --git a/gazer/bpf.c b/gazer/bpf.c new file mode 100644 index 0000000..82d48d7 --- /dev/null +++ b/gazer/bpf.c @@ -0,0 +1,38 @@ +#include +#include +#include +#include +#include + +BPF_HASH(birth, struct sock *, u64); + +// separate data structs for ipv4 and ipv6 +struct ipv4_data_t { + u64 ts_us; + u32 pid; + u32 saddr; + u32 daddr; + u64 ports; + u64 rx_b; + u64 tx_b; + u64 span_us; + char task[TASK_COMM_LEN]; +}; +BPF_PERF_OUTPUT(ipv4_events); + +struct id_t { + u32 pid; + char task[TASK_COMM_LEN]; +}; +BPF_HASH(whoami, struct sock *, struct id_t); + +typedef struct backlog_key { + u32 backlog; + u32 saddr; + u16 lport; + u64 slot; +} backlog_key_t; + +BPF_HASH(currsock, u32, struct sock *); + +BPF_HISTOGRAM(syn_backlog, backlog_key_t); diff --git a/gazer/main.py b/gazer/main.py new file mode 100644 index 0000000..f2cba94 --- /dev/null +++ b/gazer/main.py @@ -0,0 +1,141 @@ +#!/usr/bin/python +# @lint-avoid-python-3-compatibility-imports +# +# tcplife Trace the lifespan of TCP sessions and summarize. +# For Linux, uses BCC, BPF. Embedded C. +# +# USAGE: tcplife [-h] [-C] [-S] [-p PID] [-4 | -6] [interval [count]] +# +# This uses the sock:inet_sock_set_state tracepoint if it exists (added to +# Linux 4.16, and replacing the earlier tcp:tcp_set_state), else it uses +# kernel dynamic tracing of tcp_set_state(). +# +# While throughput counters are emitted, they are fetched in a low-overhead +# manner: reading members of the tcp_info struct on TCP close. ie, we do not +# trace send/receive. +# +# Copyright 2016 Netflix, Inc. +# Licensed under the Apache License, Version 2.0 (the "License") +# +# IDEA: Julia Evans +# +# 18-Oct-2016 Brendan Gregg Created this. +# 29-Dec-2017 " " Added tracepoint support. + +from __future__ import print_function + +import time + +from bcc import BPF +import argparse +from socket import inet_ntop, AF_INET, inet_aton +from struct import pack, unpack + +# arguments +examples = """examples: + ./tcplife # trace all TCP connect()s + ./tcplife -T # include time column (HH:MM:SS) + ./tcplife -w # wider columns (fit IPv6) + ./tcplife -stT # csv output, with times & timestamps + ./tcplife -p 181 # only trace PID 181 + ./tcplife -L 80 # only trace local port 80 + ./tcplife -L 80,81 # only trace local ports 80 and 81 + ./tcplife -D 80 # only trace remote port 80 + ./tcplife -4 # only trace IPv4 family + ./tcplife -6 # only trace IPv6 family +""" +parser = argparse.ArgumentParser( + description="Trace the lifespan of TCP sessions and summarize", + formatter_class=argparse.RawDescriptionHelpFormatter, + epilog=examples) +parser.add_argument("-w", "--wide", action="store_true", + help="wide column output (fits IPv6 addresses)") +parser.add_argument("-p", "--pid", + help="trace this PID only") +parser.add_argument("-a", "--addr", + help="filter for address") +parser.add_argument("--ebpf", action="store_true", + help=argparse.SUPPRESS) +args = parser.parse_args() +debug = 0 + +# define BPF program +with open('bpf.c', 'r') as f: + bpf_text = f.read() + +with open('sock_state.c', 'r') as f: + bpf_text += f.read() + +with open('syn_backlog.c', 'r') as f: + bpf_text += f.read() + +# code substitutions +if args.pid: + bpf_text = bpf_text.replace('FILTER_PID', + 'if (pid != %s) { return 0; }' % args.pid) + +if args.addr: + bpf_text = bpf_text.replace('ADDRFILTER', + """if (data4.saddr != {0} || data4.daddr != {0}) + return 0;""".format(unpack("=I", inet_aton(args.addr))[0])) + +bpf_text = bpf_text.replace('FILTER_PID', '') +bpf_text = bpf_text.replace('ADDRFILTER', '') + +# +# Setup output formats +# +# Don't change the default output (next 2 lines): this fits in 80 chars. I +# know it doesn't have NS or UIDs etc. I know. If you really, really, really +# need to add columns, columns that solve real actual problems, I'd start by +# adding an extended mode (-x) to included those columns. +# +header_string = "%-5s %-10.10s %s%-15s %-5s %-15s %-5s %5s %5s %s" +format_string = "%-5d %-10.10s %s%-15s %-5d %-15s %-5d %5d %5d %.2f" + + +# process event +def print_ipv4_event(cpu, data, size): + event = b["ipv4_events"].event(data) + print(format_string % (event.pid, event.task.decode('utf-8', 'replace'), + "", + inet_ntop(AF_INET, pack("I", event.saddr)), event.ports >> 32, + inet_ntop(AF_INET, pack("I", event.daddr)), event.ports & 0xffffffff, + event.tx_b, event.rx_b, float(event.span_us) / 1000)) + + +# initialize BPF +b = BPF(text=bpf_text) + +# header +print(header_string % ("PID", "COMM", "", "LADDR", + "LPORT", "RADDR", "RPORT", "TX_KB", "RX_KB", "MS")) + +start_ts = 0 + +# read events +b["ipv4_events"].open_perf_buffer(print_ipv4_event, page_cnt=64) +# b.attach_kprobe(event="tcp_v4_syn_recv_sock", fn_name="update_syn_backlog") +while 1: + try: + b.perf_buffer_poll() + except KeyboardInterrupt: + # exit() + break + +# while True: +# try: +# time.sleep(999999) +# except KeyboardInterrupt: +# break + +dist = b['syn_backlog'] +print() +for item in dist.items(): + print({"backlog": item[0].backlog, + "slot": item[0].slot, + "saddr": inet_ntop(AF_INET, pack("I", item[0].saddr)), + "lport": item[0].lport, + "value": item[1].value}) + + diff --git a/gazer/service.yaml b/gazer/service.yaml new file mode 100644 index 0000000..d9494b8 --- /dev/null +++ b/gazer/service.yaml @@ -0,0 +1,29 @@ +apiVersion: v1 +kind: Pod +metadata: + name: bcc +spec: + containers: + - command: [ "sh", "-c", "sleep 60d" ] + image: zlim/bcc + imagePullPolicy: IfNotPresent + name: bcc + resources: {} + securityContext: + privileged: true + volumeMounts: + - mountPath: /lib/modules + name: host-modules + readOnly: true + - mountPath: /usr/src + name: host-usr-src + readOnly: true + volumes: + - hostPath: + path: /lib/modules + type: Directory + name: host-modules + - hostPath: + path: /usr/src + type: Directory + name: host-usr-src diff --git a/gazer/sock_state.c b/gazer/sock_state.c new file mode 100644 index 0000000..2633d6e --- /dev/null +++ b/gazer/sock_state.c @@ -0,0 +1,108 @@ +TRACEPOINT_PROBE(sock, inet_sock_set_state) +{ + if (args->protocol != IPPROTO_TCP) + return 0; + + u32 pid = bpf_get_current_pid_tgid() >> 32; + // sk is mostly used as a UUID, and for two tcp stats: + struct sock *sk = (struct sock *)args->skaddr; + + // lport is either used in a filter here, or later + u16 lport = args->sport; + + // dport is either used in a filter here, or later + u16 dport = args->dport; + + /* + * This tool includes PID and comm context. It's best effort, and may + * be wrong in some situations. It currently works like this: + * - record timestamp on any state < TCP_FIN_WAIT1 + * - cache task context on: + * TCP_SYN_SENT: tracing from client + * TCP_LAST_ACK: client-closed from server + * - do output on TCP_CLOSE: + * fetch task context if cached, or use current task + */ + + // capture birth time + if (args->newstate < TCP_FIN_WAIT1) { + /* + * Matching just ESTABLISHED may be sufficient, provided no code-path + * sets ESTABLISHED without a tcp_set_state() call. Until we know + * that for sure, match all early states to increase chances a + * timestamp is set. + * Note that this needs to be set before the PID filter later on, + * since the PID isn't reliable for these early stages, so we must + * save all timestamps and do the PID filter later when we can. + */ + u64 ts = bpf_ktime_get_ns(); + birth.update(&sk, &ts); + } + + // record PID & comm on SYN_SENT + if (args->newstate == TCP_SYN_SENT || args->newstate == TCP_LAST_ACK) { + // now we can PID filter, both here and a little later on for CLOSE + FILTER_PID + struct id_t me = {.pid = pid}; + bpf_get_current_comm(&me.task, sizeof(me.task)); + whoami.update(&sk, &me); + } + + if (args->newstate != TCP_CLOSE) + return 0; + + // calculate lifespan + u64 *tsp, delta_us; + tsp = birth.lookup(&sk); + if (tsp == 0) { + whoami.delete(&sk); // may not exist + return 0; // missed create + } + delta_us = (bpf_ktime_get_ns() - *tsp) / 1000; + birth.delete(&sk); + + // fetch possible cached data, and filter + struct id_t *mep; + mep = whoami.lookup(&sk); + if (mep != 0) + pid = mep->pid; + FILTER_PID + + u16 family = args->family; + + // get throughput stats. see tcp_get_info(). + u64 rx_b = 0, tx_b = 0, sport = 0; + struct tcp_sock *tp = (struct tcp_sock *)sk; + rx_b = tp->bytes_received; + tx_b = tp->bytes_acked; + + if (args->family == AF_INET) { + struct ipv4_data_t data4 = {}; + data4.span_us = delta_us; + data4.rx_b = rx_b; + data4.tx_b = tx_b; + data4.ts_us = bpf_ktime_get_ns() / 1000; + __builtin_memcpy(&data4.saddr, args->saddr, sizeof(data4.saddr)); + __builtin_memcpy(&data4.daddr, args->daddr, sizeof(data4.daddr)); + // a workaround until data4 compiles with separate lport/dport + data4.ports = dport + ((0ULL + lport) << 32); + data4.pid = pid; + + ADDRFILTER + + if (mep == 0) { + bpf_get_current_comm(&data4.task, sizeof(data4.task)); + } else { + bpf_probe_read_kernel(&data4.task, sizeof(data4.task), (void *)mep->task); + } + ipv4_events.perf_submit(args, &data4, sizeof(data4)); + + } else /* 6 */ { + return 0; + } + + if (mep != 0) + whoami.delete(&sk); + + return 0; +} \ No newline at end of file diff --git a/gazer/syn_backlog.c b/gazer/syn_backlog.c new file mode 100644 index 0000000..125ac86 --- /dev/null +++ b/gazer/syn_backlog.c @@ -0,0 +1,31 @@ +// https://www.kernel.org/doc/htmldocs/networking/API-struct-sock.html + +int kprobe__tcp_v4_syn_recv_sock(struct pt_regs *ctx, struct sock *sk){ + u32 pid = bpf_get_current_pid_tgid(); + // stash the sock ptr for lookup on return + currsock.update(&pid, &sk); + return 0; +}; + +int kretprobe__tcp_v4_syn_recv_sock(struct pt_regs *ctx){ + struct sock *newsk = (struct sock *)PT_REGS_RC(ctx); + + u32 pid = bpf_get_current_pid_tgid(); + + struct sock **skpp; + skpp = currsock.lookup(&pid); + currsock.delete(&pid); + if (skpp == 0) { + return 0; // missed entry + } + + struct sock *skp = *skpp; + + backlog_key_t key = {}; + key.backlog = skp->sk_max_ack_backlog; + key.saddr = newsk->__sk_common.skc_rcv_saddr; + key.lport = newsk->__sk_common.skc_num; + key.slot = bpf_log2l(skp->sk_ack_backlog); + syn_backlog.atomic_increment(key); + return 0; +} \ No newline at end of file diff --git a/gazer/tests/echo.go b/gazer/tests/echo.go new file mode 100644 index 0000000..574a9f6 --- /dev/null +++ b/gazer/tests/echo.go @@ -0,0 +1,33 @@ +package main + +import ( + "fmt" + "net/http" + "time" +) + +func getIPAddress(r *http.Request) string { + + ipAddress := r.RemoteAddr + fwdAddress := r.Header.Get("X-Forwarded-For") + if fwdAddress != "" { + ipAddress = fwdAddress + } + return ipAddress +} + +func handlerPing(w http.ResponseWriter, r *http.Request) { + w.Write([]byte("pong")) + fmt.Println(time.Now(), getIPAddress(r), r.Method, r.RequestURI, r.UserAgent()) +} + +func main() { + http.HandleFunc("/ping", handlerPing) + + fmt.Println("ping listening on 0.0.0.0, port 80") + err := http.ListenAndServe(":8081", nil) + + if err != nil { + fmt.Println("Error starting ping server: ", err) + } +} diff --git a/gazer/tests/echo.py b/gazer/tests/echo.py new file mode 100644 index 0000000..a2d7cc0 --- /dev/null +++ b/gazer/tests/echo.py @@ -0,0 +1,31 @@ +import time +from flask import Flask, request, Response +import json +import requests + +app = Flask('service') + + +@app.route("/", methods=["POST"]) +def handler(): + start_time = time.time() + try: + endpoint = request.data.decode() + if not endpoint: + endpoint = "https://1.1.1.1" + except Exception: + endpoint = "https://1.1.1.1" + res = requests.get(endpoint) + duration = time.time() - start_time + content_size = len(res.content) + return Response(json.dumps({"endpoint": endpoint, "duration": duration, "content_size": content_size}), + mimetype='application/json') + + +@app.route("/ping", methods=["GET"]) +def ping(): + return "pong" + + +if __name__ == '__main__': + app.run(host='0.0.0.0', port=8080)