diff --git a/bpf/imports.go b/bpf/imports.go index c55f47dd49..69a3d65afd 100644 --- a/bpf/imports.go +++ b/bpf/imports.go @@ -16,6 +16,5 @@ import ( _ "go.opentelemetry.io/obi/bpf/netolly" _ "go.opentelemetry.io/obi/bpf/pid" _ "go.opentelemetry.io/obi/bpf/rdns" - _ "go.opentelemetry.io/obi/bpf/tctracer" _ "go.opentelemetry.io/obi/bpf/watcher" ) diff --git a/bpf/maps/sock_dir.h b/bpf/maps/sock_dir.h index 73e0610f7f..5e96552992 100644 --- a/bpf/maps/sock_dir.h +++ b/bpf/maps/sock_dir.h @@ -6,8 +6,6 @@ #include #include -#include - // A map of sockets which we track with sock_ops. The sock_msg // program subscribes to this map and runs for each new socket // activity @@ -16,6 +14,6 @@ struct { __uint(type, BPF_MAP_TYPE_SOCKHASH); __uint(max_entries, 65535); - __uint(key_size, sizeof(connection_info_t)); + __uint(key_size, sizeof(u64)); __uint(value_size, sizeof(u32)); } sock_dir SEC(".maps"); diff --git a/bpf/tctracer/placeholder.go b/bpf/tctracer/placeholder.go deleted file mode 100644 index c4b31df306..0000000000 --- a/bpf/tctracer/placeholder.go +++ /dev/null @@ -1,6 +0,0 @@ -// Copyright The OpenTelemetry Authors -// SPDX-License-Identifier: Apache-2.0 - -//go:build obi_bpf - -package tctracer // import "go.opentelemetry.io/obi/bpf/tctracer" diff --git a/bpf/tctracer/tctracer.c b/bpf/tctracer/tctracer.c deleted file mode 100644 index e602649bb2..0000000000 --- a/bpf/tctracer/tctracer.c +++ /dev/null @@ -1,725 +0,0 @@ -// Copyright The OpenTelemetry Authors -// SPDX-License-Identifier: Apache-2.0 - -//go:build obi_bpf_ignore -#include -#include -#include - -#include -#include - -#include -#include -#include -#include - -#include -#include -#include -#include - -char __license[] SEC("license") = "Dual MIT/GPL"; - -// these are uppercase to adhere to the IPPROTO_* defines -enum { - IPPROTO_HOPOPTS = 0, // IPv6 hop-by-hop options - IPPROTO_ROUTING = 43, // IPv6 routing header - IPPROTO_FRAGMENT = 44, // IPv6 fragmentation header - IPPROTO_DSTOPTS = 60, // IPv6 destination options -}; - -enum { k_ip4_option_code = 0x88 }; - -// use an experimental option value defined by RFC-4727 -// see https://www.rfc-editor.org/rfc/rfc4727.html#section-8 -// and https://www.rfc-editor.org/rfc/rfc8200#section-4.1 -// as each individual bit plays a role -enum : u8 { k_ip6_option_code = 0x1e }; - -typedef struct ipv4_opt_t { - u8 type; - u8 len; - u8 trace_id[TRACE_ID_SIZE_BYTES]; - u8 pad[2]; -} ipv4_opt; - -_Static_assert(sizeof(ipv4_opt) == 20, "invalid IPv4 option len"); - -typedef struct ipv6_opt_data_t { - u8 trace_id[TRACE_ID_SIZE_BYTES]; - u8 span_id[SPAN_ID_SIZE_BYTES]; - u32 pad; -} ipv6_opt_data; - -typedef struct ipv6_opt_t { - u8 nexthdr; - u8 hdrlen; - u8 opttype; - u8 optlen; - ipv6_opt_data data; -} ipv6_opt; - -_Static_assert(sizeof(ipv6_opt) % 8 == 0, "ipv6_opt not 8-byte aligned"); -_Static_assert(sizeof(ipv6_opt) == 32, "invalid IPv6 option len"); - -enum protocol { protocol_ip4, protocol_ip6, protocol_unknown }; - -static __always_inline u16 ip_header_off(struct __sk_buff *ctx) { - void *data = ctx_data(ctx); - void *data_end = ctx_data_end(ctx); - - struct ethhdr *eth = data; - - u16 off = sizeof(*eth); - - if (data + off > data_end) { - return 0; - } - - u16 h_proto = bpf_ntohs(eth->h_proto); - - for (u8 i = 0; i < 2; ++i) { - if (h_proto == ETH_P_8021Q || h_proto == ETH_P_8021AD) { - if (data + off + sizeof(struct vlan_hdr) > data_end) { - return 0; - } - - struct vlan_hdr *vh = data + off; - h_proto = bpf_ntohs(vh->h_vlan_encapsulated_proto); - off += sizeof(*vh); - } - } - - if (h_proto != ETH_P_IP && h_proto != ETH_P_IPV6) { - return 0; - } - - return off; -} - -static __always_inline void populate_span_id_from_seq_ack(tp_info_t *tp, u32 seq, u32 ack) { - // We use a combination of the TCP sequence + TCP ack as a SpanID - *((u32 *)(&tp->span_id[0])) = seq; - *((u32 *)(&tp->span_id[4])) = ack; -} - -static __always_inline bool conn_info_from_skb4(struct __sk_buff *skb, connection_info_t *conn) { - const u16 ip4_off = ip_header_off(skb); - - if (ip4_off == 0) { - return false; - } - - const struct iphdr *iphdr = ctx_data(skb) + ip4_off; - - if ((const void *)(iphdr + 1) > ctx_data_end(skb)) { - return false; - } - - if (iphdr->version != 4) { - return false; - } - - __builtin_memcpy(conn->s_addr, ip4ip6_prefix, sizeof(ip4ip6_prefix)); - __builtin_memcpy(conn->d_addr, ip4ip6_prefix, sizeof(ip4ip6_prefix)); - __builtin_memcpy(conn->s_addr + sizeof(ip4ip6_prefix), &iphdr->saddr, sizeof(iphdr->saddr)); - __builtin_memcpy(conn->d_addr + sizeof(ip4ip6_prefix), &iphdr->daddr, sizeof(iphdr->daddr)); - - if (iphdr->protocol != IPPROTO_TCP) { - return false; - } - - const u16 ihl_bytes = iphdr->ihl << 2; - const struct tcphdr *tcp = (struct tcphdr *)((const void *)iphdr + ihl_bytes); - - if ((const void *)(tcp + 1) > ctx_data_end(skb)) { - return false; - } - - conn->s_port = bpf_ntohs(tcp->source); - conn->d_port = bpf_ntohs(tcp->dest); - - return conn; -} - -static __always_inline bool conn_info_from_skb6(struct __sk_buff *skb, connection_info_t *conn) { - const u16 ip6_off = ip_header_off(skb); - - if (ip6_off == 0) { - return false; - } - - const struct ipv6hdr *iphdr = ctx_data(skb) + ip6_off; - - if ((const void *)(iphdr + 1) > ctx_data_end(skb)) { - return false; - } - - if (iphdr->version != 6) { - return false; - } - - __builtin_memcpy(conn->s_addr, &iphdr->saddr, sizeof(iphdr->saddr)); - __builtin_memcpy(conn->d_addr, &iphdr->daddr, sizeof(iphdr->daddr)); - - const void *ptr = (const void *)(iphdr + 1); - - u8 curr_hdr = iphdr->nexthdr; - - // try to find the start of the TCP header - // iterate at most 4 extension headers - for (u8 i = 0; i < 4; ++i) { - if (curr_hdr == IPPROTO_TCP) { - break; - } - - const struct ipv6_opt_hdr *opt_hdr = ptr; - - if ((const void *)(opt_hdr + 1) > ctx_data_end(skb)) { - return conn; - } - - switch (curr_hdr) { - case IPPROTO_HOPOPTS: - case IPPROTO_ROUTING: - case IPPROTO_DSTOPTS: - ptr += (opt_hdr->hdrlen * 8) + 1; - break; - case IPPROTO_FRAGMENT: - ptr += 8; - break; - default: - // don't know how to parse, bail - return false; - } - - curr_hdr = opt_hdr->nexthdr; - } - - if (curr_hdr != IPPROTO_TCP) { - return false; - } - - const struct tcphdr *tcp = (struct tcphdr *)ptr; - - if ((const void *)(tcp + 1) > ctx_data_end(skb)) { - return false; - } - - conn->s_port = bpf_ntohs(tcp->source); - conn->d_port = bpf_ntohs(tcp->dest); - - return true; -} - -static __always_inline bool conn_info_from_skb(struct __sk_buff *skb, connection_info_t *conn) { - if (skb->protocol == bpf_htons(ETH_P_IP)) { - return conn_info_from_skb4(skb, conn); - } - - if (skb->protocol == bpf_htons(ETH_P_IPV6)) { - return conn_info_from_skb6(skb, conn); - } - - return false; -} - -static __always_inline void print_tp(const char *prefix, const tp_info_t *tp) { - if (!g_bpf_debug) { - return; - } - - unsigned char tp_buf[TP_MAX_VAL_LENGTH + 1]; - tp_buf[TP_MAX_VAL_LENGTH] = '\0'; - - make_tp_string(tp_buf, tp); - bpf_dbg_printk("prefix=%s, tp=%s", prefix, tp_buf); -} - -static __always_inline void update_outgoing_request_span_id(const connection_info_t *conn, - const egress_key_t *e_key, - const tp_info_pid_t *tp_p) { - - const pid_connection_info_t p_conn = {.conn = *conn, .pid = tp_p->pid}; - - http_info_t *h_info = bpf_map_lookup_elem(&ongoing_http, &p_conn); - - if (h_info) { - __builtin_memcpy(h_info->tp.span_id, tp_p->tp.span_id, SPAN_ID_SIZE_BYTES); - - print_tp("Found HTTP info, reset", &h_info->tp); - } - - go_addr_key_t *g_key = bpf_map_lookup_elem(&go_ongoing_http, e_key); - - if (!g_key) { - return; - } - - http_func_invocation_t *invocation = - bpf_map_lookup_elem(&go_ongoing_http_client_requests, g_key); - - if (!invocation) { - return; - } - - __builtin_memcpy(invocation->tp.span_id, tp_p->tp.span_id, SPAN_ID_SIZE_BYTES); - - print_tp("Found Go HTTP invocation, reset", &tp_p->tp); -} - -static __always_inline bool parse_ip_options_ipv4(struct __sk_buff *skb, connection_info_t *conn) { - const u16 ip4_off = ip_header_off(skb); - - if (ip4_off == 0) { - return 0; - } - - const struct iphdr *iphdr = ctx_data(skb) + ip4_off; - - if ((const void *)(iphdr + 1) > ctx_data_end(skb)) { - return false; - } - - if (iphdr->version != 4) { - return false; - } - - if (iphdr->ihl < 5) { - // no options present - return false; - } - - if (iphdr->protocol != IPPROTO_TCP) { - return false; - } - - const unsigned char *ptr = (const unsigned char *)(iphdr + 1); - const unsigned char *end = ctx_data_end(skb); - - const u8 k_max_options = 10; - - for (u8 i = 0; i < k_max_options; ++i) { - if (ptr + 2 > end) { - return false; - } - - if (*ptr == 0x0) { - // end of option list - return false; - } - - if (*ptr == 0x1) { - // NOP - single byte option - ++ptr; - continue; - } - - if (*ptr != k_ip4_option_code) { - // not our option - advance at least one byte - const u8 opt_len = *(ptr + 1); - const u8 advance_len = opt_len > 0 ? opt_len : 1; - - ptr += advance_len; - continue; - } - - // found our option, try to parse it - const ipv4_opt *opt = (const ipv4_opt *)ptr; - - if ((const void *)(opt + 1) > (const void *)end) { - return false; - } - - // sanity check - if (opt->len != sizeof(ipv4_opt)) { - bpf_dbg_printk("wrong IPv4 option size, bailing..."); - return false; - } - - const tp_info_pid_t *existing_tp = - (tp_info_pid_t *)bpf_map_lookup_elem(&incoming_trace_map, conn); - - if (existing_tp) { - bpf_dbg_printk("found existing TP - ignoring IPv4 options"); - return false; - } - - const u16 ihl_bytes = iphdr->ihl << 2; - - const struct tcphdr *tcp = (struct tcphdr *)((const void *)iphdr + ihl_bytes); - - if ((const void *)(tcp + 1) > ctx_data_end(skb)) { - return false; - } - - tp_info_pid_t new_tp = {.pid = 0, .valid = 1}; - populate_span_id_from_seq_ack(&new_tp.tp, tcp->seq, tcp->ack_seq); - - _Static_assert(sizeof(new_tp.tp.trace_id) == sizeof(opt->trace_id), - "trace id size mismatch"); - - __builtin_memcpy(new_tp.tp.trace_id, opt->trace_id, sizeof(opt->trace_id)); - - print_tp("Found TP in IPv4 Options", &new_tp.tp); - - bpf_map_update_elem(&incoming_trace_map, conn, &new_tp, BPF_ANY); - - return true; - } - - return false; -} - -static __always_inline bool parse_ip_options_ipv6(struct __sk_buff *skb, connection_info_t *conn) { - const tp_info_pid_t *existing_tp = - (tp_info_pid_t *)bpf_map_lookup_elem(&incoming_trace_map, conn); - - if (existing_tp) { - print_tp("ignoring existing tp", &existing_tp->tp); - return false; - } - - const u16 ip6_off = ip_header_off(skb); - - if (ip6_off == 0) { - return false; - } - - const struct ipv6hdr *iphdr = ctx_data(skb) + ip6_off; - - if ((const void *)(iphdr + 1) > ctx_data_end(skb)) { - return false; - } - - if (iphdr->version != 6) { - return false; - } - - if (iphdr->nexthdr != IPPROTO_DSTOPTS) { - return false; - } - - const ipv6_opt *opt = (ipv6_opt *)(iphdr + 1); - - if ((const void *)(opt + 1) > ctx_data_end(skb)) { - return false; - } - - if (opt->opttype != k_ip6_option_code) { - return false; - } - - if (opt->optlen != sizeof(ipv6_opt_data)) { - return false; - } - - const u8 expected_hdr_len = (sizeof(ipv6_opt) / 8) - 1; - - if (opt->hdrlen != expected_hdr_len) { - return false; - } - - tp_info_pid_t new_tp = {.pid = 0, .valid = 1}; - - __builtin_memcpy(new_tp.tp.trace_id, opt->data.trace_id, TRACE_ID_SIZE_BYTES); - __builtin_memcpy(new_tp.tp.span_id, opt->data.span_id, SPAN_ID_SIZE_BYTES); - - print_tp(__func__, &new_tp.tp); - bpf_map_update_elem(&incoming_trace_map, conn, &new_tp, BPF_ANY); - - return true; -} - -static __always_inline void inject_tc_ip_options_ipv4(struct __sk_buff *skb, tp_info_pid_t *tp) { - const u16 ip4_off = ip_header_off(skb); - - if (ip4_off == 0) { - return; - } - - struct iphdr *iphdr = ctx_data(skb) + ip4_off; - - if ((void *)(iphdr + 1) > ctx_data_end(skb)) { - return; - } - - if (iphdr->version != 4) { - return; - } - - if (iphdr->protocol != IPPROTO_TCP) { - return; - } - - if (bpf_skb_adjust_room(skb, sizeof(ipv4_opt), BPF_ADJ_ROOM_NET, 0) != 0) { - return; - } - - // reload pointers - iphdr = ctx_data(skb) + ip4_off; - - if ((void *)(iphdr + 1) > ctx_data_end(skb)) { - return; - } - - const u16 ihl_bytes = iphdr->ihl << 2; - - unsigned char *ptr = ((unsigned char *)iphdr) + ihl_bytes; - unsigned char *ptr_b = ptr; - - if ((void *)ptr + sizeof(ipv4_opt) > ctx_data_end(skb)) { - return; - } - - *ptr++ = k_ip4_option_code; - *ptr++ = sizeof(ipv4_opt); - - __builtin_memcpy(ptr, tp->tp.trace_id, TRACE_ID_SIZE_BYTES); - - ptr += TRACE_ID_SIZE_BYTES; - *ptr++ = 0; - *ptr++ = 0; - - // update IP header and checksum - const u16 old_vihl_tos = *(u16 *)iphdr; - - iphdr->ihl += (sizeof(ipv4_opt) >> 2); - - const u16 new_vihl_tos = *(u16 *)iphdr; - - const u16 old_tot_len = iphdr->tot_len; - - iphdr->tot_len = bpf_htons(bpf_ntohs(iphdr->tot_len) + sizeof(ipv4_opt)); - - const u16 new_tot_len = iphdr->tot_len; - - u32 sum = ~iphdr->check & 0xffff; - - iphdr->check = 0; - - const u32 opt_sum = bpf_csum_diff(NULL, 0, (__be32 *)ptr_b, ptr - ptr_b, 0); - - sum += opt_sum; - sum += (~old_vihl_tos & 0xffff); - sum += new_vihl_tos; - sum += (~old_tot_len & 0xffff); - sum += new_tot_len; - sum = (sum & 0xffff) + (sum >> 16); - sum = (sum & 0xffff) + (sum >> 16); - - const u16 new_check = ~sum; - iphdr->check = new_check; - - const struct tcphdr *tcp = (struct tcphdr *)ptr; - - if ((const void *)(tcp + 1) > ctx_data_end(skb)) { - return; - } - - populate_span_id_from_seq_ack(&tp->tp, tcp->seq, tcp->ack_seq); - - print_tp("injected", &tp->tp); -} - -static __always_inline void inject_tc_ip_options_ipv6(struct __sk_buff *skb, - const tp_info_pid_t *tp) { - const u16 ip6_off = ip_header_off(skb); - - if (ip6_off == 0) { - return; - } - - struct ipv6hdr *iphdr = ctx_data(skb) + ip6_off; - - if ((void *)(iphdr + 1) > ctx_data_end(skb)) { - return; - } - - if (iphdr->version != 6) { - return; - } - - const u8 nexthdr = iphdr->nexthdr; - - // https://www.rfc-editor.org/rfc/rfc8200#section-4.1 defines the - // constraints for header count and ordering. To keep things from - // breaking, we only inject the options when dealing with TCP packets and - // no other options are present - if (nexthdr != IPPROTO_TCP) { - return; - } - - if (bpf_skb_adjust_room(skb, sizeof(ipv6_opt), BPF_ADJ_ROOM_NET, 0) != 0) { - return; - } - - // reload pointers - iphdr = ctx_data(skb) + ip6_off; - - ipv6_opt *opt = (ipv6_opt *)(iphdr + 1); - - if ((void *)(opt + 1) > ctx_data_end(skb)) { - return; - } - - iphdr->nexthdr = IPPROTO_DSTOPTS; - iphdr->payload_len = bpf_htons(bpf_ntohs(iphdr->payload_len) + sizeof(ipv6_opt)); - - opt->nexthdr = nexthdr; - opt->hdrlen = (sizeof(ipv6_opt) / 8) - 1; - opt->opttype = k_ip6_option_code; - opt->optlen = sizeof(ipv6_opt_data); - - __builtin_memcpy(opt->data.trace_id, tp->tp.trace_id, TRACE_ID_SIZE_BYTES); - __builtin_memcpy(opt->data.span_id, tp->tp.span_id, SPAN_ID_SIZE_BYTES); - - opt->data.pad = 0; - - print_tp("injected", &tp->tp); -} - -static __always_inline u8 is_sock_tracked(const connection_info_t *conn) { - struct bpf_sock *sk = (struct bpf_sock *)bpf_map_lookup_elem(&sock_dir, conn); - - if (sk) { - bpf_sk_release(sk); - return 1; - } - - return 0; -} - -static __always_inline void track_sock(struct __sk_buff *skb, const connection_info_t *conn) { - if (is_sock_tracked(conn)) { - return; - } - - struct bpf_sock_tuple tuple = {}; - - u32 tuple_size = 0; - - if (skb->protocol == bpf_htons(ETH_P_IPV6)) { - __builtin_memcpy(tuple.ipv6.saddr, conn->s_addr, IP_V6_ADDR_LEN); - __builtin_memcpy(tuple.ipv6.daddr, conn->d_addr, IP_V6_ADDR_LEN); - - tuple.ipv6.sport = bpf_htons(conn->s_port); - tuple.ipv6.dport = bpf_htons(conn->d_port); - - tuple_size = sizeof(tuple.ipv6); - } else if (skb->protocol == bpf_htons(ETH_P_IP)) { - __builtin_memcpy(&tuple.ipv4.saddr, conn->s_addr + sizeof(ip4ip6_prefix), sizeof(u32)); - __builtin_memcpy(&tuple.ipv4.saddr, conn->s_addr + sizeof(ip4ip6_prefix), sizeof(u32)); - __builtin_memcpy(&tuple.ipv4.daddr, conn->d_addr + sizeof(ip4ip6_prefix), sizeof(u32)); - - tuple.ipv4.sport = bpf_htons(conn->s_port); - tuple.ipv4.dport = bpf_htons(conn->d_port); - - tuple_size = sizeof(tuple.ipv4); - } else { - return; - } - - // this MUST be a signed 32-bit number - const s32 BPF_F_CURRENT_NETNS = -1; - - struct bpf_sock *sk = bpf_sk_lookup_tcp(skb, &tuple, tuple_size, BPF_F_CURRENT_NETNS, 0); - - if (!sk) { - return; - } - - bpf_map_update_elem(&sock_dir, conn, sk, BPF_NOEXIST); - - bpf_sk_release(sk); -} - -static __always_inline bool parse_ip_options(struct __sk_buff *skb, connection_info_t *conn) { - if (skb->protocol == bpf_htons(ETH_P_IP)) { - return parse_ip_options_ipv4(skb, conn); - } - - if (skb->protocol == bpf_htons(ETH_P_IPV6)) { - return parse_ip_options_ipv6(skb, conn); - } - - return false; -} - -static __always_inline void inject_ip_options(struct __sk_buff *skb, - const connection_info_t *conn) { - const egress_key_t e_key = { - .d_port = conn->d_port, - .s_port = conn->s_port, - }; - - tp_info_pid_t *tp = bpf_map_lookup_elem(&outgoing_trace_map, &e_key); - - if (!tp) { - return; - } - - if (tp->written) { - bpf_dbg_printk("tp already written by L7, not injecting IP options"); - bpf_map_delete_elem(&outgoing_trace_map, &e_key); - return; - } - - if (skb->protocol == bpf_htons(ETH_P_IPV6)) { - bpf_dbg_printk("Adding the trace_id in IPv6 Destination Options"); - - inject_tc_ip_options_ipv6(skb, tp); - - bpf_map_delete_elem(&outgoing_trace_map, &e_key); - } else if (skb->protocol == bpf_htons(ETH_P_IP)) { - bpf_dbg_printk("Adding the trace_id in the IP Options"); - - inject_tc_ip_options_ipv4(skb, tp); - - bpf_map_delete_elem(&outgoing_trace_map, &e_key); - - // We look up metadata setup by the Go uprobes or the kprobes on - // a transaction we consider outgoing HTTP request. We will extend this in - // the future for other protocols, e.g. gRPC/HTTP2. - // The metadata always comes setup with the state field valid = 1, which - // means we haven't seen this request yet. - // If it's the first packet of a request: - // We set the span information to match our TCP information. This - // is done for L4 context propagation, where we use the SEQ/ACK - // numbers for the Span ID. Since this is the first time we see - // these SEQ,ACK ids, we update the random Span ID the metadata has - // to match what we send over the wire. - update_outgoing_request_span_id(conn, &e_key, tp); - } -} - -static __always_inline void process_ip_options(struct __sk_buff *skb) { - connection_info_t conn = {}; - - if (!conn_info_from_skb(skb, &conn)) { - return; - } - - track_sock(skb, &conn); - - sort_connection_info(&conn); - - if (parse_ip_options(skb, &conn)) { - return; - } - - inject_ip_options(skb, &conn); -} - -SEC("tc_egress") -int obi_app_egress(struct __sk_buff *skb) { - process_ip_options(skb); - return TC_ACT_UNSPEC; -} - -SEC("tc_ingress") -int obi_app_ingress(struct __sk_buff *skb) { - process_ip_options(skb); - return TC_ACT_UNSPEC; -} diff --git a/bpf/tpinjector/sock_iter.c b/bpf/tpinjector/sock_iter.c new file mode 100644 index 0000000000..7b1317ef36 --- /dev/null +++ b/bpf/tpinjector/sock_iter.c @@ -0,0 +1,109 @@ +// Copyright The OpenTelemetry Authors +// SPDX-License-Identifier: Apache-2.0 + +#include +#include +#include +#include + +#include + +#include + +#include + +// max IPv6+port: "[ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff]:65535" = 48 chars +enum { k_addr_buf_len = 48 }; + +static __always_inline void format_in_addr(__be32 addr, u16 port, char buf[k_addr_buf_len]) { + BPF_SNPRINTF(buf, + k_addr_buf_len, + "%u.%u.%u.%u:%u", + (addr) & 0xFF, + (addr >> 8) & 0xFF, + (addr >> 16) & 0xFF, + (addr >> 24) & 0xFF, + port); +} + +static __always_inline void format_sock_addrs_v4(struct sock_common *skc, + char src_buf[k_addr_buf_len], + char dst_buf[k_addr_buf_len], + u16 src_port, + __be16 dst_port) { + format_in_addr(BPF_CORE_READ(skc, skc_rcv_saddr), src_port, src_buf); + format_in_addr(BPF_CORE_READ(skc, skc_daddr), bpf_ntohs(dst_port), dst_buf); +} + +static __always_inline void +format_in6_addr(const struct in6_addr *addr, u16 port, char buf[k_addr_buf_len]) { + BPF_SNPRINTF(buf, + k_addr_buf_len, + "[%x:%x:%x:%x:%x:%x:%x:%x]:%u", + bpf_ntohs(addr->in6_u.u6_addr16[0]), + bpf_ntohs(addr->in6_u.u6_addr16[1]), + bpf_ntohs(addr->in6_u.u6_addr16[2]), + bpf_ntohs(addr->in6_u.u6_addr16[3]), + bpf_ntohs(addr->in6_u.u6_addr16[4]), + bpf_ntohs(addr->in6_u.u6_addr16[5]), + bpf_ntohs(addr->in6_u.u6_addr16[6]), + bpf_ntohs(addr->in6_u.u6_addr16[7]), + port); +} + +static __always_inline void format_sock_addrs_v6(struct sock_common *skc, + char src_buf[k_addr_buf_len], + char dst_buf[k_addr_buf_len], + u16 src_port, + __be16 dst_port) { + struct in6_addr src6; + struct in6_addr dst6; + + BPF_CORE_READ_INTO(&src6, skc, skc_v6_rcv_saddr); + BPF_CORE_READ_INTO(&dst6, skc, skc_v6_daddr); + + format_in6_addr(&src6, src_port, src_buf); + format_in6_addr(&dst6, bpf_ntohs(dst_port), dst_buf); +} + +static __always_inline void format_sock_addrs(struct sock_common *skc, + char src_buf[k_addr_buf_len], + char dst_buf[k_addr_buf_len]) { + const u16 family = BPF_CORE_READ(skc, skc_family); + const __be16 dst_port = BPF_CORE_READ(skc, skc_dport); + const u16 src_port = BPF_CORE_READ(skc, skc_num); + + if (family == AF_INET) { + format_sock_addrs_v4(skc, src_buf, dst_buf, src_port, dst_port); + } else { + format_sock_addrs_v6(skc, src_buf, dst_buf, src_port, dst_port); + } +} + +SEC("iter/tcp") +int obi_sk_iter_tcp(struct bpf_iter__tcp *ctx) { + struct sock_common *skc = ctx->sk_common; + + if (!skc) { + return 0; + } + + const u64 cookie = bpf_get_socket_cookie(skc); + + char src_buf[k_addr_buf_len] = {}; + char dst_buf[k_addr_buf_len] = {}; + + format_sock_addrs(skc, src_buf, dst_buf); + + struct seq_file *seq = ctx->meta->seq; + + BPF_SEQ_PRINTF(seq, "Tracking socket cookie=%llu src=%s dst=%s\n", cookie, src_buf, dst_buf); + + bpf_d_printk("Tracking socket cookie=%llu src=%s dst=%s", cookie, src_buf, dst_buf); + + if (bpf_map_update_elem(&sock_dir, &cookie, skc, BPF_NOEXIST) != 0) { + bpf_dbg_printk("Failed to track sock cookie=%llu", cookie); + } + + return 0; +} diff --git a/bpf/tpinjector/tpinjector.c b/bpf/tpinjector/tpinjector.c index 0f23f6d3a5..b679d1d494 100644 --- a/bpf/tpinjector/tpinjector.c +++ b/bpf/tpinjector/tpinjector.c @@ -294,9 +294,9 @@ static __always_inline void bpf_sock_ops_set_flags(struct bpf_sock_ops *skops, u // Helper that writes in the sock map for a sock_ops program static __always_inline void bpf_sock_ops_active_est_cb(struct bpf_sock_ops *skops) { - connection_info_t conn = get_connection_info_ops(skops); + const u64 cookie = bpf_get_socket_cookie(skops); - bpf_sock_hash_update(skops, &sock_dir, &conn, BPF_ANY); + bpf_sock_hash_update(skops, &sock_dir, (void *)&cookie, BPF_ANY); bpf_sock_ops_set_flags(skops, BPF_SOCK_OPS_WRITE_HDR_OPT_CB_FLAG); } @@ -975,3 +975,5 @@ int obi_packet_extender_create_tp(struct sk_msg_md *msg) { return SK_PASS; } + +#include "sock_iter.c" diff --git a/devdocs/context-propagation.md b/devdocs/context-propagation.md index 7d211bd3e8..7eefdb5b88 100644 --- a/devdocs/context-propagation.md +++ b/devdocs/context-propagation.md @@ -15,25 +15,24 @@ This document explains how OpenTelemetry context propagation works in the eBPF i - [Case 1: Traffic in sockmap with Go/SSL uprobes](#case-1-traffic-in-sockmap-with-gossl-uprobes) - [Case 2: Traffic in sockmap without uprobes (plain HTTP via kprobes)](#case-2-traffic-in-sockmap-without-uprobes-plain-http-via-kprobes) - [Case 3: Traffic NOT in sockmap (tpinjector doesn't run)](#case-3-traffic-not-in-sockmap-tpinjector-doesnt-run) - - [Case 4: TCP option injection fails](#case-4-tcp-option-injection-fails) - [Ingress (Receiving) Flow](#ingress-receiving-flow) - - [Execution Order](#execution-order) + - [Execution Order](#execution-order-1) - ["Last One Wins" Strategy](#last-one-wins-strategy) - [Why "Last One Wins" on Ingress?](#why-last-one-wins-on-ingress) - [The outgoing_trace_map](#the-outgoing_trace_map) - [tp_info_pid_t::valid (u8)](#tp_info_pid_tvalid-u8) - [tp_info_pid_t::written (u8)](#tp_info_pid_twritten-u8) - [The incoming_trace_map](#the-incoming_trace_map) +- [The sock_dir sockmap](#the-sock_dir-sockmap) - [Summary](#summary) - [Logs correlation](#logs-correlation) ## Overview -Context propagation allows distributed tracing by injecting trace context (trace ID, span ID) into outgoing requests. The eBPF instrumentation supports multiple injection methods organized in a fallback hierarchy: +Context propagation allows distributed tracing by injecting trace context (trace ID, span ID) into outgoing requests. The eBPF instrumentation supports two injection methods: 1. **HTTP headers** (L7) - `Traceparent:` header in plaintext HTTP requests 2. **TCP options** (L4) - Custom TCP option (kind 25) for any TCP traffic -3. **IP options** (L3) - IPv4 options or IPv6 Destination Options as fallback ## Configuration @@ -41,15 +40,14 @@ Context propagation is controlled via `OTEL_EBPF_BPF_CONTEXT_PROPAGATION` which - `headers` - Inject HTTP headers - `tcp` - Inject TCP options -- `ip` - Inject IP options - `all` - Enable all methods (default) - `disabled` - Disable context propagation Examples: - `headers,tcp` - HTTP headers for plaintext HTTP, TCP options otherwise -- `tcp,ip` - TCP options with IP options as fallback - `tcp` - TCP options only +- `headers` - HTTP headers only ## Egress (Sending) Flow @@ -73,10 +71,6 @@ The order in which BPF programs execute varies depending on whether Go uprobes o - Checks `written` flag to reuse trace info - Deletes from `outgoing_trace_map` if tpinjector handled it -4. **TC egress (tctracer)** - - Injects IP options if not handled by upper layers - - Checks `written` flag for mutual exclusion - #### Scenario B: Plain HTTP (no uprobes, kprobes only) 1. **sk_msg (tpinjector)** @@ -90,19 +84,13 @@ The order in which BPF programs execute varies depending on whether Go uprobes o - Checks `written` flag - if set, reuses trace from tpinjector - Deletes from `outgoing_trace_map` if tpinjector handled it -3. **TC egress (tctracer)** - - Injects IP options if not handled by upper layers - - Checks `written` flag for mutual exclusion - #### Scenario C: Non-HTTP TCP (no uprobes, socket not in sockmap) 1. **kprobe (tcp_sendmsg)** - Creates trace info in `outgoing_trace_map` - Sets `valid=1, written=0` -2. **TC egress (tctracer)** - - Sees `written=0`, injects IP options as fallback - - Sets `valid=0` after injection +Note: tpinjector does not run for this traffic because the socket was not in `sock_dir`. The `iter/tcp` iterator pre-populates `sock_dir` at startup for existing connections; new connections are added via `BPF_SOCK_OPS`. ### Mutual Exclusion Mechanism @@ -119,8 +107,6 @@ The `written` flag implements mutual exclusion through the natural execution ord - Sees valid=0 (SSL), deletes outgoing_trace_map entry 3. protocol_http runs: - Lookup fails (entry deleted), skips -4. tctracer runs: - - Lookup fails (entry deleted), no IP injection Result: TCP options only ✓ ``` @@ -144,9 +130,6 @@ The uprobe attempts approach 1 first. If successful, it deletes the `outgoing_tr 4. protocol_http runs: - If written=1: reuses trace, deletes outgoing_trace_map - If written=0: creates new trace -5. tctracer runs: - - If entry deleted: no IP injection - - If entry exists with written=0: injects IP options Result: HTTP headers (via uprobe OR sk_msg) + TCP options ✓ ``` @@ -163,8 +146,6 @@ Result: HTTP headers (via uprobe OR sk_msg) + TCP options ✓ 2. protocol_http (kprobe) runs: - Sees written=1, reuses trace from tpinjector - Deletes outgoing_trace_map -3. tctracer runs: - - Lookup fails (entry deleted), no IP injection Result: HTTP headers + TCP options ✓ ``` @@ -179,62 +160,43 @@ Result: HTTP headers + TCP options ✓ 2. protocol_http (kprobe) runs: - Sees written=1, reuses trace from tpinjector - Deletes outgoing_trace_map -3. tctracer runs: - - Lookup fails (entry deleted), no IP injection Result: TCP options only ✓ ``` #### Case 3: Traffic NOT in sockmap (tpinjector doesn't run) -**For any traffic:** - ``` 1. Kprobe sets valid=1, written=0 in outgoing_trace_map 2. tpinjector doesn't run (socket not in sockmap) 3. protocol_http runs: - Sees written=0, creates new trace - - Does NOT delete outgoing_trace_map -4. tctracer runs: - - Sees written=0, injects IP options - - Sets valid=0 (done) -Result: IP options as fallback ✓ +Result: no context propagation for this connection ✓ ``` -#### Case 4: TCP option injection fails - -If `bpf_sk_storage_get()` fails in `schedule_write_tcp_option`, the function returns early **without setting written=1**. This allows IP options to be injected as fallback. - ## Ingress (Receiving) Flow ### Execution Order -On ingress, the execution order is different: +On ingress, the execution order is: -1. **TC ingress (tctracer)** - Parses IP options first -2. **BPF_SOCK_OPS (tpinjector)** - Parses TCP options second -3. **kprobe (tcp_recvmsg / protocol_http)** - Parses HTTP headers last +1. **BPF_SOCK_OPS (tpinjector)** - Parses TCP options +2. **kprobe (tcp_recvmsg / protocol_http)** - Parses HTTP headers ### "Last One Wins" Strategy Unlike egress (which uses mutual exclusion), ingress uses a **"last one wins"** approach: -1. **TC ingress** parses IP options (if present) - - Extracts trace_id from IP options - - Generates span_id from TCP seq/ack - - Stores in `incoming_trace_map` - -2. **BPF_SOCK_OPS** parses TCP options (if present) +1. **BPF_SOCK_OPS** parses TCP options (if present) - Extracts trace_id and span_id from TCP option - - **Overwrites** entry in `incoming_trace_map` + - Stores in `incoming_trace_map` -3. **protocol_http** parses HTTP headers (if present) +2. **protocol_http** parses HTTP headers (if present) - Extracts trace_id, span_id, flags from `Traceparent:` header - **Overwrites** previous values This creates a natural priority hierarchy: -- **IP options**: Lowest priority (most likely to be stripped by middleboxes) -- **TCP options**: Medium priority (better reliability) +- **TCP options**: Lower priority - **HTTP headers**: Highest priority (W3C standard, most reliable) ### Why "Last One Wins" on Ingress? @@ -252,7 +214,7 @@ This creates a natural priority hierarchy: State machine tracking the injection lifecycle: -- **0**: Invalid/SSL (don't inject) OR injection complete (set by tctracer after IP injection) +- **0**: Invalid/SSL (don't inject) - **1**: First packet seen, needs L4 span ID setup - **2**: L4 span ID setup done, ready for injection @@ -260,7 +222,6 @@ State machine tracking the injection lifecycle: - Go uprobes: SSL connections (`go_nethttp.c`) - Kprobes: SSL connections (`trace_common.h`) -- tctracer: After successful IP option injection (`tctracer.c::encode_data_in_ip_options`) - trace_common: Conflicting requests or timeouts (`trace_common.h`) **Set to 1:** @@ -271,12 +232,11 @@ State machine tracking the injection lifecycle: **Set to 2:** -- tctracer: After populating span ID from TCP seq/ack (`tctracer.c::obi_app_egress`) +- tpinjector: After populating span ID from TCP seq/ack **Checked:** - tpinjector: Skip protocol detection for SSL (`tpinjector.c::handle_existing_tp_pid`) -- tctracer: First packet handling and injection decision (`tctracer.c::obi_app_egress`) ### tp_info_pid_t::written (u8) @@ -301,12 +261,6 @@ Coordination flag for mutual exclusion between egress injection layers: **Checked:** - protocol_http: Skip processing if tpinjector handled it (`protocol_http.h::protocol_http`) -- tctracer: Skip IP injection if upper layer handled it (`tctracer.c::obi_app_egress`) - -**Key Behavior**: The `written` flag serves two purposes: - -1. **protocol_http optimization**: Reuse existing trace info, avoid regenerating span IDs -2. **tctracer mutual exclusion**: Signal that upper layer already injected context ## The incoming_trace_map @@ -314,11 +268,20 @@ Coordination flag for mutual exclusion between egress injection layers: Unlike `outgoing_trace_map`, there is no coordination between layers - each layer independently parses and overwrites the map entry if context is found, implementing the "last one wins" strategy. +## The sock_dir sockmap + +`sock_dir` is a `BPF_MAP_TYPE_SOCKHASH` map keyed by `u64` socket cookie. It controls which sockets the `sk_msg` program (tpinjector) runs on. + +Sockets are added to `sock_dir` in two ways: + +1. **`BPF_SOCK_OPS`**: New connections are added automatically as they are established +2. **`iter/tcp` iterator** (`bpf/tpinjector/sock_iter.c`): Runs at tpinjector startup and iterates over all existing TCP sockets, inserting each into `sock_dir` with `BPF_NOEXIST`. This ensures connections established before tpinjector attached are tracked. + ## Summary 1. **Egress uses mutual exclusion**: - Upper layers (tpinjector, protocol_http) delete the `outgoing_trace_map` entry - - Lower layers (tctracer) can't inject if entry is already deleted + - Lower layers can't inject if entry is already deleted - Result: Only one injection method per connection 2. **Ingress uses "last one wins"**: @@ -326,19 +289,15 @@ Unlike `outgoing_trace_map`, there is no coordination between layers - each laye - Later layers overwrite earlier layers - Result: Most reliable method takes precedence -3. **IP options are truly a fallback**: - - On egress: Only injected when TCP options fail or socket isn't in sockmap - - On ingress: Lowest priority, overwritten by TCP options or HTTP headers - -4. **SSL/TLS uses TCP options, not HTTP headers**: +3. **SSL/TLS uses TCP options, not HTTP headers**: - Can't inject into encrypted payload - TCP options work before TLS handshake - tpinjector deletes entry early to skip HTTP detection -5. **Execution order varies by scenario**: - - Go/SSL: uprobes → tpinjector → kprobe → tctracer - - Plain HTTP (sockmap): tpinjector → kprobe → tctracer - - Non-sockmap: kprobe → tctracer +4. **Execution order varies by scenario**: + - Go/SSL: uprobes → tpinjector → kprobe + - Plain HTTP (sockmap): tpinjector → kprobe + - Non-sockmap: kprobe only ## Logs correlation diff --git a/docs/config-schema.json b/docs/config-schema.json index 18d80b2e53..3080062c46 100644 --- a/docs/config-schema.json +++ b/docs/config-schema.json @@ -124,21 +124,17 @@ }, { "type": "string", - "pattern": "^(headers|http|tcp|ip)(,(headers|http|tcp|ip))*$", - "description": "List of propagation methods to enable (headers/http for HTTP headers, tcp for TCP options, ip for IP options), separated by commas", + "pattern": "^(headers|http|tcp)(,(headers|http|tcp))*$", + "description": "List of propagation methods to enable (headers/http for HTTP headers, tcp for TCP options), separated by commas", "examples": [ "headers", "tcp", - "ip", - "headers,tcp", - "headers,ip", - "tcp,ip", - "headers,tcp,ip" + "headers,tcp" ] } ], "title": "Context Propagation Mode", - "description": "Configures distributed context propagation. Can be 'all' to enable all methods, 'disabled'/'' to disable, or a list of specific methods: 'headers' (or 'http') for HTTP headers, 'tcp' for TCP options, 'ip' for IP options." + "description": "Configures distributed context propagation. Can be 'all' to enable all methods, 'disabled'/'' to disable, or a list of specific methods: 'headers' (or 'http') for HTTP headers, 'tcp' for TCP options." }, "CustomRoutesConfig": { "properties": { @@ -331,7 +327,7 @@ }, "context_propagation": { "$ref": "#/$defs/ContextPropagationMode", - "description": "Enables distributed context propagation. Can be a combination of: headers, tcp, ip (e.g., \"headers,tcp\" or \"all\")", + "description": "Enables distributed context propagation. Can be a combination of: headers, tcp (e.g., \"headers,tcp\" or \"all\")", "x-env-var": "OTEL_EBPF_BPF_CONTEXT_PROPAGATION" }, "couchbase_db_cache_size": { diff --git a/internal/test/integration/multiprocess_test.go b/internal/test/integration/multiprocess_test.go index 4c4d905e3e..fba47cb463 100644 --- a/internal/test/integration/multiprocess_test.go +++ b/internal/test/integration/multiprocess_test.go @@ -161,24 +161,6 @@ func TestMultiProcessAppCPTCPOnly(t *testing.T) { require.NoError(t, compose.Close()) } -func TestMultiProcessAppCPIPOnly(t *testing.T) { - compose, err := docker.ComposeSuite("docker-compose-multiexec-host.yml", path.Join(pathOutput, "test-suite-multiexec-app-cp-ip-only.log")) - require.NoError(t, err) - - // Test IP-only context propagation (no HTTP headers, no TCP options, only IP options) - // Explicitly disable request header tracking since we're not injecting HTTP headers - // Bypass JSON-RPC to avoid loopback interface and enable IP option injection over network - compose.Env = append(compose.Env, `OTEL_EBPF_BPF_DISABLE_BLACK_BOX_CP=1`, `OTEL_EBPF_BPF_CONTEXT_PROPAGATION=ip`, `OTEL_EBPF_BPF_TRACK_REQUEST_HEADERS=false`, `BYPASS_JSONRPC=true`) - - require.NoError(t, compose.Up()) - - t.Run("Nested traces with IP-only propagation", func(t *testing.T) { - testNestedHTTPTracesKProbes(t) - }) - - require.NoError(t, compose.Close()) -} - // Addresses bug https://github.com/grafana/beyla/issues/370 for Go executables // Prevents that two instances of the same process report traces or metrics by duplicate func checkReportedOnlyOnce(t *testing.T, baseURL, serviceName string) { diff --git a/pkg/appolly/discover/finder.go b/pkg/appolly/discover/finder.go index 0f7a0df83b..9d37deac2c 100644 --- a/pkg/appolly/discover/finder.go +++ b/pkg/appolly/discover/finder.go @@ -15,7 +15,6 @@ import ( "go.opentelemetry.io/obi/pkg/internal/ebpf/gotracer" "go.opentelemetry.io/obi/pkg/internal/ebpf/gpuevent" "go.opentelemetry.io/obi/pkg/internal/ebpf/logenricher" - "go.opentelemetry.io/obi/pkg/internal/ebpf/tctracer" "go.opentelemetry.io/obi/pkg/internal/ebpf/tpinjector" msgh "go.opentelemetry.io/obi/pkg/internal/helpers/msg" "go.opentelemetry.io/obi/pkg/obi" @@ -149,11 +148,6 @@ func newCommonTracersGroup(cfg *obi.Config) []ebpf.Tracer { tracers = append(tracers, tpinjector.New(cfg)) } - // Enables tctracer which handles context propagations via IP options only (TC egress/ingress) - if cfg.EBPF.ContextPropagation.HasIPOptions() { - tracers = append(tracers, tctracer.New(cfg)) - } - // Enables log enricher which handles trace-log correlation if cfg.EBPF.LogEnricher.Enabled() { logEnricher := logenricher.New(cfg) diff --git a/pkg/config/ebpf_tracer.go b/pkg/config/ebpf_tracer.go index 22a327439a..49b98405e0 100644 --- a/pkg/config/ebpf_tracer.go +++ b/pkg/config/ebpf_tracer.go @@ -5,6 +5,7 @@ package config // import "go.opentelemetry.io/obi/pkg/config" import ( "fmt" + "log/slog" "os/exec" "strings" "time" @@ -20,10 +21,9 @@ type RedisDBCacheConfig struct { } const ( - ContextPropagationDisabled ContextPropagationMode = 0 - ContextPropagationHeaders ContextPropagationMode = 1 << 0 // HTTP headers - ContextPropagationTCP ContextPropagationMode = 1 << 1 // TCP options - ContextPropagationIPOptions ContextPropagationMode = 1 << 2 // IP options (dangerous) + ContextPropagationDisabled ContextPropagationMode = 0 + ContextPropagationHeaders ContextPropagationMode = 1 << 0 // HTTP headers + ContextPropagationTCP ContextPropagationMode = 1 << 1 // TCP options // Convenience aliases ContextPropagationAll = ContextPropagationHeaders | ContextPropagationTCP @@ -32,7 +32,6 @@ const ( StrContextPropagationHeaders = "headers" StrContextPropagationHTTP = "http" StrContextPropagationTCP = "tcp" - StrContextPropagationIP = "ip" ) // EBPFTracer configuration for eBPF programs @@ -65,7 +64,7 @@ type EBPFTracer struct { HTTPRequestTimeout time.Duration `yaml:"http_request_timeout" env:"OTEL_EBPF_BPF_HTTP_REQUEST_TIMEOUT" validate:"gte=0"` // Enables distributed context propagation. - // Can be a combination of: headers, tcp, ip (e.g., "headers,tcp" or "all") + // Can be a combination of: headers, tcp (e.g., "headers,tcp" or "all") ContextPropagation ContextPropagationMode `yaml:"context_propagation" env:"OTEL_EBPF_BPF_CONTEXT_PROPAGATION"` // Skips checking the kernel version for bpf_loop functionality. Some modified kernels have this @@ -168,11 +167,6 @@ func (m ContextPropagationMode) HasTCP() bool { return m&ContextPropagationTCP != 0 } -// HasIPOptions returns true if IP options context propagation is enabled -func (m ContextPropagationMode) HasIPOptions() bool { - return m&ContextPropagationIPOptions != 0 -} - // IsEnabled returns true if any context propagation is enabled func (m ContextPropagationMode) IsEnabled() bool { return m != ContextPropagationDisabled @@ -202,10 +196,10 @@ func (m *ContextPropagationMode) UnmarshalText(text []byte) error { result |= ContextPropagationHeaders case StrContextPropagationTCP: result |= ContextPropagationTCP - case StrContextPropagationIP: - result |= ContextPropagationIPOptions + case "ip": + slog.Warn("context_propagation value 'ip' is deprecated and has no effect; IP options injection has been removed") default: - return fmt.Errorf("invalid value for context_propagation: '%s' (valid: all, disabled, headers, tcp, ip)", part) + return fmt.Errorf("invalid value for context_propagation: '%s' (valid: all, disabled, headers, tcp)", part) } } @@ -229,10 +223,6 @@ func (m ContextPropagationMode) MarshalText() ([]byte, error) { if m.HasTCP() { parts = append(parts, "tcp") } - if m.HasIPOptions() { - parts = append(parts, "ip") - } - if len(parts) == 0 { return nil, fmt.Errorf("invalid context propagation mode: %d", m) } @@ -241,7 +231,7 @@ func (m ContextPropagationMode) MarshalText() ([]byte, error) { } func (ContextPropagationMode) JSONSchema() *jsonschema.Schema { - options := []string{StrContextPropagationHeaders, StrContextPropagationHTTP, StrContextPropagationTCP, StrContextPropagationIP} + options := []string{StrContextPropagationHeaders, StrContextPropagationHTTP, StrContextPropagationTCP} optionsStr := strings.Join(options, "|") OptionsRegexp := fmt.Sprintf("^(%s)(,(%s))*$", optionsStr, optionsStr) return &jsonschema.Schema{ @@ -253,12 +243,12 @@ func (ContextPropagationMode) JSONSchema() *jsonschema.Schema { }, { Type: "string", - Description: "List of propagation methods to enable (headers/http for HTTP headers, tcp for TCP options, ip for IP options), separated by commas", - Examples: []any{"headers", "tcp", "ip", "headers,tcp", "headers,ip", "tcp,ip", "headers,tcp,ip"}, + Description: "List of propagation methods to enable (headers/http for HTTP headers, tcp for TCP options), separated by commas", + Examples: []any{"headers", "tcp", "headers,tcp"}, Pattern: OptionsRegexp, }, }, Title: "Context Propagation Mode", - Description: "Configures distributed context propagation. Can be 'all' to enable all methods, 'disabled'/'' to disable, or a list of specific methods: 'headers' (or 'http') for HTTP headers, 'tcp' for TCP options, 'ip' for IP options.", + Description: "Configures distributed context propagation. Can be 'all' to enable all methods, 'disabled'/'' to disable, or a list of specific methods: 'headers' (or 'http') for HTTP headers, 'tcp' for TCP options.", } } diff --git a/pkg/config/ebpf_tracer_test.go b/pkg/config/ebpf_tracer_test.go index 52c612a5af..20bc7e9633 100644 --- a/pkg/config/ebpf_tracer_test.go +++ b/pkg/config/ebpf_tracer_test.go @@ -39,36 +39,16 @@ func TestContextPropagationMode_UnmarshalText(t *testing.T) { input: "tcp", want: ContextPropagationTCP, }, - { - name: "ip only", - input: "ip", - want: ContextPropagationIPOptions, - }, { name: "headers and tcp", input: "headers,tcp", want: ContextPropagationHeaders | ContextPropagationTCP, }, - { - name: "tcp and ip", - input: "tcp,ip", - want: ContextPropagationTCP | ContextPropagationIPOptions, - }, - { - name: "headers and ip", - input: "headers,ip", - want: ContextPropagationHeaders | ContextPropagationIPOptions, - }, { name: "all two", input: "headers,tcp", want: ContextPropagationAll, }, - { - name: "with spaces", - input: " headers , tcp , ip ", - want: ContextPropagationHeaders | ContextPropagationTCP | ContextPropagationIPOptions, - }, { name: "invalid value", input: "invalid", @@ -125,26 +105,11 @@ func TestContextPropagationMode_MarshalText(t *testing.T) { mode: ContextPropagationTCP, want: "tcp", }, - { - name: "ip only", - mode: ContextPropagationIPOptions, - want: "ip", - }, { name: "headers and tcp", mode: ContextPropagationHeaders | ContextPropagationTCP, want: "all", }, - { - name: "tcp and ip", - mode: ContextPropagationTCP | ContextPropagationIPOptions, - want: "tcp,ip", - }, - { - name: "headers and ip", - mode: ContextPropagationHeaders | ContextPropagationIPOptions, - want: "headers,ip", - }, } for _, tt := range tests { @@ -169,7 +134,6 @@ func TestContextPropagationMode_HasMethods(t *testing.T) { mode ContextPropagationMode wantHeaders bool wantTCP bool - wantIPOptions bool wantIsEnabled bool }{ { @@ -177,7 +141,6 @@ func TestContextPropagationMode_HasMethods(t *testing.T) { mode: ContextPropagationAll, wantHeaders: true, wantTCP: true, - wantIPOptions: false, wantIsEnabled: true, }, { @@ -185,7 +148,6 @@ func TestContextPropagationMode_HasMethods(t *testing.T) { mode: ContextPropagationDisabled, wantHeaders: false, wantTCP: false, - wantIPOptions: false, wantIsEnabled: false, }, { @@ -193,7 +155,6 @@ func TestContextPropagationMode_HasMethods(t *testing.T) { mode: ContextPropagationHeaders, wantHeaders: true, wantTCP: false, - wantIPOptions: false, wantIsEnabled: true, }, { @@ -201,15 +162,6 @@ func TestContextPropagationMode_HasMethods(t *testing.T) { mode: ContextPropagationTCP, wantHeaders: false, wantTCP: true, - wantIPOptions: false, - wantIsEnabled: true, - }, - { - name: "ip only", - mode: ContextPropagationIPOptions, - wantHeaders: false, - wantTCP: false, - wantIPOptions: true, wantIsEnabled: true, }, { @@ -217,7 +169,6 @@ func TestContextPropagationMode_HasMethods(t *testing.T) { mode: ContextPropagationHeaders | ContextPropagationTCP, wantHeaders: true, wantTCP: true, - wantIPOptions: false, wantIsEnabled: true, }, } @@ -230,9 +181,6 @@ func TestContextPropagationMode_HasMethods(t *testing.T) { if got := tt.mode.HasTCP(); got != tt.wantTCP { t.Errorf("HasTCP() = %v, want %v", got, tt.wantTCP) } - if got := tt.mode.HasIPOptions(); got != tt.wantIPOptions { - t.Errorf("HasIPOptions() = %v, want %v", got, tt.wantIPOptions) - } if got := tt.mode.IsEnabled(); got != tt.wantIsEnabled { t.Errorf("IsEnabled() = %v, want %v", got, tt.wantIsEnabled) } @@ -240,82 +188,6 @@ func TestContextPropagationMode_HasMethods(t *testing.T) { } } -func TestContextPropagationMode_TracerLoading(t *testing.T) { - // Test which tracers should be loaded for each configuration - // tpinjector handles: HTTP headers (sk_msg) and TCP options (BPF_SOCK_OPS) - // tctracer handles: IP options only (TC egress/ingress) - tests := []struct { - name string - mode ContextPropagationMode - wantTPInject bool // should load tpinjector - wantTCTracer bool // should load tctracer - }{ - { - name: "tcp only", - mode: ContextPropagationTCP, - wantTPInject: true, - wantTCTracer: false, - }, - { - name: "headers only", - mode: ContextPropagationHeaders, - wantTPInject: true, - wantTCTracer: false, - }, - { - name: "ip only", - mode: ContextPropagationIPOptions, - wantTPInject: false, - wantTCTracer: true, - }, - { - name: "headers and tcp", - mode: ContextPropagationHeaders | ContextPropagationTCP, - wantTPInject: true, - wantTCTracer: false, - }, - { - name: "tcp and ip", - mode: ContextPropagationTCP | ContextPropagationIPOptions, - wantTPInject: true, - wantTCTracer: true, - }, - { - name: "headers and ip", - mode: ContextPropagationHeaders | ContextPropagationIPOptions, - wantTPInject: true, - wantTCTracer: true, - }, - { - name: "all", - mode: ContextPropagationAll, - wantTPInject: true, - wantTCTracer: false, - }, - { - name: "disabled", - mode: ContextPropagationDisabled, - wantTPInject: false, - wantTCTracer: false, - }, - } - - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - // Determine what should load based on the logic in finder.go - shouldLoadTPInject := tt.mode.HasHeaders() || tt.mode.HasTCP() - shouldLoadTCTracer := tt.mode.HasIPOptions() - - if shouldLoadTPInject != tt.wantTPInject { - t.Errorf("tpinjector loading = %v, want %v", shouldLoadTPInject, tt.wantTPInject) - } - if shouldLoadTCTracer != tt.wantTCTracer { - t.Errorf("tctracer loading = %v, want %v", shouldLoadTCTracer, tt.wantTCTracer) - } - }) - } -} - func TestEBPFTracer_CudaInstrumentationEnabled(t *testing.T) { tests := []struct { name string diff --git a/pkg/ebpf/common/common.go b/pkg/ebpf/common/common.go index a57b0dd055..bcf2b6c411 100644 --- a/pkg/ebpf/common/common.go +++ b/pkg/ebpf/common/common.go @@ -7,6 +7,7 @@ import ( "bufio" "bytes" "errors" + "fmt" "io" "log/slog" "net" @@ -125,6 +126,31 @@ type Iter struct { Link link.Link } +func (it *Iter) Run(log *slog.Logger) error { + log.Debug("Running iterator", "iterator", it.Program.String()) + + if it.Link == nil { + return errors.New("iterator link is nil") + } + + rd, err := it.Link.(*link.Iter).Open() + if err != nil { + return fmt.Errorf("open iterator: %w", err) + } + defer rd.Close() + + scanner := bufio.NewScanner(rd) + for scanner.Scan() { + log.Debug("Iterator output", "line", scanner.Text(), "iterator", it.Program.String()) + } + if err := scanner.Err(); err != nil { + return fmt.Errorf("read iterator: %w", err) + } + + log.Debug("Iterator finished", "iterator", it.Program.String()) + return nil +} + type Tracing struct { Program *ebpf.Program AttachAs ebpf.AttachType diff --git a/pkg/internal/ebpf/generictracer/generictracer.go b/pkg/internal/ebpf/generictracer/generictracer.go index 1c9c5f4548..520c160434 100644 --- a/pkg/internal/ebpf/generictracer/generictracer.go +++ b/pkg/internal/ebpf/generictracer/generictracer.go @@ -6,9 +6,7 @@ package generictracer // import "go.opentelemetry.io/obi/pkg/internal/ebpf/generictracer" import ( - "bufio" "context" - "errors" "fmt" "io" "log/slog" @@ -17,7 +15,6 @@ import ( "unsafe" "github.com/cilium/ebpf" - "github.com/cilium/ebpf/link" "github.com/gavv/monotime" "github.com/vishvananda/netlink" @@ -502,7 +499,7 @@ func (p *Tracer) Run(ctx context.Context, ebpfEventContext *ebpfcommon.EBPFEvent for _, it := range p.Iters() { if it.Program == p.bpfObjects.ObiIterTcp { - if err := p.runIterator(it); err != nil { + if err := it.Run(p.log); err != nil { p.log.Error("error running TCP iterator", "error", err) } } @@ -598,31 +595,6 @@ func (p *Tracer) watchForMisclassifedEvents(ctx context.Context) { } } -func (p *Tracer) runIterator(it *ebpfcommon.Iter) error { - p.log.Debug("Running iterator", "iterator", it.Program.String()) - - if it.Link == nil { - return errors.New("iterator link is nil") - } - - rd, err := it.Link.(*link.Iter).Open() - if err != nil { - return fmt.Errorf("open iterator: %w", err) - } - defer rd.Close() - - scanner := bufio.NewScanner(rd) - for scanner.Scan() { - p.log.Debug("Iterator output", "line", scanner.Text(), "iterator", it.Program.String()) - } - if err := scanner.Err(); err != nil { - return fmt.Errorf("read iterator: %w", err) - } - p.log.Debug("Iterator finished", "iterator", it.Program.String()) - - return nil -} - // Cilium 0.19.0+ is adding a new private field to all the BpfConnectionInfoT // implementations, so we can't directly do a type cast func bpfConnInfoT(src ebpfcommon.BpfConnectionInfoT) (dst BpfConnectionInfoT) { diff --git a/pkg/internal/ebpf/tctracer/tctracer.go b/pkg/internal/ebpf/tctracer/tctracer.go deleted file mode 100644 index 36a89acbb8..0000000000 --- a/pkg/internal/ebpf/tctracer/tctracer.go +++ /dev/null @@ -1,176 +0,0 @@ -// Copyright The OpenTelemetry Authors -// SPDX-License-Identifier: Apache-2.0 - -//go:build linux - -package tctracer // import "go.opentelemetry.io/obi/pkg/internal/ebpf/tctracer" - -import ( - "context" - "errors" - "fmt" - "io" - "log/slog" - - "github.com/cilium/ebpf" - - "go.opentelemetry.io/obi/pkg/appolly/app" - "go.opentelemetry.io/obi/pkg/appolly/app/request" - "go.opentelemetry.io/obi/pkg/appolly/app/svc" - "go.opentelemetry.io/obi/pkg/appolly/discover/exec" - ebpfcommon "go.opentelemetry.io/obi/pkg/ebpf/common" - "go.opentelemetry.io/obi/pkg/internal/ebpf/tcmanager" - "go.opentelemetry.io/obi/pkg/internal/goexec" - "go.opentelemetry.io/obi/pkg/obi" - "go.opentelemetry.io/obi/pkg/pipe/msg" -) - -//go:generate $BPF2GO -cc $BPF_CLANG -cflags $BPF_CFLAGS -target amd64,arm64 Bpf ../../../../bpf/tctracer/tctracer.c -- -I../../../../bpf -I../../../../bpf - -type Tracer struct { - cfg *obi.Config - bpfObjects BpfObjects - closers []io.Closer - log *slog.Logger - ifaceManager *tcmanager.InterfaceManager - tcManager tcmanager.TCManager -} - -func New(cfg *obi.Config) *Tracer { - log := slog.With("component", "tc.Tracer") - - return &Tracer{ - log: log, - cfg: cfg, - } -} - -func (p *Tracer) AllowPID(app.PID, uint32, *svc.Attrs) {} - -func (p *Tracer) BlockPID(app.PID, uint32) {} - -func (p *Tracer) Load() (*ebpf.CollectionSpec, error) { - if !ebpfcommon.HasHostPidAccess() { - return nil, errors.New("L4 context-propagation requires host process ID access, e.g. hostPid:true") - } - - hostNet, err := ebpfcommon.HasHostNetworkAccess() - if err != nil { - return nil, fmt.Errorf("failed to check for host network access while enabling IP context-propagation, error: %w", err) - } - - if !hostNet { - return nil, errors.New("L4 context-propagation requires host network access, e.g. hostNetwork:true") - } - - return LoadBpf() -} - -func (p *Tracer) SetupTailCalls() { -} - -func (p *Tracer) Constants() map[string]any { - return map[string]any{ - "g_bpf_debug": p.cfg.EBPF.BpfDebug, - } -} - -func (p *Tracer) RegisterOffsets(_ *exec.FileInfo, _ *goexec.Offsets) {} - -func (p *Tracer) ProcessBinary(_ *exec.FileInfo) {} - -func (p *Tracer) BpfObjects() any { - return &p.bpfObjects -} - -func (p *Tracer) AddCloser(c ...io.Closer) { - p.closers = append(p.closers, c...) -} - -func (p *Tracer) GoProbes() map[string][]*ebpfcommon.ProbeDesc { - return nil -} - -func (p *Tracer) KProbes() map[string]ebpfcommon.ProbeDesc { - return nil -} - -func (p *Tracer) Tracepoints() map[string]ebpfcommon.ProbeDesc { - return nil -} - -func (p *Tracer) UProbes() map[string]map[string][]*ebpfcommon.ProbeDesc { - return nil -} - -func (p *Tracer) SocketFilters() []*ebpf.Program { - return nil -} - -func (p *Tracer) SockMsgs() []ebpfcommon.SockMsg { - return nil -} - -func (p *Tracer) SockOps() []ebpfcommon.SockOps { - return nil -} - -func (p *Tracer) Iters() []*ebpfcommon.Iter { - return nil -} - -func (p *Tracer) Tracing() []*ebpfcommon.Tracing { return nil } - -func (p *Tracer) RecordInstrumentedLib(uint64, []io.Closer) {} - -func (p *Tracer) AddInstrumentedLibRef(uint64) {} - -func (p *Tracer) UnlinkInstrumentedLib(uint64) {} - -func (p *Tracer) AlreadyInstrumentedLib(uint64) bool { - return false -} - -func (p *Tracer) startTC(ctx context.Context) { - if p.tcManager != nil { - return - } - - p.ifaceManager = tcmanager.NewInterfaceManager() - p.tcManager = tcmanager.NewTCManager(p.cfg.EBPF.TCBackend) - p.tcManager.SetInterfaceManager(p.ifaceManager) - p.tcManager.AddProgram("tc/tc_egress", p.bpfObjects.ObiAppEgress, tcmanager.AttachmentEgress) - p.tcManager.AddProgram("tc/tc_ingress", p.bpfObjects.ObiAppIngress, tcmanager.AttachmentIngress) - - p.ifaceManager.Start(ctx) -} - -func (p *Tracer) Run(ctx context.Context, _ *ebpfcommon.EBPFEventContext, _ *msg.Queue[[]request.Span]) { - p.startTC(ctx) - - errorCh := p.tcManager.Errors() - - select { - case <-ctx.Done(): - case err := <-errorCh: - p.log.Error("TC manager returned an error, aborting", "error", err) - } - - p.stopTC() - p.bpfObjects.Close() -} - -func (p *Tracer) stopTC() { - p.log.Info("removing traffic control probes") - - p.tcManager.Shutdown() - p.tcManager = nil - - p.ifaceManager.Stop() - p.ifaceManager.Wait() - p.ifaceManager = nil -} - -func (p *Tracer) Required() bool { - return false -} diff --git a/pkg/internal/ebpf/tctracer/tctracer_notlinux.go b/pkg/internal/ebpf/tctracer/tctracer_notlinux.go deleted file mode 100644 index 2a66cd72d6..0000000000 --- a/pkg/internal/ebpf/tctracer/tctracer_notlinux.go +++ /dev/null @@ -1,53 +0,0 @@ -// Copyright The OpenTelemetry Authors -// SPDX-License-Identifier: Apache-2.0 - -//go:build !linux - -// this file is emptied on purpose to allow OBI compiling in non-linux environments - -package tctracer // import "go.opentelemetry.io/obi/pkg/internal/ebpf/tctracer" - -import ( - "context" - "io" - - "github.com/cilium/ebpf" - - "go.opentelemetry.io/obi/pkg/appolly/app" - "go.opentelemetry.io/obi/pkg/appolly/app/request" - "go.opentelemetry.io/obi/pkg/appolly/app/svc" - "go.opentelemetry.io/obi/pkg/appolly/discover/exec" - ebpfcommon "go.opentelemetry.io/obi/pkg/ebpf/common" - "go.opentelemetry.io/obi/pkg/internal/goexec" - "go.opentelemetry.io/obi/pkg/obi" - "go.opentelemetry.io/obi/pkg/pipe/msg" -) - -type Tracer struct{} - -func New(_ *obi.Config) *Tracer { return nil } -func (p *Tracer) AllowPID(_ app.PID, _ uint32, _ *svc.Attrs) {} -func (p *Tracer) BlockPID(_ app.PID, _ uint32) {} -func (p *Tracer) Load() (*ebpf.CollectionSpec, error) { return nil, nil } -func (p *Tracer) BpfObjects() any { return nil } -func (p *Tracer) AddCloser(_ ...io.Closer) {} -func (p *Tracer) GoProbes() map[string][]*ebpfcommon.ProbeDesc { return nil } -func (p *Tracer) KProbes() map[string]ebpfcommon.ProbeDesc { return nil } -func (p *Tracer) UProbes() map[string]map[string][]*ebpfcommon.ProbeDesc { return nil } -func (p *Tracer) Tracepoints() map[string]ebpfcommon.ProbeDesc { return nil } -func (p *Tracer) SocketFilters() []*ebpf.Program { return nil } -func (p *Tracer) SockMsgs() []ebpfcommon.SockMsg { return nil } -func (p *Tracer) SockOps() []ebpfcommon.SockOps { return nil } -func (p *Tracer) Iters() []*ebpfcommon.Iter { return nil } -func (p *Tracer) Tracing() []*ebpfcommon.Tracing { return nil } -func (p *Tracer) RecordInstrumentedLib(_ uint64, _ []io.Closer) {} -func (p *Tracer) AddInstrumentedLibRef(_ uint64) {} -func (p *Tracer) UnlinkInstrumentedLib(_ uint64) {} -func (p *Tracer) AlreadyInstrumentedLib(_ uint64) bool { return false } -func (p *Tracer) Run(_ context.Context, _ *ebpfcommon.EBPFEventContext, _ *msg.Queue[[]request.Span]) { -} -func (p *Tracer) Constants() map[string]any { return nil } -func (p *Tracer) SetupTailCalls() {} -func (p *Tracer) RegisterOffsets(_ *exec.FileInfo, _ *goexec.Offsets) {} -func (p *Tracer) ProcessBinary(_ *exec.FileInfo) {} -func (p *Tracer) Required() bool { return false } diff --git a/pkg/internal/ebpf/tpinjector/tpinjector.go b/pkg/internal/ebpf/tpinjector/tpinjector.go index cd89d16fdc..2ea4ca4f5a 100644 --- a/pkg/internal/ebpf/tpinjector/tpinjector.go +++ b/pkg/internal/ebpf/tpinjector/tpinjector.go @@ -29,6 +29,7 @@ type Tracer struct { bpfObjects BpfObjects closers []io.Closer log *slog.Logger + iters []*ebpfcommon.Iter } func New(cfg *obi.Config) *Tracer { @@ -132,7 +133,23 @@ func (p *Tracer) SockOps() []ebpfcommon.SockOps { } func (p *Tracer) Iters() []*ebpfcommon.Iter { - return nil + if p.iters != nil { + return p.iters + } + + major, minor := ebpfcommon.KernelVersion() + + if major < 6 || (major == 6 && minor < 4) { + p.log.Warn("TCP socket iterator disabled: kernel versions < 6.4 have a locking bug " + + "in iter/tcp + sockhash that can cause an RCU stall and kernel panic. " + + "Existing connections at startup will not be tracked for context propagation.") + p.iters = []*ebpfcommon.Iter{} + return p.iters + } + + p.iters = []*ebpfcommon.Iter{{Program: p.bpfObjects.ObiSkIterTcp}} + + return p.iters } func (p *Tracer) Tracing() []*ebpfcommon.Tracing { @@ -152,6 +169,12 @@ func (p *Tracer) AlreadyInstrumentedLib(uint64) bool { func (p *Tracer) Run(ctx context.Context, _ *ebpfcommon.EBPFEventContext, _ *msg.Queue[[]request.Span]) { p.log.Debug("tpinjector started") + for _, it := range p.Iters() { + if err := it.Run(p.log); err != nil { + p.log.Error("error running iterator", "error", err) + } + } + <-ctx.Done() p.bpfObjects.Close() diff --git a/pkg/internal/ebpf/tpinjector/tpinjector_test.go b/pkg/internal/ebpf/tpinjector/tpinjector_test.go index 6ef8466fd9..b90cdd1b6f 100644 --- a/pkg/internal/ebpf/tpinjector/tpinjector_test.go +++ b/pkg/internal/ebpf/tpinjector/tpinjector_test.go @@ -43,25 +43,10 @@ func TestTracer_Constants_InjectFlags(t *testing.T) { contextPropagation: "headers,tcp", expectedInjectFlags: 3, // k_inject_http_headers | k_inject_tcp_options }, - { - name: "ip only", - contextPropagation: "ip", - expectedInjectFlags: 0, // tpinjector doesn't handle IP options - }, { name: "all", contextPropagation: "all", - expectedInjectFlags: 3, // k_inject_http_headers | k_inject_tcp_options (IP handled by tctracer) - }, - { - name: "tcp and ip", - contextPropagation: "tcp,ip", - expectedInjectFlags: 2, // k_inject_tcp_options only (IP handled by tctracer) - }, - { - name: "headers and ip", - contextPropagation: "headers,ip", - expectedInjectFlags: 1, // k_inject_http_headers only (IP handled by tctracer) + expectedInjectFlags: 3, // k_inject_http_headers | k_inject_tcp_options }, } diff --git a/pkg/obi/config.go b/pkg/obi/config.go index e99c693de9..e3cd0bcc79 100644 --- a/pkg/obi/config.go +++ b/pkg/obi/config.go @@ -657,8 +657,7 @@ func (c *Config) otelNetO11yEnabled() bool { } func (c *Config) willUseTC() bool { - return c.EBPF.ContextPropagation.HasIPOptions() || - (c.Enabled(FeatureNetO11y) && c.NetworkFlows.Source == EbpfSourceTC) + return c.Enabled(FeatureNetO11y) && c.NetworkFlows.Source == EbpfSourceTC } // Enabled checks if a given OBI feature is enabled according to the global configuration diff --git a/pkg/obi/config_test.go b/pkg/obi/config_test.go index 1a8dd15f36..84dfc2c339 100644 --- a/pkg/obi/config_test.go +++ b/pkg/obi/config_test.go @@ -633,12 +633,8 @@ func TestDefaultLegacyExclusionFilter(t *testing.T) { } func TestWillUseTC(t *testing.T) { - env := envMap{"OTEL_EBPF_BPF_CONTEXT_PROPAGATION": "ip"} + env := envMap{"OTEL_EBPF_BPF_CONTEXT_PROPAGATION": "headers"} cfg := loadConfig(t, env) - assert.True(t, cfg.willUseTC()) - - env = envMap{"OTEL_EBPF_BPF_CONTEXT_PROPAGATION": "headers"} - cfg = loadConfig(t, env) assert.False(t, cfg.willUseTC()) env = envMap{"OTEL_EBPF_BPF_CONTEXT_PROPAGATION": "disabled"} @@ -649,14 +645,6 @@ func TestWillUseTC(t *testing.T) { cfg = loadConfig(t, env) assert.False(t, cfg.willUseTC()) - env = envMap{"OTEL_EBPF_BPF_CONTEXT_PROPAGATION": "headers"} - cfg = loadConfig(t, env) - assert.False(t, cfg.willUseTC()) - - env = envMap{"OTEL_EBPF_BPF_CONTEXT_PROPAGATION": "ip"} - cfg = loadConfig(t, env) - assert.True(t, cfg.willUseTC()) - env = envMap{"OTEL_EBPF_BPF_CONTEXT_PROPAGATION": "disabled", "OTEL_EBPF_NETWORK_SOURCE": "tc", "OTEL_EBPF_NETWORK_METRICS": "true"} cfg = loadConfig(t, env) assert.True(t, cfg.willUseTC()) diff --git a/pkg/obi/os_test.go b/pkg/obi/os_test.go index f7c7805d5f..c6af46e37a 100644 --- a/pkg/obi/os_test.go +++ b/pkg/obi/os_test.go @@ -101,36 +101,30 @@ const ( ) type capTestData struct { - osCap helpers.OSCapability - class capClass - kernMaj int - kernMin int - useTC bool -} - -func contextPropagationMode(useTC bool) config.ContextPropagationMode { - if useTC { - return config.ContextPropagationIPOptions - } - return config.ContextPropagationDisabled + osCap helpers.OSCapability + class capClass + kernMaj int + kernMin int + tcSource bool // use TC as network source (capNet only) + contextPropOn bool // enable context propagation (capApp only) } var capTests = []capTestData{ // core - {osCap: unix.CAP_BPF, class: capCore, kernMaj: 6, kernMin: 10, useTC: false}, + {osCap: unix.CAP_BPF, class: capCore, kernMaj: 6, kernMin: 10}, // app o11y - {osCap: unix.CAP_CHECKPOINT_RESTORE, class: capApp, kernMaj: 6, kernMin: 10, useTC: false}, - {osCap: unix.CAP_DAC_READ_SEARCH, class: capApp, kernMaj: 6, kernMin: 10, useTC: false}, - {osCap: unix.CAP_SYS_PTRACE, class: capApp, kernMaj: 6, kernMin: 10, useTC: false}, - {osCap: unix.CAP_PERFMON, class: capApp, kernMaj: 6, kernMin: 10, useTC: false}, - {osCap: unix.CAP_NET_RAW, class: capApp, kernMaj: 6, kernMin: 10, useTC: false}, - {osCap: unix.CAP_NET_ADMIN, class: capApp, kernMaj: 6, kernMin: 10, useTC: true}, + {osCap: unix.CAP_CHECKPOINT_RESTORE, class: capApp, kernMaj: 6, kernMin: 10}, + {osCap: unix.CAP_DAC_READ_SEARCH, class: capApp, kernMaj: 6, kernMin: 10}, + {osCap: unix.CAP_SYS_PTRACE, class: capApp, kernMaj: 6, kernMin: 10}, + {osCap: unix.CAP_PERFMON, class: capApp, kernMaj: 6, kernMin: 10}, + {osCap: unix.CAP_NET_RAW, class: capApp, kernMaj: 6, kernMin: 10}, + {osCap: unix.CAP_NET_ADMIN, class: capApp, kernMaj: 6, kernMin: 10, contextPropOn: true}, // net o11y - {osCap: unix.CAP_NET_RAW, class: capNet, kernMaj: 6, kernMin: 10, useTC: false}, - {osCap: unix.CAP_PERFMON, class: capNet, kernMaj: 6, kernMin: 10, useTC: true}, - {osCap: unix.CAP_NET_ADMIN, class: capNet, kernMaj: 6, kernMin: 10, useTC: true}, + {osCap: unix.CAP_NET_RAW, class: capNet, kernMaj: 6, kernMin: 10}, + {osCap: unix.CAP_PERFMON, class: capNet, kernMaj: 6, kernMin: 10, tcSource: true}, + {osCap: unix.CAP_NET_ADMIN, class: capNet, kernMaj: 6, kernMin: 10, tcSource: true}, } func TestCheckOSCapabilities(t *testing.T) { @@ -147,17 +141,19 @@ func TestCheckOSCapabilities(t *testing.T) { test := func(data *capTestData) { overrideKernelVersion(testCase{data.kernMaj, data.kernMin}) - netSource := func(useTC bool) string { - if useTC { - return EbpfSourceTC - } + netSource := EbpfSourceSock + if data.tcSource { + netSource = EbpfSourceTC + } - return EbpfSourceSock + contextProp := config.ContextPropagationDisabled + if data.contextPropOn { + contextProp = config.ContextPropagationHeaders } cfg := Config{ - NetworkFlows: NetworkConfig{Enable: data.class == capNet, Source: netSource(data.useTC)}, - EBPF: config.EBPFTracer{ContextPropagation: contextPropagationMode(data.useTC)}, + NetworkFlows: NetworkConfig{Enable: data.class == capNet, Source: netSource}, + EBPF: config.EBPFTracer{ContextPropagation: contextProp}, } if data.class == capApp { // activates app o11y feature