-
Notifications
You must be signed in to change notification settings - Fork 97
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Propagate context through TCP packets #1161
Changes from 5 commits
8fa34e1
3f15c50
55df69d
022c6a0
0811044
06ddc8d
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change | ||||||||
---|---|---|---|---|---|---|---|---|---|---|
|
@@ -186,6 +186,33 @@ int BPF_KPROBE(kprobe_tcp_connect, struct sock *sk) { | |||||||||
|
||||||||||
bpf_dbg_printk("=== tcp connect %llx ===", id); | ||||||||||
|
||||||||||
tp_info_pid_t *tp_p = tp_buf(); | ||||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Connect runs before the SYN packet is sent. We use this opportunity to setup a trace context information for the connection. We'll later query the trace information in tc_egress, and serialize it on the TCP packet. |
||||||||||
|
||||||||||
if (tp_p) { | ||||||||||
tp_p->tp.ts = bpf_ktime_get_ns(); | ||||||||||
tp_p->tp.flags = 1; | ||||||||||
tp_p->valid = 1; | ||||||||||
tp_p->pid = 0; | ||||||||||
urand_bytes(tp_p->tp.span_id, SPAN_ID_SIZE_BYTES); | ||||||||||
tp_info_pid_t *server_tp = find_parent_trace(); | ||||||||||
if (server_tp && valid_trace(server_tp->tp.trace_id)) { | ||||||||||
__builtin_memcpy(tp_p->tp.trace_id, server_tp->tp.trace_id, sizeof(tp_p->tp.trace_id)); | ||||||||||
__builtin_memcpy(tp_p->tp.parent_id, server_tp->tp.span_id, sizeof(tp_p->tp.parent_id)); | ||||||||||
} else { | ||||||||||
urand_bytes(tp_p->tp.trace_id, TRACE_ID_SIZE_BYTES); | ||||||||||
__builtin_memset(tp_p->tp.parent_id, 0, sizeof(tp_p->tp.span_id)); | ||||||||||
} | ||||||||||
|
||||||||||
connection_info_t conn = {}; | ||||||||||
parse_sock_info(sk, &conn); | ||||||||||
sort_connection_info(&conn); | ||||||||||
|
||||||||||
bpf_dbg_printk("Setting up tp info"); | ||||||||||
dbg_print_http_connection_info(&conn); | ||||||||||
|
||||||||||
bpf_map_update_elem(&outgoing_trace_map, &conn, tp_p, BPF_ANY); | ||||||||||
} | ||||||||||
|
||||||||||
u64 addr = (u64)sk; | ||||||||||
|
||||||||||
sock_args_t args = {}; | ||||||||||
|
@@ -232,7 +259,7 @@ int BPF_KRETPROBE(kretprobe_sys_connect, int fd) | |||||||||
sort_connection_info(&info.p_conn.conn); | ||||||||||
info.p_conn.pid = pid_from_pid_tgid(id); | ||||||||||
info.orig_dport = orig_dport; | ||||||||||
|
||||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. nit: trailing whitespace |
||||||||||
bpf_map_update_elem(&pid_tid_to_conn, &id, &info, BPF_ANY); // to support SSL | ||||||||||
} | ||||||||||
|
||||||||||
|
@@ -678,17 +705,28 @@ int app_ingress(struct __sk_buff *skb) { | |||||||||
return 0; | ||||||||||
} | ||||||||||
|
||||||||||
unsigned char buf[12]; | ||||||||||
if (!tcp_syn(&tcp) || tcp_ack(&tcp)) { | ||||||||||
return 0; | ||||||||||
} | ||||||||||
|
||||||||||
if (tcp_syn(&tcp) && !tcp_ack(&tcp)) { | ||||||||||
bpf_skb_load_bytes(skb, tcp.hdr_len, &buf, 4); | ||||||||||
if (skb->len - tcp.hdr_len < sizeof(tp_info_pid_t)) { | ||||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I was looking into this, and stumbled upon this check in
both
or even better
|
||||||||||
bpf_printk("SYN packet without tp info"); | ||||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. these prints will have to be removed, but keeping them for now since this is disabled. bpf_dbg_printk doesnt' work because we need to fix it to not use bpf_get_pid_tid, but bpf_get_task info |
||||||||||
return 0; | ||||||||||
} | ||||||||||
|
||||||||||
s32 len = skb->len-sizeof(u32); | ||||||||||
bpf_printk("SYN packed len = %d, offset = %d, hdr_len %d", skb->len, len, tcp.hdr_len); | ||||||||||
tp_info_pid_t tp = {0}; | ||||||||||
|
||||||||||
bpf_skb_load_bytes(skb, tcp.hdr_len, &tp, sizeof(tp_info_pid_t)); | ||||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I wonder if it may be a good idea to have Something like There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Good idea! I initially thought of this and said to myself, so if they do, we'll read something random and it's no worse than generating new ID. However, on a second thought, the value someone else puts in could be not random, but a constant and it will make all of our traceIDs be the same. I took your advice to put an ID on the PID (we don't use this field for TCP and it's later overwritten with -1). So I'll check the PID on ingress and if it doesn't match our magic value we'll not take in the packet. |
||||||||||
|
||||||||||
s32 len = skb->len-sizeof(u32); | ||||||||||
bpf_printk("Received SYN packed len = %d, offset = %d, hdr_len %d", skb->len, len, tcp.hdr_len); | ||||||||||
|
||||||||||
unsigned char tp_buf[TP_MAX_VAL_LENGTH]; | ||||||||||
make_tp_string(tp_buf, &tp.tp); | ||||||||||
bpf_printk("tp: %s", tp_buf); | ||||||||||
|
||||||||||
bpf_map_update_elem(&incoming_trace_map, &conn, &tp, BPF_ANY); | ||||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Once we receive a traceID over the wire (TCP packet) we store it for later to be used by the trace code. |
||||||||||
|
||||||||||
bpf_printk("***Data: %x%x", buf[3], buf[2]); | ||||||||||
bpf_printk("***Data: %x%x", buf[1], buf[0]); | ||||||||||
} | ||||||||||
return 0; | ||||||||||
} | ||||||||||
|
||||||||||
|
@@ -703,17 +741,25 @@ int app_egress(struct __sk_buff *skb) { | |||||||||
return 0; | ||||||||||
} | ||||||||||
|
||||||||||
if (!tcp_syn(&tcp) || tcp_ack(&tcp)) { | ||||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'd add a comment such as:
|
||||||||||
return 0; | ||||||||||
} | ||||||||||
|
||||||||||
sort_connection_info(&conn); | ||||||||||
|
||||||||||
if (tcp_syn(&tcp) && !tcp_ack(&tcp)) { | ||||||||||
tp_info_pid_t *tp = bpf_map_lookup_elem(&outgoing_trace_map, &conn); | ||||||||||
|
||||||||||
if (tp) { | ||||||||||
bpf_printk("SYN packed len = %d", skb->len); | ||||||||||
|
||||||||||
u32 val=0xdeadf00d; | ||||||||||
unsigned char tp_buf[TP_MAX_VAL_LENGTH]; | ||||||||||
make_tp_string(tp_buf, &tp->tp); | ||||||||||
bpf_printk("tp: %s", tp_buf); | ||||||||||
|
||||||||||
uint16_t pkt_end = skb->data_end - skb->data; | ||||||||||
bpf_printk("Changing tail and setting data on syn, end=%d", pkt_end); | ||||||||||
bpf_skb_change_tail(skb, pkt_end + sizeof(val), 0); | ||||||||||
bpf_skb_store_bytes(skb, pkt_end, &val, sizeof(val), 0); | ||||||||||
bpf_skb_change_tail(skb, pkt_end + sizeof(tp_info_pid_t), 0); | ||||||||||
bpf_skb_store_bytes(skb, pkt_end, tp, sizeof(tp_info_pid_t), 0); | ||||||||||
|
||||||||||
u32 offset_ip_tot_len = 0; | ||||||||||
u32 offset_ip_checksum = 0; | ||||||||||
|
@@ -724,10 +770,10 @@ int app_egress(struct __sk_buff *skb) { | |||||||||
offset_ip_tot_len = ETH_HLEN + offsetof(struct ipv6hdr, payload_len); | ||||||||||
} | ||||||||||
|
||||||||||
u16 new_tot_len = bpf_htons(bpf_ntohs(tcp.tot_len) + sizeof(val)); | ||||||||||
u16 new_tot_len = bpf_htons(bpf_ntohs(tcp.tot_len) + sizeof(tp_info_pid_t)); | ||||||||||
|
||||||||||
bpf_printk("tot_len = %d, tot_len_alt = %d, new_tot_len = %d", tcp.tot_len, bpf_ntohs(tcp.tot_len), new_tot_len); | ||||||||||
bpf_printk("new_tot_len_alt = %d, h_proto = %d, skb->len = %d", bpf_ntohs(new_tot_len), tcp.h_proto, skb->len); | ||||||||||
bpf_printk("tot_len = %d, new_tot_len = %d", bpf_ntohs(tcp.tot_len), bpf_ntohs(new_tot_len)); | ||||||||||
bpf_printk("h_proto = %d, skb->len = %d", tcp.h_proto, skb->len); | ||||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||||||
|
||||||||||
if (offset_ip_checksum) { | ||||||||||
bpf_l3_csum_replace(skb, offset_ip_checksum, tcp.tot_len, new_tot_len, sizeof(u16)); | ||||||||||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -156,6 +156,14 @@ static __always_inline void delete_server_trace(trace_key_t *t_key) { | |
// bpf_dbg_printk("Deleting server span for id=%llx, pid=%d, ns=%d, res = %d", bpf_get_current_pid_tgid(), t_key->p_key.pid, t_key->p_key.ns, res); | ||
} | ||
|
||
static __always_inline u8 valid_span(unsigned char *span_id) { | ||
return *((u64 *)span_id) != 0; | ||
} | ||
|
||
static __always_inline u8 valid_trace(unsigned char *trace_id) { | ||
return *((u64 *)trace_id) != 0 && *((u64 *)(trace_id + 8)) != 0; | ||
} | ||
|
||
static __always_inline void server_or_client_trace(http_connection_metadata_t *meta, connection_info_t *conn, tp_info_pid_t *tp_p) { | ||
if (!meta) { | ||
return; | ||
|
@@ -196,10 +204,16 @@ static __always_inline void get_or_create_trace_info(http_connection_metadata_t | |
|
||
if (meta) { | ||
if (meta->type == EVENT_HTTP_CLIENT) { | ||
tp_info_pid_t *in_tp = bpf_map_lookup_elem(&outgoing_trace_map, conn); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Before this change the client code only looked for a server trace and if it doesn't find it it will generate the trace information later. Now we look if the TCP egress has setup trace info for us, if we find it we mark as having trace info, we must not regenerate it. If we have our own TCP egress info, but there's server incoming request that wrapped this client call, then we'll overwrite this info. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'd actually put that in a comment |
||
if (in_tp) { | ||
found_tp = 1; | ||
tp_p = in_tp; | ||
} | ||
|
||
tp_p->pid = -1; // we only want to prevent correlation of duplicate server calls by PID | ||
tp_info_pid_t *server_tp = find_parent_trace(); | ||
|
||
if (server_tp && server_tp->valid) { | ||
if (server_tp && server_tp->valid && valid_trace(server_tp->tp.trace_id)) { | ||
found_tp = 1; | ||
bpf_dbg_printk("Found existing server tp for client call"); | ||
__builtin_memcpy(tp_p->tp.trace_id, server_tp->tp.trace_id, sizeof(tp_p->tp.trace_id)); | ||
|
@@ -209,14 +223,22 @@ static __always_inline void get_or_create_trace_info(http_connection_metadata_t | |
//bpf_dbg_printk("Looking up existing trace for connection"); | ||
//dbg_print_http_connection_info(conn); | ||
|
||
tp_info_pid_t *existing_tp = trace_info_for_connection(conn); | ||
|
||
if (correlated_requests(tp_p, existing_tp)) { | ||
tp_info_pid_t *existing_tp = bpf_map_lookup_elem(&incoming_trace_map, conn); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. For server requests, we first look for TCP info and then we fall back to black-box info. |
||
if (existing_tp) { | ||
found_tp = 1; | ||
bpf_dbg_printk("Found existing correlated tp for server request"); | ||
bpf_dbg_printk("Found incoming (TCP) tp for server request"); | ||
__builtin_memcpy(tp_p->tp.trace_id, existing_tp->tp.trace_id, sizeof(tp_p->tp.trace_id)); | ||
__builtin_memcpy(tp_p->tp.parent_id, existing_tp->tp.span_id, sizeof(tp_p->tp.parent_id)); | ||
} | ||
} else { | ||
existing_tp = trace_info_for_connection(conn); | ||
|
||
if (correlated_requests(tp_p, existing_tp)) { | ||
found_tp = 1; | ||
bpf_dbg_printk("Found existing correlated tp for server request"); | ||
__builtin_memcpy(tp_p->tp.trace_id, existing_tp->tp.trace_id, sizeof(tp_p->tp.trace_id)); | ||
__builtin_memcpy(tp_p->tp.parent_id, existing_tp->tp.span_id, sizeof(tp_p->tp.parent_id)); | ||
} | ||
} | ||
} | ||
} | ||
|
||
|
@@ -276,8 +298,4 @@ static __always_inline void get_or_create_trace_info(http_connection_metadata_t | |
return; | ||
} | ||
|
||
static __always_inline u8 valid_span(unsigned char *span_id) { | ||
return *((u64 *)span_id) != 0; | ||
} | ||
|
||
#endif |
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I split this so that we can use it on older kernels where bpf_printk allowed for at most 3 arguments.