Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Draft] Append BTF ID to type ID in libbpf CO-RE relocation #13

Closed
Closed
6 changes: 6 additions & 0 deletions include/uapi/linux/bpf.h
Original file line number Diff line number Diff line change
Expand Up @@ -873,6 +873,7 @@ enum bpf_cmd {
BPF_ITER_CREATE,
BPF_LINK_DETACH,
BPF_PROG_BIND_MAP,
BPF_BTF_VMLINUX_INFO,
};

enum bpf_map_type {
Expand Down Expand Up @@ -1396,6 +1397,11 @@ union bpf_attr {
__aligned_u64 info;
} info;

struct { /* anonymous struct used by BPF_BTF_VMLINUX_INFO */
__u32 info_len;
__aligned_u64 info;
} info_vmlinux;

struct { /* anonymous struct used by BPF_PROG_QUERY command */
__u32 target_fd; /* container object to query */
__u32 attach_type;
Expand Down
22 changes: 22 additions & 0 deletions kernel/bpf/syscall.c
Original file line number Diff line number Diff line change
Expand Up @@ -4542,6 +4542,25 @@ static int bpf_prog_bind_map(union bpf_attr *attr)
return ret;
}

#define BPF_BTF_VMLINUX_INFO_LAST_FIELD info.info
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this needed?

Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yep, it's used behind the curtains by CHECK_ATTR():

https://elixir.bootlin.com/linux/v5.15-rc7/source/kernel/bpf/syscall.c#L715

That's why I prefer to pass all needed params to macros explicitly :P

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oh, got it, thanks


/*
 * Handler for the BPF_BTF_VMLINUX_INFO command.
 *
 * Fills the user-supplied struct bpf_btf_info (attr->info.info, of length
 * attr->info.info_len) with information about the vmlinux BTF object, so
 * that user space does not need a file descriptor for it.
 *
 * Returns the result of btf_get_info_by_fd() on success, -EINVAL if the
 * attribute has unexpected trailing fields set or vmlinux BTF is not
 * available, the error from bpf_check_uarg_tail_zero() if the user buffer
 * has non-zero bytes past the known struct size, or the error encoded in
 * the pointer returned by bpf_get_btf_vmlinux().
 */
static int bpf_btf_get_vmlinux_info(const union bpf_attr *attr,
				    union bpf_attr __user *uattr)
{
	struct bpf_btf_info __user *uinfo = u64_to_user_ptr(attr->info.info);
	u32 info_len = attr->info.info_len;
	struct btf *btf;
	int err;

	/* CHECK_ATTR() implicitly uses BPF_BTF_VMLINUX_INFO_LAST_FIELD. */
	if (CHECK_ATTR(BPF_BTF_VMLINUX_INFO))
		return -EINVAL;

	err = bpf_check_uarg_tail_zero(USER_BPFPTR(uinfo), sizeof(*uinfo), info_len);
	if (err)
		return err;

	/*
	 * vmlinux BTF may be missing or may have failed to parse; never hand
	 * an invalid pointer to btf_get_info_by_fd(), which dereferences it.
	 */
	btf = bpf_get_btf_vmlinux();
	if (IS_ERR(btf))
		return PTR_ERR(btf);
	if (!btf)
		return -EINVAL;

	return btf_get_info_by_fd(btf, attr, uattr);
}

static int __sys_bpf(int cmd, bpfptr_t uattr, unsigned int size)
{
union bpf_attr attr;
Expand Down Expand Up @@ -4678,6 +4697,9 @@ static int __sys_bpf(int cmd, bpfptr_t uattr, unsigned int size)
case BPF_PROG_BIND_MAP:
err = bpf_prog_bind_map(&attr);
break;
case BPF_BTF_VMLINUX_INFO:
err = bpf_btf_get_vmlinux_info(&attr, uattr.user);
break;
default:
err = -EINVAL;
break;
Expand Down
21 changes: 18 additions & 3 deletions samples/bpf/xdp_meta.bpf.c
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,16 @@
#include <bpf/bpf_core_read.h>
#include <bpf/bpf_helpers.h>

/*
 * Local, reduced declaration of a ring structure, passed to
 * bpf_core_type_id_kernel() in xdp_meta_prog() to look up the type ID of
 * the matching kernel type.
 *
 * NOTE(review): presumably this mirrors the leading fields of the ice
 * driver's struct ice_ring, with the "___min" suffix acting as a CO-RE
 * type "flavor" that is ignored when matching against the kernel type --
 * confirm against the driver sources and libbpf documentation.
 *
 * preserve_access_index asks the compiler to record field accesses on
 * this type as relocatable rather than using fixed offsets.
 */
struct ice_ring___min {
struct ice_ring *next;
void *desc;
struct device *dev;
struct net_device *netdev;
struct ice_vsi *vsi;
struct ice_q_vector *q_vector;
u8 *tail;
} __attribute__((preserve_access_index));

SEC("xdp")
int xdp_meta_prog(struct xdp_md *ctx)
{
Expand All @@ -17,8 +27,9 @@ int xdp_meta_prog(struct xdp_md *ctx)
void *data = (void *)(long)ctx->data;
struct ethhdr *eth = data;
u64 nh_off;
u32 btf_id_libbpf;
u64 btf_id_libbpf;
u32 btf_id_meta;
u64 btf_id_ring;
u16 rxcvid;
u32 hash;
long *value;
Expand All @@ -34,8 +45,12 @@ int xdp_meta_prog(struct xdp_md *ctx)
btf_id_libbpf = bpf_core_type_id_kernel(struct xdp_meta_generic);
bpf_probe_read_kernel(&btf_id_meta, sizeof(btf_id_meta), (void*)data - 4);

bpf_printk("id from libbpf %d, id from hints metadata %d\n",
btf_id_libbpf, btf_id_meta);
bpf_printk("id from libbpf %u (module BTF id: %u), id from hints metadata %u\n",
btf_id_libbpf & 0xFFFFFFFF, btf_id_libbpf >> 32, btf_id_meta);

btf_id_ring = bpf_core_type_id_kernel(struct ice_ring___min);
bpf_printk("ring type id %u, ice BTF id %u\n",
btf_id_ring & 0xFFFFFFFF, btf_id_ring >> 32);

if (btf_id_libbpf == btf_id_meta)
bpf_printk("Received meta is generic\n");
Expand Down
1 change: 1 addition & 0 deletions tools/include/uapi/linux/bpf.h
Original file line number Diff line number Diff line change
Expand Up @@ -873,6 +873,7 @@ enum bpf_cmd {
BPF_ITER_CREATE,
BPF_LINK_DETACH,
BPF_PROG_BIND_MAP,
BPF_BTF_VMLINUX_INFO,
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

BPF_BTF_GET_VMLINUX_ID? Third word is usually a verb here I'd say.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

BPF_BTF_GET_VMLINUX_INFO would be better?

};

enum bpf_map_type {
Expand Down
17 changes: 17 additions & 0 deletions tools/lib/bpf/bpf.c
Original file line number Diff line number Diff line change
Expand Up @@ -1017,6 +1017,23 @@ int bpf_load_btf(const void *btf, __u32 btf_size, char *log_buf, __u32 log_buf_s
return libbpf_err_errno(fd);
}

/*
 * Query the kernel for information about the vmlinux BTF object using the
 * BPF_BTF_VMLINUX_INFO command.
 *
 * @info:     caller-provided buffer the kernel fills in
 * @info_len: in: size of @info in bytes; out: updated from the attribute
 *            returned by the kernel, only when the syscall succeeds
 *
 * Returns the syscall result passed through libbpf_err_errno().
 */
int bpf_get_vmlinux_btf_info(void *info, __u32 *info_len)
{
	union bpf_attr attr;
	int ret;

	/* Zero the whole union so no stale bytes reach the kernel. */
	memset(&attr, 0, sizeof(attr));
	attr.info.info = ptr_to_u64(info);
	attr.info.info_len = *info_len;

	ret = sys_bpf(BPF_BTF_VMLINUX_INFO, &attr, sizeof(attr));
	if (ret == 0)
		*info_len = attr.info.info_len;

	return libbpf_err_errno(ret);
}

int bpf_task_fd_query(int pid, int fd, __u32 flags, char *buf, __u32 *buf_len,
__u32 *prog_id, __u32 *fd_type, __u64 *probe_offset,
__u64 *probe_addr)
Expand Down
19 changes: 17 additions & 2 deletions tools/lib/bpf/btf.c
Original file line number Diff line number Diff line change
Expand Up @@ -430,13 +430,28 @@ const struct btf *btf__base_btf(const struct btf *btf)
return btf->base_btf;
}

__u32 btf__obj_id(const struct btf *btf)
static __u32 btf_get_vmlinux_obj_id(void)
{
struct bpf_btf_info btf_info;
unsigned int len = sizeof(btf_info);
int err = 0;

memset(&btf_info, 0, sizeof(btf_info));
err = bpf_get_vmlinux_btf_info(&btf_info, &len);

if (err) return 0;
return btf_info.id;
}

__u32 btf_obj_id(const struct btf *btf)
{
struct bpf_btf_info btf_info;
unsigned int len = sizeof(btf_info);
int err = 0;
int fd = btf__fd(btf);

if (btf->base_btf == NULL) return btf_get_vmlinux_obj_id();

memset(&btf_info, 0, sizeof(btf_info));
err = bpf_obj_get_info_by_fd(fd, &btf_info, &len);
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm curious if there is a way to get FD for vmlinux to unify both implementations. Loaded kernel is not a file, just a piece of memory, so we can't have a "real" FD here, OTOH we have /proc/kcore, so there probably should be.
Or, we could use this new syscall to be able to get type ID for both vmlinux and modules, without FDs.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I've just tested, we can totally get vmlinux fd, so nothing prevents us from initializing it correctly at btf load time. Also now I am kinda more inclined towards adding btf id to libbpf btf struct, it shouldn't be hard to implement. Maybe I'd rather submit that small fix I've mentioned first, just to see what community currently thinks about how fds should be used in libbpf.

Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, sure! I'm glad we're able to get vmlinux fd, seems like new syscall commands won't be needed.

Copy link
Collaborator Author

@walking-machine walking-machine Oct 28, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We do still need the syscall I've added in this PR, but no other should be necessary

Copy link
Owner

@alobakin alobakin Oct 28, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Getting vmlinux' FD -> bpf_obj_get_info_by_fd() instead of btf_get_vmlinux_obj_id(), or am I missing something?

bpf_obj_get_info_by_fd() ----> bpf_btf_get_info_by_fd() ------
                                                             | ----> btf_get_info_by_fd()
btf_get_vmlinux_obj_id() ----> bpf_btf_get_vmlinux_info() ----

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I guess I need to elaborate on the current situation:

  • When we are loading kernel module BTF, libbpf iterates over IDs: it first gets an fd from the ID, then loads btf_info by that fd, and from that struct btf_info the libbpf btf struct is created. The info also contains the BTF ID, which does not seem to be very useful without my changes. We actually do get vmlinux while iterating, but it's discarded.
  • When loading the vmlinux, it opens a virtual file with a path, so no ids, fds or btf_info participate in this process.
  • vmlinux loading happens pretty often in places, where we are not supposed to load any other module BTFs.

Therefore I would do the following:

  • In the vmlinux loading function: replace loading from the virtual file with loading via my function bpf_btf_get_vmlinux_info(), which will allow us to get a valid ID -> valid fd (using something like btf_get_fd_from_id())
  • After that vmlinux would have a valid fd + we have an option to add and fill btf_id field, just to avoid a lot of kernel-user copying while resolving CO-RE, which is inevitable if we use fds only (we would call btf_get_info_by_fd() for every relocation)

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Changing vmlinux loading process will render btf_get_vmlinux_obj_id() obsolete

Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sounds fine to me, seems like we should give it a go!


Expand Down Expand Up @@ -1390,10 +1405,10 @@ struct btf *btf_get_from_fd(int btf_fd, struct btf *base_btf)
}

btf = btf_new(ptr, btf_info.btf_size, base_btf);
btf->fd = btf_fd;

exit_free:
free(ptr);
btf->fd = btf_fd;
return btf;
}

Expand Down
1 change: 0 additions & 1 deletion tools/lib/bpf/btf.h
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,6 @@ LIBBPF_API __s32 btf__find_by_name_kind(const struct btf *btf,
const char *type_name, __u32 kind);
LIBBPF_API __u32 btf__get_nr_types(const struct btf *btf);
LIBBPF_API const struct btf *btf__base_btf(const struct btf *btf);
LIBBPF_API __u32 btf__obj_id(const struct btf *btf);
LIBBPF_API const struct btf_type *btf__type_by_id(const struct btf *btf,
__u32 id);
LIBBPF_API size_t btf__pointer_size(const struct btf *btf);
Expand Down
1 change: 0 additions & 1 deletion tools/lib/bpf/libbpf.map
Original file line number Diff line number Diff line change
Expand Up @@ -385,5 +385,4 @@ LIBBPF_0.5.0 {
btf__load_vmlinux_btf;
btf_dump__dump_type_data;
libbpf_set_strict_mode;
btf__obj_id;
} LIBBPF_0.4.0;
2 changes: 2 additions & 0 deletions tools/lib/bpf/libbpf_internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -287,6 +287,8 @@ int bpf_object__variable_offset(const struct bpf_object *obj, const char *name,
struct btf *btf_get_from_fd(int btf_fd, struct btf *base_btf);
void btf_get_kernel_prefix_kind(enum bpf_attach_type attach_type,
const char **prefix, int *kind);
__u32 btf_obj_id(const struct btf *btf);
int bpf_get_vmlinux_btf_info(void *info, __u32 *info_len);

struct btf_ext_info {
/*
Expand Down
2 changes: 1 addition & 1 deletion tools/lib/bpf/relo_core.c
Original file line number Diff line number Diff line change
Expand Up @@ -840,7 +840,7 @@ static int bpf_core_calc_relo(const char *prog_name,
err = bpf_core_calc_type_relo(relo, local_spec, &res->orig_val);
err = err ?: bpf_core_calc_type_relo(relo, targ_spec, &res->new_val);
if (!err && relo->kind == BPF_TYPE_ID_TARGET)
res->btf_obj_id = btf__obj_id(targ_spec->btf);
res->btf_obj_id = btf_obj_id(targ_spec->btf);
} else if (core_relo_is_enumval_based(relo->kind)) {
err = bpf_core_calc_enumval_relo(relo, local_spec, &res->orig_val);
err = err ?: bpf_core_calc_enumval_relo(relo, targ_spec, &res->new_val);
Expand Down