Skip to content

bpf: Implement mprog API on top of existing cgroup progs #8917

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions include/linux/bpf-cgroup-defs.h
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,7 @@ struct cgroup_bpf {
*/
struct hlist_head progs[MAX_CGROUP_BPF_ATTACH_TYPE];
u8 flags[MAX_CGROUP_BPF_ATTACH_TYPE];
u64 revisions[MAX_CGROUP_BPF_ATTACH_TYPE];

/* list of cgroup shared storages */
struct list_head storages;
Expand Down
7 changes: 7 additions & 0 deletions include/uapi/linux/bpf.h
Original file line number Diff line number Diff line change
Expand Up @@ -1794,6 +1794,13 @@ union bpf_attr {
};
__u64 expected_revision;
} netkit;
struct {
union {
__u32 relative_fd;
__u32 relative_id;
};
__u64 expected_revision;
} cgroup;
};
} link_create;

Expand Down
195 changes: 173 additions & 22 deletions kernel/bpf/cgroup.c
Original file line number Diff line number Diff line change
Expand Up @@ -624,6 +624,129 @@ static struct bpf_prog_list *find_attach_entry(struct hlist_head *progs,
return NULL;
}

/*
 * Look up the link that a relative attach request names as its anchor.
 *
 * @flags:    attach flags; only BPF_F_ID is examined here
 * @id_or_fd: link id when BPF_F_ID is set, otherwise a link fd
 * @type:     program type the anchor must have, or 0 to skip the check
 *
 * Without BPF_F_ID a zero @id_or_fd is treated as "no anchor given" and
 * yields -EINVAL. Lookup errors are passed through unchanged. If the link's
 * program has a different type, the link is released with bpf_link_put()
 * and -EINVAL is returned.
 *
 * Returns the link on success; callers in this file release it with
 * bpf_link_put() when they are done with it.
 */
static struct bpf_link *bpf_get_anchor_link(u32 flags, u32 id_or_fd, enum bpf_prog_type type)
{
	struct bpf_link *anchor;

	if (flags & BPF_F_ID)
		anchor = bpf_link_by_id(id_or_fd);
	else if (id_or_fd)
		anchor = bpf_link_get_from_fd(id_or_fd);
	else
		return ERR_PTR(-EINVAL);

	if (IS_ERR(anchor))
		return anchor;

	/* type == 0 means the caller does not constrain the program type */
	if (!type || anchor->prog->type == type)
		return anchor;

	bpf_link_put(anchor);
	return ERR_PTR(-EINVAL);
}

/*
 * Look up the program that a relative attach request names as its anchor.
 *
 * @flags:    attach flags; only BPF_F_ID is examined here
 * @id_or_fd: program id when BPF_F_ID is set, otherwise a program fd
 * @type:     program type the anchor must have, or 0 to skip the check
 *
 * Without BPF_F_ID a zero @id_or_fd is treated as "no anchor given" and
 * yields -EINVAL. Lookup errors are passed through unchanged. If the
 * program has a different type, it is released with bpf_prog_put() and
 * -EINVAL is returned.
 *
 * Returns the program on success; callers in this file release it with
 * bpf_prog_put() when they are done with it.
 */
static struct bpf_prog *bpf_get_anchor_prog(u32 flags, u32 id_or_fd, enum bpf_prog_type type)
{
	struct bpf_prog *anchor;

	if (flags & BPF_F_ID)
		anchor = bpf_prog_by_id(id_or_fd);
	else if (id_or_fd)
		anchor = bpf_prog_get(id_or_fd);
	else
		return ERR_PTR(-EINVAL);

	if (IS_ERR(anchor))
		return anchor;

	/* type == 0 means the caller does not constrain the program type */
	if (!type || anchor->type == type)
		return anchor;

	bpf_prog_put(anchor);
	return ERR_PTR(-EINVAL);
}

/*
 * Find the list entry that a new attachment should be placed next to.
 *
 * @progs:    attachment list to search
 * @prog:     program being attached; its type is required of the anchor
 * @flags:    attach flags (BPF_F_LINK, BPF_F_ID, BPF_F_BEFORE, BPF_F_AFTER,
 *            BPF_F_PREORDER are examined)
 * @id_or_fd: id or fd of the anchor program/link, 0 if none
 *
 * Flag validation:
 *  - if an anchor is requested (BPF_F_LINK, BPF_F_ID or a non-zero
 *    @id_or_fd), exactly one of BPF_F_BEFORE / BPF_F_AFTER must be set;
 *  - otherwise, on a non-empty list, the two must not both be set.
 *
 * Without an anchor the first entry is returned for BPF_F_BEFORE and the
 * last entry in every other case; NULL is returned for an empty list.
 *
 * With an anchor the list is searched for the matching entry. A link anchor
 * (BPF_F_LINK) matches only the entry that was attached through that very
 * link; a program anchor matches the first entry carrying that program,
 * whether attached directly or through a link. The matched entry must agree
 * with the request on BPF_F_PREORDER.
 *
 * Returns the entry, NULL (empty list, no anchor), or an ERR_PTR():
 * -EINVAL for bad flags, a wrong anchor type or a preorder mismatch,
 * -ENOENT when the anchor is not in @progs, or the anchor lookup's error.
 * The reference taken on the anchor is always dropped before returning.
 */
static struct bpf_prog_list *get_prog_list(struct hlist_head *progs, struct bpf_prog *prog,
					   u32 flags, u32 id_or_fd)
{
	bool link = flags & BPF_F_LINK, id = flags & BPF_F_ID;
	bool before = flags & BPF_F_BEFORE, after = flags & BPF_F_AFTER;
	struct bpf_prog_list *pltmp, *pl = ERR_PTR(-EINVAL);
	struct bpf_prog *anchor_prog = NULL, *pltmp_prog;
	bool preorder = flags & BPF_F_PREORDER;
	struct bpf_link *anchor_link = NULL;

	if (link || id || id_or_fd) {
		/* flags must have exactly one of BPF_F_BEFORE or BPF_F_AFTER */
		if (before == after)
			return ERR_PTR(-EINVAL);
	} else if (!hlist_empty(progs)) {
		/* flags cannot have both BPF_F_BEFORE and BPF_F_AFTER */
		if (before && after)
			return ERR_PTR(-EINVAL);
	}

	if (link) {
		anchor_link = bpf_get_anchor_link(flags, id_or_fd, prog->type);
		if (IS_ERR(anchor_link))
			return ERR_CAST(anchor_link);
	} else if (id || id_or_fd) {
		anchor_prog = bpf_get_anchor_prog(flags, id_or_fd, prog->type);
		if (IS_ERR(anchor_prog))
			return ERR_CAST(anchor_prog);
	}

	if (!anchor_link && !anchor_prog) {
		/* if there is no anchor, then BPF_F_PREORDER doesn't matter
		 * since either prepend or append to a combined list of progs
		 * will end up with correct result.
		 */
		hlist_for_each_entry(pltmp, progs, node) {
			/* first entry for BPF_F_BEFORE, otherwise the last one */
			if (before || !pltmp->node.next)
				return pltmp;
		}
		return NULL;
	}

	hlist_for_each_entry(pltmp, progs, node) {
		if (anchor_link) {
			/* A link anchor must be this exact link: comparing
			 * programs would pick up a different entry that merely
			 * carries the same program.
			 */
			if (!pltmp->link || &pltmp->link->link != anchor_link)
				continue;
		} else {
			pltmp_prog = pltmp->link ? pltmp->link->link.prog : pltmp->prog;
			if (pltmp_prog != anchor_prog)
				continue;
		}
		/* on a preorder mismatch pl keeps its initial -EINVAL */
		if (!!(pltmp->flags & BPF_F_PREORDER) == preorder)
			pl = pltmp;
		goto out;
	}

	pl = ERR_PTR(-ENOENT);
out:
	/* single exit for every path that holds an anchor reference */
	if (anchor_link)
		bpf_link_put(anchor_link);
	else
		bpf_prog_put(anchor_prog);
	return pl;
}

/*
 * Link @pl into @progs at the position selected by @flags and @id_or_fd.
 *
 * The neighbouring entry is chosen by get_prog_list(); if that fails, its
 * error code is returned and @pl is left unlinked. When no neighbour is
 * returned, @pl becomes the head of the list. Otherwise @pl goes directly
 * before the neighbour for BPF_F_BEFORE and directly after it in all other
 * cases.
 *
 * Returns 0 on success or a negative errno from get_prog_list().
 */
static int insert_pl_to_hlist(struct bpf_prog_list *pl, struct hlist_head *progs,
			      struct bpf_prog *prog, u32 flags, u32 id_or_fd)
{
	struct bpf_prog_list *neighbour = get_prog_list(progs, prog, flags, id_or_fd);

	if (IS_ERR(neighbour))
		return PTR_ERR(neighbour);

	if (!neighbour) {
		hlist_add_head(&pl->node, progs);
		return 0;
	}

	if (flags & BPF_F_BEFORE)
		hlist_add_before(&pl->node, &neighbour->node);
	else
		hlist_add_behind(&pl->node, &neighbour->node);

	return 0;
}

/**
* __cgroup_bpf_attach() - Attach the program or the link to a cgroup, and
* propagate the change to descendants
Expand All @@ -633,14 +756,17 @@ static struct bpf_prog_list *find_attach_entry(struct hlist_head *progs,
* @replace_prog: Previously attached program to replace if BPF_F_REPLACE is set
* @type: Type of attach operation
* @flags: Option flags
* @id_or_fd: Relative prog id or fd
* @revision: bpf_prog_list revision
*
* Exactly one of @prog or @link can be non-null.
* Must be called with cgroup_mutex held.
*/
static int __cgroup_bpf_attach(struct cgroup *cgrp,
struct bpf_prog *prog, struct bpf_prog *replace_prog,
struct bpf_cgroup_link *link,
enum bpf_attach_type type, u32 flags)
enum bpf_attach_type type, u32 flags, u32 id_or_fd,
u64 revision)
{
u32 saved_flags = (flags & (BPF_F_ALLOW_OVERRIDE | BPF_F_ALLOW_MULTI));
struct bpf_prog *old_prog = NULL;
Expand All @@ -656,6 +782,9 @@ static int __cgroup_bpf_attach(struct cgroup *cgrp,
((flags & BPF_F_REPLACE) && !(flags & BPF_F_ALLOW_MULTI)))
/* invalid combination */
return -EINVAL;
if ((flags & BPF_F_REPLACE) && (flags & (BPF_F_BEFORE | BPF_F_AFTER)))
/* only either replace or insertion with before/after */
return -EINVAL;
if (link && (prog || replace_prog))
/* only either link or prog/replace_prog can be specified */
return -EINVAL;
Expand All @@ -666,6 +795,8 @@ static int __cgroup_bpf_attach(struct cgroup *cgrp,
atype = bpf_cgroup_atype_find(type, new_prog->aux->attach_btf_id);
if (atype < 0)
return -EINVAL;
if (revision && revision != cgrp->bpf.revisions[atype])
return -ESTALE;

progs = &cgrp->bpf.progs[atype];

Expand Down Expand Up @@ -694,22 +825,18 @@ static int __cgroup_bpf_attach(struct cgroup *cgrp,
if (pl) {
old_prog = pl->prog;
} else {
struct hlist_node *last = NULL;

pl = kmalloc(sizeof(*pl), GFP_KERNEL);
if (!pl) {
bpf_cgroup_storages_free(new_storage);
return -ENOMEM;
}
if (hlist_empty(progs))
hlist_add_head(&pl->node, progs);
else
hlist_for_each(last, progs) {
if (last->next)
continue;
hlist_add_behind(&pl->node, last);
break;
}

err = insert_pl_to_hlist(pl, progs, prog ? : link->link.prog, flags, id_or_fd);
if (err) {
kfree(pl);
bpf_cgroup_storages_free(new_storage);
return err;
}
}

pl->prog = prog;
Expand All @@ -728,6 +855,7 @@ static int __cgroup_bpf_attach(struct cgroup *cgrp,
if (err)
goto cleanup_trampoline;

cgrp->bpf.revisions[atype] += 1;
if (old_prog) {
if (type == BPF_LSM_CGROUP)
bpf_trampoline_unlink_cgroup_shim(old_prog);
Expand Down Expand Up @@ -759,12 +887,13 @@ static int cgroup_bpf_attach(struct cgroup *cgrp,
struct bpf_prog *prog, struct bpf_prog *replace_prog,
struct bpf_cgroup_link *link,
enum bpf_attach_type type,
u32 flags)
u32 flags, u32 id_or_fd, u64 revision)
{
int ret;

cgroup_lock();
ret = __cgroup_bpf_attach(cgrp, prog, replace_prog, link, type, flags);
ret = __cgroup_bpf_attach(cgrp, prog, replace_prog, link, type, flags,
id_or_fd, revision);
cgroup_unlock();
return ret;
}
Expand Down Expand Up @@ -852,6 +981,7 @@ static int __cgroup_bpf_replace(struct cgroup *cgrp,
if (!found)
return -ENOENT;

cgrp->bpf.revisions[atype] += 1;
old_prog = xchg(&link->link.prog, new_prog);
replace_effective_prog(cgrp, atype, link);
bpf_prog_put(old_prog);
Expand Down Expand Up @@ -977,12 +1107,14 @@ static void purge_effective_progs(struct cgroup *cgrp, struct bpf_prog *prog,
* @prog: A program to detach or NULL
* @link: A link to detach or NULL
* @type: Type of detach operation
* @revision: bpf_prog_list revision
*
* At most one of @prog or @link can be non-NULL.
* Must be called with cgroup_mutex held.
*/
static int __cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog,
struct bpf_cgroup_link *link, enum bpf_attach_type type)
struct bpf_cgroup_link *link, enum bpf_attach_type type,
u64 revision)
{
enum cgroup_bpf_attach_type atype;
struct bpf_prog *old_prog;
Expand All @@ -1000,6 +1132,9 @@ static int __cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog,
if (atype < 0)
return -EINVAL;

if (revision && revision != cgrp->bpf.revisions[atype])
return -ESTALE;

progs = &cgrp->bpf.progs[atype];
flags = cgrp->bpf.flags[atype];

Expand All @@ -1025,6 +1160,7 @@ static int __cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog,

/* now can actually delete it from this cgroup list */
hlist_del(&pl->node);
cgrp->bpf.revisions[atype] += 1;

kfree(pl);
if (hlist_empty(progs))
Expand All @@ -1040,12 +1176,12 @@ static int __cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog,
}

static int cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog,
enum bpf_attach_type type)
enum bpf_attach_type type, u64 revision)
{
int ret;

cgroup_lock();
ret = __cgroup_bpf_detach(cgrp, prog, NULL, type);
ret = __cgroup_bpf_detach(cgrp, prog, NULL, type, revision);
cgroup_unlock();
return ret;
}
Expand All @@ -1063,6 +1199,7 @@ static int __cgroup_bpf_query(struct cgroup *cgrp, const union bpf_attr *attr,
struct bpf_prog_array *effective;
int cnt, ret = 0, i;
int total_cnt = 0;
u64 revision = 0;
u32 flags;

if (effective_query && prog_attach_flags)
Expand Down Expand Up @@ -1100,6 +1237,10 @@ static int __cgroup_bpf_query(struct cgroup *cgrp, const union bpf_attr *attr,
return -EFAULT;
if (copy_to_user(&uattr->query.prog_cnt, &total_cnt, sizeof(total_cnt)))
return -EFAULT;
if (!effective_query && from_atype == to_atype)
revision = cgrp->bpf.revisions[from_atype];
if (copy_to_user(&uattr->query.revision, &revision, sizeof(revision)))
return -EFAULT;
if (attr->query.prog_cnt == 0 || !prog_ids || !total_cnt)
/* return early if user requested only program count + flags */
return 0;
Expand Down Expand Up @@ -1182,7 +1323,8 @@ int cgroup_bpf_prog_attach(const union bpf_attr *attr,
}

ret = cgroup_bpf_attach(cgrp, prog, replace_prog, NULL,
attr->attach_type, attr->attach_flags);
attr->attach_type, attr->attach_flags,
attr->relative_fd, attr->expected_revision);

if (replace_prog)
bpf_prog_put(replace_prog);
Expand All @@ -1204,7 +1346,7 @@ int cgroup_bpf_prog_detach(const union bpf_attr *attr, enum bpf_prog_type ptype)
if (IS_ERR(prog))
prog = NULL;

ret = cgroup_bpf_detach(cgrp, prog, attr->attach_type);
ret = cgroup_bpf_detach(cgrp, prog, attr->attach_type, attr->expected_revision);
if (prog)
bpf_prog_put(prog);

Expand Down Expand Up @@ -1233,7 +1375,7 @@ static void bpf_cgroup_link_release(struct bpf_link *link)
}

WARN_ON(__cgroup_bpf_detach(cg_link->cgroup, NULL, cg_link,
cg_link->type));
cg_link->type, 0));
if (cg_link->type == BPF_LSM_CGROUP)
bpf_trampoline_unlink_cgroup_shim(cg_link->link.prog);

Expand Down Expand Up @@ -1305,14 +1447,21 @@ static const struct bpf_link_ops bpf_cgroup_link_lops = {
.fill_link_info = bpf_cgroup_link_fill_link_info,
};

/*
 * Flags accepted in link_create.flags by cgroup_bpf_link_attach(); a request
 * with any bit outside this mask is rejected with -EINVAL.
 */
#define BPF_F_LINK_ATTACH_MASK \
(BPF_F_ID | \
BPF_F_BEFORE | \
BPF_F_AFTER | \
BPF_F_PREORDER | \
BPF_F_LINK)

int cgroup_bpf_link_attach(const union bpf_attr *attr, struct bpf_prog *prog)
{
struct bpf_link_primer link_primer;
struct bpf_cgroup_link *link;
struct cgroup *cgrp;
int err;

if (attr->link_create.flags)
if (attr->link_create.flags & (~BPF_F_LINK_ATTACH_MASK))
return -EINVAL;

cgrp = cgroup_get_from_fd(attr->link_create.target_fd);
Expand All @@ -1336,7 +1485,9 @@ int cgroup_bpf_link_attach(const union bpf_attr *attr, struct bpf_prog *prog)
}

err = cgroup_bpf_attach(cgrp, NULL, NULL, link,
link->type, BPF_F_ALLOW_MULTI);
link->type, BPF_F_ALLOW_MULTI | attr->link_create.flags,
attr->link_create.cgroup.relative_fd,
attr->link_create.cgroup.expected_revision);
if (err) {
bpf_link_cleanup(&link_primer);
goto out_put_cgroup;
Expand Down
Loading
Loading