Skip to content

Commit 0d01da6

Browse files
fomichevAlexei Starovoitov
authored and
Alexei Starovoitov
committed
bpf: implement getsockopt and setsockopt hooks
Implement new BPF_PROG_TYPE_CGROUP_SOCKOPT program type and BPF_CGROUP_{G,S}ETSOCKOPT cgroup hooks. BPF_CGROUP_SETSOCKOPT can modify user setsockopt arguments before passing them down to the kernel or bypass kernel completely. BPF_CGROUP_GETSOCKOPT can inspect/modify getsockopt arguments that kernel returns. Both hooks reuse existing PTR_TO_PACKET{,_END} infrastructure. The buffer memory is pre-allocated (because I don't think there is a precedent for working with __user memory from bpf). This might be slow to do for each {s,g}etsockopt call, that's why I've added __cgroup_bpf_prog_array_is_empty that exits early if there is nothing attached to a cgroup. Note, however, that there is a race between __cgroup_bpf_prog_array_is_empty and BPF_PROG_RUN_ARRAY where cgroup program layout might have changed; this should not be a problem because in general there is a race between multiple calls to {s,g}etsockopt and user adding/removing bpf progs from a cgroup. The return code of the BPF program is handled as follows: * 0: EPERM * 1: success, continue with next BPF program in the cgroup chain v9: * allow overwriting setsockopt arguments (Alexei Starovoitov): * use set_fs (same as kernel_setsockopt) * buffer is always kzalloc'd (no small on-stack buffer) v8: * use s32 for optlen (Andrii Nakryiko) v7: * return only 0 or 1 (Alexei Starovoitov) * always run all progs (Alexei Starovoitov) * use optval=0 as kernel bypass in setsockopt (Alexei Starovoitov) (decided to use optval=-1 instead, optval=0 might be a valid input) * call getsockopt hook after kernel handlers (Alexei Starovoitov) v6: * rework cgroup chaining; stop as soon as bpf program returns 0 or 2; see patch with the documentation for the details * drop Andrii's and Martin's Acked-by (not sure they are comfortable with the new state of things) v5: * skip copy_to_user() and put_user() when ret == 0 (Martin Lau) v4: * don't export bpf_sk_fullsock helper (Martin Lau) * size != sizeof(__u64) for uapi pointers (Martin 
Lau) * offsetof instead of bpf_ctx_range when checking ctx access (Martin Lau) v3: * typos in BPF_PROG_CGROUP_SOCKOPT_RUN_ARRAY comments (Andrii Nakryiko) * reverse christmas tree in BPF_PROG_CGROUP_SOCKOPT_RUN_ARRAY (Andrii Nakryiko) * use __bpf_md_ptr instead of __u32 for optval{,_end} (Martin Lau) * use BPF_FIELD_SIZEOF() for consistency (Martin Lau) * new CG_SOCKOPT_ACCESS macro to wrap repeated parts v2: * moved bpf_sockopt_kern fields around to remove a hole (Martin Lau) * aligned bpf_sockopt_kern->buf to 8 bytes (Martin Lau) * bpf_prog_array_is_empty instead of bpf_prog_array_length (Martin Lau) * added [0,2] return code check to verifier (Martin Lau) * dropped unused buf[64] from the stack (Martin Lau) * use PTR_TO_SOCKET for bpf_sockopt->sk (Martin Lau) * dropped bpf_target_off from ctx rewrites (Martin Lau) * use return code for kernel bypass (Martin Lau & Andrii Nakryiko) Cc: Andrii Nakryiko <[email protected]> Cc: Martin Lau <[email protected]> Signed-off-by: Stanislav Fomichev <[email protected]> Signed-off-by: Alexei Starovoitov <[email protected]>
1 parent 3b1c667 commit 0d01da6

File tree

11 files changed

+472
-1
lines changed

11 files changed

+472
-1
lines changed

include/linux/bpf-cgroup.h

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -124,6 +124,14 @@ int __cgroup_bpf_run_filter_sysctl(struct ctl_table_header *head,
124124
loff_t *ppos, void **new_buf,
125125
enum bpf_attach_type type);
126126

127+
/*
 * Out-of-line handler behind BPF_CGROUP_RUN_PROG_SETSOCKOPT(); the macro
 * forwards its arguments here unchanged when cgroup_bpf_enabled is set.
 * level, optname and optlen are passed by pointer, and kernel_optval is a
 * char ** out-parameter.
 * NOTE(review): presumably the pointers let an attached program rewrite the
 * arguments and hand back a kernel buffer -- confirm against the definition,
 * which is not part of this header.
 */
int __cgroup_bpf_run_filter_setsockopt(struct sock *sock, int *level,
128+
int *optname, char __user *optval,
129+
int *optlen, char **kernel_optval);
130+
/*
 * Out-of-line handler behind BPF_CGROUP_RUN_PROG_GETSOCKOPT(); the macro
 * forwards its arguments here unchanged when cgroup_bpf_enabled is set.
 * optval and optlen are __user pointers; max_optlen and retval are passed
 * by value.
 * NOTE(review): retval is presumably the result of the kernel's own
 * getsockopt handler -- confirm at the call sites.
 */
int __cgroup_bpf_run_filter_getsockopt(struct sock *sk, int level,
131+
int optname, char __user *optval,
132+
int __user *optlen, int max_optlen,
133+
int retval);
134+
127135
static inline enum bpf_cgroup_storage_type cgroup_storage_type(
128136
struct bpf_map *map)
129137
{
@@ -286,6 +294,38 @@ int bpf_percpu_cgroup_storage_update(struct bpf_map *map, void *key,
286294
__ret; \
287295
})
288296

297+
/*
 * Run the cgroup setsockopt hook.
 *
 * Evaluates to 0 without calling anything when cgroup_bpf_enabled is false.
 * Otherwise evaluates to the return value of
 * __cgroup_bpf_run_filter_setsockopt() called with the same six arguments.
 */
#define BPF_CGROUP_RUN_PROG_SETSOCKOPT(sock, level, optname, optval, optlen, \
298+
kernel_optval) \
299+
({ \
300+
int __ret = 0; \
301+
if (cgroup_bpf_enabled) \
302+
__ret = __cgroup_bpf_run_filter_setsockopt(sock, level, \
303+
optname, optval, \
304+
optlen, \
305+
kernel_optval); \
306+
__ret; \
307+
})
308+
309+
/*
 * Read the integer that optlen points to with get_user().
 *
 * Evaluates to 0 when cgroup_bpf_enabled is false, otherwise to the value
 * get_user() stored in __ret.
 * NOTE(review): the return value of get_user() is discarded, so a faulting
 * optlen is not reported by this macro -- confirm callers rely on the later
 * getsockopt path to catch that.
 * NOTE(review): presumably the result feeds the max_optlen argument of
 * BPF_CGROUP_RUN_PROG_GETSOCKOPT() -- verify at the call sites.
 */
#define BPF_CGROUP_GETSOCKOPT_MAX_OPTLEN(optlen) \
310+
({ \
311+
int __ret = 0; \
312+
if (cgroup_bpf_enabled) \
313+
get_user(__ret, optlen); \
314+
__ret; \
315+
})
316+
317+
/*
 * Run the cgroup getsockopt hook.
 *
 * Evaluates to retval unchanged when cgroup_bpf_enabled is false. Otherwise
 * evaluates to the return value of __cgroup_bpf_run_filter_getsockopt()
 * called with the same seven arguments.
 *
 * The retval argument is expanded twice (initializer and call) when the
 * hook is enabled, so it should be an expression without side effects.
 */
#define BPF_CGROUP_RUN_PROG_GETSOCKOPT(sock, level, optname, optval, optlen, \
318+
max_optlen, retval) \
319+
({ \
320+
int __ret = retval; \
321+
if (cgroup_bpf_enabled) \
322+
__ret = __cgroup_bpf_run_filter_getsockopt(sock, level, \
323+
optname, optval, \
324+
optlen, max_optlen, \
325+
retval); \
326+
__ret; \
327+
})
328+
289329
int cgroup_bpf_prog_attach(const union bpf_attr *attr,
290330
enum bpf_prog_type ptype, struct bpf_prog *prog);
291331
int cgroup_bpf_prog_detach(const union bpf_attr *attr,
@@ -357,6 +397,11 @@ static inline int bpf_percpu_cgroup_storage_update(struct bpf_map *map,
357397
#define BPF_CGROUP_RUN_PROG_SOCK_OPS(sock_ops) ({ 0; })
358398
#define BPF_CGROUP_RUN_PROG_DEVICE_CGROUP(type,major,minor,access) ({ 0; })
359399
#define BPF_CGROUP_RUN_PROG_SYSCTL(head,table,write,buf,count,pos,nbuf) ({ 0; })
400+
/*
 * Stub forms of the sockopt hooks: no handler is called. MAX_OPTLEN and
 * RUN_PROG_SETSOCKOPT evaluate to 0; RUN_PROG_GETSOCKOPT evaluates to its
 * retval argument.
 * NOTE(review): presumably the variant used when cgroup BPF support is
 * compiled out -- the enclosing preprocessor conditional is not visible here.
 */
#define BPF_CGROUP_GETSOCKOPT_MAX_OPTLEN(optlen) ({ 0; })
401+
#define BPF_CGROUP_RUN_PROG_GETSOCKOPT(sock, level, optname, optval, \
402+
optlen, max_optlen, retval) ({ retval; })
403+
#define BPF_CGROUP_RUN_PROG_SETSOCKOPT(sock, level, optname, optval, optlen, \
404+
kernel_optval) ({ 0; })
360405

361406
#define for_each_cgroup_storage_type(stype) for (; false; )
362407

include/linux/bpf.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -518,6 +518,7 @@ struct bpf_prog_array {
518518
struct bpf_prog_array *bpf_prog_array_alloc(u32 prog_cnt, gfp_t flags);
519519
void bpf_prog_array_free(struct bpf_prog_array *progs);
520520
int bpf_prog_array_length(struct bpf_prog_array *progs);
521+
bool bpf_prog_array_is_empty(struct bpf_prog_array *array);
521522
int bpf_prog_array_copy_to_user(struct bpf_prog_array *progs,
522523
__u32 __user *prog_ids, u32 cnt);
523524

@@ -1051,6 +1052,7 @@ extern const struct bpf_func_proto bpf_spin_unlock_proto;
10511052
extern const struct bpf_func_proto bpf_get_local_storage_proto;
10521053
extern const struct bpf_func_proto bpf_strtol_proto;
10531054
extern const struct bpf_func_proto bpf_strtoul_proto;
1055+
extern const struct bpf_func_proto bpf_tcp_sock_proto;
10541056

10551057
/* Shared helpers among cBPF and eBPF. */
10561058
void bpf_user_rnd_init_once(void);

include/linux/bpf_types.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@ BPF_PROG_TYPE(BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE, raw_tracepoint_writable)
3030
#ifdef CONFIG_CGROUP_BPF
3131
BPF_PROG_TYPE(BPF_PROG_TYPE_CGROUP_DEVICE, cg_dev)
3232
BPF_PROG_TYPE(BPF_PROG_TYPE_CGROUP_SYSCTL, cg_sysctl)
33+
BPF_PROG_TYPE(BPF_PROG_TYPE_CGROUP_SOCKOPT, cg_sockopt)
3334
#endif
3435
#ifdef CONFIG_BPF_LIRC_MODE2
3536
BPF_PROG_TYPE(BPF_PROG_TYPE_LIRC_MODE2, lirc_mode2)

include/linux/filter.h

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1199,4 +1199,14 @@ struct bpf_sysctl_kern {
11991199
u64 tmp_reg;
12001200
};
12011201

1202+
/*
 * Kernel-side sockopt hook state. Its members carry the same names as those
 * of the uapi struct bpf_sockopt (sk, optval, optval_end, level, optname,
 * optlen, retval), here with kernel types.
 * NOTE(review): presumably the object that BPF_PROG_TYPE_CGROUP_SOCKOPT
 * context accesses are rewritten to -- confirm against the verifier ops.
 */
struct bpf_sockopt_kern {
1203+
struct sock *sk;
1204+
u8 *optval; /* NOTE(review): presumably start of the option buffer -- confirm */
1205+
u8 *optval_end; /* NOTE(review): presumably end bound of that buffer -- confirm */
1206+
s32 level;
1207+
s32 optname;
1208+
s32 optlen;
1209+
s32 retval;
1210+
};
1211+
12021212
#endif /* __LINUX_FILTER_H__ */

include/uapi/linux/bpf.h

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -170,6 +170,7 @@ enum bpf_prog_type {
170170
BPF_PROG_TYPE_FLOW_DISSECTOR,
171171
BPF_PROG_TYPE_CGROUP_SYSCTL,
172172
BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE,
173+
BPF_PROG_TYPE_CGROUP_SOCKOPT,
173174
};
174175

175176
enum bpf_attach_type {
@@ -194,6 +195,8 @@ enum bpf_attach_type {
194195
BPF_CGROUP_SYSCTL,
195196
BPF_CGROUP_UDP4_RECVMSG,
196197
BPF_CGROUP_UDP6_RECVMSG,
198+
BPF_CGROUP_GETSOCKOPT,
199+
BPF_CGROUP_SETSOCKOPT,
197200
__MAX_BPF_ATTACH_TYPE
198201
};
199202

@@ -3541,4 +3544,15 @@ struct bpf_sysctl {
35413544
*/
35423545
};
35433546

3547+
/*
 * uapi sockopt context layout: three pointer members declared through
 * __bpf_md_ptr() followed by four __s32 members.
 * NOTE(review): presumably the context type seen by
 * BPF_PROG_TYPE_CGROUP_SOCKOPT programs, with optval/optval_end bounding the
 * option buffer -- confirm against the program-type documentation.
 */
struct bpf_sockopt {
3548+
__bpf_md_ptr(struct bpf_sock *, sk);
3549+
__bpf_md_ptr(void *, optval);
3550+
__bpf_md_ptr(void *, optval_end);
3551+
3552+
__s32 level;
3553+
__s32 optname;
3554+
__s32 optlen;
3555+
__s32 retval;
3556+
};
3557+
35443558
#endif /* _UAPI__LINUX_BPF_H__ */

0 commit comments

Comments
 (0)