Commit b00fa38

joannekoong authored and Alexei Starovoitov committed
bpf: Enable non-atomic allocations in local storage
Currently, local storage memory can only be allocated atomically
(GFP_ATOMIC). This restriction is too strict for sleepable bpf
programs.

In this patch, the verifier detects whether the program is sleepable,
and passes the corresponding GFP_KERNEL or GFP_ATOMIC flag as a
5th argument to bpf_task/sk/inode_storage_get. This flag will propagate
down to the local storage functions that allocate memory.

Please note that bpf_task/sk/inode_storage_update_elem functions are
invoked by userspace applications through syscalls. Preemption is
disabled before bpf_task/sk/inode_storage_update_elem is called, which
means they will always have to allocate memory atomically.

Signed-off-by: Joanne Koong <[email protected]>
Signed-off-by: Alexei Starovoitov <[email protected]>
Acked-by: KP Singh <[email protected]>
Acked-by: Martin KaFai Lau <[email protected]>
Link: https://lore.kernel.org/bpf/[email protected]
1 parent a8fee96 commit b00fa38
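For context, a minimal sketch of a sleepable BPF program that exercises this path. This is illustrative only and not part of the commit: the map name, value type, and attach point are assumptions, and it relies on vmlinux.h plus libbpf's bpf_helpers.h and bpf_tracing.h. At the source level the helper still takes four arguments; the hidden gfp_flags argument is injected by the verifier.

/* sleepable_task_storage.bpf.c - hypothetical example, not part of this commit */
#include "vmlinux.h"
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>

struct {
	__uint(type, BPF_MAP_TYPE_TASK_STORAGE);
	__uint(map_flags, BPF_F_NO_PREALLOC);
	__type(key, int);
	__type(value, __u64);
} task_map SEC(".maps");

SEC("lsm.s/file_open")	/* the ".s" suffix marks the program as sleepable */
int BPF_PROG(file_open, struct file *file)
{
	__u64 *cnt;

	/* For a sleepable program, the verifier patches this call so a
	 * newly created element is allocated with GFP_KERNEL rather than
	 * GFP_ATOMIC.
	 */
	cnt = bpf_task_storage_get(&task_map, bpf_get_current_task_btf(), 0,
				   BPF_LOCAL_STORAGE_GET_F_CREATE);
	if (cnt)
		(*cnt)++;
	return 0;
}

char LICENSE[] SEC("license") = "GPL";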

6 files changed, +84 -41 lines changed

6 files changed

+84
-41
lines changed

include/linux/bpf_local_storage.h

Lines changed: 4 additions & 3 deletions
@@ -154,16 +154,17 @@ void bpf_selem_unlink_map(struct bpf_local_storage_elem *selem);
 
 struct bpf_local_storage_elem *
 bpf_selem_alloc(struct bpf_local_storage_map *smap, void *owner, void *value,
-		bool charge_mem);
+		bool charge_mem, gfp_t gfp_flags);
 
 int
 bpf_local_storage_alloc(void *owner,
			struct bpf_local_storage_map *smap,
-			struct bpf_local_storage_elem *first_selem);
+			struct bpf_local_storage_elem *first_selem,
+			gfp_t gfp_flags);
 
 struct bpf_local_storage_data *
 bpf_local_storage_update(void *owner, struct bpf_local_storage_map *smap,
-			 void *value, u64 map_flags);
+			 void *value, u64 map_flags, gfp_t gfp_flags);
 
 void bpf_local_storage_free_rcu(struct rcu_head *rcu);

kernel/bpf/bpf_inode_storage.c

Lines changed: 5 additions & 4 deletions
@@ -136,7 +136,7 @@ static int bpf_fd_inode_storage_update_elem(struct bpf_map *map, void *key,
 
	sdata = bpf_local_storage_update(f->f_inode,
					 (struct bpf_local_storage_map *)map,
-					 value, map_flags);
+					 value, map_flags, GFP_ATOMIC);
	fput(f);
	return PTR_ERR_OR_ZERO(sdata);
 }
@@ -169,8 +169,9 @@ static int bpf_fd_inode_storage_delete_elem(struct bpf_map *map, void *key)
	return err;
 }
 
-BPF_CALL_4(bpf_inode_storage_get, struct bpf_map *, map, struct inode *, inode,
-	   void *, value, u64, flags)
+/* *gfp_flags* is a hidden argument provided by the verifier */
+BPF_CALL_5(bpf_inode_storage_get, struct bpf_map *, map, struct inode *, inode,
+	   void *, value, u64, flags, gfp_t, gfp_flags)
 {
	struct bpf_local_storage_data *sdata;
 
@@ -196,7 +197,7 @@ BPF_CALL_4(bpf_inode_storage_get, struct bpf_map *, map, struct inode *, inode,
	if (flags & BPF_LOCAL_STORAGE_GET_F_CREATE) {
		sdata = bpf_local_storage_update(
			inode, (struct bpf_local_storage_map *)map, value,
-			BPF_NOEXIST);
+			BPF_NOEXIST, gfp_flags);
		return IS_ERR(sdata) ? (unsigned long)NULL :
			(unsigned long)sdata->data;
	}

kernel/bpf/bpf_local_storage.c

Lines changed: 37 additions & 21 deletions
@@ -63,15 +63,15 @@ static bool selem_linked_to_map(const struct bpf_local_storage_elem *selem)
 
 struct bpf_local_storage_elem *
 bpf_selem_alloc(struct bpf_local_storage_map *smap, void *owner,
-		void *value, bool charge_mem)
+		void *value, bool charge_mem, gfp_t gfp_flags)
 {
	struct bpf_local_storage_elem *selem;
 
	if (charge_mem && mem_charge(smap, owner, smap->elem_size))
		return NULL;
 
	selem = bpf_map_kzalloc(&smap->map, smap->elem_size,
-				GFP_ATOMIC | __GFP_NOWARN);
+				gfp_flags | __GFP_NOWARN);
	if (selem) {
		if (value)
			memcpy(SDATA(selem)->data, value, smap->map.value_size);
@@ -282,7 +282,8 @@ static int check_flags(const struct bpf_local_storage_data *old_sdata,
 
 int bpf_local_storage_alloc(void *owner,
			    struct bpf_local_storage_map *smap,
-			    struct bpf_local_storage_elem *first_selem)
+			    struct bpf_local_storage_elem *first_selem,
+			    gfp_t gfp_flags)
 {
	struct bpf_local_storage *prev_storage, *storage;
	struct bpf_local_storage **owner_storage_ptr;
@@ -293,7 +294,7 @@ int bpf_local_storage_alloc(void *owner,
		return err;
 
	storage = bpf_map_kzalloc(&smap->map, sizeof(*storage),
-				  GFP_ATOMIC | __GFP_NOWARN);
+				  gfp_flags | __GFP_NOWARN);
	if (!storage) {
		err = -ENOMEM;
		goto uncharge;
@@ -350,10 +351,10 @@
  */
 struct bpf_local_storage_data *
 bpf_local_storage_update(void *owner, struct bpf_local_storage_map *smap,
-			 void *value, u64 map_flags)
+			 void *value, u64 map_flags, gfp_t gfp_flags)
 {
	struct bpf_local_storage_data *old_sdata = NULL;
-	struct bpf_local_storage_elem *selem;
+	struct bpf_local_storage_elem *selem = NULL;
	struct bpf_local_storage *local_storage;
	unsigned long flags;
	int err;
@@ -365,6 +366,9 @@ bpf_local_storage_update(void *owner, struct bpf_local_storage_map *smap,
		     !map_value_has_spin_lock(&smap->map)))
		return ERR_PTR(-EINVAL);
 
+	if (gfp_flags == GFP_KERNEL && (map_flags & ~BPF_F_LOCK) != BPF_NOEXIST)
+		return ERR_PTR(-EINVAL);
+
	local_storage = rcu_dereference_check(*owner_storage(smap, owner),
					      bpf_rcu_lock_held());
	if (!local_storage || hlist_empty(&local_storage->list)) {
@@ -373,11 +377,11 @@ bpf_local_storage_update(void *owner, struct bpf_local_storage_map *smap,
		if (err)
			return ERR_PTR(err);
 
-		selem = bpf_selem_alloc(smap, owner, value, true);
+		selem = bpf_selem_alloc(smap, owner, value, true, gfp_flags);
		if (!selem)
			return ERR_PTR(-ENOMEM);
 
-		err = bpf_local_storage_alloc(owner, smap, selem);
+		err = bpf_local_storage_alloc(owner, smap, selem, gfp_flags);
		if (err) {
			kfree(selem);
			mem_uncharge(smap, owner, smap->elem_size);
@@ -404,6 +408,12 @@ bpf_local_storage_update(void *owner, struct bpf_local_storage_map *smap,
		}
	}
 
+	if (gfp_flags == GFP_KERNEL) {
+		selem = bpf_selem_alloc(smap, owner, value, true, gfp_flags);
+		if (!selem)
+			return ERR_PTR(-ENOMEM);
+	}
+
	raw_spin_lock_irqsave(&local_storage->lock, flags);
 
	/* Recheck local_storage->list under local_storage->lock */
@@ -429,19 +439,21 @@ bpf_local_storage_update(void *owner, struct bpf_local_storage_map *smap,
		goto unlock;
	}
 
-	/* local_storage->lock is held. Hence, we are sure
-	 * we can unlink and uncharge the old_sdata successfully
-	 * later. Hence, instead of charging the new selem now
-	 * and then uncharge the old selem later (which may cause
-	 * a potential but unnecessary charge failure), avoid taking
-	 * a charge at all here (the "!old_sdata" check) and the
-	 * old_sdata will not be uncharged later during
-	 * bpf_selem_unlink_storage_nolock().
-	 */
-	selem = bpf_selem_alloc(smap, owner, value, !old_sdata);
-	if (!selem) {
-		err = -ENOMEM;
-		goto unlock_err;
+	if (gfp_flags != GFP_KERNEL) {
+		/* local_storage->lock is held. Hence, we are sure
+		 * we can unlink and uncharge the old_sdata successfully
+		 * later. Hence, instead of charging the new selem now
+		 * and then uncharge the old selem later (which may cause
+		 * a potential but unnecessary charge failure), avoid taking
+		 * a charge at all here (the "!old_sdata" check) and the
+		 * old_sdata will not be uncharged later during
+		 * bpf_selem_unlink_storage_nolock().
+		 */
+		selem = bpf_selem_alloc(smap, owner, value, !old_sdata, gfp_flags);
+		if (!selem) {
+			err = -ENOMEM;
+			goto unlock_err;
+		}
	}
 
	/* First, link the new selem to the map */
@@ -463,6 +475,10 @@ bpf_local_storage_update(void *owner, struct bpf_local_storage_map *smap,
 
 unlock_err:
	raw_spin_unlock_irqrestore(&local_storage->lock, flags);
+	if (selem) {
+		mem_uncharge(smap, owner, smap->elem_size);
+		kfree(selem);
+	}
	return ERR_PTR(err);
 }

kernel/bpf/bpf_task_storage.c

Lines changed: 6 additions & 4 deletions
@@ -174,7 +174,8 @@ static int bpf_pid_task_storage_update_elem(struct bpf_map *map, void *key,
 
	bpf_task_storage_lock();
	sdata = bpf_local_storage_update(
-		task, (struct bpf_local_storage_map *)map, value, map_flags);
+		task, (struct bpf_local_storage_map *)map, value, map_flags,
+		GFP_ATOMIC);
	bpf_task_storage_unlock();
 
	err = PTR_ERR_OR_ZERO(sdata);
@@ -226,8 +227,9 @@ static int bpf_pid_task_storage_delete_elem(struct bpf_map *map, void *key)
	return err;
 }
 
-BPF_CALL_4(bpf_task_storage_get, struct bpf_map *, map, struct task_struct *,
-	   task, void *, value, u64, flags)
+/* *gfp_flags* is a hidden argument provided by the verifier */
+BPF_CALL_5(bpf_task_storage_get, struct bpf_map *, map, struct task_struct *,
+	   task, void *, value, u64, flags, gfp_t, gfp_flags)
 {
	struct bpf_local_storage_data *sdata;
 
@@ -250,7 +252,7 @@ BPF_CALL_4(bpf_task_storage_get, struct bpf_map *, map, struct task_struct *,
	    (flags & BPF_LOCAL_STORAGE_GET_F_CREATE))
		sdata = bpf_local_storage_update(
			task, (struct bpf_local_storage_map *)map, value,
-			BPF_NOEXIST);
+			BPF_NOEXIST, gfp_flags);
 
 unlock:
	bpf_task_storage_unlock();
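As the commit message notes, updates that arrive from userspace via the bpf(2) syscall take the bpf_pid_task_storage_update_elem() path above and therefore always allocate with GFP_ATOMIC. A hedged userspace sketch of that path, assuming libbpf's bpf_map_update_elem() with illustrative fd variables:

/* Illustrative only: task storage maps are keyed by a pidfd of the
 * target task; this update reaches bpf_local_storage_update() with
 * GFP_ATOMIC regardless of whether any attached program is sleepable.
 */
#include <linux/bpf.h>
#include <bpf/bpf.h>

static int init_task_counter(int map_fd, int task_pidfd)
{
	__u64 init = 0;

	return bpf_map_update_elem(map_fd, &task_pidfd, &init, BPF_NOEXIST);
}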

kernel/bpf/verifier.c

Lines changed: 20 additions & 0 deletions
@@ -13492,6 +13492,26 @@ static int do_misc_fixups(struct bpf_verifier_env *env)
			goto patch_call_imm;
		}
 
+		if (insn->imm == BPF_FUNC_task_storage_get ||
+		    insn->imm == BPF_FUNC_sk_storage_get ||
+		    insn->imm == BPF_FUNC_inode_storage_get) {
+			if (env->prog->aux->sleepable)
+				insn_buf[0] = BPF_MOV64_IMM(BPF_REG_5, (__s32)GFP_KERNEL);
+			else
+				insn_buf[0] = BPF_MOV64_IMM(BPF_REG_5, (__s32)GFP_ATOMIC);
+			insn_buf[1] = *insn;
+			cnt = 2;
+
+			new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
+			if (!new_prog)
+				return -ENOMEM;
+
+			delta += cnt - 1;
+			env->prog = prog = new_prog;
+			insn = new_prog->insnsi + i + delta;
+			goto patch_call_imm;
+		}
+
		/* BPF_EMIT_CALL() assumptions in some of the map_gen_lookup
		 * and other inlining handlers are currently limited to 64 bit
		 * only.
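The net effect of this fixup, sketched as a conceptual before/after of the patched instruction stream (pseudo-assembly in a C comment, illustrative only):

/* before (as emitted by the compiler):
 *	call bpf_task_storage_get
 *
 * after do_misc_fixups(), for a sleepable program:
 *	r5 = GFP_KERNEL
 *	call bpf_task_storage_get
 *
 * and for a non-sleepable program:
 *	r5 = GFP_ATOMIC
 *	call bpf_task_storage_get
 */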

net/core/bpf_sk_storage.c

Lines changed: 12 additions & 9 deletions
@@ -141,7 +141,7 @@ static int bpf_fd_sk_storage_update_elem(struct bpf_map *map, void *key,
	if (sock) {
		sdata = bpf_local_storage_update(
			sock->sk, (struct bpf_local_storage_map *)map, value,
-			map_flags);
+			map_flags, GFP_ATOMIC);
		sockfd_put(sock);
		return PTR_ERR_OR_ZERO(sdata);
	}
@@ -172,7 +172,7 @@ bpf_sk_storage_clone_elem(struct sock *newsk,
 {
	struct bpf_local_storage_elem *copy_selem;
 
-	copy_selem = bpf_selem_alloc(smap, newsk, NULL, true);
+	copy_selem = bpf_selem_alloc(smap, newsk, NULL, true, GFP_ATOMIC);
	if (!copy_selem)
		return NULL;
@@ -230,7 +230,7 @@ int bpf_sk_storage_clone(const struct sock *sk, struct sock *newsk)
			bpf_selem_link_map(smap, copy_selem);
			bpf_selem_link_storage_nolock(new_sk_storage, copy_selem);
		} else {
-			ret = bpf_local_storage_alloc(newsk, smap, copy_selem);
+			ret = bpf_local_storage_alloc(newsk, smap, copy_selem, GFP_ATOMIC);
			if (ret) {
				kfree(copy_selem);
				atomic_sub(smap->elem_size,
@@ -255,8 +255,9 @@ int bpf_sk_storage_clone(const struct sock *sk, struct sock *newsk)
	return ret;
 }
 
-BPF_CALL_4(bpf_sk_storage_get, struct bpf_map *, map, struct sock *, sk,
-	   void *, value, u64, flags)
+/* *gfp_flags* is a hidden argument provided by the verifier */
+BPF_CALL_5(bpf_sk_storage_get, struct bpf_map *, map, struct sock *, sk,
+	   void *, value, u64, flags, gfp_t, gfp_flags)
 {
	struct bpf_local_storage_data *sdata;
 
@@ -277,7 +278,7 @@ BPF_CALL_4(bpf_sk_storage_get, struct bpf_map *, map, struct sock *, sk,
	    refcount_inc_not_zero(&sk->sk_refcnt)) {
		sdata = bpf_local_storage_update(
			sk, (struct bpf_local_storage_map *)map, value,
-			BPF_NOEXIST);
+			BPF_NOEXIST, gfp_flags);
		/* sk must be a fullsock (guaranteed by verifier),
		 * so sock_gen_put() is unnecessary.
		 */
@@ -417,14 +418,16 @@ static bool bpf_sk_storage_tracing_allowed(const struct bpf_prog *prog)
	return false;
 }
 
-BPF_CALL_4(bpf_sk_storage_get_tracing, struct bpf_map *, map, struct sock *, sk,
-	   void *, value, u64, flags)
+/* *gfp_flags* is a hidden argument provided by the verifier */
+BPF_CALL_5(bpf_sk_storage_get_tracing, struct bpf_map *, map, struct sock *, sk,
+	   void *, value, u64, flags, gfp_t, gfp_flags)
 {
	WARN_ON_ONCE(!bpf_rcu_lock_held());
	if (in_hardirq() || in_nmi())
		return (unsigned long)NULL;
 
-	return (unsigned long)____bpf_sk_storage_get(map, sk, value, flags);
+	return (unsigned long)____bpf_sk_storage_get(map, sk, value, flags,
+						     gfp_flags);
 }
 
 BPF_CALL_2(bpf_sk_storage_delete_tracing, struct bpf_map *, map,
