@@ -9,6 +9,7 @@
 
 #include <linux/bitfield.h>
 #include <linux/bpf.h>
+#include <linux/memory.h>
 #include <linux/filter.h>
 #include <linux/printk.h>
 #include <linux/slab.h>
@@ -18,6 +19,7 @@
 #include <asm/cacheflush.h>
 #include <asm/debug-monitors.h>
 #include <asm/insn.h>
+#include <asm/patching.h>
 #include <asm/set_memory.h>
 
 #include "bpf_jit.h"
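(The two new headers feed the poking code added at the bottom of this diff: linux/memory.h declares text_mutex, and asm/patching.h declares aarch64_insn_read() and aarch64_insn_patch_text_nosync().)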
|
@@ -235,13 +237,13 @@ static bool is_lsi_offset(int offset, int scale)
 	return true;
 }
 
+#define BTI_INSNS (IS_ENABLED(CONFIG_ARM64_BTI_KERNEL) ? 1 : 0)
+#define PAC_INSNS (IS_ENABLED(CONFIG_ARM64_PTR_AUTH_KERNEL) ? 1 : 0)
+
 /* Tail call offset to jump into */
-#if IS_ENABLED(CONFIG_ARM64_BTI_KERNEL) || \
-	IS_ENABLED(CONFIG_ARM64_PTR_AUTH_KERNEL)
-#define PROLOGUE_OFFSET 9
-#else
-#define PROLOGUE_OFFSET 8
-#endif
+#define PROLOGUE_OFFSET (BTI_INSNS + 2 + PAC_INSNS + 8)
+/* Offset of nop instruction in bpf prog entry to be poked */
+#define POKE_OFFSET (BTI_INSNS + 1)
 
 static int build_prologue(struct jit_ctx *ctx, bool ebpf_from_cbpf)
 {
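The rewritten PROLOGUE_OFFSET counts the prologue instructions explicitly instead of hard-coding 8 or 9: one optional bti c, the new mov x9, x30 / nop pair (the "+ 2"), one optional paciasp, and the 8 pre-existing prologue instructions. A quick check of the arithmetic across the four possible configs (editor's sketch, not part of the patch):

	BTI  PAC    PROLOGUE_OFFSET         POKE_OFFSET
	off  off    0 + 2 + 0 + 8 = 10      0 + 1 = 1
	on   off    1 + 2 + 0 + 8 = 11      1 + 1 = 2
	off  on     0 + 2 + 1 + 8 = 11      0 + 1 = 1
	on   on     1 + 2 + 1 + 8 = 12      1 + 1 = 2

POKE_OFFSET always lands on the nop: it skips only the optional landing pad plus the mov.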
|
@@ -279,12 +281,15 @@ static int build_prologue(struct jit_ctx *ctx, bool ebpf_from_cbpf)
 	 *
 	 */
 
+	if (IS_ENABLED(CONFIG_ARM64_BTI_KERNEL))
+		emit(A64_BTI_C, ctx);
+
+	emit(A64_MOV(1, A64_R(9), A64_LR), ctx);
+	emit(A64_NOP, ctx);
+
 	/* Sign lr */
 	if (IS_ENABLED(CONFIG_ARM64_PTR_AUTH_KERNEL))
 		emit(A64_PACIASP, ctx);
-	/* BTI landing pad */
-	else if (IS_ENABLED(CONFIG_ARM64_BTI_KERNEL))
-		emit(A64_BTI_C, ctx);
 
 	/* Save FP and LR registers to stay align with ARM64 AAPCS */
 	emit(A64_PUSH(A64_FP, A64_LR, A64_SP), ctx);
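The reordering above is forced by BTI: with the new mov x9, x30 emitted before paciasp, the PAC instruction can no longer double as the landing pad, so bti c must become the very first instruction and the old else-if is split apart. Assuming both CONFIG_ARM64_BTI_KERNEL and CONFIG_ARM64_PTR_AUTH_KERNEL are enabled, the emitted entry should look like this (assembly sketch of what the emit() calls produce):

	bti c				// index 0: landing pad
	mov x9, x30			// index 1: save lr
	nop				// index 2: POKE_OFFSET target
	paciasp				// sign lr
	stp x29, x30, [sp, #-16]!	// A64_PUSH(A64_FP, A64_LR, A64_SP)

bpf_arch_text_poke() below patches only the nop slot, flipping it between a nop and a branch to a trampoline.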
|
@@ -1529,3 +1534,87 @@ void bpf_jit_free_exec(void *addr)
 {
 	return vfree(addr);
 }
+
+static int gen_branch_or_nop(enum aarch64_insn_branch_type type, void *ip,
+			     void *addr, u32 *insn)
+{
+	if (!addr)
+		*insn = aarch64_insn_gen_nop();
+	else
+		*insn = aarch64_insn_gen_branch_imm((unsigned long)ip,
+						    (unsigned long)addr,
+						    type);
+
+	return *insn != AARCH64_BREAK_FAULT ? 0 : -EFAULT;
+}
+
+int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type poke_type,
+		       void *old_addr, void *new_addr)
+{
+	int ret;
+	u32 old_insn;
+	u32 new_insn;
+	u32 replaced;
+	unsigned long offset = ~0UL;
+	enum aarch64_insn_branch_type branch_type;
+	char namebuf[KSYM_NAME_LEN];
+
+	if (!__bpf_address_lookup((unsigned long)ip, NULL, &offset, namebuf))
+		/* Only poking bpf text is supported. Since kernel function
+		 * entry is set up by ftrace, we rely on ftrace to poke kernel
+		 * functions.
+		 */
+		return -EINVAL;
+
+	/* bpf entry */
+	if (offset == 0UL)
+		/* skip to the nop instruction in bpf prog entry:
+		 * bti c // if BTI enabled
+		 * mov x9, x30
+		 * nop
+		 */
+		ip = ip + POKE_OFFSET * AARCH64_INSN_SIZE;
+
+	if (poke_type == BPF_MOD_CALL)
+		branch_type = AARCH64_INSN_BRANCH_LINK;
+	else
+		branch_type = AARCH64_INSN_BRANCH_NOLINK;
+
+	if (gen_branch_or_nop(branch_type, ip, old_addr, &old_insn) < 0)
+		return -EFAULT;
+
+	if (gen_branch_or_nop(branch_type, ip, new_addr, &new_insn) < 0)
+		return -EFAULT;
+
+	mutex_lock(&text_mutex);
+	if (aarch64_insn_read(ip, &replaced)) {
+		ret = -EFAULT;
+		goto out;
+	}
+
+	if (replaced != old_insn) {
+		ret = -EFAULT;
+		goto out;
+	}
+
+	/* We call aarch64_insn_patch_text_nosync() to replace the instruction
+	 * atomically, so no other CPUs will fetch a half-new and half-old
+	 * instruction. But there is a chance that another CPU fetches the old
+	 * instruction after bpf_arch_text_poke() finishes, that is, different
+	 * CPUs may execute different versions of instructions at the same
+	 * time before the icache is synchronized by hardware.
+	 *
+	 * 1. when a new trampoline is attached, it is not an issue for
+	 *    different CPUs to jump to different trampolines temporarily.
+	 *
+	 * 2. when an old trampoline is freed, we should wait for all other
+	 *    CPUs to exit the trampoline and make sure the trampoline is no
+	 *    longer reachable; since bpf_tramp_image_put() already uses
+	 *    percpu_ref and RCU tasks to do the sync, there is no need to
+	 *    call the sync interface here.
+	 */
+	ret = aarch64_insn_patch_text_nosync(ip, new_insn);
+out:
+	mutex_unlock(&text_mutex);
+	return ret;
+}
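Taken together, gen_branch_or_nop() maps a NULL address to a nop and a non-NULL one to a b/bl, so every attach, replace, and detach transition reduces to a single instruction swap, guarded by the replaced != old_insn check. A hypothetical caller-side sketch (function names invented for illustration; the real callers are the generic BPF trampoline update paths):

	/* Illustration of the transitions at a BPF prog entry. 'entry' is
	 * the prog's start address; bpf_arch_text_poke() itself skips to
	 * the nop at POKE_OFFSET.
	 */
	static int attach_tramp(void *entry, void *tramp)
	{
		/* nop -> bl tramp: old_addr == NULL means "expect a nop" */
		return bpf_arch_text_poke(entry, BPF_MOD_CALL, NULL, tramp);
	}

	static int replace_tramp(void *entry, void *old_tramp, void *new_tramp)
	{
		/* bl old_tramp -> bl new_tramp */
		return bpf_arch_text_poke(entry, BPF_MOD_CALL, old_tramp, new_tramp);
	}

	static int detach_tramp(void *entry, void *tramp)
	{
		/* bl tramp -> nop: new_addr == NULL regenerates the nop */
		return bpf_arch_text_poke(entry, BPF_MOD_CALL, tramp, NULL);
	}

The replaced != old_insn comparison makes each poke behave like a compare-and-swap: a caller holding a stale old_addr gets -EFAULT instead of silently clobbering a newer attachment.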