diff --git a/.github/workflows/update-pdf.yml b/.github/workflows/build.yml similarity index 55% rename from .github/workflows/update-pdf.yml rename to .github/workflows/build.yml index 1af46ab..fbc9693 100644 --- a/.github/workflows/update-pdf.yml +++ b/.github/workflows/build.yml @@ -1,12 +1,18 @@ -# This action will run rst2pdf to generate a PDF from the latest docs. +# This action will generate documentation artifacts from the latest doc sources. # -name: rst2pdf +name: build on: push: branches: [ update ] paths-ignore: - 'pdf/**' + - 'rfcxml/**' + pull_request: + branches: [ update ] + paths-ignore: + - 'pdf/**' + - 'rfcxml/**' permissions: contents: write # for Git to git push @@ -20,6 +26,10 @@ jobs: run: | pip install --user rst2pdf + - name: Install xml2rfc + run: | + pip install --user xml2rfc + - uses: actions/checkout@v3 - name: Clone docs @@ -32,7 +42,18 @@ jobs: run: | rst2pdf rst/instruction-set.rst pdf/instruction-set.pdf - - name: Checkin PDF + - name: Build rfcxml + run: | + wget https://github.com/dthaler/rst2rfcxml/releases/download/v0.3/Ubuntu.Release.rst2rfcxml.zip + unzip Ubuntu.Release.rst2rfcxml.zip + chmod 755 rst2rfcxml + ./rst2rfcxml rst/instruction-set-skeleton.rst -o pdf/draft-thaler-bpf-isa.xml + cd pdf + xml2rfc draft-thaler-bpf-isa.xml + xml2rfc --html draft-thaler-bpf-isa.xml + + - name: Check in PDF + if: github.event_name == 'push' run: | cd pdf git add . diff --git a/rst/clang-notes.rst b/rst/clang-notes.rst index 528fedd..40c6185 100644 --- a/rst/clang-notes.rst +++ b/rst/clang-notes.rst @@ -20,6 +20,11 @@ Arithmetic instructions For CPU versions prior to 3, Clang v7.0 and later can enable ``BPF_ALU`` support with ``-Xclang -target-feature -Xclang +alu32``. In CPU version 3, support is automatically included. +Reserved instructions +===================== + +Clang will generate the reserved ``BPF_CALL | BPF_X | BPF_JMP`` (0x8d) instruction if ``-O0`` is used.
+ Atomic operations ================= diff --git a/rst/instruction-set-opcodes.rst b/rst/instruction-set-opcodes.rst new file mode 100644 index 0000000..44e25e6 --- /dev/null +++ b/rst/instruction-set-opcodes.rst @@ -0,0 +1,201 @@ +For reference, the following table lists opcodes in order by value. + +====== === ==== =================================================== ======================================== +opcode src imm description reference +====== === ==== =================================================== ======================================== +0x00 0x0 any (additional immediate value) `64-bit immediate instructions`_ +0x04 0x0 any dst = (u32)((u32)dst + (u32)imm) `Arithmetic instructions`_ +0x05 0x0 0x00 goto +offset `Jump instructions`_ +0x07 0x0 any dst += imm `Arithmetic instructions`_ +0x0c any 0x00 dst = (u32)((u32)dst + (u32)src) `Arithmetic instructions`_ +0x0f any 0x00 dst += src `Arithmetic instructions`_ +0x14 0x0 any dst = (u32)((u32)dst - (u32)imm) `Arithmetic instructions`_ +0x15 0x0 any if dst == imm goto +offset `Jump instructions`_ +0x16 0x0 any if (u32)dst == imm goto +offset `Jump instructions`_ +0x17 0x0 any dst -= imm `Arithmetic instructions`_ +0x18 0x0 any dst = imm64 `64-bit immediate instructions`_ +0x18 0x1 any dst = map_by_fd(imm) `64-bit immediate instructions`_ +0x18 0x2 any dst = mva(map_by_fd(imm)) + next_imm `64-bit immediate instructions`_ +0x18 0x3 any dst = variable_addr(imm) `64-bit immediate instructions`_ +0x18 0x4 any dst = code_addr(imm) `64-bit immediate instructions`_ +0x18 0x5 any dst = map_by_idx(imm) `64-bit immediate instructions`_ +0x18 0x6 any dst = mva(map_by_idx(imm)) + next_imm `64-bit immediate instructions`_ +0x1c any 0x00 dst = (u32)((u32)dst - (u32)src) `Arithmetic instructions`_ +0x1d any 0x00 if dst == src goto +offset `Jump instructions`_ +0x1e any 0x00 if (u32)dst == (u32)src goto +offset `Jump instructions`_ +0x1f any 0x00 dst -= src `Arithmetic instructions`_ +0x20 any any (deprecated, 
implementation-specific) `Legacy BPF Packet access instructions`_ +0x24 0x0 any dst = (u32)(dst \* imm) `Arithmetic instructions`_ +0x25 0x0 any if dst > imm goto +offset `Jump instructions`_ +0x26 0x0 any if (u32)dst > imm goto +offset `Jump instructions`_ +0x27 0x0 any dst \*= imm `Arithmetic instructions`_ +0x28 any any (deprecated, implementation-specific) `Legacy BPF Packet access instructions`_ +0x2c any 0x00 dst = (u32)(dst \* src) `Arithmetic instructions`_ +0x2d any 0x00 if dst > src goto +offset `Jump instructions`_ +0x2e any 0x00 if (u32)dst > (u32)src goto +offset `Jump instructions`_ +0x2f any 0x00 dst \*= src `Arithmetic instructions`_ +0x30 any any (deprecated, implementation-specific) `Legacy BPF Packet access instructions`_ +0x34 0x0 any dst = (u32)((imm != 0) ? (dst / imm) : 0) `Arithmetic instructions`_ +0x35 0x0 any if dst >= imm goto +offset `Jump instructions`_ +0x36 0x0 any if (u32)dst >= imm goto +offset `Jump instructions`_ +0x37 0x0 any dst = (imm != 0) ? (dst / imm) : 0 `Arithmetic instructions`_ +0x38 any any (deprecated, implementation-specific) `Legacy BPF Packet access instructions`_ +0x3c any 0x00 dst = (u32)((src != 0) ? (dst / src) : 0) `Arithmetic instructions`_ +0x3d any 0x00 if dst >= src goto +offset `Jump instructions`_ +0x3e any 0x00 if (u32)dst >= (u32)src goto +offset `Jump instructions`_ +0x3f any 0x00 dst = (src != 0) ? 
(dst / src) : 0 `Arithmetic instructions`_ +0x40 any any (deprecated, implementation-specific) `Legacy BPF Packet access instructions`_ +0x44 0x0 any dst = (u32)(dst \| imm) `Arithmetic instructions`_ +0x45 0x0 any if dst & imm goto +offset `Jump instructions`_ +0x46 0x0 any if (u32)dst & imm goto +offset `Jump instructions`_ +0x47 0x0 any dst \|= imm `Arithmetic instructions`_ +0x48 any any (deprecated, implementation-specific) `Legacy BPF Packet access instructions`_ +0x4c any 0x00 dst = (u32)(dst \| src) `Arithmetic instructions`_ +0x4d any 0x00 if dst & src goto +offset `Jump instructions`_ +0x4e any 0x00 if (u32)dst & (u32)src goto +offset `Jump instructions`_ +0x4f any 0x00 dst \|= src `Arithmetic instructions`_ +0x50 any any (deprecated, implementation-specific) `Legacy BPF Packet access instructions`_ +0x54 0x0 any dst = (u32)(dst & imm) `Arithmetic instructions`_ +0x55 0x0 any if dst != imm goto +offset `Jump instructions`_ +0x56 0x0 any if (u32)dst != imm goto +offset `Jump instructions`_ +0x57 0x0 any dst &= imm `Arithmetic instructions`_ +0x58 any any (deprecated, implementation-specific) `Legacy BPF Packet access instructions`_ +0x5c any 0x00 dst = (u32)(dst & src) `Arithmetic instructions`_ +0x5d any 0x00 if dst != src goto +offset `Jump instructions`_ +0x5e any 0x00 if (u32)dst != (u32)src goto +offset `Jump instructions`_ +0x5f any 0x00 dst &= src `Arithmetic instructions`_ +0x61 any 0x00 dst = \*(u32 \*)(src + offset) `Load and store instructions`_ +0x62 0x0 any \*(u32 \*)(dst + offset) = imm `Load and store instructions`_ +0x63 any 0x00 \*(u32 \*)(dst + offset) = src `Load and store instructions`_ +0x64 0x0 any dst = (u32)(dst << imm) `Arithmetic instructions`_ +0x65 0x0 any if dst s> imm goto +offset `Jump instructions`_ +0x66 0x0 any if (s32)dst s> (s32)imm goto +offset `Jump instructions`_ +0x67 0x0 any dst <<= imm `Arithmetic instructions`_ +0x69 any 0x00 dst = \*(u16 \*)(src + offset) `Load and store instructions`_ +0x6a 0x0 any \*(u16 
\*)(dst + offset) = imm `Load and store instructions`_ +0x6b any 0x00 \*(u16 \*)(dst + offset) = src `Load and store instructions`_ +0x6c any 0x00 dst = (u32)(dst << src) `Arithmetic instructions`_ +0x6d any 0x00 if dst s> src goto +offset `Jump instructions`_ +0x6e any 0x00 if (s32)dst s> (s32)src goto +offset `Jump instructions`_ +0x6f any 0x00 dst <<= src `Arithmetic instructions`_ +0x71 any 0x00 dst = \*(u8 \*)(src + offset) `Load and store instructions`_ +0x72 0x0 any \*(u8 \*)(dst + offset) = imm `Load and store instructions`_ +0x73 any 0x00 \*(u8 \*)(dst + offset) = src `Load and store instructions`_ +0x74 0x0 any dst = (u32)(dst >> imm) `Arithmetic instructions`_ +0x75 0x0 any if dst s>= imm goto +offset `Jump instructions`_ +0x76 0x0 any if (s32)dst s>= (s32)imm goto +offset `Jump instructions`_ +0x77 0x0 any dst >>= imm `Arithmetic instructions`_ +0x79 any 0x00 dst = \*(u64 \*)(src + offset) `Load and store instructions`_ +0x7a 0x0 any \*(u64 \*)(dst + offset) = imm `Load and store instructions`_ +0x7b any 0x00 \*(u64 \*)(dst + offset) = src `Load and store instructions`_ +0x7c any 0x00 dst = (u32)(dst >> src) `Arithmetic instructions`_ +0x7d any 0x00 if dst s>= src goto +offset `Jump instructions`_ +0x7e any 0x00 if (s32)dst s>= (s32)src goto +offset `Jump instructions`_ +0x7f any 0x00 dst >>= src `Arithmetic instructions`_ +0x84 0x0 0x00 dst = (u32)-dst `Arithmetic instructions`_ +0x85 0x0 any call helper function imm `Helper functions`_ +0x85 0x1 any call PC += offset `eBPF functions`_ +0x85 0x2 any call runtime function imm `Runtime functions`_ +0x87 0x0 0x00 dst = -dst `Arithmetic instructions`_ +0x94 0x0 any dst = (u32)((imm != 0) ? (dst % imm) : dst) `Arithmetic instructions`_ +0x95 0x0 0x00 return `Jump instructions`_ +0x97 0x0 any dst = (imm != 0) ? (dst % imm) : dst `Arithmetic instructions`_ +0x9c any 0x00 dst = (u32)((src != 0) ? (dst % src) : dst) `Arithmetic instructions`_ +0x9f any 0x00 dst = (src != 0) ? 
(dst % src) : dst `Arithmetic instructions`_ +0xa4 0x0 any dst = (u32)(dst ^ imm) `Arithmetic instructions`_ +0xa5 0x0 any if dst < imm goto +offset `Jump instructions`_ +0xa6 0x0 any if (u32)dst < imm goto +offset `Jump instructions`_ +0xa7 0x0 any dst ^= imm `Arithmetic instructions`_ +0xac any 0x00 dst = (u32)(dst ^ src) `Arithmetic instructions`_ +0xad any 0x00 if dst < src goto +offset `Jump instructions`_ +0xae any 0x00 if (u32)dst < (u32)src goto +offset `Jump instructions`_ +0xaf any 0x00 dst ^= src `Arithmetic instructions`_ +0xb4 0x0 any dst = (u32) imm `Arithmetic instructions`_ +0xb5 0x0 any if dst <= imm goto +offset `Jump instructions`_ +0xb6 0x0 any if (u32)dst <= imm goto +offset `Jump instructions`_ +0xb7 0x0 any dst = imm `Arithmetic instructions`_ +0xbc any 0x00 dst = (u32) src `Arithmetic instructions`_ +0xbd any 0x00 if dst <= src goto +offset `Jump instructions`_ +0xbe any 0x00 if (u32)dst <= (u32)src goto +offset `Jump instructions`_ +0xbf any 0x00 dst = src `Arithmetic instructions`_ +0xc3 any 0x00 lock \*(u32 \*)(dst + offset) += src `Atomic operations`_ +0xc3 any 0x01 lock:: `Atomic operations`_ + + *(u32 *)(dst + offset) += src + src = *(u32 *)(dst + offset) +0xc3 any 0x40 \*(u32 \*)(dst + offset) \|= src `Atomic operations`_ +0xc3 any 0x41 lock:: `Atomic operations`_ + + *(u32 *)(dst + offset) |= src + src = *(u32 *)(dst + offset) +0xc3 any 0x50 \*(u32 \*)(dst + offset) &= src `Atomic operations`_ +0xc3 any 0x51 lock:: `Atomic operations`_ + + *(u32 *)(dst + offset) &= src + src = *(u32 *)(dst + offset) +0xc3 any 0xa0 \*(u32 \*)(dst + offset) ^= src `Atomic operations`_ +0xc3 any 0xa1 lock:: `Atomic operations`_ + + *(u32 *)(dst + offset) ^= src + src = *(u32 *)(dst + offset) +0xc3 any 0xe1 lock:: `Atomic operations`_ + + temp = *(u32 *)(dst + offset) + *(u32 *)(dst + offset) = src + src = temp +0xc3 any 0xf1 lock:: `Atomic operations`_ + + temp = *(u32 *)(dst + offset) + if *(u32 *)(dst + offset) == R0 + *(u32 *)(dst + offset) = src + R0 = 
temp +0xc4 0x0 any dst = (u32)(dst s>> imm) `Arithmetic instructions`_ +0xc5 0x0 any if dst s< imm goto +offset `Jump instructions`_ +0xc6 0x0 any if (s32)dst s< (s32)imm goto +offset `Jump instructions`_ +0xc7 0x0 any dst s>>= imm `Arithmetic instructions`_ +0xcc any 0x00 dst = (u32)(dst s>> src) `Arithmetic instructions`_ +0xcd any 0x00 if dst s< src goto +offset `Jump instructions`_ +0xce any 0x00 if (s32)dst s< (s32)src goto +offset `Jump instructions`_ +0xcf any 0x00 dst s>>= src `Arithmetic instructions`_ +0xd4 0x0 0x10 dst = htole16(dst) `Byte swap instructions`_ +0xd4 0x0 0x20 dst = htole32(dst) `Byte swap instructions`_ +0xd4 0x0 0x40 dst = htole64(dst) `Byte swap instructions`_ +0xd5 0x0 any if dst s<= imm goto +offset `Jump instructions`_ +0xd6 0x0 any if (s32)dst s<= (s32)imm goto +offset `Jump instructions`_ +0xdb any 0x00 lock \*(u64 \*)(dst + offset) += src `Atomic operations`_ +0xdb any 0x01 lock:: `Atomic operations`_ + + *(u64 *)(dst + offset) += src + src = *(u64 *)(dst + offset) +0xdb any 0x40 \*(u64 \*)(dst + offset) \|= src `Atomic operations`_ +0xdb any 0x41 lock:: `Atomic operations`_ + + *(u64 *)(dst + offset) |= src + src = *(u64 *)(dst + offset) +0xdb any 0x50 \*(u64 \*)(dst + offset) &= src `Atomic operations`_ +0xdb any 0x51 lock:: `Atomic operations`_ + + *(u64 *)(dst + offset) &= src + src = *(u64 *)(dst + offset) +0xdb any 0xa0 \*(u64 \*)(dst + offset) ^= src `Atomic operations`_ +0xdb any 0xa1 lock:: `Atomic operations`_ + + *(u64 *)(dst + offset) ^= src + src = *(u64 *)(dst + offset) +0xdb any 0xe1 lock:: `Atomic operations`_ + + temp = *(u64 *)(dst + offset) + *(u64 *)(dst + offset) = src + src = temp +0xdb any 0xf1 lock:: `Atomic operations`_ + + temp = *(u64 *)(dst + offset) + if *(u64 *)(dst + offset) == R0 + *(u64 *)(dst + offset) = src + R0 = temp +0xdc 0x0 0x10 dst = htobe16(dst) `Byte swap instructions`_ +0xdc 0x0 0x20 dst = htobe32(dst) `Byte swap instructions`_ +0xdc 0x0 0x40 dst = htobe64(dst) `Byte swap instructions`_ 
+0xdd any 0x00 if dst s<= src goto +offset `Jump instructions`_ +0xde any 0x00 if (s32)dst s<= (s32)src goto +offset `Jump instructions`_ +====== === ==== =================================================== ======================================== diff --git a/rst/instruction-set-prologue.rst b/rst/instruction-set-prologue.rst new file mode 100644 index 0000000..3afd225 --- /dev/null +++ b/rst/instruction-set-prologue.rst @@ -0,0 +1,13 @@ +.. |docName| replace:: draft-thaler-bpf-instruction-set-00 +.. |ipr| replace:: trust200902 +.. |category| replace:: std +.. |titleAbbr| replace:: eBPF ISA +.. |submissionType| replace:: IETF +.. |author[0].fullname| replace:: Dave Thaler +.. |author[0].role| replace:: editor +.. |author[0].surname| replace:: Thaler +.. |author[0].initials| replace:: D. +.. |author[0].email| replace:: dthaler@microsoft.com +.. |author[0].city| replace:: Redmond +.. |author[0].region| replace:: WA +.. header:: diff --git a/rst/instruction-set-skeleton.rst b/rst/instruction-set-skeleton.rst new file mode 100644 index 0000000..db4b1d7 --- /dev/null +++ b/rst/instruction-set-skeleton.rst @@ -0,0 +1,7 @@ +.. include:: instruction-set-prologue.rst +.. include:: instruction-set.rst + +Appendix +======== + +.. include:: instruction-set-opcodes.rst diff --git a/rst/instruction-set.rst b/rst/instruction-set.rst index db8789e..b3e2f98 100644 --- a/rst/instruction-set.rst +++ b/rst/instruction-set.rst @@ -7,11 +7,15 @@ eBPF Instruction Set Specification, v1.0 This document specifies version 1.0 of the eBPF instruction set. +The eBPF instruction set consists of eleven 64 bit registers, a program counter, +and an implementation-specific amount (e.g., 512 bytes) of stack space. + Documentation conventions ========================= For brevity, this document uses the type notion "u64", "u32", etc. -to mean an unsigned integer whose width is the specified number of bits. +to mean an unsigned integer whose width is the specified number of bits, +and "s32", etc. 
to mean a signed integer of the specified number of bits. Registers and calling convention ================================ @@ -26,12 +30,24 @@ The eBPF calling convention is defined as: * R6 - R9: callee saved registers that function calls will preserve * R10: read-only frame pointer to access stack -R0 - R5 are scratch registers and eBPF programs needs to spill/fill them if -necessary across calls. +Registers R0 - R5 are caller-saved registers, meaning the BPF program needs to either +spill them to the BPF stack or move them to callee saved registers if these +arguments are to be reused across multiple function calls. Spilling means +that the value in the register is moved to the BPF stack. The reverse operation +of moving the variable from the BPF stack to the register is called filling. +The reason for spilling/filling is due to the limited number of registers. + +Upon entering execution of an eBPF program, registers R1 - R5 initially can contain +the input arguments for the program (similar to the argc/argv pair for a typical C program). +The actual number of registers used, and their meaning, is defined by the program type; +for example, a networking program might have an argument that includes network packet data +and/or metadata. Instruction encoding ==================== +An eBPF program is a sequence of instructions. + eBPF has two instruction encodings: * the basic instruction encoding, which uses 64 bits to encode an instruction @@ -73,7 +89,7 @@ For example:: 07 1 0 00 00 11 22 33 44 r1 += 0x11223344 // big Note that most instructions do not use all of the fields. -Unused fields shall be cleared to zero. +Unused fields must be set to zero. As discussed below in `64-bit immediate instructions`_, a 64-bit immediate instruction uses a 64-bit immediate value that is constructed as follows. @@ -102,7 +118,9 @@ instruction are reserved and shall be cleared to zero. 
Instruction classes ------------------- -The three LSB bits of the 'opcode' field store the instruction class: +The encoding of the 'opcode' field varies and can be determined from +the three least significant bits (LSB) of the 'opcode' field which holds +the "instruction class", as follows: ========= ===== =============================== =================================== class value description reference @@ -148,7 +166,8 @@ code source instruction class Arithmetic instructions ----------------------- -``BPF_ALU`` uses 32-bit wide operands while ``BPF_ALU64`` uses 64-bit wide operands for +Instruction class ``BPF_ALU`` uses 32-bit wide operands (zeroing the upper 32 bits +of the destination register) while ``BPF_ALU64`` uses 64-bit wide operands for otherwise identical operations. The 'code' field encodes the operation as below, where 'src' and 'dst' refer to the values of the source and destination registers, respectively. @@ -179,21 +198,23 @@ If execution would result in modulo by zero, for ``BPF_ALU64`` the value of the destination register is unchanged whereas for ``BPF_ALU`` the upper 32 bits of the destination register are zeroed. -``BPF_ADD | BPF_X | BPF_ALU`` means:: +Examples: + +``BPF_ADD | BPF_X | BPF_ALU`` (0x0c) means:: dst = (u32) ((u32) dst + (u32) src) where '(u32)' indicates that the upper 32 bits are zeroed. -``BPF_ADD | BPF_X | BPF_ALU64`` means:: +``BPF_ADD | BPF_X | BPF_ALU64`` (0x0f) means:: dst = dst + src -``BPF_XOR | BPF_K | BPF_ALU`` means:: +``BPF_XOR | BPF_K | BPF_ALU`` (0xa4) means:: dst = (u32) dst ^ (u32) imm32 -``BPF_XOR | BPF_K | BPF_ALU64`` means:: +``BPF_XOR | BPF_K | BPF_ALU64`` (0xa7) means:: dst = dst ^ imm32 @@ -212,8 +233,9 @@ The byte swap instructions use an instruction class of ``BPF_ALU`` and a 4-bit The byte swap instructions operate on the destination register only and do not use a separate source register or immediate value. 
-The 1-bit source operand field in the opcode is used to select what byte -order the operation convert from or to: +Byte swap instructions use the 1-bit 'source' field in the 'opcode' field +as follows. Rather than indicating the source operand, it is +used to select what byte order the operation converts from or to: ========= ===== ================================================= source value description @@ -223,47 +245,84 @@ BPF_TO_BE 0x08 convert between host byte order and big endian ========= ===== ================================================= The 'imm' field encodes the width of the swap operations. The following widths -are supported: 16, 32 and 64. - -Examples: - -``BPF_ALU | BPF_TO_LE | BPF_END`` with imm = 16 means:: - - dst = htole16(dst) - -``BPF_ALU | BPF_TO_BE | BPF_END`` with imm = 64 means:: - - dst = htobe64(dst) +are supported: 16, 32 and 64. The following table summarizes the resulting +possibilities: + +============================= ========= === ======== ================== +opcode construction opcode imm mnemonic pseudocode +============================= ========= === ======== ================== +BPF_END | BPF_TO_LE | BPF_ALU 0xd4 16 le16 dst dst = htole16(dst) +BPF_END | BPF_TO_LE | BPF_ALU 0xd4 32 le32 dst dst = htole32(dst) +BPF_END | BPF_TO_LE | BPF_ALU 0xd4 64 le64 dst dst = htole64(dst) +BPF_END | BPF_TO_BE | BPF_ALU 0xdc 16 be16 dst dst = htobe16(dst) +BPF_END | BPF_TO_BE | BPF_ALU 0xdc 32 be32 dst dst = htobe32(dst) +BPF_END | BPF_TO_BE | BPF_ALU 0xdc 64 be64 dst dst = htobe64(dst) +============================= ========= === ======== ================== + +where + +* mnemonic indicates a short form that might be displayed by some tools such as disassemblers +* 'htoleNN()' indicates converting a NN-bit value from host byte order to little-endian byte order +* 'htobeNN()' indicates converting a NN-bit value from host byte order to big-endian byte order Jump instructions ----------------- -``BPF_JMP32`` uses 32-bit wide 
operands while ``BPF_JMP`` uses 64-bit wide operands for +Instruction class ``BPF_JMP32`` uses 32-bit wide operands while ``BPF_JMP`` uses 64-bit wide operands for otherwise identical operations. -The 'code' field encodes the operation as below: - -======== ===== ========================= ============ -code value description notes -======== ===== ========================= ============ -BPF_JA 0x00 PC += off BPF_JMP only -BPF_JEQ 0x10 PC += off if dst == src -BPF_JGT 0x20 PC += off if dst > src unsigned -BPF_JGE 0x30 PC += off if dst >= src unsigned -BPF_JSET 0x40 PC += off if dst & src -BPF_JNE 0x50 PC += off if dst != src -BPF_JSGT 0x60 PC += off if dst > src signed -BPF_JSGE 0x70 PC += off if dst >= src signed -BPF_CALL 0x80 function call -BPF_EXIT 0x90 function / program return BPF_JMP only -BPF_JLT 0xa0 PC += off if dst < src unsigned -BPF_JLE 0xb0 PC += off if dst <= src unsigned -BPF_JSLT 0xc0 PC += off if dst < src signed -BPF_JSLE 0xd0 PC += off if dst <= src signed -======== ===== ========================= ============ - -The eBPF program needs to store the return value into register R0 before doing a -BPF_EXIT. 
+The 4-bit 'code' field encodes the operation as below, where PC is the program counter: + +======== ===== === ========================== ======================== +code value src description notes +======== ===== === ========================== ======================== +BPF_JA 0x0 0x0 PC += offset BPF_JMP only +BPF_JEQ 0x1 any PC += offset if dst == src +BPF_JGT 0x2 any PC += offset if dst > src unsigned +BPF_JGE 0x3 any PC += offset if dst >= src unsigned +BPF_JSET 0x4 any PC += offset if dst & src +BPF_JNE 0x5 any PC += offset if dst != src +BPF_JSGT 0x6 any PC += offset if dst > src signed +BPF_JSGE 0x7 any PC += offset if dst >= src signed +BPF_CALL 0x8 0x0 call helper function imm see `Helper functions`_ +BPF_CALL 0x8 0x1 call PC += offset see `eBPF functions`_ +BPF_CALL 0x8 0x2 call runtime function imm see `Runtime functions`_ +BPF_EXIT 0x9 0x0 return BPF_JMP only +BPF_JLT 0xa any PC += offset if dst < src unsigned +BPF_JLE 0xb any PC += offset if dst <= src unsigned +BPF_JSLT 0xc any PC += offset if dst < src signed +BPF_JSLE 0xd any PC += offset if dst <= src signed +======== ===== === ========================== ======================== + +Example: + +``BPF_JSGE | BPF_X | BPF_JMP32`` (0x7e) means:: + + if (s32)dst s>= (s32)src goto +offset + +where 's>=' indicates a signed '>=' comparison. + +Helper functions +~~~~~~~~~~~~~~~~ +Helper functions are a concept whereby BPF programs can call into a +set of function calls exposed by the eBPF runtime. Each helper +function is identified by an integer used in a ``BPF_CALL`` instruction. +The available helper functions may differ for each eBPF program type. + +Note that ``BPF_CALL | BPF_X | BPF_JMP`` (0x8d), where the helper function integer +would be read from a specified register, is reserved and currently not permitted. + +Runtime functions +~~~~~~~~~~~~~~~~~ +Runtime functions are like helper functions except that they are not specific +to eBPF programs. 
They use a different numbering space from helper functions, +but otherwise the same considerations apply. + +eBPF functions +~~~~~~~~~~~~~~ +eBPF functions are functions exposed by the same eBPF program as the caller, +and are referenced by offset from the call instruction, similar to ``BPF_JA``. +A ``BPF_EXIT`` within the eBPF function will return to the caller. Load and store instructions =========================== @@ -277,7 +336,8 @@ For load and store instructions (``BPF_LD``, ``BPF_LDX``, ``BPF_ST``, and ``BPF_ mode size instruction class ============ ====== ================= -The mode modifier is one of: +mode + one of: ============= ===== ==================================== ============= mode modifier value description reference @@ -289,7 +349,8 @@ The mode modifier is one of: BPF_ATOMIC 0xc0 atomic operations `Atomic operations`_ ============= ===== ==================================== ============= -The size modifier is one of: +size + one of: ============= ===== ===================== size modifier value description @@ -300,6 +361,9 @@ The size modifier is one of: BPF_DW 0x18 double word (8 bytes) ============= ===== ===================== +instruction class + the instruction class (see `Instruction classes`_) + Regular load and store operations --------------------------------- @@ -318,7 +382,7 @@ instructions that transfer data between a register and memory. dst = *(size *) (src + offset) -Where size is one of: ``BPF_B``, ``BPF_H``, ``BPF_W``, or ``BPF_DW``. +where size is one of: ``BPF_B``, ``BPF_H``, ``BPF_W``, or ``BPF_DW``. Atomic operations ----------------- @@ -330,9 +394,11 @@ by other eBPF programs or means outside of this specification. All atomic operations supported by eBPF are encoded as store operations that use the ``BPF_ATOMIC`` mode modifier as follows: -* ``BPF_ATOMIC | BPF_W | BPF_STX`` for 32-bit operations -* ``BPF_ATOMIC | BPF_DW | BPF_STX`` for 64-bit operations -* 8-bit and 16-bit wide atomic operations are not supported. 
+* ``BPF_ATOMIC | BPF_W | BPF_STX`` (0xc3) for 32-bit operations +* ``BPF_ATOMIC | BPF_DW | BPF_STX`` (0xdb) for 64-bit operations + +Note that 8-bit (``BPF_B``) and 16-bit (``BPF_H``) wide atomic operations are not supported, +nor is ``BPF_ATOMIC | <size> | BPF_ST``. The 'imm' field is used to encode the actual atomic operation. Simple atomic operation use a subset of the values defined to encode @@ -347,16 +413,15 @@ BPF_AND 0x50 atomic and BPF_XOR 0xa0 atomic xor ======== ===== =========== - -``BPF_ATOMIC | BPF_W | BPF_STX`` with 'imm' = BPF_ADD means:: +``BPF_ATOMIC | BPF_W | BPF_STX`` (0xc3) with 'imm' = BPF_ADD means:: *(u32 *)(dst + offset) += src -``BPF_ATOMIC | BPF_DW | BPF_STX`` with 'imm' = BPF ADD means:: +``BPF_ATOMIC | BPF_DW | BPF_STX`` (0xdb) with 'imm' = BPF_ADD means:: *(u64 *)(dst + offset) += src -In addition to the simple atomic operations, there also is a modifier and +In addition to the simple atomic operations above, there also is a modifier and two complex atomic operations: =========== ================ =========================== @@ -385,14 +450,54 @@ and loaded back to ``R0``. ----------------------------- Instructions with the ``BPF_IMM`` 'mode' modifier use the wide instruction -encoding for an extra imm64 value. - -There is currently only one such instruction. - -``BPF_LD | BPF_DW | BPF_IMM`` means:: - - dst = imm64 - +encoding defined in `Instruction encoding`_, and use the 'src' field of the +basic instruction to hold an opcode subtype. 
+ +The following instructions are defined, and use additional concepts defined below: + +========================= ====== === ===================================== =========== ============== +opcode construction opcode src pseudocode imm type dst type +========================= ====== === ===================================== =========== ============== +BPF_IMM | BPF_DW | BPF_LD 0x18 0x0 dst = imm64 integer integer +BPF_IMM | BPF_DW | BPF_LD 0x18 0x1 dst = map_by_fd(imm) map fd map +BPF_IMM | BPF_DW | BPF_LD 0x18 0x2 dst = mva(map_by_fd(imm)) + next_imm map fd data pointer +BPF_IMM | BPF_DW | BPF_LD 0x18 0x3 dst = variable_addr(imm) variable id data pointer +BPF_IMM | BPF_DW | BPF_LD 0x18 0x4 dst = code_addr(imm) integer code pointer +BPF_IMM | BPF_DW | BPF_LD 0x18 0x5 dst = map_by_idx(imm) map index map +BPF_IMM | BPF_DW | BPF_LD 0x18 0x6 dst = mva(map_by_idx(imm)) + next_imm map index data pointer +========================= ====== === ===================================== =========== ============== + +where + +* map_by_fd(fd) means to convert a 32-bit POSIX file descriptor into an address of a map object (see `Map objects`_) +* map_by_idx(index) means to convert a 32-bit index into an address of a map object +* mva(map) gets the address of the first value in a given map object +* variable_addr(id) gets the address of a variable (see `Variables`_) with a given id +* code_addr(offset) gets the address of the instruction at a specified relative offset in units of 64-bit blocks +* the 'imm type' can be used by disassemblers for display +* the 'dst type' can be used for verification and JIT compilation purposes + +Map objects +~~~~~~~~~~~ + +Maps are shared memory regions accessible by eBPF programs on some platforms, where we use the term "map object" +to refer to an object containing the data and metadata (e.g., size) about the memory region. 
+A map can have various semantics as defined in a separate document, and may or may not have a single +contiguous memory region, but 'mva(map)' is currently only defined for maps that do have a single +contiguous memory region. Support for maps is optional. + +Each map object can have a POSIX file descriptor (fd) if supported by the platform, +where 'map_by_fd(fd)' means to get the map with the specified file descriptor. +Each eBPF program can also be defined to use a set of maps associated with the program +at load time, and 'map_by_idx(index)' means to get the map with the given index in the set +associated with the eBPF program containing the instruction. + +Variables +~~~~~~~~~ + +Variables are memory regions, identified by integer ids, accessible by eBPF programs on +some platforms. The 'variable_addr(id)' operation means to get the address of the memory region +identified by the given id. Support for such variables is optional. Legacy BPF Packet access instructions ------------------------------------- diff --git a/rst/linux-notes.rst b/rst/linux-notes.rst index 956b0c8..fb050a4 100644 --- a/rst/linux-notes.rst +++ b/rst/linux-notes.rst @@ -7,11 +7,26 @@ Linux implementation notes This document provides more details specific to the Linux kernel implementation of the eBPF instruction set. +Stack space +====================== + +Linux currently supports 512 bytes of stack space. + Byte swap instructions ====================== ``BPF_FROM_LE`` and ``BPF_FROM_BE`` exist as aliases for ``BPF_TO_LE`` and ``BPF_TO_BE`` respectively. +Map objects +=========== + +Linux only supports the 'mva(map)' operation on array maps with a single element. + +Variables +========= + +Linux uses BTF ids to identify variables. 
+ Legacy BPF Packet access instructions ===================================== diff --git a/rst/linux-opcodes.rst b/rst/linux-opcodes.rst new file mode 100644 index 0000000..c49c782 --- /dev/null +++ b/rst/linux-opcodes.rst @@ -0,0 +1,12 @@ +====== ==== =================================================== ============= +opcode imm description reference +====== ==== =================================================== ============= +0x20 any dst = ntohl(\*(u32 \*)(R6->data + imm)) `Legacy BPF Packet access instructions `_ +0x28 any dst = ntohs(\*(u16 \*)(R6->data + imm)) `Legacy BPF Packet access instructions `_ +0x30 any dst = (\*(u8 \*)(R6->data + imm)) `Legacy BPF Packet access instructions `_ +0x38 any dst = ntohll(\*(u64 \*)(R6->data + imm)) `Legacy BPF Packet access instructions `_ +0x40 any dst = ntohl(\*(u32 \*)(R6->data + src + imm)) `Legacy BPF Packet access instructions `_ +0x48 any dst = ntohs(\*(u16 \*)(R6->data + src + imm)) `Legacy BPF Packet access instructions `_ +0x50 any dst = (\*(u8 \*)(R6->data + src + imm)) `Legacy BPF Packet access instructions `_ +0x58 any dst = ntohll(\*(u64 \*)(R6->data + src + imm)) `Legacy BPF Packet access instructions `_ +====== ==== =================================================== =============