-
Notifications
You must be signed in to change notification settings - Fork 593
Initial framework of an ethos-u runtime backend #501
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
7f44e16
c6755bf
6421ead
1f62acc
ced2e06
83a5e32
93cfdc2
a38f080
a538747
8de6e92
644eafc
b307c31
52dc73c
b2a431f
77e8eb0
9b244b3
1cabc63
2f10ee3
298fb22
08d71d9
189b04c
7007120
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,25 @@ | ||
# Copyright 2023 Arm Limited and/or its affiliates. | ||
# | ||
# This source code is licensed under the BSD-style license found in the | ||
# LICENSE file in the root directory of this source tree. | ||
cmake_minimum_required(VERSION 3.19) | ||
|
||
set(CMAKE_EXPORT_COMPILE_COMMANDS ON) | ||
|
||
# Source root directory for executorch. | ||
if(NOT EXECUTORCH_ROOT) | ||
set(EXECUTORCH_ROOT ${CMAKE_CURRENT_SOURCE_DIR}/../..) | ||
endif() | ||
|
||
include(${EXECUTORCH_ROOT}/build/Utils.cmake) | ||
|
||
set(_common_include_directories ${EXECUTORCH_ROOT}/..) | ||
set(_common_compile_options -Wno-deprecated-declarations) | ||
|
||
include(cmake/Dependencies.cmake) | ||
|
||
set(_arm_baremetal_sources backends/arm/runtime/ArmBackendEthosU.cpp) | ||
list(TRANSFORM _arm_baremetal_sources PREPEND "${EXECUTORCH_ROOT}/") | ||
add_library(ethos_u STATIC ${_arm_baremetal_sources}) | ||
target_include_directories(ethos_u PUBLIC ${_common_include_directories}) | ||
target_include_directories(ethos_u PUBLIC ${DRIVER_ETHOSU_INCLUDE_DIR}) |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -13,6 +13,7 @@ | |
import operator | ||
import os | ||
import tempfile | ||
import subprocess | ||
from typing import final, List | ||
|
||
import numpy as np | ||
|
@@ -140,6 +141,64 @@ def dbg_tosa_dump(tosa_fb, path): | |
f.write(js) | ||
f.close() | ||
|
||
# Output to Vela with current file-based compilation | ||
# WARNING: if this changes, the runtime reader also needs to change | ||
def vela_compile(tosa_fb):
    """Compile a TOSA graph with Vela and repackage the result as one byte stream.

    The flatbuffer returned by ``tosa_fb.serialize()`` is written to a
    temporary directory, the ``vela`` command-line tool is run on it there,
    and the ``output/out_sg0_vela.npz`` archive read back from that directory
    is flattened into a sequence of 16-byte aligned blocks.

    Args:
        tosa_fb: object exposing ``serialize()``, whose result is written
            verbatim to ``out.tosa``.

    Returns:
        bytes: the 16-byte ``vela_bin_stream`` header (NUL terminated), the
        packed blocks, and the 16-byte ``vela_end_stream`` footer.

    Raises:
        FileNotFoundError: if the ``vela`` executable cannot be found.
        subprocess.CalledProcessError: if ``vela`` exits with a non-zero status.
    """
    with tempfile.TemporaryDirectory() as tmpdir:
        print(f"compiling to Vela in {tmpdir}")

        tosaname = "out.tosa"
        flatbuffer = tosa_fb.serialize()
        with open(os.path.join(tmpdir, tosaname), "wb") as f:
            f.write(flatbuffer)

        # invoke vela
        # TODO target ethos-u55-128
        # Run with cwd=tmpdir and an argument list (no shell): the result is
        # read back from <tmpdir>/output below, and a temp path containing
        # spaces or shell metacharacters cannot break the command line.
        subprocess.run(
            ["vela", "--accelerator-config", "ethos-u55-128", tosaname],
            cwd=tmpdir,
            check=True,
        )

        np_path = os.path.join(tmpdir, "output", "out_sg0_vela.npz")
        with np.load(np_path, allow_pickle=False) as data:
            blocks = _vela_pack_npz(data)

    # return 16 byte VELA bin header + blocks + footer
    header = bytes("vela_bin_stream", "utf-8") + b"\x00"
    footer = bytes("vela_end_stream", "utf-8") + b"\x00"
    return header + blocks + footer


def _vela_block(name, length, payload):
    """Build one stream block.

    Layout:
      - 16 byte block name, NUL terminated (truncated to 15 bytes, padded to 16)
      - 16 byte little-endian length field (for lengths that fit in an int32
        this is 4 bytes of length followed by 12 bytes of 0's)
      - block data, zero-padded at the end to a multiple of 16 bytes
    """
    block_name = bytes(name, "utf8")[:15]
    block_name = block_name + b"\x00" * (16 - len(block_name))
    padding = b"\x00" * (-len(payload) % 16)
    return block_name + length.to_bytes(16, "little") + payload + padding


def _vela_pack_npz(data):
    """Flatten the regions of a loaded Vela NPZ archive into stream blocks.

    Emits one block per key in ``data`` followed by a zero-filled
    ``scratch_data`` block sized from ``data["scratch_shape"][0]``.
    """
    blocks = []
    for key in data.keys():
        block_data = data[key].tobytes()
        # We need the actual unpadded block lengths for hw setup, so the
        # length field is taken before the data is padded.
        blocks.append(_vela_block(key, len(block_data), block_data))

    # Add a block for scratch, inputs and outputs
    # scratch shape is a 1 element array giving us size in bytes
    scratch_length = data["scratch_shape"][0].item()
    print(f"scratch length = {scratch_length}")
    # Unlike the NPZ regions, the scratch block records its padded size.
    scratch_length = scratch_length + (-scratch_length % 16)
    blocks.append(
        _vela_block("scratch_data", scratch_length, b"\x00" * scratch_length)
    )
    # TODO are these already in scratch shape? look to be
    # input_shape * input_elem_size
    # output_shape * output_elem_size
    # input_offset and output_offset specify the location these arrays are written from base of scratch

    # Join once instead of growing a bytes object block by block.
    return b"".join(blocks)
|
||
def dbg_fail(node, tosa_fb, path): | ||
dbg_tosa_dump(tosa_fb, path) | ||
|
@@ -205,10 +264,6 @@ def preprocess( # noqa: C901 | |
path = spec.value.decode() | ||
debug_output = True | ||
|
||
# in non debug builds we still pass files to vela | ||
if path is None: | ||
path = tempfile.mkdtemp(prefix="arm_tosa_") | ||
|
||
# Converted output for this subgraph, serializer needs path early as it emits | ||
# const data directly. Path created and data written only in debug builds. | ||
tosa_fb = ts.TosaSerializer(path) | ||
|
@@ -680,5 +735,7 @@ def preprocess( # noqa: C901 | |
dbg_tosa_dump(tosa_fb, path) | ||
|
||
# Serialize and return the tosa flatbuffer | ||
fb = tosa_fb.serialize() | ||
return PreprocessResult(processed_bytes=bytes(fb)) | ||
# fb = bytes(tosa_fb.serialize()) | ||
binary = vela_compile(tosa_fb) | ||
|
||
return PreprocessResult(processed_bytes=binary) |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,12 @@ | ||
# Copyright 2023 Arm Limited and/or its affiliates. | ||
# | ||
# This source code is licensed under the BSD-style license found in the | ||
# LICENSE file in the root directory of this source tree. | ||
|
||
set(THIRD_PARTY_ROOT "${CMAKE_CURRENT_SOURCE_DIR}/third-party") | ||
|
||
# Ethos-U driver | ||
set(DRIVER_ETHOSU_SOURCE_DIR "${THIRD_PARTY_ROOT}/ethos-u-core-driver") | ||
set(DRIVER_ETHOSU_INCLUDE_DIR "${THIRD_PARTY_ROOT}/ethos-u-core-driver/include") | ||
add_subdirectory( ${DRIVER_ETHOSU_SOURCE_DIR} ) | ||
include_directories( ${DRIVER_ETHOSU_INCLUDE_DIR} ) |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,90 @@ | ||
# Copyright 2023 Arm Limited and/or its affiliates. | ||
# | ||
# This source code is licensed under the BSD-style license found in the | ||
# LICENSE file in the root directory of this source tree. | ||
|
||
set(TARGET_CPU "cortex-m4" CACHE STRING "Target CPU") | ||
string(TOLOWER ${TARGET_CPU} CMAKE_SYSTEM_PROCESSOR) | ||
|
||
set(CMAKE_SYSTEM_NAME Generic) | ||
set(CMAKE_C_COMPILER "arm-none-eabi-gcc") | ||
set(CMAKE_CXX_COMPILER "arm-none-eabi-g++") | ||
set(CMAKE_ASM_COMPILER "arm-none-eabi-gcc") | ||
set(CMAKE_LINKER "arm-none-eabi-ld") | ||
|
||
set(CMAKE_EXECUTABLE_SUFFIX ".elf") | ||
set(CMAKE_TRY_COMPILE_TARGET_TYPE STATIC_LIBRARY) | ||
set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER) | ||
set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY) | ||
set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY) | ||
|
||
# Select C/C++ version | ||
set(CMAKE_C_STANDARD 11) | ||
set(CMAKE_CXX_STANDARD 14) | ||
|
||
set(GCC_CPU ${CMAKE_SYSTEM_PROCESSOR}) | ||
string(REPLACE "cortex-m85" "cortex-m55" GCC_CPU ${GCC_CPU}) | ||
|
||
# Compile options | ||
add_compile_options( | ||
-mcpu=${GCC_CPU} | ||
-mthumb | ||
"$<$<CONFIG:DEBUG>:-gdwarf-3>" | ||
"$<$<COMPILE_LANGUAGE:CXX>:-fno-unwind-tables;-fno-rtti;-fno-exceptions>" | ||
-fdata-sections | ||
-ffunction-sections) | ||
|
||
# Compile defines | ||
add_compile_definitions( | ||
"$<$<NOT:$<CONFIG:DEBUG>>:NDEBUG>") | ||
|
||
# Link options | ||
add_link_options( | ||
-mcpu=${GCC_CPU} | ||
-mthumb | ||
--specs=nosys.specs) | ||
|
||
# Set floating point unit | ||
if(CMAKE_SYSTEM_PROCESSOR MATCHES "\\+fp") | ||
set(FLOAT hard) | ||
elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "\\+nofp") | ||
set(FLOAT soft) | ||
elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "cortex-m33(\\+|$)" OR | ||
CMAKE_SYSTEM_PROCESSOR MATCHES "cortex-m55(\\+|$)" OR | ||
CMAKE_SYSTEM_PROCESSOR MATCHES "cortex-m85(\\+|$)") | ||
set(FLOAT hard) | ||
elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "cortex-m4(\\+|$)" OR | ||
CMAKE_SYSTEM_PROCESSOR MATCHES "cortex-m7(\\+|$)") | ||
set(FLOAT hard) | ||
set(FPU_CONFIG "fpv4-sp-d16") | ||
add_compile_options(-mfpu=${FPU_CONFIG}) | ||
add_link_options(-mfpu=${FPU_CONFIG}) | ||
else() | ||
set(FLOAT soft) | ||
endif() | ||
|
||
if (FLOAT) | ||
add_compile_options(-mfloat-abi=${FLOAT}) | ||
add_link_options(-mfloat-abi=${FLOAT}) | ||
endif() | ||
|
||
add_link_options(LINKER:--nmagic,--gc-sections) | ||
|
||
# Compilation warnings | ||
add_compile_options( | ||
# -Wall | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. we (ET runtime) want to enable these eventually (or at least be cool when someone enables them) but right now we are not ready I guess |
||
# -Wextra | ||
|
||
# -Wcast-align | ||
# -Wdouble-promotion | ||
# -Wformat | ||
# -Wmissing-field-initializers | ||
# -Wnull-dereference | ||
# -Wredundant-decls | ||
# -Wshadow | ||
# -Wswitch | ||
# -Wswitch-default | ||
# -Wunused | ||
-Wno-redundant-decls | ||
-Wno-psabi | ||
) |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,53 @@ | ||
#!/bin/bash | ||
# Copyright 2023 Arm Limited and/or its affiliates. | ||
# | ||
# This source code is licensed under the BSD-style license found in the | ||
# LICENSE file in the root directory of this source tree. | ||
set -e | ||
|
||
# | ||
# Setup toolchain | ||
# | ||
# Directory containing this script; quoted throughout so a checkout path with
# spaces does not split into several words.
BASEDIR=$(realpath "$(dirname "$0")")
echo "building using build.sh in $BASEDIR"

ARCH=$(uname -i)
GCCPATH=${BASEDIR}/arm-gnu-toolchain-12.3.rel1-${ARCH}-arm-none-eabi/bin/

echo "$GCCPATH"
if test -d "${GCCPATH}"; then
    echo "Using existing compiler ${GCCPATH}"
else
    # No toolchain next to this script yet: run toolchain.sh from BASEDIR,
    # where GCCPATH expects the toolchain directory to appear.
    pushd "${BASEDIR}/"
    ./toolchain.sh
    popd
fi
export PATH=${PATH}:${GCCPATH}
|
||
echo building with `arm-none-eabi-gcc -v 2>&1 | grep "^gcc"` | ||
|
||
|
||
# | ||
# Prepare and run clean build | ||
# | ||
rm -rf buck-out/ build/lib/ cmake-out/ | ||
rm -rf cmake-corstone | ||
mkdir cmake-corstone | ||
cd cmake-corstone | ||
|
||
#cmake -DBUCK2=buck2 .. | ||
|
||
#cmake --toolchain backends/arm/cmake/arm-none-eabi-gcc.cmake .. | ||
cmake -DFLATC_EXECUTABLE=flatc \ | ||
-DEXECUTORCH_BUILD_XNNPACK=OFF \ | ||
-DEXECUTORCH_BUILD_HOST_TARGETS=OFF \ | ||
-DEXECUTORCH_BUILD_ARM_BAREMETAL=ON \ | ||
-DCMAKE_SYSTEM_PROCESSOR=cortex-m55+nodsp+nofp \ | ||
-DETHOSU_TARGET_NPU_CONFIG=ethos-u55-128 \ | ||
--toolchain backends/arm/cmake/arm-none-eabi-gcc.cmake \ | ||
-DCMAKE_BUILD_TYPE=Release \ | ||
-DEXECUTORCH_ENABLE_LOGGING_RELEASE_MODE=ON \ | ||
.. | ||
|
||
cd .. | ||
cmake --build cmake-corstone -j9 --target ethos_u ethosu_core_driver executorch portable_ops_lib portable_kernels |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,12 @@ | ||
#!/bin/bash | ||
# Copyright 2023 Arm Limited and/or its affiliates. | ||
# | ||
# This source code is licensed under the BSD-style license found in the | ||
# LICENSE file in the root directory of this source tree. | ||
set -e | ||
|
||
# Cross compiler for Arm baremetal (e.g. Corstone-300 FVP or silicon)
ARCH=$(uname -i)
# Name shared by the downloaded archive and the directory it unpacks to.
TOOLCHAIN=arm-gnu-toolchain-12.3.rel1-${ARCH}-arm-none-eabi
curl -o gcc.tar.xz "https://armkeil.blob.core.windows.net/developer/Files/downloads/gnu/12.3.rel1/binrel/${TOOLCHAIN}.tar.xz"
tar xf gcc.tar.xz
# Derive the bin directory from ${TOOLCHAIN} so it matches the host
# architecture the archive was downloaded for.
export PATH=${PATH}:$(cd "${TOOLCHAIN}/bin/"; pwd)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Nice. If we are planning to have multiple delegate flavors - i.e. runtime logic - under
arm
name then we can do something like,