Skip to content

Initial framework of an ethos-u runtime backend #501

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 22 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
22 commits
Select commit Hold shift + click to select a range
7f44e16
Add ethos-u-core-driver submodule
robell Sep 27, 2023
c6755bf
Added shell of runtime Arm Backend for Ethos-U
robell Sep 27, 2023
6421ead
First-pass ethos-u backend with an assumed flat program format in SRA…
robell Sep 27, 2023
1f62acc
fixed builds of ethos-u-core-driver
robell Sep 27, 2023
ced2e06
Merge branch 'pytorch:main' into main
robell Sep 27, 2023
83a5e32
Emit Ethos-U55 chunked binaries from preprocess
robell Sep 28, 2023
93cfdc2
added executorch to the build targets
robell Sep 28, 2023
a38f080
Extended the delegate to read 'vela_bin_stream's
robell Sep 28, 2023
a538747
Merge branch 'pytorch:main' into main
robell Sep 29, 2023
8de6e92
[ET][Portable] Add int types header
digantdesai Sep 28, 2023
644eafc
[Executorch] Simplify FunctionRef to make it more portable
digantdesai Sep 28, 2023
b307c31
[WIP] headrify pte
digantdesai Sep 28, 2023
52dc73c
[NOT FOR LAND] Hacks for ET_LOG
digantdesai Sep 28, 2023
b2a431f
[NOT FOR LAND] Hack op_add to reduce size
digantdesai Sep 28, 2023
77e8eb0
[NOT FOR LAND] Hacks for add minimal example
digantdesai Sep 28, 2023
9b244b3
[NOT FOR LAND] HACK for manual kernel registration
digantdesai Sep 28, 2023
1cabc63
[NOT FOR LAND] Allow enabling logging in Release mode
digantdesai Sep 29, 2023
2f10ee3
[NOT FOR LAND][arm] setup for core_platform
digantdesai Sep 29, 2023
298fb22
Fixes to make simple_add run on hardware
robell Sep 29, 2023
08d71d9
tidied binary reading and moved to ET_LOG
robell Oct 2, 2023
189b04c
Simplified EthosU invocation code
robell Oct 2, 2023
7007120
Basic ethos output copy to EValue
robell Oct 2, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions .gitmodules
Original file line number Diff line number Diff line change
Expand Up @@ -31,3 +31,9 @@
[submodule "backends/arm/third-party/serialization_lib"]
path = backends/arm/third-party/serialization_lib
url = https://git.mlplatform.org/tosa/serialization_lib.git
[submodule "backends/arm/third-party/ethos-u-core-driver"]
path = backends/arm/third-party/ethos-u-core-driver
url = https://git.mlplatform.org/ml/ethos-u/ethos-u-core-driver.git
[submodule "backends/arm/third-party/cmsis"]
path = backends/arm/third-party/cmsis
url = https://github.com/ARM-software/CMSIS_5.git
20 changes: 19 additions & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -59,8 +59,18 @@ endif()
# - targets in the current directory, before and after this command is invoked
# - targets in sub-directories added after this command is invoked
if(CMAKE_BUILD_TYPE STREQUAL "Release")
# Release builds compile logging out by default; this option lets a Release
# build opt back in.
option(
  EXECUTORCH_ENABLE_LOGGING_RELEASE_MODE
  "Enable logging in release mode" OFF)

# Value passed to the ET_LOG_ENABLED definition: 1 only when the option is on.
set(_ET_LOG_ENABLE 0)
# Test the option by name rather than via ${...}: expanding it first would make
# if() re-interpret an unusual value as the name of another variable.
if(EXECUTORCH_ENABLE_LOGGING_RELEASE_MODE)
  set(_ET_LOG_ENABLE 1)
endif()

# Avoid pulling in the logging strings, which can be large.
add_definitions(-DET_LOG_ENABLED=0)
add_definitions(-DET_LOG_ENABLED=${_ET_LOG_ENABLE})
# Avoid pulling in the flatbuffer data verification
# logic, which can add about 20kB.
add_definitions(-DET_ENABLE_PROGRAM_VERIFICATION=0)
Expand Down Expand Up @@ -106,6 +116,10 @@ if(BUILD_SELECTIVE_BUILD_TEST)
option(SELECT_OPS_YAML "Register all the ops from a given yaml file" OFF)
endif()

# Build Arm Baremetal backend
option(EXECUTORCH_BUILD_ARM_BAREMETAL
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nice. If we are planning to have multiple delegate flavors - i.e. runtime logic - under arm name then we can do something like,

EXECUTORCH_BUILD_ARM && EXECUTORCH_BUILD_ARM_<RUNTIME_FLAVOR>

Suggested change
option(EXECUTORCH_BUILD_ARM_BAREMETAL
option(EXECUTORCH_BUILD_ARM

"Build the Arm Baremetal flow for Cortex-M and Ethos-U" OFF)

# Build xnn_executor_runner which depends on XNNPACK
option(EXECUTORCH_BUILD_XNNPACK
"Build xnn_executor_runner which depends on XNNPACK" OFF)
Expand Down Expand Up @@ -295,6 +309,10 @@ if(EXECUTORCH_BUILD_XNNPACK)
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/backends/xnnpack)
endif()

if(EXECUTORCH_BUILD_ARM_BAREMETAL)
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/backends/arm)
endif()

# Add selective build subdirectory
if(BUILD_SELECTIVE_BUILD_TEST)
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/examples/selective_build)
Expand Down
25 changes: 25 additions & 0 deletions backends/arm/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
# Copyright 2023 Arm Limited and/or its affiliates.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.
cmake_minimum_required(VERSION 3.19)

# Emit compile_commands.json for tooling.
set(CMAKE_EXPORT_COMPILE_COMMANDS ON)

# Fall back to the repository root (two levels up from this directory) when the
# including project has not already provided EXECUTORCH_ROOT.
if(NOT EXECUTORCH_ROOT)
  set(EXECUTORCH_ROOT ${CMAKE_CURRENT_SOURCE_DIR}/../..)
endif()

include(${EXECUTORCH_ROOT}/build/Utils.cmake)

set(_common_include_directories ${EXECUTORCH_ROOT}/..)
set(_common_compile_options -Wno-deprecated-declarations)

# Adds the Ethos-U driver sub-project and defines DRIVER_ETHOSU_INCLUDE_DIR.
include(cmake/Dependencies.cmake)

# Runtime delegate sources, expressed as absolute paths under EXECUTORCH_ROOT.
set(_arm_baremetal_sources
    "${EXECUTORCH_ROOT}/backends/arm/runtime/ArmBackendEthosU.cpp")

# Static library holding the Ethos-U runtime backend. Both include roots are
# PUBLIC so they propagate to anything that links against ethos_u.
add_library(ethos_u STATIC ${_arm_baremetal_sources})
target_include_directories(
  ethos_u
  PUBLIC ${_common_include_directories}
         ${DRIVER_ETHOSU_INCLUDE_DIR})
69 changes: 63 additions & 6 deletions backends/arm/arm_backend.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
import operator
import os
import tempfile
import subprocess
from typing import final, List

import numpy as np
Expand Down Expand Up @@ -140,6 +141,64 @@ def dbg_tosa_dump(tosa_fb, path):
f.write(js)
f.close()

# Output to Vela with current file-based compilation
# WARNING: if this changes, the runtime reader also needs to change
def vela_compile(tosa_fb):
with tempfile.TemporaryDirectory() as tmpdir:
print(f"compiling to Vela in {tmpdir}")

tosaname = "out.tosa"
flatbuffer = tosa_fb.serialize()
f = open(os.path.join(tmpdir,tosaname), "wb")
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: use with for this resource

f.write(flatbuffer)
f.close()

# invoke vela
# TODO target ethos-u55-128
vela_command = f"cd {tmpdir}; vela --accelerator-config ethos-u55-128 {tosaname}"
subprocess.run([vela_command], shell=True, check=True)

np_path = os.path.join(tmpdir,"output","out_sg0_vela.npz")
blocks = b''
with np.load(np_path, allow_pickle=False) as data:
# Emit the NPZ regions as:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

OK for now but this should be refactored into its own function to emit_block

# - 16 byte block name null terminated string (padded to 16 if name shorter)
# - 4 bytes of int32 block length and 12 bytes of 0's
# - block data (padded to 16 byte alignment at end)
# Repeat for all blocks
for key in data.keys():
block_name = bytes(key,"utf8")[:15]
block_name = block_name + b'\x00'*(16-len(block_name))
block_data = data[key].tobytes()
# We need the actual unpadded block lengths for hw setup
block_length = len(block_data).to_bytes(16, 'little')
# pad block data to multiple of 16 bytes
block_data = block_data + b'\x00'*(15-(len(block_data)-1)%16)

block = block_name + block_length + block_data
blocks = blocks + block

# Add a block for scratch, inputs and outputs
# scratch shape is a 1 element array giving us size in bytes
block_name = bytes("scratch_data","utf8")[:15]
block_name = block_name + b'\x00'*(16-len(block_name))
block_length = data["scratch_shape"][0].item()
print(f"scratch length = {block_length}")
block_length = block_length+(15-(block_length-1)%16)
block_data = b'\x00'*block_length
block_length = block_length.to_bytes(16, 'little')
print(f"lengths {len(block_name)} {len(block_length)} {len(block_data)}")
block = block_name + block_length + block_data
blocks = blocks + block
# TODO are these already in scratch shape? look to be
#input_shape * input_elem_size
#output_shape * output_elem_size
# input_offset and output_offset specify the location these arrays are written from base of scratch

# return 16 byte VELA bin header + blocks + footer
header = bytes("vela_bin_stream","utf-8") + b'\x00'
footer = bytes("vela_end_stream","utf-8") + b'\x00'
return header + blocks + footer

def dbg_fail(node, tosa_fb, path):
dbg_tosa_dump(tosa_fb, path)
Expand Down Expand Up @@ -205,10 +264,6 @@ def preprocess( # noqa: C901
path = spec.value.decode()
debug_output = True

# in non debug builds we still pass files to vela
if path is None:
path = tempfile.mkdtemp(prefix="arm_tosa_")

# Converted output for this subgraph, serializer needs path early as it emits
# const data directly. Path created and data written only in debug builds.
tosa_fb = ts.TosaSerializer(path)
Expand Down Expand Up @@ -680,5 +735,7 @@ def preprocess( # noqa: C901
dbg_tosa_dump(tosa_fb, path)

# Serialize and return the tosa flatbuffer
fb = tosa_fb.serialize()
return PreprocessResult(processed_bytes=bytes(fb))
# fb = bytes(tosa_fb.serialize())
binary = vela_compile(tosa_fb)

return PreprocessResult(processed_bytes=binary)
12 changes: 12 additions & 0 deletions backends/arm/cmake/Dependencies.cmake
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# Copyright 2023 Arm Limited and/or its affiliates.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

# Root of the vendored third-party sources, relative to the directory whose
# CMakeLists.txt includes this file.
set(THIRD_PARTY_ROOT "${CMAKE_CURRENT_SOURCE_DIR}/third-party")

# Ethos-U driver
set(DRIVER_ETHOSU_SOURCE_DIR "${THIRD_PARTY_ROOT}/ethos-u-core-driver")
set(DRIVER_ETHOSU_INCLUDE_DIR "${DRIVER_ETHOSU_SOURCE_DIR}/include")

# The driver is a git submodule; fail with an actionable message when it has
# not been checked out instead of a generic add_subdirectory() error.
if(NOT EXISTS "${DRIVER_ETHOSU_SOURCE_DIR}/CMakeLists.txt")
  message(
    FATAL_ERROR
      "Ethos-U core driver not found in ${DRIVER_ETHOSU_SOURCE_DIR}. "
      "Run 'git submodule update --init' to fetch it.")
endif()

add_subdirectory("${DRIVER_ETHOSU_SOURCE_DIR}")

# No directory-scoped include_directories() here: consumers add
# DRIVER_ETHOSU_INCLUDE_DIR to their own targets with
# target_include_directories(), which keeps the path off unrelated targets.
90 changes: 90 additions & 0 deletions backends/arm/cmake/arm-none-eabi-gcc.cmake
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
# Copyright 2023 Arm Limited and/or its affiliates.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

# CPU to build for, optionally with feature suffixes (e.g. "cortex-m55+nofp").
# NOTE: CMAKE_SYSTEM_PROCESSOR is always derived from this value below, so the
# CPU must be selected with -DTARGET_CPU=...; a CMAKE_SYSTEM_PROCESSOR given on
# the command line is shadowed by the assignment in this file.
set(TARGET_CPU "cortex-m4" CACHE STRING "Target CPU")
if("${TARGET_CPU}" STREQUAL "")
  message(FATAL_ERROR "TARGET_CPU must not be empty (e.g. cortex-m55)")
endif()
string(TOLOWER "${TARGET_CPU}" CMAKE_SYSTEM_PROCESSOR)

# Bare-metal target driven by the arm-none-eabi GNU toolchain found on PATH.
set(CMAKE_SYSTEM_NAME Generic)
set(CMAKE_C_COMPILER "arm-none-eabi-gcc")
set(CMAKE_CXX_COMPILER "arm-none-eabi-g++")
set(CMAKE_ASM_COMPILER "arm-none-eabi-gcc")
set(CMAKE_LINKER "arm-none-eabi-ld")

set(CMAKE_EXECUTABLE_SUFFIX ".elf")
# Compiler checks build a static library instead of linking an executable.
set(CMAKE_TRY_COMPILE_TARGET_TYPE STATIC_LIBRARY)
set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER)
set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY)
set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY)

# Select C/C++ version
set(CMAKE_C_STANDARD 11)
set(CMAKE_CXX_STANDARD 14)

# Value handed to -mcpu. A cortex-m85 request is compiled as cortex-m55
# (presumably because the toolchain lacks an m85 CPU name -- TODO confirm).
set(GCC_CPU "${CMAKE_SYSTEM_PROCESSOR}")
string(REPLACE "cortex-m85" "cortex-m55" GCC_CPU "${GCC_CPU}")

# Compile options
add_compile_options(
  -mcpu=${GCC_CPU}
  -mthumb
  "$<$<CONFIG:DEBUG>:-gdwarf-3>"
  "$<$<COMPILE_LANGUAGE:CXX>:-fno-unwind-tables;-fno-rtti;-fno-exceptions>"
  -fdata-sections
  -ffunction-sections)

# Compile defines
add_compile_definitions(
  "$<$<NOT:$<CONFIG:DEBUG>>:NDEBUG>")

# Link options
add_link_options(
  -mcpu=${GCC_CPU}
  -mthumb
  --specs=nosys.specs)

# Set floating point unit. Branch order matters: an explicit +fp / +nofp suffix
# wins over the per-CPU defaults that follow.
if(CMAKE_SYSTEM_PROCESSOR MATCHES "\\+fp")
  set(FLOAT hard)
elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "\\+nofp")
  set(FLOAT soft)
elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "cortex-m33(\\+|$)" OR
       CMAKE_SYSTEM_PROCESSOR MATCHES "cortex-m55(\\+|$)" OR
       CMAKE_SYSTEM_PROCESSOR MATCHES "cortex-m85(\\+|$)")
  set(FLOAT hard)
elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "cortex-m4(\\+|$)" OR
       CMAKE_SYSTEM_PROCESSOR MATCHES "cortex-m7(\\+|$)")
  set(FLOAT hard)
  set(FPU_CONFIG "fpv4-sp-d16")
  add_compile_options(-mfpu=${FPU_CONFIG})
  add_link_options(-mfpu=${FPU_CONFIG})
else()
  set(FLOAT soft)
endif()

# Every branch above assigns FLOAT, so the ABI flag is always emitted.
add_compile_options(-mfloat-abi=${FLOAT})
add_link_options(-mfloat-abi=${FLOAT})

add_link_options(LINKER:--nmagic,--gc-sections)

# Compilation warnings
add_compile_options(
# -Wall
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

we (ET runtime) want to enable these eventually (or at least be cool when someone enables them) but right now we are not ready I guess

# -Wextra

# -Wcast-align
# -Wdouble-promotion
# -Wformat
# -Wmissing-field-initializers
# -Wnull-dereference
# -Wredundant-decls
# -Wshadow
# -Wswitch
# -Wswitch-default
# -Wunused
-Wno-redundant-decls
-Wno-psabi
)
53 changes: 53 additions & 0 deletions backends/arm/cmake/build.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
#!/bin/bash
# Copyright 2023 Arm Limited and/or its affiliates.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.
set -e

#
# Setup toolchain
#
BASEDIR="$(realpath "$(dirname "$0")")"
echo "building using build.sh in $BASEDIR"

# `uname -m` is used instead of `uname -i`: the latter is non-portable and
# prints "unknown" on many systems, which would yield a bogus toolchain path.
ARCH="$(uname -m)"
GCCPATH="${BASEDIR}/arm-gnu-toolchain-12.3.rel1-${ARCH}-arm-none-eabi/bin/"

echo "$GCCPATH"
if test -d "${GCCPATH}"; then
    echo Using exising compiler ${GCCPATH}
else
    # Download and unpack the toolchain next to this script.
    pushd "${BASEDIR}/"
    ./toolchain.sh
    popd
fi
export PATH="${PATH}:${GCCPATH}"

echo building with `arm-none-eabi-gcc -v 2>&1 | grep "^gcc"`


#
# Prepare and run clean build
#
# NOTE: the relative paths below assume the script is invoked from the
# repository root.
rm -rf buck-out/ build/lib/ cmake-out/
rm -rf cmake-corstone
mkdir cmake-corstone
cd cmake-corstone

# The CPU is selected through TARGET_CPU: the toolchain file overwrites
# CMAKE_SYSTEM_PROCESSOR from TARGET_CPU (default cortex-m4), so passing
# -DCMAKE_SYSTEM_PROCESSOR here would be silently ignored.
cmake -DFLATC_EXECUTABLE=flatc                                    \
      -DEXECUTORCH_BUILD_XNNPACK=OFF                              \
      -DEXECUTORCH_BUILD_HOST_TARGETS=OFF                         \
      -DEXECUTORCH_BUILD_ARM_BAREMETAL=ON                         \
      -DTARGET_CPU=cortex-m55+nodsp+nofp                          \
      -DETHOSU_TARGET_NPU_CONFIG=ethos-u55-128                    \
      --toolchain backends/arm/cmake/arm-none-eabi-gcc.cmake      \
      -DCMAKE_BUILD_TYPE=Release                                  \
      -DEXECUTORCH_ENABLE_LOGGING_RELEASE_MODE=ON                 \
      ..

cd ..
cmake --build cmake-corstone -j9 --target ethos_u ethosu_core_driver executorch portable_ops_lib portable_kernels
12 changes: 12 additions & 0 deletions backends/arm/cmake/toolchain.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
#!/bin/bash
# Copyright 2023 Arm Limited and/or its affiliates.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.
set -e

# Cross compiler for Arm baremetal (e.g. Corstone-300 FVP or silicon)
# `uname -m` is used instead of `uname -i`: the latter is non-portable and
# prints "unknown" on many systems, which would produce an invalid URL.
ARCH="$(uname -m)"
TOOLCHAIN="arm-gnu-toolchain-12.3.rel1-${ARCH}-arm-none-eabi"

# --fail makes curl exit non-zero on an HTTP error instead of saving the error
# page as gcc.tar.xz and letting tar fail with a confusing message.
curl --fail -o gcc.tar.xz "https://armkeil.blob.core.windows.net/developer/Files/downloads/gnu/12.3.rel1/binrel/${TOOLCHAIN}.tar.xz"
tar xf gcc.tar.xz

# Use the directory matching the downloaded architecture (previously
# hard-coded to aarch64). This export only affects the current shell, so it is
# visible to the caller only when the script is sourced.
export PATH="${PATH}:$(cd "${TOOLCHAIN}/bin/" && pwd)"
Loading