Skip to content

Commit

Permalink
[metal][macos] Support Metal on macOS (1/2) (PaddlePaddle#7347)
Browse files Browse the repository at this point in the history
* support Metal on ARM macOS

* capture the error returned when loading the metallib;
add metallib compilation on ARM macOS;
add Metal support to build_macos.sh

* Update CMakeLists.txt

* Update build_macos.sh

* remove the new-hardware build options from the macOS build script;
add the with_testing option
  • Loading branch information
zhangjun committed Nov 15, 2021
1 parent 333ad06 commit 1eca708
Show file tree
Hide file tree
Showing 4 changed files with 73 additions and 68 deletions.
9 changes: 7 additions & 2 deletions lite/api/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -143,13 +143,15 @@ else()
# 1. enable -flto compiling flag if toolchain==gcc
# TODO (hong19860320): Disable LTO temporarily, since it causes exceptions to go uncaught on Android when the toolchain is gcc.
if (NOT (ARM_TARGET_LANG STREQUAL "clang"))
if (ARM_TARGET_OS STREQUAL "android" AND LITE_WITH_EXCEPTION)
if ((ARM_TARGET_OS STREQUAL "android" OR ARM_TARGET_OS STREQUAL "armmacos") AND LITE_WITH_EXCEPTION)
set(TARGET_COMIPILE_FLAGS "")
else()
set(TARGET_COMIPILE_FLAGS "-fdata-sections -flto")
endif()
else()
set(TARGET_COMIPILE_FLAGS "-fdata-sections")
if (NOT LITE_WITH_METAL)
set(TARGET_COMIPILE_FLAGS "-fdata-sections")
endif()
endif()
# 1.1 enable -flto on PADDLELITE_OBJS
set_target_properties(PADDLELITE_OBJS PROPERTIES COMPILE_FLAGS "${TARGET_COMIPILE_FLAGS}")
Expand All @@ -158,6 +160,9 @@ else()
add_library(paddle_light_api_shared SHARED $<TARGET_OBJECTS:PADDLELITE_OBJS>)
set_target_properties(paddle_light_api_shared PROPERTIES COMPILE_FLAGS "${TARGET_COMIPILE_FLAGS}")
# 2.1 link `paddle_light_api_shared` to third-party libs
if (LITE_WITH_METAL)
target_link_libraries(paddle_light_api_shared ${METAL_LIBRARY} ${MPS_LIBRARY} ${FOUNDATION_LIBRARY})
endif()
if (LITE_WITH_NPU)
# Need to add HIAI runtime libs (libhiai.so) dependency
target_link_libraries(paddle_light_api_shared ${npu_builder_libs} ${npu_runtime_libs})
Expand Down
5 changes: 3 additions & 2 deletions lite/backends/metal/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -8,13 +8,14 @@ FILE(GLOB LITE_METAL_KERNELS_SRC ${CMAKE_SOURCE_DIR}/lite/backends/metal/metal_k
${CMAKE_SOURCE_DIR}/lite/backends/metal/metal_kernel/texture/*.metal
)

IF (DEFINED SDK_VERSION)
IF (SDK_NAME STREQUAL "iphoneos")
#Defined by iOS toolchain
SET(SDK_NAME "iphoneos")
SET(METAL_STD_OPTION "-std=ios-metal1.1")
SET(TARGET_OPTION "-mios-version-min=${DEPLOYMENT_TARGET}")
ELSE ()
SET(SDK_NAME "macosx")
SET(METAL_STD_OPTION "-std=macos-metal1.1")
SET(TARGET_OPTION "")
ENDIF ()

Expand All @@ -41,6 +42,6 @@ lite_cc_library(metal_target_wrapper SRCS
target_wrapper.mm
)

target_link_libraries(metal_target_wrapper ${METAL_LIBRARY} ${FOUNDATION_LIBRARY})
target_link_libraries(metal_target_wrapper ${METAL_LIBRARY} ${MPS_LIBRARY} ${GRAPHIC} ${FOUNDATION_LIBRARY})

add_dependencies(metal_target_wrapper LiteMetalLIB)
3 changes: 2 additions & 1 deletion lite/backends/metal/metal_context_imp.mm
Original file line number Diff line number Diff line change
Expand Up @@ -71,9 +71,10 @@ - (void)dealloc {

- (void)setMetalPath:(std::string)path {
NSString* pathStr = cString2NSString(path);
NSError* libraryErr = nil;
if (pathStr) {
self.libPath = pathStr;
self.library = [self.device newLibraryWithFile:pathStr error:NULL];
self.library = [self.device newLibraryWithFile:pathStr error:&libraryErr];
}
if (nil == _library) {
LOG(INFO) << "Can't load metallib: " << [pathStr cStringUsingEncoding:NSUTF8StringEncoding];
Expand Down
124 changes: 61 additions & 63 deletions lite/tools/build_macos.sh
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,6 @@ ARCH=armv8

CMAKE_EXTRA_OPTIONS=""
BUILD_EXTRA=OFF
BUILD_TRAIN=OFF
BUILD_JAVA=ON
BUILD_PYTHON=OFF
BUILD_DIR=$(pwd)
OPTMODEL_DIR=""
Expand All @@ -24,26 +22,13 @@ WITH_OPENCL=OFF
WITH_STATIC_MKL=OFF
WITH_AVX=ON
WITH_EXCEPTION=OFF
WITH_LIGHT_WEIGHT_FRAMEWORK=OFF
WITH_PROFILE=OFF
WITH_PRECISION_PROFILE=OFF
WITH_BENCHMARK=OFF
WITH_LTO=OFF
BUILD_ARM82_FP16=OFF
BUILD_ARM82_INT8_SDOT=OFF
BUILD_NPU=OFF
NPU_DDK_ROOT="$(pwd)/ai_ddk_lib/" # Download HiAI DDK from https://developer.huawei.com/consumer/cn/hiai/
BUILD_XPU=OFF
BUILD_XTCL=OFF
XPU_SDK_ROOT=""
XPU_SDK_URL=""
XPU_SDK_ENV=""
BUILD_APU=OFF
APU_DDK_ROOT="$(pwd)/apu_sdk_lib/"
BUILD_RKNPU=OFF
RKNPU_DDK_ROOT="$(pwd)/rknpu/"
WITH_HUAWEI_ASCEND_NPU=OFF # Huawei Ascend Builder/Runtime Libs on X86 host
# default installation path, ensure acllib/atc/opp directories are all in this root dir
HUAWEI_ASCEND_NPU_DDK_ROOT="/usr/local/Ascend/ascend-toolkit/latest/x86_64-linux_gcc4.8.5"
PYTHON_EXECUTABLE_OPTION=""
workspace=$PWD/$(dirname $0)/../../
OPTMODEL_DIR=""
Expand Down Expand Up @@ -128,6 +113,21 @@ function make_armosx {
fi

build_dir=$workspace/build.macos.${os}.${arch}
if [ ${WITH_METAL} == "ON" ]; then
BUILD_EXTRA=ON
build_dir=${build_dir}.metal
fi

if [ ${WITH_OPENCL} == "ON" ]; then
build_dir=${build_dir}.opencl
prepare_opencl_source_code $workspace
fi

if [ ${WITH_TESTING} == "ON" ]; then
BUILD_EXTRA=ON
LITE_ON_TINY_PUBLISH=OFF
fi

if [ -d $build_dir ]
then
rm -rf $build_dir
Expand All @@ -145,11 +145,17 @@ function make_armosx {
touch ./${GEN_CODE_PATH_PREFIX}/__generated_code__.cc
cmake $workspace \
-DWITH_LITE=ON \
-DWITH_TESTING=${WITH_TESTING} \
-DLITE_WITH_ARM=ON \
-DLITE_ON_TINY_PUBLISH=ON \
-DLITE_WITH_METAL=${WITH_METAL} \
-DLITE_WITH_OPENCL=${WITH_OPENCL} \
-DLITE_ON_TINY_PUBLISH=${LITE_ON_TINY_PUBLISH} \
-DLITE_WITH_PROFILE=${WITH_PROFILE} \
-DLITE_WITH_LIGHT_WEIGHT_FRAMEWORK=ON \
-DLITE_WITH_PRECISION_PROFILE=${WITH_PRECISION_PROFILE} \
-DLITE_WITH_OPENMP=OFF \
-DWITH_ARM_DOTPROD=OFF \
-DLITE_WITH_LIGHT_WEIGHT_FRAMEWORK=ON \
-DLITE_WITH_X86=OFF \
-DLITE_WITH_LOG=$WITH_LOG \
-DLITE_WITH_EXCEPTION=$WITH_EXCEPTION \
Expand All @@ -162,6 +168,13 @@ function make_armosx {
-DARM_TARGET_OS=armmacos

make publish_inference -j$NUM_PROC
if [ "${WITH_BENCHMARK}" == "ON" ]; then
make benchmark_bin -j$NUM_PROC
elif [ "${WITH_TESTING}" == "ON" ]; then
make lite_compile_deps -j$NUM_PROC
else
make publish_inference -j$NUM_PROC
fi
cd -
}

Expand All @@ -175,17 +188,22 @@ function make_x86 {
set_benchmark_options
fi

if [ ${WITH_HUAWEI_ASCEND_NPU} == "ON" ]; then
export CXX=g++ # Huawei Ascend NPU need g++
build_directory=$BUILD_DIR/build.lite.huawei_ascend_npu
fi

if [ ${WITH_OPENCL} == "ON" ]; then
BUILD_EXTRA=ON
build_directory=$BUILD_DIR/build.lite.x86.opencl
prepare_opencl_source_code $root_dir $build_directory
fi

if [ ${WITH_METAL} == "ON" ]; then
BUILD_EXTRA=ON
build_directory=${build_directory}.metal
fi

if [ ${WITH_TESTING} == "ON" ]; then
BUILD_EXTRA=ON
LITE_ON_TINY_PUBLISH=OFF
fi

if [ ${BUILD_PYTHON} == "ON" ]; then
BUILD_EXTRA=ON
fi
Expand All @@ -205,12 +223,16 @@ function make_x86 {

cmake $root_dir -DWITH_MKL=${WITH_MKL} \
-DWITH_STATIC_MKL=${WITH_STATIC_MKL} \
-DWITH_TESTING=OFF \
-DWITH_TESTING=${WITH_TESTING} \
-DWITH_AVX=${WITH_AVX} \
-DWITH_MKLDNN=OFF \
-DLITE_WITH_X86=ON \
-DWITH_LITE=ON \
-DLITE_WITH_LIGHT_WEIGHT_FRAMEWORK=OFF \
-DLITE_WITH_LIGHT_WEIGHT_FRAMEWORK=${WITH_LIGHT_WEIGHT_FRAMEWORK} \
-DLITE_ON_TINY_PUBLISH=${LITE_ON_TINY_PUBLISH} \
-DLITE_WITH_PROFILE=${WITH_PROFILE} \
-DLITE_WITH_PRECISION_PROFILE=${WITH_PRECISION_PROFILE} \
-DLITE_WITH_ARM=OFF \
-DLITE_WITH_OPENCL=${WITH_OPENCL} \
-DWITH_GPU=OFF \
Expand All @@ -220,22 +242,15 @@ function make_x86 {
-DLITE_OPTMODEL_DIR=${OPTMODEL_DIR} \
-DLITE_WITH_LOG=${WITH_LOG} \
-DLITE_WITH_EXCEPTION=$WITH_EXCEPTION \
-DLITE_WITH_PROFILE=${WITH_PROFILE} \
-DLITE_WITH_PRECISION_PROFILE=${WITH_PRECISION_PROFILE} \
-DLITE_WITH_LTO=${WITH_LTO} \
-DLITE_WITH_XPU=$BUILD_XPU \
-DLITE_WITH_XTCL=$BUILD_XTCL \
-DXPU_SDK_ROOT=$XPU_SDK_ROOT \
-DXPU_SDK_URL=$XPU_SDK_URL \
-DXPU_SDK_ENV=$XPU_SDK_ENV \
-DLITE_WITH_HUAWEI_ASCEND_NPU=$WITH_HUAWEI_ASCEND_NPU \
-DHUAWEI_ASCEND_NPU_DDK_ROOT=$HUAWEI_ASCEND_NPU_DDK_ROOT \
-DCMAKE_BUILD_TYPE=Release \
-DPY_VERSION=$PY_VERSION \
$PYTHON_EXECUTABLE_OPTION

if [ "${WITH_BENCHMARK}" == "ON" ]; then
make benchmark_bin -j$NUM_PROC
elif [ "${WITH_TESTING}" == "ON" ]; then
make lite_compile_deps -j$NUM_PROC
else
make publish_inference -j$NUM_PROC
fi
Expand All @@ -258,6 +273,9 @@ function print_usage {
echo -e "| --with_exception: (OFF|ON); controls whether to throw the exception when error occurs, default is OFF |"
echo -e "| --build_extra: (OFF|ON); controls whether to publish extra operators and kernels for (sequence-related model such as OCR or NLP) |"
echo -e "| --with_benchmark: (OFF|ON); controls whether to compile benchmark binary, default is OFF, only support macos x86 |"
echo -e "| --with_extra: (OFF|ON); controls whether to publish extra operators and kernels for (sequence-related model such as OCR or NLP) |"
echo -e "| --with_benchmark: (OFF|ON); controls whether to compile benchmark binary, default is OFF |"
echo -e "| --with_testing: (OFF|ON); controls whether to compile unit test, default is OFF |"
echo -e "| |"
echo -e "| arguments of benchmark binary compiling for macos x86: |"
echo -e "| ./lite/tools/build_macos.sh --with_benchmark=ON x86 |"
Expand All @@ -280,7 +298,15 @@ function main {
# Parse command line.
for i in "$@"; do
case $i in
--build_extra=*)
--with_metal=*)
WITH_METAL="${i#*=}"
shift
;;
--with_opencl=*)
WITH_OPENCL="${i#*=}"
shift
;;
--with_extra=*)
BUILD_EXTRA="${i#*=}"
shift
;;
Expand Down Expand Up @@ -375,12 +401,8 @@ function main {
fi
shift
;;
--xpu_sdk_url=*)
XPU_SDK_URL="${i#*=}"
shift
;;
--xpu_sdk_env=*)
XPU_SDK_ENV="${i#*=}"
--tiny_publish=*)
LITE_ON_TINY_PUBLISH="${i#*=}"
shift
;;
--python_executable=*)
Expand All @@ -391,30 +413,6 @@ function main {
PY_VERSION="${i#*=}"
shift
;;
--build_apu=*)
BUILD_APU="${i#*=}"
shift
;;
--apu_ddk_root=*)
APU_DDK_ROOT="${i#*=}"
shift
;;
--build_rknpu=*)
BUILD_RKNPU="${i#*=}"
shift
;;
--rknpu_ddk_root=*)
RKNPU_DDK_ROOT="${i#*=}"
shift
;;
--with_huawei_ascend_npu=*)
WITH_HUAWEI_ASCEND_NPU="${i#*=}"
shift
;;
--huawei_ascend_npu_ddk_root=*)
HUAWEI_ASCEND_NPU_DDK_ROOT="${i#*=}"
shift
;;
--ios_deployment_target=*)
IOS_DEPLOYMENT_TARGET="${i#*=}"
shift
Expand Down

0 comments on commit 1eca708

Please sign in to comment.