Skip to content

Commit

Permalink
[FastTokenizer] Support FastTokenizer on Android (#3999)
Browse files Browse the repository at this point in the history
* [FastTokenizer] Support FastTokenizer on Android

* Update how_to_build_android.md

* fix code style

* fix code style

* add copyright

Co-authored-by: Zeyu Chen <[email protected]>
  • Loading branch information
DefTruth and ZeyuChen authored Dec 4, 2022
1 parent d73ec67 commit b07eed5
Show file tree
Hide file tree
Showing 8 changed files with 266 additions and 7 deletions.
23 changes: 21 additions & 2 deletions fast_tokenizer/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,3 +1,17 @@
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

cmake_minimum_required(VERSION 3.10)

project(tokenizers LANGUAGES CXX C VERSION 1.0)
Expand Down Expand Up @@ -103,8 +117,13 @@ endforeach()
ELSE(WIN32)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O3 -fPIC")
IF (NOT APPLE)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -ldl -lpthread")
ENDIF()
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -ldl")
IF (NOT ANDROID)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -lpthread")
ELSE()
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Os")
ENDIF()
ENDIF()
set (PUBLIC_DEPEND_LIBS ${CMAKE_DL_LIBS})
ENDIF(WIN32)

Expand Down
50 changes: 49 additions & 1 deletion fast_tokenizer/cmake/external/gflags.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,53 @@ ENDIF(WIN32)

INCLUDE_DIRECTORIES(${GFLAGS_INCLUDE_DIR})

IF(ANDROID)
# Android cross-compilation settings that are handed to the gflags external
# project through CMAKE_ARGS below. They mirror the parent build's system
# name/version, ABI, NDK location, STL type and toolchain, and point the
# sub-build at the NDK's android.toolchain.cmake (located under
# CMAKE_ANDROID_NDK).
# NOTE(review): ANDROID_PLATFORM is built as "android-<ANDROID_NATIVE_API_LEVEL>",
# so ANDROID_NATIVE_API_LEVEL is presumably a bare API number at this point —
# confirm against the toolchain file in use.
set(CROSS_COMPILE_CMAKE_ARGS
"-DCMAKE_SYSTEM_NAME=${CMAKE_SYSTEM_NAME}"
"-DCMAKE_SYSTEM_VERSION=${CMAKE_SYSTEM_VERSION}"
"-DCMAKE_ANDROID_ARCH_ABI=${CMAKE_ANDROID_ARCH_ABI}"
"-DCMAKE_ANDROID_NDK=${CMAKE_ANDROID_NDK}"
"-DCMAKE_ANDROID_STL_TYPE=${CMAKE_ANDROID_STL_TYPE}"
"-DANDROID_ABI=${CMAKE_ANDROID_ARCH_ABI}"
"-DANDROID_TOOLCHAIN=${ANDROID_TOOLCHAIN}"
"-DANDROID_STL=${CMAKE_ANDROID_STL_TYPE}"
"-DCMAKE_SYSTEM_PROCESSOR=${CMAKE_SYSTEM_PROCESSOR}"
"-DCMAKE_TOOLCHAIN_FILE=${CMAKE_ANDROID_NDK}/build/cmake/android.toolchain.cmake"
"-DCMAKE_ANDROID_NDK_TOOLCHAIN_VERSION=${CMAKE_ANDROID_NDK_TOOLCHAIN_VERSION}"
"-DANDROID_PLATFORM=android-${ANDROID_NATIVE_API_LEVEL}"
"-D__ANDROID_API__=${ANDROID_NATIVE_API_LEVEL}")

# Android variant of the gflags external project: clones GFLAGS_REPOSITORY at
# GFLAGS_TAG under GFLAGS_PREFIX_DIR and configures it with the cross-compile
# arguments above plus the parent build's compilers and C/C++ flags.
# The update step is disabled (UPDATE_COMMAND ""). The result is installed
# into GFLAGS_INSTALL_DIR with static libraries and position-independent code
# requested and testing turned off. Install prefix, PIC and build type are
# additionally passed as typed cache entries via CMAKE_CACHE_ARGS.
# BUILD_COMMAND / INSTALL_COMMAND are taken from variables defined outside
# this view.
ExternalProject_Add(
extern_gflags
${EXTERNAL_PROJECT_LOG_ARGS}
${SHALLOW_CLONE}
GIT_REPOSITORY ${GFLAGS_REPOSITORY}
GIT_TAG ${GFLAGS_TAG}
PREFIX ${GFLAGS_PREFIX_DIR}
UPDATE_COMMAND ""
BUILD_COMMAND ${BUILD_COMMAND}
INSTALL_COMMAND ${INSTALL_COMMAND}
CMAKE_ARGS ${CROSS_COMPILE_CMAKE_ARGS}
-DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER}
-DCMAKE_C_COMPILER=${CMAKE_C_COMPILER}
-DCMAKE_CXX_FLAGS=${CMAKE_CXX_FLAGS}
-DCMAKE_CXX_FLAGS_RELEASE=${CMAKE_CXX_FLAGS_RELEASE}
-DCMAKE_CXX_FLAGS_DEBUG=${CMAKE_CXX_FLAGS_DEBUG}
-DCMAKE_C_FLAGS=${CMAKE_C_FLAGS}
-DCMAKE_C_FLAGS_DEBUG=${CMAKE_C_FLAGS_DEBUG}
-DCMAKE_C_FLAGS_RELEASE=${CMAKE_C_FLAGS_RELEASE}
-DBUILD_STATIC_LIBS=ON
-DCMAKE_INSTALL_PREFIX=${GFLAGS_INSTALL_DIR}
-DCMAKE_POSITION_INDEPENDENT_CODE=ON
-DBUILD_TESTING=OFF
-DCMAKE_BUILD_TYPE=${THIRD_PARTY_BUILD_TYPE}
${EXTERNAL_OPTIONAL_ARGS}
CMAKE_CACHE_ARGS -DCMAKE_INSTALL_PREFIX:PATH=${GFLAGS_INSTALL_DIR}
-DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON
-DCMAKE_BUILD_TYPE:STRING=${THIRD_PARTY_BUILD_TYPE}
BUILD_BYPRODUCTS ${GFLAGS_LIBRARIES}
)
ELSE()
ExternalProject_Add(
extern_gflags
${EXTERNAL_PROJECT_LOG_ARGS}
Expand Down Expand Up @@ -58,6 +105,7 @@ ExternalProject_Add(
-DCMAKE_BUILD_TYPE:STRING=${THIRD_PARTY_BUILD_TYPE}
BUILD_BYPRODUCTS ${GFLAGS_LIBRARIES}
)
ENDIF()

ADD_LIBRARY(gflags STATIC IMPORTED GLOBAL)
SET_PROPERTY(TARGET gflags PROPERTY IMPORTED_LOCATION ${GFLAGS_LIBRARIES})
Expand All @@ -70,4 +118,4 @@ if (WIN32)
if (HAVE_SHLWAPI)
set_property(GLOBAL PROPERTY OS_DEPENDENCY_MODULES shlwapi.lib)
endif(HAVE_SHLWAPI)
endif (WIN32)
endif (WIN32)
51 changes: 50 additions & 1 deletion fast_tokenizer/cmake/external/glog.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,54 @@ ENDIF(WIN32)

INCLUDE_DIRECTORIES(${GLOG_INCLUDE_DIR})

IF(ANDROID)
# Android cross-compilation settings that are handed to the glog external
# project through CMAKE_ARGS below. They mirror the parent build's system
# name/version, ABI, NDK location, STL type and toolchain, and point the
# sub-build at the NDK's android.toolchain.cmake (located under
# CMAKE_ANDROID_NDK).
# NOTE(review): ANDROID_PLATFORM is built as "android-<ANDROID_NATIVE_API_LEVEL>",
# so ANDROID_NATIVE_API_LEVEL is presumably a bare API number at this point —
# confirm against the toolchain file in use.
set(CROSS_COMPILE_CMAKE_ARGS
"-DCMAKE_SYSTEM_NAME=${CMAKE_SYSTEM_NAME}"
"-DCMAKE_SYSTEM_VERSION=${CMAKE_SYSTEM_VERSION}"
"-DCMAKE_ANDROID_ARCH_ABI=${CMAKE_ANDROID_ARCH_ABI}"
"-DCMAKE_ANDROID_NDK=${CMAKE_ANDROID_NDK}"
"-DCMAKE_ANDROID_STL_TYPE=${CMAKE_ANDROID_STL_TYPE}"
"-DANDROID_ABI=${CMAKE_ANDROID_ARCH_ABI}"
"-DANDROID_TOOLCHAIN=${ANDROID_TOOLCHAIN}"
"-DANDROID_STL=${CMAKE_ANDROID_STL_TYPE}"
"-DCMAKE_SYSTEM_PROCESSOR=${CMAKE_SYSTEM_PROCESSOR}"
"-DCMAKE_TOOLCHAIN_FILE=${CMAKE_ANDROID_NDK}/build/cmake/android.toolchain.cmake"
"-DCMAKE_ANDROID_NDK_TOOLCHAIN_VERSION=${CMAKE_ANDROID_NDK_TOOLCHAIN_VERSION}"
"-DANDROID_PLATFORM=android-${ANDROID_NATIVE_API_LEVEL}"
"-D__ANDROID_API__=${ANDROID_NATIVE_API_LEVEL}")

# Android variant of the glog external project: clones GLOG_REPOSITORY at
# GLOG_TAG under GLOG_PREFIX_DIR and configures it with the cross-compile
# arguments above plus the parent build's compilers and flags. The C++ flags
# come from GLOG_CMAKE_CXX_FLAGS, which is defined outside this view.
# The update step is disabled (UPDATE_COMMAND ""). The result is installed
# into GLOG_INSTALL_DIR with libraries forced into <prefix>/lib,
# position-independent code requested and testing turned off.
# The project is ordered after the gflags target (DEPENDS gflags) even though
# glog itself is configured with WITH_GFLAGS=OFF.
ExternalProject_Add(
extern_glog
${EXTERNAL_PROJECT_LOG_ARGS}
${SHALLOW_CLONE}
GIT_REPOSITORY ${GLOG_REPOSITORY}
GIT_TAG ${GLOG_TAG}
DEPENDS gflags
PREFIX ${GLOG_PREFIX_DIR}
UPDATE_COMMAND ""
CMAKE_ARGS ${CROSS_COMPILE_CMAKE_ARGS}
-DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER}
-DCMAKE_C_COMPILER=${CMAKE_C_COMPILER}
-DCMAKE_CXX_FLAGS=${GLOG_CMAKE_CXX_FLAGS}
-DCMAKE_CXX_FLAGS_RELEASE=${CMAKE_CXX_FLAGS_RELEASE}
-DCMAKE_CXX_FLAGS_DEBUG=${CMAKE_CXX_FLAGS_DEBUG}
-DCMAKE_C_FLAGS=${CMAKE_C_FLAGS}
-DCMAKE_C_FLAGS_DEBUG=${CMAKE_C_FLAGS_DEBUG}
-DCMAKE_C_FLAGS_RELEASE=${CMAKE_C_FLAGS_RELEASE}
-DCMAKE_INSTALL_PREFIX=${GLOG_INSTALL_DIR}
-DCMAKE_INSTALL_LIBDIR=${GLOG_INSTALL_DIR}/lib
-DCMAKE_POSITION_INDEPENDENT_CODE=ON
-DWITH_GFLAGS=OFF
-DBUILD_TESTING=OFF
-DCMAKE_BUILD_TYPE=${THIRD_PARTY_BUILD_TYPE}
${EXTERNAL_OPTIONAL_ARGS}
CMAKE_CACHE_ARGS -DCMAKE_INSTALL_PREFIX:PATH=${GLOG_INSTALL_DIR}
-DCMAKE_INSTALL_LIBDIR:PATH=${GLOG_INSTALL_DIR}/lib
-DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON
-DCMAKE_BUILD_TYPE:STRING=${THIRD_PARTY_BUILD_TYPE}
BUILD_BYPRODUCTS ${GLOG_LIBRARIES}
)
ELSE()
ExternalProject_Add(
extern_glog
${EXTERNAL_PROJECT_LOG_ARGS}
Expand Down Expand Up @@ -61,8 +109,9 @@ ExternalProject_Add(
-DCMAKE_BUILD_TYPE:STRING=${THIRD_PARTY_BUILD_TYPE}
BUILD_BYPRODUCTS ${GLOG_LIBRARIES}
)
ENDIF()

# Wrap the externally built glog archive in an imported static target so the
# rest of the build can refer to it by name. The target is ordered after the
# extern_glog external project and the gflags target, and is then added to
# the directory-wide link list via LINK_LIBRARIES (registered once).
ADD_LIBRARY(glog STATIC IMPORTED GLOBAL)
SET_PROPERTY(TARGET glog PROPERTY IMPORTED_LOCATION ${GLOG_LIBRARIES})
ADD_DEPENDENCIES(glog extern_glog gflags)
LINK_LIBRARIES(glog)
31 changes: 30 additions & 1 deletion fast_tokenizer/cmake/external/icu.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,19 @@ include(ExternalProject)
include (ByproductsICU)
SET(ICU_PREFIX_DIR ${THIRD_PARTY_PATH}/icu)
SET(ICU_INSTALL_DIR ${THIRD_PARTY_PATH}/install/icu)
SET(ICU_REPOSITORY ${GIT_URL}/unicode-org/icu.git)
# Choose where ICU comes from.
#   * Android: an ICU archive (.tgz) named after the target ABI is downloaded
#     from ICU_URL_PREFIX. Only the two ABIs listed below and the clang
#     toolchain are accepted; anything else aborts configuration.
#   * Everything else: the upstream unicode-org/icu git repository.
if(ANDROID)
  set(ICU_URL_PREFIX "https://bj.bcebos.com/fastdeploy/test")
  # Validate the ABI with an anchored pattern: MATCHES is a regex *search*, so
  # an unanchored "armeabi-v7a" would also accept values that merely contain
  # it (e.g. "armeabi-v7a with NEON") and produce a download URL that does
  # not exist.
  if(NOT ANDROID_ABI MATCHES "^(armeabi-v7a|arm64-v8a)$")
    message(FATAL_ERROR "FastTokenizer for Android only support armeabi-v7a, arm64-v8a now.")
  endif()
  # Only the clang toolchain is supported for the Android cross build.
  if(NOT ANDROID_TOOLCHAIN MATCHES "clang")
    message(FATAL_ERROR "Currently, only support clang toolchain while cross compiling FastTokenizer for Android, but found ${ANDROID_TOOLCHAIN}.")
  endif()
  set(ICU_REPOSITORY ${ICU_URL_PREFIX}/icu-android-${ANDROID_ABI}.tgz)
else()
  set(ICU_REPOSITORY ${GIT_URL}/unicode-org/icu.git)
endif()
SET(ICU_TAG release-70-1)
set(FIND_OR_BUILD_ICU_DIR ${CMAKE_CURRENT_LIST_DIR})

Expand Down Expand Up @@ -70,6 +82,23 @@ ExternalProject_Add(
INSTALL_COMMAND make install prefix="" DESTDIR=${ICU_INSTALL_DIR} install
BUILD_BYPRODUCTS ${ICU_LIBRARIES}
)
elseif(ANDROID)
# Android variant of the ICU external project: downloads the archive at
# ICU_REPOSITORY (URL mode) and skips the configure, update and build steps.
# The install step recreates ICU_INSTALL_DIR and copies the extracted `lib`
# and `include` trees into it.
#
# The install commands are chained with ExternalProject's own COMMAND keyword
# instead of a shell "&&", so they do not depend on the generator passing the
# line through a POSIX shell. `copy_directory` is used for `lib` (rather than
# `rename`) so the extracted tree stays intact and the install step can be
# re-run without failing on a missing source directory.
ExternalProject_Add(
  extern_icu
  ${EXTERNAL_PROJECT_LOG_ARGS}
  ${SHALLOW_CLONE}
  URL ${ICU_REPOSITORY}
  PREFIX ${ICU_PREFIX_DIR}
  CONFIGURE_COMMAND ""
  UPDATE_COMMAND ""
  BUILD_COMMAND ""
  INSTALL_COMMAND ${CMAKE_COMMAND} -E remove_directory ${ICU_INSTALL_DIR}
  COMMAND ${CMAKE_COMMAND} -E make_directory ${ICU_INSTALL_DIR}
  COMMAND ${CMAKE_COMMAND} -E copy_directory ${ICU_PREFIX_DIR}/src/extern_icu/lib ${ICU_INSTALL_DIR}/lib
  COMMAND ${CMAKE_COMMAND} -E copy_directory ${ICU_PREFIX_DIR}/src/extern_icu/include ${ICU_INSTALL_DIR}/include
  BUILD_BYPRODUCTS ${ICU_LIBRARIES}
)
else()
ExternalProject_Add(
extern_icu
Expand Down
53 changes: 51 additions & 2 deletions fast_tokenizer/cmake/external/re2.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -24,17 +24,64 @@ IF(WIN32)
add_definitions(-DRE2_STATIC)
ELSEIF(APPLE)
SET(RE2_LIBRARIES "${RE2_INSTALL_DIR}/lib/libre2.a")
ELSEIF(ANDROID)
SET(RE2_LIBRARIES "${RE2_INSTALL_DIR}/lib/libre2.a")
ELSE()
# Pick the directory that holds the installed libre2.a on this host.
#   * aarch64/arm64 hosts:        <install>/lib
#   * Debian or Ubuntu hosts:     <install>/lib
#   * every other host:           <install>/lib64
# The distribution is detected from /etc/issue. The file is optional: when it
# is missing the probe is skipped (ETC_ISSUE and DIST are left empty) and the
# lib64 default applies, instead of aborting configuration in file(READ).
IF(CMAKE_HOST_SYSTEM_PROCESSOR MATCHES "aarch64|arm64")
  SET(RE2_LIBRARIES "${RE2_INSTALL_DIR}/lib/libre2.a")
ELSE()
  SET(ETC_ISSUE "")
  IF(EXISTS "/etc/issue")
    file(READ "/etc/issue" ETC_ISSUE)
  ENDIF()
  # Quote the input: unquoted, an empty file drops the argument entirely and
  # a ';' in the content splits it into several arguments.
  string(REGEX MATCH "Debian|Ubuntu" DIST "${ETC_ISSUE}")
  IF("${DIST}" STREQUAL "Debian" OR "${DIST}" STREQUAL "Ubuntu")
    SET(RE2_LIBRARIES "${RE2_INSTALL_DIR}/lib/libre2.a")
  ELSE()
    SET(RE2_LIBRARIES "${RE2_INSTALL_DIR}/lib64/libre2.a")
  ENDIF()
ENDIF()
ENDIF()

SET(RE2_INCLUDE_DIR ${RE2_INSTALL_DIR}/include)
INCLUDE_DIRECTORIES(${RE2_INCLUDE_DIR})

IF(ANDROID)
# Android cross-compilation settings that are handed to the re2 external
# project through CMAKE_ARGS below. They mirror the parent build's system
# name/version, ABI, NDK location, STL type and toolchain, and point the
# sub-build at the NDK's android.toolchain.cmake (located under
# CMAKE_ANDROID_NDK).
# NOTE(review): ANDROID_PLATFORM is built as "android-<ANDROID_NATIVE_API_LEVEL>",
# so ANDROID_NATIVE_API_LEVEL is presumably a bare API number at this point —
# confirm against the toolchain file in use.
set(CROSS_COMPILE_CMAKE_ARGS
"-DCMAKE_SYSTEM_NAME=${CMAKE_SYSTEM_NAME}"
"-DCMAKE_SYSTEM_VERSION=${CMAKE_SYSTEM_VERSION}"
"-DCMAKE_ANDROID_ARCH_ABI=${CMAKE_ANDROID_ARCH_ABI}"
"-DCMAKE_ANDROID_NDK=${CMAKE_ANDROID_NDK}"
"-DCMAKE_ANDROID_STL_TYPE=${CMAKE_ANDROID_STL_TYPE}"
"-DANDROID_ABI=${CMAKE_ANDROID_ARCH_ABI}"
"-DANDROID_TOOLCHAIN=${ANDROID_TOOLCHAIN}"
"-DANDROID_STL=${CMAKE_ANDROID_STL_TYPE}"
"-DCMAKE_SYSTEM_PROCESSOR=${CMAKE_SYSTEM_PROCESSOR}"
"-DCMAKE_TOOLCHAIN_FILE=${CMAKE_ANDROID_NDK}/build/cmake/android.toolchain.cmake"
"-DCMAKE_ANDROID_NDK_TOOLCHAIN_VERSION=${CMAKE_ANDROID_NDK_TOOLCHAIN_VERSION}"
"-DANDROID_PLATFORM=android-${ANDROID_NATIVE_API_LEVEL}"
"-D__ANDROID_API__=${ANDROID_NATIVE_API_LEVEL}")

# Android variant of the re2 external project: clones RE2_REPOSITORY at
# RE2_TAG under RE2_PREFIX_DIR and configures it with the cross-compile
# arguments above plus the parent build's C/C++ flags, installing into
# RE2_INSTALL_DIR. The update step is disabled (UPDATE_COMMAND "").
# Only the C++ compiler is forwarded explicitly here; no C compiler and no
# CMAKE_CACHE_ARGS are passed for this project.
ExternalProject_Add(
extern_re2
${EXTERNAL_PROJECT_LOG_ARGS}
${SHALLOW_CLONE}
GIT_REPOSITORY ${RE2_REPOSITORY}
GIT_TAG ${RE2_TAG}
PREFIX ${RE2_PREFIX_DIR}
UPDATE_COMMAND ""
CMAKE_ARGS ${CROSS_COMPILE_CMAKE_ARGS}
-DCMAKE_CXX_FLAGS=${CMAKE_CXX_FLAGS}
-DCMAKE_CXX_FLAGS_RELEASE=${CMAKE_CXX_FLAGS_RELEASE}
-DCMAKE_CXX_FLAGS_DEBUG=${CMAKE_CXX_FLAGS_DEBUG}
-DCMAKE_C_FLAGS=${CMAKE_C_FLAGS}
-DCMAKE_C_FLAGS_DEBUG=${CMAKE_C_FLAGS_DEBUG}
-DCMAKE_C_FLAGS_RELEASE=${CMAKE_C_FLAGS_RELEASE}
-DCMAKE_INSTALL_PREFIX:PATH=${RE2_INSTALL_DIR}
-DCMAKE_BUILD_TYPE:STRING=${THIRD_PARTY_BUILD_TYPE}
-DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER}
BUILD_BYPRODUCTS ${RE2_LIBRARIES}
)
ELSE()
ExternalProject_Add(
extern_re2
${EXTERNAL_PROJECT_LOG_ARGS}
Expand All @@ -51,9 +98,11 @@ ExternalProject_Add(
-DCMAKE_C_FLAGS_RELEASE=${CMAKE_C_FLAGS_RELEASE}
-DCMAKE_INSTALL_PREFIX:PATH=${RE2_INSTALL_DIR}
-DCMAKE_BUILD_TYPE:STRING=${THIRD_PARTY_BUILD_TYPE}
-DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER}
BUILD_BYPRODUCTS ${RE2_LIBRARIES}
)
ENDIF()

# Wrap the externally built re2 archive in an imported static target whose
# location is RE2_LIBRARIES, and order it after the extern_re2 external
# project so the archive exists before anything that uses the target is
# built. The dependency is registered once.
ADD_LIBRARY(re2 STATIC IMPORTED GLOBAL)
SET_PROPERTY(TARGET re2 PROPERTY IMPORTED_LOCATION ${RE2_LIBRARIES})
ADD_DEPENDENCIES(re2 extern_re2)
29 changes: 29 additions & 0 deletions fast_tokenizer/docs/compile/how_to_build_android.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
# Android 编译

## 环境依赖

- cmake >= 3.10
- NDK >= 20

## 配置NDK
```bash
wget https://dl.google.com/android/repository/android-ndk-r20b-linux-x86_64.zip
unzip android-ndk-r20b-linux-x86_64.zip # 会解压缩到 android-ndk-r20b 目录
export NDK_ROOT=${PWD}/android-ndk-r20b
```

## 编译C++库方法

```bash
git clone https://github.com/PaddlePaddle/PaddleNLP.git
cd PaddleNLP/fast_tokenizer
mkdir build && cd build
cmake .. -DCMAKE_TOOLCHAIN_FILE=$NDK_ROOT/build/cmake/android.toolchain.cmake -DANDROID_ABI="arm64-v8a" -DANDROID_NATIVE_API_LEVEL=android-21 -DANDROID_STL=c++_static -DWITH_TESTING=OFF -DWITH_PYTHON=OFF -DANDROID_TOOLCHAIN=clang
make -j8
```
编译后的C++库在当前目录下的`cpp`目录下。可以选择使用strip减少库体积:
```shell
$NDK_ROOT/toolchains/llvm/prebuilt/linux-x86_64/aarch64-linux-android/bin/strip libcore_tokenizers.so
```

更多编译选项说明参考[编译指南](./README.md)
18 changes: 18 additions & 0 deletions fast_tokenizer/run_build_android_armv7_lib.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Configure and build FastTokenizer for Android armeabi-v7a in a dedicated
# build directory next to this script's working directory.
#
# Requires NDK_ROOT to point at the Android NDK root; abort early with a clear
# message if it is unset or empty instead of passing a broken toolchain path.
: "${NDK_ROOT:?NDK_ROOT must be set to the Android NDK root directory}"

# -p keeps re-runs from failing when the build directory already exists.
mkdir -p build_android_armeabi_v7a
# Never configure in the wrong directory if the cd fails.
cd build_android_armeabi_v7a || exit 1
# Only build when configuration succeeded; propagate any failure to the caller.
cmake .. -DCMAKE_TOOLCHAIN_FILE="$NDK_ROOT/build/cmake/android.toolchain.cmake" -DANDROID_ABI="armeabi-v7a" -DANDROID_NATIVE_API_LEVEL=android-21 -DANDROID_STL=c++_static -DWITH_TESTING=OFF -DWITH_PYTHON=OFF -DANDROID_TOOLCHAIN=clang || exit 1
make -j8
18 changes: 18 additions & 0 deletions fast_tokenizer/run_build_android_armv8_lib.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Configure and build FastTokenizer for Android arm64-v8a in a dedicated
# build directory next to this script's working directory.
#
# Requires NDK_ROOT to point at the Android NDK root; abort early with a clear
# message if it is unset or empty instead of passing a broken toolchain path.
: "${NDK_ROOT:?NDK_ROOT must be set to the Android NDK root directory}"

# -p keeps re-runs from failing when the build directory already exists.
mkdir -p build_android_arm64_v8a
# Never configure in the wrong directory if the cd fails.
cd build_android_arm64_v8a || exit 1
# Only build when configuration succeeded; propagate any failure to the caller.
cmake .. -DCMAKE_TOOLCHAIN_FILE="$NDK_ROOT/build/cmake/android.toolchain.cmake" -DANDROID_ABI="arm64-v8a" -DANDROID_NATIVE_API_LEVEL=android-21 -DANDROID_STL=c++_static -DWITH_TESTING=OFF -DWITH_PYTHON=OFF -DANDROID_TOOLCHAIN=clang || exit 1
make -j8

0 comments on commit b07eed5

Please sign in to comment.