Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[FastTokenizer] Support FastTokenizer on Android #3999

Merged
merged 9 commits into from
Dec 4, 2022
Merged
Show file tree
Hide file tree
Changes from 8 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 21 additions & 2 deletions fast_tokenizer/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,3 +1,17 @@
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Require CMake 3.10 or newer. This call comes before project() so that the
# policy defaults it establishes are in effect for the project declaration.
cmake_minimum_required(VERSION 3.10)

# Declare the "tokenizers" project, version 1.0, with C++ and C enabled.
project(tokenizers LANGUAGES CXX C VERSION 1.0)
Expand Down Expand Up @@ -103,8 +117,13 @@ endforeach()
ELSE(WIN32)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O3 -fPIC")
IF (NOT APPLE)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -ldl -lpthread")
ENDIF()
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -ldl")
# Android builds get -Os appended to the C++ flags; every other platform that
# reaches this point gets -lpthread appended instead.
if(ANDROID)
  string(APPEND CMAKE_CXX_FLAGS " -Os")
else()
  string(APPEND CMAKE_CXX_FLAGS " -lpthread")
endif()
ENDIF()
set (PUBLIC_DEPEND_LIBS ${CMAKE_DL_LIBS})
ENDIF(WIN32)

Expand Down
50 changes: 49 additions & 1 deletion fast_tokenizer/cmake/external/gflags.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,53 @@ ENDIF(WIN32)

INCLUDE_DIRECTORIES(${GFLAGS_INCLUDE_DIR})

IF(ANDROID)
# Arguments that forward the current Android cross-compilation settings to the
# nested gflags build. Start from an empty list so earlier values are dropped.
set(CROSS_COMPILE_CMAKE_ARGS "")
# Target system / NDK description as seen by this configure run.
list(APPEND CROSS_COMPILE_CMAKE_ARGS
  "-DCMAKE_SYSTEM_NAME=${CMAKE_SYSTEM_NAME}"
  "-DCMAKE_SYSTEM_VERSION=${CMAKE_SYSTEM_VERSION}"
  "-DCMAKE_ANDROID_ARCH_ABI=${CMAKE_ANDROID_ARCH_ABI}"
  "-DCMAKE_ANDROID_NDK=${CMAKE_ANDROID_NDK}"
  "-DCMAKE_ANDROID_STL_TYPE=${CMAKE_ANDROID_STL_TYPE}"
)
# ANDROID_* options, the NDK toolchain file and the API level.
list(APPEND CROSS_COMPILE_CMAKE_ARGS
  "-DANDROID_ABI=${CMAKE_ANDROID_ARCH_ABI}"
  "-DANDROID_TOOLCHAIN=${ANDROID_TOOLCHAIN}"
  "-DANDROID_STL=${CMAKE_ANDROID_STL_TYPE}"
  "-DCMAKE_SYSTEM_PROCESSOR=${CMAKE_SYSTEM_PROCESSOR}"
  "-DCMAKE_TOOLCHAIN_FILE=${CMAKE_ANDROID_NDK}/build/cmake/android.toolchain.cmake"
  "-DCMAKE_ANDROID_NDK_TOOLCHAIN_VERSION=${CMAKE_ANDROID_NDK_TOOLCHAIN_VERSION}"
  "-DANDROID_PLATFORM=android-${ANDROID_NATIVE_API_LEVEL}"
  "-D__ANDROID_API__=${ANDROID_NATIVE_API_LEVEL}"
)

# Fetch gflags from git and build it as a static, position-independent library
# installed under GFLAGS_INSTALL_DIR, using the cross-compile arguments above.
ExternalProject_Add(
  extern_gflags
  ${EXTERNAL_PROJECT_LOG_ARGS}
  ${SHALLOW_CLONE}
  GIT_REPOSITORY  ${GFLAGS_REPOSITORY}
  GIT_TAG         ${GFLAGS_TAG}
  PREFIX          ${GFLAGS_PREFIX_DIR}
  UPDATE_COMMAND  ""
  BUILD_COMMAND   ${BUILD_COMMAND}
  INSTALL_COMMAND ${INSTALL_COMMAND}
  CMAKE_ARGS
    ${CROSS_COMPILE_CMAKE_ARGS}
    # Compilers and flags are inherited from the outer build.
    -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER}
    -DCMAKE_C_COMPILER=${CMAKE_C_COMPILER}
    -DCMAKE_CXX_FLAGS=${CMAKE_CXX_FLAGS}
    -DCMAKE_CXX_FLAGS_RELEASE=${CMAKE_CXX_FLAGS_RELEASE}
    -DCMAKE_CXX_FLAGS_DEBUG=${CMAKE_CXX_FLAGS_DEBUG}
    -DCMAKE_C_FLAGS=${CMAKE_C_FLAGS}
    -DCMAKE_C_FLAGS_DEBUG=${CMAKE_C_FLAGS_DEBUG}
    -DCMAKE_C_FLAGS_RELEASE=${CMAKE_C_FLAGS_RELEASE}
    -DBUILD_STATIC_LIBS=ON
    -DCMAKE_INSTALL_PREFIX=${GFLAGS_INSTALL_DIR}
    -DCMAKE_POSITION_INDEPENDENT_CODE=ON
    -DBUILD_TESTING=OFF
    -DCMAKE_BUILD_TYPE=${THIRD_PARTY_BUILD_TYPE}
    ${EXTERNAL_OPTIONAL_ARGS}
  CMAKE_CACHE_ARGS
    -DCMAKE_INSTALL_PREFIX:PATH=${GFLAGS_INSTALL_DIR}
    -DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON
    -DCMAKE_BUILD_TYPE:STRING=${THIRD_PARTY_BUILD_TYPE}
  BUILD_BYPRODUCTS ${GFLAGS_LIBRARIES}
)
ELSE()
ExternalProject_Add(
extern_gflags
${EXTERNAL_PROJECT_LOG_ARGS}
Expand Down Expand Up @@ -58,6 +105,7 @@ ExternalProject_Add(
-DCMAKE_BUILD_TYPE:STRING=${THIRD_PARTY_BUILD_TYPE}
BUILD_BYPRODUCTS ${GFLAGS_LIBRARIES}
)
ENDIF()

# Expose the externally built gflags archive as a globally visible imported
# static library whose file location is GFLAGS_LIBRARIES.
add_library(gflags STATIC IMPORTED GLOBAL)
set_property(
  TARGET gflags
  PROPERTY IMPORTED_LOCATION ${GFLAGS_LIBRARIES}
)
Expand All @@ -70,4 +118,4 @@ if (WIN32)
if (HAVE_SHLWAPI)
set_property(GLOBAL PROPERTY OS_DEPENDENCY_MODULES shlwapi.lib)
endif(HAVE_SHLWAPI)
endif (WIN32)
endif (WIN32)
51 changes: 50 additions & 1 deletion fast_tokenizer/cmake/external/glog.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,54 @@ ENDIF(WIN32)

INCLUDE_DIRECTORIES(${GLOG_INCLUDE_DIR})

IF(ANDROID)
# Arguments that forward the current Android cross-compilation settings to the
# nested glog build. Start from an empty list so earlier values are dropped.
set(CROSS_COMPILE_CMAKE_ARGS "")
# Target system / NDK description as seen by this configure run.
list(APPEND CROSS_COMPILE_CMAKE_ARGS
  "-DCMAKE_SYSTEM_NAME=${CMAKE_SYSTEM_NAME}"
  "-DCMAKE_SYSTEM_VERSION=${CMAKE_SYSTEM_VERSION}"
  "-DCMAKE_ANDROID_ARCH_ABI=${CMAKE_ANDROID_ARCH_ABI}"
  "-DCMAKE_ANDROID_NDK=${CMAKE_ANDROID_NDK}"
  "-DCMAKE_ANDROID_STL_TYPE=${CMAKE_ANDROID_STL_TYPE}"
)
# ANDROID_* options, the NDK toolchain file and the API level.
list(APPEND CROSS_COMPILE_CMAKE_ARGS
  "-DANDROID_ABI=${CMAKE_ANDROID_ARCH_ABI}"
  "-DANDROID_TOOLCHAIN=${ANDROID_TOOLCHAIN}"
  "-DANDROID_STL=${CMAKE_ANDROID_STL_TYPE}"
  "-DCMAKE_SYSTEM_PROCESSOR=${CMAKE_SYSTEM_PROCESSOR}"
  "-DCMAKE_TOOLCHAIN_FILE=${CMAKE_ANDROID_NDK}/build/cmake/android.toolchain.cmake"
  "-DCMAKE_ANDROID_NDK_TOOLCHAIN_VERSION=${CMAKE_ANDROID_NDK_TOOLCHAIN_VERSION}"
  "-DANDROID_PLATFORM=android-${ANDROID_NATIVE_API_LEVEL}"
  "-D__ANDROID_API__=${ANDROID_NATIVE_API_LEVEL}"
)

# Fetch glog from git and build it after the gflags target, with gflags
# support switched off (WITH_GFLAGS=OFF), installing into GLOG_INSTALL_DIR.
ExternalProject_Add(
  extern_glog
  ${EXTERNAL_PROJECT_LOG_ARGS}
  ${SHALLOW_CLONE}
  GIT_REPOSITORY ${GLOG_REPOSITORY}
  GIT_TAG        ${GLOG_TAG}
  DEPENDS        gflags
  PREFIX         ${GLOG_PREFIX_DIR}
  UPDATE_COMMAND ""
  CMAKE_ARGS
    ${CROSS_COMPILE_CMAKE_ARGS}
    -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER}
    -DCMAKE_C_COMPILER=${CMAKE_C_COMPILER}
    # glog uses its own C++ flag set rather than CMAKE_CXX_FLAGS.
    -DCMAKE_CXX_FLAGS=${GLOG_CMAKE_CXX_FLAGS}
    -DCMAKE_CXX_FLAGS_RELEASE=${CMAKE_CXX_FLAGS_RELEASE}
    -DCMAKE_CXX_FLAGS_DEBUG=${CMAKE_CXX_FLAGS_DEBUG}
    -DCMAKE_C_FLAGS=${CMAKE_C_FLAGS}
    -DCMAKE_C_FLAGS_DEBUG=${CMAKE_C_FLAGS_DEBUG}
    -DCMAKE_C_FLAGS_RELEASE=${CMAKE_C_FLAGS_RELEASE}
    -DCMAKE_INSTALL_PREFIX=${GLOG_INSTALL_DIR}
    -DCMAKE_INSTALL_LIBDIR=${GLOG_INSTALL_DIR}/lib
    -DCMAKE_POSITION_INDEPENDENT_CODE=ON
    -DWITH_GFLAGS=OFF
    -DBUILD_TESTING=OFF
    -DCMAKE_BUILD_TYPE=${THIRD_PARTY_BUILD_TYPE}
    ${EXTERNAL_OPTIONAL_ARGS}
  CMAKE_CACHE_ARGS
    -DCMAKE_INSTALL_PREFIX:PATH=${GLOG_INSTALL_DIR}
    -DCMAKE_INSTALL_LIBDIR:PATH=${GLOG_INSTALL_DIR}/lib
    -DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON
    -DCMAKE_BUILD_TYPE:STRING=${THIRD_PARTY_BUILD_TYPE}
  BUILD_BYPRODUCTS ${GLOG_LIBRARIES}
)
ELSE()
ExternalProject_Add(
extern_glog
${EXTERNAL_PROJECT_LOG_ARGS}
Expand Down Expand Up @@ -61,8 +109,9 @@ ExternalProject_Add(
-DCMAKE_BUILD_TYPE:STRING=${THIRD_PARTY_BUILD_TYPE}
BUILD_BYPRODUCTS ${GLOG_LIBRARIES}
)
ENDIF()

# Expose the externally built glog archive as a globally visible imported
# static library located at GLOG_LIBRARIES.
add_library(glog STATIC IMPORTED GLOBAL)
set_property(
  TARGET glog
  PROPERTY IMPORTED_LOCATION ${GLOG_LIBRARIES}
)
# Order the build: glog is only usable after extern_glog and gflags are built.
add_dependencies(glog extern_glog gflags)
# Link glog into targets created after this point in the directory scope.
link_libraries(glog)
LINK_LIBRARIES(glog)
31 changes: 30 additions & 1 deletion fast_tokenizer/cmake/external/icu.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,19 @@ include(ExternalProject)
include (ByproductsICU)
SET(ICU_PREFIX_DIR ${THIRD_PARTY_PATH}/icu)
SET(ICU_INSTALL_DIR ${THIRD_PARTY_PATH}/install/icu)
SET(ICU_REPOSITORY ${GIT_URL}/unicode-org/icu.git)
# Choose where ICU comes from.
#  - Android: a per-ABI .tgz archive downloaded from ICU_URL_PREFIX.
#  - Otherwise: the upstream unicode-org git repository.
if(ANDROID)
  set(ICU_URL_PREFIX "https://bj.bcebos.com/fastdeploy/test")
  # ANDROID_ABI is interpolated into the archive name below, so require an
  # exact ABI name. An unanchored regex would also accept any value that merely
  # contains a supported ABI and then produce a download URL that does not exist.
  if(NOT "${ANDROID_ABI}" MATCHES "^(armeabi-v7a|arm64-v8a)$")
    message(FATAL_ERROR "FastTokenizer for Android only support armeabi-v7a, arm64-v8a now.")
  endif()
  # Only the clang toolchain is accepted when cross compiling for Android.
  if(NOT "${ANDROID_TOOLCHAIN}" MATCHES "clang")
    message(FATAL_ERROR "Currently, only support clang toolchain while cross compiling FastTokenizer for Android, but found ${ANDROID_TOOLCHAIN}.")
  endif()
  set(ICU_REPOSITORY ${ICU_URL_PREFIX}/icu-android-${ANDROID_ABI}.tgz)
else()
  set(ICU_REPOSITORY ${GIT_URL}/unicode-org/icu.git)
endif()
SET(ICU_TAG release-70-1)
set(FIND_OR_BUILD_ICU_DIR ${CMAKE_CURRENT_LIST_DIR})

Expand Down Expand Up @@ -70,6 +82,23 @@ ExternalProject_Add(
INSTALL_COMMAND make install prefix="" DESTDIR=${ICU_INSTALL_DIR} install
BUILD_BYPRODUCTS ${ICU_LIBRARIES}
)
elseif(ANDROID)
# Android ICU: download the archive named by ICU_REPOSITORY and install its
# lib/ and include/ trees into ICU_INSTALL_DIR. Nothing is configured or
# compiled here, so the configure/update/build steps are empty.
ExternalProject_Add(
  extern_icu
  ${EXTERNAL_PROJECT_LOG_ARGS}
  ${SHALLOW_CLONE}
  URL               ${ICU_REPOSITORY}
  PREFIX            ${ICU_PREFIX_DIR}
  CONFIGURE_COMMAND ""
  UPDATE_COMMAND    ""
  BUILD_COMMAND     ""
  # The install step is a sequence of commands joined with ExternalProject's
  # own COMMAND separator instead of a shell "&&", so it does not depend on the
  # step being run through a shell. Each command must still succeed for the
  # step to succeed.
  INSTALL_COMMAND
    ${CMAKE_COMMAND} -E remove_directory ${ICU_INSTALL_DIR}
  COMMAND
    ${CMAKE_COMMAND} -E make_directory ${ICU_INSTALL_DIR}
  COMMAND
    ${CMAKE_COMMAND} -E rename ${ICU_PREFIX_DIR}/src/extern_icu/lib/ ${ICU_INSTALL_DIR}/lib
  COMMAND
    ${CMAKE_COMMAND} -E copy_directory ${ICU_PREFIX_DIR}/src/extern_icu/include ${ICU_INSTALL_DIR}/include
  BUILD_BYPRODUCTS ${ICU_LIBRARIES}
)
else()
ExternalProject_Add(
extern_icu
Expand Down
53 changes: 51 additions & 2 deletions fast_tokenizer/cmake/external/re2.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -24,17 +24,64 @@ IF(WIN32)
add_definitions(-DRE2_STATIC)
ELSEIF(APPLE)
SET(RE2_LIBRARIES "${RE2_INSTALL_DIR}/lib/libre2.a")
ELSEIF(ANDROID)
SET(RE2_LIBRARIES "${RE2_INSTALL_DIR}/lib/libre2.a")
ELSE()
IF(CMAKE_HOST_SYSTEM_PROCESSOR MATCHES "aarch64|arm64")
SET(RE2_LIBRARIES "${RE2_INSTALL_DIR}/lib/libre2.a")
ELSE()
SET(RE2_LIBRARIES "${RE2_INSTALL_DIR}/lib64/libre2.a")
# Decide which directory holds libre2.a by probing /etc/issue for the
# distribution name: Debian and Ubuntu use lib/, everything else lib64/.
# The probe is guarded because file(READ) is a hard configure error when the
# file does not exist; in that case DIST stays empty and lib64/ is used.
set(DIST "")
if(EXISTS "/etc/issue")
  file(READ "/etc/issue" ETC_ISSUE)
  # Quote the contents so an empty file or one containing ';' is still passed
  # to string(REGEX MATCH) as exactly one input argument.
  string(REGEX MATCH "Debian|Ubuntu" DIST "${ETC_ISSUE}")
endif()
if(DIST STREQUAL "Debian" OR DIST STREQUAL "Ubuntu")
  set(RE2_LIBRARIES "${RE2_INSTALL_DIR}/lib/libre2.a")
else()
  set(RE2_LIBRARIES "${RE2_INSTALL_DIR}/lib64/libre2.a")
endif()
ENDIF()
ENDIF()

SET(RE2_INCLUDE_DIR ${RE2_INSTALL_DIR}/include)
INCLUDE_DIRECTORIES(${RE2_INCLUDE_DIR})

IF(ANDROID)
# Arguments that forward the current Android cross-compilation settings to the
# nested RE2 build. Start from an empty list so earlier values are dropped.
set(CROSS_COMPILE_CMAKE_ARGS "")
# Target system / NDK description as seen by this configure run.
list(APPEND CROSS_COMPILE_CMAKE_ARGS
  "-DCMAKE_SYSTEM_NAME=${CMAKE_SYSTEM_NAME}"
  "-DCMAKE_SYSTEM_VERSION=${CMAKE_SYSTEM_VERSION}"
  "-DCMAKE_ANDROID_ARCH_ABI=${CMAKE_ANDROID_ARCH_ABI}"
  "-DCMAKE_ANDROID_NDK=${CMAKE_ANDROID_NDK}"
  "-DCMAKE_ANDROID_STL_TYPE=${CMAKE_ANDROID_STL_TYPE}"
)
# ANDROID_* options, the NDK toolchain file and the API level.
list(APPEND CROSS_COMPILE_CMAKE_ARGS
  "-DANDROID_ABI=${CMAKE_ANDROID_ARCH_ABI}"
  "-DANDROID_TOOLCHAIN=${ANDROID_TOOLCHAIN}"
  "-DANDROID_STL=${CMAKE_ANDROID_STL_TYPE}"
  "-DCMAKE_SYSTEM_PROCESSOR=${CMAKE_SYSTEM_PROCESSOR}"
  "-DCMAKE_TOOLCHAIN_FILE=${CMAKE_ANDROID_NDK}/build/cmake/android.toolchain.cmake"
  "-DCMAKE_ANDROID_NDK_TOOLCHAIN_VERSION=${CMAKE_ANDROID_NDK_TOOLCHAIN_VERSION}"
  "-DANDROID_PLATFORM=android-${ANDROID_NATIVE_API_LEVEL}"
  "-D__ANDROID_API__=${ANDROID_NATIVE_API_LEVEL}"
)

# Fetch RE2 from git and build it with the outer build's flags, installing
# into RE2_INSTALL_DIR; the resulting archive is RE2_LIBRARIES.
ExternalProject_Add(
  extern_re2
  ${EXTERNAL_PROJECT_LOG_ARGS}
  ${SHALLOW_CLONE}
  GIT_REPOSITORY ${RE2_REPOSITORY}
  GIT_TAG        ${RE2_TAG}
  PREFIX         ${RE2_PREFIX_DIR}
  UPDATE_COMMAND ""
  CMAKE_ARGS
    ${CROSS_COMPILE_CMAKE_ARGS}
    -DCMAKE_CXX_FLAGS=${CMAKE_CXX_FLAGS}
    -DCMAKE_CXX_FLAGS_RELEASE=${CMAKE_CXX_FLAGS_RELEASE}
    -DCMAKE_CXX_FLAGS_DEBUG=${CMAKE_CXX_FLAGS_DEBUG}
    -DCMAKE_C_FLAGS=${CMAKE_C_FLAGS}
    -DCMAKE_C_FLAGS_DEBUG=${CMAKE_C_FLAGS_DEBUG}
    -DCMAKE_C_FLAGS_RELEASE=${CMAKE_C_FLAGS_RELEASE}
    -DCMAKE_INSTALL_PREFIX:PATH=${RE2_INSTALL_DIR}
    -DCMAKE_BUILD_TYPE:STRING=${THIRD_PARTY_BUILD_TYPE}
    -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER}
  BUILD_BYPRODUCTS ${RE2_LIBRARIES}
)
ELSE()
ExternalProject_Add(
extern_re2
${EXTERNAL_PROJECT_LOG_ARGS}
Expand All @@ -51,9 +98,11 @@ ExternalProject_Add(
-DCMAKE_C_FLAGS_RELEASE=${CMAKE_C_FLAGS_RELEASE}
-DCMAKE_INSTALL_PREFIX:PATH=${RE2_INSTALL_DIR}
-DCMAKE_BUILD_TYPE:STRING=${THIRD_PARTY_BUILD_TYPE}
-DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER}
BUILD_BYPRODUCTS ${RE2_LIBRARIES}
)
ENDIF()

ADD_LIBRARY(re2 STATIC IMPORTED GLOBAL)
SET_PROPERTY(TARGET re2 PROPERTY IMPORTED_LOCATION ${RE2_LIBRARIES})
ADD_DEPENDENCIES(re2 extern_re2)
ADD_DEPENDENCIES(re2 extern_re2)
29 changes: 29 additions & 0 deletions fast_tokenizer/docs/compile/how_to_build_android.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
# Android 编译

## 环境依赖

- cmake >= 3.10
- NDK >= 20

## 配置NDK
```bash
wget https://dl.google.com/android/repository/android-ndk-r20b-linux-x86_64.zip
unzip android-ndk-r20b-linux-x86_64.zip # 会解压缩到 android-ndk-r20b 目录
export NDK_ROOT=${PWD}/android-ndk-r20b
```

## 编译C++库方法

```bash
git clone https://github.com/PaddlePaddle/PaddleNLP.git
cd PaddleNLP/fast_tokenizer
mkdir build && cd build
cmake .. -DCMAKE_TOOLCHAIN_FILE=$NDK_ROOT/build/cmake/android.toolchain.cmake -DANDROID_ABI="arm64-v8a" -DANDROID_NATIVE_API_LEVEL=android-21 -DANDROID_STL=c++_static -DWITH_TESTING=OFF -DWITH_PYTHON=OFF -DANDROID_TOOLCHAIN=clang
make -j8
```
编译后的C++库在当前目录下的`cpp`目录下。可以选择使用strip减少库体积:
```shell
$NDK_ROOT/toolchains/llvm/prebuilt/linux-x86_64/aarch64-linux-android/bin/strip libcore_tokenizers.so
```

更多编译选项说明参考[编译指南](./README.md)
18 changes: 18 additions & 0 deletions fast_tokenizer/run_build_android_armv7_lib.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Configure and build FastTokenizer for Android armeabi-v7a in a dedicated
# build directory. Requires NDK_ROOT to point at the Android NDK.
#
# The steps are chained with && so a failure stops the sequence; in particular
# cmake never runs against ".." from the wrong directory when cd fails.
# mkdir -p lets the script be re-run when the build directory already exists.
mkdir -p build_android_armeabi_v7a &&
cd build_android_armeabi_v7a &&
cmake .. \
  -DCMAKE_TOOLCHAIN_FILE="${NDK_ROOT}/build/cmake/android.toolchain.cmake" \
  -DANDROID_ABI="armeabi-v7a" \
  -DANDROID_NATIVE_API_LEVEL=android-21 \
  -DANDROID_STL=c++_static \
  -DWITH_TESTING=OFF \
  -DWITH_PYTHON=OFF \
  -DANDROID_TOOLCHAIN=clang &&
make -j8
18 changes: 18 additions & 0 deletions fast_tokenizer/run_build_android_armv8_lib.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Configure and build FastTokenizer for Android arm64-v8a in a dedicated
# build directory. Requires NDK_ROOT to point at the Android NDK.
#
# The steps are chained with && so a failure stops the sequence; in particular
# cmake never runs against ".." from the wrong directory when cd fails.
# mkdir -p lets the script be re-run when the build directory already exists.
mkdir -p build_android_arm64_v8a &&
cd build_android_arm64_v8a &&
cmake .. \
  -DCMAKE_TOOLCHAIN_FILE="${NDK_ROOT}/build/cmake/android.toolchain.cmake" \
  -DANDROID_ABI="arm64-v8a" \
  -DANDROID_NATIVE_API_LEVEL=android-21 \
  -DANDROID_STL=c++_static \
  -DWITH_TESTING=OFF \
  -DWITH_PYTHON=OFF \
  -DANDROID_TOOLCHAIN=clang &&
make -j8