Skip to content
Permalink

Comparing changes

Choose two branches to see what’s changed or to start a new pull request. If you need to, you can also compare across forks or learn more about diff comparisons.

Open a pull request

Create a new pull request by comparing changes across two branches. If you need to, you can also compare across forks. Learn more about diff comparisons here.
base repository: openvinotoolkit/openvino_tokenizers
Failed to load repositories. Confirm that selected base ref is valid, then try again.
Loading
base: 2024.3.0.0
Choose a base ref
...
head repository: openvinotoolkit/openvino_tokenizers
Failed to load repositories. Confirm that selected head ref is valid, then try again.
Loading
compare: releases/2024/3
Choose a head ref
  • 1 commit
  • 5 files changed
  • 1 contributor

Commits on Aug 6, 2024

  1. Conda forge fixes for 2024.3 (#213)

    * Ability to build w/o C++ extension
    
    * Avoid installation of PRCE2 artifacts together with OpenVINO Tokenizers (#211)
    
    * Use tarballs
    
    * Added friendly names for jobs
    
    * Use manylinux package
    
    * Use more options
    
    * Some extra fixes
    ilya-lavrenov authored Aug 6, 2024

    Verified

    This commit was created on GitHub.com and signed with GitHub’s verified signature.
    Copy the full SHA
    b1b6f9c View commit details
Showing with 86 additions and 56 deletions.
  1. +3 −2 .github/workflows/linux.yml
  2. +3 −1 .github/workflows/mac.yml
  3. +3 −1 .github/workflows/windows.yml
  4. +36 −16 CMakeLists.txt
  5. +41 −36 src/CMakeLists.txt
5 changes: 3 additions & 2 deletions .github/workflows/linux.yml
Original file line number Diff line number Diff line change
@@ -16,12 +16,13 @@ concurrency:
env:
PYTHON_VERSION: '3.11'
OV_BRANCH: 'releases/2024/3'
OV_TARBALL: ''
OV_TARBALL: 'https://storage.openvinotoolkit.org/repositories/openvino/packages/2024.3/linux/l_openvino_toolkit_centos7_2024.3.0.16041.1e3b88e4e3f_x86_64.tgz'

permissions: read-all

jobs:
openvino_download:
name: Try to download prebuilt OpenVINO
outputs:
status: ${{ steps.openvino_download.outcome }}
timeout-minutes: 10
@@ -51,6 +52,7 @@ jobs:
if-no-files-found: 'error'

openvino_build:
name: Build OpenVINO
needs: [openvino_download]
if: needs.openvino_download.outputs.status != 'success'
timeout-minutes: 150
@@ -350,4 +352,3 @@ jobs:
run: |
python3 -m pytest tokenizers_test.py
working-directory: ${{ env.OPENVINO_TOKENIZERS_REPO }}/tests

4 changes: 3 additions & 1 deletion .github/workflows/mac.yml
Original file line number Diff line number Diff line change
@@ -17,12 +17,13 @@ env:
PYTHON_VERSION: '3.11'
MACOSX_DEPLOYMENT_TARGET: '10.12'
OV_BRANCH: 'releases/2024/3'
OV_TARBALL: ''
OV_TARBALL: 'https://storage.openvinotoolkit.org/repositories/openvino/packages/2024.3/macos/m_openvino_toolkit_macos_12_6_2024.3.0.16041.1e3b88e4e3f_x86_64.tgz'

permissions: read-all

jobs:
openvino_download:
name: Try to download prebuilt OpenVINO
outputs:
status: ${{ steps.openvino_download.outcome }}
timeout-minutes: 10
@@ -52,6 +53,7 @@ jobs:
if-no-files-found: 'error'

openvino_build:
name: Build OpenVINO
needs: [openvino_download]
if: needs.openvino_download.outputs.status != 'success'
timeout-minutes: 150
4 changes: 3 additions & 1 deletion .github/workflows/windows.yml
Original file line number Diff line number Diff line change
@@ -16,12 +16,13 @@ concurrency:
env:
PYTHON_VERSION: '3.11'
OV_BRANCH: 'releases/2024/3'
OV_TARBALL: ''
OV_TARBALL: 'https://storage.openvinotoolkit.org/repositories/openvino/packages/2024.3/windows/w_openvino_toolkit_windows_2024.3.0.16041.1e3b88e4e3f_x86_64.zip'

permissions: read-all

jobs:
openvino_download:
name: Try to download prebuilt OpenVINO
outputs:
status: ${{ steps.openvino_download.outcome }}
timeout-minutes: 10
@@ -51,6 +52,7 @@ jobs:
if-no-files-found: 'error'

openvino_build:
name: Build OpenVINO
needs: [openvino_download]
if: needs.openvino_download.outputs.status != 'success'
timeout-minutes: 150
52 changes: 36 additions & 16 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -27,24 +27,44 @@ project(openvino_tokenizers

include(cmake/platforms.cmake)

# Looking for OpenVINO in the python distribution. It doesn't work for cross-compiling build
if(NOT CMAKE_CROSSCOMPILING)
find_package(Python3 REQUIRED)
execute_process(
COMMAND ${Python3_EXECUTABLE} -c "from openvino.utils import get_cmake_path; print(get_cmake_path(), end='')"
OUTPUT_VARIABLE OpenVINO_DIR_PY
ERROR_QUIET
)
endif()

# Find OpenVINODeveloperPackage first to compile with SDL flags
find_package(OpenVINODeveloperPackage QUIET PATHS "${OpenVINO_DIR}")
if(NOT OpenVINODeveloperPackage_FOUND)
find_package(OpenVINO REQUIRED COMPONENTS Runtime OPTIONAL_COMPONENTS TensorFlow PATHS "${OpenVINO_DIR_PY}")
endif()
option(BUILD_CPP_EXTENSION "Builds C++ extension for OpenVINO Tokenizers" ON)

# Fail the configure step when the version supplied through
# PY_BUILD_CMAKE_PACKAGE_VERSION differs from the CMake project version.
# The check is skipped when PY_BUILD_CMAKE_PACKAGE_VERSION is not defined.
#
# VERSION_EQUAL compares dotted versions component by component. The numeric
# EQUAL operator is not suitable here: it treats both sides as real numbers, so
# for a value such as "2024.3.0.0" only the leading "2024.3" would take part in
# the comparison and a mismatch in a later component would go unnoticed.
if(DEFINED PY_BUILD_CMAKE_PACKAGE_VERSION AND NOT PY_BUILD_CMAKE_PACKAGE_VERSION VERSION_EQUAL CMAKE_PROJECT_VERSION)
    message(FATAL_ERROR "CMAKE_PROJECT_VERSION (${CMAKE_PROJECT_VERSION}) is not equal to PY_BUILD_CMAKE_PACKAGE_VERSION (${PY_BUILD_CMAKE_PACKAGE_VERSION})")
endif()

add_subdirectory(src)
if(BUILD_CPP_EXTENSION)
    # Ask the openvino Python package where its CMake config lives. The
    # interpreter is executed at configure time, so the lookup is skipped for
    # cross-compiling builds. Errors are silenced (ERROR_QUIET); the result, if
    # any, ends up in OpenVINO_DIR_PY.
    if(NOT CMAKE_CROSSCOMPILING)
        find_package(Python3 REQUIRED)
        execute_process(
            COMMAND
                ${Python3_EXECUTABLE} -c "from openvino.utils import get_cmake_path; print(get_cmake_path(), end='')"
            OUTPUT_VARIABLE OpenVINO_DIR_PY
            ERROR_QUIET
        )
    endif()

    # OpenVINODeveloperPackage is tried first so the build compiles with SDL
    # flags. When it is not found, a regular OpenVINO package is required
    # instead, additionally searched for in the path reported by Python above.
    find_package(OpenVINODeveloperPackage QUIET
        PATHS "${OpenVINO_DIR}"
    )
    if(NOT OpenVINODeveloperPackage_FOUND)
        find_package(OpenVINO REQUIRED
            COMPONENTS Runtime
            OPTIONAL_COMPONENTS TensorFlow
            PATHS "${OpenVINO_DIR_PY}"
        )
    endif()

    # The C++ extension itself is defined in src/.
    add_subdirectory(src)
endif()

# Install rules for the Python package files.
# Both rules are marked EXCLUDE_FROM_ALL and belong to their own install component.

# License and security documents, placed in the
# <package name>-<package version>.dist-info directory.
install(
    FILES
        "${openvino_tokenizers_SOURCE_DIR}/LICENSE"
        "${openvino_tokenizers_SOURCE_DIR}/third-party-programs.txt"
        "${openvino_tokenizers_SOURCE_DIR}/SECURITY.md"
    DESTINATION "${PY_BUILD_CMAKE_PACKAGE_NAME}-${PY_BUILD_CMAKE_PACKAGE_VERSION}.dist-info"
    COMPONENT openvino_tokenizers_licenses
    EXCLUDE_FROM_ALL
)

# __version__.py is generated in the binary directory from its template
# (only @VAR@ references are substituted) and installed into the package.
configure_file(
    "${openvino_tokenizers_SOURCE_DIR}/cmake/templates/__version__.py.in"
    "${openvino_tokenizers_BINARY_DIR}/python/__version__.py"
    @ONLY
)
install(
    FILES "${openvino_tokenizers_BINARY_DIR}/python/__version__.py"
    DESTINATION "openvino_tokenizers"
    COMPONENT openvino_tokenizers_python
    EXCLUDE_FROM_ALL
)
77 changes: 41 additions & 36 deletions src/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -20,6 +20,10 @@ if(POLICY CMP0135)
cmake_policy(SET CMP0135 NEW)
endif()

# CMP0169 deprecates calling FetchContent_Populate() with only the name of a
# declared dependency. This file still uses that call form, so the policy is
# pinned to OLD on CMake versions that know it.
# NOTE(review): OLD policy behavior is deprecated by definition; consider
# migrating the callers to FetchContent_MakeAvailable() and dropping this.
if(POLICY CMP0169)
cmake_policy(SET CMP0169 OLD)
endif()

function(ov_tokenizers_set_cxx_standard)
foreach(build_type "" "_DEBUG" "_MINSIZEREL" "_RELEASE" "_RELWITHDEBINFO")
set(flag_var "CMAKE_CXX_FLAGS${build_type}")
@@ -32,15 +36,15 @@ function(ov_tokenizers_set_cxx_standard)
set(CMAKE_CXX_STANDARD_REQUIRED ON PARENT_SCOPE)
endfunction()

# With a GNU C++ compiler, append -O0 to the Debug flags of both C and C++.
if(CMAKE_COMPILER_IS_GNUCXX)
    string(APPEND CMAKE_CXX_FLAGS_DEBUG " -O0")
    string(APPEND CMAKE_C_FLAGS_DEBUG " -O0")
endif()

ov_tokenizers_set_cxx_standard()

set(CMAKE_POSITION_INDEPENDENT_CODE ON)

if(WIN32)
# Set the runtime library globally
set(CMAKE_MSVC_RUNTIME_LIBRARY "MultiThreaded$<$<CONFIG:Debug>:Debug>")
endif()

# openvino::runtime exports _GLIBCXX_USE_CXX11_ABI=0 on CentOS7.
# It needs to be propagated to every library openvino_tokenizers links with.
# That prohibits linkage with prebuilt libraries because they aren't compiled with _GLIBCXX_USE_CXX11_ABI=0.
@@ -50,7 +54,8 @@ if("_GLIBCXX_USE_CXX11_ABI=0" IN_LIST OPENVINO_RUNTIME_COMPILE_DEFINITIONS)
set(USE_ABI0 ON CACHE BOOL "Set -D_GLIBCXX_USE_CXX11_ABI to 0 for fast_tokenizers")
endif()

option(BUILD_FAST_TOKENIZERS "Compile core_tokenizers instead of downloading prebuilt library. The option is forced to ON if _GLIBCXX_USE_CXX11_ABI=0" ${USE_ABI0})
# BUILD_FAST_TOKENIZERS is a user-selectable option (default OFF) only while USE_ABI0 is false.
# When USE_ABI0 is true, i.e. _GLIBCXX_USE_CXX11_ABI=0 is in effect, the option is forced to ON.
# NOTE(review): cmake_dependent_option() needs include(CMakeDependentOption), which is not visible in this excerpt - confirm.
cmake_dependent_option(BUILD_FAST_TOKENIZERS "Compile core_tokenizers instead of downloading prebuilt library" OFF "NOT USE_ABI0" ON)

#
# Compile flags
@@ -79,6 +84,11 @@ set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${c_cxx_flags}")
set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} ${cxx_flags} ${c_cxx_flags}")
set(CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE} ${c_cxx_flags}")

if(BUILD_FAST_TOKENIZERS)
# FastTokenizers uses the static MSVC runtime, so this project has to select it as well.
# The value below resolves to MultiThreadedDebug for the Debug configuration and to MultiThreaded otherwise.
set(CMAKE_MSVC_RUNTIME_LIBRARY "MultiThreaded$<$<CONFIG:Debug>:Debug>")
endif()

#
# Dependencies
#
@@ -150,7 +160,6 @@ if(BUILD_FAST_TOKENIZERS)
set(EXTERNAL_PROJECT_SOURCE_DIR ${CMAKE_BINARY_DIR}/_deps/fast_tokenizer/src)
set(EXTERNAL_PROJECT_BINARY_DIR ${CMAKE_BINARY_DIR}/_deps/fast_tokenizer/build)
set(EXTERNAL_PROJECT_SUBBUILD_DIR ${CMAKE_BINARY_DIR}/_deps/fast_tokenizer/sub-build)
set(WITH_PYTHON OFF CACHE BOOL "")

FetchContent_Declare(
fast_tokenizer
@@ -194,6 +203,7 @@ else()
URL_HASH SHA256=1ae8ccfdb1066a731bba6ee0881baad5efd2cd661acd9569b689f2586e1a50e9
)
function(ov_tokenizers_build_static_re2)
set(CMAKE_POLICY_DEFAULT_CMP0077 NEW)
set(BUILD_SHARED_LIBS OFF)
FetchContent_GetProperties(re2)
if(NOT re2_POPULATED)
@@ -254,6 +264,28 @@ function(ov_tokenizers_link_fast_tokenizer)
target_link_libraries(${TARGET_NAME} PRIVATE ${FAST_TOKENIZER_LIBS})
endfunction()

# Downloads PCRE2 10.44 (hash-verified archive), adds it to the build with
# PCRE2_STATIC_PIC=ON and PCRE2_BUILD_TESTS=OFF, and links its 8-bit library
# `pcre2-8` into the target.
#
# The target is read from the TARGET_NAME variable visible in the calling
# scope; the function declares no named parameters, so arguments given at the
# call site are not read here.
#
# The dependency is registered with FetchContent under the name `prce2` (sic).
# That name is kept as is so the download location and any per-dependency
# FetchContent overrides stay the same; every <name>_* variable below must
# therefore use the `prce2_` prefix.
function(ov_tokenizers_link_pcre2)
  FetchContent_Declare(
    prce2
    URL https://github.com/PCRE2Project/pcre2/archive/refs/tags/pcre2-10.44.zip
    URL_HASH SHA256=2d87bd1700bd1993ddea7c56aad2b0373ac2b3d52d9cc78842a6d061ffaf0925
  )
  # Defines prce2_POPULATED, prce2_SOURCE_DIR and prce2_BINARY_DIR if the
  # dependency has already been populated by an earlier call.
  FetchContent_GetProperties(prce2)
  if(NOT prce2_POPULATED)
    FetchContent_Populate(prce2)

    # With CMP0077 NEW, option() calls inside PCRE2 respect the plain
    # variables set here instead of overriding them.
    set(CMAKE_POLICY_DEFAULT_CMP0077 NEW)
    set(PCRE2_STATIC_PIC ON)
    set(PCRE2_BUILD_TESTS OFF)

    # EXCLUDE_FROM_ALL keeps PCRE2's targets out of the default `all` build;
    # only what the target links against gets built.
    add_subdirectory(${prce2_SOURCE_DIR} ${prce2_BINARY_DIR} EXCLUDE_FROM_ALL)
  endif()

  # The include directory must be spelled with the declared dependency name:
  # nothing in this function defines `pcre2_BINARY_DIR`, so the previous
  # spelling expanded to an empty path.
  target_include_directories(${TARGET_NAME} PRIVATE "${prce2_BINARY_DIR}")
  target_link_libraries(${TARGET_NAME} PRIVATE pcre2-8)
  target_compile_definitions(${TARGET_NAME} PRIVATE PCRE2_CODE_UNIT_WIDTH=8)
endfunction()

#
# Build library
#
@@ -271,26 +303,14 @@ add_library(${TARGET_NAME} SHARED ${SRC})

ov_tokenizers_link_sentencepiece(${TARGET_NAME})
ov_tokenizers_link_fast_tokenizer(${TARGET_NAME})
ov_tokenizers_link_pcre2(${TARGET_NAME})

string(REPLACE " " ";" extra_flags "${c_cxx_flags} ${cxx_flags}")
set_target_properties(${TARGET_NAME} PROPERTIES COMPILE_OPTIONS "${extra_flags}")

target_compile_definitions(${TARGET_NAME} PRIVATE IMPLEMENT_OPENVINO_EXTENSION_API)
target_link_libraries(${TARGET_NAME} PRIVATE openvino::runtime)

# Fetch PCRE2
include(FetchContent)
FetchContent_Declare(
PCRE2
URL https://github.com/PCRE2Project/pcre2/archive/refs/tags/pcre2-10.44.zip
URL_HASH SHA256=2d87bd1700bd1993ddea7c56aad2b0373ac2b3d52d9cc78842a6d061ffaf0925
)
FetchContent_MakeAvailable(PCRE2)

target_include_directories(${TARGET_NAME} PRIVATE ${PCRE2_BINARY_DIR})
target_link_libraries(${TARGET_NAME} PRIVATE pcre2-8)
target_compile_definitions(${TARGET_NAME} PRIVATE PCRE2_CODE_UNIT_WIDTH=8)

if(OpenVINO_Frontend_TensorFlow_FOUND)
target_link_libraries(${TARGET_NAME} PRIVATE openvino::frontend::tensorflow)
target_compile_definitions(${TARGET_NAME} PRIVATE OpenVINO_Frontend_TensorFlow_FOUND)
@@ -393,26 +413,11 @@ if(extra_libs)
install(FILES ${extra_libs} DESTINATION ${extra_libs_location} COMPONENT openvino_tokenizers)
endif()

install(FILES "${openvino_tokenizers_SOURCE_DIR}/LICENSE"
"${openvino_tokenizers_SOURCE_DIR}/third-party-programs.txt"
"${openvino_tokenizers_SOURCE_DIR}/SECURITY.md"
DESTINATION "${PY_BUILD_CMAKE_PACKAGE_NAME}-${PY_BUILD_CMAKE_PACKAGE_VERSION}.dist-info"
COMPONENT openvino_tokenizers_licenses
EXCLUDE_FROM_ALL)

install(FILES "${openvino_tokenizers_SOURCE_DIR}/LICENSE"
"${openvino_tokenizers_SOURCE_DIR}/third-party-programs.txt"
"${openvino_tokenizers_SOURCE_DIR}/README.md"
DESTINATION "docs/openvino_tokenizers"
COMPONENT openvino_tokenizers_docs
EXCLUDE_FROM_ALL)

configure_file("${openvino_tokenizers_SOURCE_DIR}/cmake/templates/__version__.py.in"
"${openvino_tokenizers_BINARY_DIR}/python/__version__.py" @ONLY)
install(FILES "${openvino_tokenizers_BINARY_DIR}/python/__version__.py"
DESTINATION "openvino_tokenizers"
COMPONENT openvino_tokenizers_python
EXCLUDE_FROM_ALL)
COMPONENT openvino_tokenizers_docs)

#
# Cpack configuration