Jetson Orin Nano Super: The Road to AI Model Deployment (Part 9) — TensorRT C++ API Environment Setup, Compilation, and Running


1. Introduction to the CMakeLists File

cmake_minimum_required(VERSION 3.18) # require CMake 3.18 or newer
project(tensorrt_cpp_api) # name the project tensorrt_cpp_api

# Use ccache to speed up recompilation
include(cmake/ccache.cmake)

# Set the C++ standard and compiler optimization options
set(CMAKE_CXX_STANDARD 17) # build as C++17
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -Ofast -DNDEBUG -Wno-deprecated-declarations")
# -Wall: enable the common warnings
# -Ofast: enable the most aggressive optimizations (includes -ffast-math)
# -DNDEBUG: define NDEBUG, which disables assert() checks
# -Wno-deprecated-declarations: silence warnings about deprecated APIs

# Set the CMake module path so that FindTensorRT.cmake can be found
set(CMAKE_MODULE_PATH "${CMAKE_SOURCE_DIR}/cmake" ${CMAKE_MODULE_PATH})

# If the user did not specify the TensorRT root directory, set a default path.
# This CMakeLists first calls FindTensorRT.cmake to locate the TensorRT files
# and paths, so if you use the docker image I provide, TensorRT will be found
# without any extra work.
# If it really cannot be found, locate it yourself and write the path into the
# set(TensorRT_DIR /home/cyrus/work/libs/TensorRT-10.0.0.6/) line below.
# How to locate it: search for `NvInfer.h`:
# find / -name NvInfer.h 2>/dev/null
# Once the header is found, for example at:
# /home/cyrus/work/libs/TensorRT-10.0.0.6/include/NvInfer.h
# the root dir should be:
# /home/cyrus/work/libs/TensorRT-10.0.0.6/
# Confirm that the directory structure is correct.
# A typical TensorRT installation directory looks like this:
# TensorRT-10.0.0.6/
# ├── bin/
# ├── data/
# ├── doc/
# ├── include/
# ├── lib/
# ├── python/
# ├── samples/
# └── targets/

if (NOT TensorRT_DIR)
    set(TensorRT_DIR /home/cyrus/work/libs/TensorRT-10.0.0.6/) # default TensorRT installation path
endif()
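
# Tip: the path can also be supplied at configure time instead of editing
# this file, e.g.
#   cmake -DTensorRT_DIR=/home/cyrus/work/libs/TensorRT-10.0.0.6/ ..
# A -D definition creates a cache entry, so the `if (NOT TensorRT_DIR)` guard
# above leaves it untouched.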

# Set the CUDA toolkit root directory
set(CUDA_TOOLKIT_ROOT_DIR /usr/local/cuda) # default CUDA installation path

# Locate the required dependencies
find_package(TensorRT REQUIRED) # locate TensorRT
find_package(CUDA REQUIRED)     # locate CUDA
find_package(OpenCV REQUIRED)   # locate OpenCV
find_package(fmt REQUIRED)      # locate fmt (used for formatted log output)

# Create the shared library tensorrt_cpp_api
add_library(tensorrt_cpp_api SHARED
        src/engine.cpp) # the shared library is built from src/engine.cpp

# Set the shared library's include paths and linked libraries
target_include_directories(tensorrt_cpp_api PUBLIC 
    ${OpenCV_INCLUDE_DIRS} ${CUDA_INCLUDE_DIRS} ${TensorRT_INCLUDE_DIRS} include include/interfaces)
# Include paths:
# - ${OpenCV_INCLUDE_DIRS}: OpenCV headers
# - ${CUDA_INCLUDE_DIRS}: CUDA headers
# - ${TensorRT_INCLUDE_DIRS}: TensorRT headers
# - include and include/interfaces: the project's own headers

target_link_libraries(tensorrt_cpp_api PUBLIC 
    ${OpenCV_LIBS} ${CUDA_LIBRARIES} ${CMAKE_THREAD_LIBS_INIT} ${TensorRT_LIBRARIES} fmt::fmt)
# Linked libraries:
# - ${OpenCV_LIBS}: the OpenCV libraries
# - ${CUDA_LIBRARIES}: the CUDA libraries
# - ${CMAKE_THREAD_LIBS_INIT}: the threading library
# - ${TensorRT_LIBRARIES}: the TensorRT libraries
# - fmt::fmt: the fmt library

# Create the executable run_inference_benchmark
add_executable(run_inference_benchmark src/main.cpp) # the executable is built from src/main.cpp

# Set the executable's linked libraries
target_link_libraries(run_inference_benchmark tensorrt_cpp_api fmt::fmt)
# Linked libraries:
# - tensorrt_cpp_api: the shared library built above
# - fmt::fmt: the fmt library

The FindTensorRT.cmake module introduced in the next section is made discoverable by this line of the CMakeLists:

# Set the CMake module path so that FindTensorRT.cmake can be found
set(CMAKE_MODULE_PATH "${CMAKE_SOURCE_DIR}/cmake" ${CMAKE_MODULE_PATH})

2. Introduction to FindTensorRT.cmake

FindTensorRT.cmake is a CMake module that locates and configures the NVIDIA TensorRT headers and libraries. Its main functions include:

Defining output variables:

TensorRT_INCLUDE_DIRS: the path to the TensorRT headers.

TensorRT_LIBRARIES: the paths to the TensorRT libraries.

TensorRT_FOUND: whether TensorRT was successfully found.

TensorRT_VERSION_STRING: the TensorRT version number (e.g. 8.5.2).

TensorRT_VERSION_MAJOR, TensorRT_VERSION_MINOR, TensorRT_VERSION_PATCH: the major, minor, and patch version numbers.

Supporting a user-specified path:

You can set TensorRT_DIR to point at the TensorRT installation root. If it is not set, default paths (such as /usr) are searched.

Locating the TensorRT headers and libraries:

find_path and find_library are used to locate the TensorRT headers (e.g. NvInfer.h) and libraries (e.g. nvinfer, nvonnxparser).

Extracting the TensorRT version number:

The major, minor, and patch version numbers are parsed out of NvInfer.h.

Defining a CMake target:

If TensorRT is found, a TensorRT::TensorRT target is defined so that projects can reference it conveniently (a usage sketch follows).
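
As a quick illustration, a minimal consumer of this module might look like the sketch below (it mirrors what the CMakeLists in section 1 already does; my_app and main.cpp are placeholder names):

cmake_minimum_required(VERSION 3.18)
project(my_app)

# Make FindTensorRT.cmake discoverable (assuming it sits in ./cmake)
list(APPEND CMAKE_MODULE_PATH "${CMAKE_SOURCE_DIR}/cmake")

find_package(TensorRT REQUIRED)
message(STATUS "Found TensorRT ${TensorRT_VERSION_STRING}")

add_executable(my_app main.cpp)
# The imported target carries the include path and the core nvinfer library;
# additionally link ${TensorRT_LIBRARIES} if the ONNX parser is needed.
target_link_libraries(my_app PRIVATE TensorRT::TensorRT)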

The code of FindTensorRT.cmake is as follows:

# source:
# https://github.com/NVIDIA/tensorrt-laboratory/blob/master/cmake/FindTensorRT.cmake

# This module defines the following variables:
#
# ::
#
#   TensorRT_INCLUDE_DIRS - path to the TensorRT headers
#   TensorRT_LIBRARIES    - paths to the TensorRT libraries
#   TensorRT_FOUND        - whether TensorRT was found
#
# ::
#
#   TensorRT_VERSION_STRING - TensorRT version number (x.y.z)
#   TensorRT_VERSION_MAJOR  - major version (x)
#   TensorRT_VERSION_MINOR  - minor version (y)
#   TensorRT_VERSION_PATCH  - patch version (z)
#
# Hints:
# A user may set `TensorRT_DIR` to the TensorRT installation root directory.

# Build the list of search locations
set(_TensorRT_SEARCHES)

# If the user specified TensorRT_DIR, search it first (and exclusively for that call)
if(TensorRT_DIR)
    set(_TensorRT_SEARCH_ROOT PATHS ${TensorRT_DIR} NO_DEFAULT_PATH)
    list(APPEND _TensorRT_SEARCHES _TensorRT_SEARCH_ROOT)
endif()

# Add the default search location (/usr)
set(_TensorRT_SEARCH_NORMAL
        PATHS "/usr"
        )
list(APPEND _TensorRT_SEARCHES _TensorRT_SEARCH_NORMAL)
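
# Note: _TensorRT_SEARCHES stores variable *names*, so the double expansion
# ${${search}} in the loops below yields the stored arguments, e.g.
# "PATHS /usr" or "PATHS ${TensorRT_DIR} NO_DEFAULT_PATH".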

# Locate the TensorRT header directory
foreach(search ${_TensorRT_SEARCHES})
    find_path(TensorRT_INCLUDE_DIR NAMES NvInfer.h ${${search}} PATH_SUFFIXES include)
endforeach()

# Locate the core TensorRT library (nvinfer)
if(NOT TensorRT_LIBRARY)
    foreach(search ${_TensorRT_SEARCHES})
        find_library(TensorRT_LIBRARY NAMES nvinfer ${${search}} PATH_SUFFIXES lib)
    endforeach()
endif()

# Locate the TensorRT ONNX parser library (nvonnxparser)
if(NOT TensorRT_NVONNXPARSER_LIBRARY)
    foreach(search ${_TensorRT_SEARCHES})
        find_library(TensorRT_NVONNXPARSER_LIBRARY NAMES nvonnxparser ${${search}} PATH_SUFFIXES lib)
    endforeach()
endif()

# Mark TensorRT_INCLUDE_DIR as an advanced cache variable
mark_as_advanced(TensorRT_INCLUDE_DIR)

# If NvInfer.h was found, extract the TensorRT version number from it
if(TensorRT_INCLUDE_DIR AND EXISTS "${TensorRT_INCLUDE_DIR}/NvInfer.h")
    # Read the #define lines for the major, minor, and patch versions from NvInfer.h
    file(STRINGS "${TensorRT_INCLUDE_DIR}/NvInfer.h" TensorRT_MAJOR REGEX "^#define NV_TENSORRT_MAJOR [0-9]+.*$")
    file(STRINGS "${TensorRT_INCLUDE_DIR}/NvInfer.h" TensorRT_MINOR REGEX "^#define NV_TENSORRT_MINOR [0-9]+.*$")
    file(STRINGS "${TensorRT_INCLUDE_DIR}/NvInfer.h" TensorRT_PATCH REGEX "^#define NV_TENSORRT_PATCH [0-9]+.*$")

    # Pull the version numbers out with regular expressions
    string(REGEX REPLACE "^#define NV_TENSORRT_MAJOR ([0-9]+).*$" "\\1" TensorRT_VERSION_MAJOR "${TensorRT_MAJOR}")
    string(REGEX REPLACE "^#define NV_TENSORRT_MINOR ([0-9]+).*$" "\\1" TensorRT_VERSION_MINOR "${TensorRT_MINOR}")
    string(REGEX REPLACE "^#define NV_TENSORRT_PATCH ([0-9]+).*$" "\\1" TensorRT_VERSION_PATCH "${TensorRT_PATCH}")
    set(TensorRT_VERSION_STRING "${TensorRT_VERSION_MAJOR}.${TensorRT_VERSION_MINOR}.${TensorRT_VERSION_PATCH}")
endif()

# Hand the results to CMake's standard package-handling helper: it sets
# TensorRT_FOUND based on the REQUIRED_VARS, honors REQUIRED/QUIET from
# find_package, and reports the detected version via VERSION_VAR
include(FindPackageHandleStandardArgs)
FIND_PACKAGE_HANDLE_STANDARD_ARGS(TensorRT REQUIRED_VARS TensorRT_LIBRARY TensorRT_INCLUDE_DIR VERSION_VAR TensorRT_VERSION_STRING)

# If TensorRT was found, set the result variables and define the target
if(TensorRT_FOUND)
    # header path
    set(TensorRT_INCLUDE_DIRS ${TensorRT_INCLUDE_DIR})

    # library paths
    if(NOT TensorRT_LIBRARIES)
        set(TensorRT_LIBRARIES ${TensorRT_LIBRARY} ${TensorRT_NVONNXPARSER_LIBRARY} ${TensorRT_NVPARSERS_LIBRARY})
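        # Note: TensorRT_NVPARSERS_LIBRARY is never searched for in this
        # module, so it expands to nothing unless the user sets it manually.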
    endif()

    # Define the TensorRT::TensorRT imported target
    if(NOT TARGET TensorRT::TensorRT)
        add_library(TensorRT::TensorRT UNKNOWN IMPORTED)
        # propagate the header path to consumers of the target
        set_target_properties(TensorRT::TensorRT PROPERTIES INTERFACE_INCLUDE_DIRECTORIES "${TensorRT_INCLUDE_DIRS}")
        # point the target at the core library
        set_property(TARGET TensorRT::TensorRT APPEND PROPERTY IMPORTED_LOCATION "${TensorRT_LIBRARY}")
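        # Note: linking TensorRT::TensorRT therefore brings in only the core
        # nvinfer library; the ONNX parser comes via ${TensorRT_LIBRARIES},
        # which is what the CMakeLists in section 1 links against.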
    endif()
endif()

3. Building and Running

Building and running the TensorRT C++ demo requires a GPU-enabled build of OpenCV plus many other dependencies, all of which I have packaged into a ready-to-use docker image. Just run docker pull wenyan5986/yolo_tensorrt_cpp:jetpack62 to fetch it. Note that I am on JetPack 6.2 (L4T 36.4.3); with JetPack 5.x, building inside this image will most likely fail.
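
A typical way to start the container might look like the following sketch; it assumes the nvidia container runtime that JetPack normally installs, and the volume mount and shell are just examples:

docker run -it --runtime nvidia --network host \
    -v $HOME/work:/work \
    wenyan5986/yolo_tensorrt_cpp:jetpack62 /bin/bash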

Next, clone the code. I have put both the code and the ONNX model file in the git repository, so you can pull everything directly:

git clone https://github.com/borninfreedom/tensorrt_cpp_api_demo.git

Then run the build. First cd into the tensorrt cpp api directory, then:

mkdir build
cd build
cmake ..
make -j$(nproc)

When the build finishes, the run_inference_benchmark executable will be in the build folder; run it directly:

./run_inference_benchmark --onnx_model ../models/yolov8n.onnx

My output was:

[2025-05-18 06:57:14.550] [warning] LOG_LEVEL environment variable not set. Using default log level (info).
[2025-05-18 06:57:14.572] [info] Engine name: yolov8n.engine.Orin.fp16.1.1.-1.-1.-1
[2025-05-18 06:57:14.572] [info] Searching for engine file with name: ./yolov8n.engine.Orin.fp16.1.1.-1.-1.-1
[2025-05-18 06:57:14.572] [info] Engine not found, generating. This could take a while...
[2025-05-18 06:57:14.679] [info] [MemUsageChange] Init CUDA: CPU +12, GPU +0, now: CPU 34, GPU 4128 (MiB)
[2025-05-18 06:57:17.500] [info] [MemUsageChange] Init builder kernel library: CPU +947, GPU +1248, now: CPU 1024, GPU 5423 (MiB)
[2025-05-18 06:57:17.567] [info] Model only supports fixed batch size of 1
[2025-05-18 06:57:17.567] [info] Engine name: yolov8n.engine.Orin.fp16.1.1.-1.-1.-1
[2025-05-18 06:57:17.632] [info] Local timing cache in use. Profiling results in this builder pass will not be stored.
[2025-05-18 07:04:42.149] [info] Detected 1 inputs and 3 output network tensors.
[2025-05-18 07:04:46.829] [info] Total Host Persistent Memory: 388624 bytes
[2025-05-18 07:04:46.829] [info] Total Device Persistent Memory: 0 bytes
[2025-05-18 07:04:46.829] [info] Max Scratch Memory: 1075200 bytes
[2025-05-18 07:04:46.829] [info] [BlockAssignment] Started assigning block shifts. This will take 114 steps to complete.
[2025-05-18 07:04:46.840] [info] [BlockAssignment] Algorithm ShiftNTopDown took 10.4014ms to assign 7 blocks to 114 nodes requiring 10035712 bytes.
[2025-05-18 07:04:46.840] [info] Total Activation Memory: 10035200 bytes
[2025-05-18 07:04:46.844] [info] Total Weights Memory: 6430240 bytes
[2025-05-18 07:04:47.016] [info] Engine generation completed in 449.386 seconds.
[2025-05-18 07:04:47.023] [info] [MemUsageStats] Peak memory usage of TRT CPU/GPU memory allocators: CPU 0 MiB, GPU 260 MiB
[2025-05-18 07:04:47.088] [info] [MemUsageStats] Peak memory usage during Engine building and serialization: CPU: 1816 MiB
[2025-05-18 07:04:47.113] [info] Success, saved engine to ./yolov8n.engine.Orin.fp16.1.1.-1.-1.-1
[2025-05-18 07:04:47.544] [info] Loading TensorRT engine file at path: ./yolov8n.engine.Orin.fp16.1.1.-1.-1.-1
[2025-05-18 07:04:47.547] [info] Loaded engine size: 9 MiB
[2025-05-18 07:04:47.587] [info] [MemUsageChange] TensorRT-managed allocation in IExecutionContext creation: CPU +0, GPU +9, now: CPU 0, GPU 15 (MiB)
[2025-05-18 07:04:47.658] [info] Warming up the network...
[2025-05-18 07:04:49.226] [info] Running benchmarks (1000 iterations)...
[2025-05-18 07:04:56.817] [info] Benchmarking complete!
[2025-05-18 07:04:56.817] [info] ======================
[2025-05-18 07:04:56.817] [info] Avg time per sample:
[2025-05-18 07:04:56.820] [info] Avg time per sample: 7.59 ms
[2025-05-18 07:04:56.820] [info] Batch size: 1
[2025-05-18 07:04:56.820] [info] Avg FPS: 131 fps
[2025-05-18 07:04:56.820] [info] ======================

[2025-05-18 07:04:56.820] [info] Batch 0, output 0
[2025-05-18 07:04:56.820] [info] 3.407227 16.550781 20.929688 29.898438 43.664062 54.828125 62.062500 66.000000 70.046875 73.000000 ...

You can see the FPS and other statistics on my machine. Note that most of the time in this first run went into building the FP16 engine (about 449 seconds in the log above); the engine is serialized to ./yolov8n.engine.Orin.fp16.1.1.-1.-1.-1, so subsequent runs load it directly instead of rebuilding it.