diff options
Diffstat (limited to '15089.diff')
-rw-r--r-- | 15089.diff | 476 |
1 files changed, 476 insertions, 0 deletions
diff --git a/15089.diff b/15089.diff new file mode 100644 index 000000000000..30a3be0bcbeb --- /dev/null +++ b/15089.diff @@ -0,0 +1,476 @@ +diff --git a/include/onnxruntime/core/providers/tensorrt/tensorrt_provider_options.h b/include/onnxruntime/core/providers/tensorrt/tensorrt_provider_options.h +index e7e989cf17..6f76036b8e 100644 +--- a/include/onnxruntime/core/providers/tensorrt/tensorrt_provider_options.h ++++ b/include/onnxruntime/core/providers/tensorrt/tensorrt_provider_options.h +@@ -31,4 +31,10 @@ struct OrtTensorRTProviderOptionsV2 { + int trt_force_sequential_engine_build; // force building TensorRT engine sequentially. Default 0 = false, nonzero = true + int trt_context_memory_sharing_enable; // enable context memory sharing between subgraphs. Default 0 = false, nonzero = true + int trt_layer_norm_fp32_fallback; // force Pow + Reduce ops in layer norm to FP32. Default 0 = false, nonzero = true ++ int trt_build_heuristics_enable; // Build engine using heuristics to reduce build time. Default 0 = false, nonzero = true ++ int trt_sparsity_enable; // Control if sparsity can be used by TRT. Default 0 = false, 1 = true ++ int trt_builder_optimization_level; // Set the builder optimization level. WARNING: levels below 2 do not guarantee good engine performance, but greatly improve build time. Default 2, valid range [0-4] ++ int trt_auxiliary_streams; // Set maximum number of auxiliary streams per inference stream. Setting this value to 0 will lead to optimal memory usage. Default -1 = heuristics ++ const char* trt_tactic_sources; // Specify the tactics to be used by adding (+) or removing (-) tactics from the default ++ // tactic sources (default = all available tactics) e.g. 
"-CUDNN,+CUBLAS" available keys: "CUBLAS"|"CUBLAS_LT"|"CUDNN"|"EDGE_MASK_CONVOLUTIONS" + }; +diff --git a/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.cc b/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.cc +index ca815fd788..ef96bc0e6f 100644 +--- a/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.cc ++++ b/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.cc +@@ -119,6 +119,67 @@ bool SetDynamicRange(nvinfer1::INetworkDefinition& network, std::unordered_map<s + } + } // namespace + ++std::vector<std::string> SplitToStringVec(std::string const& s, char separator) { ++ std::vector<std::string> splitted; ++ ++ for (size_t start = 0; start < s.length();) { ++ size_t separatorIndex = s.find(separator, start); ++ if (separatorIndex == std::string::npos) { ++ separatorIndex = s.length(); ++ } ++ splitted.emplace_back(s.substr(start, separatorIndex - start)); ++ start = separatorIndex + 1; ++ } ++ ++ return splitted; ++} ++ ++nvinfer1::TacticSources GetTacticSourceFromString(std::string& tactic_sting) { ++ nvinfer1::TacticSources disabledTactics = 0; ++ nvinfer1::TacticSources enabledTactics = 0; ++ std::vector<std::string> tacticList = SplitToStringVec(tactic_sting, ','); ++ for (auto& t : tacticList) { ++ bool enable{false}; ++ if (t.front() == '+') { ++ enable = true; ++ } else if (t.front() != '-') { ++ LOGS_DEFAULT(WARNING) << "[TensorRT EP] Tactic source must be prefixed with + or - skipping: " << t; ++ } ++ t.erase(0, 1); ++ ++ const auto toUpper = [](std::string& sourceName) { ++ std::transform( ++ sourceName.begin(), sourceName.end(), sourceName.begin(), [](char c) { return std::toupper(c); }); ++ return sourceName; ++ }; ++ ++ nvinfer1::TacticSource source{}; ++ t = toUpper(t); ++ if (t == "CUBLAS") { ++ source = nvinfer1::TacticSource::kCUBLAS; ++ } else if (t == "CUBLASLT" || t == "CUBLAS_LT") { ++ source = nvinfer1::TacticSource::kCUBLAS_LT; ++ } else if (t == "CUDNN") { ++ source = 
nvinfer1::TacticSource::kCUDNN; ++ } else if (t == "EDGE_MASK_CONVOLUTIONS") { ++ source = nvinfer1::TacticSource::kEDGE_MASK_CONVOLUTIONS; ++ } else if (t == "JIT_CONVOLUTIONS") { ++ source = nvinfer1::TacticSource::kJIT_CONVOLUTIONS; ++ } else { ++ LOGS_DEFAULT(WARNING) << "[TensorRT EP] Tactic source was not found with name: " << t; ++ } ++ ++ uint32_t sourceBit = 1U << static_cast<uint32_t>(source); ++ ++ if (enable) { ++ enabledTactics |= sourceBit; ++ } else { ++ disabledTactics |= sourceBit; ++ } ++ } ++ return enabledTactics & ~disabledTactics; ++} ++ + namespace google { + namespace protobuf { + void ShutdownProtobufLibrary(); +@@ -324,6 +385,11 @@ TensorrtExecutionProvider::TensorrtExecutionProvider(const TensorrtExecutionProv + if (fp16_enable_) { + layer_norm_fp32_fallback_ = info.layer_norm_fp32_fallback; + } ++ build_heuristics_enable_ = info.build_heuristics_enable; ++ sparsity_enable_ = info.sparsity_enable; ++ builder_optimization_level_ = info.builder_optimization_level; ++ auxiliary_streams_ = info.auxiliary_streams; ++ tactic_sources_ = info.tactic_sources; + } else { + const std::string max_partition_iterations_env = onnxruntime::GetEnvironmentVar(tensorrt_env_vars::kMaxPartitionIterations); + if (!max_partition_iterations_env.empty()) { +@@ -418,6 +484,31 @@ TensorrtExecutionProvider::TensorrtExecutionProvider(const TensorrtExecutionProv + if (!layer_norm_fp32_fallback_env.empty()) { + layer_norm_fp32_fallback_ = (std::stoi(layer_norm_fp32_fallback_env) == 0 ? false : true); + } ++ ++ const std::string build_heuristics_env = onnxruntime::GetEnvironmentVar(tensorrt_env_vars::kBuildHeuristics); ++ if (!build_heuristics_env.empty()) { ++ build_heuristics_enable_ = (std::stoi(build_heuristics_env) == 0 ? false : true); ++ } ++ ++ const std::string sparsity_enable_env = onnxruntime::GetEnvironmentVar(tensorrt_env_vars::kSparsityEnable); ++ if (!sparsity_enable_env.empty()) { ++ sparsity_enable_ = (std::stoi(sparsity_enable_env) == 0 ? 
false : true); ++ } ++ ++ const std::string builder_optimization_level_env = onnxruntime::GetEnvironmentVar(tensorrt_env_vars::kBuilderOptimizationLevel); ++ if (!builder_optimization_level_env.empty()) { ++ builder_optimization_level_ = std::stoi(builder_optimization_level_env); ++ } ++ ++ const std::string auxiliary_streams_env = onnxruntime::GetEnvironmentVar(tensorrt_env_vars::kAuxiliaryStreams); ++ if (!auxiliary_streams_env.empty()) { ++ auxiliary_streams_ = std::stoi(auxiliary_streams_env); ++ } ++ ++ const std::string tactic_sources_env = onnxruntime::GetEnvironmentVar(tensorrt_env_vars::kTacticSources); ++ if (!tactic_sources_env.empty()) { ++ tactic_sources_ = tactic_sources_env; ++ } + } + + // Validate setting +@@ -483,7 +574,12 @@ TensorrtExecutionProvider::TensorrtExecutionProvider(const TensorrtExecutionProv + << ", trt_engine_decryption_lib_path: " << engine_decryption_lib_path_ + << ", trt_force_sequential_engine_build: " << force_sequential_engine_build_ + << ", trt_context_memory_sharing_enable: " << context_memory_sharing_enable_ +- << ", trt_layer_norm_fp32_fallback: " << layer_norm_fp32_fallback_; ++ << ", trt_layer_norm_fp32_fallback: " << layer_norm_fp32_fallback_ ++ << ", trt_build_heuristics_enable: " << build_heuristics_enable_ ++ << ", trt_sparsity_enable: " << sparsity_enable_ ++ << ", trt_builder_optimization_level: " << builder_optimization_level_ ++ << ", trt_auxiliary_streams: " << auxiliary_streams_ ++ << ", trt_tactic_sources: " << tactic_sources_; + } + + TensorrtExecutionProvider::~TensorrtExecutionProvider() { +@@ -1366,6 +1462,38 @@ common::Status TensorrtExecutionProvider::Compile(const std::vector<FusedNodeAnd + } + } + ++ // enable sparse weights ++ if (sparsity_enable_) { ++ trt_config->setFlag(nvinfer1::BuilderFlag::kSPARSE_WEIGHTS); ++ LOGS_DEFAULT(VERBOSE) << "[TensorRT EP] Sparse weights are allowed"; ++ } ++ ++ // enable builder heuristics ++ if (build_heuristics_enable_) { ++ 
trt_config->setFlag(nvinfer1::BuilderFlag::kENABLE_TACTIC_HEURISTIC); ++ LOGS_DEFAULT(VERBOSE) << "[TensorRT EP] Builder heuristics are enabled"; ++ } ++ ++ // switch optimization level ++ if (builder_optimization_level_ != 2) { ++ trt_config->setBuilderOptimizationLevel(builder_optimization_level_); ++ LOGS_DEFAULT(VERBOSE) << "[TensorRT EP] Builder optimization level is set to " << builder_optimization_level_; ++ } ++ ++ // limit auxiliary streams ++ if (auxiliary_streams_ >= 0) { ++ trt_config->setMaxAuxStreams(auxiliary_streams_); ++ LOGS_DEFAULT(VERBOSE) << "[TensorRT EP] Auxiliary streams are set to " << auxiliary_streams_; ++ } ++ ++ // limit used tactic sources ++ if (!tactic_sources_.empty()) { ++ nvinfer1::TacticSources tactics = trt_config->getTacticSources(); ++ tactics |= GetTacticSourceFromString(tactic_sources_); ++ trt_config->setTacticSources(tactics); ++ LOGS_DEFAULT(VERBOSE) << "[TensorRT EP] Tactic sources are limited using " << tactic_sources_; ++ } ++ + // Build TRT engine here if the graph doesn't have dynamic shape input. 
Otherwise engine will + // be built at runtime + std::unique_ptr<nvinfer1::ICudaEngine> trt_engine; +@@ -1498,13 +1626,19 @@ common::Status TensorrtExecutionProvider::Compile(const std::vector<FusedNodeAnd + NodeComputeInfo compute_info; + compute_info.create_state_func = [=](ComputeContext* context, FunctionState* state) { + std::unique_ptr<TensorrtFuncState> p = std::make_unique<TensorrtFuncState>(); ++ // translate tactic sources string to nvinfer1::TacticSources ++ nvinfer1::TacticSources tactics = 0; ++ if (!tactic_sources_.empty()) { ++ tactics = GetTacticSourceFromString(tactic_sources_); ++ } + *p = {context->allocate_func, context->release_func, context->allocator_handle, &parsers_[context->node_name], + &engines_[context->node_name], &contexts_[context->node_name], &builders_[context->node_name], + &networks_[context->node_name], input_info_[context->node_name], output_info_[context->node_name], + input_shape_ranges_[context->node_name], &tensorrt_mu_, fp16_enable_, int8_enable_, int8_calibration_cache_available_, + dla_enable_, dla_core_, &max_workspace_size_, trt_node_name_with_precision, engine_cache_enable_, cache_path_, + runtime_.get(), nullptr, allocator_, context_memory_sharing_enable_, &max_ctx_mem_size_, &context_memory_, +- dynamic_range_map, engine_decryption_enable_, engine_decryption_, engine_encryption_}; ++ dynamic_range_map, engine_decryption_enable_, engine_decryption_, engine_encryption_, ++ build_heuristics_enable_, sparsity_enable_, builder_optimization_level_, auxiliary_streams_ , !tactic_sources_.empty(), tactics}; + *state = p.release(); + return 0; + }; +@@ -1779,6 +1913,38 @@ common::Status TensorrtExecutionProvider::Compile(const std::vector<FusedNodeAnd + trt_config->setDLACore(trt_state->dla_core); + } + ++ // enable sparse weights ++ if (trt_state->sparsity_enable) { ++ trt_config->setFlag(nvinfer1::BuilderFlag::kSPARSE_WEIGHTS); ++ LOGS_DEFAULT(VERBOSE) << "[TensorRT EP] Sparse weights are allowed"; ++ } ++ ++ // enable 
builder heuristics ++ if (trt_state->build_heuristics_enable) { ++ trt_config->setFlag(nvinfer1::BuilderFlag::kENABLE_TACTIC_HEURISTIC); ++ LOGS_DEFAULT(VERBOSE) << "[TensorRT EP] Builder heuristics are enabled"; ++ } ++ ++ // switch optimization level ++ if (trt_state->builder_optimization_level != 2) { ++ trt_config->setBuilderOptimizationLevel(trt_state->builder_optimization_level); ++ LOGS_DEFAULT(VERBOSE) << "[TensorRT EP] Builder optimization level is set to " << trt_state->builder_optimization_level; ++ } ++ ++ // limit auxiliary streams ++ if (trt_state->auxiliary_streams >= 0) { ++ trt_config->setMaxAuxStreams(trt_state->auxiliary_streams); ++ LOGS_DEFAULT(VERBOSE) << "[TensorRT EP] Auxiliary streams are set to " << trt_state->auxiliary_streams; ++ } ++ ++ // limit used tactic sources ++ if (trt_state->filter_tactic_sources) { ++ nvinfer1::TacticSources tactics = trt_config->getTacticSources(); ++ tactics |= trt_state->tactic_sources; ++ trt_config->setTacticSources(tactics); ++ LOGS_DEFAULT(VERBOSE) << "[TensorRT EP] Tactic sources are limited using bitmask " << tactics; ++ } ++ + // Build engine + { + auto lock = GetApiLock(); +diff --git a/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.h b/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.h +index 4558b75fee..042495e961 100644 +--- a/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.h ++++ b/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.h +@@ -30,6 +30,11 @@ static const std::string kDecryptionLibPath = "ORT_TENSORRT_ENGINE_DECRYPTION_LI + static const std::string kForceSequentialEngineBuild = "ORT_TENSORRT_FORCE_SEQUENTIAL_ENGINE_BUILD"; + static const std::string kContextMemorySharingEnable = "ORT_TENSORRT_CONTEXT_MEMORY_SHARING_ENABLE"; + static const std::string kLayerNormFP32Fallback = "ORT_TENSORRT_LAYER_NORM_FP32_FALLBACK"; ++static const std::string kBuildHeuristics = "ORT_TENSORRT_BUILD_HEURISTICS_ENABLE"; ++static const std::string 
kSparsityEnable = "ORT_TENSORRT_SPARSITY_ENABLE"; ++static const std::string kBuilderOptimizationLevel = "ORT_TENSORRT_BUILDER_OPTIMIZATION_LEVEL"; ++static const std::string kAuxiliaryStreams = "ORT_TENSORRT_AUXILIARY_STREAMS"; ++static const std::string kTacticSources = "ORT_TENSORRT_TACTIC_SOURCES"; + // Old env variable for backward compatibility + static const std::string kEngineCachePath = "ORT_TENSORRT_ENGINE_CACHE_PATH"; + } // namespace tensorrt_env_vars +@@ -114,6 +119,12 @@ struct TensorrtFuncState { + bool engine_decryption_enable = false; + int (*engine_decryption)(const char*, char*, size_t*) = nullptr; + int (*engine_encryption)(const char*, char*, size_t) = nullptr; ++ bool build_heuristics_enable = false; ++ bool sparsity_enable = false; ++ int builder_optimization_level = 2; ++ int auxiliary_streams = -1; ++ bool filter_tactic_sources = false; ++ nvinfer1::TacticSources tactic_sources; + }; + + // Logical device representation. +@@ -163,6 +174,11 @@ class TensorrtExecutionProvider : public IExecutionProvider { + bool int8_use_native_tensorrt_calibration_table_ = false; + bool dump_subgraphs_ = false; + bool engine_cache_enable_ = false; ++ bool build_heuristics_enable_ = false; ++ bool sparsity_enable_ = false; ++ int builder_optimization_level_ = 2; ++ int auxiliary_streams_ = -1; ++ std::string tactic_sources_; + std::string cache_path_, engine_decryption_lib_path_; + std::unique_ptr<nvinfer1::IRuntime> runtime_ = nullptr; + OrtMutex tensorrt_mu_; +diff --git a/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider_info.cc b/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider_info.cc +index 2db405d512..b431cfc53b 100644 +--- a/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider_info.cc ++++ b/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider_info.cc +@@ -30,6 +30,11 @@ constexpr const char* kForceSequentialEngineBuild = "trt_force_sequential_engine + // add new provider option name here. 
+ constexpr const char* kContextMemorySharingEnable = "trt_context_memory_sharing_enable"; + constexpr const char* kLayerNormFP32Fallback = "trt_layer_norm_fp32_fallback"; ++constexpr const char* kBuildHeuristics = "trt_build_heuristics_enable"; ++constexpr const char* kSparsityEnable = "trt_sparsity_enable"; ++constexpr const char* kBuilderOptimizationLevel = "trt_builder_optimization_level"; ++constexpr const char* kAuxiliaryStreams = "trt_auxiliary_streams"; ++constexpr const char* kTacticSources = "trt_tactic_sources"; + } // namespace provider_option_names + } // namespace tensorrt + +@@ -66,6 +71,11 @@ TensorrtExecutionProviderInfo TensorrtExecutionProviderInfo::FromProviderOptions + .AddAssignmentToReference(tensorrt::provider_option_names::kForceSequentialEngineBuild, info.force_sequential_engine_build) + .AddAssignmentToReference(tensorrt::provider_option_names::kContextMemorySharingEnable, info.context_memory_sharing_enable) + .AddAssignmentToReference(tensorrt::provider_option_names::kLayerNormFP32Fallback, info.layer_norm_fp32_fallback) ++ .AddAssignmentToReference(tensorrt::provider_option_names::kBuildHeuristics, info.build_heuristics_enable) ++ .AddAssignmentToReference(tensorrt::provider_option_names::kSparsityEnable, info.sparsity_enable) ++ .AddAssignmentToReference(tensorrt::provider_option_names::kBuilderOptimizationLevel, info.builder_optimization_level) ++ .AddAssignmentToReference(tensorrt::provider_option_names::kAuxiliaryStreams, info.auxiliary_streams) ++ .AddAssignmentToReference(tensorrt::provider_option_names::kTacticSources, info.tactic_sources) + .Parse(options)); // add new provider option here. + + return info; +@@ -93,6 +103,11 @@ ProviderOptions TensorrtExecutionProviderInfo::ToProviderOptions(const TensorrtE + // add new provider option here. 
+ {tensorrt::provider_option_names::kContextMemorySharingEnable, MakeStringWithClassicLocale(info.context_memory_sharing_enable)}, + {tensorrt::provider_option_names::kLayerNormFP32Fallback, MakeStringWithClassicLocale(info.layer_norm_fp32_fallback)}, ++ {tensorrt::provider_option_names::kBuildHeuristics, MakeStringWithClassicLocale(info.build_heuristics_enable)}, ++ {tensorrt::provider_option_names::kSparsityEnable, MakeStringWithClassicLocale(info.sparsity_enable)}, ++ {tensorrt::provider_option_names::kBuilderOptimizationLevel, MakeStringWithClassicLocale(info.builder_optimization_level)}, ++ {tensorrt::provider_option_names::kAuxiliaryStreams, MakeStringWithClassicLocale(info.auxiliary_streams)}, ++ {tensorrt::provider_option_names::kTacticSources, MakeStringWithClassicLocale(info.tactic_sources)}, + }; + return options; + } +diff --git a/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider_info.h b/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider_info.h +index 1f1fdb679f..d0715756a2 100644 +--- a/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider_info.h ++++ b/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider_info.h +@@ -33,6 +33,11 @@ struct TensorrtExecutionProviderInfo { + bool force_sequential_engine_build{false}; + bool context_memory_sharing_enable{false}; + bool layer_norm_fp32_fallback{false}; ++ bool build_heuristics_enable{false}; ++ bool sparsity_enable{false}; ++ int builder_optimization_level{2}; ++ int auxiliary_streams{-1}; ++ std::string tactic_sources{""}; + + static TensorrtExecutionProviderInfo FromProviderOptions(const ProviderOptions& options); + static ProviderOptions ToProviderOptions(const TensorrtExecutionProviderInfo& info); +diff --git a/onnxruntime/core/providers/tensorrt/tensorrt_provider_factory.cc b/onnxruntime/core/providers/tensorrt/tensorrt_provider_factory.cc +index dd6915878e..8e0f1e50c6 100644 +--- a/onnxruntime/core/providers/tensorrt/tensorrt_provider_factory.cc ++++ 
b/onnxruntime/core/providers/tensorrt/tensorrt_provider_factory.cc +@@ -70,6 +70,11 @@ struct Tensorrt_Provider : Provider { + info.force_sequential_engine_build = options.trt_force_sequential_engine_build != 0; + info.context_memory_sharing_enable = options.trt_context_memory_sharing_enable != 0; + info.layer_norm_fp32_fallback = options.trt_layer_norm_fp32_fallback != 0; ++ info.build_heuristics_enable = options.trt_build_heuristics_enable != 0; ++ info.sparsity_enable = options.trt_sparsity_enable != 0; ++ info.builder_optimization_level = options.trt_builder_optimization_level; ++ info.auxiliary_streams = options.trt_auxiliary_streams; ++ info.tactic_sources = options.trt_tactic_sources == nullptr ? "" : options.trt_tactic_sources; + return std::make_shared<TensorrtProviderFactory>(info); + } + +@@ -137,6 +142,24 @@ struct Tensorrt_Provider : Provider { + trt_options.trt_force_sequential_engine_build = internal_options.force_sequential_engine_build; + trt_options.trt_context_memory_sharing_enable = internal_options.context_memory_sharing_enable; + trt_options.trt_layer_norm_fp32_fallback = internal_options.layer_norm_fp32_fallback; ++ trt_options.trt_build_heuristics_enable = internal_options.build_heuristics_enable; ++ trt_options.trt_sparsity_enable = internal_options.sparsity_enable; ++ trt_options.trt_builder_optimization_level = internal_options.builder_optimization_level; ++ trt_options.trt_auxiliary_streams = internal_options.auxiliary_streams; ++ str_size = internal_options.tactic_sources.size(); ++ if (str_size == 0) { ++ trt_options.trt_tactic_sources = nullptr; ++ } else { ++ dest = new char[str_size + 1]; ++#ifdef _MSC_VER ++ strncpy_s(dest, str_size + 1, internal_options.tactic_sources.c_str(), str_size); ++#else ++ strncpy(dest, internal_options.tactic_sources.c_str(), str_size); ++#endif ++ dest[str_size] = '\0'; ++ trt_options.trt_tactic_sources = (const char*)dest; ++ } ++ + } + + ProviderOptions 
GetProviderOptions(const void* provider_options) override { +diff --git a/onnxruntime/core/session/provider_bridge_ort.cc b/onnxruntime/core/session/provider_bridge_ort.cc +index 81510120f4..17545f2c06 100644 +--- a/onnxruntime/core/session/provider_bridge_ort.cc ++++ b/onnxruntime/core/session/provider_bridge_ort.cc +@@ -1274,6 +1274,11 @@ OrtTensorRTProviderOptionsV2 OrtTensorRTProviderOptionsToOrtTensorRTProviderOpti + // Use default value as this field is not available in OrtTensorRTProviderOptionsV + trt_options_converted.trt_context_memory_sharing_enable = 0; + trt_options_converted.trt_layer_norm_fp32_fallback = 0; ++ trt_options_converted.trt_build_heuristics_enable = 0; ++ trt_options_converted.trt_sparsity_enable = 0; ++ trt_options_converted.trt_builder_optimization_level = 2; ++ trt_options_converted.trt_auxiliary_streams = -1; ++ trt_options_converted.trt_tactic_sources = ""; + return trt_options_converted; + } + +diff --git a/onnxruntime/python/onnxruntime_pybind_state.cc b/onnxruntime/python/onnxruntime_pybind_state.cc +index f61fe7b878..dc54198d55 100644 +--- a/onnxruntime/python/onnxruntime_pybind_state.cc ++++ b/onnxruntime/python/onnxruntime_pybind_state.cc +@@ -366,7 +366,12 @@ std::unique_ptr<IExecutionProvider> CreateExecutionProviderInstance( + nullptr, + 0, + 0, +- 0}; ++ 0, ++ 0, ++ 0, ++ 2, ++ -1, ++ nullptr}; + for (auto option : it->second) { + if (option.first == "device_id") { + if (!option.second.empty()) { +diff --git a/onnxruntime/test/providers/cpu/model_tests.cc b/onnxruntime/test/providers/cpu/model_tests.cc +index 5465f81270..172c83be60 100644 +--- a/onnxruntime/test/providers/cpu/model_tests.cc ++++ b/onnxruntime/test/providers/cpu/model_tests.cc +@@ -701,7 +701,8 @@ TEST_P(ModelTest, Run) { + if (test_case_name.find(ORT_TSTR("FLOAT16")) != std::string::npos) { + OrtTensorRTProviderOptionsV2 params{0, 0, nullptr, 1000, 1, 1 << 30, + 1, // enable fp16 +- 0, nullptr, 0, 0, 0, 0, 0, nullptr, 0, nullptr, 0, 0, 0}; ++ 0, nullptr, 0, 
0, 0, 0, 0, nullptr, 0, nullptr, 0, 0, 0, 0, 0, 0, 0, 0, ++ 2, -1, nullptr}; + ortso.AppendExecutionProvider_TensorRT_V2(params); + } else { + OrtTensorRTProviderOptionsV2* ep_option = nullptr; +diff --git a/onnxruntime/test/providers/tensorrt/tensorrt_basic_test.cc b/onnxruntime/test/providers/tensorrt/tensorrt_basic_test.cc +index f74ecd9213..2c358d0912 100644 +--- a/onnxruntime/test/providers/tensorrt/tensorrt_basic_test.cc ++++ b/onnxruntime/test/providers/tensorrt/tensorrt_basic_test.cc +@@ -151,7 +151,12 @@ void RunWithOneSessionSingleThreadInference(std::string model_name, std::string + nullptr, + 0, + 0, +- 0}; ++ 0, ++ 0, ++ 0, ++ 2, ++ -1, ++ nullptr}; + + params.trt_engine_cache_enable = 1; + std::unique_ptr<IExecutionProvider> execution_provider = TensorrtExecutionProviderWithOptions(¶ms); +@@ -222,7 +227,12 @@ void RunWithOneSessionMultiThreadsInference(std::string model_name, std::string + nullptr, + 0, + 0, +- 0}; ++ 0, ++ 0, ++ 0, ++ 2, ++ -1, ++ nullptr}; + + params.trt_engine_cache_enable = 1; + std::unique_ptr<IExecutionProvider> execution_provider = TensorrtExecutionProviderWithOptions(¶ms); +@@ -386,7 +396,12 @@ TEST_P(TensorrtExecutionProviderCacheTest, Run) { + nullptr, + 0, + 0, +- 0}; ++ 0, ++ 0, ++ 0, ++ 2, ++ -1, ++ nullptr}; + + if (cache_type.compare("engine") == 0) { + |