Skip to content

trt

Type Aliases:

Classes:

  • TRT

    TensorRT backend for Nvidia GPUs using the core.trt plugin.

  • TRT_RTX

    TensorRT RTX backend for Nvidia RTX GPUs using the core.trt_rtx plugin.

Attributes:

LOGGING_VERBOSITY_MAP module-attribute

LOGGING_VERBOSITY_MAP = {DEBUG: 0, INFO: 1, WARNING: 2, ERROR: 3, CRITICAL: 4}

logger module-attribute

logger = getLogger(__name__)

Shape

Shape = tuple[int, int]

TRT dataclass

TRT(
    *,
    plugin: Plugin,
    device_id: int = 0,
    num_streams: int = 1,
    use_cuda_graph: bool = True,
    verbosity: SupportsInt | Severity | Severity | None = None,
    fp16: bool | None = None,
    fp16_blacklist_ops: Collection[str] | None = None,
    bf16: bool | None = None,
    tf32: bool = False,
    strict_nans: bool = False,
    static_shape: bool = True,
    min_shapes: Shape = (0, 0),
    opt_shapes: Shape | None = None,
    max_shapes: Shape | None = None,
    edge_mask_convolutions: bool = True,
    jit_convolutions: bool = True,
    sparse_weights: bool = False,
    workspace: int | None = None,
    builder_optimization_level: int = 3,
    max_aux_streams: int | None = None,
    max_num_tactics: int | None = None,
    tiling_optimization_level: SupportsInt
    | TilingOptimizationLevel
    | TilingOptimizationLevel = 0,
    l2_limit_for_tiling: int = -1,
    avg_timing_iterations: int = 1,
    tactic_dram: int | None = None,
    weight_streaming: bool = False,
    force_rebuild: bool = False,
    max_threads: int | None = None,
)

Bases: Backend

TensorRT backend for Nvidia GPUs using the core.trt plugin.

Classes:

Methods:

Attributes:

MIGX class-attribute instance-attribute

MIGX = MIGX

NCNN class-attribute instance-attribute

NCNN = NCNN

NCNN_VK class-attribute instance-attribute

NCNN_VK = NCNN

ORT class-attribute instance-attribute

ORT = ORT

ORT_COREML class-attribute instance-attribute

ORT_COREML = ORT_COREML

ORT_CPU class-attribute instance-attribute

ORT_CPU = ORT_CPU

ORT_CUDA class-attribute instance-attribute

ORT_CUDA = ORT_CUDA

ORT_DML class-attribute instance-attribute

ORT_DML = ORT_DML

OV class-attribute instance-attribute

OV = OV

OV_CPU class-attribute instance-attribute

OV_CPU = OV_CPU

OV_GPU class-attribute instance-attribute

OV_GPU = OV_GPU

OV_NPU class-attribute instance-attribute

OV_NPU = OV_NPU

TRT class-attribute instance-attribute

TRT = TRT

TRT_RTX class-attribute instance-attribute

TRT_RTX = TRT_RTX

avg_timing_iterations class-attribute instance-attribute

avg_timing_iterations: int = 1

Number of averaging iterations when timing tactics. Higher values produce more stable tactic selection.

bf16 class-attribute instance-attribute

bf16: bool | None = None

Convert the ONNX model to BF16 before building. Defaults to False.

builder_optimization_level class-attribute instance-attribute

builder_optimization_level: int = 3

TensorRT builder optimization level.

device_id class-attribute instance-attribute

device_id: int = 0

CUDA device index.

edge_mask_convolutions class-attribute instance-attribute

edge_mask_convolutions: bool = True

Enable TensorRT edge-mask convolution tactics.

flexible_output_prop class-attribute

flexible_output_prop: str = 'MlrtFlexible'

force_rebuild class-attribute instance-attribute

force_rebuild: bool = field(default=False, repr=False)

Force a full engine rebuild, ignoring any cached engine.

fp16 class-attribute instance-attribute

fp16: bool | None = None

Convert the ONNX model to FP16 before building. Defaults to True.

fp16_blacklist_ops class-attribute instance-attribute

fp16_blacklist_ops: Collection[str] | None = None

ONNX node or op names to keep in FP32 during FP16 conversion.

jit_convolutions class-attribute instance-attribute

jit_convolutions: bool = True

Enable TensorRT JIT convolution tactics.

l2_limit_for_tiling class-attribute instance-attribute

l2_limit_for_tiling: int = -1

L2 cache usage hint for tiling optimization.

max_aux_streams class-attribute instance-attribute

max_aux_streams: int | None = None

Maximum auxiliary streams used by TensorRT kernels.

max_num_tactics class-attribute instance-attribute

max_num_tactics: int | None = None

Maximum number of tactics considered per layer.

max_shapes class-attribute instance-attribute

max_shapes: Shape | None = None

Maximum dynamic input tile size as (width, height). Defaults to the inference tile size.

max_threads class-attribute instance-attribute

max_threads: int | None = field(default=None, repr=False)

Maximum number of builder threads. Limits CPU usage during engine build.

min_shapes class-attribute instance-attribute

min_shapes: Shape = (0, 0)

Minimum dynamic input tile size as (width, height).

num_streams class-attribute instance-attribute

num_streams: int = 1

Number of parallel plugin inference streams.

opt_shapes class-attribute instance-attribute

opt_shapes: Shape | None = None

Optimal input tile size as (width, height). Defaults to the inference tile size.

plugin class-attribute instance-attribute

plugin: Plugin

sparse_weights class-attribute instance-attribute

sparse_weights: bool = False

Allow the builder to exploit structured sparsity in weights.

static_shape class-attribute instance-attribute

static_shape: bool = True

Build a fixed-shape engine when true.

strict_nans class-attribute instance-attribute

strict_nans: bool = False

Disable float optimizations (0*x => 0, x-x => 0, x/x => 1) to preserve NaN/Inf propagation.

tactic_dram class-attribute instance-attribute

tactic_dram: int | None = None

DRAM limit in bytes for the optimizer during tactic selection. Prevents OOM on memory-constrained systems.

tf32 class-attribute instance-attribute

tf32: bool = False

Allow TensorRT TF32 tactics.

tiling_optimization_level class-attribute instance-attribute

tiling_optimization_level: (
    SupportsInt | TilingOptimizationLevel | TilingOptimizationLevel
) = 0

TensorRT tiling optimization search level.

use_cuda_graph class-attribute instance-attribute

use_cuda_graph: bool = True

Enable CUDA graph execution for compatible engines to improve performance and reduce CPU overhead.

verbosity class-attribute instance-attribute

verbosity: SupportsInt | Severity | Severity | None = field(
    default=None, repr=False
)

TensorRT/plugin logging severity.

version property

version: tuple[int, int, int]

weight_streaming class-attribute instance-attribute

weight_streaming: bool = False

Stream weights from host to device to reduce GPU memory at the cost of performance.

workspace class-attribute instance-attribute

workspace: int | None = None

Workspace memory pool limit in bytes.

OutputFormat

Bases: IntEnum

Output format for the backend plugin.

Attributes:

FP16 class-attribute instance-attribute

FP16 = 1

FP32 class-attribute instance-attribute

FP32 = 0

autoselect classmethod

autoselect(device_id: int = 0, **kwargs: Any) -> Backend

Try to select the best backend for the current system.

Parameters:

  • device_id

    (int, default: 0 ) –

    The GPU device id.

  • **kwargs

    (Any, default: {} ) –

    Additional arguments to pass to the backend.

Returns:

  • Backend

    The selected backend.

Source code in vsscale/mlrt/backend/base.py
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
@classmethod
def autoselect(cls, device_id: int = 0, **kwargs: Any) -> Backend:
    """
    Pick a backend based on the detected GPU vendor and the plugins available on `core`.

    The vendor comes from `get_gpu(device_id)`. When no GPU is reported, the vendor falls back
    to `"apple"` on macOS x86_64 and to `None` everywhere else. For each vendor the candidate
    plugins are probed in priority order and the first match wins; `Backend.OV_CPU` is the
    final fallback for every vendor, including unknown ones.

    Args:
        device_id: The GPU device id.
        **kwargs: Additional arguments to pass to the backend.

    Returns:
        The selected backend, instantiated with `**kwargs`.
    """

    def available(plugin_name: str) -> bool:
        # A plugin counts as installed when `core` exposes an attribute with its name.
        return hasattr(core, plugin_name)

    def on_windows() -> bool:
        return platform.system().lower() == "windows"

    gpu = get_gpu(device_id)

    if gpu:
        vendor = cast(str | None, gpu.vendor)
    elif platform.system().lower() == "darwin" and platform.machine() == "x86_64":
        # No GPU was reported. The original note reads "macOS x86_64 is unsupported";
        # presumably the GPU probe does not support that platform, so Apple is assumed - TODO confirm.
        vendor = "apple"
    else:
        vendor = None

    if vendor == "nvidia":
        # Windows & Linux
        if available("trt"):
            backend = Backend.TRT
        elif available("trt_rtx"):
            backend = Backend.TRT_RTX
        elif on_windows() and available("ort"):
            backend = Backend.ORT_DML
        elif available("ort"):
            backend = Backend.ORT_CUDA
        elif available("ncnn"):
            backend = Backend.NCNN
        else:
            backend = Backend.OV_CPU
    elif vendor == "amd":
        # Windows & Linux
        if on_windows() and available("ort"):
            backend = Backend.ORT_DML
        elif available("migx"):
            backend = Backend.MIGX
        elif available("ncnn"):
            backend = Backend.NCNN_VK
        else:
            backend = Backend.OV_CPU
    elif vendor == "intel":
        # Windows & Linux
        # device-smi can't detect Intel NPUs in 0.5.6
        # https://github.com/ModelCloud/Device-SMI#roadmap
        if available("ov"):
            backend = Backend.OV_GPU
        elif on_windows() and available("ort"):
            backend = Backend.ORT_DML
        elif available("ncnn"):
            backend = Backend.NCNN_VK
        else:
            backend = Backend.OV_CPU
    elif vendor == "apple":
        # macOS ARM64 & x86_64
        if available("ncnn"):
            backend = Backend.NCNN_VK
        elif available("ort"):
            backend = Backend.ORT_COREML
        else:
            backend = Backend.OV_CPU
    else:
        backend = Backend.OV_CPU

    # Drop the GPU handle before the backend is constructed.
    del gpu

    return backend(**kwargs)

build

build(
    network_path: Path,
    engine_path: Path,
    channels: int,
    tilesize: Shape,
    input_name: str,
) -> None
Source code in vsscale/mlrt/backend/trt.py
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
def build(
    self,
    network_path: Path,
    engine_path: Path,
    channels: int,
    tilesize: Shape,
    input_name: str,
) -> None:
    """
    Parse an ONNX model and write a serialized TensorRT engine to `engine_path`.

    A timing cache stored at `<engine_path>.cache` is loaded when it exists (an empty cache is
    used otherwise) and is written back after a successful build.

    Args:
        network_path: Path to the ONNX model handed to the ONNX parser.
        engine_path: Destination file for the serialized engine.
        channels: Number of model input channels, forwarded to `setup_optimization_profile`.
        tilesize: Inference tile size, forwarded to `setup_optimization_profile`.
        input_name: Name of the model input tensor, forwarded to `setup_optimization_profile`.

    Raises:
        CustomRuntimeError: If the ONNX model cannot be parsed or the builder returns no engine.
    """
    log_bridge = self.logger
    builder = self.trt.Builder(log_bridge)

    thread_cap = self.max_threads
    if thread_cap is not None:
        builder.max_threads = thread_cap

    network = builder.create_network()
    onnx_parser = self.trt.OnnxParser(network, log_bridge)

    parsed_ok = onnx_parser.parse_from_file(str(network_path))
    if not parsed_ok:
        # Collect every parser error so the exception carries the full diagnostics.
        details = "\n".join(str(onnx_parser.get_error(idx)) for idx in range(onnx_parser.num_errors))
        raise CustomRuntimeError(f"Failed to parse ONNX model: {network_path}\n" + details)

    config = builder.create_builder_config()

    # Builder flags/limits first, then the input shape profile.
    self.configure_builder_config(config, network)
    self.setup_optimization_profile(builder, network, config, channels, input_name, tilesize)

    # Seed the timing cache from disk when a previous build left one behind.
    timing_cache_path = Path(f"{engine_path}.cache")
    cached_blob = timing_cache_path.read_bytes() if timing_cache_path.exists() else b""
    config.set_timing_cache(config.create_timing_cache(cached_blob), ignore_mismatch=True)

    logger.info(f"Building TensorRT {self.__class__.__name__} engine from {network_path}...")
    engine_blob = builder.build_serialized_network(network, config)

    if not engine_blob:
        raise CustomRuntimeError(f"TensorRT engine build failed for {network_path}")

    engine_path.write_bytes(engine_blob)

    # Persist the (possibly updated) timing cache next to the engine.
    timing_cache_path.write_bytes(config.get_timing_cache().serialize())

    logger.info(f"Engine saved to {engine_path}")

build_engine

build_engine(
    network_path: Path,
    channels: int,
    tilesize: Shape,
    input_name: str = "input",
) -> Path

Build or retrieve a cached TensorRT engine.

Parameters:

  • network_path

    (Path) –

    Path to the ONNX model.

  • channels

    (int) –

    Number of model input channels.

  • tilesize

    (Shape) –

    Inference tile size as (width, height).

  • input_name

    (str, default: 'input' ) –

    Name of the model input tensor.

Returns:

  • Path

    Path to the serialized engine file.

Source code in vsscale/mlrt/backend/trt.py
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
def build_engine(self, network_path: Path, channels: int, tilesize: Shape, input_name: str = "input") -> Path:
    """
    Return the path of a TensorRT engine for the model, building it only when needed.

    The model is first passed through `_convert_onnx_fp16` when `self.fp16` is truthy, otherwise
    through `_convert_onnx_bf16` when `self.bf16` is truthy. The engine lives in the artifacts
    folder under the name `<identity>.engine`. An existing engine file of at least 1024 bytes is
    reused unless `self.force_rebuild` is set.

    Args:
        network_path: Path to the ONNX model.
        channels: Number of model input channels.
        tilesize: Inference tile size as `(width, height)`.
        input_name: Name of the model input tensor.

    Returns:
        Path to the serialized engine file.
    """
    if self.fp16:
        network_path = self._convert_onnx_fp16(network_path)
    elif self.bf16:
        network_path = self._convert_onnx_bf16(network_path)

    artifacts_dir = get_artifacts_folder()
    artifacts_dir.mkdir(parents=True, exist_ok=True)

    # The identity is computed from the (possibly converted) model path.
    engine_id = self.get_identity(network_path, channels, tilesize)
    engine_path = artifacts_dir / f"{engine_id}.engine"

    # Files smaller than 1024 bytes are not accepted as a cached engine.
    reuse_cached = not self.force_rebuild and engine_path.is_file() and engine_path.stat().st_size >= 1024

    if not reuse_cached:
        self.build(
            network_path=network_path,
            engine_path=engine_path,
            channels=channels,
            tilesize=tilesize,
            input_name=input_name,
        )

    return engine_path

configure_builder_config

configure_builder_config(
    config: IBuilderConfig, network: INetworkDefinition
) -> None
Source code in vsscale/mlrt/backend/trt.py
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
def configure_builder_config(self, config: trt.IBuilderConfig, network: trt.INetworkDefinition) -> None:
    """
    Apply memory-pool limits and builder flags from this backend to `config`.

    After the flags are set, tactic sources and optimization settings are applied through
    `configure_tactic_sources` and `configure_optimization_settings`.

    Args:
        config: Builder configuration that is modified in place.
        network: The network definition; not used by this implementation.
    """
    # Memory pool limits are only set when a value was provided.
    for pool_name, limit in (("WORKSPACE", self.workspace), ("TACTIC_DRAM", self.tactic_dram)):
        if limit is not None:
            config.set_memory_pool_limit(getattr(self.trt.MemoryPoolType, pool_name), limit)

    # TF32 is only ever cleared here; when `self.tf32` is true the flag is left as `config` has it.
    if not self.tf32:
        tf32_bit = 1 << self.trt.BuilderFlag.TF32.value
        config.flags &= ~tf32_bit

    # Opt-in flags: each one is OR-ed into the bitmask only when enabled.
    opt_in_flags = (
        (self.sparse_weights, "SPARSE_WEIGHTS"),
        (self.strict_nans, "STRICT_NANS"),
        (self.weight_streaming, "WEIGHT_STREAMING"),
    )
    for enabled, flag_name in opt_in_flags:
        if enabled:
            config.flags |= 1 << getattr(self.trt.BuilderFlag, flag_name).value

    self.configure_tactic_sources(config)
    self.configure_optimization_settings(config)

configure_optimization_settings

configure_optimization_settings(config: IBuilderConfig) -> None
Source code in vsscale/mlrt/backend/trt.py
341
342
343
344
345
346
347
348
349
350
351
352
353
def configure_optimization_settings(self, config: trt.IBuilderConfig) -> None:
    """
    Copy the optimization-related settings of this backend onto `config`.

    `builder_optimization_level` and `avg_timing_iterations` are always applied.
    `max_aux_streams` and `max_num_tactics` are applied only when they are not `None`.
    Tiling settings (`tiling_optimization_level` and `l2_limit_for_tiling`) are applied only
    when the tiling level converts to a non-zero integer.

    Args:
        config: Builder configuration that is modified in place.
    """
    config.builder_optimization_level = self.builder_optimization_level
    config.avg_timing_iterations = self.avg_timing_iterations

    if self.max_aux_streams is not None:
        config.max_aux_streams = self.max_aux_streams

    if self.max_num_tactics is not None:
        config.max_num_tactics = self.max_num_tactics

    # The field accepts any `SupportsInt`; normalize it once so the enum constructor always
    # receives a plain int instead of the raw object that was only int()-converted for the check.
    tiling_level = int(self.tiling_optimization_level)
    if tiling_level != 0:
        config.tiling_optimization_level = self.trt.TilingOptimizationLevel(tiling_level)
        config.l2_limit_for_tiling = self.l2_limit_for_tiling

configure_tactic_sources

configure_tactic_sources(config: IBuilderConfig) -> None
Source code in vsscale/mlrt/backend/trt.py
326
327
328
329
330
331
332
333
334
335
336
337
338
339
def configure_tactic_sources(self, config: trt.IBuilderConfig) -> None:
    """
    Enable or disable the edge-mask and JIT convolution tactic sources on `config`.

    The current tactic-source bitmask is read from `config`, the two bits are set or cleared
    according to `edge_mask_convolutions` and `jit_convolutions`, and the result is written back.
    All other bits are left as they were.

    Args:
        config: Builder configuration that is modified in place.
    """
    source_mask = config.get_tactic_sources()

    toggles = (
        (self.edge_mask_convolutions, "EDGE_MASK_CONVOLUTIONS"),
        (self.jit_convolutions, "JIT_CONVOLUTIONS"),
    )
    for enabled, source_name in toggles:
        bit = 1 << getattr(self.trt.TacticSource, source_name).value
        source_mask = (source_mask | bit) if enabled else (source_mask & ~bit)

    config.set_tactic_sources(source_mask)

get_args

get_args(clips: VideoNode | Sequence[VideoNode]) -> dict[str, Any]

Return backend plugin arguments derived from this configuration.

Source code in vsscale/mlrt/backend/trt.py
184
185
186
187
188
189
190
def get_args(self, clips: vs.VideoNode | Sequence[vs.VideoNode]) -> dict[str, Any]:
    """
    Return backend plugin arguments derived from this configuration.

    Args:
        clips: Input clip or clips; not used by this implementation.

    Returns:
        A dict with the `device_id`, `use_cuda_graph`, `num_streams` and `verbosity` settings.
    """
    forwarded = ("device_id", "use_cuda_graph", "num_streams", "verbosity")
    return {option: getattr(self, option) for option in forwarded}

get_identity

get_identity(network_path: Path, channels: int, tilesize: Shape) -> int
Source code in vsscale/mlrt/backend/trt.py
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
def get_identity(self, network_path: Path, channels: int, tilesize: Shape) -> int:
    """
    Compute a CRC32 identity for an engine built from this configuration and model.

    The identity combines the string form of this backend, its `version`, the Python
    major/minor version, the model file name, a CRC32 of the model contents, the channel count,
    the tile size, and the GPU name and driver version reported by `nvidia-smi` for `device_id`.

    Args:
        network_path: Path to the model file; its bytes are read to compute the content checksum.
        channels: Number of model input channels.
        tilesize: Inference tile size.

    Returns:
        The CRC32 of the `|`-joined identity components.

    Raises:
        subprocess.CalledProcessError: If `nvidia-smi` exits with a non-zero status.
    """
    model_crc = zlib.crc32(network_path.read_bytes())

    query = subprocess.run(
        [
            "nvidia-smi",
            "-i",
            str(self.device_id),
            "--query-gpu=name,driver_version",
            "--format=csv,noheader,nounits",
        ],
        capture_output=True,
        text=True,
        check=True,
    )
    # One entry per comma-separated value, with spaces turned into underscores.
    gpu_parts = [part.strip().replace(" ", "_") for part in query.stdout.split(",")]

    parts = [
        str(self),
        str(self.version),
        str(sys.version_info[:2]),
        network_path.name,
        f"{model_crc:x}",
        str(channels),
        str(tilesize),
        *gpu_parts,
    ]
    return zlib.crc32("|".join(parts).encode("utf-8"))

inference

inference(
    clips: VideoNode | Sequence[VideoNode],
    network_path: str | PathLike[str],
    /,
    overlap: tuple[int, int],
    tilesize: tuple[int, int],
    *,
    flexible: bool = False,
    **kwargs: Any,
) -> VideoNode | list[VideoNode]

Run inference with this backend.

Parameters:

  • clips

    (VideoNode | Sequence[VideoNode]) –

    Input clip or clips passed to the backend model.

  • network_path

    (str | PathLike[str]) –

    Path to the model file or backend artifact.

  • overlap

    (tuple[int, int]) –

    Horizontal and vertical tile overlap in pixels.

  • tilesize

    (tuple[int, int]) –

    Horizontal and vertical tile size in pixels.

  • flexible

    (bool, default: False ) –

    Return each flexible output plane as a separate clip.

  • **kwargs

    (Any, default: {} ) –

    Additional backend plugin arguments forwarded unchanged.

Returns:

  • VideoNode | list[VideoNode]

    A single output clip, or a list of output clips when flexible is enabled.

Source code in vsscale/mlrt/backend/trt.py
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
@copy_signature(Backend.inference)
def inference(
    self,
    clips: vs.VideoNode | Sequence[vs.VideoNode],
    network_path: str | os.PathLike[str],
    /,
    overlap: tuple[int, int],
    tilesize: tuple[int, int],
    *,
    flexible: bool = False,
    **kwargs: Any,
) -> vs.VideoNode | list[vs.VideoNode]:
    """
    Build (or reuse) the engine for the model, convert the clips, and run the base inference.

    Args:
        clips: Input clip or clips passed to the backend model.
        network_path: Path to the model file used to build the engine.
        overlap: Tile overlap, forwarded to the base implementation.
        tilesize: Tile size, used for the engine build and forwarded to the base implementation.
        flexible: Forwarded to the base implementation.
        **kwargs: Additional arguments forwarded unchanged to the base implementation.

    Returns:
        Whatever the base `inference` returns for the converted clips and the engine path.
    """
    nodes = to_arr(clips)
    # The channel count is the total number of planes across all input clips.
    total_planes = sum(node.format.num_planes for node in nodes)
    engine_file = self.build_engine(Path(network_path), total_planes, tilesize)

    # Clips must be in fp16 format if fp16 or bf16 mode is enabled,
    # otherwise the TRT plugins error out.
    if not (self.fp16 or self.bf16):
        converted = [depth(node, 32) for node in nodes]
    else:
        converted = [depth(node, 16, sample_type=vs.SampleType.FLOAT) for node in nodes]

    return super().inference(converted, engine_file, overlap, tilesize, flexible=flexible, **kwargs)

logger classmethod

logger() -> ILogger
Source code in vsscale/mlrt/backend/trt.py
152
153
154
155
156
157
@classproperty.cached
@classmethod
def logger(cls) -> tensorrt.ILogger:
    """
    Return the TensorRT logger wrapping this module's Python logger.

    `Logger` is imported from `._trt` inside the method, so that module is only loaded when the
    property is first accessed.
    """
    from ._trt import Logger as _TrtLogger

    # Inside this method `logger` resolves to the module-level logger, not this property.
    trt_logger = _TrtLogger(logger)
    return trt_logger

setup_optimization_profile

setup_optimization_profile(
    builder: Builder,
    network: INetworkDefinition,
    config: IBuilderConfig,
    channels: int,
    input_name: str,
    tilesize: Shape,
) -> None
Source code in vsscale/mlrt/backend/trt.py
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
def setup_optimization_profile(
    self,
    builder: trt.Builder,
    network: trt.INetworkDefinition,
    config: trt.IBuilderConfig,
    channels: int,
    input_name: str,
    tilesize: Shape,
) -> None:
    """
    Create an optimization profile for the model input and add it to `config`.

    Shapes are given as `(width, height)` and laid out as `(1, channels, height, width)`.
    With `static_shape` the matching network input is pinned to the optimal shape, which is
    also used for the profile's min/opt/max. Otherwise the profile spans `min_shapes`,
    the optimal shape and the maximum shape. `opt_shapes` and `max_shapes` fall back to
    `tilesize` when unset.

    Args:
        builder: Builder used to create the optimization profile.
        network: Network whose inputs are inspected (and, for static shapes, modified).
        config: Builder configuration receiving the profile.
        channels: Number of model input channels.
        input_name: Name of the model input tensor. If it is not a network input and the network
            has exactly one input, that input's name is used instead.
        tilesize: Inference tile size as `(width, height)`.

    Raises:
        CustomValueError: If `input_name` is not a network input and the network does not have
            exactly one input.
    """
    profile = builder.create_optimization_profile()
    make_dims = self.trt.Dims
    opt_size = make_dims(self.opt_shapes or tilesize)
    max_size = make_dims(self.max_shapes or tilesize)

    def nchw(size: Any) -> Any:
        # Sizes are (width, height); tensors are (batch, channels, height, width).
        return make_dims((1, channels, size[1], size[0]))

    known_inputs = [network.get_input(idx).name for idx in range(network.num_inputs)]
    if input_name not in known_inputs:
        logger.debug("input_name %r isn't in the input network", input_name)
        if network.num_inputs != 1:
            raise CustomValueError(f"Input name '{input_name}' not found in network inputs: {known_inputs}")
        # A single-input network is unambiguous, so use its actual input name.
        input_name = known_inputs[0]

    if not self.static_shape:
        profile.set_shape(input_name, nchw(self.min_shapes), nchw(opt_size), nchw(max_size))
    else:
        fixed = nchw(opt_size)

        for tensor in (network.get_input(idx) for idx in range(network.num_inputs)):
            if tensor.name == input_name:
                tensor.shape = fixed

        profile.set_shape(input_name, fixed, fixed, fixed)

    config.add_optimization_profile(profile)

TRT_RTX dataclass

TRT_RTX(
    *,
    plugin: Plugin,
    device_id: int = 0,
    num_streams: int = 1,
    use_cuda_graph: bool = True,
    verbosity: SupportsInt | Severity | Severity | None = None,
    fp16: bool | None = None,
    fp16_blacklist_ops: Collection[str] | None = None,
    bf16: bool | None = None,
    tf32: bool = False,
    strict_nans: bool = False,
    static_shape: bool = True,
    min_shapes: Shape = (0, 0),
    opt_shapes: Shape | None = None,
    max_shapes: Shape | None = None,
    edge_mask_convolutions: bool = True,
    jit_convolutions: bool = True,
    sparse_weights: bool = False,
    workspace: int | None = None,
    builder_optimization_level: int = 3,
    max_aux_streams: int | None = None,
    max_num_tactics: int | None = None,
    tiling_optimization_level: SupportsInt
    | TilingOptimizationLevel
    | TilingOptimizationLevel = 0,
    l2_limit_for_tiling: int = -1,
    avg_timing_iterations: int = 1,
    tactic_dram: int | None = None,
    weight_streaming: bool = False,
    force_rebuild: bool = False,
    max_threads: int | None = None,
)

Bases: TRT

TensorRT RTX backend for Nvidia RTX GPUs using the core.trt_rtx plugin.

Classes:

Methods:

Attributes:

MIGX class-attribute instance-attribute

MIGX = MIGX

NCNN class-attribute instance-attribute

NCNN = NCNN

NCNN_VK class-attribute instance-attribute

NCNN_VK = NCNN

ORT class-attribute instance-attribute

ORT = ORT

ORT_COREML class-attribute instance-attribute

ORT_COREML = ORT_COREML

ORT_CPU class-attribute instance-attribute

ORT_CPU = ORT_CPU

ORT_CUDA class-attribute instance-attribute

ORT_CUDA = ORT_CUDA

ORT_DML class-attribute instance-attribute

ORT_DML = ORT_DML

OV class-attribute instance-attribute

OV = OV

OV_CPU class-attribute instance-attribute

OV_CPU = OV_CPU

OV_GPU class-attribute instance-attribute

OV_GPU = OV_GPU

OV_NPU class-attribute instance-attribute

OV_NPU = OV_NPU

TRT class-attribute instance-attribute

TRT = TRT

TRT_RTX class-attribute instance-attribute

TRT_RTX = TRT_RTX

avg_timing_iterations class-attribute instance-attribute

avg_timing_iterations: int = 1

Number of averaging iterations when timing tactics. Higher values produce more stable tactic selection.

bf16 class-attribute instance-attribute

bf16: bool | None = None

Convert the ONNX model to BF16 before building. Defaults to False.

builder_optimization_level class-attribute instance-attribute

builder_optimization_level: int = 3

TensorRT builder optimization level.

device_id class-attribute instance-attribute

device_id: int = 0

CUDA device index.

edge_mask_convolutions class-attribute instance-attribute

edge_mask_convolutions: bool = True

Enable TensorRT edge-mask convolution tactics.

flexible_output_prop class-attribute

flexible_output_prop: str = 'MlrtFlexible'

force_rebuild class-attribute instance-attribute

force_rebuild: bool = field(default=False, repr=False)

Force a full engine rebuild, ignoring any cached engine.

fp16 class-attribute instance-attribute

fp16: bool | None = None

Convert the ONNX model to FP16 before building. Defaults to True.

fp16_blacklist_ops class-attribute instance-attribute

fp16_blacklist_ops: Collection[str] | None = None

ONNX node or op names to keep in FP32 during FP16 conversion.

jit_convolutions class-attribute instance-attribute

jit_convolutions: bool = True

Enable TensorRT JIT convolution tactics.

l2_limit_for_tiling class-attribute instance-attribute

l2_limit_for_tiling: int = -1

L2 cache usage hint for tiling optimization.

max_aux_streams class-attribute instance-attribute

max_aux_streams: int | None = None

Maximum auxiliary streams used by TensorRT kernels.

max_num_tactics class-attribute instance-attribute

max_num_tactics: int | None = None

Maximum number of tactics considered per layer.

max_shapes class-attribute instance-attribute

max_shapes: Shape | None = None

Maximum dynamic input tile size as (width, height). Defaults to the inference tile size.

max_threads class-attribute instance-attribute

max_threads: int | None = field(default=None, repr=False)

Maximum number of builder threads. Limits CPU usage during engine build.

min_shapes class-attribute instance-attribute

min_shapes: Shape = (0, 0)

Minimum dynamic input tile size as (width, height).

num_streams class-attribute instance-attribute

num_streams: int = 1

Number of parallel plugin inference streams.

opt_shapes class-attribute instance-attribute

opt_shapes: Shape | None = None

Optimal input tile size as (width, height). Defaults to the inference tile size.

plugin class-attribute instance-attribute

plugin = trt_rtx

sparse_weights class-attribute instance-attribute

sparse_weights: bool = False

Allow the builder to exploit structured sparsity in weights.

static_shape class-attribute instance-attribute

static_shape: bool = True

Build a fixed-shape engine when true.

strict_nans class-attribute instance-attribute

strict_nans: bool = False

Disable float optimizations (0*x => 0, x-x => 0, x/x => 1) to preserve NaN/Inf propagation.

tactic_dram class-attribute instance-attribute

tactic_dram: int | None = None

DRAM limit in bytes for the optimizer during tactic selection. Prevents OOM on memory-constrained systems.

tf32 class-attribute instance-attribute

tf32: bool = False

Allow TensorRT TF32 tactics.

tiling_optimization_level class-attribute instance-attribute

tiling_optimization_level: (
    SupportsInt | TilingOptimizationLevel | TilingOptimizationLevel
) = 0

TensorRT tiling optimization search level.

use_cuda_graph class-attribute instance-attribute

use_cuda_graph: bool = True

Enable CUDA graph execution for compatible engines to improve performance and reduce CPU overhead.

verbosity class-attribute instance-attribute

verbosity: SupportsInt | Severity | Severity | None = field(
    default=None, repr=False
)

TensorRT/plugin logging severity.

version property

version: tuple[int, int, int]

weight_streaming class-attribute instance-attribute

weight_streaming: bool = False

Stream weights from host to device to reduce GPU memory at the cost of performance.

workspace class-attribute instance-attribute

workspace: int | None = None

Workspace memory pool limit in bytes.

OutputFormat

Bases: IntEnum

Output format for the backend plugin.

Attributes:

FP16 class-attribute instance-attribute

FP16 = 1

FP32 class-attribute instance-attribute

FP32 = 0

autoselect classmethod

autoselect(device_id: int = 0, **kwargs: Any) -> Backend

Try to select the best backend for the current system.

Parameters:

  • device_id

    (int, default: 0 ) –

    The GPU device id.

  • **kwargs

    (Any, default: {} ) –

    Additional arguments to pass to the backend.

Returns:

  • Backend

    The selected backend.

Source code in vsscale/mlrt/backend/base.py
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
@classmethod
def autoselect(cls, device_id: int = 0, **kwargs: Any) -> Backend:
    """
    Pick a backend based on the detected GPU vendor and the plugins available on `core`.

    The vendor comes from `get_gpu(device_id)`. When no GPU is reported, the vendor falls back
    to `"apple"` on macOS x86_64 and to `None` everywhere else. For each vendor the candidate
    plugins are probed in priority order and the first match wins; `Backend.OV_CPU` is the
    final fallback for every vendor, including unknown ones.

    Args:
        device_id: The GPU device id.
        **kwargs: Additional arguments to pass to the backend.

    Returns:
        The selected backend, instantiated with `**kwargs`.
    """

    def available(plugin_name: str) -> bool:
        # A plugin counts as installed when `core` exposes an attribute with its name.
        return hasattr(core, plugin_name)

    def on_windows() -> bool:
        return platform.system().lower() == "windows"

    gpu = get_gpu(device_id)

    if gpu:
        vendor = cast(str | None, gpu.vendor)
    elif platform.system().lower() == "darwin" and platform.machine() == "x86_64":
        # No GPU was reported. The original note reads "macOS x86_64 is unsupported";
        # presumably the GPU probe does not support that platform, so Apple is assumed - TODO confirm.
        vendor = "apple"
    else:
        vendor = None

    if vendor == "nvidia":
        # Windows & Linux
        if available("trt"):
            backend = Backend.TRT
        elif available("trt_rtx"):
            backend = Backend.TRT_RTX
        elif on_windows() and available("ort"):
            backend = Backend.ORT_DML
        elif available("ort"):
            backend = Backend.ORT_CUDA
        elif available("ncnn"):
            backend = Backend.NCNN
        else:
            backend = Backend.OV_CPU
    elif vendor == "amd":
        # Windows & Linux
        if on_windows() and available("ort"):
            backend = Backend.ORT_DML
        elif available("migx"):
            backend = Backend.MIGX
        elif available("ncnn"):
            backend = Backend.NCNN_VK
        else:
            backend = Backend.OV_CPU
    elif vendor == "intel":
        # Windows & Linux
        # device-smi can't detect Intel NPUs in 0.5.6
        # https://github.com/ModelCloud/Device-SMI#roadmap
        if available("ov"):
            backend = Backend.OV_GPU
        elif on_windows() and available("ort"):
            backend = Backend.ORT_DML
        elif available("ncnn"):
            backend = Backend.NCNN_VK
        else:
            backend = Backend.OV_CPU
    elif vendor == "apple":
        # macOS ARM64 & x86_64
        if available("ncnn"):
            backend = Backend.NCNN_VK
        elif available("ort"):
            backend = Backend.ORT_COREML
        else:
            backend = Backend.OV_CPU
    else:
        backend = Backend.OV_CPU

    # Drop the GPU handle before the backend is constructed.
    del gpu

    return backend(**kwargs)

build

build(
    network_path: Path,
    engine_path: Path,
    channels: int,
    tilesize: Shape,
    input_name: str,
) -> None
Source code in vsscale/mlrt/backend/trt.py
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
def build(
    self,
    network_path: Path,
    engine_path: Path,
    channels: int,
    tilesize: Shape,
    input_name: str,
) -> None:
    """
    Parse an ONNX model and write a serialized TensorRT engine to `engine_path`.

    A timing cache stored next to the engine (`<engine_path>.cache`) is loaded when it exists
    and written back once the engine has been saved.

    Args:
        network_path: Path to the ONNX model to parse.
        engine_path: Destination file of the serialized engine.
        channels: Number of model input channels, forwarded to the optimization profile setup.
        tilesize: Inference tile size, forwarded to the optimization profile setup.
        input_name: Name of the model input tensor, forwarded to the optimization profile setup.

    Raises:
        CustomRuntimeError: If the ONNX model can't be parsed or the build returns no engine.
    """
    log = self.logger
    engine_builder = self.trt.Builder(log)

    if self.max_threads is not None:
        engine_builder.max_threads = self.max_threads

    net = engine_builder.create_network()
    onnx_parser = self.trt.OnnxParser(net, log)

    parsed = onnx_parser.parse_from_file(str(network_path))
    if not parsed:
        # Collect every parser error so the exception carries the full diagnosis.
        details = "\n".join(str(onnx_parser.get_error(idx)) for idx in range(onnx_parser.num_errors))
        raise CustomRuntimeError(f"Failed to parse ONNX model: {network_path}\n" + details)

    build_config = engine_builder.create_builder_config()

    # Builder flags, tactic sources and input shapes are handled by dedicated methods.
    self.configure_builder_config(build_config, net)
    self.setup_optimization_profile(engine_builder, net, build_config, channels, input_name, tilesize)

    # Seed the timing cache from disk when a previous build left one behind.
    cache_file = Path(f"{engine_path}.cache")
    cache_blob = cache_file.read_bytes() if cache_file.exists() else b""

    cache = build_config.create_timing_cache(cache_blob)
    build_config.set_timing_cache(cache, ignore_mismatch=True)

    logger.info(f"Building TensorRT {self.__class__.__name__} engine from {network_path}...")
    engine_blob = engine_builder.build_serialized_network(net, build_config)

    if not engine_blob:
        raise CustomRuntimeError(f"TensorRT engine build failed for {network_path}")

    engine_path.write_bytes(engine_blob)

    # Persist the (possibly updated) timing cache for later builds.
    cache_file.write_bytes(build_config.get_timing_cache().serialize())

    logger.info(f"Engine saved to {engine_path}")

build_engine

build_engine(
    network_path: Path,
    channels: int,
    tilesize: Shape,
    input_name: str = "input",
) -> Path

Build or retrieve a cached TensorRT engine.

Parameters:

  • network_path

    (Path) –

    Path to the ONNX model.

  • channels

    (int) –

    Number of model input channels.

  • tilesize

    (Shape) –

    Inference tile size as (width, height).

  • input_name

    (str, default: 'input' ) –

    Name of the model input tensor.

Returns:

  • Path

    Path to the serialized engine file.

Source code in vsscale/mlrt/backend/trt.py
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
def build_engine(self, network_path: Path, channels: int, tilesize: Shape, input_name: str = "input") -> Path:
    """
    Return the path of a TensorRT engine for the model, building it only when required.

    The model is first converted to fp16 or bf16 when the matching option is enabled
    (fp16 takes precedence). An engine file already present in the artifacts folder is reused
    unless `force_rebuild` is set or the file is smaller than 1024 bytes.

    Args:
        network_path: Path to the ONNX model.
        channels: Number of model input channels.
        tilesize: Inference tile size as `(width, height)`.
        input_name: Name of the model input tensor.

    Returns:
        Path to the serialized engine file.
    """
    if self.fp16:
        network_path = self._convert_onnx_fp16(network_path)
    elif self.bf16:
        network_path = self._convert_onnx_bf16(network_path)

    artifacts_dir = get_artifacts_folder()
    artifacts_dir.mkdir(parents=True, exist_ok=True)

    # The engine file name is derived from the identity of the (possibly converted) model.
    engine_name = self.get_identity(network_path, channels, tilesize)
    engine_path = artifacts_dir / f"{engine_name}.engine"

    # Rebuild when forced, when no engine exists yet, or when the existing file is under 1024 bytes.
    if self.force_rebuild or not engine_path.is_file() or engine_path.stat().st_size < 1024:
        self.build(
            network_path=network_path,
            engine_path=engine_path,
            channels=channels,
            tilesize=tilesize,
            input_name=input_name,
        )

    return engine_path

configure_builder_config

configure_builder_config(
    config: IBuilderConfig, network: INetworkDefinition
) -> None
Source code in vsscale/mlrt/backend/trt.py
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
def configure_builder_config(self, config: trt.IBuilderConfig, network: trt.INetworkDefinition) -> None:
    """
    Apply memory pool limits and builder flags from this configuration to `config`.

    Tactic sources and optimization settings are then applied through
    `configure_tactic_sources` and `configure_optimization_settings`.

    Args:
        config: Builder configuration to modify in place.
        network: Network definition being built; not used by this implementation.
    """

    def bit(flag: Any) -> int:
        # Builder flags are stored as a bitmask indexed by the enum value.
        return 1 << flag.value

    # Memory pool limits are only overridden when explicitly configured.
    if self.workspace is not None:
        workspace_pool = self.trt.MemoryPoolType.WORKSPACE
        config.set_memory_pool_limit(workspace_pool, self.workspace)

    if self.tactic_dram is not None:
        tactic_dram_pool = self.trt.MemoryPoolType.TACTIC_DRAM
        config.set_memory_pool_limit(tactic_dram_pool, self.tactic_dram)

    # TF32 is the only flag that gets cleared; the others are only ever switched on.
    if not self.tf32:
        config.flags = config.flags & ~bit(self.trt.BuilderFlag.TF32)

    if self.sparse_weights:
        config.flags = config.flags | bit(self.trt.BuilderFlag.SPARSE_WEIGHTS)

    if self.strict_nans:
        config.flags = config.flags | bit(self.trt.BuilderFlag.STRICT_NANS)

    if self.weight_streaming:
        config.flags = config.flags | bit(self.trt.BuilderFlag.WEIGHT_STREAMING)

    self.configure_tactic_sources(config)
    self.configure_optimization_settings(config)

configure_optimization_settings

configure_optimization_settings(config: IBuilderConfig) -> None
Source code in vsscale/mlrt/backend/trt.py
341
342
343
344
345
346
347
348
349
350
351
352
353
def configure_optimization_settings(self, config: trt.IBuilderConfig) -> None:
    """
    Copy the optimization-related options of this configuration onto `config`.

    The optimization level and timing iterations are always set. `max_aux_streams` and
    `max_num_tactics` are only set when they are not None, and the tiling options are
    only set when the tiling optimization level is non-zero.

    Args:
        config: Builder configuration to modify in place.
    """
    config.builder_optimization_level = self.builder_optimization_level
    config.avg_timing_iterations = self.avg_timing_iterations

    # Optional limits share their attribute name between this object and the builder config.
    for option in ("max_aux_streams", "max_num_tactics"):
        value = getattr(self, option)
        if value is not None:
            setattr(config, option, value)

    # Level 0 leaves the builder's tiling settings untouched.
    if int(self.tiling_optimization_level) == 0:
        return

    config.tiling_optimization_level = self.trt.TilingOptimizationLevel(self.tiling_optimization_level)
    config.l2_limit_for_tiling = self.l2_limit_for_tiling

configure_tactic_sources

configure_tactic_sources(config: IBuilderConfig) -> None
Source code in vsscale/mlrt/backend/trt.py
326
327
328
329
330
331
332
333
334
335
336
337
338
339
def configure_tactic_sources(self, config: trt.IBuilderConfig) -> None:
    """
    Enable or disable the edge-mask and JIT convolution tactic sources on `config`.

    The current tactic source bitmask is read from `config`, the two bits are set or cleared
    according to `edge_mask_convolutions` and `jit_convolutions`, and the result is written back.
    All other bits are left as they were.

    Args:
        config: Builder configuration to modify in place.
    """
    mask = config.get_tactic_sources()

    sources = self.trt.TacticSource
    toggles = (
        (self.edge_mask_convolutions, sources.EDGE_MASK_CONVOLUTIONS),
        (self.jit_convolutions, sources.JIT_CONVOLUTIONS),
    )

    for enabled, source in toggles:
        bit = 1 << source.value
        mask = (mask | bit) if enabled else (mask & ~bit)

    config.set_tactic_sources(mask)

get_args

get_args(clips: VideoNode | Sequence[VideoNode]) -> dict[str, Any]

Return backend plugin arguments derived from this configuration.

Source code in vsscale/mlrt/backend/trt.py
184
185
186
187
188
189
190
def get_args(self, clips: vs.VideoNode | Sequence[vs.VideoNode]) -> dict[str, Any]:
    """
    Return backend plugin arguments derived from this configuration.

    Args:
        clips: Input clip or clips; not used by this implementation.

    Returns:
        A dict holding `device_id`, `use_cuda_graph`, `num_streams` and `verbosity`.
    """
    forwarded = ("device_id", "use_cuda_graph", "num_streams", "verbosity")
    return {name: getattr(self, name) for name in forwarded}

get_identity

get_identity(network_path: Path, channels: int, tilesize: Shape) -> int
Source code in vsscale/mlrt/backend/trt.py
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
def get_identity(self, network_path: Path, channels: int, tilesize: Shape) -> int:
    """
    Compute a CRC32 identifier for the engine matching this configuration and model.

    The identifier combines the string form of this object, `self.version`, the Python
    major/minor version, the model file name and the CRC32 of its contents, the channel count,
    the tile size, and the GPU name and driver version reported by `nvidia-smi` for `device_id`.

    Args:
        network_path: Path to the model file; its contents are read to compute a checksum.
        channels: Number of model input channels.
        tilesize: Inference tile size.

    Returns:
        The CRC32 of the `|`-joined identity components.

    Raises:
        subprocess.CalledProcessError: If `nvidia-smi` exits with a non-zero status.
    """
    model_crc = zlib.crc32(network_path.read_bytes())

    query = subprocess.run(
        [
            "nvidia-smi",
            "-i",
            str(self.device_id),
            "--query-gpu=name,driver_version",
            "--format=csv,noheader,nounits",
        ],
        capture_output=True,
        text=True,
        check=True,
    )
    # One entry per CSV field, with spaces replaced by underscores.
    gpu_fields = [field.strip().replace(" ", "_") for field in query.stdout.split(",")]

    parts = [
        str(self),
        str(self.version),
        str(sys.version_info[:2]),
        network_path.name,
        f"{model_crc:x}",
        str(channels),
        str(tilesize),
        *gpu_fields,
    ]
    return zlib.crc32("|".join(parts).encode("utf-8"))

inference

inference(
    clips: VideoNode | Sequence[VideoNode],
    network_path: str | PathLike[str],
    /,
    overlap: tuple[int, int],
    tilesize: tuple[int, int],
    *,
    flexible: bool = False,
    **kwargs: Any,
) -> VideoNode | list[VideoNode]

Run inference with this backend.

Parameters:

  • clips

    (VideoNode | Sequence[VideoNode]) –

    Input clip or clips passed to the backend model.

  • network_path

    (str | PathLike[str]) –

    Path to the model file or backend artifact.

  • overlap

    (tuple[int, int]) –

    Horizontal and vertical tile overlap in pixels.

  • tilesize

    (tuple[int, int]) –

    Horizontal and vertical tile size in pixels.

  • flexible

    (bool, default: False ) –

    Return each flexible output plane as a separate clip.

  • **kwargs

    (Any, default: {} ) –

    Additional backend plugin arguments forwarded unchanged.

Returns:

  • VideoNode | list[VideoNode]

    A single output clip, or a list of output clips when flexible is enabled.

Source code in vsscale/mlrt/backend/trt.py
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
@copy_signature(Backend.inference)
def inference(
    self,
    clips: vs.VideoNode | Sequence[vs.VideoNode],
    network_path: str | os.PathLike[str],
    /,
    overlap: tuple[int, int],
    tilesize: tuple[int, int],
    *,
    flexible: bool = False,
    **kwargs: Any,
) -> vs.VideoNode | list[vs.VideoNode]:
    """
    Run inference with this backend.

    A TensorRT engine is built (or reused) for the model before the clips are converted
    to the float precision matching the engine and handed to the parent implementation.

    Args:
        clips: Input clip or clips passed to the backend model.
        network_path: Path to the model file or backend artifact.
        overlap: Horizontal and vertical tile overlap in pixels.
        tilesize: Horizontal and vertical tile size in pixels.
        flexible: Return each flexible output plane as a separate clip.
        **kwargs: Additional backend plugin arguments forwarded unchanged.

    Returns:
        A single output clip, or a list of output clips when `flexible` is enabled.
    """
    nodes = to_arr(clips)

    # The model receives every plane of every input clip as one channel.
    channels = sum(node.format.num_planes for node in nodes)
    engine_path = self.build_engine(Path(network_path), channels, tilesize)

    if not (self.fp16 or self.bf16):
        nodes = [depth(node, 32) for node in nodes]
    else:
        # Clips must be in 16-bit float format if fp16 or bf16 mode is enabled,
        # otherwise the TRT plugins error out.
        nodes = [depth(node, 16, sample_type=vs.SampleType.FLOAT) for node in nodes]

    return super().inference(nodes, engine_path, overlap, tilesize, flexible=flexible, **kwargs)

logger classmethod

logger() -> ILogger
Source code in vsscale/mlrt/backend/trt.py
493
494
495
496
497
498
@classproperty.cached
@classmethod
def logger(cls) -> tensorrt_rtx.ILogger:  # type: ignore[override]
    """Return a `Logger` from `._trt_rtx` constructed with the module-level `logger`."""
    # Imported at call time rather than at module import.
    # NOTE(review): presumably so the RTX bindings are only needed when this backend is used - confirm.
    from ._trt_rtx import Logger as RtxLogger

    rtx_logger = RtxLogger(logger)
    return rtx_logger

setup_optimization_profile

setup_optimization_profile(
    builder: Builder,
    network: INetworkDefinition,
    config: IBuilderConfig,
    channels: int,
    input_name: str,
    tilesize: Shape,
) -> None
Source code in vsscale/mlrt/backend/trt.py
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
def setup_optimization_profile(
    self,
    builder: trt.Builder,
    network: trt.INetworkDefinition,
    config: trt.IBuilderConfig,
    channels: int,
    input_name: str,
    tilesize: Shape,
) -> None:
    """
    Create an optimization profile for the model input and add it to `config`.

    With `static_shape` enabled, the matching network input and the profile's min/opt/max shapes
    are all fixed to the optimal shape. Otherwise the profile spans `min_shapes` to `max_shapes`.
    `opt_shapes` and `max_shapes` fall back to `tilesize` when unset.

    Args:
        builder: Builder used to create the optimization profile.
        network: Parsed network whose inputs are inspected (and reshaped in static mode).
        config: Builder configuration receiving the profile.
        channels: Number of model input channels.
        input_name: Name of the model input tensor. If it isn't found and the network has exactly
            one input, that input's name is used instead.
        tilesize: Inference tile size as `(width, height)`.

    Raises:
        CustomValueError: If `input_name` isn't a network input and the network doesn't have
            exactly one input.
    """
    profile = builder.create_optimization_profile()
    opt_shapes = self.trt.Dims(self.opt_shapes or tilesize)
    max_shapes = self.trt.Dims(self.max_shapes or tilesize)

    input_names = [network.get_input(idx).name for idx in range(network.num_inputs)]
    if input_name not in input_names:
        logger.debug("input_name %r isn't in the input network", input_name)
        if network.num_inputs != 1:
            raise CustomValueError(f"Input name '{input_name}' not found in network inputs: {input_names}")
        # A single-input network leaves no ambiguity, so fall back to its only input.
        input_name = input_names[0]

    def nchw(size: Any) -> Any:
        # Sizes are (width, height); the tensor layout is (batch, channels, height, width).
        return self.trt.Dims((1, channels, size[1], size[0]))

    if not self.static_shape:
        profile.set_shape(input_name, nchw(self.min_shapes), nchw(opt_shapes), nchw(max_shapes))
    else:
        fixed = nchw(opt_shapes)

        # Pin the network input itself to the fixed shape.
        for idx in range(network.num_inputs):
            tensor = network.get_input(idx)
            if tensor.name == input_name:
                tensor.shape = fixed

        profile.set_shape(input_name, fixed, fixed, fixed)

    config.add_optimization_profile(profile)