Skip to content

ort

Classes:

  • ORT

    Base ONNX Runtime backend configuration.

  • ORT_COREML

    ONNX Runtime Core ML execution provider.

  • ORT_CPU

    ONNX Runtime CPU execution provider.

  • ORT_CUDA

    ONNX Runtime CUDA execution provider for Nvidia GPUs.

  • ORT_DML

    ONNX Runtime DirectML execution provider for D3D12 devices.

Attributes:

logger module-attribute

logger = getLogger(__name__)

ORT dataclass

ORT(
    *,
    num_streams: int = 1,
    verbosity: int | None = None,
    fp16: bool = True,
    fp16_blacklist_ops: Collection[str] | None = None,
)

Bases: BackendAutoConvertFloat

Base ONNX Runtime backend configuration.

Initialize the backend.

Parameters:

  • num_streams

    (int, default: 1 ) –

    Number of parallel inference streams.

  • verbosity

    (int | None, default: None ) –

    ONNX Runtime logging verbosity.

  • fp16

    (bool, default: True ) –

    Convert model execution to FP16 where supported.

  • fp16_blacklist_ops

    (Collection[str] | None, default: None ) –

    ONNX node or op names to keep in FP32 during FP16 conversion.

Classes:

Methods:

  • autoselect

    Try to select the best backend for the current system.

  • get_args

    Return backend plugin arguments derived from this configuration.

  • inference

    Run inference with this backend.

Attributes:

Source code in vsscale/mlrt/backend/ort.py
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
def __init__(
    self,
    *,
    num_streams: int = 1,
    verbosity: int | None = None,
    fp16: bool = True,
    fp16_blacklist_ops: Collection[str] | None = None,
) -> None:
    """
    Set up the ONNX Runtime backend configuration.

    Args:
        num_streams: Number of parallel inference streams.
        verbosity: ONNX Runtime logging verbosity. When `None`, it is derived from the effective
            level of this module's logger via `ORT.Verbosity.from_logging`.
        fp16: Convert model execution to FP16 where supported.
        fp16_blacklist_ops: ONNX node or op names to keep in FP32 during FP16 conversion.
    """
    if verbosity is None:
        # No explicit value given: follow the effective level of this module's logger.
        verbosity = ORT.Verbosity.from_logging(logger.getEffectiveLevel())

    # Attributes are written through object.__setattr__ instead of plain assignment
    # (presumably because the dataclass is frozen -- confirm against the class definition).
    for attribute, value in (
        ("fp16", fp16),
        ("fp16_blacklist_ops", fp16_blacklist_ops),
        ("num_streams", num_streams),
        ("verbosity", verbosity),
    ):
        object.__setattr__(self, attribute, value)

    super().__init__()

MIGX class-attribute instance-attribute

MIGX = MIGX

NCNN class-attribute instance-attribute

NCNN = NCNN

NCNN_VK class-attribute instance-attribute

NCNN_VK = NCNN

ORT class-attribute instance-attribute

ORT = ORT

ORT_COREML class-attribute instance-attribute

ORT_COREML = ORT_COREML

ORT_CPU class-attribute instance-attribute

ORT_CPU = ORT_CPU

ORT_CUDA class-attribute instance-attribute

ORT_CUDA = ORT_CUDA

ORT_DML class-attribute instance-attribute

ORT_DML = ORT_DML

OV class-attribute instance-attribute

OV = OV

OV_CPU class-attribute instance-attribute

OV_CPU = OV_CPU

OV_GPU class-attribute instance-attribute

OV_GPU = OV_GPU

OV_NPU class-attribute instance-attribute

OV_NPU = OV_NPU

TRT class-attribute instance-attribute

TRT = TRT

TRT_RTX class-attribute instance-attribute

TRT_RTX = TRT_RTX

flexible_output_prop class-attribute

flexible_output_prop: str = 'MlrtFlexible'

fp16 instance-attribute

fp16: bool | None

fp16_blacklist_ops instance-attribute

fp16_blacklist_ops: Collection[str] | None

num_streams instance-attribute

num_streams: int

plugin class-attribute instance-attribute

plugin = ort

provider class-attribute

provider: str

verbosity instance-attribute

verbosity: int

Verbosity

Bases: IntEnum

Methods:

Attributes:

ERROR class-attribute instance-attribute

ERROR = 3

FATAL class-attribute instance-attribute

FATAL = 4

INFO class-attribute instance-attribute

INFO = 1

VERBOSE class-attribute instance-attribute

VERBOSE = 0

WARNING class-attribute instance-attribute

WARNING = 2

from_logging classmethod

from_logging(level: int) -> Verbosity
Source code in vsscale/mlrt/backend/ort.py
31
32
33
34
35
36
37
38
39
40
@classmethod
def from_logging(cls, level: int) -> ORT.Verbosity:
    """
    Translate a `logging` level into the matching verbosity member.

    The five standard levels map one-to-one: `DEBUG` -> `VERBOSE`, `INFO` -> `INFO`,
    `WARNING` -> `WARNING`, `ERROR` -> `ERROR` and `CRITICAL` -> `FATAL`.
    Any other numeric level is rounded up to the next standard level, so levels at or
    below `DEBUG` (including `NOTSET`) give `VERBOSE` and levels above `ERROR` give `FATAL`.

    Args:
        level: Numeric logging level, e.g. the result of `Logger.getEffectiveLevel()`.

    Returns:
        The verbosity member corresponding to `level`.
    """
    # Ordered upper bounds: the first bound that `level` does not exceed wins.
    # An exact-match lookup would silently send custom or NOTSET levels to WARNING.
    upper_bounds = (
        (DEBUG, cls.VERBOSE),
        (INFO, cls.INFO),
        (WARNING, cls.WARNING),
        (ERROR, cls.ERROR),
    )

    for upper_bound, verbosity in upper_bounds:
        if level <= upper_bound:
            return verbosity

    # Anything more severe than ERROR (CRITICAL and beyond).
    return cls.FATAL

autoselect classmethod

autoselect(device_id: int = 0, **kwargs: Any) -> Backend

Try to select the best backend for the current system.

Parameters:

  • device_id

    (int, default: 0 ) –

    The GPU device id.

  • **kwargs

    (Any, default: {} ) –

    Additional arguments to pass to the backend.

Returns:

  • Backend

    The selected backend.

Source code in vsscale/mlrt/backend/base.py
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
@classmethod
def autoselect(cls, device_id: int = 0, **kwargs: Any) -> Backend:
    """
    Pick the most suitable backend for the machine this runs on.

    The choice depends on the vendor of the detected GPU, the operating system and
    which inference plugins are present on `core`. `Backend.OV_CPU` is the fallback
    whenever nothing more specific applies.

    Args:
        device_id: The GPU device id.
        **kwargs: Additional arguments to pass to the backend.

    Returns:
        The selected backend, called with `kwargs`.
    """

    def has_plugin(namespace: str) -> bool:
        return hasattr(core, namespace)

    def ort_on_windows() -> bool:
        return platform.system().lower() == "windows" and has_plugin("ort")

    gpu = get_gpu(device_id)

    if gpu:
        vendor = cast(str | None, gpu.vendor)
    elif platform.system().lower() == "darwin" and platform.machine() == "x86_64":
        # macOS x86_64 is unsupported (presumably by the GPU detection -- confirm),
        # so without a detected GPU the vendor is assumed to be Apple there.
        vendor = "apple"
    else:
        vendor = None

    if vendor == "nvidia":
        # Windows & Linux
        if has_plugin("trt"):
            backend = Backend.TRT
        elif has_plugin("trt_rtx"):
            backend = Backend.TRT_RTX
        elif ort_on_windows():
            backend = Backend.ORT_DML
        elif has_plugin("ort"):
            backend = Backend.ORT_CUDA
        elif has_plugin("ncnn"):
            backend = Backend.NCNN
        else:
            backend = Backend.OV_CPU
    elif vendor == "amd":
        # Windows & Linux
        if ort_on_windows():
            backend = Backend.ORT_DML
        elif has_plugin("migx"):
            backend = Backend.MIGX
        elif has_plugin("ncnn"):
            backend = Backend.NCNN_VK
        else:
            backend = Backend.OV_CPU
    elif vendor == "intel":
        # Windows & Linux
        # device-smi can't detect Intel NPUs in 0.5.6
        # https://github.com/ModelCloud/Device-SMI#roadmap
        if has_plugin("ov"):
            backend = Backend.OV_GPU
        elif ort_on_windows():
            backend = Backend.ORT_DML
        elif has_plugin("ncnn"):
            backend = Backend.NCNN_VK
        else:
            backend = Backend.OV_CPU
    elif vendor == "apple":
        # macOS ARM64 & x86_64
        if has_plugin("ncnn"):
            backend = Backend.NCNN_VK
        elif has_plugin("ort"):
            backend = Backend.ORT_COREML
        else:
            backend = Backend.OV_CPU
    else:
        # Unknown vendor, or no GPU detected at all.
        backend = Backend.OV_CPU

    del gpu

    return backend(**kwargs)

get_args

get_args(clips: VideoNode | Sequence[VideoNode]) -> dict[str, Any]

Return backend plugin arguments derived from this configuration.

Source code in vsscale/mlrt/backend/ort.py
78
79
80
81
82
83
84
85
def get_args(self, clips: vs.VideoNode | Sequence[vs.VideoNode]) -> dict[str, Any]:
    """
    Build the plugin arguments for this ONNX Runtime configuration.

    Args:
        clips: Input clip or clips, forwarded to the parent implementation.

    Returns:
        The parent's arguments merged with the ONNX Runtime settings of this instance;
        on a key clash the values set here win.
    """
    inherited = super().get_args(clips)

    setting_names = ("fp16", "provider", "num_streams", "verbosity", "fp16_blacklist_ops")
    ort_settings = {name: getattr(self, name) for name in setting_names}

    return inherited | ort_settings

inference

inference(
    clips: VideoNode | Sequence[VideoNode],
    network_path: str | PathLike[str],
    /,
    overlap: tuple[int, int],
    tilesize: tuple[int, int],
    *,
    flexible: Literal[False] = ...,
    **kwargs: Any,
) -> VideoNode
inference(
    clips: VideoNode | Sequence[VideoNode],
    network_path: str | PathLike[str],
    /,
    overlap: tuple[int, int],
    tilesize: tuple[int, int],
    *,
    flexible: Literal[True],
    **kwargs: Any,
) -> list[VideoNode]
inference(
    clips: VideoNode | Sequence[VideoNode],
    network_path: str | PathLike[str],
    /,
    overlap: tuple[int, int],
    tilesize: tuple[int, int],
    *,
    flexible: bool = ...,
    **kwargs: Any,
) -> VideoNode | list[VideoNode]
inference(
    clips: VideoNode | Sequence[VideoNode],
    network_path: str | PathLike[str],
    /,
    overlap: tuple[int, int],
    tilesize: tuple[int, int],
    *,
    flexible: bool = False,
    **kwargs: Any,
) -> VideoNode | list[VideoNode]

Run inference with this backend.

Parameters:

  • clips

    (VideoNode | Sequence[VideoNode]) –

    Input clip or clips passed to the backend model.

  • network_path

    (str | PathLike[str]) –

    Path to the model file or backend artifact.

  • overlap

    (tuple[int, int]) –

    Horizontal and vertical tile overlap in pixels.

  • tilesize

    (tuple[int, int]) –

    Horizontal and vertical tile size in pixels.

  • flexible

    (bool, default: False ) –

    Return each flexible output plane as a separate clip.

  • **kwargs

    (Any, default: {} ) –

    Additional backend plugin arguments forwarded unchanged.

Returns:

  • VideoNode | list[VideoNode]

    A single output clip, or a list of output clips when flexible is enabled.

Source code in vsscale/mlrt/backend/base.py
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
def inference(
    self,
    clips: vs.VideoNode | Sequence[vs.VideoNode],
    network_path: str | os.PathLike[str],
    /,
    overlap: tuple[int, int],
    tilesize: tuple[int, int],
    *,
    flexible: bool = False,
    **kwargs: Any,
) -> vs.VideoNode | list[vs.VideoNode]:
    """
    Run the model through this backend's plugin.

    Args:
        clips: Input clip or clips passed to the backend model.
        network_path: Path to the model file or backend artifact.
        overlap: Horizontal and vertical tile overlap in pixels.
        tilesize: Horizontal and vertical tile size in pixels.
        flexible: Return each flexible output plane as a separate clip.
        **kwargs: Additional backend plugin arguments forwarded unchanged. They override
            the arguments derived from this configuration on a key clash.

    Returns:
        A single output clip, or a list of output clips when `flexible` is enabled.
    """
    UnsupportedSampleTypeError.check(clips, vs.FLOAT, self.__class__)

    plugin_args = self.get_args(clips)

    if flexible:
        # Merge into a new dict so the mapping returned by get_args is left untouched.
        plugin_args = plugin_args | {"flexible_output_prop": self.flexible_output_prop}

    logger.info("Calling %s.Model", self.plugin.namespace)
    logger.info("Clips: %r", clips)
    logger.info("Network Path: %s", network_path)

    # Caller-supplied keyword arguments take precedence over the configured ones.
    plugin_args = plugin_args | kwargs

    logger.info("overlap=%s, tilesize=%s, %s", overlap, tilesize, plugin_args)
    result = self.plugin.Model(clips, network_path, overlap, tilesize, **plugin_args)

    if not flexible:
        return result

    # In flexible mode the plugin result is indexed by "clip" and "num_planes";
    # every plane is pulled out of the frame props of that clip.
    carrier = result["clip"]
    plane_count = result["num_planes"]

    return [carrier.std.PropToClip(prop=f"{self.flexible_output_prop}{index}") for index in range(plane_count)]

ORT_COREML dataclass

ORT_COREML(
    *,
    ml_program: int = NEURAL_NETWORK,
    fp16: bool = True,
    fp16_blacklist_ops: Collection[str] | None = None,
    num_streams: int = 1,
    verbosity: int | None = None,
)

Bases: ORT

ONNX Runtime Core ML execution provider.

Initialize the backend.

Parameters:

  • num_streams

    (int, default: 1 ) –

    Number of parallel inference streams.

  • verbosity

    (int | None, default: None ) –

    ONNX Runtime logging verbosity.

  • fp16

    (bool, default: True ) –

    Convert model execution to FP16 where supported.

  • fp16_blacklist_ops

    (Collection[str] | None, default: None ) –

    ONNX node or op names to keep in FP32 during FP16 conversion.

Classes:

Methods:

  • autoselect

    Try to select the best backend for the current system.

  • get_args

    Return backend plugin arguments derived from this configuration.

  • inference

    Run inference with this backend.

Attributes:

Source code in vsscale/mlrt/backend/ort.py
206
207
208
209
210
211
212
213
214
215
216
def __init__(
    self,
    *,
    ml_program: int = Provider.NEURAL_NETWORK,
    fp16: bool = True,
    fp16_blacklist_ops: Collection[str] | None = None,
    num_streams: int = 1,
    verbosity: int | None = None,
) -> None:
    """
    Initialize the Core ML backend.

    Args:
        ml_program: Core ML provider mode; converted to `ORT_COREML.Provider` before it is stored.
        fp16: Convert model execution to FP16 where supported.
        fp16_blacklist_ops: ONNX node or op names to keep in FP32 during FP16 conversion.
        num_streams: Number of parallel inference streams.
        verbosity: ONNX Runtime logging verbosity.
    """
    # Going through the enum constructor rejects values that are not a Provider member.
    provider_mode = ORT_COREML.Provider(ml_program)
    object.__setattr__(self, "ml_program", provider_mode)

    super().__init__(
        fp16=fp16,
        fp16_blacklist_ops=fp16_blacklist_ops,
        num_streams=num_streams,
        verbosity=verbosity,
    )

MIGX class-attribute instance-attribute

MIGX = MIGX

NCNN class-attribute instance-attribute

NCNN = NCNN

NCNN_VK class-attribute instance-attribute

NCNN_VK = NCNN

ORT class-attribute instance-attribute

ORT = ORT

ORT_COREML class-attribute instance-attribute

ORT_COREML = ORT_COREML

ORT_CPU class-attribute instance-attribute

ORT_CPU = ORT_CPU

ORT_CUDA class-attribute instance-attribute

ORT_CUDA = ORT_CUDA

ORT_DML class-attribute instance-attribute

ORT_DML = ORT_DML

OV class-attribute instance-attribute

OV = OV

OV_CPU class-attribute instance-attribute

OV_CPU = OV_CPU

OV_GPU class-attribute instance-attribute

OV_GPU = OV_GPU

OV_NPU class-attribute instance-attribute

OV_NPU = OV_NPU

TRT class-attribute instance-attribute

TRT = TRT

TRT_RTX class-attribute instance-attribute

TRT_RTX = TRT_RTX

flexible_output_prop class-attribute

flexible_output_prop: str = 'MlrtFlexible'

fp16 instance-attribute

fp16: bool | None

fp16_blacklist_ops instance-attribute

fp16_blacklist_ops: Collection[str] | None

ml_program instance-attribute

ml_program: Provider

Core ML provider mode.

num_streams instance-attribute

num_streams: int

plugin class-attribute instance-attribute

plugin = ort

provider class-attribute instance-attribute

provider = 'COREML'

verbosity instance-attribute

verbosity: int

Provider

Bases: IntEnum

Attributes:

ML_PROGRAM class-attribute instance-attribute

ML_PROGRAM = 1

NEURAL_NETWORK class-attribute instance-attribute

NEURAL_NETWORK = 0

Verbosity

Bases: IntEnum

Methods:

Attributes:

ERROR class-attribute instance-attribute

ERROR = 3

FATAL class-attribute instance-attribute

FATAL = 4

INFO class-attribute instance-attribute

INFO = 1

VERBOSE class-attribute instance-attribute

VERBOSE = 0

WARNING class-attribute instance-attribute

WARNING = 2

from_logging classmethod

from_logging(level: int) -> Verbosity
Source code in vsscale/mlrt/backend/ort.py
31
32
33
34
35
36
37
38
39
40
@classmethod
def from_logging(cls, level: int) -> ORT.Verbosity:
    """
    Translate a `logging` level into the matching verbosity member.

    The five standard levels map one-to-one: `DEBUG` -> `VERBOSE`, `INFO` -> `INFO`,
    `WARNING` -> `WARNING`, `ERROR` -> `ERROR` and `CRITICAL` -> `FATAL`.
    Any other numeric level is rounded up to the next standard level, so levels at or
    below `DEBUG` (including `NOTSET`) give `VERBOSE` and levels above `ERROR` give `FATAL`.

    Args:
        level: Numeric logging level, e.g. the result of `Logger.getEffectiveLevel()`.

    Returns:
        The verbosity member corresponding to `level`.
    """
    # Ordered upper bounds: the first bound that `level` does not exceed wins.
    # An exact-match lookup would silently send custom or NOTSET levels to WARNING.
    upper_bounds = (
        (DEBUG, cls.VERBOSE),
        (INFO, cls.INFO),
        (WARNING, cls.WARNING),
        (ERROR, cls.ERROR),
    )

    for upper_bound, verbosity in upper_bounds:
        if level <= upper_bound:
            return verbosity

    # Anything more severe than ERROR (CRITICAL and beyond).
    return cls.FATAL

autoselect classmethod

autoselect(device_id: int = 0, **kwargs: Any) -> Backend

Try to select the best backend for the current system.

Parameters:

  • device_id

    (int, default: 0 ) –

    The GPU device id.

  • **kwargs

    (Any, default: {} ) –

    Additional arguments to pass to the backend.

Returns:

  • Backend

    The selected backend.

Source code in vsscale/mlrt/backend/base.py
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
@classmethod
def autoselect(cls, device_id: int = 0, **kwargs: Any) -> Backend:
    """
    Pick the most suitable backend for the machine this runs on.

    The choice depends on the vendor of the detected GPU, the operating system and
    which inference plugins are present on `core`. `Backend.OV_CPU` is the fallback
    whenever nothing more specific applies.

    Args:
        device_id: The GPU device id.
        **kwargs: Additional arguments to pass to the backend.

    Returns:
        The selected backend, called with `kwargs`.
    """

    def has_plugin(namespace: str) -> bool:
        return hasattr(core, namespace)

    def ort_on_windows() -> bool:
        return platform.system().lower() == "windows" and has_plugin("ort")

    gpu = get_gpu(device_id)

    if gpu:
        vendor = cast(str | None, gpu.vendor)
    elif platform.system().lower() == "darwin" and platform.machine() == "x86_64":
        # macOS x86_64 is unsupported (presumably by the GPU detection -- confirm),
        # so without a detected GPU the vendor is assumed to be Apple there.
        vendor = "apple"
    else:
        vendor = None

    if vendor == "nvidia":
        # Windows & Linux
        if has_plugin("trt"):
            backend = Backend.TRT
        elif has_plugin("trt_rtx"):
            backend = Backend.TRT_RTX
        elif ort_on_windows():
            backend = Backend.ORT_DML
        elif has_plugin("ort"):
            backend = Backend.ORT_CUDA
        elif has_plugin("ncnn"):
            backend = Backend.NCNN
        else:
            backend = Backend.OV_CPU
    elif vendor == "amd":
        # Windows & Linux
        if ort_on_windows():
            backend = Backend.ORT_DML
        elif has_plugin("migx"):
            backend = Backend.MIGX
        elif has_plugin("ncnn"):
            backend = Backend.NCNN_VK
        else:
            backend = Backend.OV_CPU
    elif vendor == "intel":
        # Windows & Linux
        # device-smi can't detect Intel NPUs in 0.5.6
        # https://github.com/ModelCloud/Device-SMI#roadmap
        if has_plugin("ov"):
            backend = Backend.OV_GPU
        elif ort_on_windows():
            backend = Backend.ORT_DML
        elif has_plugin("ncnn"):
            backend = Backend.NCNN_VK
        else:
            backend = Backend.OV_CPU
    elif vendor == "apple":
        # macOS ARM64 & x86_64
        if has_plugin("ncnn"):
            backend = Backend.NCNN_VK
        elif has_plugin("ort"):
            backend = Backend.ORT_COREML
        else:
            backend = Backend.OV_CPU
    else:
        # Unknown vendor, or no GPU detected at all.
        backend = Backend.OV_CPU

    del gpu

    return backend(**kwargs)

get_args

get_args(clips: VideoNode | Sequence[VideoNode]) -> dict[str, Any]

Return backend plugin arguments derived from this configuration.

Source code in vsscale/mlrt/backend/ort.py
218
219
def get_args(self, clips: vs.VideoNode | Sequence[vs.VideoNode]) -> dict[str, Any]:
    """
    Build the plugin arguments for the Core ML configuration.

    Args:
        clips: Input clip or clips, forwarded to the parent implementation.

    Returns:
        The parent's arguments with `ml_program` added (overriding any inherited value).
    """
    inherited = super().get_args(clips)
    coreml_settings = {"ml_program": self.ml_program}

    return inherited | coreml_settings

inference

inference(
    clips: VideoNode | Sequence[VideoNode],
    network_path: str | PathLike[str],
    /,
    overlap: tuple[int, int],
    tilesize: tuple[int, int],
    *,
    flexible: Literal[False] = ...,
    **kwargs: Any,
) -> VideoNode
inference(
    clips: VideoNode | Sequence[VideoNode],
    network_path: str | PathLike[str],
    /,
    overlap: tuple[int, int],
    tilesize: tuple[int, int],
    *,
    flexible: Literal[True],
    **kwargs: Any,
) -> list[VideoNode]
inference(
    clips: VideoNode | Sequence[VideoNode],
    network_path: str | PathLike[str],
    /,
    overlap: tuple[int, int],
    tilesize: tuple[int, int],
    *,
    flexible: bool = ...,
    **kwargs: Any,
) -> VideoNode | list[VideoNode]
inference(
    clips: VideoNode | Sequence[VideoNode],
    network_path: str | PathLike[str],
    /,
    overlap: tuple[int, int],
    tilesize: tuple[int, int],
    *,
    flexible: bool = False,
    **kwargs: Any,
) -> VideoNode | list[VideoNode]

Run inference with this backend.

Parameters:

  • clips

    (VideoNode | Sequence[VideoNode]) –

    Input clip or clips passed to the backend model.

  • network_path

    (str | PathLike[str]) –

    Path to the model file or backend artifact.

  • overlap

    (tuple[int, int]) –

    Horizontal and vertical tile overlap in pixels.

  • tilesize

    (tuple[int, int]) –

    Horizontal and vertical tile size in pixels.

  • flexible

    (bool, default: False ) –

    Return each flexible output plane as a separate clip.

  • **kwargs

    (Any, default: {} ) –

    Additional backend plugin arguments forwarded unchanged.

Returns:

  • VideoNode | list[VideoNode]

    A single output clip, or a list of output clips when flexible is enabled.

Source code in vsscale/mlrt/backend/base.py
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
def inference(
    self,
    clips: vs.VideoNode | Sequence[vs.VideoNode],
    network_path: str | os.PathLike[str],
    /,
    overlap: tuple[int, int],
    tilesize: tuple[int, int],
    *,
    flexible: bool = False,
    **kwargs: Any,
) -> vs.VideoNode | list[vs.VideoNode]:
    """
    Run the model through this backend's plugin.

    Args:
        clips: Input clip or clips passed to the backend model.
        network_path: Path to the model file or backend artifact.
        overlap: Horizontal and vertical tile overlap in pixels.
        tilesize: Horizontal and vertical tile size in pixels.
        flexible: Return each flexible output plane as a separate clip.
        **kwargs: Additional backend plugin arguments forwarded unchanged. They override
            the arguments derived from this configuration on a key clash.

    Returns:
        A single output clip, or a list of output clips when `flexible` is enabled.
    """
    UnsupportedSampleTypeError.check(clips, vs.FLOAT, self.__class__)

    plugin_args = self.get_args(clips)

    if flexible:
        # Merge into a new dict so the mapping returned by get_args is left untouched.
        plugin_args = plugin_args | {"flexible_output_prop": self.flexible_output_prop}

    logger.info("Calling %s.Model", self.plugin.namespace)
    logger.info("Clips: %r", clips)
    logger.info("Network Path: %s", network_path)

    # Caller-supplied keyword arguments take precedence over the configured ones.
    plugin_args = plugin_args | kwargs

    logger.info("overlap=%s, tilesize=%s, %s", overlap, tilesize, plugin_args)
    result = self.plugin.Model(clips, network_path, overlap, tilesize, **plugin_args)

    if not flexible:
        return result

    # In flexible mode the plugin result is indexed by "clip" and "num_planes";
    # every plane is pulled out of the frame props of that clip.
    carrier = result["clip"]
    plane_count = result["num_planes"]

    return [carrier.std.PropToClip(prop=f"{self.flexible_output_prop}{index}") for index in range(plane_count)]

ORT_CPU dataclass

ORT_CPU()

Bases: ORT

ONNX Runtime CPU execution provider.

Classes:

Methods:

  • autoselect

    Try to select the best backend for the current system.

  • get_args

    Return backend plugin arguments derived from this configuration.

  • inference

    Run inference with this backend.

Attributes:

MIGX class-attribute instance-attribute

MIGX = MIGX

NCNN class-attribute instance-attribute

NCNN = NCNN

NCNN_VK class-attribute instance-attribute

NCNN_VK = NCNN

ORT class-attribute instance-attribute

ORT = ORT

ORT_COREML class-attribute instance-attribute

ORT_COREML = ORT_COREML

ORT_CPU class-attribute instance-attribute

ORT_CPU = ORT_CPU

ORT_CUDA class-attribute instance-attribute

ORT_CUDA = ORT_CUDA

ORT_DML class-attribute instance-attribute

ORT_DML = ORT_DML

OV class-attribute instance-attribute

OV = OV

OV_CPU class-attribute instance-attribute

OV_CPU = OV_CPU

OV_GPU class-attribute instance-attribute

OV_GPU = OV_GPU

OV_NPU class-attribute instance-attribute

OV_NPU = OV_NPU

TRT class-attribute instance-attribute

TRT = TRT

TRT_RTX class-attribute instance-attribute

TRT_RTX = TRT_RTX

flexible_output_prop class-attribute

flexible_output_prop: str = 'MlrtFlexible'

fp16 instance-attribute

fp16: bool | None

fp16_blacklist_ops instance-attribute

fp16_blacklist_ops: Collection[str] | None

num_streams instance-attribute

num_streams: int

plugin class-attribute instance-attribute

plugin = ort

provider class-attribute instance-attribute

provider = 'CPU'

verbosity instance-attribute

verbosity: int

Verbosity

Bases: IntEnum

Methods:

Attributes:

ERROR class-attribute instance-attribute

ERROR = 3

FATAL class-attribute instance-attribute

FATAL = 4

INFO class-attribute instance-attribute

INFO = 1

VERBOSE class-attribute instance-attribute

VERBOSE = 0

WARNING class-attribute instance-attribute

WARNING = 2

from_logging classmethod

from_logging(level: int) -> Verbosity
Source code in vsscale/mlrt/backend/ort.py
31
32
33
34
35
36
37
38
39
40
@classmethod
def from_logging(cls, level: int) -> ORT.Verbosity:
    """
    Translate a `logging` level into the matching verbosity member.

    The five standard levels map one-to-one: `DEBUG` -> `VERBOSE`, `INFO` -> `INFO`,
    `WARNING` -> `WARNING`, `ERROR` -> `ERROR` and `CRITICAL` -> `FATAL`.
    Any other numeric level is rounded up to the next standard level, so levels at or
    below `DEBUG` (including `NOTSET`) give `VERBOSE` and levels above `ERROR` give `FATAL`.

    Args:
        level: Numeric logging level, e.g. the result of `Logger.getEffectiveLevel()`.

    Returns:
        The verbosity member corresponding to `level`.
    """
    # Ordered upper bounds: the first bound that `level` does not exceed wins.
    # An exact-match lookup would silently send custom or NOTSET levels to WARNING.
    upper_bounds = (
        (DEBUG, cls.VERBOSE),
        (INFO, cls.INFO),
        (WARNING, cls.WARNING),
        (ERROR, cls.ERROR),
    )

    for upper_bound, verbosity in upper_bounds:
        if level <= upper_bound:
            return verbosity

    # Anything more severe than ERROR (CRITICAL and beyond).
    return cls.FATAL

autoselect classmethod

autoselect(device_id: int = 0, **kwargs: Any) -> Backend

Try to select the best backend for the current system.

Parameters:

  • device_id

    (int, default: 0 ) –

    The GPU device id.

  • **kwargs

    (Any, default: {} ) –

    Additional arguments to pass to the backend.

Returns:

  • Backend

    The selected backend.

Source code in vsscale/mlrt/backend/base.py
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
@classmethod
def autoselect(cls, device_id: int = 0, **kwargs: Any) -> Backend:
    """
    Pick the most suitable backend for the machine this runs on.

    The choice depends on the vendor of the detected GPU, the operating system and
    which inference plugins are present on `core`. `Backend.OV_CPU` is the fallback
    whenever nothing more specific applies.

    Args:
        device_id: The GPU device id.
        **kwargs: Additional arguments to pass to the backend.

    Returns:
        The selected backend, called with `kwargs`.
    """

    def has_plugin(namespace: str) -> bool:
        return hasattr(core, namespace)

    def ort_on_windows() -> bool:
        return platform.system().lower() == "windows" and has_plugin("ort")

    gpu = get_gpu(device_id)

    if gpu:
        vendor = cast(str | None, gpu.vendor)
    elif platform.system().lower() == "darwin" and platform.machine() == "x86_64":
        # macOS x86_64 is unsupported (presumably by the GPU detection -- confirm),
        # so without a detected GPU the vendor is assumed to be Apple there.
        vendor = "apple"
    else:
        vendor = None

    if vendor == "nvidia":
        # Windows & Linux
        if has_plugin("trt"):
            backend = Backend.TRT
        elif has_plugin("trt_rtx"):
            backend = Backend.TRT_RTX
        elif ort_on_windows():
            backend = Backend.ORT_DML
        elif has_plugin("ort"):
            backend = Backend.ORT_CUDA
        elif has_plugin("ncnn"):
            backend = Backend.NCNN
        else:
            backend = Backend.OV_CPU
    elif vendor == "amd":
        # Windows & Linux
        if ort_on_windows():
            backend = Backend.ORT_DML
        elif has_plugin("migx"):
            backend = Backend.MIGX
        elif has_plugin("ncnn"):
            backend = Backend.NCNN_VK
        else:
            backend = Backend.OV_CPU
    elif vendor == "intel":
        # Windows & Linux
        # device-smi can't detect Intel NPUs in 0.5.6
        # https://github.com/ModelCloud/Device-SMI#roadmap
        if has_plugin("ov"):
            backend = Backend.OV_GPU
        elif ort_on_windows():
            backend = Backend.ORT_DML
        elif has_plugin("ncnn"):
            backend = Backend.NCNN_VK
        else:
            backend = Backend.OV_CPU
    elif vendor == "apple":
        # macOS ARM64 & x86_64
        if has_plugin("ncnn"):
            backend = Backend.NCNN_VK
        elif has_plugin("ort"):
            backend = Backend.ORT_COREML
        else:
            backend = Backend.OV_CPU
    else:
        # Unknown vendor, or no GPU detected at all.
        backend = Backend.OV_CPU

    del gpu

    return backend(**kwargs)

get_args

get_args(clips: VideoNode | Sequence[VideoNode]) -> dict[str, Any]

Return backend plugin arguments derived from this configuration.

Source code in vsscale/mlrt/backend/ort.py
78
79
80
81
82
83
84
85
def get_args(self, clips: vs.VideoNode | Sequence[vs.VideoNode]) -> dict[str, Any]:
    """
    Build the plugin arguments for this ONNX Runtime configuration.

    Args:
        clips: Input clip or clips, forwarded to the parent implementation.

    Returns:
        The parent's arguments merged with the ONNX Runtime settings of this instance;
        on a key clash the values set here win.
    """
    inherited = super().get_args(clips)

    setting_names = ("fp16", "provider", "num_streams", "verbosity", "fp16_blacklist_ops")
    ort_settings = {name: getattr(self, name) for name in setting_names}

    return inherited | ort_settings

inference

inference(
    clips: VideoNode | Sequence[VideoNode],
    network_path: str | PathLike[str],
    /,
    overlap: tuple[int, int],
    tilesize: tuple[int, int],
    *,
    flexible: Literal[False] = ...,
    **kwargs: Any,
) -> VideoNode
inference(
    clips: VideoNode | Sequence[VideoNode],
    network_path: str | PathLike[str],
    /,
    overlap: tuple[int, int],
    tilesize: tuple[int, int],
    *,
    flexible: Literal[True],
    **kwargs: Any,
) -> list[VideoNode]
inference(
    clips: VideoNode | Sequence[VideoNode],
    network_path: str | PathLike[str],
    /,
    overlap: tuple[int, int],
    tilesize: tuple[int, int],
    *,
    flexible: bool = ...,
    **kwargs: Any,
) -> VideoNode | list[VideoNode]
inference(
    clips: VideoNode | Sequence[VideoNode],
    network_path: str | PathLike[str],
    /,
    overlap: tuple[int, int],
    tilesize: tuple[int, int],
    *,
    flexible: bool = False,
    **kwargs: Any,
) -> VideoNode | list[VideoNode]

Run inference with this backend.

Parameters:

  • clips

    (VideoNode | Sequence[VideoNode]) –

    Input clip or clips passed to the backend model.

  • network_path

    (str | PathLike[str]) –

    Path to the model file or backend artifact.

  • overlap

    (tuple[int, int]) –

    Horizontal and vertical tile overlap in pixels.

  • tilesize

    (tuple[int, int]) –

    Horizontal and vertical tile size in pixels.

  • flexible

    (bool, default: False ) –

    Return each flexible output plane as a separate clip.

  • **kwargs

    (Any, default: {} ) –

    Additional backend plugin arguments forwarded unchanged.

Returns:

  • VideoNode | list[VideoNode]

    A single output clip, or a list of output clips when flexible is enabled.

Source code in vsscale/mlrt/backend/base.py
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
def inference(
    self,
    clips: vs.VideoNode | Sequence[vs.VideoNode],
    network_path: str | os.PathLike[str],
    /,
    overlap: tuple[int, int],
    tilesize: tuple[int, int],
    *,
    flexible: bool = False,
    **kwargs: Any,
) -> vs.VideoNode | list[vs.VideoNode]:
    """
    Run the backend plugin's ``Model`` function on the given clips.

    The clips are first checked against ``vs.FLOAT`` through ``UnsupportedSampleTypeError.check``.
    The plugin arguments come from ``get_args``; entries in ``kwargs`` take precedence over them
    because they are merged last.

    Args:
        clips: Input clip or clips passed to the backend model.
        network_path: Path to the model file or backend artifact.
        overlap: Horizontal and vertical tile overlap in pixels.
        tilesize: Horizontal and vertical tile size in pixels.
        flexible: Return each flexible output plane as a separate clip.
        **kwargs: Additional backend plugin arguments forwarded unchanged.

    Returns:
        A single output clip, or a list of output clips when `flexible` is enabled.
    """
    UnsupportedSampleTypeError.check(clips, vs.FLOAT, self.__class__)

    plugin_args = self.get_args(clips)

    if flexible:
        # Build a fresh dict so the mapping returned by get_args() is never mutated.
        plugin_args = {**plugin_args, "flexible_output_prop": self.flexible_output_prop}

    # Caller-supplied kwargs win over the backend-derived arguments.
    call_kwargs = plugin_args | kwargs

    logger.info("Calling %s.Model", self.plugin.namespace)
    logger.info("Clips: %r", clips)
    logger.info("Network Path: %s", network_path)
    logger.info("overlap=%s, tilesize=%s, %s", overlap, tilesize, call_kwargs)
    result = self.plugin.Model(clips, network_path, overlap, tilesize, **call_kwargs)

    if not flexible:
        return result

    # In flexible mode the plugin result is indexed by "clip" and "num_planes";
    # every plane is read back from the frame property "<flexible_output_prop><index>".
    prop_clip = result["clip"]
    plane_count = result["num_planes"]

    return [prop_clip.std.PropToClip(prop=f"{self.flexible_output_prop}{index}") for index in range(plane_count)]

ORT_CUDA dataclass

ORT_CUDA(
    *,
    num_streams: int = 1,
    verbosity: int | None = None,
    device_id: int = 0,
    cudnn_benchmark: bool = True,
    use_cuda_graph: bool = False,
    fp16: bool = True,
    fp16_blacklist_ops: Collection[str] | None = None,
    tf32: bool = False,
    prefer_nhwc: bool = False,
)

Bases: ORT

ONNX Runtime CUDA execution provider for Nvidia GPUs.

Initialize the backend.

Parameters:

  • num_streams

    (int, default: 1 ) –

    Number of parallel inference streams.

  • verbosity

    (int | None, default: None ) –

    ONNX Runtime logging verbosity.

  • device_id

    (int, default: 0 ) –

    CUDA device index.

  • cudnn_benchmark

    (bool, default: True ) –

    Let cuDNN search for faster convolution algorithms.

  • use_cuda_graph

    (bool, default: False ) –

    Enable CUDA graph capture to improve performance and reduce CPU overhead for compatible models.

  • fp16

    (bool, default: True ) –

    Convert model execution to FP16 where supported.

  • fp16_blacklist_ops

    (Collection[str] | None, default: None ) –

    ONNX node or op names to keep in FP32 during FP16 conversion.

  • tf32

    (bool, default: False ) –

    Allow TensorFloat-32 math on supported Nvidia GPUs.

  • prefer_nhwc

    (bool, default: False ) –

    Prefer NHWC layout where ONNX Runtime supports it.

Classes:

Methods:

  • autoselect

    Try to select the best backend for the current system.

  • get_args

    Return backend plugin arguments derived from this configuration.

  • inference

    Run inference with this backend.

Attributes:

Source code in vsscale/mlrt/backend/ort.py
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
def __init__(
    self,
    *,
    num_streams: int = 1,
    verbosity: int | None = None,
    device_id: int = 0,
    cudnn_benchmark: bool = True,
    use_cuda_graph: bool = False,
    fp16: bool = True,
    fp16_blacklist_ops: Collection[str] | None = None,
    tf32: bool = False,
    prefer_nhwc: bool = False,
) -> None:
    """
    Initialize the backend.

    The CUDA-specific options are stored on the instance first; the shared ONNX Runtime
    options are then handed to the parent initializer.

    Args:
        num_streams: Number of parallel inference streams.
        verbosity: ONNX Runtime logging verbosity.
        device_id: CUDA device index.
        cudnn_benchmark: Let cuDNN search for faster convolution algorithms.
        use_cuda_graph: Enable CUDA graph capture to improve performance and reduce CPU overhead
            for compatible models.
        fp16: Convert model execution to FP16 where supported.
        fp16_blacklist_ops: ONNX node or op names to keep in FP32 during FP16 conversion.
        tf32: Allow TensorFloat-32 math on supported Nvidia GPUs.
        prefer_nhwc: Prefer NHWC layout where ONNX Runtime supports it.
    """
    cuda_options = {
        "device_id": device_id,
        "cudnn_benchmark": cudnn_benchmark,
        "use_cuda_graph": use_cuda_graph,
        "prefer_nhwc": prefer_nhwc,
        "tf32": tf32,
    }

    # object.__setattr__ bypasses the class's own __setattr__
    # (presumably needed because the dataclass is frozen - TODO confirm).
    for field_name, field_value in cuda_options.items():
        object.__setattr__(self, field_name, field_value)

    super().__init__(
        num_streams=num_streams,
        verbosity=verbosity,
        fp16=fp16,
        fp16_blacklist_ops=fp16_blacklist_ops,
    )

MIGX class-attribute instance-attribute

MIGX = MIGX

NCNN class-attribute instance-attribute

NCNN = NCNN

NCNN_VK class-attribute instance-attribute

NCNN_VK = NCNN

ORT class-attribute instance-attribute

ORT = ORT

ORT_COREML class-attribute instance-attribute

ORT_COREML = ORT_COREML

ORT_CPU class-attribute instance-attribute

ORT_CPU = ORT_CPU

ORT_CUDA class-attribute instance-attribute

ORT_CUDA = ORT_CUDA

ORT_DML class-attribute instance-attribute

ORT_DML = ORT_DML

OV class-attribute instance-attribute

OV = OV

OV_CPU class-attribute instance-attribute

OV_CPU = OV_CPU

OV_GPU class-attribute instance-attribute

OV_GPU = OV_GPU

OV_NPU class-attribute instance-attribute

OV_NPU = OV_NPU

TRT class-attribute instance-attribute

TRT = TRT

TRT_RTX class-attribute instance-attribute

TRT_RTX = TRT_RTX

cudnn_benchmark instance-attribute

cudnn_benchmark: bool

device_id instance-attribute

device_id: int

flexible_output_prop class-attribute

flexible_output_prop: str = 'MlrtFlexible'

fp16 instance-attribute

fp16: bool | None

fp16_blacklist_ops instance-attribute

fp16_blacklist_ops: Collection[str] | None

num_streams instance-attribute

num_streams: int

plugin class-attribute instance-attribute

plugin = ort

prefer_nhwc instance-attribute

prefer_nhwc: bool

provider class-attribute instance-attribute

provider = 'CUDA'

tf32 instance-attribute

tf32: bool

use_cuda_graph instance-attribute

use_cuda_graph: bool

verbosity instance-attribute

verbosity: int

Verbosity

Bases: IntEnum

Methods:

Attributes:

ERROR class-attribute instance-attribute

ERROR = 3

FATAL class-attribute instance-attribute

FATAL = 4

INFO class-attribute instance-attribute

INFO = 1

VERBOSE class-attribute instance-attribute

VERBOSE = 0

WARNING class-attribute instance-attribute

WARNING = 2

from_logging classmethod

from_logging(level: int) -> Verbosity
Source code in vsscale/mlrt/backend/ort.py
31
32
33
34
35
36
37
38
39
40
@classmethod
def from_logging(cls, level: int) -> ORT.Verbosity:
    """
    Translate a standard ``logging`` level into the matching verbosity member.

    Args:
        level: One of the ``logging`` level constants (``DEBUG`` ... ``CRITICAL``).

    Returns:
        The corresponding member of ``cls``; any level that is not one of the five
        standard constants maps to ``cls.WARNING``.
    """
    # The two tuples are paired position by position: logging level -> verbosity member.
    by_level = dict(
        zip(
            (DEBUG, INFO, WARNING, ERROR, CRITICAL),
            (cls.VERBOSE, cls.INFO, cls.WARNING, cls.ERROR, cls.FATAL),
        )
    )

    try:
        return by_level[level]
    except KeyError:
        return cls.WARNING

autoselect classmethod

autoselect(device_id: int = 0, **kwargs: Any) -> Backend

Try to select the best backend for the current system.

Parameters:

  • device_id

    (int, default: 0 ) –

    The GPU device id.

  • **kwargs

    (Any, default: {} ) –

    Additional arguments to pass to the backend.

Returns:

  • Backend

    The selected backend.

Source code in vsscale/mlrt/backend/base.py
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
@classmethod
def autoselect(cls, device_id: int = 0, **kwargs: Any) -> Backend:
    """
    Try to select the best backend for the current system.

    The GPU vendor reported by ``get_gpu`` picks a preference list; the first entry whose
    plugin namespace exists on ``core`` (and whose platform restriction is met) wins.
    ``Backend.OV_CPU`` is the fallback for unknown vendors and for vendors with no usable plugin.

    Args:
        device_id: The GPU device id.
        **kwargs: Additional arguments to pass to the backend.

    Returns:
        The selected backend.
    """

    def usable(plugin: str, windows_only: bool) -> bool:
        # The platform test comes first so the plugin lookup is skipped off Windows,
        # matching a short-circuiting `and`.
        if windows_only and platform.system().lower() != "windows":
            return False
        return hasattr(core, plugin)

    # Ordered preferences per vendor: (core plugin namespace, Windows-only?, Backend attribute name).
    preferences = (
        # Windows & Linux
        (
            "nvidia",
            (
                ("trt", False, "TRT"),
                ("trt_rtx", False, "TRT_RTX"),
                ("ort", True, "ORT_DML"),
                ("ort", False, "ORT_CUDA"),
                ("ncnn", False, "NCNN"),
            ),
        ),
        # Windows & Linux
        (
            "amd",
            (
                ("ort", True, "ORT_DML"),
                ("migx", False, "MIGX"),
                ("ncnn", False, "NCNN_VK"),
            ),
        ),
        # Windows & Linux
        # device-smi can't detect Intel NPUs in 0.5.6
        # https://github.com/ModelCloud/Device-SMI#roadmap
        (
            "intel",
            (
                ("ov", False, "OV_GPU"),
                ("ort", True, "ORT_DML"),
                ("ncnn", False, "NCNN_VK"),
            ),
        ),
        # macOS ARM64 & x86_64
        (
            "apple",
            (
                ("ncnn", False, "NCNN_VK"),
                ("ort", False, "ORT_COREML"),
            ),
        ),
    )

    gpu = get_gpu(device_id)

    if gpu:
        vendor = cast(str | None, gpu.vendor)
    elif platform.system().lower() == "darwin" and platform.machine() == "x86_64":
        # macOS x86_64 is unsupported
        # NOTE(review): presumably "unsupported by GPU detection", hence the hard-coded vendor - confirm.
        vendor = "apple"
    else:
        vendor = None

    chosen = "OV_CPU"

    for vendor_name, candidates in preferences:
        if vendor == vendor_name:
            for plugin, windows_only, backend_name in candidates:
                if usable(plugin, windows_only):
                    chosen = backend_name
                    break
            break

    backend = getattr(Backend, chosen)

    del gpu

    return backend(**kwargs)

get_args

get_args(clips: VideoNode | Sequence[VideoNode]) -> dict[str, Any]

Return backend plugin arguments derived from this configuration.

Source code in vsscale/mlrt/backend/ort.py
147
148
149
150
151
152
153
154
def get_args(self, clips: vs.VideoNode | Sequence[vs.VideoNode]) -> dict[str, Any]:
    """
    Return the parent backend arguments extended with the CUDA-specific options.

    Args:
        clips: Input clip or clips, forwarded to the parent ``get_args``.

    Returns:
        The parent's argument mapping merged with ``device_id``, ``cudnn_benchmark``,
        ``use_cuda_graph``, ``prefer_nhwc`` and ``tf32``; the CUDA values win on key clashes.
    """
    base_args = super().get_args(clips)
    cuda_fields = ("device_id", "cudnn_benchmark", "use_cuda_graph", "prefer_nhwc", "tf32")

    # Plugin argument names are identical to the attribute names on this instance.
    return base_args | {field: getattr(self, field) for field in cuda_fields}

inference

inference(
    clips: VideoNode | Sequence[VideoNode],
    network_path: str | PathLike[str],
    /,
    overlap: tuple[int, int],
    tilesize: tuple[int, int],
    *,
    flexible: Literal[False] = ...,
    **kwargs: Any,
) -> VideoNode
inference(
    clips: VideoNode | Sequence[VideoNode],
    network_path: str | PathLike[str],
    /,
    overlap: tuple[int, int],
    tilesize: tuple[int, int],
    *,
    flexible: Literal[True],
    **kwargs: Any,
) -> list[VideoNode]
inference(
    clips: VideoNode | Sequence[VideoNode],
    network_path: str | PathLike[str],
    /,
    overlap: tuple[int, int],
    tilesize: tuple[int, int],
    *,
    flexible: bool = ...,
    **kwargs: Any,
) -> VideoNode | list[VideoNode]
inference(
    clips: VideoNode | Sequence[VideoNode],
    network_path: str | PathLike[str],
    /,
    overlap: tuple[int, int],
    tilesize: tuple[int, int],
    *,
    flexible: bool = False,
    **kwargs: Any,
) -> VideoNode | list[VideoNode]

Run inference with this backend.

Parameters:

  • clips

    (VideoNode | Sequence[VideoNode]) –

    Input clip or clips passed to the backend model.

  • network_path

    (str | PathLike[str]) –

    Path to the model file or backend artifact.

  • overlap

    (tuple[int, int]) –

    Horizontal and vertical tile overlap in pixels.

  • tilesize

    (tuple[int, int]) –

    Horizontal and vertical tile size in pixels.

  • flexible

    (bool, default: False ) –

    Return each flexible output plane as a separate clip.

  • **kwargs

    (Any, default: {} ) –

    Additional backend plugin arguments forwarded unchanged.

Returns:

  • VideoNode | list[VideoNode]

    A single output clip, or a list of output clips when flexible is enabled.

Source code in vsscale/mlrt/backend/base.py
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
def inference(
    self,
    clips: vs.VideoNode | Sequence[vs.VideoNode],
    network_path: str | os.PathLike[str],
    /,
    overlap: tuple[int, int],
    tilesize: tuple[int, int],
    *,
    flexible: bool = False,
    **kwargs: Any,
) -> vs.VideoNode | list[vs.VideoNode]:
    """
    Run the backend plugin's ``Model`` function on the given clips.

    The clips are first checked against ``vs.FLOAT`` through ``UnsupportedSampleTypeError.check``.
    The plugin arguments come from ``get_args``; entries in ``kwargs`` take precedence over them
    because they are merged last.

    Args:
        clips: Input clip or clips passed to the backend model.
        network_path: Path to the model file or backend artifact.
        overlap: Horizontal and vertical tile overlap in pixels.
        tilesize: Horizontal and vertical tile size in pixels.
        flexible: Return each flexible output plane as a separate clip.
        **kwargs: Additional backend plugin arguments forwarded unchanged.

    Returns:
        A single output clip, or a list of output clips when `flexible` is enabled.
    """
    UnsupportedSampleTypeError.check(clips, vs.FLOAT, self.__class__)

    plugin_args = self.get_args(clips)

    if flexible:
        # Build a fresh dict so the mapping returned by get_args() is never mutated.
        plugin_args = {**plugin_args, "flexible_output_prop": self.flexible_output_prop}

    # Caller-supplied kwargs win over the backend-derived arguments.
    call_kwargs = plugin_args | kwargs

    logger.info("Calling %s.Model", self.plugin.namespace)
    logger.info("Clips: %r", clips)
    logger.info("Network Path: %s", network_path)
    logger.info("overlap=%s, tilesize=%s, %s", overlap, tilesize, call_kwargs)
    result = self.plugin.Model(clips, network_path, overlap, tilesize, **call_kwargs)

    if not flexible:
        return result

    # In flexible mode the plugin result is indexed by "clip" and "num_planes";
    # every plane is read back from the frame property "<flexible_output_prop><index>".
    prop_clip = result["clip"]
    plane_count = result["num_planes"]

    return [prop_clip.std.PropToClip(prop=f"{self.flexible_output_prop}{index}") for index in range(plane_count)]

ORT_DML dataclass

ORT_DML(
    *,
    device_id: int = 0,
    fp16: bool = True,
    fp16_blacklist_ops: Collection[str] | None = None,
    num_streams: int = 1,
    verbosity: int | None = None,
)

Bases: ORT

ONNX Runtime DirectML execution provider for D3D12 devices.

Initialize the backend.

Parameters:

  • device_id

    (int, default: 0 ) –

    DirectML adapter index.

  • num_streams

    (int, default: 1 ) –

    Number of parallel inference streams.

  • verbosity

    (int | None, default: None ) –

    ONNX Runtime logging verbosity.

  • fp16

    (bool, default: True ) –

    Convert model execution to FP16 where supported.

  • fp16_blacklist_ops

    (Collection[str] | None, default: None ) –

    ONNX node or op names to keep in FP32 during FP16 conversion.

Classes:

Methods:

  • autoselect

    Try to select the best backend for the current system.

  • get_args

    Return backend plugin arguments derived from this configuration.

  • inference

    Run inference with this backend.

Attributes:

Source code in vsscale/mlrt/backend/ort.py
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
def __init__(
    self,
    *,
    device_id: int = 0,
    fp16: bool = True,
    fp16_blacklist_ops: Collection[str] | None = None,
    num_streams: int = 1,
    verbosity: int | None = None,
) -> None:
    """
    Initialize the backend.

    ``device_id`` is stored on the instance first; the remaining options are handed
    to the parent initializer.

    Args:
        device_id: DirectML adapter index.
        num_streams: Number of parallel inference streams.
        verbosity: ONNX Runtime logging verbosity.
        fp16: Convert model execution to FP16 where supported.
        fp16_blacklist_ops: ONNX node or op names to keep in FP32 during FP16 conversion.
    """
    # object.__setattr__ bypasses the class's own __setattr__
    # (presumably needed because the dataclass is frozen - TODO confirm).
    object.__setattr__(self, "device_id", device_id)

    shared_options = {
        "num_streams": num_streams,
        "verbosity": verbosity,
        "fp16": fp16,
        "fp16_blacklist_ops": fp16_blacklist_ops,
    }
    super().__init__(**shared_options)

MIGX class-attribute instance-attribute

MIGX = MIGX

NCNN class-attribute instance-attribute

NCNN = NCNN

NCNN_VK class-attribute instance-attribute

NCNN_VK = NCNN

ORT class-attribute instance-attribute

ORT = ORT

ORT_COREML class-attribute instance-attribute

ORT_COREML = ORT_COREML

ORT_CPU class-attribute instance-attribute

ORT_CPU = ORT_CPU

ORT_CUDA class-attribute instance-attribute

ORT_CUDA = ORT_CUDA

ORT_DML class-attribute instance-attribute

ORT_DML = ORT_DML

OV class-attribute instance-attribute

OV = OV

OV_CPU class-attribute instance-attribute

OV_CPU = OV_CPU

OV_GPU class-attribute instance-attribute

OV_GPU = OV_GPU

OV_NPU class-attribute instance-attribute

OV_NPU = OV_NPU

TRT class-attribute instance-attribute

TRT = TRT

TRT_RTX class-attribute instance-attribute

TRT_RTX = TRT_RTX

device_id instance-attribute

device_id: int

flexible_output_prop class-attribute

flexible_output_prop: str = 'MlrtFlexible'

fp16 instance-attribute

fp16: bool | None

fp16_blacklist_ops instance-attribute

fp16_blacklist_ops: Collection[str] | None

num_streams instance-attribute

num_streams: int

plugin class-attribute instance-attribute

plugin = ort

provider class-attribute instance-attribute

provider = 'DML'

verbosity instance-attribute

verbosity: int

Verbosity

Bases: IntEnum

Methods:

Attributes:

ERROR class-attribute instance-attribute

ERROR = 3

FATAL class-attribute instance-attribute

FATAL = 4

INFO class-attribute instance-attribute

INFO = 1

VERBOSE class-attribute instance-attribute

VERBOSE = 0

WARNING class-attribute instance-attribute

WARNING = 2

from_logging classmethod

from_logging(level: int) -> Verbosity
Source code in vsscale/mlrt/backend/ort.py
31
32
33
34
35
36
37
38
39
40
@classmethod
def from_logging(cls, level: int) -> ORT.Verbosity:
    """
    Translate a standard ``logging`` level into the matching verbosity member.

    Args:
        level: One of the ``logging`` level constants (``DEBUG`` ... ``CRITICAL``).

    Returns:
        The corresponding member of ``cls``; any level that is not one of the five
        standard constants maps to ``cls.WARNING``.
    """
    # The two tuples are paired position by position: logging level -> verbosity member.
    by_level = dict(
        zip(
            (DEBUG, INFO, WARNING, ERROR, CRITICAL),
            (cls.VERBOSE, cls.INFO, cls.WARNING, cls.ERROR, cls.FATAL),
        )
    )

    try:
        return by_level[level]
    except KeyError:
        return cls.WARNING

autoselect classmethod

autoselect(device_id: int = 0, **kwargs: Any) -> Backend

Try to select the best backend for the current system.

Parameters:

  • device_id

    (int, default: 0 ) –

    The GPU device id.

  • **kwargs

    (Any, default: {} ) –

    Additional arguments to pass to the backend.

Returns:

  • Backend

    The selected backend.

Source code in vsscale/mlrt/backend/base.py
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
@classmethod
def autoselect(cls, device_id: int = 0, **kwargs: Any) -> Backend:
    """
    Try to select the best backend for the current system.

    The GPU vendor reported by ``get_gpu`` picks a preference list; the first entry whose
    plugin namespace exists on ``core`` (and whose platform restriction is met) wins.
    ``Backend.OV_CPU`` is the fallback for unknown vendors and for vendors with no usable plugin.

    Args:
        device_id: The GPU device id.
        **kwargs: Additional arguments to pass to the backend.

    Returns:
        The selected backend.
    """

    def usable(plugin: str, windows_only: bool) -> bool:
        # The platform test comes first so the plugin lookup is skipped off Windows,
        # matching a short-circuiting `and`.
        if windows_only and platform.system().lower() != "windows":
            return False
        return hasattr(core, plugin)

    # Ordered preferences per vendor: (core plugin namespace, Windows-only?, Backend attribute name).
    preferences = (
        # Windows & Linux
        (
            "nvidia",
            (
                ("trt", False, "TRT"),
                ("trt_rtx", False, "TRT_RTX"),
                ("ort", True, "ORT_DML"),
                ("ort", False, "ORT_CUDA"),
                ("ncnn", False, "NCNN"),
            ),
        ),
        # Windows & Linux
        (
            "amd",
            (
                ("ort", True, "ORT_DML"),
                ("migx", False, "MIGX"),
                ("ncnn", False, "NCNN_VK"),
            ),
        ),
        # Windows & Linux
        # device-smi can't detect Intel NPUs in 0.5.6
        # https://github.com/ModelCloud/Device-SMI#roadmap
        (
            "intel",
            (
                ("ov", False, "OV_GPU"),
                ("ort", True, "ORT_DML"),
                ("ncnn", False, "NCNN_VK"),
            ),
        ),
        # macOS ARM64 & x86_64
        (
            "apple",
            (
                ("ncnn", False, "NCNN_VK"),
                ("ort", False, "ORT_COREML"),
            ),
        ),
    )

    gpu = get_gpu(device_id)

    if gpu:
        vendor = cast(str | None, gpu.vendor)
    elif platform.system().lower() == "darwin" and platform.machine() == "x86_64":
        # macOS x86_64 is unsupported
        # NOTE(review): presumably "unsupported by GPU detection", hence the hard-coded vendor - confirm.
        vendor = "apple"
    else:
        vendor = None

    chosen = "OV_CPU"

    for vendor_name, candidates in preferences:
        if vendor == vendor_name:
            for plugin, windows_only, backend_name in candidates:
                if usable(plugin, windows_only):
                    chosen = backend_name
                    break
            break

    backend = getattr(Backend, chosen)

    del gpu

    return backend(**kwargs)

get_args

get_args(clips: VideoNode | Sequence[VideoNode]) -> dict[str, Any]

Return backend plugin arguments derived from this configuration.

Source code in vsscale/mlrt/backend/ort.py
188
189
def get_args(self, clips: vs.VideoNode | Sequence[vs.VideoNode]) -> dict[str, Any]:
    """
    Return the parent backend arguments extended with the DirectML ``device_id``.

    Args:
        clips: Input clip or clips, forwarded to the parent ``get_args``.

    Returns:
        The parent's argument mapping merged with ``device_id``; this instance's value
        wins if the parent already supplied that key.
    """
    plugin_args = super().get_args(clips)
    dml_args = {"device_id": self.device_id}
    return plugin_args | dml_args

inference

inference(
    clips: VideoNode | Sequence[VideoNode],
    network_path: str | PathLike[str],
    /,
    overlap: tuple[int, int],
    tilesize: tuple[int, int],
    *,
    flexible: Literal[False] = ...,
    **kwargs: Any,
) -> VideoNode
inference(
    clips: VideoNode | Sequence[VideoNode],
    network_path: str | PathLike[str],
    /,
    overlap: tuple[int, int],
    tilesize: tuple[int, int],
    *,
    flexible: Literal[True],
    **kwargs: Any,
) -> list[VideoNode]
inference(
    clips: VideoNode | Sequence[VideoNode],
    network_path: str | PathLike[str],
    /,
    overlap: tuple[int, int],
    tilesize: tuple[int, int],
    *,
    flexible: bool = ...,
    **kwargs: Any,
) -> VideoNode | list[VideoNode]
inference(
    clips: VideoNode | Sequence[VideoNode],
    network_path: str | PathLike[str],
    /,
    overlap: tuple[int, int],
    tilesize: tuple[int, int],
    *,
    flexible: bool = False,
    **kwargs: Any,
) -> VideoNode | list[VideoNode]

Run inference with this backend.

Parameters:

  • clips

    (VideoNode | Sequence[VideoNode]) –

    Input clip or clips passed to the backend model.

  • network_path

    (str | PathLike[str]) –

    Path to the model file or backend artifact.

  • overlap

    (tuple[int, int]) –

    Horizontal and vertical tile overlap in pixels.

  • tilesize

    (tuple[int, int]) –

    Horizontal and vertical tile size in pixels.

  • flexible

    (bool, default: False ) –

    Return each flexible output plane as a separate clip.

  • **kwargs

    (Any, default: {} ) –

    Additional backend plugin arguments forwarded unchanged.

Returns:

  • VideoNode | list[VideoNode]

    A single output clip, or a list of output clips when flexible is enabled.

Source code in vsscale/mlrt/backend/base.py
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
def inference(
    self,
    clips: vs.VideoNode | Sequence[vs.VideoNode],
    network_path: str | os.PathLike[str],
    /,
    overlap: tuple[int, int],
    tilesize: tuple[int, int],
    *,
    flexible: bool = False,
    **kwargs: Any,
) -> vs.VideoNode | list[vs.VideoNode]:
    """
    Run the backend plugin's ``Model`` function on the given clips.

    The clips are first checked against ``vs.FLOAT`` through ``UnsupportedSampleTypeError.check``.
    The plugin arguments come from ``get_args``; entries in ``kwargs`` take precedence over them
    because they are merged last.

    Args:
        clips: Input clip or clips passed to the backend model.
        network_path: Path to the model file or backend artifact.
        overlap: Horizontal and vertical tile overlap in pixels.
        tilesize: Horizontal and vertical tile size in pixels.
        flexible: Return each flexible output plane as a separate clip.
        **kwargs: Additional backend plugin arguments forwarded unchanged.

    Returns:
        A single output clip, or a list of output clips when `flexible` is enabled.
    """
    UnsupportedSampleTypeError.check(clips, vs.FLOAT, self.__class__)

    plugin_args = self.get_args(clips)

    if flexible:
        # Build a fresh dict so the mapping returned by get_args() is never mutated.
        plugin_args = {**plugin_args, "flexible_output_prop": self.flexible_output_prop}

    # Caller-supplied kwargs win over the backend-derived arguments.
    call_kwargs = plugin_args | kwargs

    logger.info("Calling %s.Model", self.plugin.namespace)
    logger.info("Clips: %r", clips)
    logger.info("Network Path: %s", network_path)
    logger.info("overlap=%s, tilesize=%s, %s", overlap, tilesize, call_kwargs)
    result = self.plugin.Model(clips, network_path, overlap, tilesize, **call_kwargs)

    if not flexible:
        return result

    # In flexible mode the plugin result is indexed by "clip" and "num_planes";
    # every plane is read back from the frame property "<flexible_output_prop><index>".
    prop_clip = result["clip"]
    plane_count = result["num_planes"]

    return [prop_clip.std.PropToClip(prop=f"{self.flexible_output_prop}{index}") for index in range(plane_count)]