Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 22 additions & 0 deletions include/tvm/relax/attrs/vision.h
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,28 @@ struct ROIAlignAttrs : public AttrsNodeReflAdapter<ROIAlignAttrs> {
TVM_FFI_DECLARE_OBJECT_INFO_FINAL("relax.attrs.ROIAlignAttrs", ROIAlignAttrs, BaseAttrsNode);
}; // struct ROIAlignAttrs

/*! \brief Attributes for multibox_transform_loc (SSD / TFLite-style box decode). */
struct MultiboxTransformLocAttrs : public AttrsNodeReflAdapter<MultiboxTransformLocAttrs> {
// Clip decoded (ymin, xmin, ymax, xmax) coordinates to [0, 1].
bool clip;
// Post-softmax cutoff: scores strictly below this value are zeroed.
double threshold;
// (x, y, w, h) scales applied to the box encodings; for TFLite these are
// 1/x_scale, 1/y_scale, 1/w_scale, 1/h_scale.
ffi::Array<double> variances;
// When false, output scores for class index 0 (background) are forced to 0.
bool keep_background;

// Register the fields with the FFI reflection machinery so they are visible
// from Python as relax.attrs.MultiboxTransformLocAttrs.
static void RegisterReflection() {
namespace refl = tvm::ffi::reflection;
refl::ObjectDef<MultiboxTransformLocAttrs>()
.def_ro("clip", &MultiboxTransformLocAttrs::clip, "Clip decoded ymin,xmin,ymax,xmax to [0,1].")
.def_ro("threshold", &MultiboxTransformLocAttrs::threshold,
"After softmax, zero scores strictly below this value.")
.def_ro("variances", &MultiboxTransformLocAttrs::variances,
"(x,y,w,h) scales = TFLite 1/x_scale,1/y_scale,1/w_scale,1/h_scale on encodings.")
.def_ro("keep_background", &MultiboxTransformLocAttrs::keep_background,
"If false, force output scores[:,0,:] to 0 (background class).");
}
TVM_FFI_DECLARE_OBJECT_INFO_FINAL("relax.attrs.MultiboxTransformLocAttrs",
MultiboxTransformLocAttrs, BaseAttrsNode);
}; // struct MultiboxTransformLocAttrs

} // namespace relax
} // namespace tvm

Expand Down
12 changes: 6 additions & 6 deletions python/tvm/relax/frontend/tflite/tflite_frontend.py
Original file line number Diff line number Diff line change
Expand Up @@ -3205,9 +3205,10 @@ def convert_dequantize(self, op):
def convert_detection_postprocess(self, op):
"""Convert TFLite_Detection_PostProcess"""
raise NotImplementedError(
"DETECTION_POSTPROCESS requires vision ops (multibox_transform_loc, "
"non_max_suppression, get_valid_counts) not yet available in Relax. "
"See https://github.com/apache/tvm/issues/XXXX"
"DETECTION_POSTPROCESS is not wired in this frontend yet: it still needs "
"Relax NMS / get_valid_counts / related vision helpers (see dead code below). "
"relax.vision.multibox_transform_loc exists; tracking: "
"https://github.com/apache/tvm/issues/18928"
)
flexbuffer = op.CustomOptionsAsNumpy().tobytes()
custom_options = FlexBufferDecoder(flexbuffer).decode()
Expand Down Expand Up @@ -3340,9 +3341,8 @@ def convert_nms_v5(self, op):
"""Convert TFLite NonMaxSuppressionV5"""
# https://www.tensorflow.org/api_docs/cc/class/tensorflow/ops/non-max-suppression-v5
raise NotImplementedError(
"NON_MAX_SUPPRESSION_V5 requires vision ops (get_valid_counts, "
"non_max_suppression) not yet available in Relax. "
"See https://github.com/apache/tvm/issues/XXXX"
"NON_MAX_SUPPRESSION_V5 is not wired in this frontend yet (needs get_valid_counts, "
"non_max_suppression, etc.). Tracking: https://github.com/apache/tvm/issues/18928"
)

input_tensors = self.get_input_tensors(op)
Expand Down
2 changes: 1 addition & 1 deletion python/tvm/relax/op/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -157,7 +157,7 @@
tanh,
trunc,
)
from .vision import all_class_non_max_suppression, roi_align
from .vision import all_class_non_max_suppression, multibox_transform_loc, roi_align


def _register_op_make():
Expand Down
5 changes: 5 additions & 0 deletions python/tvm/relax/op/op_attrs.py
Original file line number Diff line number Diff line change
Expand Up @@ -251,6 +251,11 @@ class ROIAlignAttrs(Attrs):
"""Attributes for vision.roi_align"""


@tvm_ffi.register_object("relax.attrs.MultiboxTransformLocAttrs")
class MultiboxTransformLocAttrs(Attrs):
    """Attributes for vision.multibox_transform_loc.

    Python handle for the C++ ``relax.attrs.MultiboxTransformLocAttrs`` node
    (fields ``clip``, ``threshold``, ``variances``, ``keep_background``).
    """


@tvm_ffi.register_object("relax.attrs.Conv1DAttrs")
class Conv1DAttrs(Attrs):
"""Attributes for nn.conv1d"""
Expand Down
1 change: 1 addition & 0 deletions python/tvm/relax/op/vision/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,5 +17,6 @@
# under the License.
"""VISION operators."""

from .multibox_transform_loc import *
from .nms import *
from .roi_align import *
77 changes: 77 additions & 0 deletions python/tvm/relax/op/vision/multibox_transform_loc.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
"""Multibox location transform for object detection."""

from . import _ffi_api


def multibox_transform_loc(
    cls_pred,
    loc_pred,
    anchor,
    clip=False,
    threshold=0.0,
    variances=(1.0, 1.0, 1.0, 1.0),
    keep_background=True,
):
    """Decode SSD/TFLite-style anchor boxes and post-process class scores.

    Given per-anchor class logits, box encodings and priors, produce a tuple
    ``(boxes, scores)``: ``boxes`` is ``[B, N, 4]`` in
    ``(ymin, xmin, ymax, xmax)`` order, and ``scores`` is the ``[B, C, N]``
    softmax of ``cls_pred`` after threshold masking and (optionally) zeroing
    the background class.  Box decoding follows TFLite
    ``DecodeCenterSizeBoxes``; the expected tensor layouts match
    ``tflite_frontend.convert_detection_postprocess`` (loc reorder yxhw→xywh,
    anchor ltrb).

    Parameters
    ----------
    cls_pred : relax.Expr
        Class logits with shape ``[B, C, N]`` (pre-softmax).
    loc_pred : relax.Expr
        Box encodings with shape ``[B, 4*N]``, ``(x, y, w, h)`` per anchor
        after reorder (see above).
    anchor : relax.Expr
        Priors with shape ``[1, N, 4]`` as ``(left, top, right, bottom)``.
    clip : bool
        When True, clip ``ymin, xmin, ymax, xmax`` to ``[0, 1]``.
    threshold : float
        After softmax, multiply scores by the mask ``(score >= threshold)``.
    variances : tuple of 4 floats
        ``(x, y, w, h)`` = TFLite ``1/x_scale, 1/y_scale, 1/w_scale,
        1/h_scale``.
    keep_background : bool
        When False, output scores at class index 0 are set to zero.

    Returns
    -------
    result : relax.Expr
        Tuple ``(boxes, scores)`` as described above.

    Notes
    -----
    **Shape/dtype (checked in ``FInferStructInfo`` when static):**

    - ``cls_pred``: 3-D; ``loc_pred``: 2-D; ``anchor``: 3-D.
    - ``cls_pred``, ``loc_pred``, ``anchor`` dtypes must match.
    - ``N = cls_pred.shape[2]``; ``loc_pred.shape[1] == 4*N``;
      ``anchor.shape == [1, N, 4]``.
    - ``loc_pred.shape[1]`` must be divisible by 4.
    - ``cls_pred.shape[0]`` must equal ``loc_pred.shape[0]`` (batch).
    """
    # The packed FFI entry point takes everything positionally.
    packed_args = (cls_pred, loc_pred, anchor, clip, threshold, variances, keep_background)
    return _ffi_api.multibox_transform_loc(*packed_args)
24 changes: 24 additions & 0 deletions python/tvm/relax/transform/legalize_ops/vision.py
Original file line number Diff line number Diff line change
Expand Up @@ -118,3 +118,27 @@ def _roi_align(bb: BlockBuilder, call: Call) -> Expr:
aligned=call.attrs.aligned,
layout=call.attrs.layout,
)


@register_legalize("relax.vision.multibox_transform_loc")
def _multibox_transform_loc(bb: BlockBuilder, call: Call) -> Expr:
    """Lower relax.vision.multibox_transform_loc to its TOPI TE implementation."""
    attrs = call.attrs
    # Attrs store variances as an FFI array of doubles; TOPI wants plain floats.
    scales = tuple(float(v) for v in attrs.variances)
    cls_pred, loc_pred, anchor = call.args

    def _compute(cls_t, loc_t, anchor_t):
        # Variances are positional in the TOPI signature; the remaining
        # knobs are keyword-only for clarity.
        return topi.vision.multibox_transform_loc(
            cls_t,
            loc_t,
            anchor_t,
            scales,
            clip=attrs.clip,
            threshold=attrs.threshold,
            keep_background=attrs.keep_background,
        )

    return bb.call_te(
        _compute,
        cls_pred,
        loc_pred,
        anchor,
        primfunc_name_hint="multibox_transform_loc",
    )
72 changes: 72 additions & 0 deletions python/tvm/topi/testing/multibox_transform_loc_python.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# pylint: disable=invalid-name
"""Numpy reference for multibox_transform_loc."""

import numpy as np


def _softmax(x, axis):
    """Numerically stable softmax along *axis* (shift by the per-slice max)."""
    shifted = x - np.max(x, axis=axis, keepdims=True)
    numer = np.exp(shifted)
    return numer / numer.sum(axis=axis, keepdims=True)


def multibox_transform_loc_python(
    cls_pred,
    loc_pred,
    anchor,
    variances,
    clip=False,
    threshold=0.0,
    keep_background=True,
):
    """Reference implementation aligned with ``topi.vision.multibox_transform_loc``.

    Fully vectorized over the batch and anchor axes (the original used an
    O(B*N) Python double loop with identical math).

    Parameters
    ----------
    cls_pred : np.ndarray
        ``[B, C, N]`` class logits (pre-softmax).
    loc_pred : np.ndarray
        ``[B, 4*N]`` per-anchor encodings, ``(x, y, w, h)`` per anchor.
    anchor : np.ndarray
        ``[1, N, 4]`` priors as ``(left, top, right, bottom)``.
    variances : sequence of 4 floats
        ``(x, y, w, h)`` scales applied to the encodings.
    clip : bool
        When True, clip decoded coordinates to ``[0, 1]``.
    threshold : float
        Zero softmax scores strictly below this value (applied only when > 0).
    keep_background : bool
        When False, zero all scores for class index 0.

    Returns
    -------
    boxes : np.ndarray
        ``[B, N, 4]`` float32 in ``(ymin, xmin, ymax, xmax)`` order.
    scores : np.ndarray
        ``[B, C, N]`` float32 post-processed softmax scores.
    """
    B, C, N = cls_pred.shape
    loc = loc_pred.reshape(B, N, 4)

    # Softmax over the class axis, computed in float64 for stability,
    # then cast back to float32 (matches the TE implementation's contract).
    logits = cls_pred.astype("float64")
    logits -= np.max(logits, axis=1, keepdims=True)
    exp_logits = np.exp(logits)
    scores = (exp_logits / np.sum(exp_logits, axis=1, keepdims=True)).astype(np.float32)
    if threshold > 0.0:
        # Mask (not drop): entries strictly below the cutoff become 0.
        scores = np.where(scores >= threshold, scores, 0.0).astype(np.float32)
    if not keep_background:
        scores = scores.copy()
        scores[:, 0, :] = 0.0

    vx, vy, vw, vh = variances
    # Anchors are ltrb; convert to center/size once for all N anchors.
    left, top, right, bottom = (anchor[0, :, i] for i in range(4))  # each [N]
    ay = (top + bottom) * 0.5
    ax = (left + right) * 0.5
    ah = bottom - top
    aw = right - left
    # Encodings are (x, y, w, h) per anchor; each component is [B, N] and
    # broadcasts against the [N] anchor terms.
    ex, ey, ew, eh = (loc[..., i] for i in range(4))
    ycenter = ey * vy * ah + ay
    xcenter = ex * vx * aw + ax
    half_h = 0.5 * np.exp(eh * vh) * ah
    half_w = 0.5 * np.exp(ew * vw) * aw
    boxes = np.stack(
        [ycenter - half_h, xcenter - half_w, ycenter + half_h, xcenter + half_w],
        axis=-1,
    )
    if clip:
        boxes = np.clip(boxes, 0.0, 1.0)
    return boxes.astype(np.float32), scores
1 change: 1 addition & 0 deletions python/tvm/topi/vision/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,5 +17,6 @@
# under the License.
"""Vision operators."""

from .multibox_transform_loc import *
from .nms import *
from .roi_align import *
121 changes: 121 additions & 0 deletions python/tvm/topi/vision/multibox_transform_loc.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,121 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# pylint: disable=invalid-name
"""Multibox location transform (SSD / TFLite DetectionPostProcess decode)."""

import tvm
from tvm import te, topi


def multibox_transform_loc(
    cls_pred,
    loc_pred,
    anchor,
    variances,
    clip=False,
    threshold=0.0,
    keep_background=True,
):
    """TFLite ``DecodeCenterSizeBoxes``-style decode + softmax score post-process.

    Inputs must match Relax op contracts: ``cls_pred [B,C,N]``, ``loc_pred [B,4*N]``,
    ``anchor [1,N,4]`` ltrb; per-anchor loc order ``(x,y,w,h)`` after yxhw→xywh reorder.

    Parameters
    ----------
    cls_pred : te.Tensor
        ``[B, C, N]`` logits.
    loc_pred : te.Tensor
        ``[B, 4*N]`` encodings ``(x,y,w,h)`` per anchor.
    anchor : te.Tensor
        ``[1, N, 4]`` ``(left, top, right, bottom)``.
    variances : tuple of 4 float
        ``(x,y,w,h)`` = ``1/x_scale, 1/y_scale, 1/w_scale, 1/h_scale`` (TFLite).
    clip : bool
        Clip ``ymin,xmin,ymax,xmax`` to ``[0,1]``.
    threshold : float
        After softmax: ``scores *= (scores >= threshold)``.
    keep_background : bool
        If False: ``scores[:,0,:] = 0``.

    Returns
    -------
    boxes : te.Tensor
        ``[B, N, 4]`` as ``(ymin,xmin,ymax,xmax)``.
    scores : te.Tensor
        ``[B, C, N]`` softmax, then threshold mask and optional background zero.
    """
    dtype = cls_pred.dtype
    B = cls_pred.shape[0]
    num_anchors = cls_pred.shape[2]
    loc_reshaped = topi.reshape(loc_pred, [B, num_anchors, 4])

    # FIX: the original referenced `tvm.tirx`, which does not exist; the
    # correct module is `tvm.tir`, so every call would have raised
    # AttributeError at trace time.
    vx = tvm.tir.const(float(variances[0]), dtype)
    vy = tvm.tir.const(float(variances[1]), dtype)
    vw = tvm.tir.const(float(variances[2]), dtype)
    vh = tvm.tir.const(float(variances[3]), dtype)
    half = tvm.tir.const(0.5, dtype)
    zero = tvm.tir.const(0.0, dtype)
    one = tvm.tir.const(1.0, dtype)
    th = tvm.tir.const(float(threshold), dtype)

    def decode_bbox(b, a, k):
        # Anchor is (left, top, right, bottom); convert to center/size form.
        l = anchor[0, a, 0]
        t = anchor[0, a, 1]
        r = anchor[0, a, 2]
        br = anchor[0, a, 3]
        ay = (t + br) * half
        ax = (l + r) * half
        ah = br - t
        aw = r - l
        # Encodings are (x, y, w, h) per anchor after the frontend reorder.
        ex = loc_reshaped[b, a, 0]
        ey = loc_reshaped[b, a, 1]
        ew = loc_reshaped[b, a, 2]
        eh = loc_reshaped[b, a, 3]
        ycenter = ey * vy * ah + ay
        xcenter = ex * vx * aw + ax
        half_h = half * te.exp(eh * vh) * ah
        half_w = half * te.exp(ew * vw) * aw
        ymin = ycenter - half_h
        xmin = xcenter - half_w
        ymax = ycenter + half_h
        xmax = xcenter + half_w
        if clip:
            # `clip` is a Python-level constant, so the clamp is baked into
            # the generated expression at compile time.
            ymin = te.max(zero, te.min(one, ymin))
            xmin = te.max(zero, te.min(one, xmin))
            ymax = te.max(zero, te.min(one, ymax))
            xmax = te.max(zero, te.min(one, xmax))
        # Last-axis layout: (ymin, xmin, ymax, xmax).
        return tvm.tir.Select(
            k == 0,
            ymin,
            tvm.tir.Select(k == 1, xmin, tvm.tir.Select(k == 2, ymax, xmax)),
        )

    boxes = te.compute((B, num_anchors, 4), decode_bbox, name="multibox_boxes")

    # Scores: softmax over classes, then zero entries below the threshold,
    # then (optionally) zero the background class at index 0.
    scores = topi.nn.softmax(cls_pred, axis=1)
    mask = topi.cast(topi.greater_equal(scores, th), dtype)
    scores = scores * mask
    if not keep_background:

        def zero_bg(b, c, n):
            s = scores[b, c, n]
            return te.if_then_else(c == 0, zero, s)

        scores = te.compute(scores.shape, zero_bg, name="multibox_scores_bg")

    return [boxes, scores]
Loading
Loading