Rename NormalizeLabelsInDatasetd to RemapLabelsToSequentiald and fix label ordering bug

aymuos15 · aymuos15 · commit e1f6041087c1 · 2026-01-03T17:12:31.000Z
### Description

Rename NormalizeLabelsInDatasetd to RemapLabelsToSequentiald to better describe
its actual functionality. The old name was confusing as it suggests normalization
when it actually remaps arbitrary label values to sequential indices (0, 1, 2, 3, ...).

### Bug Fix

Fixed a bug where the order of labels in the input dictionary affected the output.
Previously, if background appeared first (e.g., `{background: 0, organ1: 1, organ2: 2}`),
the transform would skip index 1 and produce `{background: 0, organ1: 2, organ2: 3}`.
This was caused by enumerate starting at 1 for all items but skipping background
without adjusting the index. The fix excludes background from enumeration and
handles it separately.

### Changes

- Renamed NormalizeLabelsInDatasetd to RemapLabelsToSequentiald
- Fixed label ordering bug by excluding background from enumeration
- Kept NormalizeLabelsInDatasetd as deprecated alias for backward compatibility
- Enhanced documentation to clearly explain remapping behavior
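- Added alphabetical sorting of non-background labels (by label name) for deterministic output ordering;
  note that this changes the assigned indices for existing callers whose `label_names` were not already
  in alphabetical order, because indices previously followed dictionary insertion order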
- Added tests for deprecated name warning and proper remapping

### Types of changes
- [x] Non-breaking change (fix or new feature that would not break existing functionality)
- [x] New tests added to cover the changes

Signed-off-by: Soumya Snigdha Kundu <soumya_snigdha.kundu@kcl.ac.uk>
diff --git a/monai/apps/deepedit/__init__.py b/monai/apps/deepedit/__init__.py
@@ -8,3 +8,37 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+
+from .transforms import (
+    AddGuidanceFromPointsDeepEditd,
+    AddGuidanceSignalDeepEditd,
+    AddInitialSeedPointDeepEditd,
+    AddInitialSeedPointMissingLabelsd,
+    AddRandomGuidanceDeepEditd,
+    DiscardAddGuidanced,
+    FindAllValidSlicesDeepEditd,
+    FindAllValidSlicesMissingLabelsd,
+    FindDiscrepancyRegionsDeepEditd,
+    NormalizeLabelsInDatasetd,
+    RemapLabelsToSequentiald,
+    ResizeGuidanceMultipleLabelDeepEditd,
+    SingleLabelSelectiond,
+    SplitPredsLabeld,
+)
+
+__all__ = [
+    "AddGuidanceFromPointsDeepEditd",
+    "AddGuidanceSignalDeepEditd",
+    "AddInitialSeedPointDeepEditd",
+    "AddInitialSeedPointMissingLabelsd",
+    "AddRandomGuidanceDeepEditd",
+    "DiscardAddGuidanced",
+    "FindAllValidSlicesDeepEditd",
+    "FindAllValidSlicesMissingLabelsd",
+    "FindDiscrepancyRegionsDeepEditd",
+    "NormalizeLabelsInDatasetd",
+    "RemapLabelsToSequentiald",
+    "ResizeGuidanceMultipleLabelDeepEditd",
+    "SingleLabelSelectiond",
+    "SplitPredsLabeld",
+]
diff --git a/monai/apps/deepedit/transforms.py b/monai/apps/deepedit/transforms.py
@@ -84,18 +84,44 @@ def __call__(self, data: Mapping[Hashable, np.ndarray]) -> dict[Hashable, np.nda
         return d
 
 
-class NormalizeLabelsInDatasetd(MapTransform):
+class RemapLabelsToSequentiald(MapTransform):
+    """
+    Remap label values from a dataset-specific schema to sequential indices (0, 1, 2, 3, ...).
+
+    This transform takes labels with arbitrary values defined in a label dictionary and remaps them
+    to a sequential range starting from 1 (with background always set to 0). This is useful for
+    standardizing labels across different datasets or ensuring labels are in a contiguous range.
+
+    The output label indices are assigned in alphabetical order by label name to ensure
+    deterministic behavior regardless of input dictionary ordering.
+
+    Args:
+        keys: The ``keys`` parameter will be used to get and set the actual data item to transform
+        label_names: Dictionary mapping label names to their current values in the dataset.
+            For example: {"spleen": 1, "liver": 6, "background": 0}
+            Will be remapped to: {"background": 0, "liver": 1, "spleen": 2}
+            (alphabetically sorted, excluding background)
+        allow_missing_keys: If True, missing keys in the data dictionary will not raise an error
+
+    Example:
+        >>> transform = RemapLabelsToSequentiald(
+        ...     keys="label",
+        ...     label_names={"liver": 6, "spleen": 1, "background": 0}
+        ... )
+        >>> # Input values are remapped element-wise: 0 -> 0 (background), 6 -> 1 (liver), 1 -> 2 (spleen)
+        >>> # e.g. an input label array [0, 1, 6] becomes [0, 2, 1]
+        >>> # And updates d["label_names"] to {"background": 0, "liver": 1, "spleen": 2}
+
+    Note:
+        - Background label (if present) is always mapped to 0
+        - Non-background labels are mapped to sequential indices 1, 2, 3, ... in alphabetical order
+        - Undefined labels (not in label_names) will be set to 0 (background)
+        - The transform updates the data dictionary with a new "label_names" key containing the remapped values
+    """
 
     def __init__(
         self, keys: KeysCollection, label_names: dict[str, int] | None = None, allow_missing_keys: bool = False
     ):
-        """
-        Normalize label values according to label names dictionary
-
-        Args:
-            keys: The ``keys`` parameter will be used to get and set the actual data item to transform
-            label_names: all label names
-        """
         super().__init__(keys, allow_missing_keys)
 
         self.label_names = label_names or {}
@@ -106,13 +132,20 @@ def __call__(self, data: Mapping[Hashable, np.ndarray]) -> dict[Hashable, np.nda
             # Dictionary containing new label numbers
             new_label_names = {}
             label = np.zeros(d[key].shape)
-            # Making sure the range values and number of labels are the same
-            for idx, (key_label, val_label) in enumerate(self.label_names.items(), start=1):
-                if key_label != "background":
-                    new_label_names[key_label] = idx
-                    label[d[key] == val_label] = idx
-                if key_label == "background":
-                    new_label_names["background"] = 0
+
+            # Sort label names to ensure deterministic ordering (exclude background)
+            sorted_labels = sorted(
+                [(k, v) for k, v in self.label_names.items() if k != "background"]
+            )
+
+            # Always set background to 0 first
+            if "background" in self.label_names:
+                new_label_names["background"] = 0
+
+            # Assign sequential indices to sorted non-background labels
+            for idx, (key_label, val_label) in enumerate(sorted_labels, start=1):
+                new_label_names[key_label] = idx
+                label[d[key] == val_label] = idx
 
             d["label_names"] = new_label_names
             if isinstance(d[key], MetaTensor):
@@ -122,6 +155,28 @@ def __call__(self, data: Mapping[Hashable, np.ndarray]) -> dict[Hashable, np.nda
         return d
 
 
+class NormalizeLabelsInDatasetd(RemapLabelsToSequentiald):
+    """
+    .. deprecated:: 1.5.0
+        `NormalizeLabelsInDatasetd` is deprecated. Use :class:`RemapLabelsToSequentiald` instead.
+
+    This class is maintained for backward compatibility. Please use RemapLabelsToSequentiald
+    which better describes the transform's functionality.
+    """
+
+    def __init__(
+        self, keys: KeysCollection, label_names: dict[str, int] | None = None, allow_missing_keys: bool = False
+    ):
+        warnings.warn(
+            "NormalizeLabelsInDatasetd is deprecated and will be removed in a future version. "
+            "Please use RemapLabelsToSequentiald instead, which better describes what the transform does: "
+            "remapping label values to sequential indices (0, 1, 2, 3, ...).",
+            DeprecationWarning,
+            stacklevel=2,
+        )
+        super().__init__(keys, label_names, allow_missing_keys)
+
+
 class SingleLabelSelectiond(MapTransform):
 
     def __init__(
diff --git a/tests/apps/deepedit/test_deepedit_transforms.py b/tests/apps/deepedit/test_deepedit_transforms.py
@@ -25,6 +25,7 @@
     FindAllValidSlicesMissingLabelsd,
     FindDiscrepancyRegionsDeepEditd,
     NormalizeLabelsInDatasetd,
+    RemapLabelsToSequentiald,
     ResizeGuidanceMultipleLabelDeepEditd,
     SingleLabelSelectiond,
     SplitPredsLabeld,
@@ -282,6 +283,70 @@ def test_correct_results(self, arguments, input_data, expected_result):
         result = add_fn(input_data)
         self.assertEqual(len(np.unique(result["label"])), expected_result)
 
+    def test_ordering_determinism(self):
+        """Test that different input ordering produces the same output (alphabetical)"""
+        # Create a label array with different label values
+        label = np.array([[[0, 1, 6, 3]]])  # background=0, spleen=1, liver=6, kidney=3
+
+        # Test case 1: liver first, then kidney, then spleen
+        data1 = {"label": label.copy()}
+        transform1 = RemapLabelsToSequentiald(
+            keys="label",
+            label_names={"liver": 6, "kidney": 3, "spleen": 1, "background": 0}
+        )
+        result1 = transform1(data1)
+
+        # Test case 2: spleen first, then kidney, then liver (different order)
+        data2 = {"label": label.copy()}
+        transform2 = RemapLabelsToSequentiald(
+            keys="label",
+            label_names={"spleen": 1, "kidney": 3, "liver": 6, "background": 0}
+        )
+        result2 = transform2(data2)
+
+        # Both should produce the same output (alphabetically sorted)
+        # Expected mapping: background=0, kidney=1, liver=2, spleen=3
+        np.testing.assert_array_equal(result1["label"], result2["label"])
+
+        # Verify the actual mapping is alphabetical
+        expected_output = np.array([[[0, 3, 2, 1]]])  # kidney=1, liver=2, spleen=3, background=0
+        np.testing.assert_array_equal(result1["label"], expected_output)
+
+        # Verify label_names is correct
+        self.assertEqual(result1["label_names"], {"background": 0, "kidney": 1, "liver": 2, "spleen": 3})
+        self.assertEqual(result2["label_names"], {"background": 0, "kidney": 1, "liver": 2, "spleen": 3})
+
+    def test_multiple_labels(self):
+        """Test with multiple non-background labels"""
+        label = np.array([[[0, 1, 2, 5]]])  # background, spleen, kidney, liver
+        data = {"label": label.copy()}
+        transform = RemapLabelsToSequentiald(
+            keys="label",
+            label_names={"spleen": 1, "kidney": 2, "liver": 5, "background": 0}
+        )
+        result = transform(data)
+
+        # Expected: background=0, kidney=1, liver=2, spleen=3 (alphabetical)
+        expected = np.array([[[0, 3, 1, 2]]])
+        np.testing.assert_array_equal(result["label"], expected)
+        self.assertEqual(result["label_names"], {"background": 0, "kidney": 1, "liver": 2, "spleen": 3})
+
+    def test_deprecated_name_warning(self):
+        """Test that using the deprecated name raises a warning"""
+        import warnings
+
+        data = {"label": np.array([[[0, 1]]])}
+
+        with warnings.catch_warnings(record=True) as w:
+            warnings.simplefilter("always")
+            transform = NormalizeLabelsInDatasetd(keys="label", label_names={"spleen": 1, "background": 0})
+            result = transform(data)
+
+            # Check that a deprecation warning was raised
+            self.assertEqual(len(w), 1)
+            self.assertTrue(issubclass(w[0].category, DeprecationWarning))
+            self.assertIn("RemapLabelsToSequentiald", str(w[0].message))
+
 
 class TestResizeGuidanceMultipleLabelCustomd(unittest.TestCase):