microsoft · JiuqingSong · May 19, 2026 · BryanValverdeU · May 20, 2026 · JiuqingSong
diff --git a/packages/roosterjs-content-model-dom/lib/modelApi/creators/createText.ts b/packages/roosterjs-content-model-dom/lib/modelApi/creators/createText.ts
@@ -19,9 +19,10 @@ export function createText(
     link?: ReadonlyContentModelLink,
     code?: ReadonlyContentModelCode
 ): ContentModelText {
+    const filterText = stripInvisibleUnicode(text);
     const result: ContentModelText = {
         segmentType: 'Text',
-        text: text,
+        text: filterText,
         format: { ...format },
     };
 
@@ -35,3 +36,17 @@ export function createText(
 
     return result;
 }
+
+// The Unicode "Tags" block (U+E0000-U+E007F) is normally invisible and can be used to hide text, see https://embracethered.com/blog/posts/2024/hiding-and-finding-text-with-unicode-tags/
+// createText strips every code point matched below, so such hidden characters never become part of a text segment instead of being treated as normal text.
+// NOTE(review): the range is all of U+E0000-U+EFFFF, which also covers Variation Selectors Supplement (U+E0100-U+E01EF) used by ideographic variation sequences - confirm stripping those is intended.
+const INVISIBLE_UNICODE_REGEX = /[\u{E0000}-\u{EFFFF}]/gu;
+
+/**
+ * Remove every character matched by INVISIBLE_UNICODE_REGEX (code points U+E0000 to U+EFFFF) from the given string
+ * @param value The string to be processed
+ * @returns The string with all matched characters removed (an empty string if nothing else remains)
+ */
+function stripInvisibleUnicode(value: string): string {
+    return value.replace(INVISIBLE_UNICODE_REGEX, '');
+}
diff --git a/packages/roosterjs-content-model-dom/test/endToEndTest.ts b/packages/roosterjs-content-model-dom/test/endToEndTest.ts
@@ -3028,6 +3028,40 @@ describe('End to end test for DOM => Model => DOM/TEXT', () => {
         );
     });
 
+    it('Text with invisible unicode tag characters is stripped, meaningful invisible chars preserved', () => {
+        // Source HTML contains U+E0041 / U+E0042 (unicode tag range — must be stripped)
+        // mixed with U+200B (ZWSP), U+202E (RLO) and U+202C (PDF),
+        // which must be preserved.
+        runTest(
+            '<p>a\u{E0041}b\u{200B}c\u{E0042}d\u{202E}evil\u{202C}e</p>',
+            {
+                blockGroupType: 'Document',
+                blocks: [
+                    {
+                        blockType: 'Paragraph',
+                        segments: [
+                            {
+                                segmentType: 'Text',
+                                text: 'ab\u{200B}cd\u{202E}evil\u{202C}e',
+                                format: {},
+                            },
+                        ],
+                        format: {
+                            marginTop: '1em',
+                            marginBottom: '1em',
+                        },
+                        decorator: {
+                            tagName: 'p',
+                            format: {},
+                        },
+                    },
+                ],
+            },
+            'ab\u{200B}cd\u{202E}evil\u{202C}e',
+            '<p>ab\u{200B}cd\u{202E}evil\u{202C}e</p>'
+        );
+    });
+
     it('LI without UL followed by other blocks', () => {
         runTest(
             '<li>test</li><div>other</div>',

diff --git a/packages/roosterjs-content-model-dom/test/modelApi/creators/creatorsTest.ts b/packages/roosterjs-content-model-dom/test/modelApi/creators/creatorsTest.ts
@@ -233,6 +233,74 @@ describe('Creators', () => {
         });
     });
 
+    it('createText with invisible unicode characters', () => {
+        const text = 'a\u{E0041}b\u{E0042}c';
+        const result = createText(text);
+
+        expect(result).toEqual({
+            segmentType: 'Text',
+            format: {},
+            text: 'abc',
+        });
+    });
+
+    it('createText with only invisible unicode characters', () => {
+        const text = '\u{E0000}\u{E007F}\u{EFFFF}';
+        const result = createText(text);
+
+        expect(result).toEqual({
+            segmentType: 'Text',
+            format: {},
+            text: '',
+        });
+    });
+
+    it('createText with invisible unicode at boundary range', () => {
+        const text = '\u{DFFFF}start\u{E0000}mid\u{EFFFF}end\u{F0000}';
+        const result = createText(text);
+
+        expect(result).toEqual({
+            segmentType: 'Text',
+            format: {},
+            text: '\u{DFFFF}startmidend\u{F0000}',
+        });
+    });
+
+    it('createText preserves meaningful invisible characters outside the tag range', () => {
+        // Invisible characters used below: U+200B = Zero-Width Space, U+200D = Zero-Width Joiner,
+        // U+202E = Right-to-Left Override, U+202C = Pop Directional Formatting
+        const text = 'ab‍c‮d‬e';
+        const result = createText(text);
+
+        expect(result).toEqual({
+            segmentType: 'Text',
+            format: {},
+            text: 'ab‍c‮d‬e',
+        });
+    });
+
+    it('createText strips only tag-range chars, keeps meaningful invisible chars', () => {
+        const text = 'a\u{E0041}b‮\u{E0042}c';
+        const result = createText(text);
+
+        expect(result).toEqual({
+            segmentType: 'Text',
+            format: {},
+            text: 'ab‮c',
+        });
+    });
+
+    it('createText does not strip visible characters', () => {
+        const text = 'hello world 你好   ';
+        const result = createText(text);
+
+        expect(result).toEqual({
+            segmentType: 'Text',
+            format: {},
+            text: 'hello world 你好   ',
+        });
+    });
+
     it('createTableRow', () => {
         const row = createTableRow();