Skip to content

Commit 55737ed

Browse files
committed
Rewrite _tokenize with 2-phase model
Replace per-line reparsing with single-pass tokenization:
- Read all lines via readline, parse once, yield tokens
- Fix token type values (COMMENT=65, NL=66, OP=55)
- Fix NEWLINE/NL end positions and implicit newline handling
- Fix DEDENT positions via look-ahead to next non-DEDENT token
- Handle FSTRING_MIDDLE brace unescaping ({{ → {, }} → })
- Emit implicit NL before ENDMARKER when source lacks trailing newline
- Raise IndentationError from lexer errors
- Remove 13 expectedFailure marks for now-passing tests
1 parent bf2b993 commit 55737ed

File tree

5 files changed

+750
-407
lines changed

5 files changed

+750
-407
lines changed

Lib/test/test_tabnanny.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -316,7 +316,6 @@ def validate_cmd(self, *args, stdout="", stderr="", partial=False, expect_failur
316316
self.assertListEqual(out.splitlines(), stdout.splitlines())
317317
self.assertListEqual(err.splitlines(), stderr.splitlines())
318318

319-
@unittest.expectedFailure # TODO: RUSTPYTHON; Should displays error when errored python file is given.
320319
def test_with_errored_file(self):
321320
"""Should displays error when errored python file is given."""
322321
with TemporaryPyFile(SOURCE_CODES["wrong_indented"]) as file_path:

Lib/test/test_tokenize.py

Lines changed: 0 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1167,7 +1167,6 @@ async def bar(): pass
11671167
DEDENT '' (7, 0) (7, 0)
11681168
""")
11691169

1170-
@unittest.expectedFailure # TODO: RUSTPYTHON; + " NEWLINE '\\n' (4, 1) (4, 2)"]
11711170
def test_newline_after_parenthesized_block_with_comment(self):
11721171
self.check_tokenize('''\
11731172
[
@@ -1192,7 +1191,6 @@ def test_closing_parenthesis_from_different_line(self):
11921191
NAME 'x' (1, 3) (1, 4)
11931192
""")
11941193

1195-
@unittest.expectedFailure # TODO: RUSTPYTHON; ' FSTRING_END "\'\'\'" (2, 68) (2, 71)']
11961194
def test_multiline_non_ascii_fstring(self):
11971195
self.check_tokenize("""\
11981196
a = f'''
@@ -1204,7 +1202,6 @@ def test_multiline_non_ascii_fstring(self):
12041202
FSTRING_END "\'\'\'" (2, 68) (2, 71)
12051203
""")
12061204

1207-
@unittest.expectedFailure # TODO: RUSTPYTHON; Diff is 696 characters long. Set self.maxDiff to None to see it.
12081205
def test_multiline_non_ascii_fstring_with_expr(self):
12091206
self.check_tokenize("""\
12101207
f'''
@@ -2176,7 +2173,6 @@ def test_string_concatenation(self):
21762173
# Two string literals on the same line
21772174
self.check_roundtrip("'' ''")
21782175

2179-
@unittest.expectedFailure # TODO: RUSTPYTHON
21802176
def test_random_files(self):
21812177
# Test roundtrip on random python modules.
21822178
# pass the '-ucpu' option to process the full directory.
@@ -2214,7 +2210,6 @@ def test_indentation_semantics_retained(self):
22142210

22152211

22162212
class InvalidPythonTests(TestCase):
2217-
@unittest.expectedFailure # TODO: RUSTPYTHON; Diff is 1046 characters long. Set self.maxDiff to None to see it.
22182213
def test_number_followed_by_name(self):
22192214
# See issue #gh-105549
22202215
source = "2sin(x)"
@@ -2254,7 +2249,6 @@ def check_tokenize(self, s, expected):
22542249
)
22552250
self.assertEqual(result, expected.rstrip().splitlines())
22562251

2257-
@unittest.expectedFailure # TODO: RUSTPYTHON
22582252
def test_encoding(self):
22592253
def readline(encoding):
22602254
yield "1+1".encode(encoding)
@@ -2386,7 +2380,6 @@ def test_float(self):
23862380
NUMBER '3.14e159' (1, 4) (1, 12)
23872381
""")
23882382

2389-
@unittest.expectedFailure # TODO: RUSTPYTHON
23902383
def test_string(self):
23912384

23922385
self.check_tokenize('x = \'\'; y = ""', """\
@@ -2818,7 +2811,6 @@ def test_unary(self):
28182811
NUMBER '1' (1, 22) (1, 23)
28192812
""")
28202813

2821-
@unittest.expectedFailure # TODO: RUSTPYTHON
28222814
def test_selector(self):
28232815

28242816
self.check_tokenize("import sys, time\nx = sys.modules['time'].time()", """\
@@ -2841,7 +2833,6 @@ def test_selector(self):
28412833
RPAR ')' (2, 29) (2, 30)
28422834
""")
28432835

2844-
@unittest.expectedFailure # TODO: RUSTPYTHON
28452836
def test_method(self):
28462837

28472838
self.check_tokenize('@staticmethod\ndef foo(x,y): pass', """\
@@ -2859,7 +2850,6 @@ def test_method(self):
28592850
NAME 'pass' (2, 14) (2, 18)
28602851
""")
28612852

2862-
@unittest.expectedFailure # TODO: RUSTPYTHON
28632853
def test_tabs(self):
28642854

28652855
self.check_tokenize('@staticmethod\ndef foo(x,y): pass', """\
@@ -3144,7 +3134,6 @@ async def bar(): pass
31443134
DEDENT '' (6, -1) (6, -1)
31453135
""")
31463136

3147-
@unittest.expectedFailure # TODO: RUSTPYTHON
31483137
def test_unicode(self):
31493138

31503139
self.check_tokenize("Örter = u'places'\ngrün = U'green'", """\
@@ -3394,7 +3383,6 @@ def f():
33943383
with contextlib.redirect_stderr(StringIO()):
33953384
_ = self.invoke_tokenize('--unknown')
33963385

3397-
@unittest.expectedFailure # TODO: RUSTPYTHON
33983386
def test_without_flag(self):
33993387
# test 'python -m tokenize source.py'
34003388
source = 'a = 1'
@@ -3408,7 +3396,6 @@ def test_without_flag(self):
34083396
'''
34093397
self.check_output(source, expect)
34103398

3411-
@unittest.expectedFailure # TODO: RUSTPYTHON
34123399
def test_exact_flag(self):
34133400
# test 'python -m tokenize -e/--exact source.py'
34143401
source = 'a = 1'

0 commit comments

Comments
 (0)