Skip to content

Commit 6e09d1b

Browse files
committed
win codecs
1 parent 258ac74 commit 6e09d1b

File tree

6 files changed

+654
-166
lines changed

6 files changed

+654
-166
lines changed

Lib/_pycodecs.py

Lines changed: 16 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1109,7 +1109,7 @@ def unicode_call_errorhandler(errors, encoding,
11091109
else:
11101110
exceptionObject = UnicodeEncodeError(encoding, input, startinpos, endinpos, reason)
11111111
res = errorHandler(exceptionObject)
1112-
if isinstance(res, tuple) and isinstance(res[0], str) and isinstance(res[1], int):
1112+
if isinstance(res, tuple) and isinstance(res[0], (str, bytes)) and isinstance(res[1], int):
11131113
newpos = res[1]
11141114
if (newpos < 0):
11151115
newpos = len(input) + newpos
@@ -1159,7 +1159,11 @@ def unicode_encode_ucs1(p, size, errors, limit):
11591159
while collend < len(p) and ord(p[collend]) >= limit:
11601160
collend += 1
11611161
x = unicode_call_errorhandler(errors, encoding, reason, p, collstart, collend, False)
1162-
res += x[0].encode()
1162+
replacement = x[0]
1163+
if isinstance(replacement, bytes):
1164+
res += replacement
1165+
else:
1166+
res += replacement.encode()
11631167
pos = x[1]
11641168

11651169
return res
@@ -1376,12 +1380,16 @@ def PyUnicode_EncodeCharmap(p, size, mapping='latin-1', errors='strict'):
13761380
except KeyError:
13771381
x = unicode_call_errorhandler(errors, "charmap",
13781382
"character maps to <undefined>", p, inpos, inpos+1, False)
1379-
try:
1380-
for y in x[0]:
1381-
res += charmapencode_output(ord(y), mapping)
1382-
except KeyError:
1383-
raise UnicodeEncodeError("charmap", p, inpos, inpos+1,
1384-
"character maps to <undefined>")
1383+
replacement = x[0]
1384+
if isinstance(replacement, bytes):
1385+
res += list(replacement)
1386+
else:
1387+
try:
1388+
for y in replacement:
1389+
res += charmapencode_output(ord(y), mapping)
1390+
except KeyError:
1391+
raise UnicodeEncodeError("charmap", p, inpos, inpos+1,
1392+
"character maps to <undefined>")
13851393
inpos += 1
13861394
return res
13871395

Lib/encodings/__init__.py

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -172,3 +172,23 @@ def _alias_mbcs(encoding):
172172
pass
173173

174174
codecs.register(_alias_mbcs)
175+
176+
from ._win_cp_codecs import create_win32_code_page_codec
177+
178+
def win32_code_page_search_function(encoding):
    """Codec search function that maps ``cp<number>`` names to code pages.

    Parameters:
        encoding: the encoding name passed in by the codec registry.

    Returns:
        The object built by ``create_win32_code_page_codec(cp)`` when the
        name has the form ``cp<integer>`` and ``codecs.code_page_encode``
        accepts that code page.  ``None`` in every other case (name does not
        start with ``cp``, suffix is not an integer, the code page is
        rejected, or the code page API is unavailable), which tells the
        registry that this search function does not handle the name.
    """
    encoding = encoding.lower()
    if not encoding.startswith('cp'):
        return None
    try:
        cp = int(encoding[2:])
    except ValueError:
        return None
    # codecs.code_page_encode is documented as Windows-only.  Look it up
    # defensively so that a missing attribute means "not my encoding"
    # instead of leaking an AttributeError out of the codec lookup.
    code_page_encode = getattr(codecs, 'code_page_encode', None)
    if code_page_encode is None:
        return None
    # Test if the code page is supported
    try:
        code_page_encode(cp, 'x')
    except (OverflowError, OSError):
        return None

    return create_win32_code_page_codec(cp)
193+
194+
codecs.register(win32_code_page_search_function)

Lib/encodings/_win_cp_codecs.py

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
import codecs
2+
3+
def create_win32_code_page_codec(cp):
    """Build a ``codecs.CodecInfo`` for the Windows code page number *cp*.

    Every piece of the returned codec is a thin wrapper that forwards to
    ``codecs.code_page_encode`` / ``codecs.code_page_decode`` with *cp*
    bound as the code page.

    Parameters:
        cp: the code page number; it is also used to form the codec name
            ``cp<cp>``.

    Returns:
        A ``codecs.CodecInfo`` with stateless encode/decode functions,
        incremental encoder/decoder classes and stream reader/writer classes.

    Raises:
        ImportError: if the ``codecs`` module does not provide the code page
            functions (they are imported on every call).
    """
    from codecs import code_page_encode, code_page_decode

    def encode(input, errors='strict'):
        return code_page_encode(cp, input, errors)

    def decode(input, errors='strict'):
        # A one-shot decode always sees the complete input, hence final=True.
        return code_page_decode(cp, input, errors, True)

    class IncrementalEncoder(codecs.IncrementalEncoder):
        def encode(self, input, final=False):
            # Only the first element of the result (the encoded data) is
            # returned; *final* is not forwarded to code_page_encode.
            return code_page_encode(cp, input, self.errors)[0]

    class IncrementalDecoder(codecs.BufferedIncrementalDecoder):
        def _buffer_decode(self, input, errors, final):
            return code_page_decode(cp, input, errors, final)

    class StreamWriter(codecs.StreamWriter):
        def encode(self, input, errors='strict'):
            return code_page_encode(cp, input, errors)

    class StreamReader(codecs.StreamReader):
        # codecs.StreamReader.read() calls self.decode(data, self.errors)
        # with two arguments only, so *errors* and *final* need defaults;
        # without them every read fails with a TypeError.
        def decode(self, input, errors='strict', final=False):
            return code_page_decode(cp, input, errors, final)

    return codecs.CodecInfo(
        name=f'cp{cp}',
        encode=encode,
        decode=decode,
        incrementalencoder=IncrementalEncoder,
        incrementaldecoder=IncrementalDecoder,
        streamreader=StreamReader,
        streamwriter=StreamWriter,
    )

Lib/test/test_codecs.py

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -3399,7 +3399,6 @@ def test_invalid_code_page(self):
33993399
self.assertRaises(OSError, codecs.code_page_encode, 123, 'a')
34003400
self.assertRaises(OSError, codecs.code_page_decode, 123, b'a')
34013401

3402-
@unittest.expectedFailureIfWindows("TODO: RUSTPYTHON")
34033402
def test_code_page_name(self):
34043403
self.assertRaisesRegex(UnicodeEncodeError, 'cp932',
34053404
codecs.code_page_encode, 932, '\xff')
@@ -3501,7 +3500,6 @@ def test_cp932(self):
35013500
(b'\x81\x00abc', 'backslashreplace', '\\x81\x00abc'),
35023501
))
35033502

3504-
@unittest.expectedFailureIfWindows("TODO: RUSTPYTHON")
35053503
def test_cp1252(self):
35063504
self.check_encode(1252, (
35073505
('abc', 'strict', b'abc'),
@@ -3520,7 +3518,6 @@ def test_cp1252(self):
35203518
(b'\xff', 'strict', '\xff'),
35213519
))
35223520

3523-
@unittest.expectedFailureIfWindows("TODO: RUSTPYTHON")
35243521
def test_cp708(self):
35253522
self.check_encode(708, (
35263523
('abc2%', 'strict', b'abc2%'),
@@ -3550,7 +3547,6 @@ def test_cp708(self):
35503547
(b'[\xa0]', 'surrogatepass', None),
35513548
))
35523549

3553-
@unittest.expectedFailureIfWindows("TODO: RUSTPYTHON")
35543550
def test_cp20106(self):
35553551
self.check_encode(20106, (
35563552
('abc', 'strict', b'abc'),
@@ -3596,7 +3592,6 @@ def test_cp_utf7(self):
35963592
(b'[\xff]', 'strict', '[\xff]'),
35973593
))
35983594

3599-
@unittest.expectedFailureIfWindows("TODO: RUSTPYTHON")
36003595
def test_multibyte_encoding(self):
36013596
self.check_decode(932, (
36023597
(b'\x84\xe9\x80', 'ignore', '\u9a3e'),
@@ -3630,7 +3625,6 @@ def test_code_page_decode_flags(self):
36303625
self.assertEqual(codecs.code_page_decode(42, b'abc'),
36313626
('\uf061\uf062\uf063', 3))
36323627

3633-
@unittest.expectedFailureIfWindows("TODO: RUSTPYTHON")
36343628
def test_incremental(self):
36353629
decoded = codecs.code_page_decode(932, b'\x82', 'strict', False)
36363630
self.assertEqual(decoded, ('', 0))

0 commit comments

Comments
 (0)