From e3506cade2c1eed0f3f63f42a575ea166fedd75b Mon Sep 17 00:00:00 2001 From: Mark Thomas Date: Mon, 11 May 2026 14:25:16 +0100 Subject: [PATCH 1/3] Implement stricter checks for fields using RFC 2231 / RFC 5987 Values where invalid characters appear in the extended value will now be ignored. --- .../fileupload2/core/ParameterParser.java | 3 ++ .../fileupload2/core/RFC2231Utils.java | 32 +++++++++++++++++-- .../core/RFC2231UtilityTestCase.java | 13 ++++++++ src/changes/changes.xml | 1 + 4 files changed, 47 insertions(+), 2 deletions(-) diff --git a/commons-fileupload2-core/src/main/java/org/apache/commons/fileupload2/core/ParameterParser.java b/commons-fileupload2-core/src/main/java/org/apache/commons/fileupload2/core/ParameterParser.java index 513b9376f..0daf413a8 100644 --- a/commons-fileupload2-core/src/main/java/org/apache/commons/fileupload2/core/ParameterParser.java +++ b/commons-fileupload2-core/src/main/java/org/apache/commons/fileupload2/core/ParameterParser.java @@ -176,6 +176,9 @@ public Map parse(final char[] charArray, final int offset, final if (paramValue != null) { try { paramValue = RFC2231Utils.hasEncodedValue(paramName) ? RFC2231Utils.decodeText(paramValue) : MimeUtils.decodeText(paramValue); + } catch (final IllegalArgumentException iae) { + // Treat invalid values as if they were not provided + paramValue = null; } catch (final UnsupportedEncodingException ignored) { // let's keep the original value in this case } diff --git a/commons-fileupload2-core/src/main/java/org/apache/commons/fileupload2/core/RFC2231Utils.java b/commons-fileupload2-core/src/main/java/org/apache/commons/fileupload2/core/RFC2231Utils.java index 11c8c749a..d3bc9625a 100644 --- a/commons-fileupload2-core/src/main/java/org/apache/commons/fileupload2/core/RFC2231Utils.java +++ b/commons-fileupload2-core/src/main/java/org/apache/commons/fileupload2/core/RFC2231Utils.java @@ -52,14 +52,38 @@ final class RFC2231Utils { */ private static final byte[] HEX_DECODE = new byte[MASK_128]; + private static final boolean[] ATTR_CHAR = new boolean[128]; + // create a ASCII decoded array of Hexadecimal values static { for (var i = 0; i < HEX_DIGITS.length; i++) { HEX_DECODE[HEX_DIGITS[i]] = (byte) i; HEX_DECODE[Character.toLowerCase(HEX_DIGITS[i])] = (byte) i; } + + for (var i = 0; i < 128; i++) { + if (i < 32 || i == ' ' || i == '\"' || i == '%' || i == '\'' || i == '(' || i == ')' || i == '*' || + i == ',' || i == '/' || i == ':' || i == ';' || i == '<' || i == '=' || i == '>' || i == '?' || + i == '@' || i == '[' || i == '\\' || i == ']' || i == '{' || i == '}' || i == 127) { + // Not valid attr-char + ATTR_CHAR[i] = false; + } else { + ATTR_CHAR[i] = true; + } + } } + + static boolean isAttrChar(char c) { + // Fast for valid values. Slow for some invalid ones. + try { + return ATTR_CHAR[c]; + } catch (ArrayIndexOutOfBoundsException e) { + return false; + } + } + + /** * Decodes a string of text obtained from a HTTP header as per RFC 2231 * @@ -71,9 +95,10 @@ final class RFC2231Utils { * * @param encodedText Text to be decoded has a format of {@code ''} and ASCII only * @return Decoded text based on charset encoding + * @throws IllegalArgumentException The encoded text contained characters not permitted by RFC 2231 * @throws UnsupportedEncodingException The requested character set wasn't found. */ - static String decodeText(final String encodedText) throws UnsupportedEncodingException { + static String decodeText(final String encodedText) throws IllegalArgumentException, UnsupportedEncodingException { final var langDelimitStart = encodedText.indexOf('\''); if (langDelimitStart == -1) { // missing charset @@ -89,6 +114,7 @@ static String decodeText(final String encodedText) throws UnsupportedEncodingExc return new String(bytes, getJavaCharset(mimeCharset)); } + /** * Converts {@code text} to their corresponding Hex value. * @@ -107,8 +133,10 @@ private static byte[] fromHex(final String text) { final var b1 = HEX_DECODE[text.charAt(i++) & MASK]; final var b2 = HEX_DECODE[text.charAt(i++) & MASK]; out.write(b1 << shift | b2); - } else { + } else if (isAttrChar(c)) { out.write((byte) c); + } else { + throw new IllegalArgumentException(); } } return out.toByteArray(); diff --git a/commons-fileupload2-core/src/test/java/org/apache/commons/fileupload2/core/RFC2231UtilityTestCase.java b/commons-fileupload2-core/src/test/java/org/apache/commons/fileupload2/core/RFC2231UtilityTestCase.java index a2ec817d5..e937bd283 100644 --- a/commons-fileupload2-core/src/test/java/org/apache/commons/fileupload2/core/RFC2231UtilityTestCase.java +++ b/commons-fileupload2-core/src/test/java/org/apache/commons/fileupload2/core/RFC2231UtilityTestCase.java @@ -82,4 +82,17 @@ void testStripDelimiter() { final var nameWithoutAsterisk = "paramname"; assertEquals("paramname", RFC2231Utils.stripDelimiter(nameWithoutAsterisk)); } + + + @Test + void testDecodeNonTokenChracters() throws Exception { + assertThrows(IllegalArgumentException.class, () -> RFC2231Utils.decodeText("ISO-8859-1''Not*allowed")); + } + + + @Test + void testDecodeUTF8Chracters() throws Exception { + assertThrows(IllegalArgumentException.class, () -> RFC2231Utils.decodeText("UTF-8''\\u8a2e")); + } + } diff --git a/src/changes/changes.xml b/src/changes/changes.xml index 68947466e..5856784f7 100644 --- a/src/changes/changes.xml +++ b/src/changes/changes.xml @@ -45,6 +45,7 @@ The type attribute can be add,update,fix,remove. Fix migration documentation to mention Java 11. Unable to parse requests for file uploads with special characters in filename on Windows. + Implement stricter checks for fields using RFC 2231 / RFC 5987 and ignore values where invalid characters appear in the extended value. Bump org.apache.commons:commons-parent from 96 to 99. From f38194f1bf1c5e77a585663f3c4b9abcf77302d9 Mon Sep 17 00:00:00 2001 From: Mark Thomas Date: Mon, 11 May 2026 15:58:15 +0100 Subject: [PATCH 2/3] Fix checkstyle warnings --- .../fileupload2/core/RFC2231Utils.java | 29 ++++++++++++++----- 1 file changed, 22 insertions(+), 7 deletions(-) diff --git a/commons-fileupload2-core/src/main/java/org/apache/commons/fileupload2/core/RFC2231Utils.java b/commons-fileupload2-core/src/main/java/org/apache/commons/fileupload2/core/RFC2231Utils.java index d3bc9625a..9ae187796 100644 --- a/commons-fileupload2-core/src/main/java/org/apache/commons/fileupload2/core/RFC2231Utils.java +++ b/commons-fileupload2-core/src/main/java/org/apache/commons/fileupload2/core/RFC2231Utils.java @@ -47,12 +47,26 @@ final class RFC2231Utils { */ private static final int MASK_128 = 0x80; + /** + * ASCII DEL character. + */ + private static final int DEL = 127; + + /** + * Number of ASCII code points. + */ + private static final int ASCII_CODE_POINT_COUNT = 128; + /** * The Hexadecimal decode value. */ private static final byte[] HEX_DECODE = new byte[MASK_128]; - private static final boolean[] ATTR_CHAR = new boolean[128]; + /** + * Flags, one for each ASCII code point, that indicate if that code point is valid for use in an RFC 5987 extended + * attribute value. + */ + private static final boolean[] ATTR_CHAR = new boolean[ASCII_CODE_POINT_COUNT]; // create a ASCII decoded array of Hexadecimal values static { @@ -61,10 +75,11 @@ final class RFC2231Utils { HEX_DECODE[Character.toLowerCase(HEX_DIGITS[i])] = (byte) i; } - for (var i = 0; i < 128; i++) { - if (i < 32 || i == ' ' || i == '\"' || i == '%' || i == '\'' || i == '(' || i == ')' || i == '*' || - i == ',' || i == '/' || i == ':' || i == ';' || i == '<' || i == '=' || i == '>' || i == '?' || - i == '@' || i == '[' || i == '\\' || i == ']' || i == '{' || i == '}' || i == 127) { + for (var i = 0; i < ASCII_CODE_POINT_COUNT; i++) { + // See RFC 5987 + if (i < ' ' || i == ' ' || i == '\"' || i == '%' || i == '\'' || i == '(' || i == ')' || i == '*' || i == ',' + || i == '/' || i == ':' || i == ';' || i == '<' || i == '=' || i == '>' || i == '?' || i == '@' + || i == '[' || i == '\\' || i == ']' || i == '{' || i == '}' || i == DEL) { // Not valid attr-char ATTR_CHAR[i] = false; } else { @@ -74,11 +89,11 @@ final class RFC2231Utils { } - static boolean isAttrChar(char c) { + static boolean isAttrChar(final char c) { // Fast for valid values. Slow for some invalid ones. try { return ATTR_CHAR[c]; - } catch (ArrayIndexOutOfBoundsException e) { + } catch (final ArrayIndexOutOfBoundsException e) { return false; } } From 28f24cd784ad1e81af7473ef8cb52031096b439e Mon Sep 17 00:00:00 2001 From: Mark Thomas Date: Tue, 12 May 2026 07:38:28 +0100 Subject: [PATCH 3/3] Also reject invalid hex values and minor improvements Prompted by CoPilot review --- .../fileupload2/core/RFC2231Utils.java | 29 ++++++++----------- .../core/RFC2231UtilityTestCase.java | 9 ++++-- src/changes/changes.xml | 2 +- 3 files changed, 20 insertions(+), 20 deletions(-) diff --git a/commons-fileupload2-core/src/main/java/org/apache/commons/fileupload2/core/RFC2231Utils.java b/commons-fileupload2-core/src/main/java/org/apache/commons/fileupload2/core/RFC2231Utils.java index 9ae187796..3e3d13d81 100644 --- a/commons-fileupload2-core/src/main/java/org/apache/commons/fileupload2/core/RFC2231Utils.java +++ b/commons-fileupload2-core/src/main/java/org/apache/commons/fileupload2/core/RFC2231Utils.java @@ -42,11 +42,6 @@ final class RFC2231Utils { */ private static final byte MASK = 0x7f; - /** - * The Hexadecimal representation of 128. - */ - private static final int MASK_128 = 0x80; - /** * ASCII DEL character. */ @@ -60,7 +55,7 @@ final class RFC2231Utils { /** * The Hexadecimal decode value. */ - private static final byte[] HEX_DECODE = new byte[MASK_128]; + private static final byte[] HEX_DECODE = new byte[ASCII_CODE_POINT_COUNT]; /** * Flags, one for each ASCII code point, that indicate if that code point is valid for use in an RFC 5987 extended @@ -70,6 +65,11 @@ final class RFC2231Utils { // create a ASCII decoded array of Hexadecimal values static { + // Initialise all values to invalid + for (var i = 0; i < ASCII_CODE_POINT_COUNT; i++) { + HEX_DECODE[i] = -1; + } + // Configure the valid hex digits for (var i = 0; i < HEX_DIGITS.length; i++) { HEX_DECODE[HEX_DIGITS[i]] = (byte) i; HEX_DECODE[Character.toLowerCase(HEX_DIGITS[i])] = (byte) i; @@ -77,12 +77,9 @@ final class RFC2231Utils { for (var i = 0; i < ASCII_CODE_POINT_COUNT; i++) { // See RFC 5987 - if (i < ' ' || i == ' ' || i == '\"' || i == '%' || i == '\'' || i == '(' || i == ')' || i == '*' || i == ',' + if (!(i < ' ' || i == ' ' || i == '\"' || i == '%' || i == '\'' || i == '(' || i == ')' || i == '*' || i == ',' || i == '/' || i == ':' || i == ';' || i == '<' || i == '=' || i == '>' || i == '?' || i == '@' - || i == '[' || i == '\\' || i == ']' || i == '{' || i == '}' || i == DEL) { - // Not valid attr-char - ATTR_CHAR[i] = false; - } else { + || i == '[' || i == '\\' || i == ']' || i == '{' || i == '}' || i == DEL)) { ATTR_CHAR[i] = true; } } @@ -90,12 +87,7 @@ final class RFC2231Utils { static boolean isAttrChar(final char c) { - // Fast for valid values. Slow for some invalid ones. - try { - return ATTR_CHAR[c]; - } catch (final ArrayIndexOutOfBoundsException e) { - return false; - } + return c < ASCII_CODE_POINT_COUNT && ATTR_CHAR[c]; } @@ -147,6 +139,9 @@ private static byte[] fromHex(final String text) { } final var b1 = HEX_DECODE[text.charAt(i++) & MASK]; final var b2 = HEX_DECODE[text.charAt(i++) & MASK]; + if (b1 < 0 || b2 < 0) { + throw new IllegalArgumentException(); + } out.write(b1 << shift | b2); } else if (isAttrChar(c)) { out.write((byte) c); diff --git a/commons-fileupload2-core/src/test/java/org/apache/commons/fileupload2/core/RFC2231UtilityTestCase.java b/commons-fileupload2-core/src/test/java/org/apache/commons/fileupload2/core/RFC2231UtilityTestCase.java index e937bd283..bc431de02 100644 --- a/commons-fileupload2-core/src/test/java/org/apache/commons/fileupload2/core/RFC2231UtilityTestCase.java +++ b/commons-fileupload2-core/src/test/java/org/apache/commons/fileupload2/core/RFC2231UtilityTestCase.java @@ -85,14 +85,19 @@ void testStripDelimiter() { @Test - void testDecodeNonTokenChracters() throws Exception { + void testDecodeNonTokenCharacters() throws Exception { assertThrows(IllegalArgumentException.class, () -> RFC2231Utils.decodeText("ISO-8859-1''Not*allowed")); } @Test - void testDecodeUTF8Chracters() throws Exception { + void testDecodeUTF8Characters() throws Exception { assertThrows(IllegalArgumentException.class, () -> RFC2231Utils.decodeText("UTF-8''\\u8a2e")); } + + @Test + void testDecodeInvalidHex() throws Exception { + assertThrows(IllegalArgumentException.class, () -> RFC2231Utils.decodeText("ISO-8859-1''hello%HHworld")); + } } diff --git a/src/changes/changes.xml b/src/changes/changes.xml index 5856784f7..e2e80e003 100644 --- a/src/changes/changes.xml +++ b/src/changes/changes.xml @@ -45,7 +45,7 @@ The type attribute can be add,update,fix,remove. Fix migration documentation to mention Java 11. Unable to parse requests for file uploads with special characters in filename on Windows. - Implement stricter checks for fields using RFC 2231 / RFC 5987 and ignore values where invalid characters appear in the extended value. + Implement stricter checks for fields using RFC 2231 / RFC 5987. Invalid extended values will be ignored. Bump org.apache.commons:commons-parent from 96 to 99.