-
Notifications
You must be signed in to change notification settings - Fork 433
Sanitize strings to prevent UTF errors #2357
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,130 @@ | ||
| /* | ||
| * Copyright 2026 Red Hat Inc., Durham, North Carolina. | ||
| * All Rights Reserved. | ||
| * | ||
| * This library is free software; you can redistribute it and/or | ||
| * modify it under the terms of the GNU Lesser General Public | ||
| * License as published by the Free Software Foundation; either | ||
| * version 2.1 of the License, or (at your option) any later version. | ||
| * | ||
| * This library is distributed in the hope that it will be useful, | ||
| * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
| * Lesser General Public License for more details. | ||
| * | ||
| * You should have received a copy of the GNU Lesser General Public | ||
| * License along with this library; if not, write to the Free Software | ||
| * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | ||
| * | ||
| * Authors: | ||
| * Jan Černý <jcerny@redhat.com> | ||
| */ | ||
|
|
||
| #ifdef HAVE_CONFIG_H | ||
| #include <config.h> | ||
| #endif | ||
|
|
||
| #include <stdlib.h> | ||
| #include <string.h> | ||
| #include <stdint.h> | ||
|
|
||
| #include "oscap_utf8.h" | ||
|
|
||
/*
 * Tell whether a byte has the form 10xxxxxx, i.e. is a UTF-8 continuation byte.
 */
static int utf8_is_continuation(uint8_t byte)
{
	return (byte & 0xC0) == 0x80;
}

/*
 * Determine the length of the UTF-8 sequence that starts at s.
 *
 * s          first byte of the candidate sequence
 * remaining  number of readable bytes starting at s
 *
 * Returns the sequence length in bytes (1 to 4) when the bytes at s form a
 * well-formed UTF-8 sequence that fits into the remaining bytes. Returns -1
 * when the sequence is truncated, has an invalid lead or continuation byte,
 * is an overlong encoding, encodes a surrogate (U+D800..U+DFFF) or encodes a
 * value above U+10FFFF. Also returns -1 when there is nothing to read
 * (s is NULL or remaining is 0), so that s[0] is never read out of bounds.
 */
static int _utf8_char_len(const uint8_t *s, size_t remaining)
{
	if (s == NULL || remaining == 0)
		return -1;

	uint8_t b = s[0];

	/* ASCII */
	if (b <= 0x7F)
		return 1;

	/* Two bytes; lead bytes 0xC0 and 0xC1 would be overlong and are rejected */
	if (b >= 0xC2 && b <= 0xDF) {
		if (remaining < 2 || !utf8_is_continuation(s[1]))
			return -1;
		return 2;
	}

	/* Three bytes */
	if (b >= 0xE0 && b <= 0xEF) {
		if (remaining < 3 || !utf8_is_continuation(s[1]) ||
				!utf8_is_continuation(s[2]))
			return -1;
		/* 0xE0 followed by 0x80..0x9F is an overlong encoding */
		if (b == 0xE0 && s[1] < 0xA0)
			return -1;
		/* 0xED followed by 0xA0..0xBF encodes a UTF-16 surrogate */
		if (b == 0xED && s[1] > 0x9F)
			return -1;
		return 3;
	}

	/* Four bytes; lead bytes above 0xF4 can only encode values > U+10FFFF */
	if (b >= 0xF0 && b <= 0xF4) {
		if (remaining < 4 || !utf8_is_continuation(s[1]) ||
				!utf8_is_continuation(s[2]) ||
				!utf8_is_continuation(s[3]))
			return -1;
		/* 0xF0 followed by 0x80..0x8F is an overlong encoding */
		if (b == 0xF0 && s[1] < 0x90)
			return -1;
		/* 0xF4 followed by 0x90..0xBF exceeds U+10FFFF */
		if (b == 0xF4 && s[1] > 0x8F)
			return -1;
		return 4;
	}

	/* Stray continuation byte (0x80..0xBF) or invalid lead byte */
	return -1;
}
|
|
||
/* Extra bytes allocated up front so that a few replacements need no realloc. */
enum { UTF8_SANITIZE_SLACK = 64 };

/* UTF-8 encoding of U+FFFD REPLACEMENT CHARACTER. */
static const uint8_t utf8_replacement[] = { 0xEF, 0xBF, 0xBD };

/*
 * Ensure that `extra` more bytes fit into the output buffer.
 *
 * buf    in/out: the buffer, allocated with *cap + 1 bytes (the spare byte
 *        is reserved for the terminating NUL)
 * cap    in/out: usable capacity of *buf in bytes
 * used   number of bytes already written, must not exceed *cap
 * extra  number of bytes about to be appended
 *
 * The capacity is doubled when needed. The caller starts with a capacity of
 * at least UTF8_SANITIZE_SLACK and appends at most 4 bytes at a time, so a
 * single doubling is always sufficient.
 *
 * Returns 0 on success. Returns -1 if the new size would overflow size_t or
 * realloc fails; *buf is then still valid and still owned by the caller.
 */
static int utf8_buf_reserve(uint8_t **buf, size_t *cap, size_t used, size_t extra)
{
	if (extra <= *cap - used)
		return 0;

	/* new_cap + 1 must be representable */
	if (*cap > (SIZE_MAX - 1) / 2)
		return -1;

	size_t new_cap = *cap * 2;
	uint8_t *tmp = realloc(*buf, new_cap + 1);
	if (tmp == NULL)
		return -1;

	*buf = tmp;
	*cap = new_cap;
	return 0;
}

/*
 * Replace every byte that does not start a well-formed UTF-8 sequence with
 * the Unicode replacement character (U+FFFD); well-formed sequences are
 * copied unchanged.
 *
 * input       the bytes to sanitize (need not be NUL-terminated)
 * input_len   number of bytes in input
 * output_len  if not NULL, receives the byte length of the returned string
 *             (without the terminating NUL); it is written only when a
 *             string is returned
 *
 * Returns a newly allocated, NUL-terminated string that the caller must
 * free if at least one invalid byte was found. Returns NULL if the input is
 * already valid UTF-8, if input is NULL, or if memory cannot be allocated.
 */
char *oscap_sanitize_utf8(const char *input, size_t input_len, size_t *output_len)
{
	const uint8_t *in = (const uint8_t *)input;
	size_t i = 0;

	if (input == NULL)
		return NULL;

	/* Skip the valid prefix; fully valid input needs no allocation at all */
	while (i < input_len) {
		int clen = _utf8_char_len(in + i, input_len - i);
		if (clen < 0)
			break;
		i += (size_t)clen;
	}

	if (i == input_len)
		return NULL;

	/* input_len + slack + terminating NUL must not wrap around */
	if (input_len > SIZE_MAX - UTF8_SANITIZE_SLACK - 1)
		return NULL;

	size_t alloc = input_len + UTF8_SANITIZE_SLACK;
	uint8_t *out = malloc(alloc + 1);
	if (out == NULL)
		return NULL;

	if (i > 0)
		memcpy(out, in, i);
	size_t o = i;

	while (i < input_len) {
		int clen = _utf8_char_len(in + i, input_len - i);
		const uint8_t *src;
		size_t count;
		size_t consumed;

		if (clen < 0) {
			/* Exactly one offending byte is consumed per replacement */
			src = utf8_replacement;
			count = sizeof utf8_replacement;
			consumed = 1;
		} else {
			src = in + i;
			count = (size_t)clen;
			consumed = (size_t)clen;
		}

		if (utf8_buf_reserve(&out, &alloc, o, count) != 0) {
			free(out);
			return NULL;
		}

		memcpy(out + o, src, count);
		o += count;
		i += consumed;
	}

	/* The spare byte beyond `alloc` always has room for the terminator */
	out[o] = '\0';
	if (output_len != NULL)
		*output_len = o;
	return (char *)out;
}
| Original file line number | Diff line number | Diff line change | ||||
|---|---|---|---|---|---|---|
| @@ -0,0 +1,41 @@ | ||||||
| /* | ||||||
| * Copyright 2026 Red Hat Inc., Durham, North Carolina. | ||||||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||
| * All Rights Reserved. | ||||||
| * | ||||||
| * This library is free software; you can redistribute it and/or | ||||||
| * modify it under the terms of the GNU Lesser General Public | ||||||
| * License as published by the Free Software Foundation; either | ||||||
| * version 2.1 of the License, or (at your option) any later version. | ||||||
| * | ||||||
| * This library is distributed in the hope that it will be useful, | ||||||
| * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||||
| * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||||||
| * Lesser General Public License for more details. | ||||||
| * | ||||||
| * You should have received a copy of the GNU Lesser General Public | ||||||
| * License along with this library; if not, write to the Free Software | ||||||
| * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | ||||||
| * | ||||||
| * Authors: | ||||||
| * Jan Černý <jcerny@redhat.com> | ||||||
| */ | ||||||
|
|
||||||
| #ifndef OSCAP_UTF8_H_ | ||||||
| #define OSCAP_UTF8_H_ | ||||||
|
|
||||||
| #include <stddef.h> | ||||||
|
|
||||||
| /** | ||||||
| * Sanitize a string by replacing invalid UTF-8 byte sequences with the | ||||||
| * Unicode replacement character (U+FFFD). | ||||||
| * | ||||||
| * @param input the input string (not necessarily null-terminated) | ||||||
| * @param input_len byte length of the input | ||||||
| * @param output_len if not NULL, set to the byte length of the sanitized string | ||||||
| * @return newly allocated null-terminated sanitized string if any invalid | ||||||
| * sequences were found (caller must free), or NULL if the input | ||||||
| * is already valid UTF-8 or if memory allocation fails; when NULL | ||||||
| * is returned, *output_len is left unchanged | ||||||
| */ | ||||||
| char *oscap_sanitize_utf8(const char *input, size_t input_len, size_t *output_len); | ||||||
|
|
||||||
| #endif | ||||||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,26 @@ | ||
#!/usr/bin/env bash
# Regression test: evaluating an OVAL file_object with a "pattern match" on
# file names must still succeed when the scanned directory contains a file
# whose name is not valid UTF-8.
#
# Expects $builddir, $srcdir and $OSCAP to be provided by the test harness.
# assert_exists is not defined here; presumably it comes from test_common.sh
# and checks an XPath count against "$result" (verify against that file).
. "$builddir/tests/test_common.sh"

set -e -o pipefail

oval_def="$(mktemp)"
stdout="$(mktemp)"
stderr="$(mktemp)"
result="$(mktemp)"
temp_dir="$(mktemp -d)"

# Point the OVAL definition at the freshly created temporary directory.
cp "$srcdir/test_pcre_nonutf_characters.xml" "$oval_def"
sed -i "s;TEMP_DIR_PLACEHOLDER;$temp_dir;" "$oval_def"

# One ordinary file name and one containing the raw byte 0xDC (octal 334),
# which is not followed by a continuation byte and is therefore invalid UTF-8.
normal_file="$temp_dir/normal_filename"
touch "$normal_file"
evil_file="$temp_dir/$(printf "evil_filename_\334_non_utf8_character")"
touch "$evil_file"

$OSCAP oval eval --results "$result" "$oval_def" >"$stdout" 2>"$stderr"

# Both files must be collected, the definition must evaluate to true, and
# the scanner must warn that it replaced the invalid bytes.
assert_exists 2 '/oval_results/results/system/oval_system_characteristics/system_data/unix-sys:file_item'
grep -q "Definition oval:org.mitre.oval.test:def:1: true" "$stdout"
grep -q "W: oscap: Replaced invalid UTF-8 byte sequence(s) with the replacement character (U+FFFD) in .*" "$stderr"

rm -f "$oval_def"
rm -f "$stdout" "$stderr" "$result"
rm -rf "$temp_dir"
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,32 @@ | ||
| <?xml version="1.0" encoding="UTF-8"?> | ||
| <oval_definitions xsi:schemaLocation="http://oval.mitre.org/XMLSchema/oval-definitions-5 oval-definitions-schema.xsd http://oval.mitre.org/XMLSchema/oval-definitions-5#unix unix-definitions-schema.xsd" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns="http://oval.mitre.org/XMLSchema/oval-definitions-5" xmlns:oval-def="http://oval.mitre.org/XMLSchema/oval-definitions-5" xmlns:oval="http://oval.mitre.org/XMLSchema/oval-common-5" xmlns:unix-def="http://oval.mitre.org/XMLSchema/oval-definitions-5#unix"> | ||
| <generator> | ||
| <oval:schema_version>5.10</oval:schema_version> | ||
| <oval:timestamp>2009-01-12T10:41:00-05:00</oval:timestamp> | ||
| </generator> | ||
| <definitions> | ||
| <definition id="oval:org.mitre.oval.test:def:1" version="1" class="miscellaneous"> | ||
| <metadata> | ||
| <title>Test for pattern match operation on file names</title> | ||
| <description>The definition should not produce any error during the evaluation</description> | ||
| </metadata> | ||
| <criteria> | ||
| <criterion comment="Test that files from the TEMP_DIR_PLACEHOLDER directory are collected." test_ref="oval:org.mitre.oval.test:tst:1"/> | ||
| </criteria> | ||
| </definition> | ||
| </definitions> | ||
|
|
||
| <tests> | ||
| <file_test id="oval:org.mitre.oval.test:tst:1" version="1" comment="file test" check_existence="at_least_one_exists" check="all" xmlns="http://oval.mitre.org/XMLSchema/oval-definitions-5#unix"> | ||
| <object object_ref="oval:org.mitre.oval.test:obj:1"/> | ||
| </file_test> | ||
| </tests> | ||
|
|
||
| <objects> | ||
| <file_object id="oval:org.mitre.oval.test:obj:1" version="1" comment="Files in the temporary test directory" xmlns="http://oval.mitre.org/XMLSchema/oval-definitions-5#unix"> | ||
| <path operation="equals">TEMP_DIR_PLACEHOLDER</path> | ||
| <filename operation="pattern match">^.*$</filename> | ||
| </file_object> | ||
| </objects> | ||
|
|
||
| </oval_definitions> |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.