diff --git a/app/api/v2/handlers/operation_api.py b/app/api/v2/handlers/operation_api.py index 17bf13645..3f0e02586 100644 --- a/app/api/v2/handlers/operation_api.py +++ b/app/api/v2/handlers/operation_api.py @@ -157,7 +157,7 @@ async def get_operation_report(self, request: web.Request): access = await self.get_request_permissions(request) output = await self._read_output_parameter_(request) report = await self._api_manager.get_operation_report(operation_id, access, output) - return web.json_response(report) + return web.json_response(report, dumps=lambda obj: json.dumps(obj, ensure_ascii=True, default=str)) @aiohttp_apispec.docs(tags=['operations'], summary='Get Operation Event Logs', @@ -179,7 +179,7 @@ async def get_operation_event_logs(self, request: web.Request): access = await self.get_request_permissions(request) output = await self._read_output_parameter_(request) report = await self._api_manager.get_operation_event_logs(operation_id, access, output) - return web.json_response(report) + return web.json_response(report, dumps=lambda obj: json.dumps(obj, ensure_ascii=True, default=str)) @aiohttp_apispec.docs(tags=['operations'], summary='Get Links from Operation', diff --git a/app/objects/c_operation.py b/app/objects/c_operation.py index 9170b472a..98e97b3ef 100644 --- a/app/objects/c_operation.py +++ b/app/objects/c_operation.py @@ -320,22 +320,26 @@ async def report(self, file_svc, data_svc, output=False): for step in self.chain: step_report = dict(link_id=step.id, ability_id=step.ability.ability_id, - command=self.decode_bytes(step.command), - plaintext_command=self.decode_bytes(step.plaintext_command), + command=self._sanitize_for_json(self.decode_bytes(step.command)), + plaintext_command=self._sanitize_for_json(self.decode_bytes(step.plaintext_command)), delegated=step.decide.strftime(self.TIME_FORMAT), run=step.finish, status=step.status, platform=step.executor.platform, executor=step.executor.name, pid=step.pid, - description=step.ability.description, - name=step.ability.name, + description=self._sanitize_for_json(step.ability.description), + name=self._sanitize_for_json(step.ability.name), attack=dict(tactic=step.ability.tactic, - technique_name=step.ability.technique_name, + technique_name=self._sanitize_for_json(step.ability.technique_name), technique_id=step.ability.technique_id)) if output and step.output: - results = self.decode_bytes(file_svc.read_result_file(step.unique)) - step_report['output'] = json.loads(results.replace('\\r\\n', '').replace('\\n', '')) + try: + results = self.decode_bytes(file_svc.read_result_file(step.unique)) + step_report['output'] = json.loads(results.replace('\\r\\n', '').replace('\\n', '')) + except Exception: + step_report['output'] = dict(stdout=self._sanitize_for_json(results if 'results' in dir() else ''), + stderr='', exit_code='') if step.agent_reported_time: step_report['agent_reported_time'] = step.agent_reported_time.strftime(self.TIME_FORMAT) agents_steps.setdefault(step.paw, {'steps': []})['steps'].append(step_report) @@ -348,8 +352,16 @@ async def report(self, file_svc, data_svc, output=False): async def event_logs(self, file_svc, data_svc, output=False): # Ignore discarded / high visibility links that did not actually run. - return [await self._convert_link_to_event_log(step, file_svc, data_svc, output=output) for step in self.chain - if not step.can_ignore()] + event_logs = [] + for step in self.chain: + if step.can_ignore(): + continue + try: + event_logs.append(await self._convert_link_to_event_log(step, file_svc, data_svc, output=output)) + except Exception: + logging.warning('Error converting link %s to event log for operation %s, skipping', + step.id, self.name, exc_info=True) + return event_logs async def cede_control_to_planner(self, services): planner = await self._get_planning_module(services) @@ -376,7 +388,7 @@ async def write_event_logs_to_disk(self, file_svc, data_svc, output=False): logging.debug('Wrote event logs for operation %s to disk at %s/%s' % (self.name, event_logs_dir, file_name)) async def _write_logs_to_disk(self, logs, file_name, dest_dir, file_svc): - logs_dumps = json.dumps(logs) + os.linesep + logs_dumps = json.dumps(logs, ensure_ascii=True, default=str) + os.linesep await file_svc.save_file(file_name, logs_dumps.encode(), dest_dir, encrypt=False) async def _load_objective(self, data_svc): @@ -386,8 +398,8 @@ async def _load_objective(self, data_svc): self.objective = deepcopy(obj[0]) async def _convert_link_to_event_log(self, link, file_svc, data_svc, output=False): - event_dict = dict(command=self.decode_bytes(link.command), - plaintext_command=self.decode_bytes(link.plaintext_command), + event_dict = dict(command=self._sanitize_for_json(self.decode_bytes(link.command)), + plaintext_command=self._sanitize_for_json(self.decode_bytes(link.plaintext_command)), delegated_timestamp=link.decide.strftime(self.TIME_FORMAT), collected_timestamp=link.collect.strftime(self.TIME_FORMAT) if link.collect else None, finished_timestamp=link.finish, @@ -400,8 +412,12 @@ async def _convert_link_to_event_log(self, link, file_svc, data_svc, output=Fals operation_metadata=self._get_operation_metadata_for_event_log(), attack_metadata=self._get_attack_metadata_for_event_log(link.ability)) if output and link.output: - results = self.decode_bytes(file_svc.read_result_file(link.unique)) - event_dict['output'] = json.loads(results.replace('\\r\\n', '').replace('\\n', '')) + try: + results = self.decode_bytes(file_svc.read_result_file(link.unique)) + event_dict['output'] = json.loads(results.replace('\\r\\n', '').replace('\\n', '')) + except Exception: + event_dict['output'] = dict(stdout=self._sanitize_for_json(results if 'results' in dir() else ''), + stderr='', exit_code='') if link.agent_reported_time: event_dict['agent_reported_time'] = link.agent_reported_time.strftime(self.TIME_FORMAT) return event_dict @@ -533,10 +549,32 @@ def _check_reason_skipped(self, agent, ability, op_facts, state, agent_executors return dict(reason='Other', reason_id=self.Reason.OTHER.value, ability_id=ability.ability_id, ability_name=ability.name) + @staticmethod + def _sanitize_for_json(s): + """Remove characters that are problematic for JSON serialization. + + Strips surrogate characters, null bytes, and other non-serializable + unicode from strings to ensure they can always be safely passed to + json.dumps without raising encoding errors. + """ + if not isinstance(s, str): + return s + try: + # Round-trip through utf-8 to normalize surrogates and bad chars + sanitized = s.encode('utf-8', errors='surrogatepass').decode('utf-8', errors='replace') + # Strip null bytes and other control characters (except tab, newline, carriage return) + sanitized = ''.join(c if c in ('\t', '\n', '\r') or (c >= ' ' or ord(c) > 127) else '\ufffd' for c in sanitized) + # Final verification: ensure json.dumps won't choke + json.dumps(sanitized) + return sanitized + except Exception: + # Last resort: ASCII-only representation + return s.encode('ascii', errors='replace').decode('ascii') + def _get_operation_metadata_for_event_log(self): - return dict(operation_name=self.name, + return dict(operation_name=self._sanitize_for_json(self.name), operation_start=self.start.strftime(self.TIME_FORMAT), - operation_adversary=self.adversary.name) + operation_adversary=self._sanitize_for_json(self.adversary.name)) def _emit_state_change_event(self, from_state, to_state): event_svc = BaseService.get_service('event_svc') @@ -556,13 +594,13 @@ def _emit_state_change_event(self, from_state, to_state): @staticmethod def _get_ability_metadata_for_event_log(ability): return dict(ability_id=ability.ability_id, - ability_name=ability.name, - ability_description=ability.description) + ability_name=Operation._sanitize_for_json(ability.name), + ability_description=Operation._sanitize_for_json(ability.description)) @staticmethod def _get_attack_metadata_for_event_log(ability): return dict(tactic=ability.tactic, - technique_name=ability.technique_name, + technique_name=Operation._sanitize_for_json(ability.technique_name), technique_id=ability.technique_id) @staticmethod diff --git a/app/objects/secondclass/c_link.py b/app/objects/secondclass/c_link.py index 7afd4dc7e..d119c0429 100644 --- a/app/objects/secondclass/c_link.py +++ b/app/objects/secondclass/c_link.py @@ -134,8 +134,14 @@ def status(self): @property def display(self): dump = LinkSchema(exclude=['jitter']).dump(self) - dump['command'] = self.decode_bytes(dump['command']) - dump['plaintext_command'] = self.decode_bytes(dump['plaintext_command']) + try: + dump['command'] = self.decode_bytes(dump['command']) + except Exception: + pass # Keep the raw base64-encoded command + try: + dump['plaintext_command'] = self.decode_bytes(dump['plaintext_command']) + except Exception: + pass # Keep the raw base64-encoded plaintext command return dump @status.setter diff --git a/app/utility/base_world.py b/app/utility/base_world.py index 5fe028aa7..61ff3849c 100644 --- a/app/utility/base_world.py +++ b/app/utility/base_world.py @@ -60,7 +60,9 @@ def set_config(name, prop, value): @staticmethod def decode_bytes(s, strip_newlines=True): - decoded = b64decode(s).decode('utf-8', errors='ignore') + decoded = b64decode(s).decode('utf-8', errors='replace') + # Remove surrogate characters (U+D800-U+DFFF) that can break JSON serialization + decoded = decoded.encode('utf-8', errors='surrogatepass').decode('utf-8', errors='replace') return decoded.replace('\r\n', '').replace('\n', '') if strip_newlines else decoded @staticmethod diff --git a/tests/objects/test_operation.py b/tests/objects/test_operation.py index 4a7a2ddb4..1179228a5 100644 --- a/tests/objects/test_operation.py +++ b/tests/objects/test_operation.py @@ -731,3 +731,28 @@ async def test_init_source_seeds_relationship_with_resolved_facts(self, knowledg assert seeded_rel.target.value == 's3cr3t', ( 'Relationship target fact value should be resolved from the source fact list, not None' ) + + def test_sanitize_for_json(self): + """Test that Operation._sanitize_for_json properly handles un-serializable inputs.""" + # Contains a null byte, normal string, and a valid dictionary + test_cases = [ + 'normal string', + 'string\x00with null', + {'dict_key': 'dict_val'}, + '🔥 emoji', # Emojis are fine but shouldn't crash + {'nested': 'bad\x00string'} # Dictionary with nested bad string + ] + + for case in test_cases: + sanitized = Operation._sanitize_for_json(case) + # The output must be JSON serializable + serialized = json.dumps(sanitized) + assert serialized is not None + + # Test surrogate fallback specifically + bad_surrogate = 'hello\ud800world' + sanitized_surrogate = Operation._sanitize_for_json(bad_surrogate) + try: + json.dumps(sanitized_surrogate) + except UnicodeEncodeError: + pytest.fail("Sanitized surrogate failed serialization")