diff --git a/.releaserc.cjs b/.releaserc.cjs index d1f698b..0d46f84 100644 --- a/.releaserc.cjs +++ b/.releaserc.cjs @@ -30,11 +30,7 @@ module.exports = { ["@semantic-release/changelog", {}], ["@semantic-release/git", { "assets": ["CHANGELOG.md", "pyproject.toml", "uv.lock"], - "message": "chore(release): ${nextRelease.version}\n\n${nextRelease.notes}\n\nSigned-off-by: " + - (process.env.GIT_AUTHOR_NAME || process.env.GIT_COMMITTER_NAME || "Release Bot") + - " <" + - (process.env.GIT_AUTHOR_EMAIL || process.env.GIT_COMMITTER_EMAIL || "noreply@github.com") + - ">" + "message": "chore(release): ${nextRelease.version}\n\n${nextRelease.notes}\n\nSigned-off-by: semantic-release-bot " }], ["@semantic-release/github", {}] ] diff --git a/CHANGELOG.md b/CHANGELOG.md index 3640030..18d4623 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,9 @@ +## [0.2.0-rc.1](https://github.com/disafronov/python-logging-objects-with-schema/compare/v0.1.4...v0.2.0-rc.1) (2025-12-09) + +### Features + +* **schema_logger:** add support for custom forbidden keys in SchemaLogger ([1d678f5](https://github.com/disafronov/python-logging-objects-with-schema/commit/1d678f5ea8332e4afab2b96ed23bf61a928ad7cd)) + ## [0.1.4](https://github.com/disafronov/python-logging-objects-with-schema/compare/v0.1.3...v0.1.4) (2025-12-08) ### Bug Fixes diff --git a/README.md b/README.md index b6d8ead..602fe3a 100644 --- a/README.md +++ b/README.md @@ -250,3 +250,78 @@ Example: With `extra={"request_id": "abc-123"}`, the value appears in both `ServicePayload.RequestID` and `ServicePayload.Metadata.ID`. + +## Inheritance and custom forbidden root keys + +`SchemaLogger` supports inheritance, allowing subclasses to add additional +forbidden root keys for schema validation. This is useful when you need to +prevent certain root keys from being used in your schema beyond the builtin +`logging.LogRecord` attributes. + +### Basic inheritance + +Each subclass can pass the `forbidden_keys` parameter to the parent's +`__init__()` method. The builtin set of forbidden keys (standard `logging.LogRecord` +attributes) is always present and cannot be replaced - additional keys are +merged with the builtin set. + +Example: + +```python +from logging_objects_with_schema import SchemaLogger +import logging + +class MyLogger(SchemaLogger): + def __init__(self, name: str, level: int = logging.NOTSET) -> None: + # Add custom forbidden keys + super().__init__(name, level, forbidden_keys={"custom_forbidden_key"}) +``` + +### Multi-level inheritance + +When creating a hierarchy of loggers, each subclass can pass `forbidden_keys` +from its own subclasses to the parent, merging them with its own set. The +library does not automatically propagate keys up the inheritance chain - each +subclass must implement this logic itself. + +Example: + +```python +from logging_objects_with_schema import SchemaLogger +import logging + +class ParentLogger(SchemaLogger): + def __init__( + self, name: str, level: int = logging.NOTSET, forbidden_keys: set[str] | None = None + ) -> None: + # Merge parent's keys with keys from child + parent_keys = {"parent_forbidden_key"} + if forbidden_keys: + parent_keys = parent_keys | forbidden_keys + super().__init__(name, level, forbidden_keys=parent_keys) + +class ChildLogger(ParentLogger): + def __init__(self, name: str, level: int = logging.NOTSET) -> None: + # Pass child's keys to parent, which will merge them + super().__init__(name, level, forbidden_keys={"child_forbidden_key"}) +``` + +In this example, the final set of forbidden keys will be: + +- Builtin `logging.LogRecord` attributes (always present) +- `parent_forbidden_key` (from `ParentLogger`) +- `child_forbidden_key` (from `ChildLogger`) + +All keys are merged together - they are not replaced, only supplemented. + +### Important notes + +- The builtin set of forbidden keys (standard `logging.LogRecord` attributes) + is always present and cannot be replaced or removed +- Additional forbidden keys are merged with the builtin set, not replaced +- Each subclass must implement the logic to pass `forbidden_keys` to its parent + if it wants to propagate keys from its own subclasses +- The `forbidden_keys` parameter is optional - if not provided, only builtin + keys are used, maintaining 100% backward compatibility +- `None` and empty `set()` are semantically equivalent for `forbidden_keys` - + both mean "no additional forbidden keys" and produce the same result diff --git a/pyproject.toml b/pyproject.toml index 72e6b01..d12255a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "uv_build" [project] name = "logging-objects-with-schema" -version = "0.1.4" +version = "0.2.0rc1" description = "Proxy logging wrapper that validates extra fields against a JSON schema." readme = "README.md" requires-python = ">=3.10" diff --git a/src/logging_objects_with_schema/schema_loader.py b/src/logging_objects_with_schema/schema_loader.py index a9587d4..d717042 100644 --- a/src/logging_objects_with_schema/schema_loader.py +++ b/src/logging_objects_with_schema/schema_loader.py @@ -109,10 +109,12 @@ def _create_empty_compiled_schema_with_problems( # cache stores the result of compiling that file. Both are thread-safe and # use double-checked locking to avoid race conditions. -# Compiled schema cache: Key is absolute schema_path, Value is tuple of -# (_CompiledSchema, list[_SchemaProblem]). This cache stores both successful -# compilations and failures (with problems list). -_SCHEMA_CACHE: dict[Path, tuple[_CompiledSchema, list[_SchemaProblem]]] = {} +# Compiled schema cache: Key is tuple of (absolute schema_path, frozenset of +# forbidden_keys), Value is tuple of (_CompiledSchema, list[_SchemaProblem]). +# This cache stores both successful compilations and failures (with problems list). +_SCHEMA_CACHE: dict[ + tuple[Path, frozenset[str]], tuple[_CompiledSchema, list[_SchemaProblem]] +] = {} _cache_lock = threading.RLock() @@ -625,11 +627,30 @@ def _get_builtin_logrecord_attributes() -> set[str]: def _check_root_conflicts( - schema_dict: Mapping[str, Any], problems: list[_SchemaProblem] + schema_dict: Mapping[str, Any], + problems: list[_SchemaProblem], + forbidden_keys: set[str] | None = None, ) -> None: - """Check schema root keys for conflicts with reserved logging fields.""" + """Check schema root keys for conflicts with reserved logging fields. + Args: + schema_dict: The schema dictionary to check. + problems: List to collect validation problems. + forbidden_keys: Additional forbidden root keys to check against. + These keys are merged with builtin LogRecord attributes. + Builtin keys cannot be replaced, only supplemented. + Note: None and empty set() are semantically equivalent - both + mean "no additional forbidden keys" and produce the same result. + """ + # Builtin keys always present, cannot be replaced forbidden_root_keys = _get_builtin_logrecord_attributes() + # Merge with additional forbidden keys if provided. + # We use `if forbidden_keys:` instead of `if forbidden_keys is not None:` because + # both None and empty set() semantically mean "no additional forbidden keys". + # This check treats them equivalently: None is falsy, and empty set() is also + # falsy, so both cases skip the merge operation, which is the correct behavior. + if forbidden_keys: + forbidden_root_keys = forbidden_root_keys | forbidden_keys for key in schema_dict.keys(): if key in forbidden_root_keys: @@ -640,7 +661,9 @@ def _check_root_conflicts( ) -def _compile_schema_internal() -> tuple[_CompiledSchema, list[_SchemaProblem]]: +def _compile_schema_internal( + forbidden_keys: set[str] | None = None, +) -> tuple[_CompiledSchema, list[_SchemaProblem]]: """Compile JSON schema into ``_CompiledSchema`` and collect all problems. The function loads the raw JSON schema, validates its structure, checks @@ -649,13 +672,22 @@ def _compile_schema_internal() -> tuple[_CompiledSchema, list[_SchemaProblem]]: discovered during this process are reported as :class:`_SchemaProblem` instances. - Results are cached process-wide: the cache key is the absolute schema - file path and the value is a tuple ``(_CompiledSchema, list[_SchemaProblem])``. - Once a schema for a given path has been observed (including the cases when - it is missing or invalid), subsequent calls always return the cached result - without re-reading or re-compiling the schema. To pick up on-disk changes - to the schema, the application must restart the process. See the README - section \"Schema caching and thread safety\" for more details. + Results are cached process-wide: the cache key is a tuple of the absolute + schema file path and the set of additional forbidden keys. The value is a + tuple ``(_CompiledSchema, list[_SchemaProblem])``. Once a schema for a + given path and forbidden keys set has been observed (including the cases + when it is missing or invalid), subsequent calls always return the cached + result without re-reading or re-compiling the schema. To pick up on-disk + changes to the schema, the application must restart the process. See the + README section \"Schema caching and thread safety\" for more details. + + Args: + forbidden_keys: Additional forbidden root keys to check against. + These keys are merged with builtin LogRecord attributes. + Builtin keys cannot be replaced, only supplemented. + Note: None and empty set() are semantically equivalent - both + mean "no additional forbidden keys" and produce the same result. + They also produce the same cache key, so cached results are shared. This function never raises exceptions. It always returns the best-effort compiled schema together with a list of problems detected during processing @@ -664,17 +696,18 @@ def _compile_schema_internal() -> tuple[_CompiledSchema, list[_SchemaProblem]]: Performance considerations: First compilation of a schema involves file I/O, JSON parsing, and tree traversal. For typical schemas (< 1000 nodes), this takes < 10ms. All - subsequent calls for the same schema path return immediately from cache - (< 0.1ms). The cache is process-wide and persists for the application - lifetime. + subsequent calls for the same schema path and forbidden keys return + immediately from cache (< 0.1ms). The cache is process-wide and persists + for the application lifetime. Limitations: - Schema changes on disk are not automatically reloaded; the application must be restarted to pick up changes - Very large schemas (> 10,000 nodes) may cause noticeable compilation overhead on first load, but this is uncommon in practice - - The cache uses absolute file paths as keys, so schema files must be - accessible via the same path throughout the application lifetime + - The cache uses absolute file paths and forbidden keys sets as keys, so + schema files must be accessible via the same path throughout the + application lifetime Note: This function is used internally by :class:`SchemaLogger` and by the @@ -685,12 +718,21 @@ def _compile_schema_internal() -> tuple[_CompiledSchema, list[_SchemaProblem]]: Tuple of (_CompiledSchema, list[_SchemaProblem]). """ schema_path = _get_schema_path() + # Create cache key that includes forbidden keys. + # We use `frozenset(forbidden_keys or ())` instead of checking for None explicitly + # because both None and empty set() should produce the same cache key (frozenset()). + # This is semantically correct: both None and set() mean "no additional forbidden + # keys", so they should share the same cached compilation result. The `or ()` + # operator converts None to empty tuple, which frozenset() then converts to empty + # frozenset, and empty set() also becomes empty frozenset, ensuring cache key + # consistency. + cache_key = (schema_path, frozenset(forbidden_keys or ())) # Fast-path: First check (with lock for thread-safety) if we have already attempted - # to compile schema for this path. This provides thread-safe cache access - # in the common case when the schema is already cached. + # to compile schema for this path and forbidden keys set. This provides thread-safe + # cache access in the common case when the schema is already cached. with _cache_lock: - cached = _SCHEMA_CACHE.get(schema_path) + cached = _SCHEMA_CACHE.get(cache_key) if cached is not None: return cached @@ -712,16 +754,16 @@ def _compile_schema_internal() -> tuple[_CompiledSchema, list[_SchemaProblem]]: # thread might have compiled the schema (or handled the same error) while # we were processing the exception. If so, use the cached result. with _cache_lock: - cached = _SCHEMA_CACHE.get(schema_path) + cached = _SCHEMA_CACHE.get(cache_key) if cached is not None: return cached - _SCHEMA_CACHE[schema_path] = result + _SCHEMA_CACHE[cache_key] = result return result # Check root key conflicts before compiling the tree. This allows us to # catch conflicts early and report them as schema problems. We do this # before tree compilation to avoid unnecessary work if there are conflicts. - _check_root_conflicts(raw_schema, problems) + _check_root_conflicts(raw_schema, problems, forbidden_keys) # Compile the schema tree into leaves. Each root key becomes a separate # tree that we compile recursively. Problems are collected as we go. @@ -748,9 +790,9 @@ def _compile_schema_internal() -> tuple[_CompiledSchema, list[_SchemaProblem]]: # instead of overwriting it with our own (which might be different if the file # was modified between reads). with _cache_lock: - cached = _SCHEMA_CACHE.get(schema_path) + cached = _SCHEMA_CACHE.get(cache_key) if cached is not None: return cached - _SCHEMA_CACHE[schema_path] = result + _SCHEMA_CACHE[cache_key] = result return result diff --git a/src/logging_objects_with_schema/schema_logger.py b/src/logging_objects_with_schema/schema_logger.py index 234561d..8adba12 100644 --- a/src/logging_objects_with_schema/schema_logger.py +++ b/src/logging_objects_with_schema/schema_logger.py @@ -69,7 +69,12 @@ class SchemaLogger(logging.Logger): >>> logger.info("request processed", extra={"request_id": "abc-123"}) """ - def __init__(self, name: str, level: int = logging.NOTSET) -> None: + def __init__( + self, + name: str, + level: int = logging.NOTSET, + forbidden_keys: set[str] | None = None, + ) -> None: """Initialise the schema-aware logger. The schema is compiled once during construction. If any @@ -80,12 +85,20 @@ def __init__(self, name: str, level: int = logging.NOTSET) -> None: Args: name: Logger name (same as :class:`logging.Logger`). level: Logger level (same as :class:`logging.Logger`). + forbidden_keys: Additional forbidden root keys to check against. + These keys are merged with builtin LogRecord attributes. + Builtin keys cannot be replaced, only supplemented. + Subclasses can override this method and pass their own + forbidden keys to the parent, merging them with keys from + their own subclasses if needed. + Note: None and empty set() are semantically equivalent - both + mean "no additional forbidden keys" and produce the same result. """ # Validate schema before creating the logger instance to avoid # registering a broken logger in the logging manager cache. # Schema is compiled and cached first, then problems are checked. try: - compiled, problems = _compile_schema_internal() + compiled, problems = _compile_schema_internal(forbidden_keys) except (OSError, ValueError, RuntimeError) as exc: # Convert system-level exceptions to _SchemaProblem so they can be # handled the same way as schema validation problems. diff --git a/tests/private/test_schema_loader.py b/tests/private/test_schema_loader.py index 55d9435..744f0ed 100644 --- a/tests/private/test_schema_loader.py +++ b/tests/private/test_schema_loader.py @@ -1412,7 +1412,7 @@ def test_check_root_conflicts_reports_conflicts() -> None: }, } - check_root_conflicts(schema_dict, problems) + check_root_conflicts(schema_dict, problems, forbidden_keys=None) assert len(problems) == 2 assert all("conflicts with reserved logging fields" in p.message for p in problems) @@ -1430,7 +1430,7 @@ def test_check_root_conflicts_no_conflicts() -> None: }, } - check_root_conflicts(schema_dict, problems) + check_root_conflicts(schema_dict, problems, forbidden_keys=None) assert problems == [] @@ -1440,7 +1440,7 @@ def test_check_root_conflicts_empty_schema() -> None: problems: list[_SchemaProblem] = [] schema_dict: dict[str, Any] = {} - check_root_conflicts(schema_dict, problems) + check_root_conflicts(schema_dict, problems, forbidden_keys=None) assert problems == [] @@ -1596,3 +1596,114 @@ def mock_load_raises( # Restore original monkeypatch.setattr(schema_loader, "_load_raw_schema", original_load) monkeypatch.setattr(schema_loader, "_SCHEMA_CACHE", original_cache) + + +def test_check_root_conflicts_with_additional_forbidden_keys() -> None: + """_check_root_conflicts should merge additional forbidden keys with builtin. + + Additional forbidden keys are merged with builtin LogRecord attributes. + """ + problems: list[_SchemaProblem] = [] + schema_dict = { + "custom_forbidden": { + "Value": {"type": "str", "source": "value"}, + }, + "ServicePayload": { + "RequestID": {"type": "str", "source": "request_id"}, + }, + } + + # Without additional keys, custom_forbidden should pass + check_root_conflicts(schema_dict, problems, forbidden_keys=None) + assert len(problems) == 0 + + # With additional keys, custom_forbidden should be reported + problems.clear() + check_root_conflicts(schema_dict, problems, forbidden_keys={"custom_forbidden"}) + assert len(problems) == 1 + assert "custom_forbidden" in problems[0].message + assert "conflicts with reserved logging fields" in problems[0].message + + +def test_check_root_conflicts_merges_builtin_and_additional_keys() -> None: + """_check_root_conflicts should merge builtin and additional forbidden keys.""" + problems: list[_SchemaProblem] = [] + schema_dict = { + "name": { # Builtin key + "Value": {"type": "str", "source": "value"}, + }, + "custom_key": { # Additional key + "Value": {"type": "str", "source": "value2"}, + }, + "ServicePayload": { # Valid key + "RequestID": {"type": "str", "source": "request_id"}, + }, + } + + check_root_conflicts(schema_dict, problems, forbidden_keys={"custom_key"}) + + # Both builtin and additional keys should be reported + assert len(problems) == 2 + problem_messages = [p.message for p in problems] + assert any("name" in msg and "conflicts" in msg for msg in problem_messages) + assert any("custom_key" in msg and "conflicts" in msg for msg in problem_messages) + + +def test_compile_schema_internal_with_forbidden_keys( + tmp_path: Path, monkeypatch: pytest.MonkeyPatch +) -> None: + """_compile_schema_internal should accept and use forbidden_keys parameter.""" + monkeypatch.chdir(tmp_path) + schema_file = tmp_path / _SCHEMA_FILE_NAME + schema_file.write_text( + '{"ServicePayload": {"RequestID": {"type": "str", "source": "request_id"}}}', + encoding="utf-8", + ) + + # Clear cache + with schema_loader._cache_lock: + schema_loader._SCHEMA_CACHE.clear() + + # Compile without forbidden keys + compiled1, problems1 = compile_schema_internal(forbidden_keys=None) + assert not problems1 + + # Compile with forbidden keys that don't conflict + compiled2, problems2 = compile_schema_internal(forbidden_keys={"some_key"}) + assert not problems2 + + # Compile with forbidden keys that conflict + compiled3, problems3 = compile_schema_internal(forbidden_keys={"ServicePayload"}) + assert len(problems3) == 1 + assert "ServicePayload" in problems3[0].message + assert "conflicts with reserved logging fields" in problems3[0].message + + +def test_compile_schema_internal_caches_by_forbidden_keys( + tmp_path: Path, monkeypatch: pytest.MonkeyPatch +) -> None: + """_compile_schema_internal should cache separately for different forbidden_keys.""" + monkeypatch.chdir(tmp_path) + schema_file = tmp_path / _SCHEMA_FILE_NAME + schema_file.write_text( + '{"ServicePayload": {"RequestID": {"type": "str", "source": "request_id"}}}', + encoding="utf-8", + ) + + # Clear cache + with schema_loader._cache_lock: + schema_loader._SCHEMA_CACHE.clear() + + # First compilation with no forbidden keys + compiled1, problems1 = compile_schema_internal(forbidden_keys=None) + assert not problems1 + + # Second compilation with same forbidden keys should return cached result + compiled2, problems2 = compile_schema_internal(forbidden_keys=None) + assert compiled2 is compiled1 # Should be the same object from cache + assert problems2 is problems1 + + # Compilation with different forbidden keys should create new entry + compiled3, problems3 = compile_schema_internal(forbidden_keys={"some_key"}) + assert compiled3 is not compiled1 # Different cache entry + assert not problems3 diff --git a/tests/private/test_schema_logger.py b/tests/private/test_schema_logger.py index 69b4e2a..e350de8 100644 --- a/tests/private/test_schema_logger.py +++ b/tests/private/test_schema_logger.py @@ -2,11 +2,19 @@ from __future__ import annotations +import logging from io import StringIO +from pathlib import Path from unittest.mock import MagicMock, patch +import pytest + from logging_objects_with_schema.errors import _SchemaProblem -from logging_objects_with_schema.schema_logger import _log_schema_problems_and_exit +from logging_objects_with_schema.schema_logger import ( + SchemaLogger, + _log_schema_problems_and_exit, +) +from tests.helpers import _write_schema def test_log_schema_problems_and_exit_writes_to_stderr() -> None: @@ -102,3 +110,614 @@ def test_log_schema_problems_and_exit_flushes_stderr() -> None: mock_stderr.flush.assert_called_once() # Verify write was called mock_stderr.write.assert_called_once() + + +# Tests for SchemaLogger.__init__ + + +def test_schema_logger_init_with_none_forbidden_keys( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + """SchemaLogger.__init__ should work with forbidden_keys=None.""" + monkeypatch.chdir(tmp_path) + _write_schema( + tmp_path, + { + "ServicePayload": { + "RequestID": {"type": "str", "source": "request_id"}, + }, + }, + ) + + logger = SchemaLogger("test_logger", forbidden_keys=None) + + assert isinstance(logger, SchemaLogger) + assert isinstance(logger, logging.Logger) + assert logger.name == "test_logger" + assert hasattr(logger, "_schema") + assert not logger._schema.is_empty + + +def test_schema_logger_init_with_empty_set_forbidden_keys( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + """SchemaLogger.__init__ should work with forbidden_keys=set().""" + monkeypatch.chdir(tmp_path) + _write_schema( + tmp_path, + { + "ServicePayload": { + "RequestID": {"type": "str", "source": "request_id"}, + }, + }, + ) + + logger = SchemaLogger("test_logger", forbidden_keys=set()) + + assert isinstance(logger, SchemaLogger) + assert isinstance(logger, logging.Logger) + assert logger.name == "test_logger" + assert hasattr(logger, "_schema") + assert not logger._schema.is_empty + + +def test_schema_logger_init_with_non_empty_forbidden_keys( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + """SchemaLogger.__init__ should work with non-empty forbidden_keys.""" + monkeypatch.chdir(tmp_path) + _write_schema( + tmp_path, + { + "ServicePayload": { + "RequestID": {"type": "str", "source": "request_id"}, + }, + }, + ) + + logger = SchemaLogger("test_logger", forbidden_keys={"custom_key"}) + + assert isinstance(logger, SchemaLogger) + assert isinstance(logger, logging.Logger) + assert logger.name == "test_logger" + assert hasattr(logger, "_schema") + assert not logger._schema.is_empty + + +def test_schema_logger_init_sets_schema_correctly( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + """SchemaLogger.__init__ should set self._schema correctly.""" + monkeypatch.chdir(tmp_path) + _write_schema( + tmp_path, + { + "ServicePayload": { + "RequestID": {"type": "str", "source": "request_id"}, + "UserID": {"type": "int", "source": "user_id"}, + }, + }, + ) + + logger = SchemaLogger("test_logger") + + assert hasattr(logger, "_schema") + assert not logger._schema.is_empty + assert len(logger._schema.leaves) == 2 + sources = {leaf.source for leaf in logger._schema.leaves} + assert sources == {"request_id", "user_id"} + + +def test_schema_logger_init_calls_super_init( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + """SchemaLogger.__init__ should call super().__init__ with correct parameters.""" + monkeypatch.chdir(tmp_path) + _write_schema( + tmp_path, + { + "ServicePayload": { + "RequestID": {"type": "str", "source": "request_id"}, + }, + }, + ) + + logger = SchemaLogger("test_logger", level=logging.INFO) + + assert isinstance(logger, logging.Logger) + assert logger.name == "test_logger" + assert logger.level == logging.INFO + + +# Tests for SchemaLogger._log + + +def test_schema_logger_log_with_none_extra( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + """SchemaLogger._log should handle None extra parameter.""" + monkeypatch.chdir(tmp_path) + _write_schema( + tmp_path, + { + "ServicePayload": { + "RequestID": {"type": "str", "source": "request_id"}, + }, + }, + ) + + logger = SchemaLogger("test_logger") + stream = StringIO() + handler = logging.StreamHandler(stream) + handler.setFormatter(logging.Formatter("%(message)s")) + logger.addHandler(handler) + logger.setLevel(logging.INFO) + + logger._log(logging.INFO, "test message", (), extra=None) + + output = stream.getvalue() + assert "test message" in output + + +def test_schema_logger_log_with_empty_extra( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + """SchemaLogger._log should handle empty extra dict.""" + monkeypatch.chdir(tmp_path) + _write_schema( + tmp_path, + { + "ServicePayload": { + "RequestID": {"type": "str", "source": "request_id"}, + }, + }, + ) + + logger = SchemaLogger("test_logger") + stream = StringIO() + handler = logging.StreamHandler(stream) + handler.setFormatter(logging.Formatter("%(message)s")) + logger.addHandler(handler) + logger.setLevel(logging.INFO) + + logger._log(logging.INFO, "test message", (), extra={}) + + output = stream.getvalue() + assert "test message" in output + + +def test_schema_logger_log_with_valid_data( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + """SchemaLogger._log should log valid data without problems.""" + monkeypatch.chdir(tmp_path) + _write_schema( + tmp_path, + { + "ServicePayload": { + "RequestID": {"type": "str", "source": "request_id"}, + }, + }, + ) + + logger = SchemaLogger("test_logger") + stream = StringIO() + handler = logging.StreamHandler(stream) + handler.setFormatter(logging.Formatter("%(message)s")) + logger.addHandler(handler) + logger.setLevel(logging.INFO) + + logger._log(logging.INFO, "test message", (), extra={"request_id": "abc-123"}) + + output = stream.getvalue() + assert "test message" in output + # Should not contain validation errors + assert "validation_errors" not in output + + +def test_schema_logger_log_with_invalid_data_logs_error_after( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + """SchemaLogger._log should log validation errors as ERROR after main message.""" + monkeypatch.chdir(tmp_path) + _write_schema( + tmp_path, + { + "ServicePayload": { + "RequestID": {"type": "str", "source": "request_id"}, + }, + }, + ) + + logger = SchemaLogger("test_logger") + stream = StringIO() + handler = logging.StreamHandler(stream) + handler.setFormatter(logging.Formatter("%(levelname)s: %(message)s")) + logger.addHandler(handler) + logger.setLevel(logging.INFO) + + logger._log(logging.INFO, "test message", (), extra={"request_id": 123}) + + output = stream.getvalue() + lines = output.strip().split("\n") + # First line should be the main message + assert "test message" in lines[0] + # Second line should be the validation error + assert "ERROR" in lines[1] if len(lines) > 1 else False + assert "validation_errors" in output + + +def test_schema_logger_log_increments_stacklevel( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + """SchemaLogger._log should increment stacklevel correctly.""" + monkeypatch.chdir(tmp_path) + _write_schema( + tmp_path, + { + "ServicePayload": { + "RequestID": {"type": "str", "source": "request_id"}, + }, + }, + ) + + logger = SchemaLogger("test_logger") + stream = StringIO() + handler = logging.StreamHandler(stream) + handler.setFormatter(logging.Formatter("%(filename)s:%(lineno)d")) + logger.addHandler(handler) + logger.setLevel(logging.INFO) + + def test_function() -> None: + logger._log(logging.INFO, "test", (), stacklevel=1) + + test_function() + # The output should point to test_function, not to _log + output = stream.getvalue() + assert "test_schema_logger.py" in output + + +def test_schema_logger_log_handles_json_loads_exception( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + """SchemaLogger._log should handle json.loads exception gracefully.""" + monkeypatch.chdir(tmp_path) + _write_schema( + tmp_path, + { + "ServicePayload": { + "RequestID": {"type": "str", "source": "request_id"}, + }, + }, + ) + + logger = SchemaLogger("test_logger") + stream = StringIO() + handler = logging.StreamHandler(stream) + handler.setFormatter(logging.Formatter("%(message)s")) + logger.addHandler(handler) + logger.setLevel(logging.INFO) + + # Create a mock _DataProblem with invalid JSON + from logging_objects_with_schema.errors import _DataProblem + from logging_objects_with_schema.schema_applier import _apply_schema_internal + + original_apply = _apply_schema_internal + + def mock_apply(*args, **kwargs): + result, problems = original_apply(*args, **kwargs) + # Add a problem with invalid JSON + problems.append(_DataProblem("invalid json {")) + return result, problems + + with patch( + "logging_objects_with_schema.schema_logger._apply_schema_internal", mock_apply + ): + logger._log(logging.INFO, "test", (), extra={"request_id": "abc"}) + + output = stream.getvalue() + # Should handle the exception and log a fallback error + assert "test" in output + assert "validation_errors" in output + + +def test_schema_logger_log_handles_json_dumps_exception( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + """SchemaLogger._log should handle json.dumps exception gracefully.""" + monkeypatch.chdir(tmp_path) + _write_schema( + tmp_path, + { + "ServicePayload": { + "RequestID": {"type": "str", "source": "request_id"}, + }, + }, + ) + + logger = SchemaLogger("test_logger") + stream = StringIO() + handler = logging.StreamHandler(stream) + handler.setFormatter(logging.Formatter("%(message)s")) + logger.addHandler(handler) + logger.setLevel(logging.INFO) + + # Mock json.dumps to raise an exception only on first call + import json + + original_dumps = json.dumps + call_count = 0 + + def mock_dumps(*args, **kwargs): + nonlocal call_count + call_count += 1 + # Raise exception only on first call (when trying to serialize + # validation_errors). Allow second call (fallback error serialization) + # to succeed. + if call_count == 1 and "validation_errors" in str(args): + raise TypeError("Serialization failed") + return original_dumps(*args, **kwargs) + + with patch("json.dumps", mock_dumps): + logger._log(logging.INFO, "test", (), extra={"request_id": 123}) + + output = stream.getvalue() + # Should handle the exception and log a fallback error + assert "test" in output + assert "validation_errors" in output + + +def test_schema_logger_log_handles_callhandlers_exception( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + """SchemaLogger._log should handle callHandlers exception gracefully.""" + monkeypatch.chdir(tmp_path) + _write_schema( + tmp_path, + { + "ServicePayload": { + "RequestID": {"type": "str", "source": "request_id"}, + }, + }, + ) + + logger = SchemaLogger("test_logger") + stream = StringIO() + handler = logging.StreamHandler(stream) + handler.setFormatter(logging.Formatter("%(message)s")) + + # Create a handler that raises an exception + class FailingHandler(logging.StreamHandler): + def emit(self, record): + if "validation_errors" in record.getMessage(): + raise RuntimeError("Handler failed") + super().emit(record) + + failing_handler = FailingHandler(stream) + logger.addHandler(failing_handler) + logger.setLevel(logging.INFO) + + stderr_capture = StringIO() + with patch("sys.stderr", stderr_capture): + logger._log(logging.INFO, "test", (), extra={"request_id": 123}) + + output = stream.getvalue() + stderr_output = stderr_capture.getvalue() + # Main message should be logged + assert "test" in output + # Error about handler failure should be written to stderr + assert "Error in logging handler" in stderr_output + + +def test_schema_logger_log_uses_findcaller_for_python_311_plus( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + """SchemaLogger._log should use findCaller for Python 3.11+.""" + import sys + + if sys.version_info < (3, 11): + pytest.skip("Test only for Python 3.11+") + + monkeypatch.chdir(tmp_path) + _write_schema( + tmp_path, + { + "ServicePayload": { + "RequestID": {"type": "str", "source": "request_id"}, + }, + }, + ) + + logger = SchemaLogger("test_logger") + stream = StringIO() + handler = logging.StreamHandler(stream) + handler.setFormatter(logging.Formatter("%(message)s")) + logger.addHandler(handler) + logger.setLevel(logging.INFO) + + with patch.object(logger, "findCaller") as mock_findcaller: + mock_findcaller.return_value = ("test.py", 42, "test_func", None) + logger._log(logging.INFO, "test", (), extra={"request_id": 123}) + + # findCaller should be called for validation error logging + assert mock_findcaller.called + + +def test_schema_logger_log_uses_inspect_stack_fallback_for_old_python( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + """SchemaLogger._log should use inspect.stack() fallback for Python < 3.11.""" + import sys + + if sys.version_info >= (3, 11): + pytest.skip("Test only for Python < 3.11") + + monkeypatch.chdir(tmp_path) + _write_schema( + tmp_path, + { + "ServicePayload": { + "RequestID": {"type": "str", "source": "request_id"}, + }, + }, + ) + + logger = SchemaLogger("test_logger") + stream = StringIO() + handler = logging.StreamHandler(stream) + handler.setFormatter(logging.Formatter("%(message)s")) + logger.addHandler(handler) + logger.setLevel(logging.INFO) + + with patch("inspect.stack") as mock_stack: + # Mock stack to return a frame + mock_frame = MagicMock() + mock_frame.filename = "test.py" + mock_frame.lineno = 42 + mock_frame.function = "test_func" + mock_stack.return_value = [None, None, mock_frame] + + logger._log(logging.INFO, "test", (), extra={"request_id": 123}) + + # inspect.stack should be called for validation error logging + assert mock_stack.called + + +def test_schema_logger_log_fallback_to_findcaller_when_stack_too_short( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + """SchemaLogger._log should fallback to findCaller when stack is too short.""" + import sys + + if sys.version_info >= (3, 11): + pytest.skip("Test only for Python < 3.11") + + monkeypatch.chdir(tmp_path) + _write_schema( + tmp_path, + { + "ServicePayload": { + "RequestID": {"type": "str", "source": "request_id"}, + }, + }, + ) + + logger = SchemaLogger("test_logger") + stream = StringIO() + handler = logging.StreamHandler(stream) + handler.setFormatter(logging.Formatter("%(message)s")) + logger.addHandler(handler) + logger.setLevel(logging.INFO) + + with ( + patch("inspect.stack") as mock_stack, + patch.object(logger, "findCaller") as mock_findcaller, + ): + # Mock stack to return a short stack (less than expected) + mock_stack.return_value = [None] # Too short + mock_findcaller.return_value = ("test.py", 42, "test_func", None) + + logger._log(logging.INFO, "test", (), extra={"request_id": 123}, stacklevel=10) + + # findCaller should be called as fallback + assert mock_findcaller.called + + +def test_schema_logger_log_makerecord_called_with_correct_params( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + """SchemaLogger._log should call makeRecord with correct parameters.""" + monkeypatch.chdir(tmp_path) + _write_schema( + tmp_path, + { + "ServicePayload": { + "RequestID": {"type": "str", "source": "request_id"}, + }, + }, + ) + + logger = SchemaLogger("test_logger") + stream = StringIO() + handler = logging.StreamHandler(stream) + handler.setFormatter(logging.Formatter("%(message)s")) + logger.addHandler(handler) + logger.setLevel(logging.INFO) + + with patch.object(logger, "makeRecord") as mock_makerecord: + mock_makerecord.return_value = logging.LogRecord( + name="test_logger", + level=logging.ERROR, + pathname="test.py", + lineno=42, + msg="test error", + args=(), + exc_info=None, + ) + + logger._log(logging.INFO, "test", (), extra={"request_id": 123}) + + # makeRecord should be called for validation error + assert mock_makerecord.called + call_args = mock_makerecord.call_args + assert call_args[0][0] == "test_logger" # name + assert call_args[0][1] == logging.ERROR # level + assert call_args[0][4] == "test error" or "validation_errors" in str( + call_args[0][4] + ) # msg + + +def test_schema_logger_log_validation_errors_logged_after_main_message( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + """SchemaLogger._log should log validation errors after main message.""" + monkeypatch.chdir(tmp_path) + _write_schema( + tmp_path, + { + "ServicePayload": { + "RequestID": {"type": "str", "source": "request_id"}, + }, + }, + ) + + logger = SchemaLogger("test_logger") + messages = [] + + class MessageCaptureHandler(logging.Handler): + def emit(self, record): + messages.append((record.levelno, record.getMessage())) + + handler = MessageCaptureHandler() + logger.addHandler(handler) + logger.setLevel(logging.INFO) + + logger._log(logging.INFO, "main message", (), extra={"request_id": 123}) + + # First message should be the main INFO message + assert len(messages) >= 1 + assert messages[0][0] == logging.INFO + assert "main message" in messages[0][1] + + # Second message should be the validation ERROR + if len(messages) > 1: + assert messages[1][0] == logging.ERROR + assert "validation_errors" in messages[1][1] diff --git a/tests/test_integration.py b/tests/test_integration.py index d372aa2..e6af312 100644 --- a/tests/test_integration.py +++ b/tests/test_integration.py @@ -547,3 +547,181 @@ def fake_exit(code: int) -> None: finally: monkeypatch.setattr(schema_loader, "_get_schema_path", original_get_schema_path) logging.setLoggerClass(logging.Logger) + + +def test_schema_logger_with_forbidden_keys( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + """SchemaLogger should accept forbidden_keys parameter and use it for validation.""" + monkeypatch.chdir(tmp_path) + _write_schema( + tmp_path, + { + "ServicePayload": { + "RequestID": {"type": "str", "source": "request_id"}, + }, + "CustomForbidden": { + "Value": {"type": "str", "source": "value"}, + }, + }, + ) + + # Without forbidden_keys, CustomForbidden should be valid + logger1 = SchemaLogger("logger1") + assert isinstance(logger1, SchemaLogger) + + # With forbidden_keys, CustomForbidden should cause schema validation to fail + import sys + from io import StringIO + + stderr_output = StringIO() + original_stderr = sys.stderr + sys.stderr = stderr_output + + exit_called = False + exit_code = None + + def mock_exit(code: int) -> None: + nonlocal exit_called, exit_code + exit_called = True + exit_code = code + + monkeypatch.setattr("os._exit", mock_exit) + + try: + SchemaLogger("logger2", forbidden_keys={"CustomForbidden"}) + except SystemExit: + pass + finally: + sys.stderr = original_stderr + + assert exit_called + assert exit_code == 1 + assert "CustomForbidden" in stderr_output.getvalue() + assert "conflicts with reserved logging fields" in stderr_output.getvalue() + + +def test_schema_logger_inheritance_with_forbidden_keys( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + """Subclass of SchemaLogger can pass forbidden_keys to parent.""" + monkeypatch.chdir(tmp_path) + _write_schema( + tmp_path, + { + "ServicePayload": { + "RequestID": {"type": "str", "source": "request_id"}, + }, + "ChildForbidden": { + "Value": {"type": "str", "source": "value"}, + }, + }, + ) + + class ChildLogger(SchemaLogger): + def __init__(self, name: str, level: int = logging.NOTSET) -> None: + # Child can pass its own forbidden keys to parent + super().__init__(name, level, forbidden_keys={"ChildForbidden"}) + + # Creating child logger should fail because ChildForbidden conflicts + import sys + from io import StringIO + + stderr_output = StringIO() + original_stderr = sys.stderr + sys.stderr = stderr_output + + exit_called = False + exit_code = None + + def mock_exit(code: int) -> None: + nonlocal exit_called, exit_code + exit_called = True + exit_code = code + + monkeypatch.setattr("os._exit", mock_exit) + + try: + ChildLogger("child_logger") + except SystemExit: + pass + finally: + sys.stderr = original_stderr + + assert exit_called + assert exit_code == 1 + assert "ChildForbidden" in stderr_output.getvalue() + + +def test_schema_logger_inheritance_merges_forbidden_keys( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + """Subclass can merge forbidden_keys from its own subclass and pass to parent.""" + monkeypatch.chdir(tmp_path) + _write_schema( + tmp_path, + { + "ServicePayload": { + "RequestID": {"type": "str", "source": "request_id"}, + }, + "ParentForbidden": { + "Value": {"type": "str", "source": "value"}, + }, + "ChildForbidden": { + "Value": {"type": "str", "source": "value2"}, + }, + }, + ) + + class ParentLogger(SchemaLogger): + def __init__( + self, + name: str, + level: int = logging.NOTSET, + forbidden_keys: set[str] | None = None, + ) -> None: + # Parent merges its own keys with keys from child + parent_keys = {"ParentForbidden"} + if forbidden_keys: + parent_keys = parent_keys | forbidden_keys + super().__init__(name, level, forbidden_keys=parent_keys) + + class ChildLogger(ParentLogger): + def __init__(self, name: str, level: int = logging.NOTSET) -> None: + # Child passes its keys to parent, which merges them + super().__init__(name, level, forbidden_keys={"ChildForbidden"}) + + # Creating child logger should fail because both ParentForbidden and + # ChildForbidden conflict + import sys + from io import StringIO + + stderr_output = StringIO() + original_stderr = sys.stderr + sys.stderr = stderr_output + + exit_called = False + exit_code = None + + def mock_exit(code: int) -> None: + nonlocal exit_called, exit_code + exit_called = True + exit_code = code + + monkeypatch.setattr("os._exit", mock_exit) + + try: + ChildLogger("child_logger") + except SystemExit: + pass + finally: + sys.stderr = original_stderr + + assert exit_called + assert exit_code == 1 + output = stderr_output.getvalue() + assert "ParentForbidden" in output + assert "ChildForbidden" in output diff --git a/uv.lock b/uv.lock index bf04f12..edde39d 100644 --- a/uv.lock +++ b/uv.lock @@ -1394,7 +1394,7 @@ version = "0.6.3" [[package]] name = "logging-objects-with-schema" -version = "0.1.4" +version = "0.2.0rc1" [package.source] editable = "."