From 547b76f511bb936db8a8ad053af690dc9c76e2d0 Mon Sep 17 00:00:00 2001
From: Dmitrii Safronov <zimniy@cyberbrain.cc>
Date: Wed, 6 May 2026 02:26:55 +0400
Subject: [PATCH 1/3] test: Improve Test Isolation in Conftest

fix: enhance clear_schema_cache fixture to clear all schema caches before and after each test, ensuring proper test isolation by removing any residual schema data from previous tests. This prevents potential test interference and ensures reliable test results.
Signed-off-by: Dmitrii Safronov <zimniy@cyberbrain.cc>
---
 tests/conftest.py | 18 +++++++++++-------
 1 file changed, 11 insertions(+), 7 deletions(-)

diff --git a/tests/conftest.py b/tests/conftest.py
index 318e7f1..75ba783 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -7,11 +7,15 @@
 
 @pytest.fixture(autouse=True)
 def clear_schema_cache() -> None:
-    """Clear schema path cache before each test to ensure test isolation."""
-    with schema_loader._path_cache_lock:
-        schema_loader._resolved_schema_path = None
-        schema_loader._cached_cwd = None
+    """Clear all schema caches before each test to ensure test isolation."""
+
+    def _clear() -> None:
+        with schema_loader._path_cache_lock:
+            schema_loader._resolved_schema_path = None
+            schema_loader._cached_cwd = None
+        with schema_loader._cache_lock:
+            schema_loader._SCHEMA_CACHE.clear()
+
+    _clear()
     yield
-    with schema_loader._path_cache_lock:
-        schema_loader._resolved_schema_path = None
-        schema_loader._cached_cwd = None
+    _clear()

From 5a63141520c2abf36ecc28d9b96949402f5ec40d Mon Sep 17 00:00:00 2001
From: Dmitrii Safronov <zimniy@cyberbrain.cc>
Date: Wed, 6 May 2026 02:27:42 +0400
Subject: [PATCH 2/3] docs: Update schema loader cache key and locking
 mechanism

Improve cache key to include forbidden keys and clarify RLock usage for thread safety.

Signed-off-by: Dmitrii Safronov <zimniy@cyberbrain.cc>
---
 src/logging_objects_with_schema/schema_loader.py | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/src/logging_objects_with_schema/schema_loader.py b/src/logging_objects_with_schema/schema_loader.py
index b56bb19..a851fc3 100644
--- a/src/logging_objects_with_schema/schema_loader.py
+++ b/src/logging_objects_with_schema/schema_loader.py
@@ -114,7 +114,7 @@ def _create_empty_compiled_schema_with_problems(
 # 2. Compiled schema cache (_SCHEMA_CACHE): Caches the compiled schema and
 #    validation problems for a given schema file path. This avoids re-parsing
 #    and re-compiling the schema JSON on every logger creation. The cache key
-#    is the absolute schema file path from the path cache.
+#    is a tuple of (absolute schema file path, frozenset of forbidden_keys).
 #
 # These caches work together: path cache finds the file location, compiled
 # cache stores the result of compiling that file. Both are thread-safe and
@@ -137,9 +137,10 @@ def _create_empty_compiled_schema_with_problems(
 # was not found.
 _cached_cwd: Path | None = None
 # RLock for thread-safe access to path cache variables.
-# RLock (not Lock) is needed because helper functions (_check_cached_found_file_path,
-# _check_cached_missing_file_path) are called from _get_schema_path() which already
-# holds the lock, and these helpers also need to acquire the lock.
+# RLock (not Lock) allows the same thread to acquire the lock multiple times without
+# deadlocking, which guards against any future refactoring where a helper might
+# independently acquire the lock. Currently, helpers are always called while the
+# caller already holds the lock and do not re-acquire it themselves.
 _path_cache_lock = threading.RLock()
 
 

From f07ad7804e451278419a80bc266d44626635c526 Mon Sep 17 00:00:00 2001
From: Dmitrii Safronov <zimniy@cyberbrain.cc>
Date: Wed, 6 May 2026 02:50:51 +0400
Subject: [PATCH 3/3] docs: tighten inline comments; document _CompiledSchema
 fields

Replace verbose obvious comments with concise "why" explanations.
Add Attributes section to _CompiledSchema docstring covering  source_to_leaves, known_sources, and is_empty.
Remove dangling <!--placeholder --> from README.

Signed-off-by: Dmitrii Safronov <zimniy@cyberbrain.cc>
---
 README.md                                     |   2 -
 .../schema_applier.py                         |  26 +--
 .../schema_loader.py                          | 151 ++++--------------
 .../schema_logger.py                          |  51 ++----
 4 files changed, 44 insertions(+), 186 deletions(-)

diff --git a/README.md b/README.md
index 0059da1..45679b0 100644
--- a/README.md
+++ b/README.md
@@ -341,5 +341,3 @@ All keys are merged together - they are not replaced, only supplemented.
   keys are used, maintaining 100% backward compatibility
 - `None` and empty `set()` are semantically equivalent for `forbidden_keys` -
   both mean "no additional forbidden keys" and produce the same result
-
-<!--placeholder -->
diff --git a/src/logging_objects_with_schema/schema_applier.py b/src/logging_objects_with_schema/schema_applier.py
index 8ef41f8..487dbd2 100644
--- a/src/logging_objects_with_schema/schema_applier.py
+++ b/src/logging_objects_with_schema/schema_applier.py
@@ -56,13 +56,9 @@ def _validate_list_value(
         return _DataProblem(_create_validation_error_dict(source, error_msg, value))
 
     if len(value) == 0:
-        # Empty lists are always valid
         return None
 
-    # Collect unique type names of items that don't match the expected type.
-    # We use a set comprehension to get unique type names (not the types themselves)
-    # for the error message. This gives a clear, readable error message showing
-    # which types were found (e.g., "int, str") vs what was expected.
+    # __name__ gives "int, str" in error messages instead of "<class 'int'>".
     invalid_item_types = {
         type(item).__name__ for item in value if type(item) is not item_expected_type
     }
@@ -95,22 +91,14 @@ def _set_nested_value(
         value: The value to set at the target location.
     """
     current = target
-    # Navigate through intermediate dictionaries, creating them as needed.
-    # We iterate through all keys except the last one (path[:-1]) to build
-    # the nested structure.
     for key in path[:-1]:
         child = current.get(key)
-        # If the key doesn't exist or exists but is not a dict, create a new dict.
-        # This overwrites any non-dict value that might have been there (which
-        # shouldn't happen in normal operation, but we handle it defensively).
-        # We use isinstance() instead of checking for None because we need to
-        # ensure the value is actually a dict, not just that the key exists.
+        # isinstance catches both missing keys and non-dict values at the same key.
         if not isinstance(child, dict):
             child = {}
             current[key] = child
         current = child
 
-    # Set the final value at the last key in the path
     current[path[-1]] = value
 
 
@@ -133,10 +121,7 @@ def _validate_and_apply_leaf(
         extra: The target dictionary to write the value to if validation passes.
         problems: List to append validation problems to.
     """
-    # Use strict type checking (type() is) instead of isinstance() to
-    # prevent bool values from passing validation for int types (since
-    # bool is a subclass of int). This ensures that the actual
-    # runtime type matches the schema type exactly.
+    # type() is, not isinstance(): bool subclasses int and must not pass int validation.
     if type(value) is not leaf.expected_type:
         error_msg = (
             f"has type {type(value).__name__}, "
@@ -147,10 +132,7 @@ def _validate_and_apply_leaf(
         )
         return
 
-    # For lists, validate that all elements strictly match the declared
-    # item_expected_type (homogeneous primitive list).
-    # Note: isinstance() check is needed for type narrowing (mypy), even though
-    # type(value) is list is already guaranteed by the check above.
+    # isinstance() for mypy narrowing; type(value) is list is already guaranteed above.
     if leaf.expected_type is list and isinstance(value, list):
         list_problem = _validate_list_value(value, source, leaf.item_expected_type)
         if list_problem is not None:
diff --git a/src/logging_objects_with_schema/schema_loader.py b/src/logging_objects_with_schema/schema_loader.py
index a851fc3..62a300f 100644
--- a/src/logging_objects_with_schema/schema_loader.py
+++ b/src/logging_objects_with_schema/schema_loader.py
@@ -53,6 +53,16 @@ class _CompiledSchema:
     This class is part of the internal implementation and is not considered
     a public API. Its signature and behaviour may change between releases
     without preserving backward compatibility.
+
+    Attributes:
+        leaves: Flat list of all schema leaves. This is the only constructor
+            argument; all other attributes are derived from it in
+            ``__post_init__``.
+        source_to_leaves: Maps each source field name to the list of leaves
+            that read from it. Populated by ``__post_init__``.
+        known_sources: Frozenset of all source field names appearing in the
+            schema. Used for O(1) redundant-field detection during validation.
+            Populated by ``__post_init__``.
     """
 
     leaves: list[_SchemaLeaf]
@@ -159,15 +169,11 @@ def _find_schema_file() -> Path | None:
     while True:
         schema_path = current / _SCHEMA_FILE_NAME
         if schema_path.exists():
-            # Use resolve() to get an absolute path and resolve any symbolic links.
-            # This ensures we have a canonical path that can be used as a cache key
-            # and for consistent error reporting.
+            # resolve() canonicalizes symlinks so the path is stable as a cache key.
             return schema_path.resolve()
 
-        # Move to parent directory
         parent = current.parent
         if parent == current:
-            # Reached filesystem root, stop searching
             break
         current = parent
 
@@ -199,18 +205,13 @@ def _check_cached_found_file_path() -> Path | None:
     if _resolved_schema_path is None:
         return None
 
-    # Check if this is actually a found file cache (not missing file cache)
-    # by checking _cached_cwd. We don't need 'global' here since we only read it.
+    # _cached_cwd being non-None means we cached a missing-file path, not a found one.
     if _cached_cwd is not None:
-        # This path was cached as missing file, not found file
         return None
 
-    # Schema file was found, absolute path doesn't depend on CWD
-    # Return it if it still exists
     if _resolved_schema_path.exists():
         return _resolved_schema_path
 
-    # Cached file no longer exists; invalidate cache so caller will re-search
     _resolved_schema_path = None
     return None
 
@@ -231,16 +232,12 @@ def _check_cached_missing_file_path() -> Path | None:
     if _resolved_schema_path is None or _cached_cwd is None:
         return None
 
-    # Cached path is based on CWD when file was not found,
-    # check if CWD changed
     current_cwd = _get_current_working_directory()
     if current_cwd != _cached_cwd:
-        # CWD changed, invalidate cache and re-search from new CWD
         _resolved_schema_path = None
         _cached_cwd = None
         return None
 
-    # CWD unchanged, return cached path
     return _resolved_schema_path
 
 
@@ -296,27 +293,22 @@ def _get_schema_path() -> Path:
         Absolute path where the schema file is located or expected to be.
     """
     with _path_cache_lock:
-        # Check cached path for missing file first (CWD-dependent)
-        # This must be checked before found file cache, because
-        # _check_cached_found_file_path would invalidate the cache if file
-        # doesn't exist, even for missing file cache.
+        # Missing-file cache checked first: _check_cached_found_file_path would
+        # invalidate it on existence check failure if we called it first.
         if _cached_cwd is not None:
             cached_path = _check_cached_missing_file_path()
             if cached_path is not None:
                 return cached_path
 
-        # Check cached path for found file (CWD-independent)
         cached_path = _check_cached_found_file_path()
         if cached_path is not None:
             return cached_path
 
-        # Search for schema file
         found_path = _find_schema_file()
         if found_path is not None:
             return _cache_and_return_found_path(found_path)
 
-        # Schema file not found, return absolute path in current working directory
-        # (this path may not exist, but allows caller to report proper error)
+        # Return expected path even if missing — callers use it in error messages.
         return _cache_and_return_missing_path()
 
 
@@ -345,18 +337,14 @@ def _load_raw_schema(schema_path: Path) -> tuple[dict[str, Any], Path]:
         Tuple of (schema data, schema file path).
     """
     if not schema_path.exists():
-        # Let the caller decide how to report this.
         raise FileNotFoundError(f"Schema file not found: {schema_path}")
 
     try:
         with schema_path.open("r", encoding="utf-8") as f:
             data = json.load(f)
     except OSError as exc:
-        # Normalise I/O errors when reading the schema file (e.g., permission
-        # denied, file not found) to ValueError so that _compile_schema_internal()
-        # can report them as _SchemaProblem instances instead of leaking raw
-        # OSError to callers. Note: System-level OSError (e.g., from os.getcwd())
-        # is not caught here and propagates directly.
+        # Converted to ValueError so _compile_schema_internal treats it as a
+        # _SchemaProblem. System-level OSError (e.g. os.getcwd()) propagates uncaught.
         raise ValueError(
             f"Failed to read schema file {schema_path}: {exc}",
         ) from exc
@@ -364,8 +352,7 @@ def _load_raw_schema(schema_path: Path) -> tuple[dict[str, Any], Path]:
         raise ValueError(f"Failed to parse JSON schema: {exc}") from exc
 
     if not isinstance(data, dict):
-        # Normalise non-object top-level schemas into a ValueError so that the
-        # caller can report it as a _SchemaProblem while keeping type safety.
+        # ValueError lets the caller treat a non-object top level the same as bad JSON.
         raise ValueError("Top-level schema must be a JSON object")
 
     return data, schema_path
@@ -439,27 +426,19 @@ def _determine_node_type_and_validate(
     source_value = value_dict.get("source")
     item_type_value = value_dict.get("item_type")
 
-    # Check if node has leaf properties
-    # Leaf properties are: type (required, string), source (required, string),
-    # item_type (optional, string). If type/source/item_type are objects,
-    # they are children, not properties
+    # type/source/item_type as objects means children, not leaf properties.
     has_leaf_properties = (
         isinstance(type_value, str)
         or isinstance(source_value, str)
         or isinstance(item_type_value, str)
     )
 
-    # Check if node has children (any field that is an object/Mapping)
-    # Children can have ANY names, including type, source, item_type - this is
-    # valid for inner nodes. If type, source, or item_type are objects,
-    # they count as children
+    # Any field whose value is an object is a child, even if named "type" or "source".
     has_children = any(
         isinstance(field_value, Mapping) for field_value in value_dict.values()
     )
 
-    # Validate node structure
     if has_leaf_properties and has_children:
-        # Node cannot have both properties and children
         problems.append(
             _SchemaProblem(
                 f"Invalid node at {_format_path(path, key)}: "
@@ -470,7 +449,6 @@ def _determine_node_type_and_validate(
         return (None, False)
 
     if not has_leaf_properties and not has_children:
-        # Node must be either a leaf or have children
         problems.append(
             _SchemaProblem(
                 f"Invalid node at {_format_path(path, key)}: "
@@ -480,7 +458,6 @@ def _determine_node_type_and_validate(
         )
         return (None, False)
 
-    # Node is valid - determine type
     if has_leaf_properties:
         return ("leaf", True)
     else:  # has_children
@@ -507,10 +484,7 @@ def _validate_and_create_leaf(
     leaf_type = value_dict.get("type")
     leaf_source = value_dict.get("source")
 
-    # This is supposed to be a leaf - validate required fields first.
-    # Type must be a string (not None, not empty, and not other types like bool/int)
     type_invalid = _is_empty_or_none(leaf_type) or not isinstance(leaf_type, str)
-    # Source must be a string (not None, not empty, and not other types like bool/int)
     source_invalid = _is_empty_or_none(leaf_source) or not isinstance(leaf_source, str)
 
     if type_invalid:
@@ -532,12 +506,7 @@ def _validate_and_create_leaf(
     if type_invalid or source_invalid:
         return None
 
-    # Note: Check for children is done in _determine_node_type_and_validate()
-    # before this function is called, so we don't need to check here.
-
-    # Convert to string before lookup to handle cases where the JSON parser
-    # might return non-string types (though this shouldn't happen with valid JSON).
-    # This ensures type safety and consistent behavior.
+    # Children check is upstream in _determine_node_type_and_validate.
     expected_type = _TYPE_MAP.get(str(leaf_type))
     if expected_type is None:
         problems.append(
@@ -548,12 +517,8 @@ def _validate_and_create_leaf(
         return None
 
     item_expected_type: type | None = None
-    # For list-typed leaves we require an explicit, primitive item_type
-    # to ensure element homogeneity (e.g. list[str], list[int]).
     if expected_type is list:
         item_type_name = value_dict.get("item_type")
-        # Item type must be a string (not None, not empty, and not other types
-        # like bool/int)
         item_type_invalid = _is_empty_or_none(item_type_name) or not isinstance(
             item_type_name, str
         )
@@ -568,7 +533,6 @@ def _validate_and_create_leaf(
             return None
 
         item_expected_type = _TYPE_MAP.get(str(item_type_name))
-        # Item type must be a primitive (str, int, float, bool), not list
         if item_expected_type is None or item_expected_type is list:
             problems.append(
                 _SchemaProblem(
@@ -581,9 +545,6 @@ def _validate_and_create_leaf(
 
     return _SchemaLeaf(
         path=path + (key,),
-        # Convert to string to ensure type consistency. Even though source should
-        # be a string from JSON, this guards against unexpected types and ensures
-        # the _SchemaLeaf always has a string source.
         source=str(leaf_source),
         expected_type=expected_type,
         item_expected_type=item_expected_type,
@@ -622,10 +583,7 @@ def _compile_schema_tree(
     Yields:
         _SchemaLeaf objects found in the tree.
     """
-    # Check for excessive nesting depth (DoS protection: prevent deeply nested
-    # schemas that could cause stack overflow or excessive memory usage).
-    # This check happens before processing the node to avoid unnecessary work
-    # and to provide clear error messages about the problematic path.
+    # DoS guard: deeply nested schemas could overflow the call stack.
     if len(path) > MAX_SCHEMA_DEPTH:
         problems.append(
             _SchemaProblem(
@@ -644,30 +602,21 @@ def _compile_schema_tree(
             )
             continue
 
-        # Create a mutable copy of the value dict. This is necessary because
-        # we may need to modify it during processing, and the original value
-        # might be a read-only Mapping (e.g., from JSON parsing).
+        # dict() because the original Mapping may be read-only.
         value_dict = dict(value)
 
-        # Determine node type and validate structure
-        # (checks for mixed nodes, empty nodes, etc.)
         node_type, is_valid = _determine_node_type_and_validate(
             value_dict, path, key, problems
         )
 
         if not is_valid:
-            # Node has validation errors, skip processing but continue with other nodes
             continue
 
         if node_type == "leaf":
-            # Process as leaf node
             leaf = _validate_and_create_leaf(value_dict, path, key, problems)
             if leaf is not None:
                 yield leaf
         else:  # node_type == "inner"
-            # Process as inner node - recurse into children
-            # Note: node_type can only be "leaf" or "inner" when is_valid is True.
-            # If is_valid is False, we already continue above.
             for child_leaf in _compile_schema_tree(value_dict, path + (key,), problems):
                 yield child_leaf
 
@@ -704,9 +653,7 @@ def _get_builtin_logrecord_attributes() -> set[str]:
         >>> "ServicePayload" in forbidden
         False
     """
-    # Create a minimal LogRecord instance to inspect its attributes
-    # This is necessary because LogRecord attributes are not defined as
-    # class attributes but are set in __init__
+    # LogRecord attributes are set in __init__, not on the class — must instantiate.
     record = logging.LogRecord(
         name="",
         level=0,
@@ -717,10 +664,6 @@ def _get_builtin_logrecord_attributes() -> set[str]:
         exc_info=None,
     )
 
-    # Use dir() to get all attributes, then filter out:
-    # - Private attributes (starting with _)
-    # - Callable attributes (methods)
-    # This leaves only data fields that represent actual LogRecord attributes
     forbidden = set()
     for attr_name in dir(record):
         if attr_name.startswith("_"):
@@ -748,13 +691,8 @@ def _check_root_conflicts(
             Note: None and empty set() are semantically equivalent - both
             mean "no additional forbidden keys" and produce the same result.
     """
-    # Builtin keys always present, cannot be replaced
     forbidden_root_keys = _get_builtin_logrecord_attributes()
-    # Merge with additional forbidden keys if provided.
-    # We use `if forbidden_keys:` instead of `if forbidden_keys is not None:` because
-    # both None and empty set() semantically mean "no additional forbidden keys".
-    # This check treats them equivalently: None is falsy, and empty set() is also
-    # falsy, so both cases skip the merge operation, which is the correct behavior.
+    # if forbidden_keys: treats None and empty set() equally — both mean no additions.
     if forbidden_keys:
         forbidden_root_keys = forbidden_root_keys | forbidden_keys
 
@@ -824,31 +762,15 @@ def _compile_schema_internal(
         Tuple of (_CompiledSchema, list[_SchemaProblem]).
     """
     schema_path = _get_schema_path()
-    # Create cache key that includes forbidden keys.
-    # We use `frozenset(forbidden_keys or ())` instead of checking for None explicitly
-    # because both None and empty set() should produce the same cache key (frozenset()).
-    # This is semantically correct: both None and set() mean "no additional forbidden
-    # keys", so they should share the same cached compilation result. The `or ()`
-    # operator converts None to empty tuple, which frozenset() then converts to empty
-    # frozenset, and empty set() also becomes empty frozenset, ensuring cache key
-    # consistency.
+    # frozenset(forbidden_keys or ()) treats None and set() as the same cache key.
     cache_key = (schema_path, frozenset(forbidden_keys or ()))
 
-    # Fast-path: First check (with lock for thread-safety) if we have already attempted
-    # to compile schema for this path and forbidden keys set. This provides thread-safe
-    # cache access in the common case when the schema is already cached.
+    # Fast path: check cache under lock before doing expensive compilation.
     with _cache_lock:
         cached = _SCHEMA_CACHE.get(cache_key)
     if cached is not None:
         return cached
 
-    # Schema not in cache. We need to compile it. However, between the check above
-    # and now, another thread might have started (or even finished) compiling the
-    # same schema. We use double-checked locking to handle this race condition:
-    # after doing the expensive work (loading/compiling), we check the cache again
-    # while holding the lock. If another thread already compiled it, we use that
-    # result instead of storing our own (which might be different if the file changed).
-
     problems: list[_SchemaProblem] = []
 
     try:
@@ -856,9 +778,7 @@ def _compile_schema_internal(
     except (FileNotFoundError, ValueError) as exc:
         problems.append(_SchemaProblem(str(exc)))
         result = _create_empty_compiled_schema_with_problems(problems)
-        # Double-checked locking: Check cache again while holding lock. Another
-        # thread might have compiled the schema (or handled the same error) while
-        # we were processing the exception. If so, use the cached result.
+        # DCL: another thread may have stored a result while we handled the error.
         with _cache_lock:
             cached = _SCHEMA_CACHE.get(cache_key)
             if cached is not None:
@@ -866,13 +786,8 @@ def _compile_schema_internal(
             _SCHEMA_CACHE[cache_key] = result
         return result
 
-    # Check root key conflicts before compiling the tree. This allows us to
-    # catch conflicts early and report them as schema problems. We do this
-    # before tree compilation to avoid unnecessary work if there are conflicts.
     _check_root_conflicts(raw_schema, problems, forbidden_keys)
 
-    # Compile the schema tree into leaves. Each root key becomes a separate
-    # tree that we compile recursively. Problems are collected as we go.
     leaves: list[_SchemaLeaf] = []
     for key, value in raw_schema.items():
         if not isinstance(value, Mapping):
@@ -881,20 +796,14 @@ def _compile_schema_internal(
             )
             continue
 
-        # Create a mutable copy of the value dict for recursive compilation.
-        # This ensures we can safely process nested structures without modifying
-        # the original schema dictionary.
+        # dict() because the original Mapping may be read-only.
         for leaf in _compile_schema_tree(dict(value), (key,), problems):
             leaves.append(leaf)
 
     compiled = _CompiledSchema(leaves=leaves)
     result = (compiled, problems)
 
-    # Double-checked locking: Check cache again while holding lock. Another thread
-    # might have compiled the schema while we were doing the expensive compilation
-    # work (parsing JSON, validating, building leaves). If so, use the cached result
-    # instead of overwriting it with our own (which might be different if the file
-    # was modified between reads).
+    # DCL: another thread may have stored a result while we compiled.
     with _cache_lock:
         cached = _SCHEMA_CACHE.get(cache_key)
         if cached is not None:
diff --git a/src/logging_objects_with_schema/schema_logger.py b/src/logging_objects_with_schema/schema_logger.py
index 704a0ce..77643cb 100644
--- a/src/logging_objects_with_schema/schema_logger.py
+++ b/src/logging_objects_with_schema/schema_logger.py
@@ -31,13 +31,10 @@ def _log_schema_problems_and_exit(problems: list[_SchemaProblem]) -> None:
     Args:
         problems: List of schema problems to log.
     """
-    # Format error message with details of all problems
-    # (same format as data problems)
     problem_messages = [problem.message for problem in problems]
     error_msg = f"Schema has problems: {'; '.join(problem_messages)}\n"
     sys.stderr.write(error_msg)
     sys.stderr.flush()
-    # Use os._exit() for immediate termination without cleanup handlers
     os._exit(1)
 
 
@@ -86,35 +83,20 @@ def __init__(
                 Note: None and empty set() are semantically equivalent - both
                 mean "no additional forbidden keys" and produce the same result.
         """
-        # Validate schema before creating the logger instance to avoid
-        # registering a broken logger in the logging manager cache.
-        # Schema is compiled and cached first, then problems are checked.
+        # Compile before super().__init__() to avoid registering a broken logger
+        # in logging's manager cache.
         try:
             compiled, problems = _compile_schema_internal(forbidden_keys)
         except (OSError, ValueError, RuntimeError) as exc:
-            # Convert system-level exceptions to _SchemaProblem so they can be
-            # handled the same way as schema validation problems.
-            # - OSError: system-level file system issues (e.g., os.getcwd() failures
-            #   when the current working directory is inaccessible or deleted).
-            #   Note: OSError that occurs when reading the schema file (e.g., permission
-            #   denied, I/O errors) is converted to _SchemaProblem in _load_raw_schema()
-            #   and does not reach this exception handler.
-            # - ValueError: path resolution issues (e.g., invalid path characters,
-            #   malformed paths during schema file discovery)
-            # - RuntimeError: threading issues (e.g., lock acquisition problems)
-            # Note: JSON parsing and schema structure validation errors are
-            # converted to _SchemaProblem instances and do not raise ValueError here.
-            # Note: System exceptions (KeyboardInterrupt, SystemExit) are not
-            # caught, which is the correct behavior.
+            # OSError: os.getcwd() failed (inaccessible CWD). File-level OSError is
+            # already converted to _SchemaProblem inside _load_raw_schema and won't
+            # reach here. ValueError: path resolution. RuntimeError: lock failure.
             problems = [_SchemaProblem(f"Schema compilation failed: {exc}")]
             compiled = _CompiledSchema(leaves=[])
 
         if problems:
-            # Schema is invalid; log problems and terminate without creating
-            # the logger instance.
             _log_schema_problems_and_exit(problems)
 
-        # Schema is valid; create the logger instance.
         super().__init__(name, level)
         self._schema: _CompiledSchema = compiled
 
@@ -150,12 +132,7 @@ def _log(
             extra or {},
         )
 
-        # Emit the main log record first, even if there are validation problems.
-        # This ensures 100% compatibility with standard logger behavior: the user's
-        # log message is always emitted, and validation errors are reported separately
-        # as additional ERROR messages. This approach guarantees that the application
-        # continues to work normally even when validation problems occur (no exceptions
-        # are raised, no log records are lost).
+        # Emit user's message first — validation errors follow as separate ERRORs.
         super()._log(
             level,
             msg,
@@ -163,15 +140,11 @@ def _log(
             exc_info=exc_info,
             extra=structured_extra,
             stack_info=stack_info,
-            # Increment stacklevel to account for this override frame so that
-            # caller information points to user code instead of SchemaLogger._log.
-            stacklevel=stacklevel + 1,
+            stacklevel=stacklevel + 1,  # +1 skips this override frame
         )
 
-        # If there were validation problems, log them as separate ERROR messages
-        # after the main log record has been emitted. This ensures the main message
-        # is always logged first, and validation errors are clearly separated.
         if data_problems:
+            # stack_info=False: stack trace already attached to the main record above.
             fn, lno, func, sinfo = self.findCaller(
                 stack_info=False, stacklevel=stacklevel + 1
             )
@@ -180,12 +153,8 @@ def _log(
             try:
                 error_msg = json.dumps({"validation_errors": validation_errors})
             except (TypeError, ValueError) as exc:
-                # Defensive handling: if serialization fails, create a fallback
-                # error message. This should never happen in normal operation since
-                # validation_errors contains only dicts with primitive values (all
-                # values are already serialized via repr()), but protects against
-                # unexpected data corruption or edge cases in JSON serialization.
-                # The fallback ensures we always have a valid JSON error message.
+                # Should never happen: repr() in _create_validation_error_dict
+                # guarantees all values are JSON-serializable strings.
                 error_msg = json.dumps(
                     {
                         "validation_errors": [