diff --git a/mellea/backends/openai.py b/mellea/backends/openai.py index 1eea93511..f9c24b7d3 100644 --- a/mellea/backends/openai.py +++ b/mellea/backends/openai.py @@ -832,14 +832,7 @@ async def _generate_from_chat_context_standard( ) # Convert our linearized context into a sequence of chat messages. Template formatters have a standard way of doing this. messages: list[Message] = self.formatter.to_chat_messages(linearized_context) - # Add the final message. - match action: - case ALoraRequirement(): - raise Exception( - "The OpenAI backend does not currently support activated LoRAs." - ) - case _: - messages.extend(self.formatter.to_chat_messages([action])) + messages.extend(self.formatter.to_chat_messages([action])) conversation: list[dict] = [] system_prompt = model_opts.get(ModelOption.SYSTEM_PROMPT, "") diff --git a/test/stdlib/components/intrinsic/test_rag.py b/test/stdlib/components/intrinsic/test_rag.py index bc76fa650..fb472414c 100644 --- a/test/stdlib/components/intrinsic/test_rag.py +++ b/test/stdlib/components/intrinsic/test_rag.py @@ -187,6 +187,21 @@ def test_context_relevance(backend_4_0): assert result == "irrelevant" +def _compare_hallucination(result: list[dict], expected: list[dict]): + """Special function to compare the result and expected output for hallucination detection. + + There are slight differences in explanations depending on where the test is run. + """ + for r, e in zip(result, expected, strict=True): + assert r["response_begin"] == e["response_begin"] + assert r["response_end"] == e["response_end"] + assert r["response_text"] == e["response_text"] + assert r["faithfulness"] == e["faithfulness"] + + # Specifically don't check the explanation due to mentioned differences. 
+ # assert r["explanation"] == e["explanation"] + + @pytest.mark.qualitative def test_hallucination_detection(backend): """Verify that the hallucination detection intrinsic functions properly.""" @@ -196,11 +211,11 @@ def test_hallucination_detection(backend): # First call triggers adapter loading result = rag.flag_hallucinated_content(assistant_response, docs, context, backend) _dump_output_json("hallucination_detection.json", result) - assert result == expected + _compare_hallucination(result, expected) # Second call hits a different code path from the first one result = rag.flag_hallucinated_content(assistant_response, docs, context, backend) - assert result == expected + _compare_hallucination(result, expected) @pytest.mark.qualitative @@ -303,7 +318,7 @@ def test_hallucination_detection_resolve(backend): expected = _read_output_json("hallucination_detection.json") result = rag.flag_hallucinated_content(None, docs, context, backend) - assert result == expected + _compare_hallucination(result, expected) @pytest.mark.qualitative