Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
108 changes: 97 additions & 11 deletions src/babel_validation/sources/github/github_issues_test_cases.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,20 +15,21 @@


class GitHubIssueTest:
def __init__(self, github_issue: Issue.Issue, assertion: str, param_sets: list[list[str]] = None):
def __init__(self, github_issue: Issue.Issue, assertion: str, param_sets: list[list[str]] = None, repo_id: str = ""):
self.github_issue = github_issue
self.assertion = assertion
if param_sets is None:
param_sets = []
self.param_sets = param_sets
if not isinstance(self.param_sets, list):
raise ValueError(f"param_sets must be a list when creating a GitHubIssueTest({self.github_issue}, {self.assertion}, {self.param_sets})")
self.repo_id = repo_id

self.logger = logging.getLogger(str(self))
self.logger.info(f"Creating GitHubIssueTest for {github_issue.html_url} {assertion}({param_sets})")

def __str__(self):
return f"{self.github_issue.repository.organization.name}/{self.github_issue.repository.name}#{self.github_issue.number}: {self.assertion}({len(self.param_sets)} param sets: {json.dumps(self.param_sets)})"
return f"{self.repo_id}#{self.github_issue.number}: {self.assertion}({len(self.param_sets)} param sets: {json.dumps(self.param_sets)})"

def test_with_nodenorm(self, nodenorm: CachedNodeNorm) -> Iterator[TestResult]:
handler = ASSERTION_HANDLERS.get(self.assertion.lower())
Expand Down Expand Up @@ -83,7 +84,7 @@ def __init__(self, github_token: str, github_repositories=None):
self.babeltest_pattern = re.compile(r'{{BabelTest\|.*?}}')
self.babeltest_yaml_pattern = re.compile(r'```yaml\s+babel_tests:\s+.*?\s+```', re.DOTALL)

def get_test_issues_from_issue(self, github_issue: Issue.Issue) -> list[GitHubIssueTest]:
def get_test_issues_from_issue(self, github_issue: Issue.Issue, repo_id: str = "") -> list[GitHubIssueTest]:
"""
Extract test rows from a single GitHub issue.

Expand All @@ -102,15 +103,11 @@ def get_test_issues_from_issue(self, github_issue: Issue.Issue) -> list[GitHubIs
src/babel_validation/assertions/README.md or inspect ASSERTION_HANDLERS.keys().

:param github_issue: A single GitHub issue to extract test cases from.
:param repo_id: The repository identifier string (e.g. "NCATSTranslator/Babel").
:return: A list of GitHubIssueTest objects found in the issue body.
"""

github_issue_id = f"{github_issue.number}"
# Ideally, we would use:
# f"{github_issue.repository.organization.name}/{github_issue.repository.name}#{github_issue.number}"
# But that is very slow.
# TODO: Wrap Issue.Issue so that we can store orgName and repoName locally so we don't need to call out
# to figure it out.
github_issue_id = f"{repo_id}#{github_issue.number}" if repo_id else f"{github_issue.number}"
self.logger.debug(f"Looking for tests in issue {github_issue_id}: {github_issue.title} ({str(github_issue.state)}, {github_issue.html_url})")

# Is there an issue body at all?
Expand All @@ -135,7 +132,7 @@ def get_test_issues_from_issue(self, github_issue: Issue.Issue) -> list[GitHubIs
if len(params) < 2:
raise ValueError(f"Too few parameters found in BabelTest in issue {github_issue_id}: {match}")
else:
testrows.append(GitHubIssueTest(github_issue, params[0], [params[1:]]))
testrows.append(GitHubIssueTest(github_issue, params[0], [params[1:]], repo_id=repo_id))

babeltest_yaml_matches = re.findall(self.babeltest_yaml_pattern, github_issue.body)
if babeltest_yaml_matches:
Expand All @@ -158,10 +155,64 @@ def get_test_issues_from_issue(self, github_issue: Issue.Issue) -> list[GitHubIs
param_sets.append(param_set)
else:
raise RuntimeError(f"Unknown parameter set type {param_set} in issue {github_issue_id}")
testrows.append(GitHubIssueTest(github_issue, assertion, param_sets))
testrows.append(GitHubIssueTest(github_issue, assertion, param_sets, repo_id=repo_id))

return testrows

def get_specific_test_issues(self, issue_specs: list[str]) -> list:
    """
    Fetch only the issues identified by issue_specs (values from the --issue flag).

    Accepted formats (same as the --issue CLI option):
      - "637"                        bare number: try all configured repositories
      - "Babel#637"                  repo#number: match configured repositories by repo name
      - "NCATSTranslator/Babel#637"  org/repo#number: use exactly that repository

    Uses the single-issue API endpoint rather than paginating through all issues.

    All specs are parsed and validated before any GitHub request is made, so a
    malformed spec fails fast and names the offending value.

    :param issue_specs: Issue specifications in any of the formats listed above.
    :return: A list of pytest.param objects, one per GitHubIssueTest found in the
        selected issues. Issues that do not exist in a repository (HTTP 404) are skipped.
    :raises ValueError: If a spec does not end in a valid integer issue number.
    :raises GithubException: For any GitHub API error other than a 404.
    """
    # Phase 1: resolve every spec into (repo_id, issue_number) targets.
    # A dict is used as an insertion-ordered set, so a target that is matched
    # by several specs is only fetched once.
    targets = {}
    for spec in issue_specs:
        if '#' not in spec:
            # bare number "637"
            repo_part, number_part = None, spec
        elif '/' in spec:
            # "NCATSTranslator/Babel#637"
            repo_part, number_part = spec.rsplit('#', 1)
        else:
            # "Babel#637"
            repo_part, number_part = spec.split('#', 1)

        try:
            issue_num = int(number_part)
        except ValueError:
            raise ValueError(
                f"Invalid issue spec {spec!r}: expected '<number>', "
                f"'<repo>#<number>' or '<org>/<repo>#<number>'"
            ) from None

        if repo_part is None:
            repo_ids = list(self.github_repositories)
        elif '/' in repo_part:
            repo_ids = [repo_part]
        else:
            repo_ids = [
                repo_id
                for repo_id in self.github_repositories
                if repo_id.split('/')[-1] == repo_part
            ]

        if not repo_ids:
            # Most likely a typo in the repository name: say so instead of
            # silently selecting nothing.
            self.logger.warning(
                f"Issue spec {spec!r} does not match any configured repository: {self.github_repositories}"
            )

        for repo_id in repo_ids:
            targets[(repo_id, issue_num)] = None

    if not targets:
        return []

    # Imported lazily: only needed once there is actually something to fetch.
    import pytest as _pytest
    from github import GithubException

    # Phase 2: fetch each target issue and extract its tests.
    result = []
    for repo_id, issue_num in targets:
        try:
            repo = self.github.get_repo(repo_id, lazy=True)
            issue = repo.get_issue(issue_num)
            tests = self.get_test_issues_from_issue(issue, repo_id=repo_id)
            for test_issue in tests:
                result.append(_pytest.param(test_issue, id=str(test_issue)))
        except GithubException as e:
            if e.status != 404:
                raise
            # 404: the issue doesn't exist in this repo, that's fine.

    return result

def get_all_issues(self, github_repositories = None) -> Iterator[Issue.Issue]:
"""
Get a list of test rows from one or more repositories.
Expand All @@ -183,3 +234,38 @@ def get_all_issues(self, github_repositories = None) -> Iterator[Issue.Issue]:
yield issue

self.logger.info(f"Found {issue_count} issues in GitHub repository {repo_id}")

def get_all_test_issues(self, github_repositories=None) -> list:
    """
    Collect every BabelTest assertion from one or more repositories as a flat list of
    pytest ParameterSets.

    One ParameterSet is produced per GitHubIssueTest (i.e. per assertion in an issue), so
    pytest reports each assertion as its own pass/fail instead of one result per issue.
    Issues without any BabelTest assertions contribute nothing to the list.

    :param github_repositories: The GitHub repositories to search. When None, the
        repositories this object was configured with (self.github_repositories) are used.
    :return: A list of pytest.param objects, one per GitHubIssueTest.
    """
    import pytest as _pytest

    repo_ids = self.github_repositories if github_repositories is None else github_repositories

    params = []
    for repo_id in repo_ids:
        self.logger.info(f"Looking up issues in GitHub repository {repo_id}")
        repo = self.github.get_repo(repo_id, lazy=True)

        # Stays at 0 when the repository has no issues at all.
        issue_count = 0
        all_issues = tqdm(repo.get_issues(state='all', sort='updated'), desc=repo_id)
        for issue_count, issue in enumerate(all_issues, start=1):
            params.extend(
                _pytest.param(test_issue, id=str(test_issue))
                for test_issue in self.get_test_issues_from_issue(issue, repo_id=repo_id)
            )

        self.logger.info(f"Found {issue_count} issues in GitHub repository {repo_id}")

    return params
24 changes: 24 additions & 0 deletions tests/github_issues/conftest.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
import os

import dotenv

from src.babel_validation.sources.github.github_issues_test_cases import GitHubIssuesTestCases

# Load environment variables via python-dotenv before GITHUB_TOKEN is read below.
dotenv.load_dotenv()
# Shared GitHubIssuesTestCases instance, created once when this conftest module is
# imported and reused by pytest_generate_tests() for every parametrization.
# The list names the GitHub repositories ("org/repo") that are searched for test issues.
_github_issues_test_cases = GitHubIssuesTestCases(os.getenv('GITHUB_TOKEN'), [
'NCATSTranslator/Babel',
'NCATSTranslator/NodeNormalization',
'NCATSTranslator/NameResolution',
'TranslatorSRI/babel-validation',
])


def pytest_generate_tests(metafunc):
    """
    Parametrize the ``github_issue_test`` fixture for tests that request it.

    When the ``issue`` option holds one or more values, only those issues are
    fetched; otherwise every issue in the configured repositories is used.
    Tests that do not use ``github_issue_test`` are left untouched.

    :param metafunc: The pytest Metafunc object for the test function being collected.
    """
    if "github_issue_test" in metafunc.fixturenames:
        selected_issues = metafunc.config.getoption('issue', default=[])
        params = (
            _github_issues_test_cases.get_specific_test_issues(selected_issues)
            if selected_issues
            else _github_issues_test_cases.get_all_test_issues()
        )
        metafunc.parametrize("github_issue_test", params)
84 changes: 22 additions & 62 deletions tests/github_issues/test_github_issues.py
Original file line number Diff line number Diff line change
@@ -1,74 +1,34 @@
import itertools
import os

import dotenv
import pytest
from github import Issue

from src.babel_validation.sources.github.github_issues_test_cases import GitHubIssuesTestCases
from src.babel_validation.sources.github.github_issues_test_cases import GitHubIssueTest
from src.babel_validation.services.nameres import CachedNameRes
from src.babel_validation.services.nodenorm import CachedNodeNorm
from src.babel_validation.core.testrow import TestResult, TestStatus

# Helper functions
def get_github_issue_id(github_issue: Issue.Issue):
return f"{github_issue.repository.organization.name}/{github_issue.repository.name}#{github_issue.number}"

# Initialize the test.
dotenv.load_dotenv()
github_token = os.getenv('GITHUB_TOKEN')
github_issues_test_cases = GitHubIssuesTestCases(github_token, [
'NCATSTranslator/Babel', # https://github.com/NCATSTranslator/Babel
'NCATSTranslator/NodeNormalization', # https://github.com/NCATSTranslator/NodeNormalization
'NCATSTranslator/NameResolution', # https://github.com/NCATSTranslator/NameResolution
'TranslatorSRI/babel-validation', # https://github.com/TranslatorSRI/babel-validation
])

@pytest.mark.parametrize("github_issue", github_issues_test_cases.get_all_issues())
def test_github_issue(target_info, github_issue, selected_github_issues):
# If github_issues is provided, we can skip all others.
if selected_github_issues:
# Check all three possible ways in which this issue might be specified.
github_issue_matched = False
for selected_github_issue in selected_github_issues:
if '/' in selected_github_issue:
github_issue_matched = (f"{github_issue.repository.organization.name}/{github_issue.repository.name}#{github_issue.number}" == selected_github_issue)
elif '#' in selected_github_issue:
github_issue_matched = (f"{github_issue.repository.name}#{github_issue.number}" == selected_github_issue)
else:
github_issue_matched = int(selected_github_issue) == github_issue.number
if github_issue_matched:
break

if github_issue_matched:
# This issue is one of those that should be tested.
pass
else:
pytest.skip(f"GitHub Issue {str(github_issue)} not included in list of GitHub issues to be tested: {selected_github_issues}.")
return

# Test this issue with NodeNorm.
def test_github_issue(target_info, github_issue_test: GitHubIssueTest):
nodenorm = CachedNodeNorm.from_url(target_info['NodeNormURL'])
nameres = CachedNameRes.from_url(target_info['NameResURL'])
tests = github_issues_test_cases.get_test_issues_from_issue(github_issue)
if not tests:
pytest.skip(f"No tests found in issue {github_issue}")
return

for test_issue in tests:
results_nodenorm = test_issue.test_with_nodenorm(nodenorm)
results_nameres = test_issue.test_with_nameres(nodenorm, nameres)

for result in itertools.chain(results_nodenorm, results_nameres):
match result:
case TestResult(status=TestStatus.Passed, message=message):
assert True, f"{get_github_issue_id(github_issue)} ({github_issue.state}): {message}"

case TestResult(status=TestStatus.Failed, message=message):
assert False, f"{get_github_issue_id(github_issue)} ({github_issue.state}): {message}"

case TestResult(status=TestStatus.Skipped, message=message):
pytest.skip(f"{get_github_issue_id(github_issue)} ({github_issue.state}): {message}")

case _:
assert False, f"Unknown result from {get_github_issue_id(github_issue)}: {result}"
results = itertools.chain(
github_issue_test.test_with_nodenorm(nodenorm),
github_issue_test.test_with_nameres(nodenorm, nameres),
)

issue_label = (
f"{github_issue_test.repo_id}#{github_issue_test.github_issue.number}"
f" ({github_issue_test.github_issue.state})"
)

for result in results:
match result:
case TestResult(status=TestStatus.Passed, message=message):
assert True, f"{issue_label}: {message}"
case TestResult(status=TestStatus.Failed, message=message):
assert False, f"{issue_label}: {message}"
case TestResult(status=TestStatus.Skipped, message=message):
pytest.skip(f"{issue_label}: {message}")
case _:
assert False, f"Unknown result from {issue_label}: {result}"