TranslatorSRI · gaurav · Feb 19, 2026 · Feb 19, 2026
diff --git a/src/babel_validation/sources/github/github_issues_test_cases.py b/src/babel_validation/sources/github/github_issues_test_cases.py
@@ -15,20 +15,21 @@
 
 
 class GitHubIssueTest:
-    def __init__(self, github_issue: Issue.Issue, assertion: str, param_sets: list[list[str]] = None):
+    def __init__(self, github_issue: Issue.Issue, assertion: str, param_sets: list[list[str]] = None, repo_id: str = ""):
         self.github_issue = github_issue
         self.assertion = assertion
         if param_sets is None:
             param_sets = []
         self.param_sets = param_sets
         if not isinstance(self.param_sets, list):
             raise ValueError(f"param_sets must be a list when creating a GitHubIssueTest({self.github_issue}, {self.assertion}, {self.param_sets})")
+        self.repo_id = repo_id
 
         self.logger = logging.getLogger(str(self))
         self.logger.info(f"Creating GitHubIssueTest for {github_issue.html_url} {assertion}({param_sets})")
 
     def __str__(self):
-        return f"{self.github_issue.repository.organization.name}/{self.github_issue.repository.name}#{self.github_issue.number}: {self.assertion}({len(self.param_sets)} param sets: {json.dumps(self.param_sets)})"
+        return f"{self.repo_id}#{self.github_issue.number}: {self.assertion}({len(self.param_sets)} param sets: {json.dumps(self.param_sets)})"
 
     def test_with_nodenorm(self, nodenorm: CachedNodeNorm) -> Iterator[TestResult]:
         handler = ASSERTION_HANDLERS.get(self.assertion.lower())
@@ -83,7 +84,7 @@ def __init__(self, github_token: str, github_repositories=None):
         self.babeltest_pattern = re.compile(r'{{BabelTest\|.*?}}')
         self.babeltest_yaml_pattern = re.compile(r'```yaml\s+babel_tests:\s+.*?\s+```', re.DOTALL)
 
-    def get_test_issues_from_issue(self, github_issue: Issue.Issue) -> list[GitHubIssueTest]:
+    def get_test_issues_from_issue(self, github_issue: Issue.Issue, repo_id: str = "") -> list[GitHubIssueTest]:
         """
         Extract test rows from a single GitHub issue.
 
@@ -102,15 +103,11 @@ def get_test_issues_from_issue(self, github_issue: Issue.Issue) -> list[GitHubIs
         src/babel_validation/assertions/README.md or inspect ASSERTION_HANDLERS.keys().
 
         :param github_issue: A single GitHub issue to extract test cases from.
+        :param repo_id: The repository identifier string (e.g. "NCATSTranslator/Babel").
         :return: A list of GitHubIssueTest objects found in the issue body.
         """
 
-        github_issue_id = f"{github_issue.number}"
-            # Ideally, we would use:
-            #   f"{github_issue.repository.organization.name}/{github_issue.repository.name}#{github_issue.number}"
-            # But that is very slow.
-            # TODO: Wrap Issue.Issue so that we can store orgName and repoName locally so we don't need to call out
-            # to figure it out.
+        github_issue_id = f"{repo_id}#{github_issue.number}" if repo_id else f"{github_issue.number}"
         self.logger.debug(f"Looking for tests in issue {github_issue_id}: {github_issue.title} ({str(github_issue.state)}, {github_issue.html_url})")
 
         # Is there an issue body at all?
@@ -135,7 +132,7 @@ def get_test_issues_from_issue(self, github_issue: Issue.Issue) -> list[GitHubIs
                 if len(params) < 2:
                     raise ValueError(f"Too few parameters found in BabelTest in issue {github_issue_id}: {match}")
                 else:
-                    testrows.append(GitHubIssueTest(github_issue, params[0], [params[1:]]))
+                    testrows.append(GitHubIssueTest(github_issue, params[0], [params[1:]], repo_id=repo_id))
 
         babeltest_yaml_matches = re.findall(self.babeltest_yaml_pattern, github_issue.body)
         if babeltest_yaml_matches:
@@ -158,10 +155,64 @@ def get_test_issues_from_issue(self, github_issue: Issue.Issue) -> list[GitHubIs
                             param_sets.append(param_set)
                         else:
                             raise RuntimeError(f"Unknown parameter set type {param_set} in issue {github_issue_id}")
-                    testrows.append(GitHubIssueTest(github_issue, assertion, param_sets))
+                    testrows.append(GitHubIssueTest(github_issue, assertion, param_sets, repo_id=repo_id))
 
         return testrows
 
+    def get_specific_test_issues(self, issue_specs: list[str]) -> list:
+        """
+        Fetch only the issues identified by issue_specs (values from --issue flag).
+
+        Accepted formats (same as --issue CLI option):
+        - "637"                       bare number → try all configured repos
+        - "Babel#637"                 repo#number → match by repo name
+        - "NCATSTranslator/Babel#637" org/repo#number → exact match
+
+        Uses the single-issue API endpoint (fast: 1–4 calls instead of paginating
+        through all issues).
+        """
+        import pytest as _pytest
+        from github import GithubException
+
+        result = []
+        seen = set()  # avoid duplicates if a spec matches the same issue twice
+
+        for spec in issue_specs:
+            if '/' in spec and '#' in spec:
+                # "NCATSTranslator/Babel#637"
+                repo_id, issue_num_str = spec.rsplit('#', 1)
+                repos_to_check = [(repo_id, int(issue_num_str))]
+            elif '#' in spec:
+                # "Babel#637"
+                repo_name, issue_num_str = spec.split('#', 1)
+                repos_to_check = [
+                    (repo_id, int(issue_num_str))
+                    for repo_id in self.github_repositories
+                    if repo_id.split('/')[-1] == repo_name
+                ]
+            else:
+                # bare number "637"
+                repos_to_check = [(repo_id, int(spec)) for repo_id in self.github_repositories]
+
+            for repo_id, issue_num in repos_to_check:
+                key = (repo_id, issue_num)
+                if key in seen:
+                    continue
+                seen.add(key)
+                try:
+                    repo = self.github.get_repo(repo_id, lazy=True)
+                    issue = repo.get_issue(issue_num)
+                    tests = self.get_test_issues_from_issue(issue, repo_id=repo_id)
+                    for test_issue in tests:
+                        result.append(_pytest.param(test_issue, id=str(test_issue)))
+                except GithubException as e:
+                    if e.status == 404:
+                        pass  # issue doesn't exist in this repo, that's fine
+                    else:
+                        raise
+
+        return result
+
     def get_all_issues(self, github_repositories = None) -> Iterator[Issue.Issue]:
         """
         Get a list of test rows from one or more repositories.
@@ -183,3 +234,38 @@ def get_all_issues(self, github_repositories = None) -> Iterator[Issue.Issue]:
                 yield issue
 
             self.logger.info(f"Found {issue_count} issues in GitHub repository {repo_id}")
+
+    def get_all_test_issues(self, github_repositories=None) -> list:
+        """
+        Get all BabelTest assertions across one or more repositories as a flat list of pytest ParameterSets.
+
+        Each GitHubIssueTest (one assertion from one issue) becomes its own ParameterSet, so pytest
+        reports a separate pass/fail for every assertion rather than collapsing an entire issue into
+        a single test.
+
+        Issues that contain no BabelTest assertions are silently skipped.
+
+        :param github_repositories: A list of GitHub repositories to search for test cases. If none is
+            provided, we default to the list specified when creating this GitHubIssuesTestCases class.
+        :return: A list of pytest.param objects, one per GitHubIssueTest.
+        """
+        import pytest as _pytest
+
+        if github_repositories is None:
+            github_repositories = self.github_repositories
+
+        result = []
+        for repo_id in github_repositories:
+            self.logger.info(f"Looking up issues in GitHub repository {repo_id}")
+            repo = self.github.get_repo(repo_id, lazy=True)
+
+            issue_count = 0
+            for issue in tqdm(repo.get_issues(state='all', sort='updated'), desc=repo_id):
+                issue_count += 1
+                tests = self.get_test_issues_from_issue(issue, repo_id=repo_id)
+                for test_issue in tests:
+                    result.append(_pytest.param(test_issue, id=str(test_issue)))
+
+            self.logger.info(f"Found {issue_count} issues in GitHub repository {repo_id}")
+
+        return result
diff --git a/tests/github_issues/conftest.py b/tests/github_issues/conftest.py
@@ -0,0 +1,24 @@
+import os
+
+import dotenv
+
+from src.babel_validation.sources.github.github_issues_test_cases import GitHubIssuesTestCases
+
+dotenv.load_dotenv()
+_github_issues_test_cases = GitHubIssuesTestCases(os.getenv('GITHUB_TOKEN'), [
+    'NCATSTranslator/Babel',
+    'NCATSTranslator/NodeNormalization',
+    'NCATSTranslator/NameResolution',
+    'TranslatorSRI/babel-validation',
+])
+
+
+def pytest_generate_tests(metafunc):
+    if "github_issue_test" not in metafunc.fixturenames:
+        return
+    selected_issues = metafunc.config.getoption('issue', default=[])
+    if selected_issues:
+        params = _github_issues_test_cases.get_specific_test_issues(selected_issues)
+    else:
+        params = _github_issues_test_cases.get_all_test_issues()
+    metafunc.parametrize("github_issue_test", params)
diff --git a/tests/github_issues/test_github_issues.py b/tests/github_issues/test_github_issues.py
@@ -1,74 +1,34 @@
 import itertools
-import os
 
-import dotenv
 import pytest
-from github import Issue
 
-from src.babel_validation.sources.github.github_issues_test_cases import GitHubIssuesTestCases
+from src.babel_validation.sources.github.github_issues_test_cases import GitHubIssueTest
 from src.babel_validation.services.nameres import CachedNameRes
 from src.babel_validation.services.nodenorm import CachedNodeNorm
 from src.babel_validation.core.testrow import TestResult, TestStatus
 
-# Helper functions
-def get_github_issue_id(github_issue: Issue.Issue):
-    return f"{github_issue.repository.organization.name}/{github_issue.repository.name}#{github_issue.number}"
 
-# Initialize the test.
-dotenv.load_dotenv()
-github_token = os.getenv('GITHUB_TOKEN')
-github_issues_test_cases = GitHubIssuesTestCases(github_token, [
-    'NCATSTranslator/Babel',                # https://github.com/NCATSTranslator/Babel
-    'NCATSTranslator/NodeNormalization',    # https://github.com/NCATSTranslator/NodeNormalization
-    'NCATSTranslator/NameResolution',       # https://github.com/NCATSTranslator/NameResolution
-    'TranslatorSRI/babel-validation',       # https://github.com/TranslatorSRI/babel-validation
-])
-
-@pytest.mark.parametrize("github_issue", github_issues_test_cases.get_all_issues())
-def test_github_issue(target_info, github_issue, selected_github_issues):
-    # If github_issues is provided, we can skip all others.
-    if selected_github_issues:
-        # Check all three possible ways in which this issue might be specified.
-        github_issue_matched = False
-        for selected_github_issue in selected_github_issues:
-            if '/' in selected_github_issue:
-                github_issue_matched = (f"{github_issue.repository.organization.name}/{github_issue.repository.name}#{github_issue.number}" == selected_github_issue)
-            elif '#' in selected_github_issue:
-                github_issue_matched = (f"{github_issue.repository.name}#{github_issue.number}" == selected_github_issue)
-            else:
-                github_issue_matched = int(selected_github_issue) == github_issue.number
-            if github_issue_matched:
-                break
-
-        if github_issue_matched:
-            # This issue is one of those that should be tested.
-            pass
-        else:
-            pytest.skip(f"GitHub Issue {str(github_issue)} not included in list of GitHub issues to be tested: {selected_github_issues}.")
-            return
-
-    # Test this issue with NodeNorm.
+def test_github_issue(target_info, github_issue_test: GitHubIssueTest):
     nodenorm = CachedNodeNorm.from_url(target_info['NodeNormURL'])
     nameres = CachedNameRes.from_url(target_info['NameResURL'])
-    tests = github_issues_test_cases.get_test_issues_from_issue(github_issue)
-    if not tests:
-        pytest.skip(f"No tests found in issue {github_issue}")
-        return
-
-    for test_issue in tests:
-        results_nodenorm = test_issue.test_with_nodenorm(nodenorm)
-        results_nameres = test_issue.test_with_nameres(nodenorm, nameres)
-
-        for result in itertools.chain(results_nodenorm, results_nameres):
-            match result:
-                case TestResult(status=TestStatus.Passed, message=message):
-                    assert True, f"{get_github_issue_id(github_issue)} ({github_issue.state}): {message}"
-
-                case TestResult(status=TestStatus.Failed, message=message):
-                    assert False, f"{get_github_issue_id(github_issue)} ({github_issue.state}): {message}"
-
-                case TestResult(status=TestStatus.Skipped, message=message):
-                    pytest.skip(f"{get_github_issue_id(github_issue)} ({github_issue.state}): {message}")
 
-                case _:
-                    assert False, f"Unknown result from {get_github_issue_id(github_issue)}: {result}"
+    results = itertools.chain(
+        github_issue_test.test_with_nodenorm(nodenorm),
+        github_issue_test.test_with_nameres(nodenorm, nameres),
+    )
+
+    issue_label = (
+        f"{github_issue_test.repo_id}#{github_issue_test.github_issue.number}"
+        f" ({github_issue_test.github_issue.state})"
+    )
+
+    for result in results:
+        match result:
+            case TestResult(status=TestStatus.Passed, message=message):
+                assert True, f"{issue_label}: {message}"
+            case TestResult(status=TestStatus.Failed, message=message):
+                assert False, f"{issue_label}: {message}"
+            case TestResult(status=TestStatus.Skipped, message=message):
+                pytest.skip(f"{issue_label}: {message}")
+            case _:
+                assert False, f"Unknown result from {issue_label}: {result}"