From 9c53ee41932e238f147eba84b4d96a53652c7555 Mon Sep 17 00:00:00 2001
From: Jayaram Kancherla <jayaram.kancherla@gmail.com>
Date: Mon, 19 Jan 2026 20:28:01 -0800
Subject: [PATCH 1/4] using a template from txdb and orgdb

---
 src/ensembldb/_ahub.py    |   7 ++
 src/ensembldb/record.py   |  60 ++++++++++++++
 src/ensembldb/registry.py | 160 ++++++++++++++++++++++++++++++++++++++
 src/ensembldb/skeleton.py | 149 -----------------------------------
 4 files changed, 227 insertions(+), 149 deletions(-)
 create mode 100644 src/ensembldb/_ahub.py
 create mode 100644 src/ensembldb/record.py
 create mode 100644 src/ensembldb/registry.py
 delete mode 100644 src/ensembldb/skeleton.py

diff --git a/src/ensembldb/_ahub.py b/src/ensembldb/_ahub.py
new file mode 100644
index 0000000..85e286d
--- /dev/null
+++ b/src/ensembldb/_ahub.py
@@ -0,0 +1,7 @@
+"""Configuration for accessing AnnotationHub metadata for EnsDb."""
+
+__author__ = "Jayaram Kancherla"
+__copyright__ = "Jayaram Kancherla"
+__license__ = "MIT"
+
+AHUB_METADATA_URL = "https://annotationhub.bioconductor.org/metadata/annotationhub.sqlite3"
diff --git a/src/ensembldb/record.py b/src/ensembldb/record.py
new file mode 100644
index 0000000..a1e7b8a
--- /dev/null
+++ b/src/ensembldb/record.py
@@ -0,0 +1,60 @@
+from __future__ import annotations
+
+from dataclasses import dataclass
+from datetime import date, datetime
+from typing import Optional
+
+__author__ = "Jayaram Kancherla"
+__copyright__ = "Jayaram Kancherla"
+__license__ = "MIT"
+
+
+@dataclass(frozen=True)
+class EnsDbRecord:
+    """Container for a single EnsDb entry."""
+
+    ensdb_id: str  # e.g., "AH12345"
+    title: str
+    species: Optional[str]
+    taxonomy_id: Optional[str]
+    genome: Optional[str]
+    description: Optional[str]
+    url: str
+    release_date: Optional[date]
+    ensembl_version: Optional[str] = None
+
+    @classmethod
+    def from_db_row(cls, row: tuple) -> "EnsDbRecord":
+        """Build a record from a database query row."""
+        rid, title, species, tax_id, genome, desc, url, date_str = row
+
+        ah_id = f"AH{rid}"
+
+        rel_date: Optional[date] = None
+        if date_str:
+            try:
+                rel_date = datetime.strptime(str(date_str).split(" ")[0], "%Y-%m-%d").date()
+            except ValueError:
+                pass
+
+        ens_ver = None
+        if title and "Ensembl" in title:
+            parts = title.split(" ")
+            for i, p in enumerate(parts):
+                if p == "Ensembl" and i + 1 < len(parts):
+                    candidate = parts[i + 1]
+                    if candidate.isdigit():
+                        ens_ver = candidate
+                        break
+
+        return cls(
+            ensdb_id=ah_id,
+            title=title or "",
+            species=species,
+            taxonomy_id=str(tax_id) if tax_id else None,
+            genome=genome,
+            description=desc,
+            url=url,
+            release_date=rel_date,
+            ensembl_version=ens_ver,
+        )
diff --git a/src/ensembldb/registry.py b/src/ensembldb/registry.py
new file mode 100644
index 0000000..8eb4167
--- /dev/null
+++ b/src/ensembldb/registry.py
@@ -0,0 +1,160 @@
+import os
+import sqlite3
+from pathlib import Path
+from typing import Any, Dict, List, Optional, Union
+
+from pybiocfilecache import BiocFileCache
+
+from ._ahub import AHUB_METADATA_URL
+from .ensdb import EnsDb
+from .record import EnsDbRecord
+
+__author__ = "Jayaram Kancherla"
+__copyright__ = "Jayaram Kancherla"
+__license__ = "MIT"
+
+
+class EnsDbRegistry:
+    """Registry for EnsDb resources."""
+
+    def __init__(
+        self,
+        cache_dir: Optional[Union[str, Path]] = None,
+        force: bool = False,
+    ) -> None:
+        """Initialize the EnsDb registry.
+
+        Args:
+            cache_dir: Path to cache directory.
+            force: Force re-download of metadata.
+        """
+        if cache_dir is None:
+            cache_dir = Path.home() / ".cache" / "ensembldb_bfc"
+
+        self._cache_dir = Path(cache_dir)
+        self._cache_dir.mkdir(parents=True, exist_ok=True)
+        self._bfc = BiocFileCache(self._cache_dir)
+
+        self._registry_map: Dict[str, EnsDbRecord] = {}
+        self._initialize_registry(force=force)
+
+    def _initialize_registry(self, force: bool = False):
+        """Populate registry from AnnotationHub metadata."""
+        rname = "annotationhub_metadata"
+
+        existing = None
+        try:
+            existing = self._bfc.get(rname)
+        except Exception:
+            pass
+
+        if force and existing:
+            try:
+                self._bfc.remove(rname)
+            except Exception:
+                pass
+            existing = None
+
+        if existing:
+            md_resource = existing
+        else:
+            md_resource = self._bfc.add(rname, AHUB_METADATA_URL, rtype="web")
+
+        md_path = self._get_filepath(md_resource)
+
+        if not md_path or not os.path.exists(md_path):
+            if existing and not force:
+                return self._initialize_registry(force=True)
+            raise RuntimeError("Failed to retrieve AnnotationHub metadata.")
+
+        conn = sqlite3.connect(md_path)
+        try:
+            # Filter for EnsDb sqlite files
+            query = """
+            SELECT
+                r.id,
+                r.title,
+                r.species,
+                r.taxonomyid,
+                r.genome,
+                r.description,
+                lp.location_prefix || rp.rdatapath AS full_url,
+                r.rdatadateadded
+            FROM resources r
+            LEFT JOIN location_prefixes lp
+                ON r.location_prefix_id = lp.id
+            LEFT JOIN rdatapaths rp
+                ON rp.resource_id = r.id
+            WHERE r.title LIKE 'Ensembl % EnsDb%' 
+              AND r.title LIKE '%.sqlite'
+            ORDER BY r.rdatadateadded DESC;
+            """
+            cursor = conn.cursor()
+            cursor.execute(query)
+            rows = cursor.fetchall()
+        finally:
+            conn.close()
+
+        for row in rows:
+            record = EnsDbRecord.from_db_row(row)
+            self._registry_map[record.ensdb_id] = record
+
+    def list_ensdbs(self) -> List[str]:
+        """List available EnsDb IDs."""
+        return sorted(list(self._registry_map.keys()))
+
+    def get_record(self, ensdb_id: str) -> EnsDbRecord:
+        if ensdb_id not in self._registry_map:
+            raise KeyError(f"ID '{ensdb_id}' not found.")
+        return self._registry_map[ensdb_id]
+
+    def download(self, ensdb_id: str, force: bool = False) -> str:
+        record = self.get_record(ensdb_id)
+        url = record.url
+        key = ensdb_id
+
+        if force:
+            try:
+                self._bfc.remove(key)
+            except Exception:
+                pass
+
+        if not force:
+            try:
+                existing = self._bfc.get(key)
+                if existing:
+                    path = self._get_filepath(existing)
+                    if path and os.path.exists(path) and os.path.getsize(path) > 0:
+                        return path
+            except Exception:
+                pass
+
+        resource = self._bfc.add(
+            key,
+            url,
+            rtype="web",
+            download=True,
+        )
+
+        path = self._get_filepath(resource)
+        if not path or not os.path.exists(path) or os.path.getsize(path) == 0:
+            try:
+                self._bfc.remove(key)
+            except Exception:
+                pass
+            raise RuntimeError(f"Download failed for {ensdb_id}.")
+
+        return path
+
+    def load_db(self, ensdb_id: str, force: bool = False) -> EnsDb:
+        path = self.download(ensdb_id, force=force)
+        return EnsDb(path)
+
+    def _get_filepath(self, resource: Any) -> Optional[str]:
+        if hasattr(resource, "rpath"):
+            rel_path = str(resource.rpath)
+        elif hasattr(resource, "get"):
+            rel_path = str(resource.get("rpath"))
+        else:
+            return None
+        return str(self._cache_dir / rel_path)
diff --git a/src/ensembldb/skeleton.py b/src/ensembldb/skeleton.py
deleted file mode 100644
index 19bfc1a..0000000
--- a/src/ensembldb/skeleton.py
+++ /dev/null
@@ -1,149 +0,0 @@
-"""
-This is a skeleton file that can serve as a starting point for a Python
-console script. To run this script uncomment the following lines in the
-``[options.entry_points]`` section in ``setup.cfg``::
-
-    console_scripts =
-         fibonacci = ensembldb.skeleton:run
-
-Then run ``pip install .`` (or ``pip install -e .`` for editable mode)
-which will install the command ``fibonacci`` inside your current environment.
-
-Besides console scripts, the header (i.e. until ``_logger``...) of this file can
-also be used as template for Python modules.
-
-Note:
-    This file can be renamed depending on your needs or safely removed if not needed.
-
-References:
-    - https://setuptools.pypa.io/en/latest/userguide/entry_point.html
-    - https://pip.pypa.io/en/stable/reference/pip_install
-"""
-
-import argparse
-import logging
-import sys
-
-from ensembldb import __version__
-
-__author__ = "Jayaram Kancherla"
-__copyright__ = "Jayaram Kancherla"
-__license__ = "MIT"
-
-_logger = logging.getLogger(__name__)
-
-
-# ---- Python API ----
-# The functions defined in this section can be imported by users in their
-# Python scripts/interactive interpreter, e.g. via
-# `from ensembldb.skeleton import fib`,
-# when using this Python module as a library.
-
-
-def fib(n):
-    """Fibonacci example function
-
-    Args:
-      n (int): integer
-
-    Returns:
-      int: n-th Fibonacci number
-    """
-    assert n > 0
-    a, b = 1, 1
-    for _i in range(n - 1):
-        a, b = b, a + b
-    return a
-
-
-# ---- CLI ----
-# The functions defined in this section are wrappers around the main Python
-# API allowing them to be called directly from the terminal as a CLI
-# executable/script.
-
-
-def parse_args(args):
-    """Parse command line parameters
-
-    Args:
-      args (List[str]): command line parameters as list of strings
-          (for example  ``["--help"]``).
-
-    Returns:
-      :obj:`argparse.Namespace`: command line parameters namespace
-    """
-    parser = argparse.ArgumentParser(description="Just a Fibonacci demonstration")
-    parser.add_argument(
-        "--version",
-        action="version",
-        version=f"ensembldb {__version__}",
-    )
-    parser.add_argument(dest="n", help="n-th Fibonacci number", type=int, metavar="INT")
-    parser.add_argument(
-        "-v",
-        "--verbose",
-        dest="loglevel",
-        help="set loglevel to INFO",
-        action="store_const",
-        const=logging.INFO,
-    )
-    parser.add_argument(
-        "-vv",
-        "--very-verbose",
-        dest="loglevel",
-        help="set loglevel to DEBUG",
-        action="store_const",
-        const=logging.DEBUG,
-    )
-    return parser.parse_args(args)
-
-
-def setup_logging(loglevel):
-    """Setup basic logging
-
-    Args:
-      loglevel (int): minimum loglevel for emitting messages
-    """
-    logformat = "[%(asctime)s] %(levelname)s:%(name)s:%(message)s"
-    logging.basicConfig(
-        level=loglevel, stream=sys.stdout, format=logformat, datefmt="%Y-%m-%d %H:%M:%S"
-    )
-
-
-def main(args):
-    """Wrapper allowing :func:`fib` to be called with string arguments in a CLI fashion
-
-    Instead of returning the value from :func:`fib`, it prints the result to the
-    ``stdout`` in a nicely formatted message.
-
-    Args:
-      args (List[str]): command line parameters as list of strings
-          (for example  ``["--verbose", "42"]``).
-    """
-    args = parse_args(args)
-    setup_logging(args.loglevel)
-    _logger.debug("Starting crazy calculations...")
-    print(f"The {args.n}-th Fibonacci number is {fib(args.n)}")
-    _logger.info("Script ends here")
-
-
-def run():
-    """Calls :func:`main` passing the CLI arguments extracted from :obj:`sys.argv`
-
-    This function can be used as entry point to create console scripts with setuptools.
-    """
-    main(sys.argv[1:])
-
-
-if __name__ == "__main__":
-    # ^  This is a guard statement that will prevent the following code from
-    #    being executed in the case someone imports this file instead of
-    #    executing it as a script.
-    #    https://docs.python.org/3/library/__main__.html
-
-    # After installing your project with pip, users can also run your Python
-    # modules as scripts via the ``-m`` flag, as defined in PEP 338::
-    #
-    #     python -m ensembldb.skeleton 42
-    #
-    run()

From 8b088421c4c2281dba2e9a5a14f26b69135d99b0 Mon Sep 17 00:00:00 2001
From: Jayaram Kancherla <jayaram.kancherla@gmail.com>
Date: Tue, 20 Jan 2026 16:42:01 -0800
Subject: [PATCH 2/4] add the rest of the files

---
 README.md                 | 105 +++++++++++++++-
 docs/index.md             |  18 ++-
 pyproject.toml            |   2 +-
 setup.cfg                 |   6 +-
 src/ensembldb/__init__.py |   4 +
 src/ensembldb/ensdb.py    | 258 ++++++++++++++++++++++++++++++++++++++
 src/ensembldb/registry.py |   5 +-
 tests/test_real.py        | 105 ++++++++++++++++
 tests/test_skeleton.py    |  25 ----
 9 files changed, 485 insertions(+), 43 deletions(-)
 create mode 100644 src/ensembldb/ensdb.py
 create mode 100644 tests/test_real.py
 delete mode 100644 tests/test_skeleton.py

diff --git a/README.md b/README.md
index 0942536..c746108 100644
--- a/README.md
+++ b/README.md
@@ -1,11 +1,11 @@
 [![PyPI-Server](https://img.shields.io/pypi/v/ensembldb.svg)](https://pypi.org/project/ensembldb/)
-![Unit tests](https://github.com/YOUR_ORG_OR_USERNAME/ensembldb/actions/workflows/run-tests.yml/badge.svg)
+![Unit tests](https://github.com/BiocPy/ensembldb/actions/workflows/run-tests.yml/badge.svg)
 
-# ensembldb
+# EnsemblDb
 
-> Access EnsemblDB objects
+**EnsemblDb** provides a Python interface to **Ensembl Annotation Databases (EnsDb)**. It mirrors the functionality of the Bioconductor `ensembldb` package, allowing users to efficiently query gene, transcript, and exon annotations from SQLite-based annotation files.
 
-A longer description of your project goes here...
+This package is part of the **BiocPy** ecosystem and integrates seamlessly with [GenomicRanges](https://github.com/biocpy/genomicranges).
 
 ## Install
 
@@ -15,6 +15,103 @@ To get started, install the package from [PyPI](https://pypi.org/project/ensembl
 pip install ensembldb
 ```
 
+## Usage
+
+### 1. Connecting to an EnsDb
+
+You can manage and download standard Ensembl databases via the registry (backed by AnnotationHub).
+
+```py
+from ensembldb import EnsDbRegistry
+
+# Initialize the registry
+registry = EnsDbRegistry()
+
+# List available databases
+available = registry.list_ensdbs()
+print(available[:5])
+# ['AH53211', 'AH53212', ...]
+
+# Load a specific database (e.g., Larimichthys crocea)
+# This automatically downloads and caches the SQLite file
+db = registry.load_db("AH113677")
+
+# View metadata
+print(db.metadata)
+```
+
+### 2. Retrieving Genomic Features
+
+EnsemblDb allows you to extract features as GenomicRanges objects.
+
+#### Fetch Genes
+
+```py
+genes = db.genes()
+print(genes)
+# GenomicRanges with 23958 ranges and 3 metadata columns
+#                    seqnames              ranges          strand              gene_id gene_name   gene_biotype
+#                       <str>           <IRanges> <ndarray[int8]>               <list>    <list>         <list>
+# ENSLCRG00005000002       MT              1 - 69               + | ENSLCRG00005000002                  Mt_tRNA
+# ENSLCRG00005000003       MT           70 - 1016               + | ENSLCRG00005000003                  Mt_rRNA
+# ENSLCRG00005000004       MT         1017 - 1087               + | ENSLCRG00005000004                  Mt_tRNA
+#                         ...                 ...             ... |                ...       ...            ...
+# ENSLCRG00005023957       VI 22289079 - 22304889               - | ENSLCRG00005023957    FILIP1 protein_coding
+# ENSLCRG00005023958       VI 22328118 - 22347657               + | ENSLCRG00005023958     SENP6 protein_coding
+# ENSLCRG00005023959       VI 22351962 - 22451867               + | ENSLCRG00005023959     myo6a protein_coding
+# ------
+# seqinfo(496 sequences): I II III ... XXII XXIII XXIV
+```
+
+#### Fetch Transcripts and Exons
+
+```py
+transcripts = db.transcripts()
+print(transcripts)
+
+exons = db.exons()
+print(exons)
+```
+
+### 3. Filtering
+
+You can filter results by passing a dictionary to the `filter` argument. Keys must match column names in the database (e.g., `gene_id`, `gene_name`, `tx_biotype`); each value can be a single string or a list of strings to match.
+
+#### Filter by Gene Name
+
+```py
+# Get coordinates for a specific gene
+senp6 = db.genes(filter={"gene_name": "SENP6"})
+print(senp6)
+```
+
+#### Filter by ID list
+
+```py
+# Get transcripts for a list of gene IDs
+ids = ["ENSLCRG00005023958", "ENSLCRG00005000003"]
+txs = db.transcripts(filter={"gene_id": ids})
+print(txs)
+```
+
+#### Filter Exons by Transcript ID
+
+```py
+# Get all exons associated with a specific transcript
+tx_exons = db.exons(filter={"tx_id": "ENSLCRT00005000003"})
+print(tx_exons)
+```
+
+### 4. Direct SQL Access
+
+If you need more complex queries that are not covered by the standard methods, you can execute SQL directly against the underlying database. Note that `_query_as_biocframe` is an underscore-prefixed (private) helper.
+
+```py
+# Get a BiocFrame from a raw SQL query
+df = db._query_as_biocframe("SELECT * FROM gene LIMIT 5")
+print(df)
+```
+
 <!-- biocsetup-notes -->
 
 ## Note
diff --git a/docs/index.md b/docs/index.md
index d70fafd..5fd0abb 100644
--- a/docs/index.md
+++ b/docs/index.md
@@ -1,18 +1,16 @@
-# ensembldb
+# EnsemblDb
 
-Access EnsemblDB objects
+**EnsemblDb** provides a Python interface to **Ensembl Annotation Databases (EnsDb)**. It mirrors the functionality of the Bioconductor `ensembldb` package, allowing users to efficiently query gene, transcript, and exon annotations from SQLite-based annotation files.
 
+This package is part of the **BiocPy** ecosystem and integrates seamlessly with [GenomicRanges](https://github.com/biocpy/genomicranges).
 
-## Note
+## Install
 
-> This is the main page of your project's [Sphinx] documentation. It is
-> formatted in [Markdown]. Add additional pages by creating md-files in
-> `docs` or rst-files (formatted in [reStructuredText]) and adding links to
-> them in the `Contents` section below.
->
-> Please check [Sphinx] and [MyST] for more information
-> about how to document your project and how to configure your preferences.
+To get started, install the package from [PyPI](https://pypi.org/project/ensembldb/)
 
+```bash
+pip install ensembldb
+```
 
 ## Contents
 
diff --git a/pyproject.toml b/pyproject.toml
index 086f90c..bc45f55 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -11,7 +11,7 @@ version_scheme = "no-guess-dev"
 [tool.ruff]
 line-length = 120
 src = ["src"]
-exclude = ["tests"]
+# exclude = ["tests"]
 lint.extend-ignore = ["F821"]
 
 [tool.ruff.lint.pydocstyle]
diff --git a/setup.cfg b/setup.cfg
index 2cc6b21..0f09996 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -5,7 +5,7 @@
 
 [metadata]
 name = ensembldb
-description = Access EnsemblDB objects
+description = Access EnsemblDb resources from Bioconductor's AnnotationHub
 author = Jayaram Kancherla
 author_email = jayaram.kancherla@gmail.com
 license = MIT
@@ -49,6 +49,10 @@ package_dir =
 # For more information, check out https://semver.org/.
 install_requires =
     importlib-metadata; python_version<"3.8"
+    pybiocfilecache
+    biocframe
+    genomicranges
+    iranges
 
 
 [options.packages.find]
diff --git a/src/ensembldb/__init__.py b/src/ensembldb/__init__.py
index e451f10..7501a7f 100644
--- a/src/ensembldb/__init__.py
+++ b/src/ensembldb/__init__.py
@@ -14,3 +14,7 @@
     __version__ = "unknown"
 finally:
     del version, PackageNotFoundError
+
+from .record import EnsDbRecord
+from .registry import EnsDbRegistry
+from .ensdb import EnsDb
\ No newline at end of file
diff --git a/src/ensembldb/ensdb.py b/src/ensembldb/ensdb.py
new file mode 100644
index 0000000..c81d215
--- /dev/null
+++ b/src/ensembldb/ensdb.py
@@ -0,0 +1,258 @@
+import sqlite3
+from typing import Dict, List, Optional, Union
+
+from biocframe import BiocFrame
+from genomicranges import GenomicRanges
+from iranges import IRanges
+
+__author__ = "Jayaram Kancherla"
+__copyright__ = "Jayaram Kancherla"
+__license__ = "MIT"
+
+
+class EnsDb:
+    """Interface to Ensembl SQLite annotations."""
+
+    def __init__(self, dbpath: str):
+        """Initialize the EnsDb object.
+
+        Args:
+            dbpath:
+                Path to the SQLite database file.
+        """
+        self.dbpath = dbpath
+        self.conn = sqlite3.connect(dbpath)
+        self.conn.row_factory = sqlite3.Row
+        self._metadata = None
+
+    def _query_as_biocframe(self, query: str, params: tuple = ()) -> BiocFrame:
+        """Execute query and return BiocFrame."""
+        cursor = self.conn.cursor()
+        cursor.execute(query, params)
+        results = cursor.fetchall()
+
+        if not results:
+            if cursor.description:
+                col_names = [desc[0] for desc in cursor.description]
+                # Fix: Initialize empty lists for each column to satisfy BiocFrame validation
+                empty_data = {col: [] for col in col_names}
+                return BiocFrame(empty_data, column_names=col_names)
+            return BiocFrame({})
+
+        col_names = [desc[0] for desc in cursor.description]
+        columns_data = list(zip(*results))
+
+        data_dict = {}
+        for i, name in enumerate(col_names):
+            data_dict[name] = list(columns_data[i])
+
+        return BiocFrame(data_dict)
+
+    @property
+    def metadata(self) -> BiocFrame:
+        """Get database metadata."""
+        if self._metadata is None:
+            self._metadata = self._query_as_biocframe("SELECT * FROM metadata")
+        return self._metadata
+
+    def _check_column_exists(self, table: str, column: str) -> bool:
+        """Check if a column exists in a table."""
+        try:
+            self.conn.execute(f"SELECT {column} FROM {table} LIMIT 1")
+            return True
+        except sqlite3.OperationalError:
+            return False
+
+    def genes(self, filter: Optional[Dict[str, Union[str, List[str]]]] = None) -> GenomicRanges:
+        """Retrieve genes as GenomicRanges.
+
+        Args:
+            filter:
+                A dictionary defining filters to narrow down the result.
+                Keys are column names (e.g., "gene_id", "gene_name", "gene_biotype").
+                Values can be a single string or a list of strings to match.
+
+                Example:
+                    `{'gene_name': 'BRCA1'}`
+                    `{'gene_biotype': ['protein_coding', 'lincRNA']}`
+
+        Returns:
+            A GenomicRanges object containing gene coordinates and metadata.
+        """
+        has_entrez = self._check_column_exists("gene", "entrezid")
+        entrez_col = ", g.entrezid" if has_entrez else ""
+
+        query = f"""
+        SELECT 
+            g.gene_id, g.gene_name, g.gene_biotype,
+            g.seq_name, g.gene_seq_start, g.gene_seq_end, g.seq_strand{entrez_col},
+            c.seq_length
+        FROM gene g
+        LEFT JOIN chromosome c ON g.seq_name = c.seq_name
+        """
+
+        where_clauses = []
+        params = []
+
+        if filter:
+            for col, val in filter.items():
+                if isinstance(val, list):
+                    placeholders = ",".join("?" * len(val))
+                    where_clauses.append(f"g.{col} IN ({placeholders})")
+                    params.extend(val)
+                else:
+                    where_clauses.append(f"g.{col} = ?")
+                    params.append(val)
+
+        if where_clauses:
+            query += " WHERE " + " AND ".join(where_clauses)
+
+        bf = self._query_as_biocframe(query, tuple(params))
+
+        if bf.shape[0] == 0:
+            return GenomicRanges.empty()
+
+        return self._make_gr(bf, prefix="gene_")
+
+    def transcripts(self, filter: Optional[Dict[str, Union[str, List[str]]]] = None) -> GenomicRanges:
+        """Retrieve transcripts as GenomicRanges.
+
+        Args:
+            filter:
+                A dictionary defining filters to narrow down the result.
+                Keys are column names (e.g., "tx_id", "gene_id", "tx_biotype").
+                Values can be a single string or a list of strings to match.
+
+                Columns from the gene table (like "gene_name") can also be used as keys
+                since the query performs a join.
+
+        Returns:
+            A GenomicRanges object containing transcript coordinates and metadata.
+        """
+        query = """
+        SELECT 
+            t.tx_id, t.tx_biotype, t.gene_id,
+            t.tx_seq_start, t.tx_seq_end,
+            g.seq_name, g.seq_strand, g.gene_name,
+            c.seq_length
+        FROM tx t
+        JOIN gene g ON t.gene_id = g.gene_id
+        LEFT JOIN chromosome c ON g.seq_name = c.seq_name
+        """
+
+        where_clauses = []
+        params = []
+
+        if filter:
+            for col, val in filter.items():
+                prefix = "t." if col.startswith("tx_") else "g."
+                if col == "gene_id":
+                    prefix = "t."
+
+                if isinstance(val, list):
+                    placeholders = ",".join("?" * len(val))
+                    where_clauses.append(f"{prefix}{col} IN ({placeholders})")
+                    params.extend(val)
+                else:
+                    where_clauses.append(f"{prefix}{col} = ?")
+                    params.append(val)
+
+        if where_clauses:
+            query += " WHERE " + " AND ".join(where_clauses)
+
+        bf = self._query_as_biocframe(query, tuple(params))
+        if bf.shape[0] == 0:
+            return GenomicRanges.empty()
+
+        return self._make_gr(bf, prefix="tx_")
+
+    def exons(self, filter: Optional[Dict[str, Union[str, List[str]]]] = None) -> GenomicRanges:
+        """Retrieve exons as GenomicRanges.
+
+        Args:
+            filter:
+                A dictionary defining filters to narrow down the result.
+                Keys are column names (e.g., "exon_id", "gene_id", "tx_id").
+                Values can be a single string or a list of strings to match.
+
+                This allows filtering exons by associated gene or transcript IDs
+                (e.g., `{'gene_id': 'ENSG00000139618'}`).
+
+        Returns:
+            A GenomicRanges object containing exon coordinates and metadata.
+        """
+        query = """
+        SELECT DISTINCT
+            e.exon_id, e.exon_seq_start, e.exon_seq_end,
+            g.seq_name, g.seq_strand,
+            c.seq_length
+        FROM exon e
+        JOIN tx2exon t2e ON e.exon_id = t2e.exon_id
+        JOIN tx t ON t2e.tx_id = t.tx_id
+        JOIN gene g ON t.gene_id = g.gene_id
+        LEFT JOIN chromosome c ON g.seq_name = c.seq_name
+        """
+
+        where_clauses = []
+        params = []
+        if filter:
+            for col, val in filter.items():
+                prefix = "g."
+                if col.startswith("tx_"):
+                    prefix = "t."
+                if col.startswith("exon_"):
+                    prefix = "e."
+
+                if isinstance(val, list):
+                    placeholders = ",".join("?" * len(val))
+                    where_clauses.append(f"{prefix}{col} IN ({placeholders})")
+                    params.extend(val)
+                else:
+                    where_clauses.append(f"{prefix}{col} = ?")
+                    params.append(val)
+
+        if where_clauses:
+            query += " WHERE " + " AND ".join(where_clauses)
+
+        bf = self._query_as_biocframe(query, tuple(params))
+        if bf.shape[0] == 0:
+            return GenomicRanges.empty()
+
+        return self._make_gr(bf, prefix="exon_")
+
+    def _make_gr(self, bf: BiocFrame, prefix: str = "gene_") -> GenomicRanges:
+        """Helper to convert BiocFrame to GenomicRanges."""
+        strand_col = bf.get_column("seq_strand")
+        strand_map = {1: "+", -1: "-", 0: "*", "1": "+", "-1": "-", "0": "*"}
+        strand = [strand_map.get(x, "*") for x in strand_col]
+
+        seqnames = [str(x) for x in bf.get_column("seq_name")]
+
+        starts = bf.get_column(f"{prefix}seq_start")
+        ends = bf.get_column(f"{prefix}seq_end")
+        widths = [abs(e - s) + 1 for s, e in zip(starts, ends)]
+        ranges = IRanges(start=starts, width=widths)
+
+        row_names = None
+        id_col = f"{prefix}id"
+        if id_col in bf.column_names:
+            row_names = [str(x) for x in bf.get_column(id_col)]
+
+        drop_cols = ["seq_name", "seq_strand", f"{prefix}seq_start", f"{prefix}seq_end", "seq_length"]
+        mcols_dict = {}
+        for c in bf.column_names:
+            if c not in drop_cols:
+                mcols_dict[c] = bf.get_column(c)
+
+        mcols = BiocFrame(mcols_dict, row_names=row_names)
+
+        return GenomicRanges(seqnames=seqnames, ranges=ranges, strand=strand, names=row_names, mcols=mcols)
+
+    def close(self):
+        self.conn.close()
+
+    def __enter__(self):
+        return self
+
+    def __exit__(self, exc_type, exc_val, exc_tb):
+        self.close()
diff --git a/src/ensembldb/registry.py b/src/ensembldb/registry.py
index 8eb4167..4b1c852 100644
--- a/src/ensembldb/registry.py
+++ b/src/ensembldb/registry.py
@@ -70,6 +70,7 @@ def _initialize_registry(self, force: bool = False):
         conn = sqlite3.connect(md_path)
         try:
             # Filter for EnsDb sqlite files
+            # Match on rdataclass = 'EnsDb' (or the title pattern) and require a .sqlite rdatapath
             query = """
             SELECT
                 r.id,
@@ -85,8 +86,8 @@ def _initialize_registry(self, force: bool = False):
                 ON r.location_prefix_id = lp.id
             LEFT JOIN rdatapaths rp
                 ON rp.resource_id = r.id
-            WHERE r.title LIKE 'Ensembl % EnsDb%' 
-              AND r.title LIKE '%.sqlite'
+            WHERE (rp.rdataclass = 'EnsDb' OR r.title LIKE 'Ensembl % EnsDb%')
+              AND rp.rdatapath LIKE '%.sqlite'
             ORDER BY r.rdatadateadded DESC;
             """
             cursor = conn.cursor()
diff --git a/tests/test_real.py b/tests/test_real.py
new file mode 100644
index 0000000..cb10f6d
--- /dev/null
+++ b/tests/test_real.py
@@ -0,0 +1,105 @@
+import pytest
+from genomicranges import GenomicRanges
+
+from ensembldb import EnsDb, EnsDbRegistry
+
+__author__ = "Jayaram Kancherla"
+__copyright__ = "Jayaram Kancherla"
+__license__ = "MIT"
+
+
+@pytest.fixture(scope="module")
+def ensdb_resource():
+    registry = EnsDbRegistry()
+
+    all_ids = registry.list_ensdbs()
+
+    if not all_ids:
+        pytest.fail("Registry found no EnsDb files. Check query logic.")
+
+    target_id = "AH100751"  # Saccharomyces cerevisiae
+    return registry.load_db(target_id)
+
+
+def test_connection_and_metadata(ensdb_resource):
+    assert isinstance(ensdb_resource, EnsDb)
+
+    meta = ensdb_resource.metadata
+
+    assert "name" in meta.column_names
+    assert "value" in meta.column_names
+
+    names = meta.get_column("name")
+    values = meta.get_column("value")
+    meta_dict = dict(zip(names, values))
+
+    assert "DBSCHEMAVERSION" in meta_dict or "schema_version" in meta_dict
+
+
+def test_genes_fetch(ensdb_resource):
+    gr = ensdb_resource.genes()
+
+    assert isinstance(gr, GenomicRanges)
+    assert len(gr) > 0
+
+    mcols = gr.mcols
+    assert "gene_id" in mcols.column_names
+    assert len(gr.seqnames) == len(gr)
+    assert len(gr.ranges) == len(gr)
+
+
+def test_genes_filter(ensdb_resource):
+    all_genes = ensdb_resource.genes()
+    if len(all_genes) == 0:
+        pytest.skip("No genes found in DB to filter.")
+
+    target_id = all_genes.mcols.get_column("gene_id")[0]
+    gr_filtered = ensdb_resource.genes(filter={"gene_id": target_id})
+    assert len(gr_filtered) == 1
+    assert gr_filtered.mcols.get_column("gene_id")[0] == target_id
+
+
+def test_transcripts_fetch(ensdb_resource):
+    gr = ensdb_resource.transcripts()
+
+    assert isinstance(gr, GenomicRanges)
+    if len(gr) == 0:
+        print("Warning: No transcripts found.")
+        return
+
+    mcols = gr.mcols
+    assert "tx_id" in mcols.column_names
+    assert "gene_id" in mcols.column_names
+
+
+def test_exons_fetch(ensdb_resource):
+    gr = ensdb_resource.exons()
+
+    assert isinstance(gr, GenomicRanges)
+    if len(gr) == 0:
+        print("Warning: No exons found.")
+        return
+
+    mcols = gr.mcols
+    assert "exon_id" in mcols.column_names
+
+
+def test_combined_filter(ensdb_resource):
+    txs = ensdb_resource.transcripts()
+    if len(txs) == 0:
+        pytest.skip("No transcripts to filter.")
+
+    target_gene = txs.mcols.get_column("gene_id")[0]
+    gr = ensdb_resource.transcripts(filter={"gene_id": target_gene})
+
+    assert len(gr) > 0
+    for gid in gr.mcols.get_column("gene_id"):
+        assert gid == target_gene
+
+
+def test_seqinfo_population(ensdb_resource):
+    gr = ensdb_resource.genes()
+    if len(gr) == 0:
+        pytest.skip("No genes.")
+
+    assert all(start > 0 for start in gr.ranges.start)
diff --git a/tests/test_skeleton.py b/tests/test_skeleton.py
deleted file mode 100644
index 01f1b71..0000000
--- a/tests/test_skeleton.py
+++ /dev/null
@@ -1,25 +0,0 @@
-import pytest
-
-from ensembldb.skeleton import fib, main
-
-__author__ = "Jayaram Kancherla"
-__copyright__ = "Jayaram Kancherla"
-__license__ = "MIT"
-
-
-def test_fib():
-    """API Tests"""
-    assert fib(1) == 1
-    assert fib(2) == 1
-    assert fib(7) == 13
-    with pytest.raises(AssertionError):
-        fib(-10)
-
-
-def test_main(capsys):
-    """CLI Tests"""
-    # capsys is a pytest fixture that allows asserts against stdout/stderr
-    # https://docs.pytest.org/en/stable/capture.html
-    main(["7"])
-    captured = capsys.readouterr()
-    assert "The 7-th Fibonacci number is 13" in captured.out

From 27351cd637440b76c3c2df8cb4267cb8b68e40bd Mon Sep 17 00:00:00 2001
From: Jayaram Kancherla <jayaram.kancherla@gmail.com>
Date: Tue, 20 Jan 2026 16:49:21 -0800
Subject: [PATCH 3/4] disable tests for windows

---
 .github/workflows/run-tests.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/run-tests.yml b/.github/workflows/run-tests.yml
index e0f247d..3d3bc95 100644
--- a/.github/workflows/run-tests.yml
+++ b/.github/workflows/run-tests.yml
@@ -32,7 +32,7 @@ jobs:
         platform:
           - ubuntu-latest
           - macos-latest
-          - windows-latest
+          # - windows-latest
     runs-on: ${{ matrix.platform }}
     name: Python ${{ matrix.python }}, ${{ matrix.platform }}
     steps:

From 2d4a49fa169ef38806afece7b3d0baf799833d49 Mon Sep 17 00:00:00 2001
From: Jayaram Kancherla <jayaram.kancherla@gmail.com>
Date: Tue, 20 Jan 2026 16:59:47 -0800
Subject: [PATCH 4/4] update changelog

---
 CHANGELOG.md | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 205cc5e..874c101 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,7 +1,5 @@
 # Changelog
 
-## Version 0.1 (development)
+## Version 0.0.1
 
-- Feature A added
-- FIX: nasty bug #1729 fixed
-- add your changes here!
+- Initial implementation to access EnsemblDb SQLite files from AnnotationHub.