# Dispatcher hook installed by server.py during startup. It routes an action
# to the per-user persistent worker (or runs it in-process in dev mode) and
# has the same shape as server._worker_exec:
# (username, action, **kwargs) -> awaitable[dict].
_worker_exec = None


def set_worker_exec(fn):
    """Install *fn* as the persistent worker dispatcher.

    Intended to be called from the server lifespan; every later call to
    ``_dispatch`` is forwarded to *fn*.
    """
    global _worker_exec
    _worker_exec = fn


async def _dispatch(username: str, action: str, **kwargs) -> dict:
    """Forward *action* for *username* to the registered worker dispatcher.

    Args:
        username: User on whose behalf the action runs.
        action: Name of the worker action to execute.
        **kwargs: Action-specific arguments, passed through unchanged.

    Returns:
        The dict produced by the registered dispatcher.

    Raises:
        RuntimeError: If no dispatcher has been registered yet.
    """
    dispatcher = _worker_exec
    if dispatcher is not None:
        return await dispatcher(username, action, **kwargs)
    raise RuntimeError(
        "Worker dispatcher not registered — apps module used before server startup"
    )
""" if username: - logger.debug(f"Delegating discover_manifests to worker for user={username} url={url}") - result = await _run_as_user_async(username, { - "action": "discover_manifests", - "url": url, - }) + result = await _dispatch(username, "discover_manifests", url=url) return [ (item["path"], AppManifest(**item["manifest"])) for item in result["manifests"] @@ -305,12 +309,7 @@ async def fetch_app_manifest(url: str, manifest_path: str = "", running as the target user. """ if username: - logger.debug(f"Delegating read_manifest to worker for user={username} url={url}") - result = await _run_as_user_async(username, { - "action": "read_manifest", - "url": url, - "manifest_path": manifest_path, - }) + result = await _dispatch(username, "read_manifest", url=url, manifest_path=manifest_path) return AppManifest(**result["manifest"]) repo_dir = await _ensure_repo_cache(url) @@ -501,12 +500,12 @@ def validate_path_for_shell(path_value: str) -> str | None: return None -def validate_path_in_filestore(path_value: str, session) -> str | None: +def validate_path_in_filestore(path_value: str, fsps: list) -> str | None: """Validate a path exists and is readable within an allowed file share. - Performs syntax checks, then resolves the path against known file share - mounts via the database. Returns an error message string if invalid, - or None if valid. + Performs syntax checks, then resolves the path against the given list of + file share paths. Returns an error message string if invalid, or None if + valid. 
""" # Syntax check first error = validate_path_for_shell(path_value) @@ -523,8 +522,8 @@ def validate_path_in_filestore(path_value: str, session) -> str | None: expanded = os.path.expanduser(normalized) # Resolve to a file share path - from fileglancer.database import find_fsp_from_absolute_path - result = find_fsp_from_absolute_path(session, expanded) + from fileglancer.database import find_fsp_in_paths + result = find_fsp_in_paths(fsps, expanded) if result is None: return "Path is not within an allowed file share" @@ -599,7 +598,8 @@ def _validate_parameter_value(param: AppParameter, value, session=None) -> str: home = home.replace("\\", "/") str_val = home + str_val[1:] if session is not None: - error = validate_path_in_filestore(str_val, session) + fsps = db.get_file_share_paths(session) + error = validate_path_in_filestore(str_val, fsps) else: error = validate_path_for_shell(str_val) if error: @@ -682,85 +682,14 @@ def build_command(entry_point: AppEntryPoint, parameters: dict, session=None) -> return (" \\\n ").join(parts) -def _run_as_user(username: str, request: dict) -> dict: - """Run a worker action as the given user in a subprocess. - - Spawns a child process with the target user's identity using - Python 3.9+ ``user``/``group``/``extra_groups`` subprocess kwargs. - The child runs fileglancer.apps.worker, which creates a fresh - py-cluster-api executor and performs the requested action. - - Returns the parsed JSON response from the worker. - Raises ValueError on worker failure. - """ - pw = pwd.getpwnam(username) - action = request.get("action", "unknown") - - # Only switch identity if running as root; otherwise we're already - # the target user (e.g. development mode). 
- identity_kwargs: dict = {} - if os.geteuid() == 0: - groups = [g.gr_gid for g in grp.getgrall() if username in g.gr_mem] - if pw.pw_gid not in groups: - groups.append(pw.pw_gid) - identity_kwargs = { - "user": pw.pw_uid, - "group": pw.pw_gid, - "extra_groups": groups, - } - logger.debug( - f"Spawning worker action={action} as user={username} " - f"uid={pw.pw_uid} gid={pw.pw_gid} HOME={pw.pw_dir}" - ) - else: - logger.debug( - f"Spawning worker action={action} as current user " - f"(euid={os.geteuid()})" - ) - - result = subprocess.run( - [sys.executable, "-m", "fileglancer.apps.worker"], - input=json.dumps(request).encode(), - capture_output=True, - env={**os.environ, "HOME": pw.pw_dir, "FGC_LOG_LEVEL": get_settings().log_level}, - **identity_kwargs, - ) - - # Forward worker stderr (contains cluster_api and worker logs) - if result.stderr: - for line in result.stderr.decode().rstrip().splitlines(): - logger.debug(f"[worker:{action}] {line}") - - if result.stdout: - try: - response = json.loads(result.stdout) - except json.JSONDecodeError: - raise ValueError( - f"Worker produced invalid JSON: {result.stdout.decode()[:500]}" - ) - else: - response = {} - - if result.returncode != 0: - error = response.get("error", result.stderr.decode()[:500]) - raise ValueError(f"Worker failed: {error}") - - return response - - -async def _run_as_user_async(username: str, request: dict) -> dict: - """Async wrapper for _run_as_user that doesn't block the event loop.""" - return await asyncio.to_thread(_run_as_user, username, request) - - # --- Job Monitoring --- # # The server process runs as root, which cannot execute LSF commands # (bjobs, bsub, bkill) due to HPC root-squash policy. All LSF -# operations go through worker subprocesses running as a real user. +# operations go through the persistent per-user worker pool. # -# The poll loop picks any user with active jobs and spawns a worker -# that runs ``bjobs -u all`` to get statuses for ALL users' jobs. 
+# The poll loop picks any user with active jobs and dispatches ``bjobs +# -u all`` through that user's worker to get statuses for ALL users' jobs. _poll_task = None _POLL_LOCK_PATH = os.path.join(tempfile.gettempdir(), "fileglancer_poll.lock") @@ -779,7 +708,7 @@ async def start_job_monitor(): try: with open(_POLL_LOCK_PATH, "w") as f: fcntl.flock(f, fcntl.LOCK_EX | fcntl.LOCK_NB) - _reconnect_as_any_user(settings) + await _reconnect_as_any_user(settings) fcntl.flock(f, fcntl.LOCK_UN) logger.info("Job monitor started (reconnected existing jobs)") except OSError: @@ -835,8 +764,8 @@ def _get_any_active_username(settings) -> str | None: return None -def _reconnect_as_any_user(settings): - """Reconnect to existing cluster jobs via a worker subprocess. +async def _reconnect_as_any_user(settings): + """Reconnect to existing cluster jobs via the persistent worker. Picks any user with active jobs to run bjobs as. If no active jobs exist, reconnection is skipped (nothing to reconnect to). @@ -848,11 +777,8 @@ def _reconnect_as_any_user(settings): cluster_config = settings.cluster.model_dump(exclude_none=True) try: - result = _run_as_user(username, { - "action": "reconnect", - "cluster_config": cluster_config, - }) - except ValueError as e: + result = await _dispatch(username, "reconnect", cluster_config=cluster_config) + except Exception as e: logger.debug(f"Job reconnection skipped: {e}") return @@ -879,7 +805,7 @@ def _reconnect_as_any_user(settings): async def _poll_loop(settings): - """Periodically poll cluster job statuses via a worker subprocess. + """Periodically poll cluster job statuses via the persistent worker. All uvicorn workers run this loop, but only the one that acquires the file lock actually polls. 
The lock is held through both the @@ -898,7 +824,7 @@ async def _poll_loop(settings): lock_fd = open(_POLL_LOCK_PATH, "w") fcntl.flock(lock_fd, fcntl.LOCK_EX | fcntl.LOCK_NB) try: - has_jobs = _poll_jobs(settings) + has_jobs = await _poll_jobs(settings) except Exception: logger.exception("Error in job poll loop") has_jobs = True # keep polling on error @@ -919,7 +845,7 @@ async def _poll_loop(settings): await asyncio.sleep(settings.cluster.poll_interval) -def _poll_jobs(settings): +async def _poll_jobs(settings): """Run one poll cycle: query bjobs via worker, update DB. Returns True if there are active jobs to continue polling, @@ -966,13 +892,13 @@ def _poll_jobs(settings): cluster_config = settings.cluster.model_dump(exclude_none=True) try: - result = _run_as_user(poll_username, { - "action": "poll", - "cluster_config": cluster_config, - "cluster_job_ids": list(job_statuses.keys()), - "job_statuses": job_statuses, - }) - except ValueError as e: + result = await _dispatch( + poll_username, "poll", + cluster_config=cluster_config, + cluster_job_ids=list(job_statuses.keys()), + job_statuses=job_statuses, + ) + except Exception as e: logger.warning(f"Poll failed: {e}") return True # keep polling on error @@ -1344,18 +1270,18 @@ async def submit_job( resource_spec.stdout_path = str(work_dir / "stdout.log") resource_spec.stderr_path = str(work_dir / "stderr.log") - # Submit to the cluster as the target user. The worker subprocess - # creates the work directory, symlinks the repo, and calls + # Submit to the cluster as the target user via the persistent worker: + # it creates the work directory, symlinks the repo, and calls # executor.submit() — all with the user's identity. 
def _resolve_browse_path(abs_path: str, fsps: Optional[list] = None) -> tuple[str | None, str | None]:
    """Map an absolute path to ``(fsp_name, subpath)`` for building browse links.

    Args:
        abs_path: Absolute filesystem path to resolve.
        fsps: Optional pre-fetched list of file share paths. When given, the
            lookup is done in memory via ``db.find_fsp_in_paths``; when None,
            a database session is opened and the lookup goes through
            ``db.find_fsp_from_absolute_path``.

    Returns:
        ``(name, subpath)`` of the matching file share, or ``(None, None)``
        when the lookup yields no match.
    """
    if fsps is not None:
        # Caller supplied the share list (e.g. a context without DB access).
        match = db.find_fsp_in_paths(fsps, abs_path)
    else:
        db_url = get_settings().db_url
        with db.get_db_session(db_url) as session:
            match = db.find_fsp_from_absolute_path(session, abs_path)

    if not match:
        return None, None
    return match[0].name, match[1]
""" work_dir = _resolve_work_dir(db_job) @@ -1539,21 +1474,21 @@ def get_job_file_paths(db_job: db.JobDB) -> dict[str, dict]: stderr_path = work_dir / "stderr.log" files = { - "script": _make_file_info(script_path, len(scripts) > 0), - "stdout": _make_file_info(str(stdout_path), stdout_path.is_file()), - "stderr": _make_file_info(str(stderr_path), stderr_path.is_file()), + "script": _make_file_info(script_path, len(scripts) > 0, fsps), + "stdout": _make_file_info(str(stdout_path), stdout_path.is_file(), fsps), + "stderr": _make_file_info(str(stderr_path), stderr_path.is_file(), fsps), } # Include service_url file info for service-type jobs if getattr(db_job, 'entry_point_type', 'job') == 'service': service_url_path = work_dir / "service_url" - files["service_url"] = _make_file_info(str(service_url_path), service_url_path.is_file()) + files["service_url"] = _make_file_info(str(service_url_path), service_url_path.is_file(), fsps) return files -def get_job_file_content(job_id: int, username: str, file_type: str) -> Optional[str]: - """Read the content of a job file (script, stdout, or stderr). +def read_job_file(db_job, file_type: str) -> Optional[str]: + """Read the content of a job file given a loaded job record. All job files live in the job's work directory: - *.sh — the generated script (written by cluster-api) @@ -1562,14 +1497,6 @@ def get_job_file_content(job_id: int, username: str, file_type: str) -> Optional Returns the file content as a string, or None if the file doesn't exist. 
def get_job_file_content(job_id: int, username: str, file_type: str) -> Optional[str]:
    """Look up a job by id and owner, then read one of its files.

    Performs its own database lookup and hands the loaded record to
    ``read_job_file``.

    Args:
        job_id: Database id of the job.
        username: Owner used to scope the job lookup.
        file_type: Which job file to read; interpreted by ``read_job_file``.

    Returns:
        Whatever ``read_job_file`` returns for the record: the file content,
        or None if the file does not exist.

    Raises:
        ValueError: If no job with *job_id* is found for *username*.
    """
    db_url = get_settings().db_url
    with db.get_db_session(db_url) as session:
        job_record = db.get_job(session, job_id, username)
        if job_record is None:
            raise ValueError(f"Job {job_id} not found")
        # The record is read after this session block exits, so it is
        # expunged from the session first.
        session.expunge(job_record)
    return read_job_file(job_record, file_type)
- -Protocol: - - Input: JSON on stdin - - Output: JSON on stdout ({"job_id": ...} or {"error": ...}) - - Errors: non-zero exit code + JSON error on stdout - -Usage (called by fileglancer, not directly): - subprocess.run( - [sys.executable, "-m", "fileglancer.apps.worker"], - input=json.dumps(request).encode(), - capture_output=True, - env={**os.environ, "HOME": pw.pw_dir}, - user=uid, group=gid, extra_groups=groups, - ) -""" - -from __future__ import annotations - -import asyncio -import json -import os -import sys -from pathlib import Path - -from cluster_api import create_executor, ResourceSpec - - -async def _submit(request: dict) -> dict: - """Create work dir, symlink repo, submit job via py-cluster-api.""" - config = request["cluster_config"] - # extra_args are handled via ResourceSpec, not config - config.pop("extra_args", None) - - executor = create_executor(**config) - - work_dir = Path(request["work_dir"]) - work_dir.mkdir(parents=True, exist_ok=True) - - # Symlink the cached repo into the work directory - cached_repo_dir = request["cached_repo_dir"] - repo_link = work_dir / "repo" - if repo_link.is_symlink() or repo_link.exists(): - repo_link.unlink() - repo_link.symlink_to(cached_repo_dir) - - # Build ResourceSpec from the serialized dict - res = request["resources"] - resource_spec = ResourceSpec( - cpus=res.get("cpus"), - gpus=res.get("gpus"), - memory=res.get("memory"), - walltime=res.get("walltime"), - queue=res.get("queue"), - work_dir=res["work_dir"], - stdout_path=res.get("stdout_path"), - stderr_path=res.get("stderr_path"), - extra_directives=res.get("extra_directives"), - extra_args=res.get("extra_args"), - ) - - job = await executor.submit( - command=request["command"], - name=request["job_name"], - resources=resource_spec, - ) - - # For local executor, write the subprocess PID to disk so the poll - # loop can check process liveness across worker invocations. - # Only LocalExecutor has a _processes dict; HPC executors don't. 
- processes = getattr(executor, "_processes", None) - if processes is not None: - proc = processes.get(job.job_id) - if proc is not None: - pid_file = work_dir / "job.pid" - pid_file.write_text(str(proc.pid)) - - return {"job_id": job.job_id, "script_path": job.script_path} - - -async def _cancel(request: dict) -> dict: - """Cancel a cluster job via py-cluster-api.""" - config = request["cluster_config"] - config.pop("extra_args", None) - - executor = create_executor(**config) - await executor.cancel(request["job_id"]) - - return {"status": "ok"} - - -async def _poll(request: dict) -> dict: - """Poll job statuses via py-cluster-api (bjobs -u all). - - The executor needs to know which jobs to track, so we seed it with - the cluster_job_ids from the DB before polling. After poll(), we - return the updated statuses and metadata for each tracked job. - """ - from cluster_api._types import JobRecord, JobStatus - - config = request["cluster_config"] - config.pop("extra_args", None) - - executor = create_executor(**config) - - # Seed the executor with stub JobRecords so poll() knows what to track. - # poll() queries bjobs and updates these records in-place. - # Use each job's current DB status so that jobs not found in bjobs - # output keep their real status instead of reverting to PENDING. 
- known_statuses = request.get("job_statuses", {}) - for cid in request["cluster_job_ids"]: - db_status = known_statuses.get(cid, "PENDING").lower() - try: - seed_status = JobStatus(db_status) - except ValueError: - seed_status = JobStatus.PENDING - executor._jobs[cid] = JobRecord( - job_id=cid, - name="", - command="", - status=seed_status, - ) - - await executor.poll() - - # Return the updated state for each job - jobs = {} - for cid, record in executor.jobs.items(): - jobs[cid] = { - "status": record.status.value, - "exit_code": record.exit_code, - "exec_host": record.exec_host, - "start_time": record.start_time.isoformat() if record.start_time else None, - "finish_time": record.finish_time.isoformat() if record.finish_time else None, - } - - return {"jobs": jobs} - - -async def _reconnect(request: dict) -> dict: - """Reconnect to existing jobs via py-cluster-api (bjobs -u all).""" - config = request["cluster_config"] - config.pop("extra_args", None) - - executor = create_executor(**config) - reconnected = await executor.reconnect() - - jobs = {} - for record in reconnected: - jobs[record.job_id] = { - "status": record.status.value, - "name": record.name, - "exit_code": record.exit_code, - "exec_host": record.exec_host, - "start_time": record.start_time.isoformat() if record.start_time else None, - "finish_time": record.finish_time.isoformat() if record.finish_time else None, - } - - return {"jobs": jobs} - - -async def _ensure_repo(request: dict) -> dict: - """Clone or update a GitHub repo in the current user's cache.""" - from fileglancer.apps.core import _ensure_repo_cache - - repo_dir = await _ensure_repo_cache( - url=request["url"], - pull=request.get("pull", False), - ) - return {"repo_dir": str(repo_dir)} - - -async def _discover_manifests(request: dict) -> dict: - """Clone/pull repo and discover all manifests.""" - from fileglancer.apps.core import _ensure_repo_cache, _find_manifests_in_repo - - repo_dir = await _ensure_repo_cache( - url=request["url"], 
- pull=True, - ) - results = _find_manifests_in_repo(repo_dir) - return { - "manifests": [ - {"path": path, "manifest": manifest.model_dump(mode="json")} - for path, manifest in results - ] - } - - -async def _read_manifest(request: dict) -> dict: - """Fetch and read a single manifest from a cached repo.""" - from fileglancer.apps.core import _ensure_repo_cache, _read_manifest_file - - repo_dir = await _ensure_repo_cache( - url=request["url"], - pull=request.get("pull", False), - ) - manifest_path = request.get("manifest_path", "") - target_dir = repo_dir / manifest_path if manifest_path else repo_dir - manifest = _read_manifest_file(target_dir) - return {"manifest": manifest.model_dump(mode="json")} - - -_ACTIONS = { - "submit": _submit, - "cancel": _cancel, - "poll": _poll, - "reconnect": _reconnect, - "ensure_repo": _ensure_repo, - "discover_manifests": _discover_manifests, - "read_manifest": _read_manifest, -} - - -def main(): - import logging - import pwd as _pwd - - # Configure cluster_api logging so debug output reaches the parent - # process via stderr. The parent captures stderr separately. 
- log_level = os.environ.get("FGC_LOG_LEVEL", "INFO").upper() - # Map loguru-specific levels to their nearest stdlib equivalents - _LOGURU_TO_STDLIB = {"TRACE": "DEBUG", "SUCCESS": "INFO"} - log_level = _LOGURU_TO_STDLIB.get(log_level, log_level) - handler = logging.StreamHandler(sys.stderr) - handler.setFormatter(logging.Formatter( - "%(levelname)s | %(name)s:%(funcName)s:%(lineno)d - %(message)s" - )) - cluster_logger = logging.getLogger("cluster_api") - cluster_logger.addHandler(handler) - cluster_logger.setLevel(log_level) - - request = json.loads(sys.stdin.buffer.read()) - action = request.get("action") - - uid = os.getuid() - euid = os.geteuid() - try: - uname = _pwd.getpwuid(uid).pw_name - except KeyError: - uname = str(uid) - print( - f"[worker] action={action} uid={uid}({uname}) euid={euid} " - f"HOME={os.environ.get('HOME', '')}", - file=sys.stderr, - ) - - handler = _ACTIONS.get(action) - if handler is None: - json.dump({"error": f"Unknown action: {action}"}, sys.stdout) - sys.exit(1) - - try: - result = asyncio.run(handler(request)) - json.dump(result, sys.stdout) - except Exception as e: - json.dump({"error": str(e)}, sys.stdout) - sys.exit(1) - - -if __name__ == "__main__": - main() diff --git a/fileglancer/database.py b/fileglancer/database.py index 98ec46c0..4a52ca3f 100644 --- a/fileglancer/database.py +++ b/fileglancer/database.py @@ -576,27 +576,24 @@ def _find_best_fsp_match( return (best_fsp, subpath) -def find_fsp_from_absolute_path(session: Session, absolute_path: str) -> Optional[tuple[FileSharePath, str]]: - """ - Find the file share path that exactly matches the given absolute path. +def find_fsp_in_paths( + paths: list[FileSharePath], absolute_path: str +) -> Optional[tuple[FileSharePath, str]]: + """Match *absolute_path* against an in-memory list of file share paths. - This function iterates through all file share paths and checks if the absolute - path exists within any of them. Returns the first exact match found. 
def find_fsp_from_absolute_path(session: Session, absolute_path: str) -> Optional[tuple[FileSharePath, str]]:
    """Find the file share path that contains *absolute_path*, using the database.

    Thin wrapper: loads every file share path through
    ``get_file_share_paths(session)`` and delegates the matching to
    ``find_fsp_in_paths``, so both entry points share the same matching rules.

    Args:
        session: Database session used to load the file share paths.
        absolute_path: Absolute file path to match against the file shares.

    Returns:
        The result of ``find_fsp_in_paths``: a tuple of
        ``(FileSharePath, relative_subpath)`` when a share matches, otherwise None.
    """
    share_paths = get_file_share_paths(session)
    return find_fsp_in_paths(share_paths, absolute_path)
""" - if not is_symlink or session is None: + if not is_symlink or not fsps: return None # Read the symlink target safely @@ -107,7 +107,7 @@ def _get_symlink_target_fsp(cls, absolute_path: str, is_symlink: bool, session, # Try to find which file share contains this target try: - match = find_fsp_from_absolute_path(session, target) + match = find_fsp_in_paths(fsps, target) if match: fsp, subpath = match @@ -133,7 +133,7 @@ def _get_symlink_target_fsp(cls, absolute_path: str, is_symlink: bool, session, @classmethod def from_stat(cls, path: str, absolute_path: str, lstat_result: os.stat_result, stat_result: os.stat_result, - current_user: str = None, session = None, + current_user: str = None, fsps: Optional[list] = None, root_path: Optional[str] = None, user_groups: Optional[set[str]] = None): """ @@ -145,7 +145,7 @@ def from_stat(cls, path: str, absolute_path: str, lstat_result: Result of os.lstat() on the path (detects symlinks). stat_result: Result of os.stat() or lstat for broken symlinks. current_user: Username for permission checking (optional). - session: Database session for symlink resolution (optional). + fsps: List of FileSharePath objects for symlink target resolution (optional). root_path: Filestore root for defense-in-depth validation in symlink reading (optional). user_groups: Pre-computed user group set to avoid per-file getgrall() (optional). 
""" @@ -177,7 +177,7 @@ def from_stat(cls, path: str, absolute_path: str, hasRead, hasWrite = cls._check_permissions(stat_result, current_user, owner, group, user_groups) # Resolve symlink target to file share path if applicable - symlink_target_fsp = cls._get_symlink_target_fsp(absolute_path, is_symlink, session, root_path) + symlink_target_fsp = cls._get_symlink_target_fsp(absolute_path, is_symlink, fsps, root_path) return cls( name=name, @@ -280,7 +280,8 @@ def _check_path_in_root(self, path: Optional[str]) -> str: return full_path - def _get_file_info_from_path(self, full_path: str, current_user: str = None, session = None, + def _get_file_info_from_path(self, full_path: str, current_user: str = None, + fsps: Optional[list] = None, user_groups: Optional[set[str]] = None) -> FileInfo: """ Get the FileInfo for a file or directory at the given path. @@ -348,13 +349,14 @@ def _is_within_root(p: str) -> bool: return FileInfo.from_stat( rel_path, full_path, lstat_result, stat_result, - current_user=current_user, session=session, + current_user=current_user, fsps=fsps, root_path=self.root_path, user_groups=user_groups, ) - def _file_info_from_direntry(self, entry: os.DirEntry, current_user: str = None, session = None, + def _file_info_from_direntry(self, entry: os.DirEntry, current_user: str = None, + fsps: Optional[list] = None, user_groups: Optional[set[str]] = None) -> FileInfo: """Build a FileInfo from a DirEntry, using entry.stat() instead of os.lstat/os.stat. 
@@ -387,7 +389,7 @@ def _file_info_from_direntry(self, entry: os.DirEntry, current_user: str = None, return FileInfo.from_stat( rel_path, full_path, lstat_result, stat_result, - current_user=current_user, session=session, + current_user=current_user, fsps=fsps, root_path=self.root_path, user_groups=user_groups, ) @@ -430,7 +432,8 @@ def get_absolute_path(self, relative_path: Optional[str] = None) -> str: return os.path.abspath(os.path.join(self.root_path, relative_path)) - def get_file_info(self, path: Optional[str] = None, current_user: str = None, session = None) -> FileInfo: + def get_file_info(self, path: Optional[str] = None, current_user: str = None, + fsps: Optional[list] = None) -> FileInfo: """ Get the FileInfo for a file or directory at the given path. @@ -439,7 +442,7 @@ def get_file_info(self, path: Optional[str] = None, current_user: str = None, se May be None, in which case the root directory is used. current_user (str): The username of the current user for permission checking. May be None, in which case hasRead and hasWrite will be None. - session: Database session for symlink resolution. + fsps: List of FileSharePath objects for symlink target resolution. May be None, in which case symlink_target_fsp will be None. 
Raises: @@ -449,7 +452,7 @@ def get_file_info(self, path: Optional[str] = None, current_user: str = None, se full_path = self.root_path else: full_path = os.path.join(self.root_path, path) - return self._get_file_info_from_path(full_path, current_user, session) + return self._get_file_info_from_path(full_path, current_user, fsps) def check_is_binary(self, path: Optional[str] = None, sample_size: int = 4096) -> bool: @@ -489,7 +492,7 @@ def check_is_binary(self, path: Optional[str] = None, sample_size: int = 4096) - def yield_file_infos_paginated(self, path: Optional[str] = None, current_user: str = None, - session = None, limit: int = 200, + fsps: Optional[list] = None, limit: int = 200, cursor: Optional[str] = None) -> tuple[list[FileInfo], bool, Optional[str], int]: """ Return a page of FileInfo objects for children of the given path. @@ -501,7 +504,7 @@ def yield_file_infos_paginated(self, path: Optional[str] = None, current_user: s Args: path: Relative path to the directory to list. current_user: Username for permission checking. - session: Database session for symlink resolution. + fsps: List of FileSharePath objects for symlink target resolution. limit: Maximum number of entries to return. cursor: Name of the last entry from the previous page. Entries after this name (in sort order) are returned. 
@@ -541,7 +544,7 @@ def yield_file_infos_paginated(self, path: Optional[str] = None, current_user: s for entry in page_entries: try: file_infos.append( - self._file_info_from_direntry(entry, current_user, session, user_groups) + self._file_info_from_direntry(entry, current_user, fsps, user_groups) ) except PermissionError as e: logger.error(f"Permission denied accessing entry: {entry.path}: {e}") @@ -550,7 +553,8 @@ def yield_file_infos_paginated(self, path: Optional[str] = None, current_user: s next_cursor = page_entries[-1].name if has_more and page_entries else None return file_infos, has_more, next_cursor, total_count - def yield_file_infos(self, path: Optional[str] = None, current_user: str = None, session = None) -> Generator[FileInfo, None, None]: + def yield_file_infos(self, path: Optional[str] = None, current_user: str = None, + fsps: Optional[list] = None) -> Generator[FileInfo, None, None]: """ Yield a FileInfo object for each child of the given path. @@ -559,7 +563,7 @@ def yield_file_infos(self, path: Optional[str] = None, current_user: str = None, May be None, in which case the root directory is listed. current_user (str): The username of the current user for permission checking. May be None, in which case hasRead and hasWrite will be None. - session: Database session for symlink resolution. + fsps: List of FileSharePath objects for symlink target resolution. May be None, in which case symlink_target_fsp will be None for symlinks. 
Raises: @@ -577,7 +581,7 @@ def yield_file_infos(self, path: Optional[str] = None, current_user: str = None, entries.sort(key=lambda e: (not e.is_dir(follow_symlinks=False), e.name)) for entry in entries: try: - yield self._file_info_from_direntry(entry, current_user, session, user_groups) + yield self._file_info_from_direntry(entry, current_user, fsps, user_groups) except PermissionError as e: # Skip files we don't have permission to access logger.error(f"Permission denied accessing entry: {entry.path}: {e}") @@ -667,6 +671,35 @@ def stream_file_range(self, path: str = None, start: int = 0, end: int = 0, buff file_handle.close() + @staticmethod + def _stream_contents(file_handle, buffer_size: int = DEFAULT_BUFFER_SIZE) -> Generator[bytes, None, None]: + """Stream from an open file handle. Handle is closed when done.""" + try: + while True: + chunk = file_handle.read(buffer_size) + if not chunk: + break + yield chunk + finally: + file_handle.close() + + @staticmethod + def _stream_range(start: int, end: int, content_length: int, + file_handle, buffer_size: int = DEFAULT_BUFFER_SIZE) -> Generator[bytes, None, None]: + """Stream a byte range from an open file handle. Handle is closed when done.""" + try: + file_handle.seek(start) + remaining = content_length + while remaining > 0: + chunk_size = min(buffer_size, remaining) + chunk = file_handle.read(chunk_size) + if not chunk: + break + yield chunk + remaining -= len(chunk) + finally: + file_handle.close() + def rename_file_or_dir(self, old_path: str, new_path: str): """ Rename a file at the given old path to the new path. 
diff --git a/fileglancer/server.py b/fileglancer/server.py index de3d0c7f..fecbbc11 100644 --- a/fileglancer/server.py +++ b/fileglancer/server.py @@ -1,4 +1,3 @@ -import logging import os import re import sys @@ -39,9 +38,9 @@ from fileglancer.settings import get_settings from fileglancer.issues import create_jira_ticket, get_jira_ticket_details, delete_jira_ticket from fileglancer.utils import format_timestamp, guess_content_type, parse_range_header -from fileglancer.user_context import UserContext, EffectiveUserContext, CurrentUserContext, UserContextConfigurationError from fileglancer.filestore import Filestore, RootCheckError from fileglancer.log import AccessLogMiddleware +from fileglancer.worker_pool import WorkerPool, WorkerError, WorkerDead from fileglancer import sshkeys from x2s3.utils import get_read_access_acl, get_nosuchbucket_response, get_error_response @@ -218,17 +217,55 @@ def create_app(settings): # Define ui_dir for serving static files and SPA ui_dir = PathLib(__file__).parent / "ui" - def _get_user_context(username: str) -> UserContext: - if settings.use_access_flags: - return EffectiveUserContext(username) - else: - return CurrentUserContext() + # Per-user persistent worker pool (only used when use_access_flags=True) + worker_pool = WorkerPool(settings) if settings.use_access_flags else None + + async def _worker_exec(username: str, action: str, **kwargs): + """Dispatch an action to the per-user worker and return the result. + When use_access_flags=True, dispatches to the persistent worker pool. + When use_access_flags=False (dev/test mode), runs the action directly + in the current process since no identity switching is needed. - def _get_file_proxy_client(sharing_key: str, captured_path: str) -> Tuple[FileProxyClient | Response, UserContext | None, str]: - """Resolve a sharing key and captured path to a FileProxyClient. + If the worker opens a file and passes back a file descriptor (e.g. 
+ open_file, s3_open_object), the response dict will contain a + ``_file_handle`` key with an open file object. Callers that don't + need it can ignore this key. - Returns (client, user_context, subpath) on success, or (error_response, None, "") on failure. + Raises HTTPException on worker-level errors or dead workers. + """ + if worker_pool is not None: + try: + worker = await worker_pool.get_worker(username) + return await worker.execute(action, **kwargs) + except WorkerDead as e: + logger.error(f"Worker dead for {username}: {e}") + raise HTTPException(status_code=503, detail="Service temporarily unavailable") + except WorkerError as e: + if e.status_code >= 500: + logger.error(f"Worker error for {username} action={action}: {e}") + raise HTTPException(status_code=e.status_code, detail=str(e)) + else: + # Dev/test mode: run action directly in-process + from fileglancer.user_worker import _ACTIONS, WorkerContext, LocalDbProxy + handler = _ACTIONS.get(action) + if handler is None: + raise HTTPException(status_code=500, detail=f"Unknown action: {action}") + ctx = WorkerContext(username=username, db=LocalDbProxy(settings.db_url)) + request = {"action": action, **kwargs} + try: + result = handler(request, ctx) + except Exception as e: + logger.exception(f"Action handler error for {username} action={action}: {e}") + raise HTTPException(status_code=500, detail=str(e)) + # Strip the raw fd (not meaningful in-process), keep _file_handle + result.pop("_fd", None) + return result + + def _resolve_proxy_info(sharing_key: str, captured_path: str) -> Tuple[dict | Response, str]: + """Resolve a sharing key to proxy info (mount_path, target_name, username, subpath). + + Returns (info_dict, subpath) on success, or (error_response, "") on failure. 
""" def try_strip_prefix(captured: str, prefix: str) -> str | None: if captured == prefix: @@ -241,27 +278,25 @@ def try_strip_prefix(captured: str, prefix: str) -> str | None: proxied_path = db.get_proxied_path_by_sharing_key(session, sharing_key) if not proxied_path: - return get_nosuchbucket_response(captured_path), None, "" + return get_nosuchbucket_response(captured_path), "" - # Match captured_path against the stored url_prefix. - # The unquote() fallback handles clients like Vol-E viewer that send URLs - # with literal % characters instead of proper URL encoding — FastAPI - # auto-decodes path params, so we need to match the decoded form too. subpath = try_strip_prefix(captured_path, proxied_path.url_prefix) if subpath is None: subpath = try_strip_prefix(captured_path, unquote(proxied_path.url_prefix)) if subpath is None: - return get_error_response(404, "NoSuchKey", f"Path mismatch for sharing key {sharing_key}", captured_path), None, "" + return get_error_response(404, "NoSuchKey", f"Path mismatch for sharing key {sharing_key}", captured_path), "" fsp = db.get_file_share_path(session, proxied_path.fsp_name) if not fsp: - return get_error_response(400, "InvalidArgument", f"File share path {proxied_path.fsp_name} not found", captured_path), None, "" - # Expand ~ to user's home directory before constructing the mount path + return get_error_response(400, "InvalidArgument", f"File share path {proxied_path.fsp_name} not found", captured_path), "" expanded_mount_path = os.path.expanduser(fsp.mount_path) mount_path = f"{expanded_mount_path}/{proxied_path.path}" target_name = captured_path.rsplit('/', 1)[-1] if captured_path else os.path.basename(proxied_path.path) - # Use 256KB buffer for better performance on network filesystems - return FileProxyClient(proxy_kwargs={'target_name': target_name}, path=mount_path, buffer_size=256*1024), _get_user_context(proxied_path.username), subpath + return { + "mount_path": mount_path, + "target_name": target_name, + 
"username": proxied_path.username, + }, subpath @asynccontextmanager @@ -271,27 +306,6 @@ async def lifespan(app: FastAPI): logger.remove() logger.add(sys.stderr, level=settings.log_level) - # Intercept stdlib logging (e.g. py-cluster-api) into loguru - class InterceptHandler(logging.Handler): - def emit(self, record): - # Get corresponding loguru level - try: - level = logger.level(record.levelname).name - except ValueError: - level = record.levelno - # Find caller from where the log call originated - frame, depth = logging.currentframe(), 0 - while frame and frame.f_code.co_filename == logging.__file__: - frame = frame.f_back - depth += 1 - logger.opt(depth=depth, exception=record.exc_info).log(level, record.getMessage()) - - # Attach directly to cluster_api logger so uvicorn can't clobber it - cluster_logger = logging.getLogger("cluster_api") - cluster_logger.handlers = [InterceptHandler()] - cluster_logger.setLevel(logging.DEBUG) - cluster_logger.propagate = False - def mask_password(url: str) -> str: """Mask password in database URL for logging""" import re @@ -356,6 +370,16 @@ def mask_password(url: str) -> str: else: logger.debug(f"No notifications file found at {notifications_file}") + # Start worker pool eviction loop (only when using access flags) + if worker_pool is not None: + await worker_pool.start_eviction_loop() + logger.info("Worker pool started") + + # Wire the apps module to dispatch through the persistent worker + # pool (or in-process in dev mode) instead of spawning ephemeral + # subprocesses. 
+ apps_module.set_worker_exec(_worker_exec) + # Start cluster job monitor try: await apps_module.start_job_monitor() @@ -372,6 +396,14 @@ def mask_password(url: str) -> str: except Exception as e: logger.warning(f"Error stopping cluster job monitor: {e}") + # Cleanup: shut down all workers + if worker_pool is not None: + try: + await worker_pool.shutdown_all() + logger.info("Worker pool shut down") + except Exception as e: + logger.warning(f"Error shutting down worker pool: {e}") + app = FastAPI(lifespan=lifespan) # Add custom access log middleware @@ -414,14 +446,6 @@ async def validation_exception_handler(request, exc): return JSONResponse({"error":str(exc)}, status_code=400) - @app.exception_handler(UserContextConfigurationError) - async def user_context_config_error_handler(request, exc): - logger.error(f"User context configuration error: {exc}") - return JSONResponse( - {"error": str(exc)}, - status_code=500 - ) - @app.exception_handler(PermissionError) async def permission_error_handler(request, exc): error_msg = str(exc) @@ -931,14 +955,18 @@ async def create_proxied_path(fsp_name: str = Query(..., description="The name o _validate_url_prefix(url_prefix) sharing_name = url_prefix logger.info(f"Creating proxied path for {username} with sharing name {sharing_name} and fsp_name {fsp_name} and path {path} (url_prefix={url_prefix})") + # Validate the user can access the path via worker + validation = await _worker_exec(username, "validate_proxied_path", fsp_name=fsp_name, path=path) + if "error" in validation: + raise HTTPException(status_code=400, detail=validation["error"]) + with db.get_db_session(settings.db_url) as session: - with _get_user_context(username): # Necessary to validate the user can access the proxied path - try: - new_path = db.create_proxied_path(session, username, sharing_name, fsp_name, path, url_prefix=url_prefix) - return _convert_proxied_path(new_path, settings.external_proxy_url) - except ValueError as e: - logger.error(f"Error creating 
proxied path: {e}") - raise HTTPException(status_code=400, detail=str(e)) + try: + new_path = db.create_proxied_path(session, username, sharing_name, fsp_name, path, url_prefix=url_prefix) + return _convert_proxied_path(new_path, settings.external_proxy_url) + except ValueError as e: + logger.error(f"Error creating proxied path: {e}") + raise HTTPException(status_code=400, detail=str(e)) @app.get("/api/proxied-path", response_model=ProxiedPathResponse, @@ -973,14 +1001,25 @@ async def update_proxied_path(sharing_key: str = Path(..., description="The shar path: Optional[str] = Query(default=None, description="The path relative to the file share path mount point"), sharing_name: Optional[str] = Query(default=None, description="The sharing path of the proxied path"), username: str = Depends(get_current_user)): + # If path or fsp_name is changing, validate access via worker + if path is not None or fsp_name is not None: + with db.get_db_session(settings.db_url) as session: + existing = db.get_proxied_path_by_sharing_key(session, sharing_key) + if existing: + validate_fsp = fsp_name or existing.fsp_name + validate_path = path or existing.path + validation = await _worker_exec(username, "validate_proxied_path", + fsp_name=validate_fsp, path=validate_path) + if "error" in validation: + raise HTTPException(status_code=400, detail=validation["error"]) + with db.get_db_session(settings.db_url) as session: - with _get_user_context(username): # Necessary to validate the user can access the proxied path - try: - updated = db.update_proxied_path(session, username, sharing_key, new_path=path, new_sharing_name=sharing_name, new_fsp_name=fsp_name) - return _convert_proxied_path(updated, settings.external_proxy_url) - except ValueError as e: - logger.error(f"Error updating proxied path: {e}") - raise HTTPException(status_code=400, detail=str(e)) + try: + updated = db.update_proxied_path(session, username, sharing_key, new_path=path, new_sharing_name=sharing_name, 
new_fsp_name=fsp_name) + return _convert_proxied_path(updated, settings.external_proxy_url) + except ValueError as e: + logger.error(f"Error updating proxied path: {e}") + raise HTTPException(status_code=400, detail=str(e)) @app.delete("/api/proxied-path/{sharing_key}", description="Delete a proxied path by sharing key") @@ -1064,42 +1103,78 @@ async def target_dispatcher(request: Request, if 'acl' in request.query_params: return get_read_access_acl() - client, ctx, subpath = _get_file_proxy_client(sharing_key, path) - if isinstance(client, Response): - return client + info, subpath = _resolve_proxy_info(sharing_key, path) + if isinstance(info, Response): + return info if list_type: if list_type == 2: - with ctx: - return await client.list_objects_v2(continuation_token, delimiter, \ - encoding_type, fetch_owner, max_keys, prefix, start_after) + result = await _worker_exec(info["username"], "s3_list_objects", + mount_path=info["mount_path"], + target_name=info["target_name"], + continuation_token=continuation_token, + delimiter=delimiter, + encoding_type=encoding_type, + fetch_owner=fetch_owner, + max_keys=max_keys, + prefix=prefix, + start_after=start_after) + return Response(content=result["body"], media_type=result.get("media_type", "application/xml"), + status_code=result.get("status_code", 200)) else: return get_error_response(400, "InvalidArgument", f"Invalid list type {list_type}", path) else: range_header = request.headers.get("range") - # Open file in user context, then immediately exit - # The file descriptor retains access rights after we switch back to root - with ctx: - handle = await client.open_object(subpath, range_header) - - # Context exited! 
Now stream without holding the lock - if isinstance(handle, ObjectHandle): - return client.stream_object(handle) + result = await _worker_exec( + info["username"], "s3_open_object", + mount_path=info["mount_path"], + target_name=info["target_name"], + path=subpath, + range_header=range_header) + + file_handle = result.pop("_file_handle", None) + if result.get("type") == "handle" and file_handle is not None: + # Worker opened the file and passed the fd via SCM_RIGHTS + from x2s3.client_file import FileObjectHandle, file_iterator + handle = FileObjectHandle( + target_name=result["target_name"], + key=result["key"], + status_code=result["status_code"], + headers=result["headers"], + media_type=result.get("media_type"), + content_length=result["content_length"], + file_handle=file_handle, + start=result["start"], + end=result["end"], + ) + return StreamingResponse( + file_iterator(handle, 256 * 1024), + status_code=handle.status_code, + headers=handle.headers, + media_type=handle.media_type, + ) else: - # Error response (e.g., file not found, invalid range) - return handle + # Error response + return Response( + content=result.get("body", ""), + status_code=result.get("status_code", 500), + headers=result.get("headers", {}), + ) @app.head("/files/{sharing_key}/{path:path}") async def head_object(sharing_key: str, path: str = ''): try: - client, ctx, subpath = _get_file_proxy_client(sharing_key, path) - if isinstance(client, Response): - return client - with ctx: - return await client.head_object(subpath) - except: + info, subpath = _resolve_proxy_info(sharing_key, path) + if isinstance(info, Response): + return info + result = await _worker_exec(info["username"], "s3_head_object", + mount_path=info["mount_path"], + target_name=info["target_name"], + path=subpath) + return Response(headers=result.get("headers", {}), status_code=result.get("status_code", 200)) + except Exception: logger.opt(exception=sys.exc_info()).info("Error requesting head") return 
get_error_response(500, "InternalError", "Error requesting HEAD", path) @@ -1134,59 +1209,18 @@ def _get_filestore(path_name: str): @app.get("/api/profile", description="Get the current user's profile") async def get_profile(username: str = Depends(get_current_user)): """Get the current user's profile""" - with _get_user_context(username): - - # Find matching file share path for home directory - with db.get_db_session(settings.db_url) as session: - paths = db.get_file_share_paths(session) - - # First, check if there's a "home" FSP (for ~/ paths) - home_fsp = next((fsp for fsp in paths if fsp.mount_path in ('~', '~/')), None) - if home_fsp: - home_directory_name = "." - else: - # If no "home" FSP exists, fall back to finding by mount path - home_directory_path = os.path.expanduser(f"~{username}") - home_parent = os.path.dirname(home_directory_path) - home_fsp = next((fsp for fsp in paths if fsp.mount_path == home_parent), None) - home_directory_name = os.path.basename(home_directory_path) - - home_fsp_name = home_fsp.name if home_fsp else None - - # Get user groups - user_groups = [] - try: - user_info = pwd.getpwnam(username) - all_groups = grp.getgrall() - for group in all_groups: - if username in group.gr_mem: - user_groups.append(group.gr_name) - primary_group = grp.getgrgid(user_info.pw_gid).gr_name - if primary_group not in user_groups: - user_groups.append(primary_group) - except Exception as e: - logger.error(f"Error getting groups for user {username}: {str(e)}") - - return { - "username": username, - "homeFileSharePathName": home_fsp_name, - "homeDirectoryName": home_directory_name, - "groups": user_groups, - } + result = await _worker_exec(username, "get_profile") + return result # SSH Key Management endpoints @app.get("/api/ssh-keys", response_model=sshkeys.SSHKeyListResponse, description="List Fileglancer-managed SSH keys") async def list_ssh_keys(username: str = Depends(get_current_user)): """List SSH keys with 'fileglancer' in the comment from 
authorized_keys""" - with _get_user_context(username): - try: - ssh_dir = sshkeys.get_ssh_directory() - keys = sshkeys.list_ssh_keys(ssh_dir) - return sshkeys.SSHKeyListResponse(keys=keys) - except Exception as e: - logger.error(f"Error listing SSH keys for {username}: {e}") - raise HTTPException(status_code=500, detail=str(e)) + result = await _worker_exec(username, "list_ssh_keys") + if "error" in result: + raise HTTPException(status_code=result.get("status_code", 500), detail=result["error"]) + return sshkeys.SSHKeyListResponse(keys=[sshkeys.SSHKeyInfo(**k) for k in result["keys"]]) @app.post("/api/ssh-keys/generate-temp", description="Generate a temporary SSH key and return private key for one-time copy") @@ -1198,21 +1232,23 @@ async def generate_temp_ssh_key( The private key is streamed securely and the temporary files are deleted after the response is sent. Key info is included in response headers: - - X-SSH-Key-Filename - - X-SSH-Key-Type - X-SSH-Key-Fingerprint - X-SSH-Key-Comment """ - with _get_user_context(username): - try: - ssh_dir = sshkeys.get_ssh_directory() - return sshkeys.generate_temp_key_and_authorize(ssh_dir, request.passphrase) - - except RuntimeError as e: - raise HTTPException(status_code=500, detail=str(e)) - except Exception as e: - logger.error(f"Error generating temp SSH key for {username}: {e}") - raise HTTPException(status_code=500, detail=str(e)) + result = await _worker_exec(username, "generate_ssh_key", passphrase=request.passphrase) + if "error" in result: + raise HTTPException(status_code=result.get("status_code", 500), detail=result["error"]) + # Reconstruct the response with headers + headers = {} + if result.get("fingerprint"): + headers["X-SSH-Key-Fingerprint"] = result["fingerprint"] + if result.get("comment"): + headers["X-SSH-Key-Comment"] = result["comment"] + return Response( + content=result["private_key"], + media_type="application/x-pem-file", + headers=headers, + ) # File content endpoint 
@app.head("/api/content/{path_name:path}") @@ -1226,40 +1262,32 @@ async def head_file_content(path_name: str, else: filestore_name, _, subpath = path_name.partition('/') - with _get_user_context(username): - filestore, error = _get_filestore(filestore_name) - if filestore is None: - raise HTTPException(status_code=404 if "not found" in error else 500, detail=error) - - file_name = subpath.split('/')[-1] if subpath else '' - content_type = guess_content_type(file_name) - - try: - file_info = filestore.get_file_info(subpath) - - is_binary = filestore.check_is_binary(subpath) - - headers = { - 'Accept-Ranges': 'bytes', - 'X-Is-Binary': 'true' if is_binary else 'false', - } - - if content_type == 'application/octet-stream' and file_name: - headers['Content-Disposition'] = f'attachment; filename="{file_name}"' - - if hasattr(file_info, 'size') and file_info.size is not None: - headers['Content-Length'] = str(file_info.size) - - if hasattr(file_info, 'last_modified') and file_info.last_modified is not None: - headers['Last-Modified'] = format_timestamp(file_info.last_modified) - - return Response(status_code=200, headers=headers, media_type=content_type) + result = await _worker_exec(username, "head_file", fsp_name=filestore_name, subpath=subpath) + if result.get("redirect"): + redirect_url = f"/api/content/{result['fsp_name']}" + if result.get("subpath"): + redirect_url += f"?subpath={result['subpath']}" + return RedirectResponse(url=redirect_url, status_code=307) + if "error" in result: + raise HTTPException(status_code=result.get("status_code", 500), detail=result["error"]) + + info = result["info"] + file_name = subpath.split('/')[-1] if subpath else '' + content_type = result["content_type"] + is_binary = result["is_binary"] + + headers = { + 'Accept-Ranges': 'bytes', + 'X-Is-Binary': 'true' if is_binary else 'false', + } + if content_type == 'application/octet-stream' and file_name: + headers['Content-Disposition'] = f'attachment; filename="{file_name}"' + if 
info.get("size") is not None: + headers['Content-Length'] = str(info["size"]) + if info.get("last_modified") is not None: + headers['Last-Modified'] = format_timestamp(info["last_modified"]) - except FileNotFoundError: - logger.warning(f"File not found in {filestore_name}: {subpath}") - raise HTTPException(status_code=404, detail="File not found") - except PermissionError: - raise HTTPException(status_code=403, detail="Permission denied") + return Response(status_code=200, headers=headers, media_type=content_type) @app.get("/api/content/{path_name:path}") @@ -1271,59 +1299,22 @@ async def get_file_content(request: Request, path_name: str, subpath: Optional[s else: filestore_name, _, subpath = path_name.partition('/') - # Open file with user's permissions, then immediately release the context - # The file descriptor retains the access rights after we switch back to root - with _get_user_context(username): - filestore, error = _get_filestore(filestore_name) - if filestore is None: - raise HTTPException(status_code=404 if "not found" in error else 500, detail=error) - - file_name = subpath.split('/')[-1] if subpath else '' - content_type = guess_content_type(file_name) - - try: - file_info = filestore.get_file_info(subpath) - if file_info.is_dir: - raise HTTPException(status_code=400, detail="Cannot download directory content") - - file_size = file_info.size + # Worker opens the file as the user and passes the fd back + result = await _worker_exec(username, "open_file", fsp_name=filestore_name, subpath=subpath) - # Open the file while we have user's permissions - full_path = filestore._check_path_in_root(subpath) - file_handle = open(full_path, 'rb') + if result.get("redirect"): + redirect_url = f"/api/content/{result['fsp_name']}" + if result.get("subpath"): + redirect_url += f"?subpath={result['subpath']}" + return RedirectResponse(url=redirect_url, status_code=307) + if "error" in result: + raise HTTPException(status_code=result.get("status_code", 500), 
detail=result["error"]) - except RootCheckError as e: - # Path attempts to escape root directory - try to find a valid fsp for this absolute path - logger.info(f"RootCheckError caught for {filestore_name}/{subpath}: {e}") - - # Use the full_path from the exception - full_path = e.full_path - - with db.get_db_session(settings.db_url) as session: - match = db.find_fsp_from_absolute_path(session, full_path) - - if match: - fsp, relative_subpath = match - # Construct the correct URL - if relative_subpath: - redirect_url = f"/api/content/{fsp.name}?subpath={relative_subpath}" - else: - redirect_url = f"/api/content/{fsp.name}" - - logger.info(f"Redirecting from /api/content/{filestore_name}?subpath={subpath} to {redirect_url}") - return RedirectResponse(url=redirect_url, status_code=307) - - # If no match found, return the original error message - logger.error(f"No valid file share found for path: {full_path}") - raise HTTPException(status_code=400, detail=str(e)) - except FileNotFoundError: - logger.error(f"File not found in {filestore_name}: {subpath}") - raise HTTPException(status_code=404, detail="File or directory not found") - except PermissionError: - raise HTTPException(status_code=403, detail="Permission denied") + file_handle = result.get("_file_handle") - # Context exited! 
We're back to root, but file_handle retains user's access rights - # Now we can stream the file asynchronously without holding the user context lock + file_size = result["file_size"] + content_type = result["content_type"] + file_name = subpath.split('/')[-1] if subpath else '' range_header = request.headers.get('Range') @@ -1348,8 +1339,10 @@ async def get_file_content(request: Request, path_name: str, subpath: Optional[s if content_type == 'application/octet-stream' and file_name: headers['Content-Disposition'] = f'attachment; filename="{file_name}"' + # Stream via the static Filestore._stream_range helper; no Filestore + # instance is constructed (the helper only needs the open file_handle) return StreamingResponse( - filestore.stream_file_range(start=start, end=end, file_handle=file_handle), + Filestore._stream_range(start=start, end=end, content_length=content_length, file_handle=file_handle), status_code=206, headers=headers, media_type=content_type @@ -1364,7 +1357,7 @@ async def get_file_content(request: Request, path_name: str, subpath: Optional[s headers['Content-Disposition'] = f'attachment; filename="{file_name}"' return StreamingResponse( - filestore.stream_file_contents(file_handle=file_handle), + Filestore._stream_contents(file_handle=file_handle), status_code=200, headers=headers, media_type=content_type @@ -1383,73 +1376,27 @@ async def get_file_metadata(path_name: str, subpath: Optional[str] = Query(''), else: filestore_name, _, subpath = path_name.partition('/') - with _get_user_context(username): - filestore, error = _get_filestore(filestore_name) - if filestore is None: - raise HTTPException(status_code=404 if "not found" in error else 500, detail=error) - - try: - with db.get_db_session(settings.db_url) as session: - file_info = filestore.get_file_info(subpath, current_user=username, session=session) - logger.trace(f"File info: {file_info}") - - result = {"info": json.loads(file_info.model_dump_json())} - - if file_info.is_dir: - try: - if limit is not None: - files, has_more, 
next_cursor, total_count = filestore.yield_file_infos_paginated( - subpath, current_user=username, session=session, - limit=limit, cursor=cursor - ) - result["files"] = [json.loads(f.model_dump_json()) for f in files] - result["has_more"] = has_more - result["next_cursor"] = next_cursor - result["total_count"] = total_count - else: - files = list(filestore.yield_file_infos(subpath, current_user=username, session=session)) - result["files"] = [json.loads(f.model_dump_json()) for f in files] - except PermissionError: - logger.error(f"Permission denied when listing files in directory: {subpath}") - result["files"] = [] - result["error"] = "Permission denied when listing directory contents" - return JSONResponse(content=result, status_code=403) - except FileNotFoundError: - logger.error(f"Directory not found during listing: {subpath}") - result["files"] = [] - result["error"] = "Directory contents not found" - return JSONResponse(content=result, status_code=404) - - return result - - except RootCheckError as e: - # Path attempts to escape root directory - try to find a valid fsp for this absolute path - logger.info(f"RootCheckError caught for {filestore_name}/{subpath}: {e}") - - full_path = e.full_path - - with db.get_db_session(settings.db_url) as session: - match = db.find_fsp_from_absolute_path(session, full_path) - - if match: - fsp, relative_subpath = match - # Construct the correct URL - if relative_subpath: - redirect_url = f"/api/files/{fsp.name}?subpath={relative_subpath}" - else: - redirect_url = f"/api/files/{fsp.name}" - - logger.info(f"Redirecting from /api/files/{filestore_name}?subpath={subpath} to {redirect_url}") - return RedirectResponse(url=redirect_url, status_code=307) - - # If no match found, return the original error message - logger.error(f"No valid file share found for path: {full_path}") - raise HTTPException(status_code=400, detail=str(e)) - except FileNotFoundError: - logger.error(f"File or directory not found: {subpath}") - raise 
HTTPException(status_code=404, detail="File or directory not found") - except PermissionError: - raise HTTPException(status_code=403, detail="Permission denied") + if limit is not None: + result = await _worker_exec(username, "list_dir_paged", + fsp_name=filestore_name, subpath=subpath, + limit=limit, cursor=cursor) + else: + result = await _worker_exec(username, "list_dir", + fsp_name=filestore_name, subpath=subpath) + + if result.get("redirect"): + redirect_url = f"/api/files/{result['fsp_name']}" + if result.get("subpath"): + redirect_url += f"?subpath={result['subpath']}" + return RedirectResponse(url=redirect_url, status_code=307) + if "error" in result and "status_code" in result: + status_code = result["status_code"] + if status_code == 403 or status_code == 404: + return JSONResponse(content=result, status_code=status_code) + raise HTTPException(status_code=status_code, detail=result["error"]) + if "error" in result: + raise HTTPException(status_code=500, detail=result["error"]) + return result @app.post("/api/files/{path_name}") @@ -1478,30 +1425,19 @@ async def create_file_or_dir(path_name: str, # Use the validated and sanitized path for all operations validated_subpath = normalized_path - with _get_user_context(username): - filestore, error = _get_filestore(path_name) - if filestore is None: - raise HTTPException(status_code=404 if "not found" in error else 500, detail=error) - - try: - file_type = body.get("type") - if file_type == "directory": - logger.info(f"User {username} creating directory {path_name}/{validated_subpath}") - # Path is validated above - safe to use in filesystem operation - filestore.create_dir(validated_subpath) - elif file_type == "file": - logger.info(f"User {username} creating file {path_name}/{validated_subpath}") - # Path is validated above - safe to use in filesystem operation - filestore.create_empty_file(validated_subpath) - else: - raise HTTPException(status_code=400, detail="Invalid file type") - - except FileExistsError: 
- raise HTTPException(status_code=409, detail="A file or directory with this name already exists") - except PermissionError as e: - raise HTTPException(status_code=403, detail=str(e)) + file_type = body.get("type") + if file_type == "directory": + logger.info(f"User {username} creating directory {path_name}/{validated_subpath}") + result = await _worker_exec(username, "create_dir", fsp_name=path_name, subpath=validated_subpath) + elif file_type == "file": + logger.info(f"User {username} creating file {path_name}/{validated_subpath}") + result = await _worker_exec(username, "create_file", fsp_name=path_name, subpath=validated_subpath) + else: + raise HTTPException(status_code=400, detail="Invalid file type") - return JSONResponse(status_code=201, content={"message": "Item created"}) + if "error" in result: + raise HTTPException(status_code=result.get("status_code", 500), detail=result["error"]) + return JSONResponse(status_code=201, content={"message": "Item created"}) @app.patch("/api/files/{path_name}") @@ -1510,47 +1446,26 @@ async def update_file_or_dir(path_name: str, body: Dict = Body(...), username: str = Depends(get_current_user)): """Handle PATCH requests to rename or update file permissions""" - with _get_user_context(username): - filestore, error = _get_filestore(path_name) - if filestore is None: - raise HTTPException(status_code=404 if "not found" in error else 500, detail=error) - old_file_info = filestore.get_file_info(subpath, username) - new_path = body.get("path") - new_permissions = body.get("permissions") - - # Validate and sanitize new_path if renaming - validated_new_path = new_path - if new_path is not None and new_path != old_file_info.path: - # Normalize the path to prevent path traversal - normalized_new_path = os.path.normpath(new_path) - - # Security check: Ensure normalized path doesn't escape directory - if normalized_new_path.startswith('..') or os.path.isabs(normalized_new_path): - raise HTTPException(status_code=400, detail="New path 
cannot escape the current directory") - - # Validate the filename portion for invalid characters - new_filename = os.path.basename(normalized_new_path) - _validate_filename(new_filename) - - # Use the validated path - validated_new_path = normalized_new_path - - try: - if new_permissions is not None and new_permissions != old_file_info.permissions: - logger.info(f"User {username} changing permissions of {old_file_info.absolute_path} to {new_permissions}") - filestore.change_file_permissions(subpath, new_permissions) - - if new_path is not None and new_path != old_file_info.path: - logger.info(f"User {username} renaming {old_file_info.absolute_path} to {validated_new_path}") - # Path is validated above - safe to use in filesystem operation - filestore.rename_file_or_dir(old_file_info.path, validated_new_path) - - except PermissionError as e: - raise HTTPException(status_code=403, detail=str(e)) - except OSError as e: - raise HTTPException(status_code=500, detail=str(e)) - - return JSONResponse(status_code=200, content={"message": "Permissions changed"}) + new_path = body.get("path") + new_permissions = body.get("permissions") + + # Validate and sanitize new_path if renaming + validated_new_path = new_path + if new_path is not None: + normalized_new_path = os.path.normpath(new_path) + if normalized_new_path.startswith('..') or os.path.isabs(normalized_new_path): + raise HTTPException(status_code=400, detail="New path cannot escape the current directory") + new_filename = os.path.basename(normalized_new_path) + _validate_filename(new_filename) + validated_new_path = normalized_new_path + + result = await _worker_exec(username, "update_file", + fsp_name=path_name, subpath=subpath, + new_path=validated_new_path, + new_permissions=new_permissions) + if "error" in result: + raise HTTPException(status_code=result.get("status_code", 500), detail=result["error"]) + return JSONResponse(status_code=200, content={"message": "Permissions changed"}) 
@app.delete("/api/files/{fsp_name}") @@ -1558,18 +1473,11 @@ async def delete_file_or_dir(fsp_name: str, subpath: Optional[str] = Query(''), username: str = Depends(get_current_user)): """Handle DELETE requests to remove a file or (empty) directory""" - with _get_user_context(username): - filestore, error = _get_filestore(fsp_name) - if filestore is None: - raise HTTPException(status_code=404 if "not found" in error else 500, detail=error) - - try: - logger.info(f"User {username} deleting {filestore.get_root_path()}/{subpath}") - filestore.remove_file_or_dir(subpath) - except PermissionError as e: - raise HTTPException(status_code=403, detail=str(e)) - - return JSONResponse(status_code=200, content={"message": "Item deleted"}) + logger.info(f"User {username} deleting {fsp_name}/{subpath}") + result = await _worker_exec(username, "delete", fsp_name=fsp_name, subpath=subpath) + if "error" in result: + raise HTTPException(status_code=result.get("status_code", 500), detail=result["error"]) + return JSONResponse(status_code=200, content={"message": "Item deleted"}) # --- Apps & Jobs API --- @@ -1755,14 +1663,8 @@ async def update_user_app(body: ManifestFetchRequest, description="Validate file/directory paths for app parameters") async def validate_paths(body: PathValidationRequest, username: str = Depends(get_current_user)): - errors = {} - with _get_user_context(username): - with db.get_db_session(settings.db_url) as session: - for param_key, path_value in body.paths.items(): - error = apps_module.validate_path_in_filestore(path_value, session) - if error: - errors[param_key] = error - return PathValidationResponse(errors=errors) + result = await _worker_exec(username, "validate_paths", paths=body.paths) + return PathValidationResponse(errors=result.get("errors", {})) @app.get("/api/cluster-defaults", description="Get cluster configuration defaults") @@ -1795,8 +1697,7 @@ async def submit_job(body: JobSubmitRequest, container=body.container, 
container_args=body.container_args, ) - with _get_user_context(username): - return _convert_job(db_job) + return _convert_job(db_job) except ValueError as e: raise HTTPException(status_code=400, detail=str(e)) except Exception as e: @@ -1809,8 +1710,17 @@ async def get_jobs(status: Optional[str] = Query(None, description="Filter by st username: str = Depends(get_current_user)): with db.get_db_session(settings.db_url) as session: db_jobs = db.get_jobs_by_username(session, username, status) - with _get_user_context(username): - jobs = [_convert_job(j) for j in db_jobs] + # For listing, read service_url for running service jobs via worker + jobs = [] + for j in db_jobs: + service_url = None + if getattr(j, 'entry_point_type', 'job') == 'service' and j.status == 'RUNNING': + try: + result = await _worker_exec(username, "get_service_url", job_id=j.id) + service_url = result.get("service_url") + except Exception: + pass + jobs.append(_convert_job(j, service_url=service_url)) return JobResponse(jobs=jobs) @app.get("/api/jobs/{job_id}", response_model=Job, @@ -1821,8 +1731,16 @@ async def get_job(job_id: int, db_job = db.get_job(session, job_id, username) if db_job is None: raise HTTPException(status_code=404, detail="Job not found") - with _get_user_context(username): - return _convert_job(db_job, include_files=True) + # Read file paths and service URL via worker + files_result = await _worker_exec(username, "get_job_file_paths", job_id=job_id) + service_url = None + if getattr(db_job, 'entry_point_type', 'job') == 'service' and db_job.status == 'RUNNING': + try: + svc_result = await _worker_exec(username, "get_service_url", job_id=job_id) + service_url = svc_result.get("service_url") + except Exception: + pass + return _convert_job(db_job, service_url=service_url, files=files_result.get("files")) @app.post("/api/jobs/{job_id}/cancel", description="Cancel a running job") @@ -1830,8 +1748,7 @@ async def cancel_job(job_id: int, username: str = Depends(get_current_user)): 
try: db_job = await apps_module.cancel_job(job_id, username) - with _get_user_context(username): - return _convert_job(db_job) + return _convert_job(db_job) except ValueError as e: raise HTTPException(status_code=400, detail=str(e)) @@ -1853,13 +1770,17 @@ async def get_job_file(job_id: int, if file_type not in ("script", "stdout", "stderr"): raise HTTPException(status_code=400, detail="file_type must be script, stdout, or stderr") try: - with _get_user_context(username): - content = apps_module.get_job_file_content(job_id, username, file_type) + result = await _worker_exec(username, "get_job_file", job_id=job_id, file_type=file_type) + if "error" in result: + raise HTTPException(status_code=result.get("status_code", 404), detail=result["error"]) + content = result.get("content") if content is None: raise HTTPException(status_code=404, detail=f"File not found: {file_type}") return PlainTextResponse(content) - except ValueError as e: - raise HTTPException(status_code=404, detail=str(e)) + except HTTPException: + raise + except Exception as e: + raise HTTPException(status_code=500, detail=str(e)) def _ensure_utc(dt: Optional[datetime]) -> Optional[datetime]: """Re-attach UTC timezone to naive datetimes from the DB. @@ -1874,11 +1795,12 @@ def _ensure_utc(dt: Optional[datetime]) -> Optional[datetime]: return dt.replace(tzinfo=UTC) return dt - def _convert_job(db_job: db.JobDB, include_files: bool = False) -> Job: - """Convert a database JobDB to a Pydantic Job model.""" - files = None - if include_files: - files = apps_module.get_job_file_paths(db_job) + def _convert_job(db_job: db.JobDB, service_url: str = None, files: dict = None) -> Job: + """Convert a database JobDB to a Pydantic Job model. + + File-reading fields (service_url, files) must be passed in pre-computed + by the caller, since they require user-context file I/O. 
+ """ return Job( id=db_job.id, app_url=db_job.app_url, @@ -1898,7 +1820,7 @@ def _convert_job(db_job: db.JobDB, include_files: bool = False) -> Job: container_args=db_job.container_args, pull_latest=db_job.pull_latest, cluster_job_id=db_job.cluster_job_id, - service_url=apps_module.get_service_url(db_job), + service_url=service_url, created_at=_ensure_utc(db_job.created_at), started_at=_ensure_utc(db_job.started_at), finished_at=_ensure_utc(db_job.finished_at), diff --git a/fileglancer/settings.py b/fileglancer/settings.py index 3b21bdef..8b597789 100644 --- a/fileglancer/settings.py +++ b/fileglancer/settings.py @@ -100,6 +100,10 @@ class Settings(BaseSettings): # Useful for setting up scheduler env (e.g., /misc/lsf/conf/profile.lsf). env_source_script: Optional[str] = None + # Worker pool settings + worker_pool_max_workers: int = 50 + worker_pool_idle_timeout: int = 300 # seconds + # Cluster / Apps settings (mirrors py-cluster-api ClusterConfig) cluster: ClusterSettings = ClusterSettings() diff --git a/fileglancer/user_context.py b/fileglancer/user_context.py deleted file mode 100644 index b8fb1cac..00000000 --- a/fileglancer/user_context.py +++ /dev/null @@ -1,135 +0,0 @@ -import os -try: - import pwd -except ImportError: - pwd = None # type: ignore[assignment] -from contextlib import AbstractContextManager - -from loguru import logger - -from fileglancer.settings import get_settings - - -class UserContextConfigurationError(PermissionError): - """ - Raised when user context setup fails due to configuration issues. - This happens when use_access_flags=true but the server is not running with sufficient privileges. - """ - def __init__(self, message: str = "Server configuration error: Run the server as root or set use_access_flags=false in config.yaml"): - super().__init__(message) - - -class UserContext(AbstractContextManager): - """ - Base no-op proxy context that does nothing. 
- """ - def __exit__(self, exc_type, exc_val, exc_tb): - return False - - -class CurrentUserContext(UserContext): - """ - A context manager the keeps the current user context. - """ - pass - - -class EffectiveUserContext(UserContext): - """ - A context manager for setting the user and group context for a process using seteuid/setegid access flags. - """ - def __init__(self, username: str): - self.username = username - self._uid = os.getuid() - self._gid = os.getgid() - self._gids = os.getgrouplist(pwd.getpwuid(self._uid).pw_name, self._gid) - self._user = None - - def __enter__(self): - logger.debug( - f"EffectiveUserContext entering for {self.username} " - f"(current euid={os.geteuid()} egid={os.getegid()})" - ) - user = pwd.getpwnam(self.username) - - uid = user.pw_uid - gid = user.pw_gid - gids = os.getgrouplist(self.username, gid) - try: - os.setegid(gid) - except PermissionError as e: - logger.error(f"Failed to set the effective gid: {e}") - settings = get_settings() - if settings.use_access_flags: - raise UserContextConfigurationError() from e - else: - raise - except Exception as e: - logger.error(f"Failed to set the effective gid: {e}") - raise e - - try: - # the maximum number of groups that could be set is os.sysconf("SC_NGROUPS_MAX") - # so if the current user has more than that an exception will be raised - # for now I don't limit this because I want to see if this will happen - if len(gids) > os.sysconf("SC_NGROUPS_MAX"): - logger.warning(( - f"User {self.username} is part of {len(gids)} groups " - f"which is greater than {os.sysconf("SC_NGROUPS_MAX")} " - "so this may result in an error" - )) - os.setgroups(gids) - except PermissionError as e: - logger.error(f"Failed to set the user groups: {e}") - # reset egid first - os.setegid(self._gid) - settings = get_settings() - if settings.use_access_flags: - raise UserContextConfigurationError() from e - else: - raise - except Exception as e: - logger.error(f"Failed to set the user groups: {e}") - # reset 
egid first - os.setegid(self._gid) - raise e - - try: - os.seteuid(uid) - except PermissionError as e: - logger.error(f"Failed to set euid: {e}") - # reset egid - os.setegid(self._gid) - settings = get_settings() - if settings.use_access_flags: - raise UserContextConfigurationError() from e - else: - raise - except Exception as e: - logger.error(f"Failed to set euid: {e}") - # reset egid - os.setegid(self._gid) - raise e - - self._user = user - logger.debug( - f"EffectiveUserContext now running as {self.username} " - f"(euid={os.geteuid()} egid={os.getegid()})" - ) - - return self - - def __exit__(self, exc_type, exc_val, exc_tb): - logger.debug( - f"EffectiveUserContext exiting for {self.username} " - f"(restoring euid={self._uid} egid={self._gid})" - ) - os.seteuid(self._uid) - os.setegid(self._gid) - if len(self._gids) > os.sysconf("SC_NGROUPS_MAX"): - logger.info(f"Truncate original {len(self._gids)} groups to max allowed to set: {os.sysconf("SC_NGROUPS_MAX")}") - os.setgroups(self._gids[:os.sysconf("SC_NGROUPS_MAX")]) - else: - os.setgroups(self._gids) - self._user = None - return False diff --git a/fileglancer/user_worker.py b/fileglancer/user_worker.py new file mode 100644 index 00000000..b0c8889f --- /dev/null +++ b/fileglancer/user_worker.py @@ -0,0 +1,1151 @@ +"""Persistent per-user worker subprocess. + +This module is the entry point for long-lived worker subprocesses spawned by +WorkerPool. Each worker runs as a single user (identity set at fork time) +and handles all user-scoped operations: file I/O, cluster jobs, git ops, +SSH key management, etc. + +Protocol: + - IPC over a Unix socketpair (fd passed via FGC_WORKER_FD env var) + - Messages are length-prefixed JSON: 4-byte big-endian length + JSON body + - Worker reads requests, dispatches to action handlers, writes responses + - {"action": "shutdown"} triggers a clean exit + +The worker runs a synchronous request/response loop. 
Cluster operations +that use py-cluster-api's async API are run via _run_async() per-request. + +In dev/test mode (use_access_flags=False), action handlers are called +directly in-process from server.py, so _run_async() must handle being +called from within an existing event loop. +""" + +from __future__ import annotations + +import asyncio +import ctypes +import ctypes.util +import functools +import json +import logging +import os +try: + import pwd +except ImportError: + pwd = None # type: ignore[assignment] +import socket +import struct +import sys +from pathlib import Path +from typing import Any, Optional + +from loguru import logger + + +# Length-prefix format: 4-byte big-endian unsigned int +_HEADER_FMT = "!I" +_HEADER_SIZE = struct.calcsize(_HEADER_FMT) + + +def _run_async(coro): + """Run an async coroutine, handling both subprocess and in-process contexts. + + In subprocess mode (no running event loop), uses asyncio.run(). + In dev/test mode (called from within FastAPI's event loop), uses + a new event loop in a thread to avoid "cannot be called from a running loop". + """ + try: + asyncio.get_running_loop() + except RuntimeError: + # No running loop — we're in the subprocess + return asyncio.run(coro) + else: + # Inside an event loop (dev/test mode) — run in a thread + import concurrent.futures + with concurrent.futures.ThreadPoolExecutor(max_workers=1) as pool: + future = pool.submit(asyncio.run, coro) + return future.result() + + +def _set_pdeathsig(): + """Ask the kernel to send SIGTERM when our parent process dies. + + This prevents orphan workers if the main process is killed without + a chance to clean up. Linux-only (PR_SET_PDEATHSIG = 1). 
+ """ + try: + import signal + libc = ctypes.CDLL(ctypes.util.find_library("c"), use_errno=True) + PR_SET_PDEATHSIG = 1 + result = libc.prctl(PR_SET_PDEATHSIG, signal.SIGTERM, 0, 0, 0) + if result != 0: + logger.warning(f"prctl(PR_SET_PDEATHSIG) failed: errno={ctypes.get_errno()}") + except Exception as e: + logger.warning(f"Could not set PR_SET_PDEATHSIG: {e}") + + +def _send(sock: socket.socket, data: dict): + """Send a length-prefixed JSON message.""" + payload = json.dumps(data, default=str).encode() + header = struct.pack(_HEADER_FMT, len(payload)) + sock.sendall(header + payload) + + +def _send_with_fd(sock: socket.socket, data: dict, fd: int): + """Send a length-prefixed JSON message with a file descriptor via SCM_RIGHTS.""" + import array as _array + + payload = json.dumps(data, default=str).encode() + header = struct.pack(_HEADER_FMT, len(payload)) + full_msg = header + payload + + fds = _array.array("i", [fd]) + sock.sendmsg( + [full_msg], + [(socket.SOL_SOCKET, socket.SCM_RIGHTS, fds)], + ) + + +def _recv(sock: socket.socket) -> dict: + """Receive a length-prefixed JSON message.""" + header = _recvall(sock, _HEADER_SIZE) + if header is None: + raise ConnectionError("Parent closed connection") + (length,) = struct.unpack(_HEADER_FMT, header) + payload = _recvall(sock, length) + if payload is None: + raise ConnectionError("Parent closed connection mid-message") + return json.loads(payload) + + +def _recvall(sock: socket.socket, n: int) -> Optional[bytes]: + """Read exactly n bytes from socket.""" + data = bytearray() + while len(data) < n: + chunk = sock.recv(n - len(data)) + if not chunk: + return None + data.extend(chunk) + return bytes(data) + + +# --------------------------------------------------------------------------- +# Action registry +# --------------------------------------------------------------------------- + +# Populated by @action(name) decorators on each handler. 
+_ACTIONS: dict[str, Any] = {} + + +def action(name: str): + """Register a handler under the given action name.""" + def decorator(fn): + _ACTIONS[name] = fn + return fn + return decorator + + +# --------------------------------------------------------------------------- +# DB proxy +# --------------------------------------------------------------------------- +# +# Worker subprocesses run as the (untrusted) target user, so we don't give +# them the database URL. Instead, action handlers go through a DbProxy that +# reverse-RPCs back to the parent over the same socket. The parent runs the +# query with full credentials and returns the result. +# +# In dev/test mode (no subprocess), the same handlers use LocalDbProxy which +# calls the database functions directly. +# +# DB_METHODS is the whitelist of methods both proxies expose; the parent's +# inbound dispatch only accepts these names. + +from types import SimpleNamespace + + +def _job_db_to_dict(j) -> dict: + """Serialize a JobDB row to a JSON-safe dict for transport to the worker. + + Only includes fields used by worker-side handlers (read_job_file, + get_job_file_paths, get_service_url) — keep this list minimal so the + worker sees as little of the DB row as possible. + """ + return { + "id": j.id, + "app_name": j.app_name, + "entry_point_id": j.entry_point_id, + "entry_point_type": getattr(j, "entry_point_type", "job"), + "status": j.status, + "work_dir": j.work_dir, + } + + +class LocalDbProxy: + """DbProxy backed by a real database connection. + + Used in dev/test mode (in-process) and on the parent side as the + backend for inbound db_request messages from worker subprocesses. 
+ """ + + def __init__(self, db_url: str): + self.db_url = db_url + + def get_file_share_paths(self): + from fileglancer import database as db + with db.get_db_session(self.db_url) as session: + return db.get_file_share_paths(session) + + def get_job(self, job_id: int, username: str): + from fileglancer import database as db + with db.get_db_session(self.db_url) as session: + j = db.get_job(session, job_id, username) + if j is None: + return None + return SimpleNamespace(**_job_db_to_dict(j)) + + +class RpcDbProxy: + """DbProxy that reverse-RPCs each call back to the parent over the socket.""" + + def __init__(self, sock: socket.socket): + self.sock = sock + + def _call(self, method: str, **kwargs): + _send(self.sock, {"_kind": "db_request", "method": method, "kwargs": kwargs}) + resp = _recv(self.sock) + if resp.get("_kind") != "db_response": + raise RuntimeError(f"Expected db_response, got: {resp!r}") + if not resp.get("ok"): + raise RuntimeError(resp.get("error", "DB request failed")) + return resp.get("result") + + def get_file_share_paths(self): + from fileglancer.model import FileSharePath + rows = self._call("get_file_share_paths") or [] + return [FileSharePath(**r) for r in rows] + + def get_job(self, job_id: int, username: str): + result = self._call("get_job", job_id=job_id, username=username) + return SimpleNamespace(**result) if result else None + + +# Whitelist of method names the parent will dispatch; used by worker_pool +# when handling inbound db_request messages. +DB_METHODS = frozenset({"get_file_share_paths", "get_job"}) + + +def serialize_db_result(method: str, value): + """Convert a LocalDbProxy result into a JSON-serializable form. + + Called by the parent before sending a db_response back to the worker. + Keeps the wire format consistent regardless of which backend produced + the value. 
+ """ + if value is None: + return None + if method == "get_file_share_paths": + # value is a list of FileSharePath models + return [fsp.model_dump(mode="json") for fsp in value] + if method == "get_job": + # value is a SimpleNamespace; vars() gives the underlying dict + return vars(value) + raise ValueError(f"Unknown db method: {method}") + + +# --------------------------------------------------------------------------- +# Action handlers — file operations +# --------------------------------------------------------------------------- + +# Per-worker cache of verified Filestore instances. Once a mount has been +# successfully verified, we trust it for the lifetime of the worker process — +# workers are short-lived enough (idle eviction) that we don't need to handle +# unmount/remount mid-session. +_filestore_cache: dict[str, Any] = {} + +# Per-username cache of supplementary group names. Keyed by username so the +# in-process dev/test path (which serves multiple users from one process) +# stays correct; in subprocess mode there's only ever one entry. +_user_groups_cache: dict[str, list[str]] = {} + + +def _get_user_groups(username: str) -> list[str]: + """Return the supplementary group names for a user. + + Uses os.getgrouplist (NSS initgroups) instead of grp.getgrall, which + enumerates every group on the system and is very slow on LDAP/NIS hosts. + Result is cached for the lifetime of the process. + """ + cached = _user_groups_cache.get(username) + if cached is not None: + return cached + + import grp as _grp + pw = pwd.getpwnam(username) + gids = os.getgrouplist(username, pw.pw_gid) + names = [] + for gid in gids: + try: + names.append(_grp.getgrgid(gid).gr_name) + except KeyError: + continue + _user_groups_cache[username] = names + return names + + +def _get_filestore(fsp_name: str, fsps: list): + """Look up a FileSharePath and return a Filestore instance. 
+ + Returns (filestore, None) on success, or (None, error_response) on failure + where error_response is a dict ready to be returned from a handler. + """ + cached = _filestore_cache.get(fsp_name) + if cached is not None: + return cached, None + + from fileglancer.filestore import Filestore + + fsp = next((f for f in fsps if f.name == fsp_name), None) + if fsp is None: + return None, { + "error": f"File share path '{fsp_name}' not found", + "status_code": 404, + } + + filestore = Filestore(fsp) + try: + filestore.get_file_info(None) + except FileNotFoundError: + return None, { + "error": f"File share path '{fsp_name}' is not mounted", + "status_code": 503, + } + + _filestore_cache[fsp_name] = filestore + return filestore, None + + +def with_filestore(fn): + """Resolve request["fsp_name"] to a Filestore and pass it as the third arg. + + Also passes the freshly-fetched FSP list as the fourth arg, so handlers + that need it (for symlink resolution etc.) don't have to fetch again. + + Returns an error response if the filestore can't be resolved (404 for a + missing fsp, 503 for an unmounted one) so the handler body never has to + deal with the not-found case. 
+ """ + @functools.wraps(fn) + def wrapper(request: dict, ctx: WorkerContext) -> dict: + fsps = ctx.db.get_file_share_paths() + filestore, error_response = _get_filestore(request["fsp_name"], fsps) + if filestore is None: + return error_response + return fn(request, ctx, filestore, fsps) + return wrapper + + +def _redirect_or_error(e, fsps): + """Build a redirect response for a RootCheckError, or fall through to 400.""" + from fileglancer.database import find_fsp_in_paths + match = find_fsp_in_paths(fsps, e.full_path) + if match: + fsp, relative_subpath = match + return {"redirect": True, "fsp_name": fsp.name, "subpath": relative_subpath or ""} + return {"error": str(e), "status_code": 400} + + +@action("list_dir") +@with_filestore +def _action_list_dir(request: dict, ctx: WorkerContext, filestore, fsps) -> dict: + """List directory contents.""" + subpath = request.get("subpath", "") + current_user = ctx.username + + from fileglancer.filestore import RootCheckError + + try: + file_info = filestore.get_file_info(subpath, current_user=current_user, fsps=fsps) + result = {"info": file_info.model_dump(mode="json")} + + if file_info.is_dir: + try: + files = list(filestore.yield_file_infos(subpath, current_user=current_user, fsps=fsps)) + result["files"] = [f.model_dump(mode="json") for f in files] + except PermissionError: + result["files"] = [] + result["error"] = "Permission denied when listing directory contents" + result["status_code"] = 403 + except FileNotFoundError: + result["files"] = [] + result["error"] = "Directory contents not found" + result["status_code"] = 404 + + return result + except RootCheckError as e: + return _redirect_or_error(e, fsps) + except FileNotFoundError: + return {"error": "File or directory not found", "status_code": 404} + except PermissionError: + return {"error": "Permission denied", "status_code": 403} + + +@action("list_dir_paged") +@with_filestore +def _action_list_dir_paged(request: dict, ctx: WorkerContext, filestore, fsps) -> 
dict: + """List directory contents with pagination.""" + subpath = request.get("subpath", "") + current_user = ctx.username + limit = request.get("limit", 200) + cursor = request.get("cursor") + + from fileglancer.filestore import RootCheckError + + try: + file_info = filestore.get_file_info(subpath, current_user=current_user, fsps=fsps) + result = {"info": file_info.model_dump(mode="json")} + + if file_info.is_dir: + try: + files, has_more, next_cursor, total_count = filestore.yield_file_infos_paginated( + subpath, current_user=current_user, fsps=fsps, + limit=limit, cursor=cursor + ) + result["files"] = [f.model_dump(mode="json") for f in files] + result["has_more"] = has_more + result["next_cursor"] = next_cursor + result["total_count"] = total_count + except PermissionError: + result["files"] = [] + result["error"] = "Permission denied when listing directory contents" + result["status_code"] = 403 + except FileNotFoundError: + result["files"] = [] + result["error"] = "Directory contents not found" + result["status_code"] = 404 + + return result + except RootCheckError as e: + return _redirect_or_error(e, fsps) + except FileNotFoundError: + return {"error": "File or directory not found", "status_code": 404} + except PermissionError: + return {"error": "Permission denied", "status_code": 403} + + +@action("get_file_info") +@with_filestore +def _action_get_file_info(request: dict, ctx: WorkerContext, filestore, fsps) -> dict: + """Get metadata for a single file or directory.""" + subpath = request.get("subpath", "") + + try: + file_info = filestore.get_file_info(subpath, current_user=ctx.username, fsps=fsps) + return {"info": file_info.model_dump(mode="json")} + except FileNotFoundError: + return {"error": "File or directory not found", "status_code": 404} + except PermissionError: + return {"error": "Permission denied", "status_code": 403} + + +@action("check_binary") +@with_filestore +def _action_check_binary(request: dict, ctx: WorkerContext, filestore, fsps) 
-> dict: + """Check if a file is binary.""" + subpath = request.get("subpath", "") + + try: + is_binary = filestore.check_is_binary(subpath) + return {"is_binary": is_binary} + except FileNotFoundError: + return {"error": "File or directory not found", "status_code": 404} + except PermissionError: + return {"error": "Permission denied", "status_code": 403} + + +@action("open_file") +@with_filestore +def _action_open_file(request: dict, ctx: WorkerContext, filestore, fsps) -> dict: + """Open a file and return its metadata + open file descriptor. + + The worker opens the file as the user and passes the fd back to the + main process via SCM_RIGHTS. The response includes "_fd" key with the + fd number — the main loop uses _send_with_fd() for these responses. + """ + fsp_name = request["fsp_name"] + subpath = request.get("subpath", "") + + from fileglancer.filestore import RootCheckError + from fileglancer.utils import guess_content_type + + try: + file_info = filestore.get_file_info(subpath) + if file_info.is_dir: + return {"error": "Cannot download directory content", "status_code": 400} + + file_name = subpath.split('/')[-1] if subpath else '' + content_type = guess_content_type(file_name) + full_path = filestore._check_path_in_root(subpath) + + # Open the file — the fd retains user's access rights + file_handle = open(full_path, 'rb') + fd = file_handle.fileno() + + return { + "file_size": file_info.size, + "content_type": content_type, + "_fd": fd, + "_file_handle": file_handle, # kept alive until fd is sent + } + except RootCheckError as e: + return _redirect_or_error(e, fsps) + except FileNotFoundError: + return {"error": f"File or directory not found: {fsp_name}/{subpath}", "status_code": 404} + except PermissionError: + return {"error": f"Permission denied: {fsp_name}/{subpath}", "status_code": 403} + + +@action("head_file") +@with_filestore +def _action_head_file(request: dict, ctx: WorkerContext, filestore, fsps) -> dict: + """Get file metadata and binary 
@action("create_dir")
@with_filestore
def _action_create_dir(request: dict, ctx: WorkerContext, filestore, fsps) -> dict:
    """Create a new directory through the filestore.

    Returns:
        ``{"ok": True}`` on success; otherwise an error dict with
        ``status_code`` 409 (the name is already taken) or 403 (permission
        denied, with the exception text as the message).

    Raises:
        KeyError: if ``request`` has no ``subpath`` key.
    """
    target = request["subpath"]

    try:
        filestore.create_dir(target)
    except FileExistsError:
        return {"error": "A file or directory with this name already exists", "status_code": 409}
    except PermissionError as exc:
        return {"error": str(exc), "status_code": 403}
    return {"ok": True}
@action("update_file")
@with_filestore
def _action_update_file(request: dict, ctx: WorkerContext, filestore, fsps) -> dict:
    """Apply a permission change and/or a rename to one file or directory.

    Request keys (all optional): ``subpath`` (default ``""``), ``new_path``
    and ``new_permissions``. A change is only attempted when the new value is
    given and differs from the current one. Permissions are changed before
    the rename, so the permission call still addresses the old path.

    Returns:
        ``{"info": <metadata before the update>, "ok": True}`` on success,
        otherwise an error dict with ``status_code`` 404 (entry not found),
        403 (permission denied) or 500 (any other ``OSError``).
    """
    subpath = request.get("subpath", "")
    new_path = request.get("new_path")
    new_permissions = request.get("new_permissions")

    try:
        # Keyword form matches how the other handlers call get_file_info.
        old_file_info = filestore.get_file_info(subpath, current_user=ctx.username)
        result = {"info": old_file_info.model_dump(mode="json")}

        if new_permissions is not None and new_permissions != old_file_info.permissions:
            filestore.change_file_permissions(subpath, new_permissions)

        if new_path is not None and new_path != old_file_info.path:
            filestore.rename_file_or_dir(old_file_info.path, new_path)

        result["ok"] = True
        return result
    except FileNotFoundError:
        # FileNotFoundError is an OSError subclass; without this clause a
        # missing file would be reported as a 500 instead of a 404.
        return {"error": "File or directory not found", "status_code": 404}
    except PermissionError as e:
        return {"error": str(e), "status_code": 403}
    except OSError as e:
        return {"error": str(e), "status_code": 500}
@action("get_profile")
def _action_get_profile(request: dict, ctx: WorkerContext) -> dict:
    """Build the profile payload for the worker's user.

    The home file share is the first share whose ``mount_path`` is ``~`` or
    ``~/``; in that case the home directory name is ``"."``. Otherwise the
    expanded ``~<username>`` path is split, and the share mounted at its
    parent directory (if any) is used together with the final path component.

    Returns:
        Dict with ``username``, ``homeFileSharePathName`` (``None`` when no
        share matches), ``homeDirectoryName`` and ``groups`` (empty when the
        group lookup fails; the failure is logged).
    """
    username = ctx.username
    share_paths = ctx.db.get_file_share_paths()

    def _first_share(predicate):
        """Return the first share path accepted by ``predicate``, else None."""
        return next((fsp for fsp in share_paths if predicate(fsp)), None)

    home_fsp = _first_share(lambda fsp: fsp.mount_path in ('~', '~/'))
    if not home_fsp:
        home_path = os.path.expanduser(f"~{username}")
        parent_dir, home_directory_name = os.path.split(home_path)
        home_fsp = _first_share(lambda fsp: fsp.mount_path == parent_dir)
    else:
        home_directory_name = "."

    try:
        groups = _get_user_groups(username)
    except Exception as e:
        logger.error(f"Error getting groups for user {username}: {e}")
        groups = []

    return {
        "username": username,
        "homeFileSharePathName": home_fsp.name if home_fsp else None,
        "homeDirectoryName": home_directory_name,
        "groups": groups,
    }
@action("get_job_file")
def _action_get_job_file(request: dict, ctx: WorkerContext) -> dict:
    """Return the content of one job file.

    Looks up the job ``request["job_id"]`` for ``ctx.username`` and passes it,
    with ``request["file_type"]``, to ``read_job_file``.

    Returns:
        ``{"content": <value from read_job_file>}`` (the value may be
        ``None``), or a 404 error dict when the job is not found.

    Raises:
        KeyError: if ``job_id`` or ``file_type`` is missing from ``request``.
    """
    from fileglancer.apps.core import read_job_file

    job_id, file_type = request["job_id"], request["file_type"]

    job = ctx.db.get_job(job_id, ctx.username)
    if job is None:
        return {"error": f"Job {job_id} not found", "status_code": 404}

    # A None result is forwarded unchanged under the "content" key.
    return {"content": read_job_file(job, file_type)}
@action("s3_list_objects")
def _action_s3_list_objects(request: dict, ctx: WorkerContext) -> dict:
    """Run an S3-style ``list_objects_v2`` against a local mount.

    Required request keys: ``mount_path`` and ``target_name``. Optional keys
    are forwarded to ``list_objects_v2``; ``max_keys`` defaults to 1000 and
    ``buffer_size`` to 256 KiB, every other optional value defaults to ``None``.

    Returns:
        Dict with the decoded response ``body``, its ``media_type`` and its
        ``status_code``.

    Raises:
        KeyError: if ``mount_path`` or ``target_name`` is missing.
    """
    from x2s3.client_file import FileProxyClient

    mount_path = request["mount_path"]
    target_name = request["target_name"]

    proxy = FileProxyClient(
        proxy_kwargs={"target_name": target_name},
        path=mount_path,
        buffer_size=request.get("buffer_size", 256 * 1024),
    )

    passthrough_keys = (
        "continuation_token",
        "delimiter",
        "encoding_type",
        "fetch_owner",
        "prefix",
        "start_after",
    )
    list_kwargs = {key: request.get(key) for key in passthrough_keys}
    list_kwargs["max_keys"] = request.get("max_keys", 1000)

    # list_objects_v2 is a coroutine function; _run_async drives it to completion.
    listing = _run_async(proxy.list_objects_v2(**list_kwargs))

    return {
        "body": listing.body.decode(),
        "media_type": listing.media_type,
        "status_code": listing.status_code,
    }
@action("s3_open_object")
def _action_s3_open_object(request: dict, ctx: WorkerContext) -> dict:
    """Open an object through ``FileProxyClient`` and hand its fd to the parent.

    When ``open_object`` yields a ``FileObjectHandle`` the response has
    ``"type": "handle"`` plus the handle's metadata, the descriptor under
    ``"_fd"`` and the open file object under ``"_file_handle"``. The handle
    is deliberately not closed here because the descriptor must survive the
    transfer. Any other result is reported as ``"type": "error_response"``.

    Raises:
        KeyError: if ``mount_path``, ``target_name`` or ``path`` is missing.
    """
    from x2s3.client_file import FileProxyClient, FileObjectHandle

    mount_path = request["mount_path"]
    target_name = request["target_name"]
    path = request["path"]
    range_header = request.get("range_header")

    proxy = FileProxyClient(
        proxy_kwargs={"target_name": target_name},
        path=mount_path,
        buffer_size=request.get("buffer_size", 256 * 1024),
    )
    outcome = _run_async(proxy.open_object(path, range_header))

    if not isinstance(outcome, FileObjectHandle):
        # Not a file handle: relay whatever response object came back.
        has_body = hasattr(outcome, 'body')
        has_headers = hasattr(outcome, 'headers')
        return {
            "type": "error_response",
            "body": outcome.body.decode() if has_body else "",
            "status_code": outcome.status_code,
            "headers": dict(outcome.headers) if has_headers else {},
        }

    return {
        "type": "handle",
        "status_code": outcome.status_code,
        "headers": outcome.headers,
        "media_type": outcome.media_type,
        "content_length": outcome.content_length,
        "key": outcome.key,
        "target_name": outcome.target_name,
        "start": outcome.start,
        "end": outcome.end,
        "_fd": outcome.file_handle.fileno(),
        "_file_handle": outcome.file_handle,  # kept alive until fd is sent
    }
def _get_executor(request: dict):
    """Create a py-cluster-api executor from ``request["cluster_config"]``.

    Every entry of the config is forwarded to ``create_executor`` as a
    keyword argument, except ``extra_args``, which is left out.

    Raises:
        KeyError: if ``request`` has no ``cluster_config`` key.
    """
    from cluster_api import create_executor

    executor_kwargs = dict(request["cluster_config"].items())
    executor_kwargs.pop("extra_args", None)
    return create_executor(**executor_kwargs)
@action("poll")
def _action_poll(request: dict, ctx: WorkerContext) -> dict:
    """Poll the status of known cluster jobs via py-cluster-api.

    Each id in ``request["cluster_job_ids"]`` is registered with the executor
    first, seeded with its last known status from the optional
    ``job_statuses`` mapping (``PENDING`` when absent or not a valid
    ``JobStatus`` value). After one ``poll()`` the executor's job records are
    serialized.

    Returns:
        ``{"jobs": {job_id: {status, exit_code, exec_host, start_time,
        finish_time}}}`` with timestamps in ISO format or ``None``.
    """
    from cluster_api import JobStatus

    executor = _get_executor(request)

    def _seed_status(raw_status):
        """Map a stored status string to a JobStatus, defaulting to PENDING."""
        lowered = raw_status.lower()
        try:
            return JobStatus(lowered)
        except ValueError:
            return JobStatus.PENDING

    def _iso_or_none(moment):
        """Format a timestamp as ISO text; falsy values become None."""
        return moment.isoformat() if moment else None

    last_known = request.get("job_statuses", {})
    for cluster_id in request["cluster_job_ids"]:
        executor.track(
            cluster_id,
            status=_seed_status(last_known.get(cluster_id, "PENDING")),
        )

    _run_async(executor.poll())

    return {
        "jobs": {
            cluster_id: {
                "status": record.status.value,
                "exit_code": record.exit_code,
                "exec_host": record.exec_host,
                "start_time": _iso_or_none(record.start_time),
                "finish_time": _iso_or_none(record.finish_time),
            }
            for cluster_id, record in executor.jobs.items()
        }
    }
@action("read_manifest")
def _action_read_manifest(request: dict, ctx: WorkerContext) -> dict:
    """Read one manifest from the cached copy of a repository.

    The repo at ``request["url"]`` is made available through
    ``_ensure_repo_cache`` (pulling only when ``request["pull"]`` is truthy).
    The manifest is read from the repo root, or from the ``manifest_path``
    sub-directory when that key is non-empty.

    Returns:
        ``{"manifest": <manifest dumped in JSON mode>}``.
    """
    from fileglancer.apps.core import _ensure_repo_cache, _read_manifest_file

    ensure_coro = _ensure_repo_cache(
        url=request["url"],
        pull=request.get("pull", False),
    )
    repo_dir = _run_async(ensure_coro)

    relative_dir = request.get("manifest_path", "")
    if relative_dir:
        manifest_dir = repo_dir / relative_dir
    else:
        manifest_dir = repo_dir

    parsed = _read_manifest_file(manifest_dir)
    return {"manifest": parsed.model_dump(mode="json")}
def main():
    """Worker entry point — run the request/response loop.

    Sets up orphan prevention and stderr logging, wraps the socket fd named
    by the ``FGC_WORKER_FD`` environment variable, and then serves requests
    until the parent closes the connection or sends a ``shutdown`` action.

    Each request is dispatched through ``_ACTIONS``. A handler result that
    contains ``"_fd"`` is sent with ``_send_with_fd`` so the descriptor
    travels via SCM_RIGHTS; everything else is sent with ``_send``. Handler
    exceptions are logged and reported to the parent as ``{"error": ...}``.
    """
    # Set up orphan prevention
    _set_pdeathsig()

    # Use loguru for worker logging, output to stderr
    log_level = os.environ.get("FGC_LOG_LEVEL", "INFO").upper()
    logger.remove()
    logger.add(sys.stderr, level=log_level)

    # Route cluster_api's stdlib logging to stderr as well
    log_handler = logging.StreamHandler(sys.stderr)
    log_handler.setFormatter(logging.Formatter(
        "%(levelname)s | %(name)s:%(funcName)s:%(lineno)d - %(message)s"
    ))
    cluster_logger = logging.getLogger("cluster_api")
    cluster_logger.addHandler(log_handler)
    cluster_logger.setLevel(log_level)

    # Get the socket fd from environment
    fd = int(os.environ["FGC_WORKER_FD"])
    sock = socket.fromfd(fd, socket.AF_UNIX, socket.SOCK_STREAM)
    os.close(fd)  # close the original fd, we have a dup now

    # Determine username; fall back to the numeric uid if it has no passwd entry
    uid = os.getuid()
    try:
        username = pwd.getpwuid(uid).pw_name
    except KeyError:
        username = str(uid)

    # Worker subprocess never gets DB credentials; all DB access goes back
    # through the parent over the same socket via RpcDbProxy.
    ctx = WorkerContext(username=username, db=RpcDbProxy(sock))

    logger.info(
        f"Worker started for {username} "
        f"(uid={uid} euid={os.geteuid()} pid={os.getpid()})"
    )

    # Main request/response loop
    while True:
        try:
            request = _recv(sock)
        except ConnectionError:
            logger.info("Parent connection closed, exiting")
            break

        # Named action_name so the module-level ``action`` decorator is not shadowed.
        action_name = request.get("action")

        if action_name == "shutdown":
            logger.info("Shutdown requested, exiting")
            break

        action_handler = _ACTIONS.get(action_name)
        if action_handler is None:
            _send(sock, {"error": f"Unknown action: {action_name}"})
            continue

        file_handle = None
        try:
            result = action_handler(request, ctx)

            # If the result contains a file descriptor, send it via SCM_RIGHTS
            result_fd = result.pop("_fd", None)
            file_handle = result.pop("_file_handle", None)
            if result_fd is not None:
                _send_with_fd(sock, result, result_fd)
            else:
                _send(sock, result)
        except Exception as e:
            logger.exception(f"Error handling action {action_name}")
            # The parent treats a falsy "error" value as success, so an
            # exception with an empty message must still yield non-empty text.
            _send(sock, {"error": str(e) or type(e).__name__})
        finally:
            # Close our copy on every path (including a failed send) so the
            # descriptor is not leaked; after a successful send the main
            # process has its own copy.
            if file_handle is not None:
                file_handle.close()

    sock.close()
    logger.info("Worker exiting")
class WorkerError(Exception):
    """Application-level failure reported by a worker.

    The HTTP-style status that accompanies the message is stored on the
    ``status_code`` attribute and defaults to 500.
    """

    def __init__(self, message: str, status_code: int = 500) -> None:
        self.status_code = status_code
        super().__init__(message)
async def execute(self, action: str, **kwargs) -> Any:
    """Send a request to the worker and return the parsed response.

    If the worker sends a file descriptor (SCM_RIGHTS), the response
    dict will contain a ``_file_handle`` key with an open file object.

    Requests are serialized per-worker via an asyncio lock — the worker
    subprocess handles one request at a time, so concurrent callers
    must not interleave their sends/receives on the shared socket.

    Args:
        action: Name of the worker action to run.
        **kwargs: Extra fields merged into the request dict.

    Returns:
        The response dict received from the worker.

    Raises:
        WorkerError: on application-level errors from the worker.
        WorkerDead: if the subprocess has exited, or if the socket
            exchange fails (timeout, reset, closed connection). In the
            latter case the worker is killed and its socket closed.
    """
    if not self.is_alive:
        raise WorkerDead(f"Worker for {self.username} is dead (rc={self.process.returncode})")

    logger.debug(f"Delegating {action} to worker for {self.username} (pid={self.process.pid})")

    async with self._lock:
        self._busy = True
        self.last_activity = time.monotonic()
        try:
            request = {"action": action, **kwargs}
            # get_running_loop() is the supported way to obtain the loop
            # from inside a coroutine.
            loop = asyncio.get_running_loop()
            response = await loop.run_in_executor(
                None, self._send_and_recv, request)

            if response.get("error"):
                # Close any fd that arrived with an error response
                fh = response.pop("_file_handle", None)
                if fh is not None:
                    fh.close()
                raise WorkerError(
                    response["error"],
                    status_code=response.get("status_code", 500),
                )

            return response
        except OSError as e:
            # OSError covers BrokenPipeError, ConnectionResetError,
            # ConnectionError and socket timeouts. After any of these the
            # byte stream may be mid-message, so a later request could read
            # this request's stale reply. Make the worker really dead so it
            # is replaced rather than reused.
            try:
                self.process.kill()
            except OSError:
                pass  # best effort: process already gone
            try:
                self.sock.close()
            except OSError:
                pass  # best effort: socket already unusable
            raise WorkerDead(f"Worker for {self.username} connection lost: {e}") from e
        finally:
            self._busy = False
            self.last_activity = time.monotonic()
+ + All receives use recvmsg() so that SCM_RIGHTS file descriptors are + captured transparently — the ancillary data arrives with the first + bytes of the message, so we must use recvmsg for the header too. + """ + fds = array.array("i") + raw = b"" + try: + while len(raw) < _HEADER_SIZE: + msg, ancdata, flags, addr = self.sock.recvmsg( + max(_HEADER_SIZE - len(raw), 4096), + socket.CMSG_LEN(struct.calcsize("i")), + ) + if not msg: + raise ConnectionError("Worker closed connection") + raw += msg + for cmsg_level, cmsg_type, cmsg_data in ancdata: + if cmsg_level == socket.SOL_SOCKET and cmsg_type == socket.SCM_RIGHTS: + fds.frombytes(cmsg_data[:len(cmsg_data) - (len(cmsg_data) % fds.itemsize)]) + + (length,) = struct.unpack(_HEADER_FMT, raw[:_HEADER_SIZE]) + if length > _MAX_MESSAGE_SIZE: + raise WorkerError(f"Response too large: {length} bytes") + + total_needed = _HEADER_SIZE + length + while len(raw) < total_needed: + msg, ancdata, flags, addr = self.sock.recvmsg( + total_needed - len(raw), + socket.CMSG_LEN(struct.calcsize("i")), + ) + if not msg: + raise ConnectionError("Worker closed connection mid-message") + raw += msg + for cmsg_level, cmsg_type, cmsg_data in ancdata: + if cmsg_level == socket.SOL_SOCKET and cmsg_type == socket.SCM_RIGHTS: + fds.frombytes(cmsg_data[:len(cmsg_data) - (len(cmsg_data) % fds.itemsize)]) + + body = raw[_HEADER_SIZE:_HEADER_SIZE + length] + response = json.loads(body) + except Exception: + # Close any fds received before the error to prevent leaks + for fd_val in fds: + try: + os.close(fd_val) + except OSError: + pass + raise + + if fds: + response["_file_handle"] = os.fdopen(fds[0], "rb") + for extra_fd in fds[1:]: + try: + os.close(extra_fd) + except OSError: + pass + + return response + + def _handle_db_request(self, request: dict): + """Run a DB query on behalf of the worker and send the result back.""" + from fileglancer.user_worker import DB_METHODS, serialize_db_result + + method = request.get("method") + kwargs = 
async def shutdown(self, timeout: float = 5.0):
    """Ask the worker to shut down gracefully, then force-kill if needed.

    A ``shutdown`` action is sent over the IPC socket and the process is
    given ``timeout`` seconds to exit; after that it is killed. The
    parent's socket is closed on every path, including when the process
    had already exited, so a dead worker does not keep its socket open.

    Args:
        timeout: Seconds to wait for a clean exit before killing.
    """
    try:
        if not self.is_alive:
            return

        try:
            # Same length-prefixed framing as every other message.
            self._send_msg({"action": "shutdown"})
        except OSError:
            # Best effort: if the socket is already broken we fall through
            # to waiting for / killing the process.
            pass

        loop = asyncio.get_running_loop()

        # Wait for clean exit
        try:
            await asyncio.wait_for(
                loop.run_in_executor(None, self.process.wait),
                timeout=timeout,
            )
        except asyncio.TimeoutError:
            logger.warning(f"Worker for {self.username} did not exit in {timeout}s, killing")
            self.process.kill()
            # Reap in a thread so a slow exit cannot stall the event loop.
            await loop.run_in_executor(None, self.process.wait)
    finally:
        try:
            self.sock.close()
        except OSError:
            pass
async def _spawn_worker(self, username: str) -> UserWorker:
    """Spawn a new persistent worker subprocess for the given user.

    The child runs ``python -m fileglancer.user_worker`` and talks to the
    parent over one end of a Unix socketpair, whose fd number it receives
    in ``FGC_WORKER_FD``. When the parent runs as root the child is started
    with the target user's uid, gid and group list; otherwise it inherits
    the parent's identity.

    Args:
        username: Account name, resolved through ``pwd.getpwnam``.

    Returns:
        A ``UserWorker`` wrapping the process and the parent's socket end.

    Raises:
        KeyError: if ``username`` has no passwd entry.
        OSError: if the subprocess cannot be started; both socket ends are
            closed before the error propagates.
    """
    pw = pwd.getpwnam(username)

    # Build identity kwargs (only switch if running as root)
    identity_kwargs: dict = {}
    if os.geteuid() == 0:
        groups = os.getgrouplist(username, pw.pw_gid)
        identity_kwargs = {
            "user": pw.pw_uid,
            "group": pw.pw_gid,
            "extra_groups": groups,
        }

    # Create Unix socketpair for IPC
    parent_sock, child_sock = socket.socketpair()

    process = None
    try:
        # Worker subprocess deliberately does NOT receive the DB URL — it
        # runs as the (untrusted) target user, so credentials would be
        # readable from the process environment. All DB queries
        # reverse-RPC back to the parent over the IPC socket instead.
        env = {
            **os.environ,
            "HOME": pw.pw_dir,
            "FGC_LOG_LEVEL": self.settings.log_level,
            "FGC_WORKER_FD": str(child_sock.fileno()),
        }
        env.pop("FGC_DB_URL", None)

        logger.info(
            f"Spawning persistent worker for {username} "
            f"(uid={pw.pw_uid} gid={pw.pw_gid})"
        )

        process = subprocess.Popen(
            [sys.executable, "-m", "fileglancer.user_worker"],
            env=env,
            pass_fds=(child_sock.fileno(),),
            stderr=subprocess.PIPE,
            **identity_kwargs,
        )
    finally:
        # The parent never needs the child's end; on a failed spawn the
        # parent's end would leak too, so release it as well.
        child_sock.close()
        if process is None:
            parent_sock.close()

    # All I/O runs in a thread via run_in_executor. The timeout keeps a hung
    # worker from blocking a thread forever (settimeout alone selects
    # timeout mode, so no separate setblocking call is needed).
    parent_sock.settimeout(120)

    worker = UserWorker(username, process, parent_sock, self._db_proxy)

    # Forward worker stderr to loguru in the background. The event loop only
    # keeps weak references to tasks, so hold a strong one on the worker.
    worker._stderr_task = asyncio.create_task(self._forward_stderr(username, process))

    return worker
+ """ + try: + loop = asyncio.get_event_loop() + while True: + line = await loop.run_in_executor(None, process.stderr.readline) + if not line: + break + logger.debug(f"[worker:{username}] {line.decode().rstrip()}") + except Exception: + logger.exception(f"stderr forwarder for worker {username} crashed") + + async def _evict_lru(self): + """Evict the least-recently-used idle worker.""" + candidates = [ + (w.last_activity, name, w) + for name, w in self._workers.items() + if not w.is_busy + ] + if not candidates: + logger.warning("Worker pool at capacity with no idle workers to evict") + return + + candidates.sort() + _, name, worker = candidates[0] + logger.info(f"Evicting LRU worker for {name}") + await worker.shutdown() + self._workers.pop(name, None) + + async def start_eviction_loop(self): + """Start the background eviction loop.""" + if self._eviction_task is None or self._eviction_task.done(): + self._eviction_task = asyncio.create_task(self._eviction_loop()) + + async def _eviction_loop(self): + """Periodically evict idle workers.""" + while True: + await asyncio.sleep(min(60, self.idle_timeout)) + now = time.monotonic() + to_evict = [] + for name, worker in list(self._workers.items()): + if not worker.is_busy and (now - worker.last_activity) > self.idle_timeout: + to_evict.append(name) + elif not worker.is_alive: + to_evict.append(name) + + for name in to_evict: + worker = self._workers.pop(name, None) + if worker is not None: + if worker.is_alive: + logger.info(f"Evicting idle worker for {name}") + await worker.shutdown() + else: + logger.info(f"Removing dead worker for {name}") + + async def shutdown_all(self): + """Shut down all workers (called during server shutdown).""" + if self._eviction_task and not self._eviction_task.done(): + self._eviction_task.cancel() + try: + await self._eviction_task + except asyncio.CancelledError: + pass + + tasks = [] + for name, worker in list(self._workers.items()): + logger.info(f"Shutting down worker for {name}") + 
tasks.append(worker.shutdown(timeout=10.0)) + + if tasks: + await asyncio.gather(*tasks, return_exceptions=True) + + self._workers.clear() + self._locks.clear() diff --git a/frontend/package-lock.json b/frontend/package-lock.json index 85cacddf..cad34def 100644 --- a/frontend/package-lock.json +++ b/frontend/package-lock.json @@ -1,12 +1,12 @@ { "name": "fileglancer", - "version": "2.8.1", + "version": "2.8.2-a0", "lockfileVersion": 3, "requires": true, "packages": { "": { "name": "fileglancer", - "version": "2.8.1", + "version": "2.8.2-a0", "license": "BSD-3-Clause", "dependencies": { "@material-tailwind/react": "^3.0.0-beta.24", diff --git a/frontend/package.json b/frontend/package.json index e40bd05a..0238a1c3 100644 --- a/frontend/package.json +++ b/frontend/package.json @@ -1,7 +1,7 @@ { "name": "fileglancer", "type": "module", - "version": "2.8.1", + "version": "2.8.2-a0", "description": "Browse, share, and publish files on the Janelia file system", "keywords": [ "ngff", diff --git a/pixi.lock b/pixi.lock index 6e770f6f..897021bd 100644 --- a/pixi.lock +++ b/pixi.lock @@ -161,7 +161,7 @@ environments: - pypi: https://files.pythonhosted.org/packages/fe/3b/8ec5074bcfc450fe84273713b4b0a0dd47c0249358f5d82eb8104ffe2520/multidict-6.7.1-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl - pypi: https://files.pythonhosted.org/packages/b2/f2/889ad4b2408f72fe1a4f6a19491177b30ea7bf1a0fd5f17050ca08cfc882/propcache-0.4.1-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl - pypi: https://files.pythonhosted.org/packages/44/d6/c8b4f53f34e295e45709b7568bf9b9407a612ea30387d35eb9fa84f269b4/psycopg2_binary-2.9.11-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.whl - - pypi: https://files.pythonhosted.org/packages/b8/ff/386c969ebe70942b23136c469db51980ac770ebccd2316803b2a018d99db/py_cluster_api-0.5.0-py3-none-any.whl + - pypi: 
https://files.pythonhosted.org/packages/d8/96/9e5625a7c52e7c41fc8c1f89f40d8b4b29577016fa3c645fbfb42433fe83/py_cluster_api-0.6.0-py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/48/a2/87a0b61a3078be07ab04ec574ef6c683c764590ed0d2a50d00cbb23aeae7/pydantic_settings_yaml-0.2.0-py2.py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/fc/51/727abb13f44c1fcf6d145979e1535a35794db0f6e450a0cb46aa24732fe2/s3transfer-0.16.0-py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/c6/01/c330682e398978b2e151d9a6684826874e09b9319d565b6b6b73986093e5/x2s3-1.2.0-py3-none-any.whl @@ -314,7 +314,7 @@ environments: - pypi: https://files.pythonhosted.org/packages/a7/f9/44d4b3064c65079d2467888794dea218d1601898ac50222ab8a9a8094460/multidict-6.7.1-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl - pypi: https://files.pythonhosted.org/packages/86/bd/47816020d337f4a746edc42fe8d53669965138f39ee117414c7d7a340cfe/propcache-0.4.1-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl - pypi: https://files.pythonhosted.org/packages/13/1e/98874ce72fd29cbde93209977b196a2edae03f8490d1bd8158e7f1daf3a0/psycopg2_binary-2.9.11-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.whl - - pypi: https://files.pythonhosted.org/packages/b8/ff/386c969ebe70942b23136c469db51980ac770ebccd2316803b2a018d99db/py_cluster_api-0.5.0-py3-none-any.whl + - pypi: https://files.pythonhosted.org/packages/d8/96/9e5625a7c52e7c41fc8c1f89f40d8b4b29577016fa3c645fbfb42433fe83/py_cluster_api-0.6.0-py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/48/a2/87a0b61a3078be07ab04ec574ef6c683c764590ed0d2a50d00cbb23aeae7/pydantic_settings_yaml-0.2.0-py2.py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/fc/51/727abb13f44c1fcf6d145979e1535a35794db0f6e450a0cb46aa24732fe2/s3transfer-0.16.0-py3-none-any.whl - pypi: 
https://files.pythonhosted.org/packages/c6/01/c330682e398978b2e151d9a6684826874e09b9319d565b6b6b73986093e5/x2s3-1.2.0-py3-none-any.whl @@ -468,7 +468,7 @@ environments: - pypi: https://files.pythonhosted.org/packages/d5/22/492f2246bb5b534abd44804292e81eeaf835388901f0c574bac4eeec73c5/multidict-6.7.1-cp314-cp314-macosx_10_15_x86_64.whl - pypi: https://files.pythonhosted.org/packages/65/9b/03b04e7d82a5f54fb16113d839f5ea1ede58a61e90edf515f6577c66fa8f/propcache-0.4.1-cp314-cp314-macosx_10_13_x86_64.whl - pypi: https://files.pythonhosted.org/packages/64/12/93ef0098590cf51d9732b4f139533732565704f45bdc1ffa741b7c95fb54/psycopg2_binary-2.9.11-cp314-cp314-macosx_10_13_x86_64.whl - - pypi: https://files.pythonhosted.org/packages/b8/ff/386c969ebe70942b23136c469db51980ac770ebccd2316803b2a018d99db/py_cluster_api-0.5.0-py3-none-any.whl + - pypi: https://files.pythonhosted.org/packages/d8/96/9e5625a7c52e7c41fc8c1f89f40d8b4b29577016fa3c645fbfb42433fe83/py_cluster_api-0.6.0-py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/48/a2/87a0b61a3078be07ab04ec574ef6c683c764590ed0d2a50d00cbb23aeae7/pydantic_settings_yaml-0.2.0-py2.py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/fc/51/727abb13f44c1fcf6d145979e1535a35794db0f6e450a0cb46aa24732fe2/s3transfer-0.16.0-py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/c6/01/c330682e398978b2e151d9a6684826874e09b9319d565b6b6b73986093e5/x2s3-1.2.0-py3-none-any.whl @@ -622,7 +622,7 @@ environments: - pypi: https://files.pythonhosted.org/packages/f1/4f/733c48f270565d78b4544f2baddc2fb2a245e5a8640254b12c36ac7ac68e/multidict-6.7.1-cp314-cp314-macosx_11_0_arm64.whl - pypi: https://files.pythonhosted.org/packages/b2/fa/89a8ef0468d5833a23fff277b143d0573897cf75bd56670a6d28126c7d68/propcache-0.4.1-cp314-cp314-macosx_11_0_arm64.whl - pypi: https://files.pythonhosted.org/packages/7c/a9/9d55c614a891288f15ca4b5209b09f0f01e3124056924e17b81b9fa054cc/psycopg2_binary-2.9.11-cp314-cp314-macosx_11_0_arm64.whl - - pypi: 
https://files.pythonhosted.org/packages/b8/ff/386c969ebe70942b23136c469db51980ac770ebccd2316803b2a018d99db/py_cluster_api-0.5.0-py3-none-any.whl + - pypi: https://files.pythonhosted.org/packages/d8/96/9e5625a7c52e7c41fc8c1f89f40d8b4b29577016fa3c645fbfb42433fe83/py_cluster_api-0.6.0-py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/48/a2/87a0b61a3078be07ab04ec574ef6c683c764590ed0d2a50d00cbb23aeae7/pydantic_settings_yaml-0.2.0-py2.py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/fc/51/727abb13f44c1fcf6d145979e1535a35794db0f6e450a0cb46aa24732fe2/s3transfer-0.16.0-py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/c6/01/c330682e398978b2e151d9a6684826874e09b9319d565b6b6b73986093e5/x2s3-1.2.0-py3-none-any.whl @@ -775,7 +775,7 @@ environments: - pypi: https://files.pythonhosted.org/packages/e0/bf/52f25716bbe93745595800f36fb17b73711f14da59ed0bb2eba141bc9f0f/multidict-6.7.1-cp314-cp314-win_amd64.whl - pypi: https://files.pythonhosted.org/packages/0c/2a/a758b47de253636e1b8aef181c0b4f4f204bf0dd964914fb2af90a95b49b/propcache-0.4.1-cp314-cp314-win_amd64.whl - pypi: https://files.pythonhosted.org/packages/e1/36/9c0c326fe3a4227953dfb29f5d0c8ae3b8eb8c1cd2967aa569f50cb3c61f/psycopg2_binary-2.9.11-cp314-cp314-win_amd64.whl - - pypi: https://files.pythonhosted.org/packages/b8/ff/386c969ebe70942b23136c469db51980ac770ebccd2316803b2a018d99db/py_cluster_api-0.5.0-py3-none-any.whl + - pypi: https://files.pythonhosted.org/packages/d8/96/9e5625a7c52e7c41fc8c1f89f40d8b4b29577016fa3c645fbfb42433fe83/py_cluster_api-0.6.0-py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/48/a2/87a0b61a3078be07ab04ec574ef6c683c764590ed0d2a50d00cbb23aeae7/pydantic_settings_yaml-0.2.0-py2.py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/fc/51/727abb13f44c1fcf6d145979e1535a35794db0f6e450a0cb46aa24732fe2/s3transfer-0.16.0-py3-none-any.whl - pypi: 
https://files.pythonhosted.org/packages/c6/01/c330682e398978b2e151d9a6684826874e09b9319d565b6b6b73986093e5/x2s3-1.2.0-py3-none-any.whl @@ -986,7 +986,7 @@ environments: - pypi: https://files.pythonhosted.org/packages/fe/3b/8ec5074bcfc450fe84273713b4b0a0dd47c0249358f5d82eb8104ffe2520/multidict-6.7.1-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl - pypi: https://files.pythonhosted.org/packages/b2/f2/889ad4b2408f72fe1a4f6a19491177b30ea7bf1a0fd5f17050ca08cfc882/propcache-0.4.1-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl - pypi: https://files.pythonhosted.org/packages/44/d6/c8b4f53f34e295e45709b7568bf9b9407a612ea30387d35eb9fa84f269b4/psycopg2_binary-2.9.11-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.whl - - pypi: https://files.pythonhosted.org/packages/b8/ff/386c969ebe70942b23136c469db51980ac770ebccd2316803b2a018d99db/py_cluster_api-0.5.0-py3-none-any.whl + - pypi: https://files.pythonhosted.org/packages/d8/96/9e5625a7c52e7c41fc8c1f89f40d8b4b29577016fa3c645fbfb42433fe83/py_cluster_api-0.6.0-py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/48/a2/87a0b61a3078be07ab04ec574ef6c683c764590ed0d2a50d00cbb23aeae7/pydantic_settings_yaml-0.2.0-py2.py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/fc/51/727abb13f44c1fcf6d145979e1535a35794db0f6e450a0cb46aa24732fe2/s3transfer-0.16.0-py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/c6/01/c330682e398978b2e151d9a6684826874e09b9319d565b6b6b73986093e5/x2s3-1.2.0-py3-none-any.whl @@ -1183,7 +1183,7 @@ environments: - pypi: https://files.pythonhosted.org/packages/a7/f9/44d4b3064c65079d2467888794dea218d1601898ac50222ab8a9a8094460/multidict-6.7.1-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl - pypi: 
https://files.pythonhosted.org/packages/86/bd/47816020d337f4a746edc42fe8d53669965138f39ee117414c7d7a340cfe/propcache-0.4.1-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl - pypi: https://files.pythonhosted.org/packages/13/1e/98874ce72fd29cbde93209977b196a2edae03f8490d1bd8158e7f1daf3a0/psycopg2_binary-2.9.11-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.whl - - pypi: https://files.pythonhosted.org/packages/b8/ff/386c969ebe70942b23136c469db51980ac770ebccd2316803b2a018d99db/py_cluster_api-0.5.0-py3-none-any.whl + - pypi: https://files.pythonhosted.org/packages/d8/96/9e5625a7c52e7c41fc8c1f89f40d8b4b29577016fa3c645fbfb42433fe83/py_cluster_api-0.6.0-py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/48/a2/87a0b61a3078be07ab04ec574ef6c683c764590ed0d2a50d00cbb23aeae7/pydantic_settings_yaml-0.2.0-py2.py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/fc/51/727abb13f44c1fcf6d145979e1535a35794db0f6e450a0cb46aa24732fe2/s3transfer-0.16.0-py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/c6/01/c330682e398978b2e151d9a6684826874e09b9319d565b6b6b73986093e5/x2s3-1.2.0-py3-none-any.whl @@ -1376,7 +1376,7 @@ environments: - pypi: https://files.pythonhosted.org/packages/d5/22/492f2246bb5b534abd44804292e81eeaf835388901f0c574bac4eeec73c5/multidict-6.7.1-cp314-cp314-macosx_10_15_x86_64.whl - pypi: https://files.pythonhosted.org/packages/65/9b/03b04e7d82a5f54fb16113d839f5ea1ede58a61e90edf515f6577c66fa8f/propcache-0.4.1-cp314-cp314-macosx_10_13_x86_64.whl - pypi: https://files.pythonhosted.org/packages/64/12/93ef0098590cf51d9732b4f139533732565704f45bdc1ffa741b7c95fb54/psycopg2_binary-2.9.11-cp314-cp314-macosx_10_13_x86_64.whl - - pypi: https://files.pythonhosted.org/packages/b8/ff/386c969ebe70942b23136c469db51980ac770ebccd2316803b2a018d99db/py_cluster_api-0.5.0-py3-none-any.whl + - pypi: 
https://files.pythonhosted.org/packages/d8/96/9e5625a7c52e7c41fc8c1f89f40d8b4b29577016fa3c645fbfb42433fe83/py_cluster_api-0.6.0-py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/48/a2/87a0b61a3078be07ab04ec574ef6c683c764590ed0d2a50d00cbb23aeae7/pydantic_settings_yaml-0.2.0-py2.py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/fc/51/727abb13f44c1fcf6d145979e1535a35794db0f6e450a0cb46aa24732fe2/s3transfer-0.16.0-py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/c6/01/c330682e398978b2e151d9a6684826874e09b9319d565b6b6b73986093e5/x2s3-1.2.0-py3-none-any.whl @@ -1569,7 +1569,7 @@ environments: - pypi: https://files.pythonhosted.org/packages/f1/4f/733c48f270565d78b4544f2baddc2fb2a245e5a8640254b12c36ac7ac68e/multidict-6.7.1-cp314-cp314-macosx_11_0_arm64.whl - pypi: https://files.pythonhosted.org/packages/b2/fa/89a8ef0468d5833a23fff277b143d0573897cf75bd56670a6d28126c7d68/propcache-0.4.1-cp314-cp314-macosx_11_0_arm64.whl - pypi: https://files.pythonhosted.org/packages/7c/a9/9d55c614a891288f15ca4b5209b09f0f01e3124056924e17b81b9fa054cc/psycopg2_binary-2.9.11-cp314-cp314-macosx_11_0_arm64.whl - - pypi: https://files.pythonhosted.org/packages/b8/ff/386c969ebe70942b23136c469db51980ac770ebccd2316803b2a018d99db/py_cluster_api-0.5.0-py3-none-any.whl + - pypi: https://files.pythonhosted.org/packages/d8/96/9e5625a7c52e7c41fc8c1f89f40d8b4b29577016fa3c645fbfb42433fe83/py_cluster_api-0.6.0-py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/48/a2/87a0b61a3078be07ab04ec574ef6c683c764590ed0d2a50d00cbb23aeae7/pydantic_settings_yaml-0.2.0-py2.py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/fc/51/727abb13f44c1fcf6d145979e1535a35794db0f6e450a0cb46aa24732fe2/s3transfer-0.16.0-py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/c6/01/c330682e398978b2e151d9a6684826874e09b9319d565b6b6b73986093e5/x2s3-1.2.0-py3-none-any.whl @@ -1762,7 +1762,7 @@ environments: - pypi: 
https://files.pythonhosted.org/packages/e0/bf/52f25716bbe93745595800f36fb17b73711f14da59ed0bb2eba141bc9f0f/multidict-6.7.1-cp314-cp314-win_amd64.whl - pypi: https://files.pythonhosted.org/packages/0c/2a/a758b47de253636e1b8aef181c0b4f4f204bf0dd964914fb2af90a95b49b/propcache-0.4.1-cp314-cp314-win_amd64.whl - pypi: https://files.pythonhosted.org/packages/e1/36/9c0c326fe3a4227953dfb29f5d0c8ae3b8eb8c1cd2967aa569f50cb3c61f/psycopg2_binary-2.9.11-cp314-cp314-win_amd64.whl - - pypi: https://files.pythonhosted.org/packages/b8/ff/386c969ebe70942b23136c469db51980ac770ebccd2316803b2a018d99db/py_cluster_api-0.5.0-py3-none-any.whl + - pypi: https://files.pythonhosted.org/packages/d8/96/9e5625a7c52e7c41fc8c1f89f40d8b4b29577016fa3c645fbfb42433fe83/py_cluster_api-0.6.0-py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/48/a2/87a0b61a3078be07ab04ec574ef6c683c764590ed0d2a50d00cbb23aeae7/pydantic_settings_yaml-0.2.0-py2.py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/fc/51/727abb13f44c1fcf6d145979e1535a35794db0f6e450a0cb46aa24732fe2/s3transfer-0.16.0-py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/c6/01/c330682e398978b2e151d9a6684826874e09b9319d565b6b6b73986093e5/x2s3-1.2.0-py3-none-any.whl @@ -2011,7 +2011,7 @@ environments: - pypi: https://files.pythonhosted.org/packages/fe/3b/8ec5074bcfc450fe84273713b4b0a0dd47c0249358f5d82eb8104ffe2520/multidict-6.7.1-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl - pypi: https://files.pythonhosted.org/packages/b2/f2/889ad4b2408f72fe1a4f6a19491177b30ea7bf1a0fd5f17050ca08cfc882/propcache-0.4.1-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl - pypi: https://files.pythonhosted.org/packages/44/d6/c8b4f53f34e295e45709b7568bf9b9407a612ea30387d35eb9fa84f269b4/psycopg2_binary-2.9.11-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.whl - - pypi: 
https://files.pythonhosted.org/packages/b8/ff/386c969ebe70942b23136c469db51980ac770ebccd2316803b2a018d99db/py_cluster_api-0.5.0-py3-none-any.whl + - pypi: https://files.pythonhosted.org/packages/d8/96/9e5625a7c52e7c41fc8c1f89f40d8b4b29577016fa3c645fbfb42433fe83/py_cluster_api-0.6.0-py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/48/a2/87a0b61a3078be07ab04ec574ef6c683c764590ed0d2a50d00cbb23aeae7/pydantic_settings_yaml-0.2.0-py2.py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/84/47/07046e0acedc12fe2bae79cf6c73ad67f51ae9d67df64d06b0f3eac73d36/pytest_html-4.2.0-py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/3e/43/7e7b2ec865caa92f67b8f0e9231a798d102724ca4c0e1f414316be1c1ef2/pytest_metadata-3.1.1-py3-none-any.whl @@ -2252,7 +2252,7 @@ environments: - pypi: https://files.pythonhosted.org/packages/a7/f9/44d4b3064c65079d2467888794dea218d1601898ac50222ab8a9a8094460/multidict-6.7.1-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl - pypi: https://files.pythonhosted.org/packages/86/bd/47816020d337f4a746edc42fe8d53669965138f39ee117414c7d7a340cfe/propcache-0.4.1-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl - pypi: https://files.pythonhosted.org/packages/13/1e/98874ce72fd29cbde93209977b196a2edae03f8490d1bd8158e7f1daf3a0/psycopg2_binary-2.9.11-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.whl - - pypi: https://files.pythonhosted.org/packages/b8/ff/386c969ebe70942b23136c469db51980ac770ebccd2316803b2a018d99db/py_cluster_api-0.5.0-py3-none-any.whl + - pypi: https://files.pythonhosted.org/packages/d8/96/9e5625a7c52e7c41fc8c1f89f40d8b4b29577016fa3c645fbfb42433fe83/py_cluster_api-0.6.0-py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/48/a2/87a0b61a3078be07ab04ec574ef6c683c764590ed0d2a50d00cbb23aeae7/pydantic_settings_yaml-0.2.0-py2.py3-none-any.whl - pypi: 
https://files.pythonhosted.org/packages/84/47/07046e0acedc12fe2bae79cf6c73ad67f51ae9d67df64d06b0f3eac73d36/pytest_html-4.2.0-py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/3e/43/7e7b2ec865caa92f67b8f0e9231a798d102724ca4c0e1f414316be1c1ef2/pytest_metadata-3.1.1-py3-none-any.whl @@ -2494,7 +2494,7 @@ environments: - pypi: https://files.pythonhosted.org/packages/d5/22/492f2246bb5b534abd44804292e81eeaf835388901f0c574bac4eeec73c5/multidict-6.7.1-cp314-cp314-macosx_10_15_x86_64.whl - pypi: https://files.pythonhosted.org/packages/65/9b/03b04e7d82a5f54fb16113d839f5ea1ede58a61e90edf515f6577c66fa8f/propcache-0.4.1-cp314-cp314-macosx_10_13_x86_64.whl - pypi: https://files.pythonhosted.org/packages/64/12/93ef0098590cf51d9732b4f139533732565704f45bdc1ffa741b7c95fb54/psycopg2_binary-2.9.11-cp314-cp314-macosx_10_13_x86_64.whl - - pypi: https://files.pythonhosted.org/packages/b8/ff/386c969ebe70942b23136c469db51980ac770ebccd2316803b2a018d99db/py_cluster_api-0.5.0-py3-none-any.whl + - pypi: https://files.pythonhosted.org/packages/d8/96/9e5625a7c52e7c41fc8c1f89f40d8b4b29577016fa3c645fbfb42433fe83/py_cluster_api-0.6.0-py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/48/a2/87a0b61a3078be07ab04ec574ef6c683c764590ed0d2a50d00cbb23aeae7/pydantic_settings_yaml-0.2.0-py2.py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/84/47/07046e0acedc12fe2bae79cf6c73ad67f51ae9d67df64d06b0f3eac73d36/pytest_html-4.2.0-py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/3e/43/7e7b2ec865caa92f67b8f0e9231a798d102724ca4c0e1f414316be1c1ef2/pytest_metadata-3.1.1-py3-none-any.whl @@ -2736,7 +2736,7 @@ environments: - pypi: https://files.pythonhosted.org/packages/f1/4f/733c48f270565d78b4544f2baddc2fb2a245e5a8640254b12c36ac7ac68e/multidict-6.7.1-cp314-cp314-macosx_11_0_arm64.whl - pypi: https://files.pythonhosted.org/packages/b2/fa/89a8ef0468d5833a23fff277b143d0573897cf75bd56670a6d28126c7d68/propcache-0.4.1-cp314-cp314-macosx_11_0_arm64.whl - pypi: 
https://files.pythonhosted.org/packages/7c/a9/9d55c614a891288f15ca4b5209b09f0f01e3124056924e17b81b9fa054cc/psycopg2_binary-2.9.11-cp314-cp314-macosx_11_0_arm64.whl - - pypi: https://files.pythonhosted.org/packages/b8/ff/386c969ebe70942b23136c469db51980ac770ebccd2316803b2a018d99db/py_cluster_api-0.5.0-py3-none-any.whl + - pypi: https://files.pythonhosted.org/packages/d8/96/9e5625a7c52e7c41fc8c1f89f40d8b4b29577016fa3c645fbfb42433fe83/py_cluster_api-0.6.0-py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/48/a2/87a0b61a3078be07ab04ec574ef6c683c764590ed0d2a50d00cbb23aeae7/pydantic_settings_yaml-0.2.0-py2.py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/84/47/07046e0acedc12fe2bae79cf6c73ad67f51ae9d67df64d06b0f3eac73d36/pytest_html-4.2.0-py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/3e/43/7e7b2ec865caa92f67b8f0e9231a798d102724ca4c0e1f414316be1c1ef2/pytest_metadata-3.1.1-py3-none-any.whl @@ -2975,7 +2975,7 @@ environments: - pypi: https://files.pythonhosted.org/packages/e0/bf/52f25716bbe93745595800f36fb17b73711f14da59ed0bb2eba141bc9f0f/multidict-6.7.1-cp314-cp314-win_amd64.whl - pypi: https://files.pythonhosted.org/packages/0c/2a/a758b47de253636e1b8aef181c0b4f4f204bf0dd964914fb2af90a95b49b/propcache-0.4.1-cp314-cp314-win_amd64.whl - pypi: https://files.pythonhosted.org/packages/e1/36/9c0c326fe3a4227953dfb29f5d0c8ae3b8eb8c1cd2967aa569f50cb3c61f/psycopg2_binary-2.9.11-cp314-cp314-win_amd64.whl - - pypi: https://files.pythonhosted.org/packages/b8/ff/386c969ebe70942b23136c469db51980ac770ebccd2316803b2a018d99db/py_cluster_api-0.5.0-py3-none-any.whl + - pypi: https://files.pythonhosted.org/packages/d8/96/9e5625a7c52e7c41fc8c1f89f40d8b4b29577016fa3c645fbfb42433fe83/py_cluster_api-0.6.0-py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/48/a2/87a0b61a3078be07ab04ec574ef6c683c764590ed0d2a50d00cbb23aeae7/pydantic_settings_yaml-0.2.0-py2.py3-none-any.whl - pypi: 
https://files.pythonhosted.org/packages/84/47/07046e0acedc12fe2bae79cf6c73ad67f51ae9d67df64d06b0f3eac73d36/pytest_html-4.2.0-py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/3e/43/7e7b2ec865caa92f67b8f0e9231a798d102724ca4c0e1f414316be1c1ef2/pytest_metadata-3.1.1-py3-none-any.whl @@ -3229,7 +3229,7 @@ environments: - pypi: https://files.pythonhosted.org/packages/f3/8d/5e5be3ced1d12966fefb5c4ea3b2a5b480afcea36406559442c6e31d4a48/multidict-6.7.1-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl - pypi: https://files.pythonhosted.org/packages/46/4b/3aae6835b8e5f44ea6a68348ad90f78134047b503765087be2f9912140ea/propcache-0.4.1-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl - pypi: https://files.pythonhosted.org/packages/30/da/4e42788fb811bbbfd7b7f045570c062f49e350e1d1f3df056c3fb5763353/psycopg2_binary-2.9.11-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl - - pypi: https://files.pythonhosted.org/packages/b8/ff/386c969ebe70942b23136c469db51980ac770ebccd2316803b2a018d99db/py_cluster_api-0.5.0-py3-none-any.whl + - pypi: https://files.pythonhosted.org/packages/d8/96/9e5625a7c52e7c41fc8c1f89f40d8b4b29577016fa3c645fbfb42433fe83/py_cluster_api-0.6.0-py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/48/a2/87a0b61a3078be07ab04ec574ef6c683c764590ed0d2a50d00cbb23aeae7/pydantic_settings_yaml-0.2.0-py2.py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/84/47/07046e0acedc12fe2bae79cf6c73ad67f51ae9d67df64d06b0f3eac73d36/pytest_html-4.2.0-py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/3e/43/7e7b2ec865caa92f67b8f0e9231a798d102724ca4c0e1f414316be1c1ef2/pytest_metadata-3.1.1-py3-none-any.whl @@ -3475,7 +3475,7 @@ environments: - pypi: https://files.pythonhosted.org/packages/fd/80/c959c5933adedb9ac15152e4067c702a808ea183a8b64cf8f31af8ad3155/multidict-6.7.1-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl - pypi: 
https://files.pythonhosted.org/packages/9e/d3/6c7ee328b39a81ee877c962469f1e795f9db87f925251efeb0545e0020d0/propcache-0.4.1-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl - pypi: https://files.pythonhosted.org/packages/2d/75/364847b879eb630b3ac8293798e380e441a957c53657995053c5ec39a316/psycopg2_binary-2.9.11-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.whl - - pypi: https://files.pythonhosted.org/packages/b8/ff/386c969ebe70942b23136c469db51980ac770ebccd2316803b2a018d99db/py_cluster_api-0.5.0-py3-none-any.whl + - pypi: https://files.pythonhosted.org/packages/d8/96/9e5625a7c52e7c41fc8c1f89f40d8b4b29577016fa3c645fbfb42433fe83/py_cluster_api-0.6.0-py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/48/a2/87a0b61a3078be07ab04ec574ef6c683c764590ed0d2a50d00cbb23aeae7/pydantic_settings_yaml-0.2.0-py2.py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/84/47/07046e0acedc12fe2bae79cf6c73ad67f51ae9d67df64d06b0f3eac73d36/pytest_html-4.2.0-py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/3e/43/7e7b2ec865caa92f67b8f0e9231a798d102724ca4c0e1f414316be1c1ef2/pytest_metadata-3.1.1-py3-none-any.whl @@ -3717,7 +3717,7 @@ environments: - pypi: https://files.pythonhosted.org/packages/fe/cf/18ef143a81610136d3da8193da9d80bfe1cb548a1e2d1c775f26b23d024a/multidict-6.7.1-cp312-cp312-macosx_10_13_x86_64.whl - pypi: https://files.pythonhosted.org/packages/76/47/8ccf75935f51448ba9a16a71b783eb7ef6b9ee60f5d14c7f8a8a79fbeed7/propcache-0.4.1-cp312-cp312-macosx_10_13_x86_64.whl - pypi: https://files.pythonhosted.org/packages/d8/91/f870a02f51be4a65987b45a7de4c2e1897dd0d01051e2b559a38fa634e3e/psycopg2_binary-2.9.11-cp312-cp312-macosx_10_13_x86_64.whl - - pypi: https://files.pythonhosted.org/packages/b8/ff/386c969ebe70942b23136c469db51980ac770ebccd2316803b2a018d99db/py_cluster_api-0.5.0-py3-none-any.whl + - pypi: 
https://files.pythonhosted.org/packages/d8/96/9e5625a7c52e7c41fc8c1f89f40d8b4b29577016fa3c645fbfb42433fe83/py_cluster_api-0.6.0-py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/48/a2/87a0b61a3078be07ab04ec574ef6c683c764590ed0d2a50d00cbb23aeae7/pydantic_settings_yaml-0.2.0-py2.py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/84/47/07046e0acedc12fe2bae79cf6c73ad67f51ae9d67df64d06b0f3eac73d36/pytest_html-4.2.0-py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/3e/43/7e7b2ec865caa92f67b8f0e9231a798d102724ca4c0e1f414316be1c1ef2/pytest_metadata-3.1.1-py3-none-any.whl @@ -3959,7 +3959,7 @@ environments: - pypi: https://files.pythonhosted.org/packages/a9/65/1caac9d4cd32e8433908683446eebc953e82d22b03d10d41a5f0fefe991b/multidict-6.7.1-cp312-cp312-macosx_11_0_arm64.whl - pypi: https://files.pythonhosted.org/packages/0a/b6/5c9a0e42df4d00bfb4a3cbbe5cf9f54260300c88a0e9af1f47ca5ce17ac0/propcache-0.4.1-cp312-cp312-macosx_11_0_arm64.whl - pypi: https://files.pythonhosted.org/packages/27/fa/cae40e06849b6c9a95eb5c04d419942f00d9eaac8d81626107461e268821/psycopg2_binary-2.9.11-cp312-cp312-macosx_11_0_arm64.whl - - pypi: https://files.pythonhosted.org/packages/b8/ff/386c969ebe70942b23136c469db51980ac770ebccd2316803b2a018d99db/py_cluster_api-0.5.0-py3-none-any.whl + - pypi: https://files.pythonhosted.org/packages/d8/96/9e5625a7c52e7c41fc8c1f89f40d8b4b29577016fa3c645fbfb42433fe83/py_cluster_api-0.6.0-py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/48/a2/87a0b61a3078be07ab04ec574ef6c683c764590ed0d2a50d00cbb23aeae7/pydantic_settings_yaml-0.2.0-py2.py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/84/47/07046e0acedc12fe2bae79cf6c73ad67f51ae9d67df64d06b0f3eac73d36/pytest_html-4.2.0-py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/3e/43/7e7b2ec865caa92f67b8f0e9231a798d102724ca4c0e1f414316be1c1ef2/pytest_metadata-3.1.1-py3-none-any.whl @@ -4199,7 +4199,7 @@ environments: - pypi: 
https://files.pythonhosted.org/packages/80/31/0b2517913687895f5904325c2069d6a3b78f66cc641a86a2baf75a05dcbb/multidict-6.7.1-cp312-cp312-win_amd64.whl - pypi: https://files.pythonhosted.org/packages/54/09/d19cff2a5aaac632ec8fc03737b223597b1e347416934c1b3a7df079784c/propcache-0.4.1-cp312-cp312-win_amd64.whl - pypi: https://files.pythonhosted.org/packages/b1/d2/99b55e85832ccde77b211738ff3925a5d73ad183c0b37bcbbe5a8ff04978/psycopg2_binary-2.9.11-cp312-cp312-win_amd64.whl - - pypi: https://files.pythonhosted.org/packages/b8/ff/386c969ebe70942b23136c469db51980ac770ebccd2316803b2a018d99db/py_cluster_api-0.5.0-py3-none-any.whl + - pypi: https://files.pythonhosted.org/packages/d8/96/9e5625a7c52e7c41fc8c1f89f40d8b4b29577016fa3c645fbfb42433fe83/py_cluster_api-0.6.0-py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/48/a2/87a0b61a3078be07ab04ec574ef6c683c764590ed0d2a50d00cbb23aeae7/pydantic_settings_yaml-0.2.0-py2.py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/84/47/07046e0acedc12fe2bae79cf6c73ad67f51ae9d67df64d06b0f3eac73d36/pytest_html-4.2.0-py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/3e/43/7e7b2ec865caa92f67b8f0e9231a798d102724ca4c0e1f414316be1c1ef2/pytest_metadata-3.1.1-py3-none-any.whl @@ -6313,8 +6313,8 @@ packages: timestamp: 1760972937564 - pypi: ./ name: fileglancer - version: 2.7.0 - sha256: c8b222d4b8a585dfbb7ad278203f4adaa89479d1e7374021c6ea932a97fa0e7b + version: 2.8.1 + sha256: a41eaa22ff769b80dae43fb9f159a7ed90ac0c1839d67154056a8cabc1de6067 requires_dist: - alembic>=1.17.0 - atlassian-python-api>=4.0.7 @@ -6329,7 +6329,7 @@ packages: - packaging>=24.0 - pandas>=2.3.3 - psycopg2-binary>=2.9.10,<3 - - py-cluster-api>=0.5.0 + - py-cluster-api>=0.6.0 - pydantic-settings>=2.11.0 - pydantic>=2.10.6 - python-jose>=3.5.0,<4 @@ -11431,10 +11431,10 @@ packages: - pkg:pypi/pure-eval?source=hash-mapping size: 16668 timestamp: 1733569518868 -- pypi: 
https://files.pythonhosted.org/packages/b8/ff/386c969ebe70942b23136c469db51980ac770ebccd2316803b2a018d99db/py_cluster_api-0.5.0-py3-none-any.whl +- pypi: https://files.pythonhosted.org/packages/d8/96/9e5625a7c52e7c41fc8c1f89f40d8b4b29577016fa3c645fbfb42433fe83/py_cluster_api-0.6.0-py3-none-any.whl name: py-cluster-api - version: 0.5.0 - sha256: 4f0d1475ff39ec78d79fd8dde85d5ce2fe8146d446a34a713eca820a8de903a8 + version: 0.6.0 + sha256: 7be4d871ae6cc80adbbf33ab8f6616f0301281fede7802264b43948dbf929434 requires_dist: - pyyaml - build ; extra == 'release' diff --git a/pyproject.toml b/pyproject.toml index d8c4c1a2..e6cb24c6 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -41,7 +41,7 @@ dependencies = [ "packaging >=24.0", "uvicorn >=0.38.0", "x2s3 >=1.2.0", - "py-cluster-api >=0.5.0" + "py-cluster-api >=0.6.0" ] [project.scripts] diff --git a/tests/test_apps.py b/tests/test_apps.py index 5783cb97..f6031316 100644 --- a/tests/test_apps.py +++ b/tests/test_apps.py @@ -499,9 +499,7 @@ class TestValidatePathInFilestore: def test_path_outside_any_share(self): """Path not in any file share returns an error.""" - mock_session = MagicMock() - with patch("fileglancer.database.find_fsp_from_absolute_path", return_value=None): - error = validate_path_in_filestore("/nowhere/file.txt", mock_session) + error = validate_path_in_filestore("/nowhere/file.txt", []) assert error is not None assert "not within an allowed file share" in error @@ -513,17 +511,12 @@ def test_valid_path_in_share(self, tmp_path): from fileglancer.model import FileSharePath fsp = FileSharePath(zone="test", name="test", mount_path=str(tmp_path)) - - mock_session = MagicMock() - with patch("fileglancer.database.find_fsp_from_absolute_path", - return_value=(fsp, "data.txt")): - error = validate_path_in_filestore(str(test_file), mock_session) + error = validate_path_in_filestore(str(test_file), [fsp]) assert error is None def test_syntax_error_short_circuits(self): - """Metachar in path returns error before DB 
lookup.""" - mock_session = MagicMock() - error = validate_path_in_filestore("/data;bad", mock_session) + """Metachar in path returns error before path lookup.""" + error = validate_path_in_filestore("/data;bad", []) assert error is not None assert "invalid characters" in error diff --git a/tests/test_endpoints.py b/tests/test_endpoints.py index ae814ed7..89f67d73 100644 --- a/tests/test_endpoints.py +++ b/tests/test_endpoints.py @@ -85,6 +85,12 @@ def test_app(temp_dir): from fileglancer.database import dispose_engine dispose_engine(db_url) + # Clear the per-process filestore cache so subsequent tests don't see + # stale Filestore instances pointing at this test's temp directory + from fileglancer.user_worker import _filestore_cache, _user_groups_cache + _filestore_cache.clear() + _user_groups_cache.clear() + # Restore original get_settings and clear cache fileglancer.settings.get_settings = original_get_settings fileglancer.database.get_settings = original_get_settings diff --git a/tests/test_filestore.py b/tests/test_filestore.py index 53765afc..18997f61 100644 --- a/tests/test_filestore.py +++ b/tests/test_filestore.py @@ -5,12 +5,9 @@ import tempfile import shutil -from contextlib import contextmanager -from unittest.mock import Mock, MagicMock, patch -from typing import List, Callable, Optional +from unittest.mock import MagicMock, patch from conftest import requires_symlinks -from fileglancer import database from fileglancer.filestore import Filestore, FileInfo from fileglancer.model import FileSharePath @@ -46,36 +43,6 @@ def filestore(test_dir): return Filestore(file_share_path) -@contextmanager -def mock_database_for_symlinks(file_share_paths: List[FileSharePath], - find_fsp_func: Optional[Callable] = None): - """ - Context manager to mock database functions for symlink resolution tests. - - Args: - file_share_paths: List of FileSharePath objects to return from get_file_share_paths - find_fsp_func: Optional custom function for find_fsp_from_absolute_path. 
- If None, the original function is preserved (not mocked). - """ - original_get_paths = database.get_file_share_paths - original_find = database.find_fsp_from_absolute_path - - def mock_get_file_share_paths(session, fsp_name=None): - paths = file_share_paths - if fsp_name: - paths = [p for p in paths if p.name == fsp_name] - return paths - - try: - database.get_file_share_paths = mock_get_file_share_paths - if find_fsp_func is not None: - database.find_fsp_from_absolute_path = find_fsp_func - yield - finally: - database.get_file_share_paths = original_get_paths - database.find_fsp_from_absolute_path = original_find - - def test_unmounted_filestore(): test_dir = "/not/a/real/path" file_share_path = FileSharePath(zone="test", name="test", mount_path=test_dir) @@ -270,25 +237,17 @@ def test_same_share_symlink_resolution_via_listing(filestore, test_dir): symlink_path = os.path.join(test_dir, "link_to_subdir_file") os.symlink(target_file, symlink_path) - mock_session = Mock() fsp = FileSharePath(zone="test", name="test", mount_path=test_dir) - def mock_find(session, path): - # Normalize paths for comparison - if os.path.realpath(path) == os.path.realpath(target_file): - return (fsp, "subdir/target_same_share.txt") - return None - - with mock_database_for_symlinks([fsp], mock_find): - # Use yield_file_infos to list directory - symlinks are detected this way - files = list(filestore.yield_file_infos("", session=mock_session)) - symlink_info = next((f for f in files if f.name == "link_to_subdir_file"), None) + # Use yield_file_infos to list directory - symlinks are detected this way + files = list(filestore.yield_file_infos("", fsps=[fsp])) + symlink_info = next((f for f in files if f.name == "link_to_subdir_file"), None) - assert symlink_info is not None - assert symlink_info.is_symlink is True - assert symlink_info.symlink_target_fsp is not None - assert symlink_info.symlink_target_fsp["fsp_name"] == "test" - assert symlink_info.symlink_target_fsp["subpath"] == 
"subdir/target_same_share.txt" + assert symlink_info is not None + assert symlink_info.is_symlink is True + assert symlink_info.symlink_target_fsp is not None + assert symlink_info.symlink_target_fsp["fsp_name"] == "test" + assert symlink_info.symlink_target_fsp["subpath"] == "subdir/target_same_share.txt" @requires_symlinks @@ -314,24 +273,15 @@ def test_cross_share_symlink_resolution_via_listing(test_dir): fsp2 = FileSharePath(zone="test", name="share2", mount_path=share2_dir) filestore1 = Filestore(fsp1) - mock_session = Mock() - - def mock_find(session, path): - # Normalize paths for comparison - if os.path.realpath(path) == os.path.realpath(target_file): - return (fsp2, "target.txt") - return None - - with mock_database_for_symlinks([fsp1, fsp2], mock_find): - # Use yield_file_infos to list directory - symlinks are detected this way - files = list(filestore1.yield_file_infos("", session=mock_session)) - symlink_info = next((f for f in files if f.name == "link_to_share2"), None) + # Use yield_file_infos to list directory - symlinks are detected this way + files = list(filestore1.yield_file_infos("", fsps=[fsp1, fsp2])) + symlink_info = next((f for f in files if f.name == "link_to_share2"), None) - assert symlink_info is not None - assert symlink_info.is_symlink is True - assert symlink_info.symlink_target_fsp is not None - assert symlink_info.symlink_target_fsp["fsp_name"] == "share2" - assert symlink_info.symlink_target_fsp["subpath"] == "target.txt" + assert symlink_info is not None + assert symlink_info.is_symlink is True + assert symlink_info.symlink_target_fsp is not None + assert symlink_info.symlink_target_fsp["fsp_name"] == "share2" + assert symlink_info.symlink_target_fsp["subpath"] == "target.txt" @requires_symlinks @@ -353,23 +303,14 @@ def test_relative_symlink_resolution(test_dir): fsp_rel = FileSharePath(zone="test", name="rel_test", mount_path=os.path.join(test_dir, "rel_test")) nested_filestore = Filestore(fsp) - mock_session = Mock() + # List 
directory to find the symlink + files = list(nested_filestore.yield_file_infos("", fsps=[fsp, fsp_rel])) + symlink_info = next((f for f in files if f.name == "link"), None) - def mock_find(session, path): - if os.path.realpath(path) == os.path.realpath(target_file): - # Return fsp for rel_test directory - return (fsp_rel, "target.txt") - return None - - with mock_database_for_symlinks([fsp, fsp_rel], mock_find): - # List directory to find the symlink - files = list(nested_filestore.yield_file_infos("", session=mock_session)) - symlink_info = next((f for f in files if f.name == "link"), None) - - assert symlink_info is not None - assert symlink_info.is_symlink is True - assert symlink_info.symlink_target_fsp is not None - assert symlink_info.symlink_target_fsp["subpath"] == "target.txt" + assert symlink_info is not None + assert symlink_info.is_symlink is True + assert symlink_info.symlink_target_fsp is not None + assert symlink_info.symlink_target_fsp["subpath"] == "target.txt" @requires_symlinks @@ -384,16 +325,14 @@ def test_yield_file_infos_with_symlinks(filestore, test_dir): os.path.join(test_dir, "link1") ) - mock_session = Mock() fsp = FileSharePath(zone="test", name="test", mount_path=test_dir) - with mock_database_for_symlinks([fsp], lambda s, p: (fsp, "file1.txt")): - files = list(filestore.yield_file_infos("", session=mock_session)) + files = list(filestore.yield_file_infos("", fsps=[fsp])) - # Find the symlink in the list - symlink_info = next((f for f in files if f.name == "link1"), None) - assert symlink_info is not None - assert symlink_info.is_symlink is True + # Find the symlink in the list + symlink_info = next((f for f in files if f.name == "link1"), None) + assert symlink_info is not None + assert symlink_info.is_symlink is True @requires_symlinks @@ -432,18 +371,16 @@ def test_symlink_to_directory(filestore, test_dir): symlink_path = os.path.join(test_dir, "link_to_dir") os.symlink(target_dir, symlink_path) - mock_session = Mock() fsp = 
FileSharePath(zone="test", name="test", mount_path=test_dir) - with mock_database_for_symlinks([fsp], lambda s, p: (fsp, "target_dir")): - # Use yield_file_infos to list directory - symlinks to dirs are detected this way - files = list(filestore.yield_file_infos("", session=mock_session)) - symlink_info = next((f for f in files if f.name == "link_to_dir"), None) + # Use yield_file_infos to list directory - symlinks to dirs are detected this way + files = list(filestore.yield_file_infos("", fsps=[fsp])) + symlink_info = next((f for f in files if f.name == "link_to_dir"), None) - assert symlink_info is not None - assert symlink_info.is_symlink is True - assert symlink_info.is_dir is True # Should also be marked as directory - assert symlink_info.symlink_target_fsp is not None + assert symlink_info is not None + assert symlink_info.is_symlink is True + assert symlink_info.is_dir is True # Should also be marked as directory + assert symlink_info.symlink_target_fsp is not None @requires_symlinks @@ -476,29 +413,16 @@ def test_broken_symlink_within_share(test_dir): missing_target = os.path.join(test_dir, "subdir", "nonexistent.txt") os.symlink(missing_target, broken_link_path) - mock_session = Mock() - - def mock_find(session, path): - # This would normally match the file share pattern, - # but since the target doesn't exist, we should not get here - # because os.path.exists check should return False first - normalized_path = os.path.realpath(path) - test_dir_real = os.path.realpath(test_dir) - if normalized_path.startswith(test_dir_real): - return (fsp, os.path.relpath(normalized_path, test_dir_real)) - return None - - with mock_database_for_symlinks([fsp], mock_find): - # Get file infos with session (so symlink resolution is attempted) - file_infos = list(filestore.yield_file_infos("", session=mock_session)) - - # Find the broken symlink - broken_link_info = next((f for f in file_infos if f.name == "link_to_missing_file"), None) - - # Verify the symlink is detected but 
target is not resolved - assert broken_link_info is not None, "Broken symlink should be listed" - assert broken_link_info.is_symlink is True, "Should be marked as symlink" - assert broken_link_info.symlink_target_fsp is None, "symlink_target_fsp should be None for broken symlink even if target path matches share pattern" + # Get file infos with fsps (so symlink resolution is attempted) + file_infos = list(filestore.yield_file_infos("", fsps=[fsp])) + + # Find the broken symlink + broken_link_info = next((f for f in file_infos if f.name == "link_to_missing_file"), None) + + # Verify the symlink is detected but target is not resolved + assert broken_link_info is not None, "Broken symlink should be listed" + assert broken_link_info.is_symlink is True, "Should be marked as symlink" + assert broken_link_info.symlink_target_fsp is None, "symlink_target_fsp should be None for broken symlink even if target path matches share pattern" # Filestore.validate_path() tests diff --git a/tests/test_poll.py b/tests/test_poll.py index 01801e6b..151ceaf2 100644 --- a/tests/test_poll.py +++ b/tests/test_poll.py @@ -9,7 +9,7 @@ from datetime import datetime, UTC from pathlib import Path from types import SimpleNamespace -from unittest.mock import patch, MagicMock, call +from unittest.mock import patch, MagicMock, AsyncMock, call from fileglancer.apps.core import _poll_jobs, _poll_local_jobs, _POLL_LOCK_PATH @@ -56,9 +56,9 @@ class TestPollSkipsSameStatus: _poll_jobs must NOT call update_job_status. 
This was the bug that caused 'RUNNING -> RUNNING' log spam with multiple workers.""" - @patch("fileglancer.apps.core._run_as_user") + @patch("fileglancer.apps.core._dispatch", new_callable=AsyncMock) @patch("fileglancer.apps.core.db") - def test_same_status_not_written(self, mock_db, mock_run): + def test_same_status_not_written(self, mock_db, mock_dispatch): settings = _make_settings() job = _make_db_job(1, "1001", "RUNNING") @@ -68,7 +68,7 @@ def test_same_status_not_written(self, mock_db, mock_run): mock_db.get_active_jobs.return_value = [job] # Worker returns RUNNING (lowercase from cluster_api) — same as DB - mock_run.return_value = { + mock_dispatch.return_value = { "jobs": { "1001": { "status": "running", @@ -80,13 +80,13 @@ def test_same_status_not_written(self, mock_db, mock_run): }, } - _poll_jobs(settings) + asyncio.run(_poll_jobs(settings)) mock_db.update_job_status.assert_not_called() - @patch("fileglancer.apps.core._run_as_user") + @patch("fileglancer.apps.core._dispatch", new_callable=AsyncMock) @patch("fileglancer.apps.core.db") - def test_changed_status_is_written(self, mock_db, mock_run): + def test_changed_status_is_written(self, mock_db, mock_dispatch): settings = _make_settings() job = _make_db_job(1, "1001", "RUNNING") @@ -96,7 +96,7 @@ def test_changed_status_is_written(self, mock_db, mock_run): mock_db.get_active_jobs.return_value = [job] # Worker returns DONE — different from DB's RUNNING - mock_run.return_value = { + mock_dispatch.return_value = { "jobs": { "1001": { "status": "done", @@ -108,15 +108,15 @@ def test_changed_status_is_written(self, mock_db, mock_run): }, } - _poll_jobs(settings) + asyncio.run(_poll_jobs(settings)) mock_db.update_job_status.assert_called_once() args, kwargs = mock_db.update_job_status.call_args assert args == (mock_session, 1, "DONE") - @patch("fileglancer.apps.core._run_as_user") + @patch("fileglancer.apps.core._dispatch", new_callable=AsyncMock) @patch("fileglancer.apps.core.db") - def 
test_job_statuses_passed_to_worker(self, mock_db, mock_run): + def test_job_statuses_passed_to_worker(self, mock_db, mock_dispatch): """The poll request must include job_statuses so the worker seeds stubs with the correct status instead of defaulting to PENDING.""" settings = _make_settings() @@ -130,12 +130,13 @@ def test_job_statuses_passed_to_worker(self, mock_db, mock_run): mock_db.get_db_session.return_value.__exit__ = MagicMock(return_value=False) mock_db.get_active_jobs.return_value = jobs - mock_run.return_value = {"jobs": {}} + mock_dispatch.return_value = {"jobs": {}} - _poll_jobs(settings) + asyncio.run(_poll_jobs(settings)) - request = mock_run.call_args[0][1] - assert request["job_statuses"] == {"1001": "RUNNING", "1002": "PENDING"} + # _dispatch is called as (username, action, **kwargs) + kwargs = mock_dispatch.call_args.kwargs + assert kwargs["job_statuses"] == {"1001": "RUNNING", "1002": "PENDING"} # --------------------------------------------------------------------------- @@ -365,10 +366,10 @@ def test_poll_jobs_routes_local_executor(self, tmp_path): mock_db.get_db_session.return_value.__exit__ = MagicMock(return_value=False) mock_db.get_active_jobs.return_value = [job] - result = _poll_jobs(settings) + result = asyncio.run(_poll_jobs(settings)) assert result is True - # Should NOT have called _run_as_user (cluster-based polling) + # Should NOT have dispatched a cluster poll mock_db.update_job_status.assert_called_once() finally: proc.terminate() diff --git a/tests/test_worker.py b/tests/test_worker.py new file mode 100644 index 00000000..d0613c54 --- /dev/null +++ b/tests/test_worker.py @@ -0,0 +1,584 @@ +"""Tests for the per-user persistent worker infrastructure. + +Tests the IPC protocol (length-prefixed JSON, SCM_RIGHTS fd passing), +worker lifecycle (spawn, execute, shutdown, crash recovery), +and the in-process dev-mode fallback. 
+""" + +import asyncio +import json +import os +import socket +import struct +import sys +import tempfile +import time + +import pytest + +pytestmark = pytest.mark.skipif( + sys.platform == "win32", + reason="Worker subsystem uses fork/setuid/SCM_RIGHTS (Unix-only)", +) + +from fileglancer.user_worker import ( + _send, + _send_with_fd, + _recv, + _ACTIONS, + WorkerContext, + _HEADER_FMT, + _HEADER_SIZE, +) +from fileglancer.worker_pool import ( + UserWorker, + WorkerPool, + WorkerError, + WorkerDead, +) + + +# --------------------------------------------------------------------------- +# IPC protocol tests (user_worker.py _send/_recv/_send_with_fd) +# --------------------------------------------------------------------------- + +class TestIPCProtocol: + """Test the length-prefixed JSON wire protocol.""" + + def test_send_recv_roundtrip(self): + """A message sent with _send can be read back with _recv.""" + a, b = socket.socketpair() + try: + msg = {"action": "test", "value": 42, "nested": {"key": "val"}} + _send(a, msg) + result = _recv(b) + assert result == msg + finally: + a.close() + b.close() + + def test_send_recv_empty_dict(self): + """Empty dicts round-trip correctly.""" + a, b = socket.socketpair() + try: + _send(a, {}) + assert _recv(b) == {} + finally: + a.close() + b.close() + + def test_send_recv_large_message(self): + """Messages larger than a single recv buffer work.""" + a, b = socket.socketpair() + try: + # Create a message larger than typical socket buffer + big_value = "x" * 100_000 + msg = {"data": big_value} + _send(a, msg) + result = _recv(b) + assert result["data"] == big_value + finally: + a.close() + b.close() + + def test_send_recv_multiple_messages(self): + """Multiple sequential messages on the same socket.""" + a, b = socket.socketpair() + try: + for i in range(10): + _send(a, {"seq": i}) + for i in range(10): + result = _recv(b) + assert result == {"seq": i} + finally: + a.close() + b.close() + + def test_recv_connection_closed(self): + 
"""_recv raises ConnectionError when the peer closes the socket.""" + a, b = socket.socketpair() + a.close() + with pytest.raises(ConnectionError): + _recv(b) + b.close() + + def test_send_with_fd_passes_file_descriptor(self): + """_send_with_fd sends a file descriptor via SCM_RIGHTS.""" + import array + + a, b = socket.socketpair() + try: + # Create a temp file and send its fd + with tempfile.NamedTemporaryFile(mode='w', suffix='.txt', delete=False) as f: + f.write("hello from fd passing") + temp_path = f.name + + fd_to_send = os.open(temp_path, os.O_RDONLY) + try: + msg = {"type": "handle", "size": 21} + _send_with_fd(a, msg, fd_to_send) + + # Receive using recvmsg for EVERYTHING (header + payload + ancillary) + # The fd arrives with the first bytes, so we must use recvmsg from the start + fds = array.array("i") + raw = b"" + total_header = _HEADER_SIZE + while len(raw) < total_header: + data, ancdata, flags, addr = b.recvmsg( + 4096, + socket.CMSG_LEN(struct.calcsize("i")), + ) + raw += data + for cmsg_level, cmsg_type, cmsg_data in ancdata: + if cmsg_level == socket.SOL_SOCKET and cmsg_type == socket.SCM_RIGHTS: + fds.frombytes(cmsg_data[:len(cmsg_data) - (len(cmsg_data) % fds.itemsize)]) + + (length,) = struct.unpack(_HEADER_FMT, raw[:_HEADER_SIZE]) + total_needed = _HEADER_SIZE + length + while len(raw) < total_needed: + data, ancdata, flags, addr = b.recvmsg( + total_needed - len(raw), + socket.CMSG_LEN(struct.calcsize("i")), + ) + raw += data + for cmsg_level, cmsg_type, cmsg_data in ancdata: + if cmsg_level == socket.SOL_SOCKET and cmsg_type == socket.SCM_RIGHTS: + fds.frombytes(cmsg_data[:len(cmsg_data) - (len(cmsg_data) % fds.itemsize)]) + + payload = raw[_HEADER_SIZE:_HEADER_SIZE + length] + result = json.loads(payload) + assert result == {"type": "handle", "size": 21} + assert len(fds) == 1 + + # Read from the received fd + received_fd = fds[0] + with os.fdopen(received_fd, 'r') as f: + content = f.read() + assert content == "hello from fd passing" + 
finally: + os.close(fd_to_send) + os.unlink(temp_path) + finally: + a.close() + b.close() + + +# --------------------------------------------------------------------------- +# UserWorker IPC integration tests (worker_pool.py _send_and_recv) +# --------------------------------------------------------------------------- + +class TestUserWorkerIPC: + """Test UserWorker's _send_and_recv with a mock worker on the other end.""" + + def _make_worker_pair(self): + """Create a UserWorker connected to a mock 'worker' socket.""" + parent, child = socket.socketpair() + parent.setblocking(True) + + # Create a fake Popen-like object + class FakeProcess: + returncode = None + pid = 12345 + def poll(self): return None + def wait(self): pass + def kill(self): pass + + worker = UserWorker("testuser", FakeProcess(), parent, db_proxy=None) + return worker, child + + def test_send_and_recv_basic(self): + """Basic request/response over the socket.""" + worker, child = self._make_worker_pair() + try: + # Simulate worker: read request, send response + def mock_worker(): + req = _recv(child) + assert req["action"] == "ping" + _send(child, {"status": "pong"}) + + import threading + t = threading.Thread(target=mock_worker) + t.start() + + result = worker._send_and_recv({"action": "ping"}) + assert result == {"status": "pong"} + t.join() + finally: + worker.sock.close() + child.close() + + def test_send_and_recv_with_fd(self): + """Response with SCM_RIGHTS fd is auto-wrapped in _file_handle.""" + worker, child = self._make_worker_pair() + try: + with tempfile.NamedTemporaryFile(mode='w', suffix='.txt', delete=False) as f: + f.write("fd test content") + temp_path = f.name + + def mock_worker(): + req = _recv(child) + fd = os.open(temp_path, os.O_RDONLY) + _send_with_fd(child, {"type": "handle", "size": 15}, fd) + os.close(fd) + + import threading + t = threading.Thread(target=mock_worker) + t.start() + + result = worker._send_and_recv({"action": "open_file"}) + assert result["type"] == 
"handle" + assert "_file_handle" in result + + fh = result["_file_handle"] + content = fh.read().decode() + fh.close() + assert content == "fd test content" + + t.join() + os.unlink(temp_path) + finally: + worker.sock.close() + child.close() + + def test_send_and_recv_no_fd(self): + """Normal response without fd has no _file_handle key.""" + worker, child = self._make_worker_pair() + try: + def mock_worker(): + _recv(child) + _send(child, {"files": [1, 2, 3]}) + + import threading + t = threading.Thread(target=mock_worker) + t.start() + + result = worker._send_and_recv({"action": "list_dir"}) + assert result == {"files": [1, 2, 3]} + assert "_file_handle" not in result + t.join() + finally: + worker.sock.close() + child.close() + + +# --------------------------------------------------------------------------- +# UserWorker async execute tests +# --------------------------------------------------------------------------- + +class TestUserWorkerExecute: + """Test the async execute() method.""" + + def _make_worker_pair(self): + parent, child = socket.socketpair() + parent.setblocking(True) + + class FakeProcess: + returncode = None + pid = 12345 + def poll(self): return None + def wait(self): pass + def kill(self): pass + + worker = UserWorker("testuser", FakeProcess(), parent, db_proxy=None) + return worker, child + + @pytest.mark.asyncio + async def test_execute_success(self): + worker, child = self._make_worker_pair() + try: + import threading + def mock_worker(): + _recv(child) + _send(child, {"result": "ok"}) + + t = threading.Thread(target=mock_worker) + t.start() + + result = await worker.execute("test_action") + assert result == {"result": "ok"} + t.join() + finally: + worker.sock.close() + child.close() + + @pytest.mark.asyncio + async def test_execute_worker_error(self): + worker, child = self._make_worker_pair() + try: + import threading + def mock_worker(): + _recv(child) + _send(child, {"error": "something broke"}) + + t = 
threading.Thread(target=mock_worker) + t.start() + + with pytest.raises(WorkerError, match="something broke"): + await worker.execute("bad_action") + t.join() + finally: + worker.sock.close() + child.close() + + @pytest.mark.asyncio + async def test_execute_dead_worker(self): + parent, child = socket.socketpair() + parent.setblocking(True) + child.close() + + class DeadProcess: + returncode = 1 + def poll(self): return 1 + def wait(self): pass + def kill(self): pass + + worker = UserWorker("testuser", DeadProcess(), parent, db_proxy=None) + with pytest.raises(WorkerDead): + await worker.execute("anything") + parent.close() + + @pytest.mark.asyncio + async def test_execute_with_fd_transparent(self): + """execute() transparently includes _file_handle when worker sends fd.""" + worker, child = self._make_worker_pair() + try: + with tempfile.NamedTemporaryFile(mode='w', suffix='.txt', delete=False) as f: + f.write("transparent fd") + temp_path = f.name + + import threading + def mock_worker(): + _recv(child) + fd = os.open(temp_path, os.O_RDONLY) + _send_with_fd(child, {"content_type": "text/plain"}, fd) + os.close(fd) + + t = threading.Thread(target=mock_worker) + t.start() + + result = await worker.execute("open_file") + assert result["content_type"] == "text/plain" + assert "_file_handle" in result + + fh = result["_file_handle"] + assert fh.read().decode() == "transparent fd" + fh.close() + + t.join() + os.unlink(temp_path) + finally: + worker.sock.close() + child.close() + + + @pytest.mark.asyncio + async def test_concurrent_execute_serialized(self): + """Concurrent execute() calls are serialized — responses never get swapped.""" + worker, child = self._make_worker_pair() + try: + import threading + + def mock_worker(): + """Echo worker: returns the action name in the response.""" + for _ in range(20): + try: + req = _recv(child) + except ConnectionError: + break + action = req.get("action", "unknown") + if action == "shutdown": + break + # Simulate some work + 
time.sleep(0.01) + _send(child, {"action_echo": action, "seq": req.get("seq")}) + + t = threading.Thread(target=mock_worker, daemon=True) + t.start() + + # Fire 10 concurrent requests with different actions + async def make_request(seq): + action = f"action_{seq}" + result = await worker.execute(action, seq=seq) + # Verify we got OUR response back, not someone else's + assert result["action_echo"] == action + assert result["seq"] == seq + + await asyncio.gather(*[make_request(i) for i in range(10)]) + + _send(child, {"action": "shutdown"}) # won't be read, but close cleanly + t.join(timeout=5) + finally: + worker.sock.close() + child.close() + + +# --------------------------------------------------------------------------- +# Action handler tests (user_worker.py actions run in-process) +# --------------------------------------------------------------------------- + +class TestActionHandlers: + """Test action handlers directly (simulates dev/test mode).""" + + @pytest.fixture + def temp_dir(self): + d = tempfile.mkdtemp() + # Create test files + with open(os.path.join(d, "hello.txt"), "w") as f: + f.write("hello world") + os.makedirs(os.path.join(d, "subdir")) + with open(os.path.join(d, "subdir", "nested.txt"), "w") as f: + f.write("nested content") + yield d + import shutil + shutil.rmtree(d) + + @pytest.fixture + def ctx(self, temp_dir): + """Create a WorkerContext with a LocalDbProxy backed by the test database.""" + from fileglancer.settings import get_settings + from fileglancer.user_worker import LocalDbProxy + settings = get_settings() + return WorkerContext(username=os.environ.get("USER", "test"), + db=LocalDbProxy(settings.db_url)) + + def test_get_profile(self, ctx): + handler = _ACTIONS["get_profile"] + result = handler({"action": "get_profile"}, ctx) + assert "username" in result + assert "groups" in result + assert isinstance(result["groups"], list) + + def test_unknown_action(self): + """Unknown actions are not in the registry.""" + assert 
"nonexistent_action" not in _ACTIONS + + def test_validate_paths_empty(self, ctx): + handler = _ACTIONS["validate_paths"] + result = handler({"action": "validate_paths", "paths": {}}, ctx) + assert result == {"errors": {}} + + +# --------------------------------------------------------------------------- +# Worker main loop integration test +# --------------------------------------------------------------------------- + +class TestWorkerMainLoop: + """Test the worker subprocess main loop via socketpair (no actual subprocess).""" + + def _run_worker_loop(self, child_sock): + """Run the worker main loop in a thread using the given socket.""" + import threading + + def target(): + # Simulate what main() does, but with our socket + sock = child_sock + uid = os.getuid() + try: + username = os.environ.get("USER", str(uid)) + except KeyError: + username = str(uid) + + from fileglancer.settings import get_settings + from fileglancer.user_worker import LocalDbProxy + settings = get_settings() + ctx = WorkerContext(username=username, db=LocalDbProxy(settings.db_url)) + + while True: + try: + request = _recv(sock) + except ConnectionError: + break + + action = request.get("action") + if action == "shutdown": + break + + handler = _ACTIONS.get(action) + if handler is None: + _send(sock, {"error": f"Unknown action: {action}"}) + continue + + try: + result = handler(request, ctx) + fd = result.pop("_fd", None) + file_handle = result.pop("_file_handle", None) + if fd is not None: + _send_with_fd(sock, result, fd) + if file_handle is not None: + file_handle.close() + else: + _send(sock, result) + except Exception as e: + _send(sock, {"error": str(e)}) + + sock.close() + + t = threading.Thread(target=target, daemon=True) + t.start() + return t + + def test_shutdown_message(self): + """Worker exits cleanly on shutdown message.""" + parent, child = socket.socketpair() + t = self._run_worker_loop(child) + + _send(parent, {"action": "shutdown"}) + t.join(timeout=5) + assert not 
t.is_alive() + parent.close() + + def test_unknown_action_returns_error(self): + """Worker returns error for unknown actions.""" + parent, child = socket.socketpair() + t = self._run_worker_loop(child) + + _send(parent, {"action": "totally_fake"}) + result = _recv(parent) + assert "error" in result + assert "Unknown action" in result["error"] + + _send(parent, {"action": "shutdown"}) + t.join(timeout=5) + parent.close() + + def test_get_profile_via_loop(self): + """End-to-end: send get_profile through the worker loop.""" + parent, child = socket.socketpair() + t = self._run_worker_loop(child) + + _send(parent, {"action": "get_profile"}) + result = _recv(parent) + assert "username" in result + assert "groups" in result + + _send(parent, {"action": "shutdown"}) + t.join(timeout=5) + parent.close() + + def test_multiple_requests(self): + """Worker handles multiple sequential requests.""" + parent, child = socket.socketpair() + t = self._run_worker_loop(child) + + # Send several requests + _send(parent, {"action": "get_profile"}) + r1 = _recv(parent) + assert "username" in r1 + + _send(parent, {"action": "validate_paths", "paths": {}}) + r2 = _recv(parent) + assert r2 == {"errors": {}} + + _send(parent, {"action": "shutdown"}) + t.join(timeout=5) + parent.close() + + def test_connection_close_exits_loop(self): + """Worker exits when parent closes the socket.""" + parent, child = socket.socketpair() + t = self._run_worker_loop(child) + + # Close without sending shutdown — worker should detect and exit + parent.close() + t.join(timeout=5) + assert not t.is_alive()