diff --git a/fileglancer/filestore.py b/fileglancer/filestore.py index b6d633e4..f14385b0 100644 --- a/fileglancer/filestore.py +++ b/fileglancer/filestore.py @@ -3,6 +3,7 @@ rooted at a specific directory. """ +import itertools import os import stat try: @@ -490,7 +491,8 @@ def check_is_binary(self, path: Optional[str] = None, sample_size: int = 4096) - def yield_file_infos_paginated(self, path: Optional[str] = None, current_user: str = None, session = None, limit: int = 200, - cursor: Optional[str] = None) -> tuple[list[FileInfo], bool, Optional[str], int]: + cursor: Optional[str] = None, + *, max_count: int) -> tuple[list[FileInfo], bool, Optional[str], int, bool]: """ Return a page of FileInfo objects for children of the given path. @@ -505,19 +507,33 @@ def yield_file_infos_paginated(self, path: Optional[str] = None, current_user: s limit: Maximum number of entries to return. cursor: Name of the last entry from the previous page. Entries after this name (in sort order) are returned. + max_count: Maximum number of directory entries to read and sort. + Directories with more entries are truncated at this limit; + files beyond max_count are not accessible via pagination. + Required keyword arg — callers must pass the configured limit + (e.g. Settings.max_directory_count) so the advertised and + actual limits cannot drift. Returns: - Tuple of (file_infos, has_more, next_cursor, total_count). + Tuple of (file_infos, has_more, next_cursor, total_count, is_truncated). + total_count is capped at max_count. is_truncated is True when the + directory has more entries than max_count. """ full_path = self._check_path_in_root(path) # Compute user groups once for the entire listing user_groups = FileInfo._get_user_groups(current_user) if current_user else None - # Collect and sort all entries — scandir's is_dir() is free on Linux - entries = list(os.scandir(full_path)) - entries.sort(key=lambda e: (not e.is_dir(follow_symlinks=False), e.name)) + # Read max_count + 1 entries: the extra one detects truncation without + # reading the entire directory. + with os.scandir(full_path) as scanner: + entries = list(itertools.islice(scanner, max_count + 1)) total_count = len(entries) + is_truncated = total_count > max_count + if is_truncated: + entries = entries[:max_count] + total_count = max_count + entries.sort(key=lambda e: (not e.is_dir(follow_symlinks=False), e.name)) # Apply cursor: skip past the cursor entry if cursor: @@ -548,7 +564,7 @@ def yield_file_infos_paginated(self, path: Optional[str] = None, current_user: s continue next_cursor = page_entries[-1].name if has_more and page_entries else None - return file_infos, has_more, next_cursor, total_count + return file_infos, has_more, next_cursor, total_count, is_truncated def yield_file_infos(self, path: Optional[str] = None, current_user: str = None, session = None) -> Generator[FileInfo, None, None]: """ diff --git a/fileglancer/server.py b/fileglancer/server.py index de3d0c7f..d29052ae 100644 --- a/fileglancer/server.py +++ b/fileglancer/server.py @@ -1398,14 +1398,17 @@ async def get_file_metadata(path_name: str, subpath: Optional[str] = Query(''), if file_info.is_dir: try: if limit is not None: - files, has_more, next_cursor, total_count = filestore.yield_file_infos_paginated( + files, has_more, next_cursor, total_count, is_truncated = filestore.yield_file_infos_paginated( subpath, current_user=username, session=session, - limit=limit, cursor=cursor + limit=limit, cursor=cursor, + max_count=settings.max_directory_count, ) result["files"] = [json.loads(f.model_dump_json()) for f in files] result["has_more"] = has_more result["next_cursor"] = next_cursor result["total_count"] = total_count + result["is_truncated"] = is_truncated + result["max_count"] = settings.max_directory_count else: files = list(filestore.yield_file_infos(subpath, current_user=username, session=session)) result["files"] = [json.loads(f.model_dump_json()) for f in files] diff --git a/fileglancer/settings.py b/fileglancer/settings.py index 3b21bdef..7fdfb1ef 100644 --- a/fileglancer/settings.py +++ b/fileglancer/settings.py @@ -78,6 +78,10 @@ class Settings(BaseSettings): # Maximum size of the sharing key LRU cache sharing_key_cache_size: int = 1000 + # Maximum number of directory entries reported in total_count for paginated listings. + # Prevents a full directory scan for the count in very large directories. + max_directory_count: int = 10000 + # OKTA OAuth/OIDC settings for authentication okta_domain: Optional[str] = None okta_client_id: Optional[str] = None @@ -127,6 +131,13 @@ def validate_external_proxy_url(cls, v): if v is None or (isinstance(v, str) and v.strip() == ''): raise ValueError("Add external_proxy_url to your config.yaml or FGC_EXTERNAL_PROXY_URL to your .env file") return v + + @field_validator('max_directory_count') + @classmethod + def validate_max_directory_count(cls, v: int) -> int: + if v <= 0: + raise ValueError('max_directory_count must be a positive integer') + return v @classmethod def settings_customise_sources( # noqa: PLR0913 diff --git a/frontend/src/components/ui/BrowsePage/FileTable.tsx b/frontend/src/components/ui/BrowsePage/FileTable.tsx index 9639463d..879c55d1 100644 --- a/frontend/src/components/ui/BrowsePage/FileTable.tsx +++ b/frontend/src/components/ui/BrowsePage/FileTable.tsx @@ -22,6 +22,10 @@ import { makeBrowseLink } from '@/utils/index'; import FgTooltip from '@/components/ui/widgets/FgTooltip'; import { FgStyledLink } from '@/components/ui/widgets/FgLink'; import { SortIcons } from '@/components/ui/Table/TableCard'; +import { + NotificationItem, + getNotificationStyles +} from '@/components/ui/Notifications/NotificationItem'; import { typeColumn, lastModifiedColumn, @@ -96,7 +100,14 @@ export default function Table({ parent = parent.parentElement; } }, []); - const sortingEnabled = !hasNextPage; + const sortingDisabledByTruncation = Boolean(fileQuery.data?.isTruncated); + const sortingDisabledByPagination = + !sortingDisabledByTruncation && Boolean(hasNextPage); + const sortingEnabled = + !sortingDisabledByTruncation && !sortingDisabledByPagination; + const sortingDisabledTooltip = sortingDisabledByTruncation + ? 'Sort unavailable: folder is too large and contents are truncated' + : 'Sort unavailable until all files are loaded'; const selectedFileNames = useMemo( () => new Set(fileBrowserState.selectedFiles.map(file => file.name)), @@ -312,8 +323,28 @@ export default function Table({ navigate(link); }); + const isTruncated = fileQuery.data?.isTruncated ?? false; + const maxCount = fileQuery.data?.maxCount ?? null; + const truncationLimit = + maxCount?.toLocaleString() ?? 'the configured limit of'; + return (
+ {isTruncated ? ( +
+ +
+ ) : null}
{table.getHeaderGroups().map(headerGroup => (
) diff --git a/frontend/src/queries/fileQueries.ts b/frontend/src/queries/fileQueries.ts index 2a1a9c31..254d9e16 100644 --- a/frontend/src/queries/fileQueries.ts +++ b/frontend/src/queries/fileQueries.ts @@ -28,6 +28,8 @@ type FileBrowserPageResponse = { has_more?: boolean; next_cursor?: string | null; total_count?: number | null; + is_truncated?: boolean; + max_count?: number; }; export type FileQueryData = { @@ -37,6 +39,8 @@ export type FileQueryData = { errorMessage?: string; hasMore: boolean; totalCount: number | null; + isTruncated: boolean; + maxCount: number | null; }; // Query key factory for hierarchical cache management @@ -142,7 +146,9 @@ export default function useFileQuery( currentFileOrFolder, files: allFiles, hasMore: lastPage?.has_more ?? false, - totalCount + totalCount, + isTruncated: lastPage?.is_truncated ?? false, + maxCount: lastPage?.max_count ?? null }; }; diff --git a/tests/test_pagination.py b/tests/test_pagination.py index cfaef1ec..08f0d327 100644 --- a/tests/test_pagination.py +++ b/tests/test_pagination.py @@ -6,6 +6,10 @@ from fileglancer.filestore import Filestore from fileglancer.model import FileSharePath +# Matches the production default in Settings.max_directory_count; large enough +# that the small fixture directories used here never get truncated. +NO_TRUNCATION = 10000 + @pytest.fixture def pagination_dir(): @@ -37,31 +41,35 @@ class TestPaginatedListing: def test_first_page(self, pagination_store): """First page returns correct number of items with has_more=True.""" - infos, has_more, next_cursor, total_count = ( - pagination_store.yield_file_infos_paginated(None, limit=5) + infos, has_more, next_cursor, total_count, is_truncated = ( + pagination_store.yield_file_infos_paginated(None, limit=5, max_count=NO_TRUNCATION) ) assert len(infos) == 5 assert has_more is True assert next_cursor is not None assert total_count == 13 # 3 dirs + 10 files + assert is_truncated is False def test_full_listing_no_pagination(self, pagination_store): """When limit >= total entries, has_more is False and next_cursor is None.""" - infos, has_more, next_cursor, total_count = ( - pagination_store.yield_file_infos_paginated(None, limit=100) + infos, has_more, next_cursor, total_count, is_truncated = ( + pagination_store.yield_file_infos_paginated(None, limit=100, max_count=NO_TRUNCATION) ) assert len(infos) == 13 assert has_more is False assert next_cursor is None assert total_count == 13 + assert is_truncated is False def test_cursor_continuation(self, pagination_store): """Cursor returns the next page starting after the cursor entry.""" - infos1, _, cursor1, _ = pagination_store.yield_file_infos_paginated( - None, limit=5 + infos1, _, cursor1, _, _ = pagination_store.yield_file_infos_paginated( + None, limit=5, max_count=NO_TRUNCATION ) - infos2, has_more2, cursor2, total2 = ( - pagination_store.yield_file_infos_paginated(None, limit=5, cursor=cursor1) + infos2, has_more2, cursor2, total2, is_truncated2 = ( + pagination_store.yield_file_infos_paginated( + None, limit=5, cursor=cursor1, max_count=NO_TRUNCATION + ) ) # Second page should not overlap with first @@ -71,14 +79,17 @@ def test_cursor_continuation(self, pagination_store): assert len(infos2) == 5 assert has_more2 is True assert total2 == 13 + assert is_truncated2 is False def test_all_pages_cover_all_entries(self, pagination_store): """Iterating through all pages returns every entry exactly once.""" all_names = [] cursor = None while True: - infos, has_more, cursor, _ = ( - pagination_store.yield_file_infos_paginated(None, limit=4, cursor=cursor) + infos, has_more, cursor, _, _ = ( + pagination_store.yield_file_infos_paginated( + None, limit=4, cursor=cursor, max_count=NO_TRUNCATION + ) ) all_names.extend(fi.name for fi in infos) if not has_more: @@ -89,8 +100,8 @@ def test_all_pages_cover_all_entries(self, pagination_store): def test_dirs_sorted_before_files(self, pagination_store): """Directories appear before files in the listing.""" - infos, _, _, _ = pagination_store.yield_file_infos_paginated( - None, limit=100 + infos, _, _, _, _ = pagination_store.yield_file_infos_paginated( + None, limit=100, max_count=NO_TRUNCATION ) dir_indices = [i for i, fi in enumerate(infos) if fi.is_dir] file_indices = [i for i, fi in enumerate(infos) if not fi.is_dir] @@ -98,8 +109,8 @@ def test_dirs_sorted_before_files(self, pagination_store): def test_alphabetical_within_type(self, pagination_store): """Names are sorted alphabetically within dirs and within files.""" - infos, _, _, _ = pagination_store.yield_file_infos_paginated( - None, limit=100 + infos, _, _, _, _ = pagination_store.yield_file_infos_paginated( + None, limit=100, max_count=NO_TRUNCATION ) dir_names = [fi.name for fi in infos if fi.is_dir] file_names = [fi.name for fi in infos if not fi.is_dir] @@ -108,27 +119,30 @@ def test_alphabetical_within_type(self, pagination_store): def test_deleted_cursor_fallback(self, pagination_dir, pagination_store): """When cursor entry no longer exists, listing starts from the beginning.""" - infos, has_more, _, total = ( + infos, has_more, _, total, is_truncated = ( pagination_store.yield_file_infos_paginated( - None, limit=5, cursor="nonexistent_file" + None, limit=5, cursor="nonexistent_file", max_count=NO_TRUNCATION ) ) # Falls back to beginning assert len(infos) == 5 assert has_more is True assert total == 13 + assert is_truncated is False # First entry should be the first dir (sorted first) assert infos[0].name == "dir_00" def test_last_page_boundary(self, pagination_store): """Last page has has_more=False and next_cursor=None.""" # Get first 10 entries - _, _, cursor, _ = pagination_store.yield_file_infos_paginated( - None, limit=10 + _, _, cursor, _, _ = pagination_store.yield_file_infos_paginated( + None, limit=10, max_count=NO_TRUNCATION ) # Get remaining 3 - infos, has_more, next_cursor, _ = ( - pagination_store.yield_file_infos_paginated(None, limit=10, cursor=cursor) + infos, has_more, next_cursor, _, _ = ( + pagination_store.yield_file_infos_paginated( + None, limit=10, cursor=cursor, max_count=NO_TRUNCATION + ) ) assert len(infos) == 3 assert has_more is False @@ -137,11 +151,13 @@ def test_last_page_boundary(self, pagination_store): def test_exact_limit_boundary(self, pagination_store): """When entries remaining == limit, has_more is False.""" # 13 total, get first 10, then exactly 3 remain - _, _, cursor, _ = pagination_store.yield_file_infos_paginated( - None, limit=10 + _, _, cursor, _, _ = pagination_store.yield_file_infos_paginated( + None, limit=10, max_count=NO_TRUNCATION ) - infos, has_more, next_cursor, _ = ( - pagination_store.yield_file_infos_paginated(None, limit=3, cursor=cursor) + infos, has_more, next_cursor, _, _ = ( + pagination_store.yield_file_infos_paginated( + None, limit=3, cursor=cursor, max_count=NO_TRUNCATION + ) ) assert len(infos) == 3 assert has_more is False @@ -149,13 +165,14 @@ def test_exact_limit_boundary(self, pagination_store): def test_limit_one(self, pagination_store): """Pagination works with limit=1.""" - infos, has_more, cursor, total = ( - pagination_store.yield_file_infos_paginated(None, limit=1) + infos, has_more, cursor, total, is_truncated = ( + pagination_store.yield_file_infos_paginated(None, limit=1, max_count=NO_TRUNCATION) ) assert len(infos) == 1 assert has_more is True assert cursor == infos[0].name assert total == 13 + assert is_truncated is False def test_empty_directory(self): """Paginated listing of an empty directory.""" @@ -163,13 +180,14 @@ def test_empty_directory(self): try: fsp = FileSharePath(zone="test", name="test", mount_path=temp_dir) store = Filestore(fsp) - infos, has_more, next_cursor, total = ( - store.yield_file_infos_paginated(None, limit=10) + infos, has_more, next_cursor, total, is_truncated = ( + store.yield_file_infos_paginated(None, limit=10, max_count=NO_TRUNCATION) ) assert infos == [] assert has_more is False assert next_cursor is None assert total == 0 + assert is_truncated is False finally: shutil.rmtree(temp_dir) @@ -181,19 +199,48 @@ def test_subdir_pagination(self, pagination_dir, pagination_store): with open(os.path.join(subdir, f"sub_{i}.txt"), "w") as f: f.write(f"sub {i}") - infos, has_more, _, total = ( - pagination_store.yield_file_infos_paginated("dir_00", limit=3) + infos, has_more, _, total, is_truncated = ( + pagination_store.yield_file_infos_paginated("dir_00", limit=3, max_count=NO_TRUNCATION) ) assert len(infos) == 3 assert has_more is True assert total == 5 + assert is_truncated is False + + def test_truncation(self, pagination_dir): + """is_truncated is True, and the truncated subset is sorted so cursor pagination is stable.""" + fsp = FileSharePath(zone="test", name="test", mount_path=pagination_dir) + store = Filestore(fsp) + # max_count=5 on a 13-entry directory + infos, has_more, next_cursor, total_count, is_truncated = ( + store.yield_file_infos_paginated(None, limit=10, max_count=5) + ) + assert total_count == 5 + assert is_truncated is True + assert len(infos) == 5 # limit=10 > max_count=5, so all 5 returned + + # The truncated subset is whatever max_count+1 entries os.scandir + # happened to return first — we cannot predict which entries those + # are because scandir order is filesystem-dependent. What we *can* + # guarantee is that the returned subset is sorted (dirs-first, then + # name), so cursor-based pagination is stable across calls. + sort_key = lambda fi: (not fi.is_dir, fi.name) + assert [fi.name for fi in infos] == [fi.name for fi in sorted(infos, key=sort_key)] + + # Stability: a second call with the same arguments returns the same + # entries in the same order (the property the e368d2c3 sort was + # added to guarantee for cursor-based pagination). + infos2, _, _, _, _ = store.yield_file_infos_paginated( + None, limit=10, max_count=5 + ) + assert [fi.name for fi in infos2] == [fi.name for fi in infos] def test_chroot_escape_raises(self, pagination_store): """Attempting to escape root raises ValueError.""" with pytest.raises(ValueError): - pagination_store.yield_file_infos_paginated("../") + pagination_store.yield_file_infos_paginated("../", max_count=NO_TRUNCATION) def test_nonexistent_path_raises(self, pagination_store): """Listing a nonexistent path raises FileNotFoundError.""" with pytest.raises((FileNotFoundError, PermissionError)): - pagination_store.yield_file_infos_paginated("nonexistent") + pagination_store.yield_file_infos_paginated("nonexistent", max_count=NO_TRUNCATION)