diff --git a/src/clis/substack/utils.test.ts b/src/clis/substack/utils.test.ts new file mode 100644 index 00000000..476f4aa4 --- /dev/null +++ b/src/clis/substack/utils.test.ts @@ -0,0 +1,54 @@ +import { describe, expect, it, vi } from 'vitest'; +import type { IPage } from '../../types.js'; +import { __test__, loadSubstackArchive, loadSubstackFeed } from './utils.js'; + +function createPageMock(evaluateResult: unknown): IPage { + return { + goto: vi.fn().mockResolvedValue(undefined), + evaluate: vi.fn().mockResolvedValue(evaluateResult), + snapshot: vi.fn().mockResolvedValue(undefined), + click: vi.fn().mockResolvedValue(undefined), + typeText: vi.fn().mockResolvedValue(undefined), + pressKey: vi.fn().mockResolvedValue(undefined), + scrollTo: vi.fn().mockResolvedValue(undefined), + getFormState: vi.fn().mockResolvedValue({}), + wait: vi.fn().mockResolvedValue(undefined), + tabs: vi.fn().mockResolvedValue([]), + closeTab: vi.fn().mockResolvedValue(undefined), + newTab: vi.fn().mockResolvedValue(undefined), + selectTab: vi.fn().mockResolvedValue(undefined), + networkRequests: vi.fn().mockResolvedValue([]), + consoleMessages: vi.fn().mockResolvedValue([]), + scroll: vi.fn().mockResolvedValue(undefined), + autoScroll: vi.fn().mockResolvedValue(undefined), + installInterceptor: vi.fn().mockResolvedValue(undefined), + getInterceptedRequests: vi.fn().mockResolvedValue([]), + getCookies: vi.fn().mockResolvedValue([]), + screenshot: vi.fn().mockResolvedValue(''), + waitForCapture: vi.fn().mockResolvedValue(undefined), + }; +} + +describe('substack utils wait selectors', () => { + it('waits for both feed link shapes before scraping the feed', async () => { + const page = createPageMock([]); + + await loadSubstackFeed(page, 'https://substack.com/', 5); + + expect(page.wait).toHaveBeenCalledWith({ + selector: __test__.FEED_POST_LINK_SELECTOR, + timeout: 5, + }); + }); + + it('waits for archive post links before scraping archive pages', async () => { + const page = createPageMock([]); + + await loadSubstackArchive(page, 'https://example.substack.com', 5); + + expect(page.wait).toHaveBeenCalledWith({ + selector: __test__.ARCHIVE_POST_LINK_SELECTOR, + timeout: 5, + }); + }); +}); diff --git a/src/clis/substack/utils.ts b/src/clis/substack/utils.ts index 0b0b958d..07345304 100644 --- a/src/clis/substack/utils.ts +++ b/src/clis/substack/utils.ts @@ -1,6 +1,9 @@ import { CommandExecutionError } from '../../errors.js'; import type { IPage } from '../../types.js'; +const FEED_POST_LINK_SELECTOR = 'a[href*="/home/post/"], a[href*="/p/"]'; +const ARCHIVE_POST_LINK_SELECTOR = 'a[href*="/p/"]'; + export function buildSubstackBrowseUrl(category?: string): string { if (!category || category === 'all') return 'https://substack.com/'; const slug = category === 'tech' ? 'technology' : category; @@ -10,7 +13,7 @@ export function buildSubstackBrowseUrl(category?: string): string { export async function loadSubstackFeed(page: IPage, url: string, limit: number): Promise { if (!page) throw new CommandExecutionError('Browser session required for substack feed'); await page.goto(url); - await page.wait({ selector: 'article', timeout: 5 }); + await page.wait({ selector: FEED_POST_LINK_SELECTOR, timeout: 5 }); const data = await page.evaluate(` (async () => { await new Promise((resolve) => setTimeout(resolve, 3000)); @@ -79,7 +82,7 @@ export async function loadSubstackFeed(page: IPage, url: string, limit: number): export async function loadSubstackArchive(page: IPage, baseUrl: string, limit: number): Promise { if (!page) throw new CommandExecutionError('Browser session required for substack archive'); await page.goto(`${baseUrl}/archive`); - await page.wait({ selector: 'article', timeout: 5 }); + await page.wait({ selector: ARCHIVE_POST_LINK_SELECTOR, timeout: 5 }); const data = await page.evaluate(` (async () => { await new Promise((resolve) => setTimeout(resolve, 3000)); @@ -131,3 +134,8 @@ export async function loadSubstackArchive(page: IPage, baseUrl: string, limit: n return Array.isArray(data) ? data : []; } + +export const __test__ = { + FEED_POST_LINK_SELECTOR, + ARCHIVE_POST_LINK_SELECTOR, +};