From 535ffa7009c850ca2a43525bcb034602992ac2d4 Mon Sep 17 00:00:00 2001 From: kagura-agent Date: Tue, 31 Mar 2026 09:49:14 +0800 Subject: [PATCH 1/2] fix(substack): update selectors for Substack DOM redesign (fixes #621) Substack replaced <article>
elements with role="article" divs and a new SPA-based feed. The wait() selector 'article' no longer matches, causing 'Selector not found: article' on feed and publication commands. - loadSubstackFeed: use 'a[href*="/p/"]' (matches actual post links) - loadSubstackArchive: use '[role="article"]' (Substack's new ARIA roles) The evaluate() scraping logic inside both functions is unchanged since it already uses 'a' href pattern matching, not article tags. --- src/clis/substack/utils.ts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/clis/substack/utils.ts b/src/clis/substack/utils.ts index 0b0b958d..0cafd6ef 100644 --- a/src/clis/substack/utils.ts +++ b/src/clis/substack/utils.ts @@ -10,7 +10,7 @@ export function buildSubstackBrowseUrl(category?: string): string { export async function loadSubstackFeed(page: IPage, url: string, limit: number): Promise { if (!page) throw new CommandExecutionError('Browser session required for substack feed'); await page.goto(url); - await page.wait({ selector: 'article', timeout: 5 }); + await page.wait({ selector: 'a[href*="/p/"]', timeout: 5 }); const data = await page.evaluate(` (async () => { await new Promise((resolve) => setTimeout(resolve, 3000)); @@ -79,7 +79,7 @@ export async function loadSubstackFeed(page: IPage, url: string, limit: number): export async function loadSubstackArchive(page: IPage, baseUrl: string, limit: number): Promise { if (!page) throw new CommandExecutionError('Browser session required for substack archive'); await page.goto(`${baseUrl}/archive`); - await page.wait({ selector: 'article', timeout: 5 }); + await page.wait({ selector: '[role="article"]', timeout: 5 }); const data = await page.evaluate(` (async () => { await new Promise((resolve) => setTimeout(resolve, 3000)); From 8a02ddc238c9704236d21dd34639514f0fe5c9b3 Mon Sep 17 00:00:00 2001 From: jackwener Date: Tue, 31 Mar 2026 12:53:10 +0800 Subject: [PATCH 2/2] review: align substack wait selectors with scraper --- 
src/clis/substack/utils.test.ts | 54 +++++++++++++++++++++++++++++++++ src/clis/substack/utils.ts | 12 ++++++-- 2 files changed, 64 insertions(+), 2 deletions(-) create mode 100644 src/clis/substack/utils.test.ts diff --git a/src/clis/substack/utils.test.ts b/src/clis/substack/utils.test.ts new file mode 100644 index 00000000..476f4aa4 --- /dev/null +++ b/src/clis/substack/utils.test.ts @@ -0,0 +1,54 @@ +import { describe, expect, it, vi } from 'vitest'; +import type { IPage } from '../../types.js'; +import { __test__, loadSubstackArchive, loadSubstackFeed } from './utils.js'; + +function createPageMock(evaluateResult: unknown): IPage { + return { + goto: vi.fn().mockResolvedValue(undefined), + evaluate: vi.fn().mockResolvedValue(evaluateResult), + snapshot: vi.fn().mockResolvedValue(undefined), + click: vi.fn().mockResolvedValue(undefined), + typeText: vi.fn().mockResolvedValue(undefined), + pressKey: vi.fn().mockResolvedValue(undefined), + scrollTo: vi.fn().mockResolvedValue(undefined), + getFormState: vi.fn().mockResolvedValue({}), + wait: vi.fn().mockResolvedValue(undefined), + tabs: vi.fn().mockResolvedValue([]), + closeTab: vi.fn().mockResolvedValue(undefined), + newTab: vi.fn().mockResolvedValue(undefined), + selectTab: vi.fn().mockResolvedValue(undefined), + networkRequests: vi.fn().mockResolvedValue([]), + consoleMessages: vi.fn().mockResolvedValue([]), + scroll: vi.fn().mockResolvedValue(undefined), + autoScroll: vi.fn().mockResolvedValue(undefined), + installInterceptor: vi.fn().mockResolvedValue(undefined), + getInterceptedRequests: vi.fn().mockResolvedValue([]), + getCookies: vi.fn().mockResolvedValue([]), + screenshot: vi.fn().mockResolvedValue(''), + waitForCapture: vi.fn().mockResolvedValue(undefined), + }; +} + +describe('substack utils wait selectors', () => { + it('waits for both feed link shapes before scraping the feed', async () => { + const page = createPageMock([]); + + await loadSubstackFeed(page, 'https://substack.com/', 5); + + 
expect(page.wait).toHaveBeenCalledWith({ + selector: __test__.FEED_POST_LINK_SELECTOR, + timeout: 5, + }); + }); + + it('waits for archive post links before scraping archive pages', async () => { + const page = createPageMock([]); + + await loadSubstackArchive(page, 'https://example.substack.com', 5); + + expect(page.wait).toHaveBeenCalledWith({ + selector: __test__.ARCHIVE_POST_LINK_SELECTOR, + timeout: 5, + }); + }); +}); diff --git a/src/clis/substack/utils.ts b/src/clis/substack/utils.ts index 0cafd6ef..07345304 100644 --- a/src/clis/substack/utils.ts +++ b/src/clis/substack/utils.ts @@ -1,6 +1,9 @@ import { CommandExecutionError } from '../../errors.js'; import type { IPage } from '../../types.js'; +const FEED_POST_LINK_SELECTOR = 'a[href*="/home/post/"], a[href*="/p/"]'; +const ARCHIVE_POST_LINK_SELECTOR = 'a[href*="/p/"]'; + export function buildSubstackBrowseUrl(category?: string): string { if (!category || category === 'all') return 'https://substack.com/'; const slug = category === 'tech' ? 
'technology' : category; @@ -10,7 +13,7 @@ export function buildSubstackBrowseUrl(category?: string): string { export async function loadSubstackFeed(page: IPage, url: string, limit: number): Promise { if (!page) throw new CommandExecutionError('Browser session required for substack feed'); await page.goto(url); - await page.wait({ selector: 'a[href*="/p/"]', timeout: 5 }); + await page.wait({ selector: FEED_POST_LINK_SELECTOR, timeout: 5 }); const data = await page.evaluate(` (async () => { await new Promise((resolve) => setTimeout(resolve, 3000)); @@ -79,7 +82,7 @@ export async function loadSubstackFeed(page: IPage, url: string, limit: number): export async function loadSubstackArchive(page: IPage, baseUrl: string, limit: number): Promise { if (!page) throw new CommandExecutionError('Browser session required for substack archive'); await page.goto(`${baseUrl}/archive`); - await page.wait({ selector: '[role="article"]', timeout: 5 }); + await page.wait({ selector: ARCHIVE_POST_LINK_SELECTOR, timeout: 5 }); const data = await page.evaluate(` (async () => { await new Promise((resolve) => setTimeout(resolve, 3000)); @@ -131,3 +134,8 @@ export async function loadSubstackArchive(page: IPage, baseUrl: string, limit: n return Array.isArray(data) ? data : []; } + +export const __test__ = { + FEED_POST_LINK_SELECTOR, + ARCHIVE_POST_LINK_SELECTOR, +};