Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
54 changes: 54 additions & 0 deletions src/clis/substack/utils.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
import { describe, expect, it, vi } from 'vitest';
import type { IPage } from '../../types.js';
import { __test__, loadSubstackArchive, loadSubstackFeed } from './utils.js';

/**
 * Builds an `IPage` test double in which every method is a `vi.fn()` stub
 * that resolves immediately.
 *
 * @param evaluateResult - Value the stubbed `evaluate` resolves with.
 * @returns A page mock; all other stubs resolve to `undefined`, `[]`, `{}` or `''`.
 */
function createPageMock(evaluateResult: unknown): IPage {
  // A fresh stub per property keeps call history separate for each method.
  const resolving = (value: unknown) => vi.fn().mockResolvedValue(value);

  const pageMock: IPage = {
    goto: resolving(undefined),
    evaluate: resolving(evaluateResult),
    snapshot: resolving(undefined),
    click: resolving(undefined),
    typeText: resolving(undefined),
    pressKey: resolving(undefined),
    scrollTo: resolving(undefined),
    getFormState: resolving({}),
    wait: resolving(undefined),
    tabs: resolving([]),
    closeTab: resolving(undefined),
    newTab: resolving(undefined),
    selectTab: resolving(undefined),
    networkRequests: resolving([]),
    consoleMessages: resolving([]),
    scroll: resolving(undefined),
    autoScroll: resolving(undefined),
    installInterceptor: resolving(undefined),
    getInterceptedRequests: resolving([]),
    getCookies: resolving([]),
    screenshot: resolving(''),
    waitForCapture: resolving(undefined),
  };

  return pageMock;
}

// Checks that each loader calls `page.wait` with its post-link selector and a
// timeout of 5. `evaluate` is stubbed to resolve with an empty array.
describe('substack utils wait selectors', () => {
  it('waits for both feed link shapes before scraping the feed', async () => {
    const feedPage = createPageMock([]);
    const expectedWaitArgs = {
      selector: __test__.FEED_POST_LINK_SELECTOR,
      timeout: 5,
    };

    await loadSubstackFeed(feedPage, 'https://substack.com/', 5);

    expect(feedPage.wait).toHaveBeenCalledWith(expectedWaitArgs);
  });

  it('waits for archive post links before scraping archive pages', async () => {
    const archivePage = createPageMock([]);
    const expectedWaitArgs = {
      selector: __test__.ARCHIVE_POST_LINK_SELECTOR,
      timeout: 5,
    };

    await loadSubstackArchive(archivePage, 'https://example.substack.com', 5);

    expect(archivePage.wait).toHaveBeenCalledWith(expectedWaitArgs);
  });
});
12 changes: 10 additions & 2 deletions src/clis/substack/utils.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
import { CommandExecutionError } from '../../errors.js';
import type { IPage } from '../../types.js';

// CSS selector matching anchors whose href contains "/home/post/" or "/p/";
// loadSubstackFeed waits on it after navigating to the feed URL.
const FEED_POST_LINK_SELECTOR = 'a[href*="/home/post/"], a[href*="/p/"]';
// CSS selector matching anchors whose href contains "/p/";
// loadSubstackArchive waits on it after navigating to the archive page.
const ARCHIVE_POST_LINK_SELECTOR = 'a[href*="/p/"]';

export function buildSubstackBrowseUrl(category?: string): string {
if (!category || category === 'all') return 'https://substack.com/';
const slug = category === 'tech' ? 'technology' : category;
Expand All @@ -10,7 +13,7 @@ export function buildSubstackBrowseUrl(category?: string): string {
export async function loadSubstackFeed(page: IPage, url: string, limit: number): Promise<any[]> {
if (!page) throw new CommandExecutionError('Browser session required for substack feed');
await page.goto(url);
await page.wait({ selector: 'article', timeout: 5 });
await page.wait({ selector: FEED_POST_LINK_SELECTOR, timeout: 5 });
const data = await page.evaluate(`
(async () => {
await new Promise((resolve) => setTimeout(resolve, 3000));
Expand Down Expand Up @@ -79,7 +82,7 @@ export async function loadSubstackFeed(page: IPage, url: string, limit: number):
export async function loadSubstackArchive(page: IPage, baseUrl: string, limit: number): Promise<any[]> {
if (!page) throw new CommandExecutionError('Browser session required for substack archive');
await page.goto(`${baseUrl}/archive`);
await page.wait({ selector: 'article', timeout: 5 });
await page.wait({ selector: ARCHIVE_POST_LINK_SELECTOR, timeout: 5 });
const data = await page.evaluate(`
(async () => {
await new Promise((resolve) => setTimeout(resolve, 3000));
Expand Down Expand Up @@ -131,3 +134,8 @@ export async function loadSubstackArchive(page: IPage, baseUrl: string, limit: n

return Array.isArray(data) ? data : [];
}

/**
 * Exposes the otherwise module-private selector constants so tests can assert
 * on the exact selector values passed to `page.wait`.
 */
export const __test__ = {
FEED_POST_LINK_SELECTOR,
ARCHIVE_POST_LINK_SELECTOR,
};