import * as os from 'node:os';
import * as path from 'node:path';
import { beforeAll, beforeEach, describe, expect, it, vi } from 'vitest';
import type { CliCommand } from '../../registry.js';
import { getRegistry } from '../../registry.js';
import { ArgumentError, AuthRequiredError, CliError, CommandExecutionError } from '../../errors.js';
import type { IPage } from '../../types.js';

// Created inside vi.hoisted so both mocks exist before the vi.mock factory below runs.
const { mockHttpDownload, logSpy } = vi.hoisted(() => ({
  mockHttpDownload: vi.fn(),
  logSpy: vi.spyOn(console, 'log').mockImplementation(() => undefined),
}));

// Keep the real download module but replace the network-touching httpDownload.
vi.mock('../../download/index.js', async () => {
  const actual = await vi.importActual<typeof import('../../download/index.js')>('../../download/index.js');
  return { ...actual, httpDownload: mockHttpDownload };
});

const {
  buildInstagramDownloadItems,
  parseInstagramMediaTarget,
} = await import('./download.js');

let cmd: CliCommand;

beforeAll(() => {
  cmd = getRegistry().get('instagram/download')!;
  expect(cmd?.func).toBeTypeOf('function');
});

/** Minimal IPage stand-in: `goto` resolves, `evaluate` resolves with the given payload. */
function createPageMock(evaluateResult: unknown): IPage {
  return {
    goto: vi.fn().mockResolvedValue(undefined),
    evaluate: vi.fn().mockResolvedValue(evaluateResult),
  } as unknown as IPage;
}

describe('instagram download helpers', () => {
  it('parses canonical and username-prefixed Instagram media URLs', () => {
    expect(parseInstagramMediaTarget('https://www.instagram.com/reel/DWg8NuZEj9p/?utm_source=ig_web_copy_link')).toEqual({
      kind: 'reel',
      shortcode: 'DWg8NuZEj9p',
      canonicalUrl: 'https://www.instagram.com/reel/DWg8NuZEj9p/',
    });

    expect(parseInstagramMediaTarget('https://www.instagram.com/nasa/p/DWUR_azCWbN/?img_index=1')).toEqual({
      kind: 'p',
      shortcode: 'DWUR_azCWbN',
      canonicalUrl: 'https://www.instagram.com/p/DWUR_azCWbN/',
    });
  });

  it('rejects unsupported URLs early', () => {
    expect(() => parseInstagramMediaTarget('https://example.com/p/abc')).toThrow(ArgumentError);
    expect(() => parseInstagramMediaTarget('https://www.instagram.com/stories/abc/123')).toThrow(ArgumentError);
  });

  it('builds padded filenames and preserves known file extensions', () => {
    expect(buildInstagramDownloadItems('DWUR_azCWbN', [
      { type: 'image', url: 'https://cdn.example.com/photo.webp?foo=1' },
      { type: 'video', url: 'https://cdn.example.com/video.mp4?bar=2' },
      { type: 'image', url: 'not-a-valid-url' },
    ])).toEqual([
      { type: 'image', url: 'https://cdn.example.com/photo.webp?foo=1', filename: 'DWUR_azCWbN_01.webp' },
      { type: 'video', url: 'https://cdn.example.com/video.mp4?bar=2', filename: 'DWUR_azCWbN_02.mp4' },
      { type: 'image', url: 'not-a-valid-url', filename: 'DWUR_azCWbN_03.jpg' },
    ]);
  });
});

describe('instagram download command', () => {
  beforeEach(() => {
    mockHttpDownload.mockReset();
    logSpy.mockClear();
  });

  it('rejects invalid URLs before browser work', async () => {
    const page = createPageMock({ ok: true, items: [] });
    await expect(cmd.func!(page, { url: 'https://example.com/not-instagram' })).rejects.toThrow(ArgumentError);
    expect(vi.mocked(page.goto).mock.calls).toHaveLength(0);
  });

  it('maps auth failures to AuthRequiredError', async () => {
    const page = createPageMock({ ok: false, errorCode: 'AUTH_REQUIRED', error: 'Instagram login required' });
    await expect(cmd.func!(page, { url: 'https://www.instagram.com/p/DWUR_azCWbN/' })).rejects.toThrow(AuthRequiredError);
    expect(mockHttpDownload).not.toHaveBeenCalled();
  });

  it('maps rate limit failures to CliError with RATE_LIMITED code', async () => {
    const page = createPageMock({ ok: false, errorCode: 'RATE_LIMITED', error: 'Please wait a few minutes' });
    await expect(cmd.func!(page, { url: 'https://www.instagram.com/p/DWUR_azCWbN/' })).rejects.toMatchObject(
      { code: 'RATE_LIMITED' } satisfies Partial<CliError>,
    );
    expect(mockHttpDownload).not.toHaveBeenCalled();
  });

  it('maps private/unavailable failures to CommandExecutionError', async () => {
    const page = createPageMock({ ok: false, errorCode: 'PRIVATE_OR_UNAVAILABLE', error: 'Post may be private' });
    await expect(cmd.func!(page, { url: 'https://www.instagram.com/p/DWUR_azCWbN/' })).rejects.toThrow(CommandExecutionError);
    expect(mockHttpDownload).not.toHaveBeenCalled();
  });

  it('throws when no downloadable media is found', async () => {
    const page = createPageMock({ ok: true, shortcode: 'DWUR_azCWbN', items: [] });
    await expect(cmd.func!(page, { url: 'https://www.instagram.com/p/DWUR_azCWbN/' })).rejects.toThrow(CommandExecutionError);
    expect(mockHttpDownload).not.toHaveBeenCalled();
  });

  it('downloads media and prints saved directory', async () => {
    mockHttpDownload
      .mockResolvedValueOnce({ success: true, size: 120_000 })
      .mockResolvedValueOnce({ success: true, size: 8_200_000 });

    const page = createPageMock({
      ok: true,
      shortcode: 'DWUR_azCWbN',
      items: [
        { type: 'image', url: 'https://cdn.example.com/photo.webp?foo=1' },
        { type: 'video', url: 'https://cdn.example.com/video.mp4?bar=2' },
      ],
    });

    const result = await cmd.func!(page, {
      url: 'https://www.instagram.com/nasa/p/DWUR_azCWbN/?img_index=1',
      path: './instagram-test',
    });

    // The command assembles destinations with path.join, so the expectations must use
    // the platform separator too instead of a hard-coded '/'.
    const savedDir = path.join('instagram-test', 'DWUR_azCWbN');

    expect(result).toBeNull();
    expect(vi.mocked(page.goto).mock.calls[0]?.[0]).toBe('https://www.instagram.com/p/DWUR_azCWbN/');
    expect(mockHttpDownload).toHaveBeenNthCalledWith(1,
      'https://cdn.example.com/photo.webp?foo=1',
      expect.stringContaining(path.join(savedDir, 'DWUR_azCWbN_01.webp')),
      expect.objectContaining({ timeout: 60000 }),
    );
    expect(mockHttpDownload).toHaveBeenNthCalledWith(2,
      'https://cdn.example.com/video.mp4?bar=2',
      expect.stringContaining(path.join(savedDir, 'DWUR_azCWbN_02.mp4')),
      expect.objectContaining({ timeout: 120000 }),
    );
    expect(logSpy).toHaveBeenCalledWith(`📁 saved: ${savedDir}`);
  });

  it('uses a cross-platform Downloads default when path is omitted', async () => {
    mockHttpDownload.mockResolvedValueOnce({ success: true, size: 120_000 });

    const page = createPageMock({
      ok: true,
      shortcode: 'DWUR_azCWbN',
      items: [
        { type: 'image', url: 'https://cdn.example.com/photo.webp?foo=1' },
      ],
    });

    await cmd.func!(page, { url: 'https://www.instagram.com/p/DWUR_azCWbN/' });

    expect(mockHttpDownload).toHaveBeenCalledWith(
      'https://cdn.example.com/photo.webp?foo=1',
      expect.stringContaining(
        path.join(os.homedir(), 'Downloads', 'Instagram', 'DWUR_azCWbN', 'DWUR_azCWbN_01.webp'),
      ),
      expect.objectContaining({ timeout: 60000 }),
    );
  });
});
/** A validated Instagram media link, reduced to its kind, shortcode and canonical URL. */
export interface InstagramMediaTarget {
  kind: 'p' | 'reel' | 'tv';
  shortcode: string;
  canonicalUrl: string;
}

/** One media entry as reported by the in-page fetch script. */
interface InstagramPageMediaItem {
  type: 'image' | 'video';
  url: string;
}

/** Result object produced by the in-page fetch script (success or classified failure). */
interface InstagramFetchResult {
  ok: boolean;
  shortcode?: string;
  owner?: string;
  items?: InstagramPageMediaItem[];
  errorCode?: string;
  error?: string;
}

/** A media entry together with the local filename it will be saved under. */
interface DownloadedMediaItem extends InstagramPageMediaItem {
  filename: string;
}

/**
 * Abbreviates a path under the current user's home directory to `~/...` for display.
 *
 * Only the home directory itself or paths below it are abbreviated; a sibling such as
 * `/home/user2` is returned unchanged when the home directory is `/home/user`.
 */
function displayPath(filePath: string): string {
  const home = os.homedir();
  if (!home) return filePath;
  if (filePath === home) return '~';

  // Compare against "home + separator" so a shared textual prefix is not mistaken for containment.
  const homePrefix = home.endsWith(path.sep) ? home : `${home}${path.sep}`;
  return filePath.startsWith(homePrefix)
    ? `~${path.sep}${filePath.slice(homePrefix.length)}`
    : filePath;
}

/** Narrows a path segment to one of the supported media kinds. */
function isSupportedInstagramKind(segment: string | undefined): segment is InstagramMediaTarget['kind'] {
  return segment !== undefined && SUPPORTED_KINDS.has(segment);
}

/**
 * Parses a user-supplied Instagram link into a media target.
 *
 * Accepts `/p/<shortcode>/`, `/reel/<shortcode>/` and `/tv/<shortcode>/` paths, optionally
 * prefixed by a username segment (`/<user>/p/<shortcode>/`). Query string and fragment are ignored.
 *
 * @param input Raw URL text; surrounding whitespace is trimmed.
 * @returns The media kind, shortcode and a canonical `https://www.instagram.com/...` URL.
 * @throws ArgumentError when the input is empty, not a URL, not http(s), not on instagram.com,
 *   not a supported media path, or carries a shortcode with characters outside `[A-Za-z0-9_-]`.
 */
export function parseInstagramMediaTarget(input: string): InstagramMediaTarget {
  const raw = String(input || '').trim();
  if (!raw) {
    throw new ArgumentError(
      'Instagram URL is required',
      'Expected https://www.instagram.com/p/... or https://www.instagram.com/reel/...',
    );
  }

  let url: URL;
  try {
    url = new URL(raw);
  } catch {
    throw new ArgumentError(
      `Invalid Instagram URL: ${raw}`,
      'Expected https://www.instagram.com/p/<shortcode>/ or /reel/<shortcode>/',
    );
  }

  if (url.protocol !== 'http:' && url.protocol !== 'https:') {
    throw new ArgumentError(`Unsupported URL protocol: ${url.protocol}`);
  }

  const host = url.hostname.toLowerCase();
  if (host !== INSTAGRAM_HOST_SUFFIX && !host.endsWith(`.${INSTAGRAM_HOST_SUFFIX}`)) {
    throw new ArgumentError(`Unsupported host: ${host}`, 'Only instagram.com URLs are supported');
  }

  const [first, second, third] = url.pathname.split('/').filter(Boolean);
  let kind: InstagramMediaTarget['kind'] | undefined;
  let shortcode: string | undefined;

  if (isSupportedInstagramKind(first) && second !== undefined) {
    // /<kind>/<shortcode>/
    kind = first;
    shortcode = second;
  } else if (isSupportedInstagramKind(second) && third !== undefined) {
    // /<username>/<kind>/<shortcode>/
    kind = second;
    shortcode = third;
  }

  // The shortcode later becomes part of a URL and of a directory name, so only the
  // URL-safe base64 alphabet is accepted.
  if (!kind || !shortcode || !/^[A-Za-z0-9_-]+$/.test(shortcode)) {
    throw new ArgumentError(
      `Unsupported Instagram media URL: ${raw}`,
      'Only /p/<shortcode>/, /reel/<shortcode>/, and /tv/<shortcode>/ links are supported',
    );
  }

  return {
    kind,
    shortcode,
    canonicalUrl: `https://www.instagram.com/${kind}/${shortcode}/`,
  };
}
/**
 * Picks the file extension for a media URL.
 *
 * The extension of the URL path is used when it is a dot followed by 1-7 ASCII letters or
 * digits; anything else (no extension, percent-escapes, unparsable URL) yields `fallbackExt`.
 */
function resolveInstagramMediaExtension(mediaUrl: string, fallbackExt: string): string {
  try {
    // URL paths always use '/', so the POSIX flavour of extname is the right parser on every OS.
    const candidate = path.posix.extname(new URL(mediaUrl).pathname).toLowerCase();
    return /^\.[a-z0-9]{1,7}$/.test(candidate) ? candidate : fallbackExt;
  } catch {
    return fallbackExt;
  }
}

/**
 * Assigns a local filename to every media item that has a URL.
 *
 * Filenames are `<shortcode>_<NN><ext>` where `NN` is the 1-based position (after dropping
 * items without a URL) zero-padded to two digits, and `<ext>` comes from the URL path or
 * falls back to `.mp4` for videos and `.jpg` for images.
 *
 * @param shortcode Media shortcode used as the filename prefix.
 * @param items Media entries reported for the post.
 * @returns The entries that have a URL, each extended with `filename`.
 */
export function buildInstagramDownloadItems(shortcode: string, items: InstagramPageMediaItem[]): DownloadedMediaItem[] {
  return items
    .filter((item) => Boolean(item?.url))
    .map((item, index) => {
      const fallbackExt = item.type === 'video' ? '.mp4' : '.jpg';
      const sequence = String(index + 1).padStart(2, '0');

      return {
        type: item.type,
        url: item.url,
        filename: `${shortcode}_${sequence}${resolveInstagramMediaExtension(item.url, fallbackExt)}`,
      };
    });
}
/**
 * Builds the source of an async IIFE that queries Instagram's GraphQL endpoint for one post.
 *
 * The script is meant to be evaluated inside a browser page: it calls `fetch` with
 * `credentials: 'include'` and always resolves to a plain result object, either
 * `{ ok: true, shortcode, owner, items }` or `{ ok: false, errorCode, error }` with
 * `errorCode` one of `AUTH_REQUIRED`, `RATE_LIMITED`, `PRIVATE_OR_UNAVAILABLE`, `COMMAND_EXEC`.
 *
 * HTTP status is classified before the body format, so a 401/403/404/410/429 answered with
 * an HTML page is still mapped to its specific error code. A rejected `fetch` is reported as
 * `COMMAND_EXEC` instead of rejecting the evaluation.
 *
 * @param shortcode Media shortcode; embedded via `JSON.stringify`, so it cannot break out of the script.
 * @returns JavaScript source text of the IIFE.
 */
export function buildInstagramFetchScript(shortcode: string): string {
  return `
    (async () => {
      const shortcode = ${JSON.stringify(shortcode)};
      const docId = ${JSON.stringify(INSTAGRAM_GRAPHQL_DOC_ID)};
      const fail = (errorCode, error) => ({ ok: false, errorCode, error });
      const variables = {
        shortcode,
        fetch_tagged_user_count: null,
        hoisted_comment_id: null,
        hoisted_reply_id: null,
      };
      const url = 'https://www.instagram.com/graphql/query/?doc_id=' + docId + '&variables=' + encodeURIComponent(JSON.stringify(variables));

      let res;
      let rawText = '';
      try {
        res = await fetch(url, {
          credentials: 'include',
          headers: {
            'Accept': 'application/json,text/plain,*/*',
            'X-IG-App-ID': ${JSON.stringify(INSTAGRAM_GRAPHQL_APP_ID)},
          },
        });
        rawText = await res.text();
      } catch (err) {
        return fail('COMMAND_EXEC', 'Instagram request failed: ' + String((err && err.message) || err));
      }

      let data = null;
      let isJson = true;
      try {
        data = rawText ? JSON.parse(rawText) : null;
      } catch {
        isJson = false;
      }

      const message = typeof data?.message === 'string' ? data.message : '';
      const lowered = message.toLowerCase();

      if (!res.ok) {
        if (res.status === 401 || res.status === 403 || data?.require_login) {
          return fail('AUTH_REQUIRED', message || ('HTTP ' + res.status));
        }
        if (res.status === 429) {
          return fail('RATE_LIMITED', message || 'HTTP 429');
        }
        if (res.status === 404 || res.status === 410) {
          return fail('PRIVATE_OR_UNAVAILABLE', message || ('HTTP ' + res.status));
        }
        return fail('COMMAND_EXEC', message || ('HTTP ' + res.status));
      }

      if (!isJson) {
        return fail('COMMAND_EXEC', 'Instagram returned non-JSON content while fetching media metadata');
      }

      if (data?.require_login) {
        return fail('AUTH_REQUIRED', message || 'Instagram login required');
      }

      // Match "rate" only as a whole word (or "ratelimit") so messages containing
      // e.g. "generated" are not reported as rate limiting.
      const words = lowered.split(/[^a-z]+/);
      if (lowered.includes('wait a few minutes') || lowered.includes('ratelimit') || words.includes('rate')) {
        return fail('RATE_LIMITED', message || 'Instagram rate limit triggered');
      }

      const media = data?.data?.xdt_shortcode_media;
      if (!media) {
        return fail(
          'PRIVATE_OR_UNAVAILABLE',
          message || 'Post may be private, unavailable, or inaccessible to the current browser session',
        );
      }

      // A carousel lists its children as sidecar edges; a single post is its own only node.
      const edges = media?.edge_sidecar_to_children?.edges;
      const nodes = Array.isArray(edges) && edges.length > 0
        ? edges.map((edge) => edge?.node).filter(Boolean)
        : [media];

      const items = nodes
        .map((node) => ({
          type: node?.is_video ? 'video' : 'image',
          url: String(node?.is_video ? (node?.video_url || '') : (node?.display_url || '')),
        }))
        .filter((item) => item.url);

      return {
        ok: true,
        shortcode: media.shortcode || shortcode,
        owner: media?.owner?.username || '',
        items,
      };
    })()
  `;
}
/**
 * Returns the page unchanged, or fails when the command was invoked without one.
 *
 * @throws CommandExecutionError when `page` is null.
 */
function ensurePage(page: IPage | null): IPage {
  if (!page) throw new CommandExecutionError('Browser session required');
  return page;
}

/**
 * Validates the value returned by the in-page fetch script before it is trusted as a result.
 *
 * The value crosses a browser/Node boundary as `unknown`, so its shape is checked here: it
 * must be a non-array object with a boolean `ok`, and `items`, when present, must be an array.
 *
 * @throws CommandExecutionError when the value does not have that shape.
 */
function normalizeFetchResult(result: unknown): InstagramFetchResult {
  const malformed = (): never => {
    throw new CommandExecutionError('Failed to fetch Instagram media metadata');
  };

  if (!result || typeof result !== 'object' || Array.isArray(result)) return malformed();

  const candidate = result as { ok?: unknown; items?: unknown };
  if (typeof candidate.ok !== 'boolean') return malformed();
  // `items` may be absent or null (treated as empty by the caller) but never a non-array value.
  if (candidate.items != null && !Array.isArray(candidate.items)) return malformed();

  return result as InstagramFetchResult;
}

/**
 * Converts a failed fetch result into the matching CLI error and throws it.
 *
 * - `AUTH_REQUIRED` -> AuthRequiredError for instagram.com
 * - `RATE_LIMITED` -> CliError with code `RATE_LIMITED` and exit code `EXIT_CODES.TEMPFAIL`
 * - `PRIVATE_OR_UNAVAILABLE` -> CommandExecutionError with a retry hint
 * - anything else -> CommandExecutionError with the reported message
 */
function handleFetchFailure(result: InstagramFetchResult): never {
  const message = result.error || 'Instagram media fetch failed';

  switch (result.errorCode) {
    case 'AUTH_REQUIRED':
      throw new AuthRequiredError('instagram.com', message);
    case 'RATE_LIMITED':
      throw new CliError(
        'RATE_LIMITED',
        message,
        'Wait a few minutes and retry, or switch to a browser session with a warmer Instagram login state.',
        EXIT_CODES.TEMPFAIL,
      );
    case 'PRIVATE_OR_UNAVAILABLE':
      throw new CommandExecutionError(message, 'Open the post in a logged-in browser session and retry');
    default:
      throw new CommandExecutionError(message);
  }
}
/**
 * Downloads every item into `outputDir`, one after another, creating the directory first.
 *
 * Videos get a 120 s timeout, images 60 s. The first failed download aborts the run.
 *
 * @param items Media entries with their target filenames.
 * @param outputDir Directory the files are written to (created recursively if missing).
 * @throws CommandExecutionError when `httpDownload` reports a failure for an item.
 */
async function downloadInstagramMedia(items: DownloadedMediaItem[], outputDir: string): Promise<void> {
  const videoTimeoutMs = 120000;
  const imageTimeoutMs = 60000;

  fs.mkdirSync(outputDir, { recursive: true });

  for (const item of items) {
    const destPath = path.join(outputDir, item.filename);
    const result = await httpDownload(item.url, destPath, {
      timeout: item.type === 'video' ? videoTimeoutMs : imageTimeoutMs,
    });

    if (!result.success) {
      throw new CommandExecutionError(`Failed to download ${item.filename}: ${result.error || 'unknown error'}`);
    }
  }
}

// Registers `instagram download`: validates the URL, opens the canonical post page, runs the
// fetch script in that page, then saves every media file under <path>/<shortcode>/.
cli({
  site: 'instagram',
  name: 'download',
  description: 'Download images and videos from Instagram posts and reels',
  domain: 'www.instagram.com',
  strategy: Strategy.COOKIE,
  navigateBefore: false,
  args: [
    { name: 'url', positional: true, required: true, help: 'Instagram post / reel / tv URL' },
    { name: 'path', default: path.join(os.homedir(), 'Downloads', 'Instagram'), help: 'Download directory' },
  ],
  func: async (page, kwargs) => {
    const browserPage = ensurePage(page);
    // Parse before any navigation so a bad URL fails without touching the browser.
    const target = parseInstagramMediaTarget(String(kwargs.url ?? ''));
    const outputRoot = String(kwargs.path ?? path.join(os.homedir(), 'Downloads', 'Instagram'));

    await browserPage.goto(target.canonicalUrl);

    const fetchResult = normalizeFetchResult(await browserPage.evaluate(buildInstagramFetchScript(target.shortcode)));
    if (!fetchResult.ok) handleFetchFailure(fetchResult);

    // The shortcode echoed by the remote payload becomes a directory name, so it is only
    // used when it is a plain shortcode; otherwise the one parsed from the URL is kept.
    const remoteShortcode: unknown = fetchResult.shortcode;
    const shortcode = typeof remoteShortcode === 'string' && /^[A-Za-z0-9_-]+$/.test(remoteShortcode)
      ? remoteShortcode
      : target.shortcode;

    const mediaItems = buildInstagramDownloadItems(shortcode, fetchResult.items || []);
    if (mediaItems.length === 0) {
      throw new CommandExecutionError('No downloadable media found');
    }

    const savedDir = path.join(outputRoot, shortcode);
    await downloadInstagramMedia(mediaItems, savedDir);
    console.log(`📁 saved: ${displayPath(savedDir)}`);
    return null;
  },
});
Warning: Command returned an empty result.'));