From eb7dac563d704c73745c3e200d684e1089095113 Mon Sep 17 00:00:00 2001 From: Blodroed Date: Wed, 18 Feb 2026 13:56:26 +0100 Subject: [PATCH 1/2] =?UTF-8?q?=E2=9C=A8pattern=20matcher?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/lib/Subscription.ts | 15 ++++++++ src/lib/helpers/patternMatcher.ts | 59 +++++++++++++++++++++++++++++++ src/lib/types.ts | 2 ++ 3 files changed, 76 insertions(+) create mode 100644 src/lib/helpers/patternMatcher.ts diff --git a/src/lib/Subscription.ts b/src/lib/Subscription.ts index c991e11..cd38c51 100644 --- a/src/lib/Subscription.ts +++ b/src/lib/Subscription.ts @@ -9,6 +9,7 @@ import type { ChannelOptions, SubscriptionSettings } from "./types"; import { Attachment } from "./Attachment"; import { settings } from "./helpers/index"; import { Video } from "./Video"; +import { compilePatterns, matchesPatterns, CompiledPatterns } from "./helpers/patternMatcher"; const removeRepeatedSentences = (postTitle: string, attachmentTitle: string) => { const separators = /(?:\s+|^)((?:[^.,;:!?-]+[\s]*[.,;:!?-]+)+)(?:\s+|$)/g; @@ -28,11 +29,21 @@ export default class Subscription { public readonly creatorId: string; public readonly channels: SubscriptionSettings["channels"]; public readonly plan: string; + private readonly compiledPatternsCache: Map<string, CompiledPatterns> = new Map(); constructor(subscription: SubscriptionSettings) { this.creatorId = subscription.creatorId; this.channels = subscription.channels; this.plan = subscription.plan; + + // Precompile each channel's patterns once, keyed by the channel title used for lookup later + for (const channel of this.channels) { + const cacheKey = channel.title; + this.compiledPatternsCache.set( + cacheKey, + compilePatterns(channel.includePatterns, channel.excludePatterns) + ); + } } public deleteOldVideos = async () => { @@ -84,6 +95,10 @@ export default class Subscription { if (channel.skip) break; if (channel.daysToKeepVideos !== undefined && new Date(post.releaseDate).getTime() < Subscription.getIgnoreBeforeTimestamp(channel)) 
return; + // Pattern match the video title if patterns are defined for the channel + const compiledPatterns = this.compiledPatternsCache.get(channel.title); + if (compiledPatterns !== undefined && !matchesPatterns(video.title, compiledPatterns)) continue; + // Remove the identifier from the video title if to give a nicer title if (settings.extras.stripSubchannelPrefix === true) { const replacers = [ diff --git a/src/lib/helpers/patternMatcher.ts b/src/lib/helpers/patternMatcher.ts new file mode 100644 index 0000000..8a1bcdf --- /dev/null +++ b/src/lib/helpers/patternMatcher.ts @@ -0,0 +1,59 @@ +type PatternMatcher = (title: string) => boolean; + +export interface CompiledPatterns { + includeMatchers: PatternMatcher[]; + excludeMatchers: PatternMatcher[]; +} + +/** + * Compiles include and exclude patterns into matchers that expect an already lowercased title. + * @param includePatterns Optional array of patterns to include when matching titles. + * @param excludePatterns Optional array of patterns to exclude when matching titles. + * @returns An object containing precompiled include and exclude pattern matchers. + */ +export const compilePatterns = (includePatterns?: string[], excludePatterns?: string[]): CompiledPatterns => { + const compilePattern = (pattern: string): PatternMatcher => { + const normalized = pattern.trim().toLowerCase(); + + // Literal match: a pattern without "*" matches anywhere in the title + if (!normalized.includes("*")) { + return (title) => title.includes(normalized); + } + + // Wildcard pattern: escape every regex metacharacter (including "?") so only "*" is special, then turn "*" into ".*" + const regexPattern = normalized.replace(/[.+?^${}()|[\]\\]/g, "\\$&").replace(/\*/g, ".*"); + + const regex = new RegExp(`^${regexPattern}$`); + return (title) => regex.test(title); + }; + + return { + includeMatchers: (includePatterns || []).map(compilePattern), + excludeMatchers: (excludePatterns || []).map(compilePattern), + }; +}; + +/** + * Checks if a title matches the compiled include and exclude patterns. + * @param title The video title to check against the patterns. 
+ * @param compiled An object containing precompiled include and exclude pattern matchers. + * @returns true if the video title is valid, false if it should be excluded. + */ +export const matchesPatterns = (title: string, compiled: CompiledPatterns): boolean => { + const lowerTitle = title.toLowerCase(); + + if (compiled.excludeMatchers.length > 0 && compiled.excludeMatchers.some((matcher) => matcher(lowerTitle))) { + console.log(`Excluding video "${title}".`); + return false; + } + + // If no include patterns, include anything + if (compiled.includeMatchers.length === 0) { + return true; + } + + // Log included videos for validation + const match = compiled.includeMatchers.some((matcher) => matcher(lowerTitle)); + if (match) console.log(`Including video "${title}".`); + return match; +}; diff --git a/src/lib/types.ts b/src/lib/types.ts index 8cc5fff..f97e933 100644 --- a/src/lib/types.ts +++ b/src/lib/types.ts @@ -7,6 +7,8 @@ export type ChannelOptions = { skip: boolean; isChannel: string; daysToKeepVideos?: number; + includePatterns?: string[]; + excludePatterns?: string[]; }; export type Channels = ChannelOptions[]; From 0e443bde1a1256070a701a386c1a57c68d0b1517 Mon Sep 17 00:00:00 2001 From: Blodroed Date: Wed, 18 Feb 2026 13:56:46 +0100 Subject: [PATCH 2/2] =?UTF-8?q?=F0=9F=93=9Dpattern=20matcher=20documentati?= =?UTF-8?q?on?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- wiki/settings.md | 34 ++++++++++++++++++++++++++++++++-- 1 file changed, 32 insertions(+), 2 deletions(-) diff --git a/wiki/settings.md b/wiki/settings.md index 64ccc86..daf07d5 100644 --- a/wiki/settings.md +++ b/wiki/settings.md @@ -224,10 +224,12 @@ You can add custom channels to a creator if you want. First come first served, the first channel a video matches to is what it goes into, channels are checked top to bottom in the config. Videos cannot be sorted into multiple channels.
-A **channel** is made up of a `title`, `skip`, `isChannel` and optionally `daysToKeepVideos`. +A **channel** is made up of a `title`, `skip`, `isChannel` and optionally `includePatterns`, `excludePatterns` and `daysToKeepVideos`. `title` is the nice name used for the channel. `skip` can be set to true to skip downloading videos matched on the given channel. -`isChannel` function that returns true or false if the video should be sorted into this channel (more on this further down). +`isChannel` is a function that returns true or false if the video should be sorted into this channel (more on this further down). +`includePatterns` is an optional array of plain text patterns that a video title must match (more on this [here](#pattern-matching)). +`excludePatterns` is an optional array of plain text patterns; videos whose titles match any of them are excluded (more on this [here](#pattern-matching)). `daysToKeepVideos` is the optional number of days to keep videos for this channel. **2** would mean only videos released within the **last two days** are downloaded and any older will be **automatically deleted** if previously downloaded.
@@ -251,6 +253,34 @@ For example:
+### Pattern Matching + +**includePatterns** and **excludePatterns** are arrays of plain text patterns to include or exclude videos from being downloaded. They use a simple wildcard system where `*` can be used to match any characters. For example `*beat saber*` would match any video with "beat saber" in the title, but `beat saber*` would only match videos that start with "beat saber". A pattern without any `*` matches anywhere in the title, and matching is case-insensitive. + +The patterns are applied in an exclude-then-include order: + +* If the patterns are empty then all videos are included by default. +* If `includePatterns` is provided but `excludePatterns` is empty then only videos that match the `includePatterns` are included. +* If `excludePatterns` is provided but `includePatterns` is empty then all videos that do not match the `excludePatterns` are included. +* If both `includePatterns` and `excludePatterns` are provided then videos that match the `excludePatterns` are excluded first, then from the remaining videos only those that match the `includePatterns` are included. + +Example: + +```json +{ + "title": "FP Exclusives", + "skip": false, + "excludePatterns": [ + "*exec*" + ], + "includePatterns": [ + "*week*" + ] +} +``` + +The example shown above will include all videos with "week" in the title except those that also have "exec" in the title. + ## Metrics: **metrics.prometheusExporterPort**: