diff --git a/Runware/Runware-base.ts b/Runware/Runware-base.ts index 6ce55b7..fde79d8 100644 --- a/Runware/Runware-base.ts +++ b/Runware/Runware-base.ts @@ -38,6 +38,8 @@ import { IRequestVideo, IAsyncResults, IVideoToImage, + TAudioInference, + IAudioResult, } from "./types"; import { BASE_RUNWARE_URLS, @@ -266,7 +268,7 @@ export class RunwareBase { } }; - private listenToImages({ + private listenToResponse({ onPartialImages, taskUUID, groupKey, @@ -574,7 +576,7 @@ export class RunwareBase { // const generationTime = endTime - startTime; - lis = this.listenToImages({ + lis = this.listenToResponse({ onPartialImages, taskUUID: taskUUID, groupKey: LISTEN_TO_IMAGES_KEY.REQUEST_IMAGES, @@ -723,6 +725,8 @@ export class RunwareBase { retry, includePayload, includeGenerationTime, + inputImages, + ...rest }: IRequestImageToText): Promise => { const totalRetry = retry || this._globalMaxRetries; let lis: any = undefined; @@ -737,13 +741,33 @@ export class RunwareBase { ? await this.uploadImage(inputImage as File | string) : null; + const imagesUploaded = inputImages?.length + ? await Promise.all( + inputImages.map((image) => + this.uploadImage(image as File | string) + ) + ) + : null; + const taskUUID = customTaskUUID || getUUID(); const payload = { taskUUID, taskType: ETaskType.IMAGE_CAPTION, - inputImage: imageUploaded?.imageUUID, + + ...(imageUploaded?.imageUUID + ? { inputImage: imageUploaded.imageUUID } + : {}), + + ...(imagesUploaded?.length + ? { + inputImages: imagesUploaded + .map((img) => img?.imageUUID) + .filter(Boolean), + } + : {}), ...evaluateNonTrue({ key: "includeCost", value: includeCost }), + ...rest, }; this.send(payload); @@ -850,7 +874,7 @@ export class RunwareBase { await getIntervalAsyncWithPromise( async ({ resolve, reject }) => { try { - const videos = await this.getResponse({ taskUUID }); + const videos = await this.getResponse({ taskUUID }); // Add videos to the collection for (const video of videos || []) { @@ -885,10 +909,10 @@ export class RunwareBase { } }; - getResponse = async (payload: IAsyncResults): Promise => { + getResponse = async (payload: IAsyncResults): Promise => { const taskUUID = payload.taskUUID; // const mock = getRandomTaskResponses({ count: 2, taskUUID }); - return this.baseSingleRequest({ + return this.baseSingleRequest({ payload: { ...payload, customTaskUUID: taskUUID, @@ -910,6 +934,7 @@ export class RunwareBase { retry, includeGenerationTime, includePayload, + ...rest }: IUpscaleGan): Promise => { const totalRetry = retry || this._globalMaxRetries; let lis: any = undefined; @@ -933,6 +958,7 @@ export class RunwareBase { ...(outputType ? { outputType } : {}), ...(outputQuality ? { outputQuality } : {}), ...(outputFormat ? { outputFormat } : {}), + ...rest, }; this.send(payload); @@ -1178,7 +1204,7 @@ export class RunwareBase { numberResults: imageRemaining, }); - lis = this.listenToImages({ + lis = this.listenToResponse({ onPartialImages, taskUUID: taskUUID, groupKey: LISTEN_TO_IMAGES_KEY.REQUEST_IMAGES, @@ -1252,6 +1278,32 @@ export class RunwareBase { }); }; + audioInference = async ( + payload: TAudioInference + ): Promise => { + const { skipResponse, deliveryMethod = "sync", ...rest } = payload; + try { + const requestMethod = + deliveryMethod === "sync" + ? this.baseSyncRequest + : this.baseSingleRequest; + + const request = await requestMethod({ + payload: { + ...rest, + numberResults: rest.numberResults || 1, + taskType: ETaskType.AUDIO_INFERENCE, + deliveryMethod: deliveryMethod, + }, + debugKey: "audio-inference", + }); + + return request; + } catch (e) { + throw e; + } + }; + protected baseSingleRequest = async ({ payload, debugKey, @@ -1335,6 +1387,81 @@ export class RunwareBase { throw e; } }; + protected baseSyncRequest = async ({ + payload, + debugKey, + }: { + payload: Record; + debugKey: string; + }): Promise => { + const { + retry, + customTaskUUID, + includePayload, + numberResults = 1, + onPartialResponse, + includeGenerationTime, + ...restPayload + } = payload; + + const totalRetry = retry || this._globalMaxRetries; + let lis: any = undefined; + let taskUUIDs: string[] = []; + let retryCount = 0; + + const startTime = Date.now(); + + try { + return await asyncRetry( + async () => { + await this.ensureConnection(); + retryCount++; + + const taskWithSimilarTaskUUID = this._globalImages.filter((audio) => + taskUUIDs.includes(audio.taskUUID) + ); + + const taskUUID = customTaskUUID || getUUID(); + taskUUIDs.push(taskUUID); + const taskRemaining = numberResults - taskWithSimilarTaskUUID.length; + + const payload = { + ...restPayload, + taskUUID, + numberResults: taskRemaining, + }; + + this.send(payload); + + lis = this.listenToResponse({ + onPartialImages: onPartialResponse, + taskUUID: taskUUID, + groupKey: LISTEN_TO_IMAGES_KEY.REQUEST_AUDIO, + requestPayload: includePayload ? payload : undefined, + startTime: includeGenerationTime ? startTime : undefined, + }); + + const promise = await this.getSimilarImages({ + taskUUID: taskUUIDs, + numberResults, + lis, + debugKey, + }); + + lis.destroy(); + return promise as T; + }, + { + maxRetries: totalRetry, + callback: () => { + lis?.destroy(); + }, + } + ); + } catch (e) { + throw e; + } + }; async ensureConnection() { let isConnected = this.connected(); @@ -1437,11 +1564,13 @@ export class RunwareBase { numberResults, shouldThrowError, lis, + debugKey = "getting-images", }: { taskUUID: string | string[]; numberResults: number; shouldThrowError?: boolean; lis: any; + debugKey?: string; }): Promise { return (await getIntervalWithPromise( ({ resolve, reject, intervalId }) => { @@ -1471,7 +1600,7 @@ export class RunwareBase { } }, { - debugKey: "getting images", + debugKey, shouldThrowError, timeoutDuration: this._timeoutDuration, } diff --git a/Runware/types.ts b/Runware/types.ts index 8c3e13a..b499bf3 100644 --- a/Runware/types.ts +++ b/Runware/types.ts @@ -23,6 +23,7 @@ export enum ETaskType { AUTHENTICATION = "authentication", MODEL_UPLOAD = "modelUpload", MODEL_SEARCH = "modelSearch", + AUDIO_INFERENCE = "audioInference", } export type RunwareBaseType = { @@ -34,6 +35,7 @@ export type RunwareBaseType = { }; export type IOutputType = "base64Data" | "dataURI" | "URL"; +export type IDeliveryType = "sync" | "async"; export type IOutputFormat = "JPG" | "PNG" | "WEBP"; export type IVideoOutputFormat = "MP4" | "WEBM" | "MOV"; @@ -69,6 +71,7 @@ export interface IVideoToImage { seed?: number; videoURL?: string; } + export interface IControlNetImage { taskUUID: string; inputImageUUID: string; @@ -249,6 +252,12 @@ export interface IRequestImageToText extends IAdditionalResponsePayload { includeCost?: boolean; customTaskUUID?: string; retry?: number; + + model?: string; + prompts?: string[]; + inputImages?: string[]; + + [key: string]: any; } export interface IImageToText { taskType: ETaskType; @@ -335,9 +344,29 @@ export interface IUpscaleGan extends IAdditionalResponsePayload { outputFormat?: IOutputFormat; includeCost?: boolean; outputQuality?: number; + revertExtra?: boolean; + model?: string; customTaskUUID?: string; retry?: number; + + settings?: { + seed?: number; + controlNetWeight?: number; + CFGScale?: number; + positivePrompt?: string; + negativePrompt?: string; + scheduler?: string; + colorFix?: boolean; + tileDiffusion?: boolean; + clipSkip?: number; + steps?: number; + strength?: number; + checkNSFW?: boolean; + [key: string]: any; + }; + + [key: string]: any; } export type ReconnectingWebsocketProps = { @@ -644,6 +673,39 @@ export type TModelSearch = { retry?: number; } & { [key: string]: any }; +export type TAudioInference = { + model: string; + positivePrompt: string; + negativePrompt?: string; + duration: number; + numberResults?: number; + outputFormat?: "MP3" | "WAV" | "FLAC" | "AAC" | "OGG"; + outputType?: IOutputType; + webhookURL?: string; + deliveryMethod?: IDeliveryType; + uploadEndpoint?: string; + includeCost?: boolean; + onPartialResponse?: (images: IImage[], error?: IError) => void; + + audioSettings?: { + sampleRate?: number; + bitrate?: number; + [key: string]: any; + }; + + providerSettings?: { + elevenlabs?: { + music?: string; + [key: string]: any; + }; + [key: string]: any; + }; + + // other options + customTaskUUID?: string; + retry?: number; +} & { [key: string]: any }; + export type TModel = { air: string; name: string; @@ -705,6 +767,22 @@ export type TImageUploadResponse = { imageURL: string; }; +export type IAudioSyncResult = { + taskType: string; + taskUUID: string; + audioUUID: string; + audioURL?: string; + audioBase64Data?: string; + audioDataURI?: string; + cost: number; +}; +export type IAuidoAsyncResult = { + taskType: string; + taskUUID: string; + status: string; +}; +export type IAudioResult = IAudioSyncResult | IAuidoAsyncResult; + export type TImageMaskingResponse = { taskType: string; taskUUID: string; diff --git a/Runware/utils.ts b/Runware/utils.ts index 2c32170..76f9615 100644 --- a/Runware/utils.ts +++ b/Runware/utils.ts @@ -252,6 +252,7 @@ export const removeAllKeyListener = ({ export enum LISTEN_TO_IMAGES_KEY { REQUEST_IMAGES = "REQUEST_IMAGES", + REQUEST_AUDIO = "REQUEST_AUDIO", } export const evaluateNonTrue = ({ @@ -324,7 +325,6 @@ export const getIntervalAsyncWithPromise = ( export const isUrl = (value: any): value is string => { return ( typeof value === "string" && - (value.startsWith("http:") || - value.startsWith("https:")) + (value.startsWith("http:") || value.startsWith("https:")) ); -}; \ No newline at end of file +}; diff --git a/package.json b/package.json index bbc118a..a216e9d 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "@runware/sdk-js", - "version": "1.1.45-beta.1", + "version": "1.1.45-beta.7", "description": "The SDK is used to run image inference with the Runware API, powered by the RunWare inference platform. It can be used to generate imaged with text-to-image and image-to-image. It also allows the use of an existing gallery of models or selecting any model or LoRA from the CivitAI gallery. The API also supports upscaling, background removal, inpainting and outpainting, and a series of other ControlNet models.", "main": "dist/index.js", "module": "dist/index.js",