Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .env.example
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
CLAUDE_CODE_KEY=dummy_claude_code_key
OPEN_ROUTER_API_KEY=dummy_openrouter_key
OPENAI_API_KEY=dummy_openai_key
NOVITA_API_KEY=dummy_novita_key
ANTHROPIC_API_KEY=dummy_anthropic_key

# Database & Server
Expand Down
1 change: 1 addition & 0 deletions common/src/constants/byok.ts
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
export const BYOK_OPENROUTER_HEADER = 'x-openrouter-api-key'
export const BYOK_OPENROUTER_ENV_VAR = 'CODEBUFF_BYOK_OPENROUTER'
export const BYOK_NOVITA_ENV_VAR = 'CODEBUFF_BYOK_NOVITA'
2 changes: 2 additions & 0 deletions packages/internal/src/env-schema.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ export const serverEnvSchema = clientEnvSchema.extend({
// LLM API keys
OPEN_ROUTER_API_KEY: z.string().min(1),
OPENAI_API_KEY: z.string().min(1),
NOVITA_API_KEY: z.string().min(1),
ANTHROPIC_API_KEY: z.string().min(1),
LINKUP_API_KEY: z.string().min(1),
CONTEXT7_API_KEY: z.string().optional(),
Expand Down Expand Up @@ -47,6 +48,7 @@ export const serverProcessEnv: ServerInput = {
// LLM API keys
OPEN_ROUTER_API_KEY: process.env.OPEN_ROUTER_API_KEY,
OPENAI_API_KEY: process.env.OPENAI_API_KEY,
NOVITA_API_KEY: process.env.NOVITA_API_KEY,
ANTHROPIC_API_KEY: process.env.ANTHROPIC_API_KEY,
LINKUP_API_KEY: process.env.LINKUP_API_KEY,
CONTEXT7_API_KEY: process.env.CONTEXT7_API_KEY,
Expand Down
1 change: 1 addition & 0 deletions packages/internal/src/env.ts
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ if (isCI) {

ensureEnvDefault('OPEN_ROUTER_API_KEY', 'test')
ensureEnvDefault('OPENAI_API_KEY', 'test')
ensureEnvDefault('NOVITA_API_KEY', 'test')
ensureEnvDefault('ANTHROPIC_API_KEY', 'test')
ensureEnvDefault('LINKUP_API_KEY', 'test')
ensureEnvDefault('GRAVITY_API_KEY', 'test')
Expand Down
9 changes: 8 additions & 1 deletion sdk/src/env.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,10 @@
* process env with SDK-specific vars for binary paths and WASM.
*/

import { BYOK_OPENROUTER_ENV_VAR } from '@codebuff/common/constants/byok'
import {
BYOK_OPENROUTER_ENV_VAR,
BYOK_NOVITA_ENV_VAR,
} from '@codebuff/common/constants/byok'
import { CLAUDE_OAUTH_TOKEN_ENV_VAR } from '@codebuff/common/constants/claude-oauth'
import { API_KEY_ENV_VAR } from '@codebuff/common/constants/paths'
import { getBaseEnv } from '@codebuff/common/env-process'
Expand Down Expand Up @@ -42,6 +45,10 @@ export const getByokOpenrouterApiKeyFromEnv = (): string | undefined => {
return process.env[BYOK_OPENROUTER_ENV_VAR]
}

/**
 * Read the user's BYOK (bring-your-own-key) Novita API key from the
 * environment, if one was provided. Returns undefined when unset.
 */
export const getByokNovitaApiKeyFromEnv = (): string | undefined =>
  process.env[BYOK_NOVITA_ENV_VAR]

/**
* Get Claude OAuth token from environment variable.
* This allows users to provide their Claude Pro/Max OAuth token for direct Anthropic API access.
Expand Down
32 changes: 31 additions & 1 deletion sdk/src/impl/model-provider.ts
Original file line number Diff line number Diff line change
Expand Up @@ -24,9 +24,13 @@ import {

import { WEBSITE_URL } from '../constants'
import { getValidClaudeOAuthCredentials } from '../credentials'
import { getByokOpenrouterApiKeyFromEnv } from '../env'
import {
getByokOpenrouterApiKeyFromEnv,
getByokNovitaApiKeyFromEnv,
} from '../env'

import type { LanguageModel } from 'ai'
import { createOpenAICompatible } from '@codebuff/internal/openai-compatible/index'

// ============================================================================
// Claude OAuth Rate Limit Cache
Expand Down Expand Up @@ -188,12 +192,38 @@ export async function getModelForRequest(params: ModelRequestParams): Promise<Mo
}

// Default: use Codebuff backend
if (model.startsWith('novita/')) {
const novitaApiKey = getByokNovitaApiKeyFromEnv()
if (novitaApiKey) {
return {
model: createNovitaDirectModel(model, novitaApiKey),
isClaudeOAuth: false,
}
}
}

return {
model: createCodebuffBackendModel(apiKey, model),
isClaudeOAuth: false,
}
}

/**
* Create a direct Novita model.
*/
/**
 * Create a direct Novita model via Novita's OpenAI-compatible endpoint.
 *
 * @param model - Model id, optionally prefixed with "novita/"
 *   (e.g. "novita/deepseek/deepseek-r1").
 * @param apiKey - The user's Novita API key (BYOK).
 * @returns A LanguageModel that talks directly to api.novita.ai.
 */
function createNovitaDirectModel(
  model: string,
  apiKey: string,
): LanguageModel {
  // Strip the routing prefix. Derive the slice length from the prefix
  // literal so the two can never drift apart (the original hard-coded
  // `slice(7)`).
  const NOVITA_PREFIX = 'novita/'
  const novitaModelId = model.startsWith(NOVITA_PREFIX)
    ? model.slice(NOVITA_PREFIX.length)
    : model
  const novita = createOpenAICompatible({
    name: 'novita',
    baseURL: 'https://api.novita.ai/openai',
    apiKey,
  })
  // NOTE(review): the double assertion papers over a type mismatch between
  // the openai-compatible provider's model type and the `ai` package's
  // LanguageModel — confirm the installed versions are wire-compatible.
  return novita.chatModel(novitaModelId) as unknown as LanguageModel
}

/**
* Create an Anthropic model that uses OAuth Bearer token authentication.
*/
Expand Down
57 changes: 36 additions & 21 deletions web/src/app/api/v1/chat/completions/_post.ts
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ import {
handleOpenAINonStream,
OPENAI_SUPPORTED_MODELS,
} from '@/llm-api/openai'
import { handleNovitaNonStream } from '@/llm-api/novita'
import {
handleOpenRouterNonStream,
handleOpenRouterStream,
Expand Down Expand Up @@ -390,27 +391,41 @@ export async function postChatCompletions(params: {
// All other models (including non-OpenAI with n parameter) should use OpenRouter
const shouldUseOpenAIEndpoint =
isOpenAIDirectModel && typedBody.codebuff_metadata?.n !== undefined

const nonStreamRequest = shouldUseOpenAIEndpoint
? handleOpenAINonStream({
body: typedBody,
userId,
stripeCustomerId,
agentId,
fetch,
logger,
insertMessageBigquery,
})
: handleOpenRouterNonStream({
body: typedBody,
userId,
stripeCustomerId,
agentId,
openrouterApiKey,
fetch,
logger,
insertMessageBigquery,
})
const shouldUseNovitaEndpoint = model.startsWith('novita/')

let nonStreamRequest
if (shouldUseNovitaEndpoint) {
nonStreamRequest = handleNovitaNonStream({
body: typedBody,
userId,
stripeCustomerId,
agentId,
fetch,
logger,
insertMessageBigquery,
})
} else if (shouldUseOpenAIEndpoint) {
nonStreamRequest = handleOpenAINonStream({
body: typedBody,
userId,
stripeCustomerId,
agentId,
fetch,
logger,
insertMessageBigquery,
})
} else {
nonStreamRequest = handleOpenRouterNonStream({
body: typedBody,
userId,
stripeCustomerId,
agentId,
openrouterApiKey,
fetch,
logger,
insertMessageBigquery,
})
}
const result = await nonStreamRequest

trackEvent({
Expand Down
170 changes: 170 additions & 0 deletions web/src/llm-api/novita.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,170 @@
import { env } from '@codebuff/internal/env'

import {
consumeCreditsForMessage,
extractRequestMetadata,
insertMessageToBigQuery,
} from './helpers'

import type { UsageData } from './helpers'
import type { InsertMessageBigqueryFn } from '@codebuff/common/types/contracts/bigquery'
import type { Logger } from '@codebuff/common/types/contracts/logger'
import type { ChatCompletionRequestBody } from './types'

// Novita pricing ($/M tokens, based on Novita pricing page).
// Unknown models fall back to the 'default' entry; keep both tables in sync.
// No `as const` here: the explicit Record<string, number> annotation widens
// the values anyway, so a const assertion would have no effect and would
// only suggest a literal-key safety the type does not provide.
const INPUT_TOKEN_COSTS: Record<string, number> = {
  'deepseek/deepseek-r1': 0.6,
  'deepseek/deepseek-v3.2': 0.269,
  'deepseek/deepseek-v3': 0.269, // alias
  'zai-org/glm-5': 1.0,
  'minimax/minimax-m2.5': 0.3,
  'meta-llama/llama-3.3-70b-instruct': 0.6,
  'default': 0.6,
}

const OUTPUT_TOKEN_COSTS: Record<string, number> = {
  'deepseek/deepseek-r1': 2.4,
  'deepseek/deepseek-v3.2': 0.4,
  'deepseek/deepseek-v3': 0.4, // alias
  'zai-org/glm-5': 3.2,
  'minimax/minimax-m2.5': 1.2,
  'meta-llama/llama-3.3-70b-instruct': 2.4,
  'default': 2.4,
}

/** Shape of the `usage` object Novita returns on chat completions. */
interface NovitaUsage {
  prompt_tokens?: number
  completion_tokens?: number
  completion_tokens_details?: { reasoning_tokens?: number }
}

/**
 * Convert Novita's usage block into our UsageData, pricing tokens with the
 * per-model tables above (falling back to 'default' for unknown models).
 *
 * @param usage - Novita's `usage` object; missing or absent fields count
 *   as 0 tokens, so a null/undefined usage yields a zero-cost result
 *   instead of throwing.
 * @param model - Novita model id (without the "novita/" prefix), used for
 *   the pricing-table lookup.
 */
function extractUsageAndCost(
  usage: NovitaUsage | null | undefined,
  model: string,
): UsageData {
  const inputTokenCost =
    INPUT_TOKEN_COSTS[model] ?? INPUT_TOKEN_COSTS['default']
  const outputTokenCost =
    OUTPUT_TOKEN_COSTS[model] ?? OUTPUT_TOKEN_COSTS['default']

  const inTokens = usage?.prompt_tokens ?? 0
  const outTokens = usage?.completion_tokens ?? 0
  // Prices are quoted per million tokens.
  const cost =
    (inTokens / 1_000_000) * inputTokenCost +
    (outTokens / 1_000_000) * outputTokenCost

  return {
    inputTokens: inTokens,
    outputTokens: outTokens,
    // Novita does not report cache reads on this endpoint.
    cacheReadInputTokens: 0,
    reasoningTokens: usage?.completion_tokens_details?.reasoning_tokens ?? 0,
    cost,
  }
}

/**
 * Handle a non-streaming chat completion against Novita's OpenAI-compatible
 * API: forward the request, price the usage, record it to BigQuery
 * (fire-and-forget), consume user credits, and return an OpenAI-shaped
 * single-choice response.
 *
 * @throws Error when the Novita API responds with a non-2xx status.
 */
export async function handleNovitaNonStream({
  body,
  userId,
  stripeCustomerId,
  agentId,
  fetch,
  logger,
  insertMessageBigquery,
}: {
  body: ChatCompletionRequestBody
  userId: string
  stripeCustomerId?: string | null
  agentId: string
  fetch: typeof globalThis.fetch
  logger: Logger
  insertMessageBigquery: InsertMessageBigqueryFn
}) {
  const startTime = new Date()
  const { clientId, clientRequestId, costMode } = extractRequestMetadata({
    body,
    logger,
  })

  const { model } = body
  // Incoming ids look like "novita/deepseek/deepseek-r1"; Novita's API
  // expects the id without the routing prefix ('novita/'.length === 7).
  const novitaModel = model.startsWith('novita/') ? model.slice(7) : model

  // Build a Novita-compatible body: swap in the unprefixed model id and
  // force non-streaming.
  const novitaBody: Record<string, unknown> = {
    ...body,
    model: novitaModel,
    stream: false,
  }

  // Strip OpenRouter/Codebuff-specific fields that Novita's OpenAI-style
  // endpoint does not accept.
  delete novitaBody.usage
  delete novitaBody.provider
  delete novitaBody.transforms
  delete novitaBody.codebuff_metadata

  const response = await fetch(
    'https://api.novita.ai/openai/chat/completions',
    {
      method: 'POST',
      headers: {
        Authorization: `Bearer ${env.NOVITA_API_KEY}`,
        'Content-Type': 'application/json',
      },
      body: JSON.stringify(novitaBody),
    },
  )

  if (!response.ok) {
    throw new Error(
      `Novita API error: ${response.status} ${response.statusText} ${await response.text()}`,
    )
  }

  const data = await response.json()

  // Fix: the original wrote `data.usage.cost = ...` directly, which throws
  // a TypeError when Novita omits `usage` entirely (the `?? {}` fallback
  // only protected the local variable, not the write-back). Normalize
  // `data.usage` to a guaranteed object before annotating it.
  const usage = data.usage ?? {}
  const usageData = extractUsageAndCost(usage, novitaModel)
  data.usage = {
    ...usage,
    cost: usageData.cost,
    // No upstream (BYOK) inference cost on this path; mirror the
    // OpenRouter response shape.
    cost_details: { upstream_inference_cost: null },
  }

  // Collect every choice's content; BigQuery stores the full list as JSON.
  const responseContents: string[] = []
  if (data.choices && Array.isArray(data.choices)) {
    for (const choice of data.choices) {
      responseContents.push(choice.message?.content ?? '')
    }
  }
  const responseText = JSON.stringify(responseContents)
  const reasoningText = data.choices?.[0]?.message?.reasoning_content ?? ''

  // Fire-and-forget: analytics failures must not fail the user's request.
  insertMessageToBigQuery({
    messageId: data.id,
    userId,
    startTime,
    request: body,
    reasoningText,
    responseText,
    usageData,
    logger,
    insertMessageBigquery,
  }).catch((error) => {
    logger.error({ error }, 'Failed to insert message into BigQuery (Novita)')
  })

  await consumeCreditsForMessage({
    messageId: data.id,
    userId,
    stripeCustomerId,
    agentId,
    clientId,
    clientRequestId,
    startTime,
    model: data.model,
    reasoningText,
    responseText,
    usageData,
    byok: false,
    logger,
    costMode,
  })

  // Normalize to a single-choice OpenAI-style response.
  // Fix: propagate the real finish_reason (e.g. 'length' on truncation)
  // instead of hard-coding 'stop', which masked truncated completions.
  return {
    ...data,
    choices: [
      {
        index: 0,
        message: { content: responseContents[0] ?? '', role: 'assistant' },
        finish_reason: data.choices?.[0]?.finish_reason ?? 'stop',
      },
    ],
  }
}