25 changes: 6 additions & 19 deletions .eslintrc.json
@@ -75,24 +75,7 @@
"@next/next/no-styled-jsx-in-document": ["warn"],
"@next/next/no-title-in-document-head": ["warn"],
"@next/next/no-typos": ["warn"],
"import/order": [
"error",
{
"groups": [
"builtin",
"external",
"internal",
"parent",
"sibling",
"index"
],
"newlines-between": "always",
"alphabetize": {
"order": "asc",
"caseInsensitive": false
}
}
],
"import/order": "off",
"import/no-unresolved": ["off"],
"import/no-anonymous-default-export": ["warn"],
"no-empty": ["warn"],
@@ -104,7 +87,11 @@
},
"import/resolver": {
"typescript": {
"alwaysTryTypes": true
"alwaysTryTypes": true,
"project": "./tsconfig.json"
},
"node": {
"extensions": [".js", ".jsx", ".ts", ".tsx"]
}
}
},
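For context, a minimal sketch of what the resolver additions are meant to cover; the file paths and the `@/*` alias below are assumptions about this repo's layout, not taken from the diff. The `node` resolver's extension list lets extensionless relative imports match `.ts`/`.tsx` files, while pointing the `typescript` resolver at `./tsconfig.json` lets it honor any path aliases defined there.

```ts
// Extensionless relative import: resolved by the node resolver's
// [".js", ".jsx", ".ts", ".tsx"] extension list (assumes ./utils/helper.ts exists).
import { helper } from './utils/helper';

// Alias import: resolved by the typescript resolver against ./tsconfig.json
// "paths" (assumes a "@/*" alias is configured there).
import { Button } from '@/components/Button';
```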
96 changes: 89 additions & 7 deletions app/api/auth/[...auth0]/route.ts
@@ -1,14 +1,96 @@
import { handleAuth } from '@auth0/nextjs-auth0';
import { NextRequest } from 'next/server';
import { NextRequest, NextResponse } from 'next/server';

const DEV_SESSION_COOKIE = 'dev_auth_session';

function getDevUserId(): string {
return process.env.DEV_USER_ID || 'dev-test-user';
}

function getDevUser() {
const userId = getDevUserId();
const email = userId.includes('|')
? `${userId.split('|')[0]}@dev.test`
: 'dev@test.com';
const name = userId.includes('|')
? `Dev User (${userId.split('|')[0]})`
: 'Dev Test User';

return {
sub: userId,
email,
name,
picture: null,
email_verified: true,
updated_at: new Date().toISOString(),
};
}

function isDevBypassEnabled() {
return process.env.NODE_ENV === 'development' && process.env.DEV_BYPASS_AUTH === 'true';
}

function handleDevLogin(request: NextRequest): NextResponse {
const returnTo = request.nextUrl.searchParams.get('returnTo') || '/';
const redirectUrl = new URL(returnTo, request.url);

const response = NextResponse.redirect(redirectUrl);

response.cookies.set(DEV_SESSION_COOKIE, 'true', {
httpOnly: true,
secure: false,
sameSite: 'lax',
maxAge: 60 * 60 * 24 * 7,
path: '/',
});

const userId = getDevUserId();
console.log('[DEV MODE] Login bypass - setting dev session cookie for user:', userId);
return response;
}

function handleDevLogout(request: NextRequest): NextResponse {
const returnTo = request.nextUrl.searchParams.get('returnTo') || '/';
const redirectUrl = new URL(returnTo, request.url);

const response = NextResponse.redirect(redirectUrl);

response.cookies.delete(DEV_SESSION_COOKIE);

console.log('[DEV MODE] Logout bypass - clearing dev session cookie');
return response;
}

function handleDevMe(request: NextRequest): NextResponse {
const hasDevSession = request.cookies.get(DEV_SESSION_COOKIE)?.value === 'true';

if (hasDevSession) {
const devUser = getDevUser();
console.log('[DEV MODE] Returning dev user for /me endpoint:', devUser.sub);
return NextResponse.json(devUser);
}

return NextResponse.json(null, { status: 401 });
}

// Next.js 15 compatible Auth0 handler
export async function GET(request: NextRequest, context: { params: Promise<{ auth0: string[] }> }) {
// Await the params as required by Next.js 15
const params = await context.params;

// Determine which Auth0 sub-route was requested (login, logout, me, callback)
const route = params.auth0?.[0];

if (isDevBypassEnabled()) {
switch (route) {
case 'login':
return handleDevLogin(request);
case 'logout':
return handleDevLogout(request);
case 'me':
return handleDevMe(request);
case 'callback':
return NextResponse.redirect(new URL('/', request.url));
}
}

const auth0Handler = handleAuth();

// Call the Auth0 handler with the resolved params

return auth0Handler(request, { params });
}
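As a rough usage sketch (not part of the diff): with `NODE_ENV=development` and `DEV_BYPASS_AUTH=true` set, plus optionally `DEV_USER_ID`, the bypass can be exercised end to end. The base URL and expected statuses below are assumptions.

```ts
// Hypothetical smoke test for the dev bypass; assumes the app is running
// locally on port 3000 with DEV_BYPASS_AUTH=true in .env.local.
async function checkDevBypass(base = 'http://localhost:3000') {
  // /api/auth/login redirects to returnTo and sets the dev_auth_session cookie.
  const login = await fetch(`${base}/api/auth/login?returnTo=/`, { redirect: 'manual' });
  const setCookie = login.headers.get('set-cookie') ?? '';
  console.log('login:', login.status, 'cookie set:', setCookie.includes('dev_auth_session'));

  // /api/auth/me returns the synthetic dev user while the cookie is present...
  const me = await fetch(`${base}/api/auth/me`, {
    headers: { cookie: 'dev_auth_session=true' },
  });
  console.log('me:', me.status, await me.json());

  // ...and 401s without it.
  const anon = await fetch(`${base}/api/auth/me`);
  console.log('me without cookie:', anon.status); // expect 401
}

checkDevBypass().catch(console.error);
```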
62 changes: 16 additions & 46 deletions app/api/chat/route.ts
@@ -26,7 +26,7 @@ function createOpenAIWithRateLimit(apiKey: string)
fetch: async (url, options) => {
// Inject required kwargs for DeepSeek V3.x to enable thinking. Workaround for old ai sdk version.
const body = JSON.parse(options!.body as string || '{}');
if (body.model === 'deepseek-ai/DeepSeek-V3.1' || body.model === 'DeepSeek-V3.1') {
if (body.model?.includes('deepseek-ai/DeepSeek-V3.') || body.model === 'DeepSeek-V3.1') {
options!.body = JSON.stringify({
...body,
chat_template_kwargs: {
@@ -37,7 +37,6 @@ function createOpenAIWithRateLimit(apiKey: string) {

const response = await fetch(url, options);

// Only process streaming responses
if (response.headers.get('content-type')?.includes('text/event-stream')) {
let reasoningBuffer = '';
let isFirstContent = true;
@@ -53,12 +52,10 @@
try {
const data = JSON.parse(line.slice(6));

// Capture reasoning content
if (data.choices?.[0]?.delta?.reasoning_content) {
reasoningBuffer += data.choices[0].delta.reasoning_content;
}

// Inject reasoning before first content
if (data.choices?.[0]?.delta?.content && isFirstContent && reasoningBuffer) {
isFirstContent = false;

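The stream handling above is truncated by the diff view; a self-contained sketch of the per-line logic it describes follows. How the buffered reasoning is spliced into the first content delta is not visible here, so the `<think>...</think>` wrapper is an assumption.

```ts
type StreamState = { reasoningBuffer: string; isFirstContent: boolean };

// Buffer reasoning_content deltas and prepend them (wrapped in an assumed
// <think> block) to the first regular content delta of the SSE stream.
function processSSELine(line: string, state: StreamState): string {
  if (!line.startsWith('data: ') || line === 'data: [DONE]') return line;
  try {
    const data = JSON.parse(line.slice(6));
    const delta = data.choices?.[0]?.delta;
    if (delta?.reasoning_content) {
      state.reasoningBuffer += delta.reasoning_content;
    }
    if (delta?.content && state.isFirstContent && state.reasoningBuffer) {
      state.isFirstContent = false;
      delta.content = `<think>${state.reasoningBuffer}</think>${delta.content}`;
      return `data: ${JSON.stringify(data)}`;
    }
  } catch {
    // Not a JSON payload line; pass it through unchanged.
  }
  return line;
}
```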
@@ -103,39 +100,31 @@ function createOpenAIWithRateLimit(apiKey: string) {
});
}

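Isolated for clarity, the DeepSeek body rewrite at the top of `createOpenAIWithRateLimit` amounts to the following. The diff truncates the `chat_template_kwargs` literal, so `thinking: true` is an assumption consistent with the "enable thinking" comment.

```ts
// Sketch of the request-body rewrite in the custom fetch wrapper.
// The exact kwargs are cut off in the diff; thinking: true is assumed.
function injectDeepSeekKwargs(rawBody: string): string {
  const body = JSON.parse(rawBody || '{}');
  if (body.model?.includes('deepseek-ai/DeepSeek-V3.') || body.model === 'DeepSeek-V3.1') {
    return JSON.stringify({ ...body, chat_template_kwargs: { thinking: true } });
  }
  return rawBody;
}
```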
// Create custom OpenAI provider instance with reasoning injection
const openai = createOpenAIWithRateLimit(apiKey);

// Define the handler function to be wrapped with authentication
async function handlePostRequest(req: NextRequest) {
// Check Auth0 authentication first
const session = await getSession(req, NextResponse.next());
const isAuthenticated = !!session?.user;

// Get user-specific API key for authenticated users

let userApiKey: string | null = null;
let openaiClient = openai; // Default to admin client
let openaiClient = openai;

if (isAuthenticated && session?.user?.sub) {
userApiKey = await LiteLLMService.getApiKey(session.user.sub);

// Create user-specific OpenAI client if user has API key

if (userApiKey) {
openaiClient = createOpenAIWithRateLimit(userApiKey);
}
}

// Apply rate limiting to all users (unless ACCESS_TOKEN is required)

const isAccessTokenRequired = process.env.ACCESS_TOKEN && process.env.ACCESS_TOKEN.trim() !== '';
let rateLimitIdentifier: string | null = null;

const shouldApplyRateLimit = !isAccessTokenRequired;

if (shouldApplyRateLimit) {
// Determine user ID for database-driven rate limiting
const userId = isAuthenticated && session?.user?.sub ? session.user.sub : null;

// Use Auth0 user ID for authenticated users, IP for anonymous

rateLimitIdentifier = userId || getClientIP(req);

const rateLimitConfig = await getRateLimitConfigForUser(userId);
@@ -164,30 +153,24 @@ async function handlePostRequest(req: NextRequest) {
);
}
}

// Extract the `messages` and `model` from the body of the request

const body = await req.json();
const { messages, system, context } = body;
// Ensure temperature and topP are numbers
const temperature = body.temperature ? Number(body.temperature) : undefined;
const topP = body.topP ? Number(body.topP) : undefined;
let { model } = body;
// Helper to normalize model format
const normalizeModel = (model: any) => ({
model_id: model.model_id || model.id,
token_limit: model.token_limit || model.tokenLimit,
temperature: model.temperature,
top_p: model.top_p,
});
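// For example (hypothetical inputs), both shapes normalize to the same record:
//   normalizeModel({ model_id: 'Meta-Llama-3-3-70B-Instruct', token_limit: 8192 })
//   normalizeModel({ id: 'Meta-Llama-3-3-70B-Instruct', tokenLimit: 8192 })
// each yield { model_id: 'Meta-Llama-3-3-70B-Instruct', token_limit: 8192, ... }.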

// Get user ID for model access validation (authenticated users vs anonymous)
const userId = isAuthenticated && session?.user?.sub ? session.user.sub : null;

// Get available models for this specific user (considers tier access)

const allModels = await getAvailableModelsForUser(userId);
const dbModel = allModels.find(m => m.model_id === model);

// Validate that user has access to this model

if (!dbModel) {
return new Response(
JSON.stringify({
@@ -242,10 +225,9 @@ async function handlePostRequest(req: NextRequest) {
const availableTokens = tokenLimit - tokenCount - 1000;
const errorMessage = "[Message too long for this model. Please try with a shorter message or a different model.]";

if (availableTokens > 100) { // Ensure we have enough tokens for a meaningful truncation
// Calculate how much content we can actually fit
const maxContentTokens = availableTokens - 50; // Reserve tokens for truncation notice
const truncatedContent = message.content.slice(0, Math.floor(maxContentTokens * 3.5)); // Rough estimate: 1 token ≈ 3.5 chars
if (availableTokens > 100) {
const maxContentTokens = availableTokens - 50;
const truncatedContent = message.content.slice(0, Math.floor(maxContentTokens * 3.5));
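// Worked example with hypothetical numbers: tokenLimit = 8192, tokenCount = 4000
// gives availableTokens = 3192 (> 100) and maxContentTokens = 3142, so roughly
// floor(3142 * 3.5) = 10997 characters of the message are kept. If instead
// tokenCount = 9000, availableTokens is negative and truncation is skipped.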
messagesToSend = [{
...message,
content: truncatedContent + "\n\n[Message truncated due to length]"
@@ -276,7 +258,6 @@ async function handlePostRequest(req: NextRequest) {
}
}

// Store conversation token count for rate limit status display
if (shouldApplyRateLimit && rateLimitIdentifier) {
try {
await storeConversationTokens(
@@ -292,9 +273,7 @@ async function handlePostRequest(req: NextRequest) {
encoding.free();

if (model === 'AkashGen') {
// Skip the image generation tool if it fails
try {
// Send the message to a small model first to determine if it's an image request
const smallModelId = imgGenFnModel || 'Meta-Llama-3-3-70B-Instruct';
const smallResponse = await generateText({
model: openaiClient(smallModelId),
@@ -307,18 +286,15 @@ async function handlePostRequest(req: NextRequest) {
topP: topP || selectedModel?.top_p
});

// If the small model used the image generation tool, return the result
if (smallResponse.toolResults.length > 0) {
const imageResult = smallResponse.toolResults[0].result;

// Track token usage for image generation

if (shouldApplyRateLimit && rateLimitIdentifier && smallResponse.usage) {
const totalTokens = (smallResponse.usage.promptTokens || 0) + (smallResponse.usage.completionTokens || 0);
if (totalTokens > 0) {
try {
const userId = isAuthenticated && session?.user?.sub ? session.user.sub : null;
const rateLimitConfig = await getRateLimitConfigForUser(userId);
// Use model from body for token multiplier calculation
await incrementTokenUsageWithMultiplier(rateLimitIdentifier, totalTokens, model, rateLimitConfig);
} catch (error) {
console.error('Failed to track token usage for image generation:', error);
@@ -346,7 +322,6 @@ async function handlePostRequest(req: NextRequest) {
);
}

// If the small model didn't use the image generation tool, use the default model for the rest of the conversation
model = 'Meta-Llama-3-3-70B-Instruct';
} catch (error) {
return new Response(
@@ -376,16 +351,14 @@ async function handlePostRequest(req: NextRequest) {
topP: topP || selectedModel?.top_p,

});

// Track token usage for rate limiting

result.usage.then(async (usage) => {
if (shouldApplyRateLimit && rateLimitIdentifier && usage) {
const totalTokens = (usage.promptTokens || 0) + (usage.completionTokens || 0);
if (totalTokens > 0) {
try {
const userId = isAuthenticated && session?.user?.sub ? session.user.sub : null;
const rateLimitConfig = await getRateLimitConfigForUser(userId);
// Use model from body for token multiplier calculation
await incrementTokenUsageWithMultiplier(rateLimitIdentifier, totalTokens, model, rateLimitConfig);
} catch (error) {
console.error('Failed to track token usage:', error);
@@ -399,10 +372,8 @@ async function handlePostRequest(req: NextRequest) {
result.mergeIntoDataStream(dataStream);
},
onError: error => {
// Handle specific OpenAI errors
if (error instanceof Error) {
if (error.name === 'OpenAIError') {
// Return user-friendly error messages for common OpenAI errors
if (error.message.includes('Rate limit')) {
return 'Rate limit exceeded. Please try again later.';
} else if (error.message.includes('Invalid API key')) {
Expand All @@ -418,5 +389,4 @@ async function handlePostRequest(req: NextRequest) {
});
}

// Export the wrapped handler
export const POST = withSessionAuth(handlePostRequest);
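For reference, a hypothetical client call matching the fields this handler reads from the request body (`model`, `messages`, `system`, `context`, `temperature`, `topP`); the endpoint path follows from the file location `app/api/chat/route.ts`, and the model name is one that appears in the diff.

```ts
// Hypothetical request against this route (streams an AI SDK data stream back).
const res = await fetch('/api/chat', {
  method: 'POST',
  headers: { 'Content-Type': 'application/json' },
  body: JSON.stringify({
    model: 'Meta-Llama-3-3-70B-Instruct',
    messages: [{ role: 'user', content: 'Hello there' }],
    temperature: 0.7,
    topP: 0.9,
  }),
});
```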