max queue size

Aidan 2025-07-03 14:35:59 -04:00
parent 6b1c7f0da9
commit 19e794e34c
4 changed files with 19 additions and 4 deletions

View file

@@ -3,6 +3,7 @@ import type { ModelInfo } from "../src/commands/ai"
 export const defaultFlashModel = "gemma3:4b"
 export const defaultThinkingModel = "qwen3:4b"
 export const unloadModelAfterB = 12 // how many billion params until model is auto-unloaded
+export const maxUserQueueSize = 3
 export const models: ModelInfo[] = [
   {

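The config file keeps its tunables as plain exported constants. As a hedged illustration of how a size threshold like unloadModelAfterB might be consumed (the actual check lives elsewhere in src/commands/ai and is not part of this diff; shouldAutoUnload is a hypothetical helper):

const unloadModelAfterB = 12; // same value as the config constant above

// Hypothetical helper: returns true when a model's parameter count (in
// billions) meets the threshold, so the model is unloaded after responding.
function shouldAutoUnload(paramsBillions: number): boolean {
  return paramsBillions >= unloadModelAfterB;
}

console.log(shouldAutoUnload(4));  // false: a 4B model such as gemma3:4b stays resident
console.log(shouldAutoUnload(27)); // true: a 27B model is released to free memory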
View file

@@ -41,7 +41,7 @@ import { ensureUserInDb } from "../utils/ensure-user"
 import * as schema from '../db/schema'
 import type { NodePgDatabase } from "drizzle-orm/node-postgres"
 import { eq, sql } from 'drizzle-orm'
-import { models, unloadModelAfterB } from "../../config/ai"
+import { models, unloadModelAfterB, maxUserQueueSize } from "../../config/ai"
 const spamwatchMiddleware = spamwatchMiddlewareModule(isOnSpamWatch)
 export const flash_model = process.env.flashModel || "gemma3:4b"
@@ -503,6 +503,7 @@ export default (bot: Telegraf<Context>, db: NodePgDatabase<typeof schema>) => {
     task: () => Promise<void>;
     ctx: TextContext;
     wasQueued: boolean;
+    userId: number;
   }
   const requestQueue: AiRequest[] = [];
@@ -567,6 +568,17 @@ export default (bot: Telegraf<Context>, db: NodePgDatabase<typeof schema>) => {
       return;
     }
+    const userId = ctx.from!.id;
+    const userQueueSize = requestQueue.filter(req => req.userId === userId).length;
+    if (userQueueSize >= maxUserQueueSize) {
+      await ctx.reply(Strings.ai.queueFull, {
+        parse_mode: 'Markdown',
+        ...(reply_to_message_id && { reply_parameters: { message_id: reply_to_message_id } })
+      });
+      return;
+    }
     const task = async () => {
       const modelLabel = getModelLabelByName(model);
       const replyGenerating = await ctx.reply(Strings.ai.askGenerating.replace("{model}", `\`${modelLabel}\``), {
@@ -578,14 +590,14 @@ export default (bot: Telegraf<Context>, db: NodePgDatabase<typeof schema>) => {
     };
     if (isProcessing) {
-      requestQueue.push({ task, ctx, wasQueued: true });
+      requestQueue.push({ task, ctx, wasQueued: true, userId: ctx.from!.id });
       const position = requestQueue.length;
       await ctx.reply(Strings.ai.inQueue.replace("{position}", String(position)), {
         parse_mode: 'Markdown',
         ...(reply_to_message_id && { reply_parameters: { message_id: reply_to_message_id } })
       });
     } else {
-      requestQueue.push({ task, ctx, wasQueued: false });
+      requestQueue.push({ task, ctx, wasQueued: false, userId: ctx.from!.id });
       processQueue();
     }
   }

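The per-user cap added above is a linear filter over the shared queue: each incoming request counts how many entries its sender already holds and is rejected once that count reaches maxUserQueueSize. A minimal standalone TypeScript sketch of the same idea (QueueItem, enqueue, and MAX_PER_USER are illustrative names, not from this codebase):

interface QueueItem {
  userId: number;
  run: () => Promise<void>;
}

const MAX_PER_USER = 3; // mirrors maxUserQueueSize in config/ai
const queue: QueueItem[] = [];

// Rejects the request (returns false) once the user already holds
// MAX_PER_USER queue slots; otherwise appends the task to the queue.
function enqueue(userId: number, run: () => Promise<void>): boolean {
  const held = queue.filter(item => item.userId === userId).length;
  if (held >= MAX_PER_USER) return false;
  queue.push({ userId, run });
  return true;
}

for (let i = 1; i <= 4; i++) {
  console.log(`request ${i}:`, enqueue(42, async () => {}));
}
// requests 1-3 print true; request 4 prints false

An O(n) filter per enqueue is fine at this scale; a per-user counter map would only pay off with a much larger queue.
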
View file

@@ -78,6 +78,7 @@
   "finishedThinking": "`🧠 Done thinking.`",
   "urlWarning": "\n\n⚠ Note: The model cannot access or visit links!",
   "inQueue": " You are {position} in the queue.",
+  "queueFull": "🚫 You already have too many requests in the queue. Please wait for them to finish.",
   "startingProcessing": "✨ Starting to process your request...",
   "systemPrompt": "You are a friendly assistant called {botName}.\nCurrent Date/Time (UTC): {date}\n\n---\n\nUser message:\n{message}",
   "statusWaitingRender": "⏳ Streaming...",

View file

@@ -76,6 +76,7 @@
   "finishedThinking": "`🧠 Pensamento concluido.`",
   "urlWarning": "\n\n⚠ Nota: O modelo de IA não pode acessar ou visitar links!",
   "inQueue": " Você é o {position} na fila.",
+  "queueFull": "🚫 Você já tem muitas solicitações na fila. Por favor, espere que elas terminem.",
   "startingProcessing": "✨ Começando a processar o seu pedido...",
   "aiEnabled": "IA",
   "aiModel": "Modelo de IA",