Add limits on API endpoints (#886)
* Add limits on messages, conversations, assistants and messages/minute
* Add max message length limit
* Remove rate limits from public config
* Add `RATE_LIMITS` to secrets
* Add `MESSAGES_BEFORE_LOGIN` to secrets
* Replace `RATE_LIMITS` by `USAGE_LIMITS`
* Replace `RateLimits` by `usageLimits` and only get nEvents if needed
* Rename schema too
* Replace \r\n by \n

pull/908/head
parent
21c9b41a4e
commit
537b6f5a78
7
.env
7
.env
|
@ -113,7 +113,7 @@ ADMIN_API_SECRET=# secret to admin API calls, like computing usage stats or expo
|
|||
|
||||
PARQUET_EXPORT_SECRET=#DEPRECATED, use ADMIN_API_SECRET instead
|
||||
|
||||
RATE_LIMIT= # requests per minute
|
||||
RATE_LIMIT= # /!\ Legacy definition of messages per minute. Use USAGE_LIMITS.messagesPerMinute instead
|
||||
MESSAGES_BEFORE_LOGIN=# how many messages a user can send in a conversation before having to log in. Set to 0 to force login right away
|
||||
|
||||
APP_BASE="" # base path of the app, e.g. /chat, left blank as default
|
||||
|
@ -140,4 +140,7 @@ ALTERNATIVE_REDIRECT_URLS=`[]` #valide alternative redirect URL for OAuth
|
|||
|
||||
WEBHOOK_URL_REPORT_ASSISTANT=#provide webhook url to get notified when an assistant gets reported
|
||||
|
||||
ALLOWED_USER_EMAILS=`[]` # if it's defined, only these emails will be allowed to use the app
|
||||
ALLOWED_USER_EMAILS=`[]` # if it's defined, only these emails will be allowed to use the app
|
||||
|
||||
USAGE_LIMITS=`{}`
|
||||
|
||||
|
|
|
@ -269,9 +269,6 @@ PUBLIC_APP_DISCLAIMER_MESSAGE="Disclaimer: AI is an area of active research with
|
|||
PUBLIC_APP_DATA_SHARING=1
|
||||
PUBLIC_APP_DISCLAIMER=1
|
||||
|
||||
RATE_LIMIT=16
|
||||
MESSAGES_BEFORE_LOGIN=5# how many messages a user can send in a conversation before having to login. set to 0 to force login right away
|
||||
|
||||
PUBLIC_GOOGLE_ANALYTICS_ID=G-8Q63TH4CSL
|
||||
PUBLIC_PLAUSIBLE_SCRIPT_URL="/js/script.js"
|
||||
|
||||
|
|
|
@ -27,6 +27,8 @@ jobs:
|
|||
HF_DEPLOYMENT_TOKEN: ${{ secrets.HF_DEPLOYMENT_TOKEN }}
|
||||
WEBHOOK_URL_REPORT_ASSISTANT: ${{ secrets.WEBHOOK_URL_REPORT_ASSISTANT }}
|
||||
ADMIN_API_SECRET: ${{ secrets.ADMIN_API_SECRET }}
|
||||
USAGE_LIMITS: ${{ secrets.USAGE_LIMITS }}
|
||||
MESSAGES_BEFORE_LOGIN: ${{ secrets.MESSAGES_BEFORE_LOGIN }}
|
||||
run: npm run updateProdEnv
|
||||
sync-to-hub:
|
||||
runs-on: ubuntu-latest
|
||||
|
|
|
@ -8,6 +8,8 @@ const MONGODB_URL = process.env.MONGODB_URL;
|
|||
const HF_TOKEN = process.env.HF_TOKEN ?? process.env.HF_ACCESS_TOKEN; // token used for API requests in prod
|
||||
const WEBHOOK_URL_REPORT_ASSISTANT = process.env.WEBHOOK_URL_REPORT_ASSISTANT; // slack webhook url used to get "report assistant" events
|
||||
const ADMIN_API_SECRET = process.env.ADMIN_API_SECRET;
|
||||
const USAGE_LIMITS = process.env.USAGE_LIMITS;
|
||||
const MESSAGES_BEFORE_LOGIN = process.env.MESSAGES_BEFORE_LOGIN;
|
||||
|
||||
// Read the content of the file .env.template
|
||||
const PUBLIC_CONFIG = fs.readFileSync(".env.template", "utf8");
|
||||
|
@ -20,6 +22,8 @@ SERPER_API_KEY=${SERPER_API_KEY}
|
|||
HF_TOKEN=${HF_TOKEN}
|
||||
WEBHOOK_URL_REPORT_ASSISTANT=${WEBHOOK_URL_REPORT_ASSISTANT}
|
||||
ADMIN_API_SECRET=${ADMIN_API_SECRET}
|
||||
USAGE_LIMITS=${USAGE_LIMITS}
|
||||
MESSAGES_BEFORE_LOGIN=${MESSAGES_BEFORE_LOGIN}
|
||||
`;
|
||||
|
||||
// Make an HTTP POST request to add the space secrets
|
||||
|
|
|
@ -0,0 +1,23 @@
|
|||
import { z } from "zod";
|
||||
import { USAGE_LIMITS, RATE_LIMIT } from "$env/static/private";
|
||||
import JSON5 from "json5";
|
||||
|
||||
// RATE_LIMIT is the legacy way to define the messages-per-minute limit
|
||||
export const usageLimitsSchema = z
|
||||
.object({
|
||||
conversations: z.coerce.number().optional(), // how many conversations
|
||||
messages: z.coerce.number().optional(), // how many messages in a conversation
|
||||
assistants: z.coerce.number().optional(), // how many assistants
|
||||
messageLength: z.coerce.number().optional(), // how long can a message be before we cut it off
|
||||
messagesPerMinute: z
|
||||
.preprocess((val) => {
|
||||
if (val === undefined) {
|
||||
return RATE_LIMIT;
|
||||
}
|
||||
return val;
|
||||
}, z.coerce.number().optional())
|
||||
.optional(), // how many messages per minute
|
||||
})
|
||||
.optional();
|
||||
|
||||
export const usageLimits = usageLimitsSchema.parse(JSON5.parse(USAGE_LIMITS));
|
|
@ -47,8 +47,9 @@
|
|||
});
|
||||
|
||||
if (!res.ok) {
|
||||
error.set("Error while creating conversation, try again.");
|
||||
console.error("Error while creating conversation: " + (await res.text()));
|
||||
const errorMessage = (await res.json()).message || ERROR_MESSAGES.default;
|
||||
error.set(errorMessage);
|
||||
console.error("Error while creating conversation: ", errorMessage);
|
||||
return;
|
||||
}
|
||||
|
||||
|
@ -63,7 +64,7 @@
|
|||
// invalidateAll to update list of conversations
|
||||
await goto(`${base}/conversation/${conversationId}`, { invalidateAll: true });
|
||||
} catch (err) {
|
||||
error.set(ERROR_MESSAGES.default);
|
||||
error.set((err as Error).message || ERROR_MESSAGES.default);
|
||||
console.error(err);
|
||||
} finally {
|
||||
loading = false;
|
||||
|
|
|
@ -8,6 +8,8 @@ import type { Message } from "$lib/types/Message";
|
|||
import { models, validateModel } from "$lib/server/models";
|
||||
import { defaultEmbeddingModel } from "$lib/server/embeddingModels";
|
||||
import { v4 } from "uuid";
|
||||
import { authCondition } from "$lib/server/auth";
|
||||
import { usageLimits } from "$lib/server/usageLimits";
|
||||
|
||||
export const POST: RequestHandler = async ({ locals, request }) => {
|
||||
const body = await request.text();
|
||||
|
@ -23,6 +25,15 @@ export const POST: RequestHandler = async ({ locals, request }) => {
|
|||
})
|
||||
.parse(JSON.parse(body));
|
||||
|
||||
const convCount = await collections.conversations.countDocuments(authCondition(locals));
|
||||
|
||||
if (usageLimits?.conversations && convCount > usageLimits?.conversations) {
|
||||
throw error(
|
||||
429,
|
||||
"You have reached the maximum number of conversations. Delete some to continue."
|
||||
);
|
||||
}
|
||||
|
||||
let messages: Message[] = [
|
||||
{
|
||||
id: v4(),
|
||||
|
|
|
@ -43,7 +43,7 @@
|
|||
});
|
||||
|
||||
if (!res.ok) {
|
||||
error.set("Error while creating conversation, try again.");
|
||||
error.set(await res.text());
|
||||
console.error("Error while creating conversation: " + (await res.text()));
|
||||
return;
|
||||
}
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
import { MESSAGES_BEFORE_LOGIN, RATE_LIMIT } from "$env/static/private";
|
||||
import { MESSAGES_BEFORE_LOGIN } from "$env/static/private";
|
||||
import { authCondition, requiresUser } from "$lib/server/auth";
|
||||
import { collections } from "$lib/server/database";
|
||||
import { models } from "$lib/server/models";
|
||||
|
@ -19,6 +19,7 @@ import { buildSubtree } from "$lib/utils/tree/buildSubtree.js";
|
|||
import { addChildren } from "$lib/utils/tree/addChildren.js";
|
||||
import { addSibling } from "$lib/utils/tree/addSibling.js";
|
||||
import { preprocessMessages } from "$lib/server/preprocessMessages.js";
|
||||
import { usageLimits } from "$lib/server/usageLimits";
|
||||
|
||||
export async function POST({ request, locals, params, getClientAddress }) {
|
||||
const id = z.string().parse(params.id);
|
||||
|
@ -95,14 +96,22 @@ export async function POST({ request, locals, params, getClientAddress }) {
|
|||
}
|
||||
}
|
||||
|
||||
// check if the user is rate limited
|
||||
const nEvents = Math.max(
|
||||
await collections.messageEvents.countDocuments({ userId }),
|
||||
await collections.messageEvents.countDocuments({ ip: getClientAddress() })
|
||||
);
|
||||
if (usageLimits?.messagesPerMinute) {
|
||||
// check if the user is rate limited
|
||||
const nEvents = Math.max(
|
||||
await collections.messageEvents.countDocuments({ userId }),
|
||||
await collections.messageEvents.countDocuments({ ip: getClientAddress() })
|
||||
);
|
||||
if (nEvents > usageLimits.messagesPerMinute) {
|
||||
throw error(429, ERROR_MESSAGES.rateLimited);
|
||||
}
|
||||
}
|
||||
|
||||
if (RATE_LIMIT != "" && nEvents > parseInt(RATE_LIMIT)) {
|
||||
throw error(429, ERROR_MESSAGES.rateLimited);
|
||||
if (usageLimits?.messages && conv.messages.length > usageLimits.messages) {
|
||||
throw error(
|
||||
429,
|
||||
`This conversation has more than ${usageLimits.messages} messages. Start a new one to continue`
|
||||
);
|
||||
}
|
||||
|
||||
// fetch the model
|
||||
|
@ -125,7 +134,13 @@ export async function POST({ request, locals, params, getClientAddress }) {
|
|||
} = z
|
||||
.object({
|
||||
id: z.string().uuid().refine(isMessageId).optional(), // parent message id to append to for a normal message, or the message id for a retry/continue
|
||||
inputs: z.optional(z.string().trim().min(1)),
|
||||
inputs: z.optional(
|
||||
z
|
||||
.string()
|
||||
.trim()
|
||||
.min(1)
|
||||
.transform((s) => s.replace(/\r\n/g, "\n"))
|
||||
),
|
||||
is_retry: z.optional(z.boolean()),
|
||||
is_continue: z.optional(z.boolean()),
|
||||
web_search: z.optional(z.boolean()),
|
||||
|
@ -133,6 +148,9 @@ export async function POST({ request, locals, params, getClientAddress }) {
|
|||
})
|
||||
.parse(json);
|
||||
|
||||
if (usageLimits?.messageLength && (newPrompt?.length ?? 0) > usageLimits.messageLength) {
|
||||
throw error(400, "Message too long.");
|
||||
}
|
||||
// files is an array of base64 strings encoding Blob objects
|
||||
// we need to convert this array to an array of File objects
|
||||
|
||||
|
|
|
@ -7,6 +7,7 @@ import { ObjectId } from "mongodb";
|
|||
import { z } from "zod";
|
||||
import { sha256 } from "$lib/utils/sha256";
|
||||
import sharp from "sharp";
|
||||
import { usageLimits } from "$lib/server/usageLimits";
|
||||
import { generateSearchTokens } from "$lib/utils/searchTokens";
|
||||
|
||||
const newAsssistantSchema = z.object({
|
||||
|
@ -62,6 +63,18 @@ export const actions: Actions = {
|
|||
return fail(400, { error: true, errors });
|
||||
}
|
||||
|
||||
const assistantsCount = await collections.assistants.countDocuments(authCondition(locals));
|
||||
|
||||
if (usageLimits?.assistants && assistantsCount > usageLimits.assistants) {
|
||||
const errors = [
|
||||
{
|
||||
field: "preprompt",
|
||||
message: "You have reached the maximum number of assistants. Delete some to continue.",
|
||||
},
|
||||
];
|
||||
return fail(400, { error: true, errors });
|
||||
}
|
||||
|
||||
const createdById = locals.user?._id ?? locals.sessionId;
|
||||
|
||||
const newAssistantId = new ObjectId();
|
||||
|
|
Loading…
Reference in New Issue