From 4ff96acfc6ad69baf8788a683c069970a3d10dfb Mon Sep 17 00:00:00 2001 From: n4ze3m Date: Thu, 8 Aug 2024 18:10:45 +0530 Subject: [PATCH 1/6] chore: Update package versions to 1.9.5 --- app/ui/package.json | 2 +- package.json | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/app/ui/package.json b/app/ui/package.json index 3b5a502b..9a92f2bd 100644 --- a/app/ui/package.json +++ b/app/ui/package.json @@ -1,7 +1,7 @@ { "name": "app", "private": true, - "version": "1.9.4", + "version": "1.9.5", "type": "module", "scripts": { "dev": "vite", diff --git a/package.json b/package.json index f4a598a8..d25d1ffb 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "dialoqbase", - "version": "1.9.4", + "version": "1.9.5", "description": "Create chatbots with ease", "scripts": { "ui:dev": "pnpm run --filter ui dev", From af83b7570e525b55bdd00d391b7be430a40aae89 Mon Sep 17 00:00:00 2001 From: n4ze3m Date: Thu, 8 Aug 2024 18:15:03 +0530 Subject: [PATCH 2/6] chore: Update fastifyMultipart options for file size limit --- server/src/app.ts | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/server/src/app.ts b/server/src/app.ts index 88a8baaf..cec4fe06 100644 --- a/server/src/app.ts +++ b/server/src/app.ts @@ -31,7 +31,11 @@ const app: FastifyPluginAsync = async ( void fastify.register(FastifySSEPlugin); - void fastify.register(fastifyMultipart); + void fastify.register(fastifyMultipart, { + limits: { + fileSize: 1 * 1024 * 1024 * 1024, + } + }); void fastify.register(swagger); From a8e56e64abcd4fc72231f3c93ae831b0729b2800 Mon Sep 17 00:00:00 2001 From: n4ze3m Date: Thu, 8 Aug 2024 18:16:36 +0530 Subject: [PATCH 3/6] chore: Update file upload accept attribute to include .zip files --- app/ui/src/components/Common/BotForm.tsx | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/app/ui/src/components/Common/BotForm.tsx b/app/ui/src/components/Common/BotForm.tsx index e16c2164..e2550608 100644 --- a/app/ui/src/components/Common/BotForm.tsx +++ b/app/ui/src/components/Common/BotForm.tsx @@ -35,7 +35,7 @@ type Props = { form: FormInstance; showEmbeddingAndModels: boolean; newSelectedSource?: any; - botConfig: BotConfig + botConfig: BotConfig; }; function classNames(...classes: string[]) { return classes.filter(Boolean).join(" "); @@ -47,7 +47,7 @@ export const BotForm = ({ setSelectedSource, form, showEmbeddingAndModels, - botConfig + botConfig, }: Props) => { const youtubeMode = Form.useWatch(["options", "youtube_mode"], form); const url = Form.useWatch(["content"], form); @@ -117,7 +117,7 @@ export const BotForm = ({ }} > { @@ -130,6 +130,7 @@ export const BotForm = ({ "audio/mp4", "video/mp4", "video/mpeg", + "application/zip", ] .map((type) => type.toLowerCase()) .join(", "); @@ -161,12 +162,13 @@ export const BotForm = ({

- Click or drag PDF, Docx, CSV , TXT, MP3, MP4 files to this + Click or drag PDF, Docx, CSV , TXT, MP3, MP4, Zip files to + this

- Support is available for a single or bulk upload of up to 10 + {`Support is available for a single or bulk upload of up to ${botConfig?.fileUploadSizeLimit} files. Please note that file upload is in beta, so if you - encounter any issues, kindly report them. + encounter any issues, kindly report them.`}

From 5bceaea9a50184bb4ded7644bada61e058236a12 Mon Sep 17 00:00:00 2001 From: n4ze3m Date: Thu, 8 Aug 2024 19:27:14 +0530 Subject: [PATCH 4/6] chore: Update file upload accept attribute to include .zip files --- app/ui/src/components/Common/BotForm.tsx | 1 + server/package.json | 4 + .../handlers/api/v1/bot/bot/get.handler.ts | 7 +- .../handlers/api/v1/bot/bot/upload.handler.ts | 2 +- server/src/plugins/bull.ts | 12 +- .../src/queue/controllers/zip.controller.ts | 117 +++++++++ server/src/queue/index.ts | 4 + server/src/queue/q.ts | 19 ++ server/src/utils/fileType.ts | 4 + server/yarn.lock | 244 +++++++++++++++++- 10 files changed, 396 insertions(+), 18 deletions(-) create mode 100644 server/src/queue/controllers/zip.controller.ts create mode 100644 server/src/queue/q.ts diff --git a/app/ui/src/components/Common/BotForm.tsx b/app/ui/src/components/Common/BotForm.tsx index e2550608..b45bbbd3 100644 --- a/app/ui/src/components/Common/BotForm.tsx +++ b/app/ui/src/components/Common/BotForm.tsx @@ -131,6 +131,7 @@ export const BotForm = ({ "video/mp4", "video/mpeg", "application/zip", + "application/x-zip-compressed", ] .map((type) => type.toLowerCase()) .join(", "); diff --git a/server/package.json b/server/package.json index 17e22687..6e7774ff 100644 --- a/server/package.json +++ b/server/package.json @@ -92,6 +92,8 @@ "turndown": "^7.1.3", "unique-names-generator": "^4.7.1", "wavefile": "^11.0.0", + "yauzl": "^3.1.3", + "yauzl-promise": "^4.0.0", "yt-transcript": "^0.0.2", "ytdl-core": "^4.11.5" }, @@ -103,6 +105,8 @@ "@types/pubsub-js": "^1.8.3", "@types/tap": "^15.0.5", "@types/turndown": "^5.0.4", + "@types/yauzl": "^2.10.3", + "@types/yauzl-promise": "^4.0.1", "c8": "9.0.0", "fastify-tsconfig": "2.0.0", "prisma": "^5.9.1", diff --git a/server/src/handlers/api/v1/bot/bot/get.handler.ts b/server/src/handlers/api/v1/bot/bot/get.handler.ts index 436bd125..421f5678 100644 --- a/server/src/handlers/api/v1/bot/bot/get.handler.ts +++ b/server/src/handlers/api/v1/bot/bot/get.handler.ts @@ -63,7 +63,7 @@ export const getDatasourceByBotId = async ( where: { botId: id, type: { - notIn: ["crawl", "sitemap"], + notIn: ["crawl", "sitemap", "zip"], }, }, orderBy: { @@ -77,7 +77,7 @@ export const getDatasourceByBotId = async ( where: { botId: id, type: { - notIn: ["crawl", "sitemap"], + notIn: ["crawl", "sitemap", "zip"], }, }, }); @@ -119,10 +119,9 @@ export const getAllBotsHandler = async ( reply: FastifyReply ) => { const prisma = request.server.prisma; - const bots = await prisma.bot.findMany({ where: { - user_id: request.user?.is_admin ? undefined : request.user?.user_id + user_id: request.user?.user_id }, orderBy: { createdAt: "desc", diff --git a/server/src/handlers/api/v1/bot/bot/upload.handler.ts b/server/src/handlers/api/v1/bot/bot/upload.handler.ts index 9e4cabba..520f5d66 100644 --- a/server/src/handlers/api/v1/bot/bot/upload.handler.ts +++ b/server/src/handlers/api/v1/bot/bot/upload.handler.ts @@ -124,7 +124,7 @@ export const createBotFileHandler = async ( const path = `./uploads/${fileName}`; await fs.promises.mkdir("./uploads", { recursive: true }); await pump( - file.file, + file.file, fs.createWriteStream(path) as any ); const type = fileTypeFinder(file.mimetype); diff --git a/server/src/plugins/bull.ts b/server/src/plugins/bull.ts index a1d2c5f3..4755c620 100644 --- a/server/src/plugins/bull.ts +++ b/server/src/plugins/bull.ts @@ -1,7 +1,7 @@ import fp from "fastify-plugin"; import { FastifyPluginAsync } from "fastify"; import { Queue } from "bullmq"; -import { parseRedisUrl } from "../utils/redis"; +import { queue } from "../queue/q"; declare module "fastify" { interface FastifyInstance { queue: Queue; @@ -13,16 +13,6 @@ const bullPlugin: FastifyPluginAsync = fp(async (server, options) => { if (!redis_url) { throw new Error("Redis url is not defined"); } - const { host, port, password } = parseRedisUrl(redis_url); - - const queue = new Queue("vector", { - connection: { - host, - port, - password, - username: process?.env?.DB_REDIS_USERNAME, - }, - }); server.decorate("queue", queue); diff --git a/server/src/queue/controllers/zip.controller.ts b/server/src/queue/controllers/zip.controller.ts new file mode 100644 index 00000000..04f3ef84 --- /dev/null +++ b/server/src/queue/controllers/zip.controller.ts @@ -0,0 +1,117 @@ +import { QSource } from "../type"; +import { PrismaClient } from "@prisma/client"; +import yauzl from "yauzl-promise" +import * as fs from "fs" +import * as util from "util"; +import path from "path" +import { pipeline } from "stream/promises"; +import { fileTypeFinder } from "../../utils/fileType"; +import { queue } from "../q"; +const pump = util.promisify(pipeline); + +function getMimeType(filename: string): string { + const ext = path.extname(filename).toLowerCase(); + switch (ext) { + case ".pdf": + return "application/pdf"; + case ".docx": + return "application/vnd.openxmlformats-officedocument.wordprocessingml.document"; + case ".csv": + return "text/csv"; + case ".txt": + return "text/plain"; + case ".mp4": + return "video/mp4"; + case ".mp3": + return "audio/mpeg"; + case ".zip": + return "application/zip"; + default: + return "none"; + } +} + +interface UnzippedFile { + name: string; + mimeType: string; + location: string; + type: string; +} + +async function unzip(zipFilePath: string): Promise { + const unzipPath = `${zipFilePath.replace(".zip", "")}-${Date.now()}` + await fs.promises.mkdir(unzipPath, { recursive: true }); + + const zip = await yauzl.open(zipFilePath); + + const unzippedFiles: UnzippedFile[] = []; + + try { + for await (const entry of zip) { + const entryPath = `${unzipPath}/${entry.filename}`; + if (entry.filename.endsWith('/')) { + await fs.promises.mkdir(entryPath, { recursive: true }); + } else { + const dirName = path.dirname(entryPath); + await fs.promises.mkdir(dirName, { recursive: true }); + + const readStream = await entry.openReadStream(); + const mimeType = getMimeType(entry.filename); + + const writeStream = fs.createWriteStream(entryPath); + //@ts-ignore + await pump(readStream, writeStream); + + unzippedFiles.push({ + name: entry.filename, + mimeType, + location: entryPath, + type: fileTypeFinder(mimeType) + }); + } + } + } finally { + await zip.close(); + } + + return unzippedFiles; +} + +export const zipQueueController = async ( + source: QSource, + prisma: PrismaClient +) => { + console.log("loading zip"); + + const location = source.location!; + const fileInfo = await unzip(location); + const validFiles = fileInfo.filter(file => file.type !== "none"); + console.log("validFiles", validFiles.length); + + for (const file of validFiles) { + const botSource = await prisma.botSource.create({ + data: { + botId: source.botId, + content: file.name, + type: file.type, + location: file.location, + } + }) + + queue.add( + "process", + [ + { + ...botSource, + embedding: source.embedding, + }, + ], + { + jobId: botSource.id, + removeOnComplete: true, + removeOnFail: true, + } + ); + } + console.log("zip loaded"); +}; diff --git a/server/src/queue/index.ts b/server/src/queue/index.ts index 6d2c63df..2ac8119e 100644 --- a/server/src/queue/index.ts +++ b/server/src/queue/index.ts @@ -15,6 +15,7 @@ import { restQueueController } from "./controllers/rest.controller"; import { sitemapQueueController } from "./controllers/sitemap.controller"; import { SandboxedJob } from "bullmq"; import { getRagSettings } from "../utils/rag-settings"; +import { zipQueueController } from "./controllers/zip.controller"; const prisma = new PrismaClient(); @@ -80,6 +81,9 @@ export default async function queueHandler(job: SandboxedJob) { case "sitemap": await sitemapQueueController(source); break; + case "zip": + await zipQueueController(source, prisma); + break; default: break; } diff --git a/server/src/queue/q.ts b/server/src/queue/q.ts new file mode 100644 index 00000000..859cfa64 --- /dev/null +++ b/server/src/queue/q.ts @@ -0,0 +1,19 @@ +import { Queue } from "bullmq"; +import { parseRedisUrl } from "../utils/redis"; + +const redis_url = process.env.DB_REDIS_URL || process.env.REDIS_URL; +if (!redis_url) { + throw new Error("Redis url is not defined"); +} + +//@ts-ignore +const { host, port, password } = parseRedisUrl(redis_url); + +export const queue = new Queue("vector", { + connection: { + host, + port, + password, + username: process?.env?.DB_REDIS_USERNAME, + }, +}); diff --git a/server/src/utils/fileType.ts b/server/src/utils/fileType.ts index c61e2d3f..9622c5d6 100644 --- a/server/src/utils/fileType.ts +++ b/server/src/utils/fileType.ts @@ -16,6 +16,10 @@ export const fileTypeFinder = (mimeType: string) => { return "mp3"; case "video/mpeg": return "mp4"; + case "application/zip": + return "zip"; + case "application/x-zip-compressed": + return "zip"; default: return "none"; } diff --git a/server/yarn.lock b/server/yarn.lock index 9b3f5525..1c63f5b5 100644 --- a/server/yarn.lock +++ b/server/yarn.lock @@ -272,6 +272,13 @@ "@babel/plugin-syntax-jsx" "^7.21.4" "@babel/types" "^7.22.3" +"@babel/runtime@^7.16.7": + version "7.25.0" + resolved "https://registry.yarnpkg.com/@babel/runtime/-/runtime-7.25.0.tgz#3af9a91c1b739c569d5d80cc917280919c544ecb" + integrity sha512-7dRy4DwXwtzBrPbZflqxnvfxLF8kdZXPkhymtDeFoFqE6ldzjQFgYTtYIFARcLEYDrqfBfYcZt1WqFxRoyC9Rw== + dependencies: + regenerator-runtime "^0.14.0" + "@babel/runtime@^7.21.0": version "7.22.3" resolved "https://registry.yarnpkg.com/@babel/runtime/-/runtime-7.22.3.tgz#0a7fce51d43adbf0f7b517a71f4c3aaca92ebcbb" @@ -384,6 +391,28 @@ tslib "^2.5.0" ws "^8.13.0" +"@emnapi/core@^1.1.0": + version "1.2.0" + resolved "https://registry.yarnpkg.com/@emnapi/core/-/core-1.2.0.tgz#7b738e5033738132bf6af0b8fae7b05249bdcbd7" + integrity sha512-E7Vgw78I93we4ZWdYCb4DGAwRROGkMIXk7/y87UmANR+J6qsWusmC3gLt0H+O0KOt5e6O38U8oJamgbudrES/w== + dependencies: + "@emnapi/wasi-threads" "1.0.1" + tslib "^2.4.0" + +"@emnapi/runtime@^1.1.0": + version "1.2.0" + resolved "https://registry.yarnpkg.com/@emnapi/runtime/-/runtime-1.2.0.tgz#71d018546c3a91f3b51106530edbc056b9f2f2e3" + integrity sha512-bV21/9LQmcQeCPEg3BDFtvwL6cwiTMksYNWQQ4KOxCZikEGalWtenoZ0wCiukJINlGCIi2KXx01g4FoH/LxpzQ== + dependencies: + tslib "^2.4.0" + +"@emnapi/wasi-threads@1.0.1": + version "1.0.1" + resolved "https://registry.yarnpkg.com/@emnapi/wasi-threads/-/wasi-threads-1.0.1.tgz#d7ae71fd2166b1c916c6cd2d0df2ef565a2e1a5b" + integrity sha512-iIBu7mwkq4UQGeMEM8bLwNK962nXdhodeScX4slfQnRhEMMzvYivHhutCIk8uojvmASXXPC2WNEjwxFWk72Oqw== + dependencies: + tslib "^2.4.0" + "@fastify/accept-negotiator@^1.0.0": version "1.1.0" resolved "https://registry.yarnpkg.com/@fastify/accept-negotiator/-/accept-negotiator-1.1.0.tgz#c1c66b3b771c09742a54dd5bc87c582f6b0630ff" @@ -858,6 +887,107 @@ resolved "https://registry.yarnpkg.com/@msgpackr-extract/msgpackr-extract-win32-x64/-/msgpackr-extract-win32-x64-3.0.2.tgz#0f164b726869f71da3c594171df5ebc1c4b0a407" integrity sha512-O+6Gs8UeDbyFpbSh2CPEz/UOrrdWPTBYNblZK5CxxLisYt4kGX3Sc+czffFonyjiGSq3jWLwJS/CCJc7tBr4sQ== +"@napi-rs/wasm-runtime@^0.2.3": + version "0.2.4" + resolved "https://registry.yarnpkg.com/@napi-rs/wasm-runtime/-/wasm-runtime-0.2.4.tgz#d27788176f250d86e498081e3c5ff48a17606918" + integrity sha512-9zESzOO5aDByvhIAsOy9TbpZ0Ur2AJbUI7UT73kcUTS2mxAMHOBaa1st/jAymNoCtvrit99kkzT1FZuXVcgfIQ== + dependencies: + "@emnapi/core" "^1.1.0" + "@emnapi/runtime" "^1.1.0" + "@tybys/wasm-util" "^0.9.0" + +"@node-rs/crc32-android-arm-eabi@1.10.3": + version "1.10.3" + resolved "https://registry.yarnpkg.com/@node-rs/crc32-android-arm-eabi/-/crc32-android-arm-eabi-1.10.3.tgz#964073a6a417878aaba2a79b0b2dc0fce98512b7" + integrity sha512-V9iNJd5ux9I415qOldmxZIHrazYMJNsQ6v+Kq/t9FTQyYqiEeHvRc1FzBh9MT6Uc24InwMhBeC1WVw0BL4VaxQ== + +"@node-rs/crc32-android-arm64@1.10.3": + version "1.10.3" + resolved "https://registry.yarnpkg.com/@node-rs/crc32-android-arm64/-/crc32-android-arm64-1.10.3.tgz#c43c58f8fb7ec87aeec66ca2f690db5e510624e8" + integrity sha512-d6xLAhbk5FDGpltAKTFs7hZO/PWpHeihZ/ZCKx2LEVz8jXQEshpo2/ojnfb5FAw6oNzU2H+S/RI5GeCr7paa1Q== + +"@node-rs/crc32-darwin-arm64@1.10.3": + version "1.10.3" + resolved "https://registry.yarnpkg.com/@node-rs/crc32-darwin-arm64/-/crc32-darwin-arm64-1.10.3.tgz#5fcde2490601964032639f6942bd6e4a558ca3ff" + integrity sha512-IoX6HC4dlKc9BONe7632DADBtiHUiIVD7Bibuj3bGrvOBllN8hvBL9+dDC+/iDdOeuiBKgb0hgL5h2nPIybpzA== + +"@node-rs/crc32-darwin-x64@1.10.3": + version "1.10.3" + resolved "https://registry.yarnpkg.com/@node-rs/crc32-darwin-x64/-/crc32-darwin-x64-1.10.3.tgz#783ace9730a1ad0d7ad5fe06556a776684ecd404" + integrity sha512-JUDGAX/0W4A9ok9p6yuy4fAsBDrq8Db0sUjKLMZ/+P3NHB+Qk+OsZUsEDxP3yhBJxhPq97JpN4bBzgMnkDajpw== + +"@node-rs/crc32-freebsd-x64@1.10.3": + version "1.10.3" + resolved "https://registry.yarnpkg.com/@node-rs/crc32-freebsd-x64/-/crc32-freebsd-x64-1.10.3.tgz#4bc354ce48d3bcde3af28efa690910a20fe7681d" + integrity sha512-mbpVcrF9cRJm9ksv2vVaWc/yRsLJErdb90Kusc6I8CgsBxpS6/wI637i0khSl1l10iWrALXjfh6osihixANYhQ== + +"@node-rs/crc32-linux-arm-gnueabihf@1.10.3": + version "1.10.3" + resolved "https://registry.yarnpkg.com/@node-rs/crc32-linux-arm-gnueabihf/-/crc32-linux-arm-gnueabihf-1.10.3.tgz#d4591b2a55c8373b6f3279780ed47b0ebb1ce089" + integrity sha512-9MZohdtKzdnb16xRKU76t1UTEJu80dFO8f2/N0geJYNobnT1E6p/+5pqB/G1/H6OnPvjqMuFuLVL4BJVvO4GYQ== + +"@node-rs/crc32-linux-arm64-gnu@1.10.3": + version "1.10.3" + resolved "https://registry.yarnpkg.com/@node-rs/crc32-linux-arm64-gnu/-/crc32-linux-arm64-gnu-1.10.3.tgz#1881b811a2ac5d3faa4895b6ef1c558afc428ee9" + integrity sha512-t1+9ik4awZF+luQp94HsUH8M1lSw8jWjvQiLaHyxMzrM0NY0/oIkhjqdOswXL11Wybkc63eunNwVqGKWfJEi4Q== + +"@node-rs/crc32-linux-arm64-musl@1.10.3": + version "1.10.3" + resolved "https://registry.yarnpkg.com/@node-rs/crc32-linux-arm64-musl/-/crc32-linux-arm64-musl-1.10.3.tgz#01280d1767fe9c845cc3aef7d0bcb0e4260f4d5f" + integrity sha512-fsxOk9CpFzyon+vktvCICwhGk0b+tnfEZfPOXa3QDrkyZD7R7cHmpEHGim1BYgJZIJSTBfal5eM11hzBGjJbxw== + +"@node-rs/crc32-linux-x64-gnu@1.10.3": + version "1.10.3" + resolved "https://registry.yarnpkg.com/@node-rs/crc32-linux-x64-gnu/-/crc32-linux-x64-gnu-1.10.3.tgz#43dcc4b3efabe2a542c9594366b7959d84a68636" + integrity sha512-0zIX68FIeqpRMRNvmB5AgONnLMm628+8mV9UDuCRmGppME8WGnY+Dirx+TPUeTJ4f27+in+6CU4u6LJDi9cXmQ== + +"@node-rs/crc32-linux-x64-musl@1.10.3": + version "1.10.3" + resolved "https://registry.yarnpkg.com/@node-rs/crc32-linux-x64-musl/-/crc32-linux-x64-musl-1.10.3.tgz#88111ff5f74f083e8b44c81f54dc14538981e12a" + integrity sha512-dKKt0FEm8JDp2MvIu1J7vg8Dc5D5upNO6LAuvfShq9Hy8hYNQWy6f+AF8mSm/c5wWnjn+pv7I1+jvrZIe6wMig== + +"@node-rs/crc32-wasm32-wasi@1.10.3": + version "1.10.3" + resolved "https://registry.yarnpkg.com/@node-rs/crc32-wasm32-wasi/-/crc32-wasm32-wasi-1.10.3.tgz#8981e54755d3699a459a8eee2202a534afd4e683" + integrity sha512-oT2V4r0lGZqZHkFLHeXu5Z8C8SutIvBVV0Ws3unz4/KhwmlMcOZYRmSelUSSILbjNLrg4FihCe20tC1VbmaNxA== + dependencies: + "@napi-rs/wasm-runtime" "^0.2.3" + +"@node-rs/crc32-win32-arm64-msvc@1.10.3": + version "1.10.3" + resolved "https://registry.yarnpkg.com/@node-rs/crc32-win32-arm64-msvc/-/crc32-win32-arm64-msvc-1.10.3.tgz#f85bc246e71c6d57dee70416d4e809ad11331629" + integrity sha512-IwP/TjDoQycv3ZCbAHV3qS9oH8pmBo7h9RC0chOvKY0g9+RxRl0nXhxcAcmZvJugKdJd+eCOR9fJrWzcwQOgFg== + +"@node-rs/crc32-win32-ia32-msvc@1.10.3": + version "1.10.3" + resolved "https://registry.yarnpkg.com/@node-rs/crc32-win32-ia32-msvc/-/crc32-win32-ia32-msvc-1.10.3.tgz#dfa5297e7ba8f080d89e8ef527cc1ea33b274313" + integrity sha512-YK0qYTHUFqriqAkHyXfe3IpDFfpG5fc2yuNl7MXn4ejklLLyNQPOCSawvPU7ouOBgtQDaAH60yZhFhsXZfwSfQ== + +"@node-rs/crc32-win32-x64-msvc@1.10.3": + version "1.10.3" + resolved "https://registry.yarnpkg.com/@node-rs/crc32-win32-x64-msvc/-/crc32-win32-x64-msvc-1.10.3.tgz#77a4ad3a92ea9ab0a2d1bd051326e2bc64c146ba" + integrity sha512-VI9jd8ECiij4YADsfzVuDnhk/UZ5op4RYHyN40yZzwhzcOQ8DDluOeHv91FPHSyMYJEsVsqbr3cqtD6R47xYjw== + +"@node-rs/crc32@^1.7.0": + version "1.10.3" + resolved "https://registry.yarnpkg.com/@node-rs/crc32/-/crc32-1.10.3.tgz#caced9582d629102c68f2ffba0a7fc3527425aaa" + integrity sha512-4UgH0fDRxs0eMSgrUN0UUM4BpIEbVKutiSkFLICwegbgIger3c1t7V3jOYralK0xTBHraW3r59wlESdc3h/nQg== + optionalDependencies: + "@node-rs/crc32-android-arm-eabi" "1.10.3" + "@node-rs/crc32-android-arm64" "1.10.3" + "@node-rs/crc32-darwin-arm64" "1.10.3" + "@node-rs/crc32-darwin-x64" "1.10.3" + "@node-rs/crc32-freebsd-x64" "1.10.3" + "@node-rs/crc32-linux-arm-gnueabihf" "1.10.3" + "@node-rs/crc32-linux-arm64-gnu" "1.10.3" + "@node-rs/crc32-linux-arm64-musl" "1.10.3" + "@node-rs/crc32-linux-x64-gnu" "1.10.3" + "@node-rs/crc32-linux-x64-musl" "1.10.3" + "@node-rs/crc32-wasm32-wasi" "1.10.3" + "@node-rs/crc32-win32-arm64-msvc" "1.10.3" + "@node-rs/crc32-win32-ia32-msvc" "1.10.3" + "@node-rs/crc32-win32-x64-msvc" "1.10.3" + "@prisma/client@^5.9.1": version "5.9.1" resolved "https://registry.yarnpkg.com/@prisma/client/-/client-5.9.1.tgz#d92bd2f7f006e0316cb4fda9d73f235965cf2c64" @@ -1166,6 +1296,13 @@ resolved "https://registry.yarnpkg.com/@tsconfig/node16/-/node16-1.0.4.tgz#0b92dcc0cc1c81f6f306a381f28e31b1a56536e9" integrity sha512-vxhUy4J8lyeyinH7Azl1pdd43GJhZH/tP2weN8TntQblOY+A0XbT8DJk1/oCPuOOyg/Ja757rG0CgHcWC8OfMA== +"@tybys/wasm-util@^0.9.0": + version "0.9.0" + resolved "https://registry.yarnpkg.com/@tybys/wasm-util/-/wasm-util-0.9.0.tgz#3e75eb00604c8d6db470bf18c37b7d984a0e3355" + integrity sha512-6+7nlbMVX/PVDCwaIQ8nTOPveOcFLSt8GcXdx8hD0bt39uWxYT88uXzqTd4fTvqta7oeUJqudepapKNt2DYJFw== + dependencies: + tslib "^2.4.0" + "@types/bcryptjs@^2.4.2": version "2.4.2" resolved "https://registry.yarnpkg.com/@types/bcryptjs/-/bcryptjs-2.4.2.tgz#e3530eac9dd136bfdfb0e43df2c4c5ce1f77dfae" @@ -1481,7 +1618,14 @@ dependencies: "@types/node" "*" -"@types/yauzl@^2.9.1": +"@types/yauzl-promise@^4.0.1": + version "4.0.1" + resolved "https://registry.yarnpkg.com/@types/yauzl-promise/-/yauzl-promise-4.0.1.tgz#66ec3c8e4f88b54966ab08a9333acfec8f4b763c" + integrity sha512-qYEC3rJwqiJpdQ9b+bPNeuSY0c3JUM8vIuDy08qfuVN7xHm3ZDsHn2kGphUIB0ruEXrPGNXZ64nMUcu4fDjViQ== + dependencies: + "@types/node" "*" + +"@types/yauzl@^2.10.3", "@types/yauzl@^2.9.1": version "2.10.3" resolved "https://registry.yarnpkg.com/@types/yauzl/-/yauzl-2.10.3.tgz#e9b2808b4f109504a03cda958259876f61017999" integrity sha512-oJoftv0LSuaDZE3Le4DbKX+KS9G36NzOeSap90UIK0yMA/NhKJhqlSGtNDORNRaIbQfzjXDrQa0ytJ6mNRGz/Q== @@ -2736,6 +2880,15 @@ defer-to-connect@^2.0.0: resolved "https://registry.yarnpkg.com/defer-to-connect/-/defer-to-connect-2.0.1.tgz#8016bdb4143e4632b77a3449c6236277de520587" integrity sha512-4tvttepXG1VaYGrRibk5EwJd1t4udunSOVMdLSAL6mId1ix438oPwPZMALY41FCijukO1L0twNcGsdzS7dHgDg== +define-data-property@^1.0.1: + version "1.1.4" + resolved "https://registry.yarnpkg.com/define-data-property/-/define-data-property-1.1.4.tgz#894dc141bb7d3060ae4366f6a0107e68fbe48c5e" + integrity sha512-rBMvIzlpA8v6E+SJZoo++HAYqsLrkg7MSfIinMPFhmkorw7X+dOXVJQs+QT69zGkzMyfDnIMN2Wid1+NbL3T+A== + dependencies: + es-define-property "^1.0.0" + es-errors "^1.3.0" + gopd "^1.0.1" + define-properties@^1.1.3, define-properties@^1.1.4, define-properties@^1.2.0: version "1.2.0" resolved "https://registry.yarnpkg.com/define-properties/-/define-properties-1.2.0.tgz#52988570670c9eacedd8064f4a990f2405849bd5" @@ -2744,6 +2897,15 @@ define-properties@^1.1.3, define-properties@^1.1.4, define-properties@^1.2.0: has-property-descriptors "^1.0.0" object-keys "^1.1.1" +define-properties@^1.2.1: + version "1.2.1" + resolved "https://registry.yarnpkg.com/define-properties/-/define-properties-1.2.1.tgz#10781cc616eb951a80a034bafcaa7377f6af2b6c" + integrity sha512-8QmQKqEASLd5nx0U1B1okLElbUuuttJ/AnYmRXbbbGDWh6uS208EjD4Xqq/I9wK7u0v6O08XhTWnt5XtEbR6Dg== + dependencies: + define-data-property "^1.0.1" + has-property-descriptors "^1.0.0" + object-keys "^1.1.1" + degenerator@^5.0.0: version "5.0.1" resolved "https://registry.yarnpkg.com/degenerator/-/degenerator-5.0.1.tgz#9403bf297c6dad9a1ece409b37db27954f91f2f5" @@ -2985,6 +3147,18 @@ es-array-method-boxes-properly@^1.0.0: resolved "https://registry.yarnpkg.com/es-array-method-boxes-properly/-/es-array-method-boxes-properly-1.0.0.tgz#873f3e84418de4ee19c5be752990b2e44718d09e" integrity sha512-wd6JXUmyHmt8T5a2xreUwKcGPq6f1f+WwIJkijUqiGcJz1qqnZgP6XIK+QyIWU5lT7imeNxUll48bziG+TSYcA== +es-define-property@^1.0.0: + version "1.0.0" + resolved "https://registry.yarnpkg.com/es-define-property/-/es-define-property-1.0.0.tgz#c7faefbdff8b2696cf5f46921edfb77cc4ba3845" + integrity sha512-jxayLKShrEqqzJ0eumQbVhTYQM27CfT1T35+gCgDFoL82JLsXqTJ76zv6A0YLOgEnLUMvLzsDsGIrl8NFpT2gQ== + dependencies: + get-intrinsic "^1.2.4" + +es-errors@^1.3.0: + version "1.3.0" + resolved "https://registry.yarnpkg.com/es-errors/-/es-errors-1.3.0.tgz#05f75a25dab98e4fb1dcd5e1472c0546d5057c8f" + integrity sha512-Zf5H2Kxt2xjTvbJvP2ZWLEICxA6j+hAmMzIlypy4xcBg1vKVnx89Wy0GbS+kf5cwCVFFzdCFh2XSCFNULS6csw== + es-get-iterator@^1.0.2: version "1.1.3" resolved "https://registry.yarnpkg.com/es-get-iterator/-/es-get-iterator-1.1.3.tgz#3ef87523c5d464d41084b2c3c9c214f1199763d6" @@ -3633,6 +3807,11 @@ function-bind@^1.1.1: resolved "https://registry.yarnpkg.com/function-bind/-/function-bind-1.1.1.tgz#a56899d3ea3c9bab874bb9773b7c5ede92f4895d" integrity sha512-yIovAzMX49sF8Yl58fSCWJ5svSLuaibPxXQJFLmBObTuCr0Mf1KiPopGM9NiFjiYBCbfaa2Fh6breQ6ANVTI0A== +function-bind@^1.1.2: + version "1.1.2" + resolved "https://registry.yarnpkg.com/function-bind/-/function-bind-1.1.2.tgz#2c02d864d97f3ea6c8830c464cbd11ab6eab7a1c" + integrity sha512-7XHNxH7qX9xG5mIwxkhumTox/MIRNcOgDrxWsMt2pAr23WHp6MrRlN7FBSFpCpr+oVO0F744iUgR82nJMfG2SA== + function-loop@^2.0.1: version "2.0.1" resolved "https://registry.yarnpkg.com/function-loop/-/function-loop-2.0.1.tgz#799c56ced01698cf12a1b80e4802e9dafc2ebada" @@ -3734,6 +3913,17 @@ get-intrinsic@^1.0.2, get-intrinsic@^1.1.1, get-intrinsic@^1.1.3, get-intrinsic@ has-proto "^1.0.1" has-symbols "^1.0.3" +get-intrinsic@^1.2.4: + version "1.2.4" + resolved "https://registry.yarnpkg.com/get-intrinsic/-/get-intrinsic-1.2.4.tgz#e385f5a4b5227d449c3eabbad05494ef0abbeadd" + integrity sha512-5uYhsJH8VJBTv7oslg4BznJYhDoRI6waYCxMmCdnTrcCrHA/fCFKoTFz2JKKE0HdDFUF7/oQuhzumXJK7paBRQ== + dependencies: + es-errors "^1.3.0" + function-bind "^1.1.2" + has-proto "^1.0.1" + has-symbols "^1.0.3" + hasown "^2.0.0" + get-iterator@^1.0.2: version "1.0.2" resolved "https://registry.yarnpkg.com/get-iterator/-/get-iterator-1.0.2.tgz#cd747c02b4c084461fac14f48f6b45a80ed25c82" @@ -3814,6 +4004,14 @@ globals@^11.1.0: resolved "https://registry.yarnpkg.com/globals/-/globals-11.12.0.tgz#ab8795338868a0babd8525758018c2a7eb95c42e" integrity sha512-WOBp/EEGUiIsJSp7wcv/y6MO+lV9UoncWqxuFfm8eBwzWNgyfBd6Gz+IeKQ9jCmyhoH99g15M3T+QaVHFjizVA== +globalthis@^1.0.2: + version "1.0.4" + resolved "https://registry.yarnpkg.com/globalthis/-/globalthis-1.0.4.tgz#7430ed3a975d97bfb59bcce41f5cabbafa651236" + integrity sha512-DpLKbNU4WylpxJykQujfCcwYWiV/Jhm50Goo0wrVILAv5jOr9d+H+UR3PhSCD2rCCEIg0uc+G+muBTwD54JhDQ== + dependencies: + define-properties "^1.2.1" + gopd "^1.0.1" + globalthis@^1.0.3: version "1.0.3" resolved "https://registry.yarnpkg.com/globalthis/-/globalthis-1.0.3.tgz#5852882a52b80dc301b0660273e1ed082f0b6ccf" @@ -3992,6 +4190,13 @@ hasha@^5.0.0: is-stream "^2.0.0" type-fest "^0.8.0" +hasown@^2.0.0: + version "2.0.2" + resolved "https://registry.yarnpkg.com/hasown/-/hasown-2.0.2.tgz#003eaf91be7adc372e84ec59dc37252cedb80003" + integrity sha512-0hJU9SCPvmMzIBdZFqNPXWa6dqh7WdH0cII9y+CyS8rG3nL48Bclra9HmKhVVUHyPWNH5Y7xDwAB7bfgSjkUMQ== + dependencies: + function-bind "^1.1.2" + help-me@^4.0.1: version "4.2.0" resolved "https://registry.yarnpkg.com/help-me/-/help-me-4.2.0.tgz#50712bfd799ff1854ae1d312c36eafcea85b0563" @@ -4384,6 +4589,14 @@ is-gzip@2.0.0: resolved "https://registry.yarnpkg.com/is-gzip/-/is-gzip-2.0.0.tgz#f4fed2bbd9f96bf2cb39e19262797fdb15aad933" integrity sha512-jtO4Njg6q58zDo/Pu4027beSZ0VdsZlt8/5Moco6yAg+DIxb5BK/xUYqYG2+MD4+piKldXJNHxRkhEYI2fvrxA== +is-it-type@^5.1.2: + version "5.1.2" + resolved "https://registry.yarnpkg.com/is-it-type/-/is-it-type-5.1.2.tgz#30da6429200f4cb55280b92b9327226439b0ed34" + integrity sha512-q/gOZQTNYABAxaXWnBKZjTFH4yACvWEFtgVOj+LbgxYIgAJG1xVmUZOsECSrZPIemYUQvaQWVilSFVbh4Eyt8A== + dependencies: + "@babel/runtime" "^7.16.7" + globalthis "^1.0.2" + is-map@^2.0.2: version "2.0.2" resolved "https://registry.yarnpkg.com/is-map/-/is-map-2.0.2.tgz#00922db8c9bf73e81b7a335827bc2a43f2b91127" @@ -6516,6 +6729,11 @@ regenerator-runtime@^0.13.11: resolved "https://registry.yarnpkg.com/regenerator-runtime/-/regenerator-runtime-0.13.11.tgz#f6dca3e7ceec20590d07ada785636a90cdca17f9" integrity sha512-kY1AZVr2Ra+t+piVaJ4gxaFaReZVH40AKNo7UCX6W+dEwBo/2oZJzqfuN1qLq1oL45o56cPaTXELwrTh8Fpggg== +regenerator-runtime@^0.14.0: + version "0.14.1" + resolved "https://registry.yarnpkg.com/regenerator-runtime/-/regenerator-runtime-0.14.1.tgz#356ade10263f685dda125100cd862c1db895327f" + integrity sha512-dYnhHh0nJoMfnkZs6GmmhFknAGRrLznOu5nc9ML+EJxGvrx6H7teuevqVqCuPcPK//3eDrrjQhehXVx9cnkGdw== + regexp.prototype.flags@^1.5.0: version "1.5.0" resolved "https://registry.yarnpkg.com/regexp.prototype.flags/-/regexp.prototype.flags-1.5.0.tgz#fe7ce25e7e4cca8db37b6634c8a2c7009199b9cb" @@ -6896,6 +7114,11 @@ simple-get@^4.0.0, simple-get@^4.0.1: once "^1.3.1" simple-concat "^1.0.0" +simple-invariant@^2.0.1: + version "2.0.1" + resolved "https://registry.yarnpkg.com/simple-invariant/-/simple-invariant-2.0.1.tgz#b8935284d31bc0c2719582f9cddf17bee8f57526" + integrity sha512-1sbhsxqI+I2tqlmjbz99GXNmZtr6tKIyEgGGnJw/MKGblalqk/XoOYYFJlBzTKZCxx8kLaD3FD5s9BEEjx5Pyg== + simple-swizzle@^0.2.2: version "0.2.2" resolved "https://registry.yarnpkg.com/simple-swizzle/-/simple-swizzle-0.2.2.tgz#a4da6b635ffcccca33f70d17cb92592de95e557a" @@ -7485,7 +7708,7 @@ tslib@^2.0.0: resolved "https://registry.yarnpkg.com/tslib/-/tslib-2.6.2.tgz#703ac29425e7b37cd6fd456e92404d46d1f3e4ae" integrity sha512-AEYxH93jGFPn/a2iVAwW87VuUIkR1FVUKB77NwMF7nBTDkDrrT/Hpt/IrCJ0QXhW27jTBDcf5ZY7w6RiqTMw2Q== -tslib@^2.0.1: +tslib@^2.0.1, tslib@^2.4.0: version "2.6.3" resolved "https://registry.yarnpkg.com/tslib/-/tslib-2.6.3.tgz#0438f810ad7a9edcde7a241c3d80db693c8cbfe0" integrity sha512-xNvxJEOUiWPGhUuUdQgAJPKOOJfGnIyKySOc09XkKsgdUV/3E2zvwZYdejjmRgPCgcym1juLH3226yA7sEFJKQ== @@ -8047,6 +8270,15 @@ yargs@^16.1.0: y18n "^5.0.5" yargs-parser "^20.2.2" +yauzl-promise@^4.0.0: + version "4.0.0" + resolved "https://registry.yarnpkg.com/yauzl-promise/-/yauzl-promise-4.0.0.tgz#4870124bfdbe4e7e23da4258719bb264fc18576a" + integrity sha512-/HCXpyHXJQQHvFq9noqrjfa/WpQC2XYs3vI7tBiAi4QiIU1knvYhZGaO1QPjwIVMdqflxbmwgMXtYeaRiAE0CA== + dependencies: + "@node-rs/crc32" "^1.7.0" + is-it-type "^5.1.2" + simple-invariant "^2.0.1" + yauzl@^2.10.0: version "2.10.0" resolved "https://registry.yarnpkg.com/yauzl/-/yauzl-2.10.0.tgz#c7eb17c93e112cb1086fa6d8e51fb0667b79a5f9" @@ -8055,6 +8287,14 @@ yauzl@^2.10.0: buffer-crc32 "~0.2.3" fd-slicer "~1.1.0" +yauzl@^3.1.3: + version "3.1.3" + resolved "https://registry.yarnpkg.com/yauzl/-/yauzl-3.1.3.tgz#f61c17ad1a09403bc7adb01dfb302a9e74bf4a50" + integrity sha512-JCCdmlJJWv7L0q/KylOekyRaUrdEoUxWkWVcgorosTROCFWiS9p2NNPE9Yb91ak7b1N5SxAZEliWpspbZccivw== + dependencies: + buffer-crc32 "~0.2.3" + pend "~1.2.0" + yn@3.1.1: version "3.1.1" resolved "https://registry.yarnpkg.com/yn/-/yn-3.1.1.tgz#1e87401a09d767c1d5eab26a6e4c185182d2eb50" From 97dd2a2bd13a6a182cd667e9303cccde54006aeb Mon Sep 17 00:00:00 2001 From: n4ze3m Date: Sat, 10 Aug 2024 01:00:00 +0530 Subject: [PATCH 5/6] chore: Add autoSyncDataSources column to Bot table and update related code --- app/ui/src/@types/bot.ts | 1 + .../components/Bot/Settings/SettingsBody.tsx | 9 +++ app/ui/src/routes/settings/application.tsx | 11 ++- server/package.json | 1 + server/prisma/migrations/q_14_3/migration.sql | 2 + server/prisma/migrations/q_14_4/migration.sql | 2 + server/prisma/schema.prisma | 2 + server/src/app.ts | 12 ++++ server/src/cron/index.ts | 70 +++++++++++++++++++ server/src/handlers/api/v1/admin/type.ts | 1 + .../src/queue/controllers/crawl.controller.ts | 59 +++++++++------- .../queue/controllers/sitemap.controller.ts | 55 +++++++++------ server/src/schema/api/v1/admin/index.ts | 2 + server/yarn.lock | 18 +++++ 14 files changed, 195 insertions(+), 50 deletions(-) create mode 100644 server/prisma/migrations/q_14_3/migration.sql create mode 100644 server/prisma/migrations/q_14_4/migration.sql create mode 100644 server/src/cron/index.ts diff --git a/app/ui/src/@types/bot.ts b/app/ui/src/@types/bot.ts index d6ba8202..aac427e2 100644 --- a/app/ui/src/@types/bot.ts +++ b/app/ui/src/@types/bot.ts @@ -21,6 +21,7 @@ export type BotSettings = { semanticSearchSimilarityScore: string; inactivityTimeout: number; autoResetSession: boolean; + autoSyncDataSources: boolean; }; chatModel: { label: string; diff --git a/app/ui/src/components/Bot/Settings/SettingsBody.tsx b/app/ui/src/components/Bot/Settings/SettingsBody.tsx index 5a30ffbe..2f2def2a 100644 --- a/app/ui/src/components/Bot/Settings/SettingsBody.tsx +++ b/app/ui/src/components/Bot/Settings/SettingsBody.tsx @@ -168,6 +168,7 @@ export const SettingsBody: React.FC = ({ semanticSearchSimilarityScore: data.semanticSearchSimilarityScore, autoResetSession: data.autoResetSession, inactivityTimeout: data.inactivityTimeout, + autoSyncDataSources: data.autoSyncDataSources, }} form={form} requiredMark={false} @@ -453,6 +454,14 @@ export const SettingsBody: React.FC = ({ placeholder="Enter inactivity timeout" /> + + + + diff --git a/app/ui/src/routes/settings/application.tsx b/app/ui/src/routes/settings/application.tsx index c8812fb1..f9f0a25f 100644 --- a/app/ui/src/routes/settings/application.tsx +++ b/app/ui/src/routes/settings/application.tsx @@ -29,6 +29,7 @@ export default function SettingsApplicationRoot() { dynamicallyFetchOllamaModels: boolean; ollamaURL: string; fileUploadSizeLimit: number; + refetchDatasource: boolean; }; }); @@ -173,9 +174,15 @@ export default function SettingsApplicationRoot() { ]} tooltip="Default is 10" > - - + + +
diff --git a/server/package.json b/server/package.json index 6e7774ff..38494f84 100644 --- a/server/package.json +++ b/server/package.json @@ -61,6 +61,7 @@ "cohere-ai": "^6.2.1", "concurrently": "^7.0.0", "copyfiles": "^2.4.1", + "cron": "^3.1.7", "d3-dsv": "2", "date-fns": "^3.6.0", "discord.js": "^14.11.0", diff --git a/server/prisma/migrations/q_14_3/migration.sql b/server/prisma/migrations/q_14_3/migration.sql new file mode 100644 index 00000000..ce2026bf --- /dev/null +++ b/server/prisma/migrations/q_14_3/migration.sql @@ -0,0 +1,2 @@ +-- AlterTable +ALTER TABLE "DialoqbaseSettings" ADD COLUMN "refetchDatasource" BOOLEAN NOT NULL DEFAULT false; diff --git a/server/prisma/migrations/q_14_4/migration.sql b/server/prisma/migrations/q_14_4/migration.sql new file mode 100644 index 00000000..a42621a5 --- /dev/null +++ b/server/prisma/migrations/q_14_4/migration.sql @@ -0,0 +1,2 @@ +-- AlterTable +ALTER TABLE "Bot" ADD COLUMN "autoSyncDataSources" BOOLEAN DEFAULT false; diff --git a/server/prisma/schema.prisma b/server/prisma/schema.prisma index 46134c8a..406972cd 100644 --- a/server/prisma/schema.prisma +++ b/server/prisma/schema.prisma @@ -44,6 +44,7 @@ model Bot { bot_api_key String? bot_model_api_key String? options Json? @default("{}") @db.Json + autoSyncDataSources Boolean? @default(false) BotAppearance BotAppearance[] document BotDocument[] BotIntegration BotIntegration[] @@ -106,6 +107,7 @@ model DialoqbaseSettings { defaultEmbeddingModel String @default("dialoqbase_eb_text-embedding-ada-002") ollamaURL String? @default("http://host.docker.internal:11434") usePuppeteerFetch Boolean? @default(false) + refetchDatasource Boolean @default(false) } model BotIntegration { diff --git a/server/src/app.ts b/server/src/app.ts index cec4fe06..76ee7bb9 100644 --- a/server/src/app.ts +++ b/server/src/app.ts @@ -13,6 +13,9 @@ import swaggerUi from "@fastify/swagger-ui"; import { pathToFileURL } from "url"; import { Worker } from "bullmq"; import { parseRedisUrl } from "./utils/redis"; +import { CronJob } from 'cron'; +import { processDatasourceCron } from "./cron/index"; + declare module "fastify" { interface Session { is_bot_allowed: boolean; @@ -103,8 +106,17 @@ const worker = new Worker("vector", workerUrl, { useWorkerThreads: workerThreads === "true", }); +const job = new CronJob( + process.env.DB_CRON_TIME || '0 0 0 * * *', + processDatasourceCron, + null, + true, + process.env.DB_CRON_TIMEZONE +); + process.on("SIGINT", async () => { await worker.close(); + job.stop(); process.exit(); }); diff --git a/server/src/cron/index.ts b/server/src/cron/index.ts new file mode 100644 index 00000000..4181d0b4 --- /dev/null +++ b/server/src/cron/index.ts @@ -0,0 +1,70 @@ +import { PrismaClient } from "@prisma/client"; +import { getSettings } from "../utils/common"; +import { queue } from "../queue/q"; +const prisma = new PrismaClient(); + +async function processDatasourceCron() { + try { + await prisma.$connect(); + const setting = await getSettings(prisma); + + if (!setting.refetchDatasource) { + return; + } + + console.log("[CRON] Processing datasource cron"); + + + const dataSources = await prisma.botSource.findMany({ + where: { + bot: { + autoSyncDataSources: true + }, + type: { + in: [ + "website", + "crawl", + "sitemap", + ] + } + }, + include: { + bot: true + } + }) + + for (const dataSource of dataSources) { + + await prisma.botDocument.deleteMany({ + where: { + botId: dataSource.botId, + sourceId: dataSource.id, + }, + }); + await queue.add( + "process", + [ + { + ...dataSource, + embedding: dataSource.bot.embedding, + }, + ], + { + jobId: dataSource.id, + removeOnComplete: true, + removeOnFail: true, + } + ); + } + + + console.log("[CRON] Finished processing datasource cron"); + + } catch (error) { + console.error(error); + } finally { + await prisma.$disconnect(); + } +} + +export { processDatasourceCron }; \ No newline at end of file diff --git a/server/src/handlers/api/v1/admin/type.ts b/server/src/handlers/api/v1/admin/type.ts index 3a68be82..a7514a27 100644 --- a/server/src/handlers/api/v1/admin/type.ts +++ b/server/src/handlers/api/v1/admin/type.ts @@ -5,6 +5,7 @@ export type UpdateDialoqbaseSettingsRequest = { allowUserToRegister: boolean; usePuppeteerFetch: boolean; fileUploadSizeLimit: number; + refetchDatasource: boolean; }; }; diff --git a/server/src/queue/controllers/crawl.controller.ts b/server/src/queue/controllers/crawl.controller.ts index 18bff46c..cabc1215 100644 --- a/server/src/queue/controllers/crawl.controller.ts +++ b/server/src/queue/controllers/crawl.controller.ts @@ -12,37 +12,46 @@ export const crawlQueueController = async (source: QSource) => { const links = Array.from(data?.links || []); for (const link of links) { - const newSource = await prisma.botSource.create({ - data: { + const existingSource = await prisma.botSource.findFirst({ + where: { botId: source.botId, content: link, - isPending: true, - status: "PENDING", - type: "website", }, }); - await websiteQueueController( - { - ...newSource, - embedding: source.embedding, - chunkOverlap: source.chunkOverlap, - chunkSize: source.chunkSize, - usePuppeteerFetch: source.usePuppeteerFetch, - doNotClosePuppeteer: true, - }, - prisma - ); + if (!existingSource) { + const newSource = await prisma.botSource.create({ + data: { + botId: source.botId, + content: link, + isPending: true, + status: "PENDING", + type: "website", + }, + }); - await prisma.botSource.update({ - where: { - id: newSource.id, - }, - data: { - status: "FINISHED", - isPending: false, - }, - }); + await websiteQueueController( + { + ...newSource, + embedding: source.embedding, + chunkOverlap: source.chunkOverlap, + chunkSize: source.chunkSize, + usePuppeteerFetch: source.usePuppeteerFetch, + doNotClosePuppeteer: true, + }, + prisma + ); + + await prisma.botSource.update({ + where: { + id: newSource.id, + }, + data: { + status: "FINISHED", + isPending: false, + }, + }); + } } await closePuppeteer() diff --git a/server/src/queue/controllers/sitemap.controller.ts b/server/src/queue/controllers/sitemap.controller.ts index 93137bd2..1aad3408 100644 --- a/server/src/queue/controllers/sitemap.controller.ts +++ b/server/src/queue/controllers/sitemap.controller.ts @@ -33,34 +33,43 @@ export const sitemapQueueController = async (source: QSource) => { const links = data.sites; for (const link of links) { - const newSource = await prisma.botSource.create({ - data: { + const existingSource = await prisma.botSource.findFirst({ + where: { botId: source.botId, content: link, - isPending: true, - status: "PENDING", - type: "website", }, }); - await websiteQueueController( - { - ...newSource, - embedding: source.embedding, - chunkSize: source.chunkSize, - chunkOverlap: source.chunkOverlap, - }, - prisma - ); + if (!existingSource) { + const newSource = await prisma.botSource.create({ + data: { + botId: source.botId, + content: link, + isPending: true, + status: "PENDING", + type: "website", + }, + }); - await prisma.botSource.update({ - where: { - id: newSource.id, - }, - data: { - status: "FINISHED", - isPending: false, - }, - }); + await websiteQueueController( + { + ...newSource, + embedding: source.embedding, + chunkSize: source.chunkSize, + chunkOverlap: source.chunkOverlap, + }, + prisma + ); + + await prisma.botSource.update({ + where: { + id: newSource.id, + }, + data: { + status: "FINISHED", + isPending: false, + }, + }); + } } }; diff --git a/server/src/schema/api/v1/admin/index.ts b/server/src/schema/api/v1/admin/index.ts index fdbd383f..8149cb61 100644 --- a/server/src/schema/api/v1/admin/index.ts +++ b/server/src/schema/api/v1/admin/index.ts @@ -24,6 +24,7 @@ export const dialoqbaseSettingsSchema: FastifySchema = { ollamaURL: { type: "string" }, usePuppeteerFetch: { type: "boolean" }, fileUploadSizeLimit: { type: "number" }, + refetchDatasource: { type: "boolean" }, }, }, }; @@ -51,6 +52,7 @@ export const updateDialoqbaseSettingsSchema: FastifySchema = { ollamaURL: { type: "string" }, usePuppeteerFetch: { type: "boolean" }, fileUploadSizeLimit: { type: "number" }, + refetchDatasource: { type: "boolean" }, }, }, response: { diff --git a/server/yarn.lock b/server/yarn.lock index 1c63f5b5..26437ebc 100644 --- a/server/yarn.lock +++ b/server/yarn.lock @@ -1427,6 +1427,11 @@ resolved "https://registry.yarnpkg.com/@types/long/-/long-4.0.2.tgz#b74129719fc8d11c01868010082d483b7545591a" integrity sha512-MqTGEo5bj5t157U6fA/BiDynNkn0YknVdh48CMPkTSpFTVmvao5UQmm7uEF6xBEo7qIMAlY/JSleYaE6VOdpaA== +"@types/luxon@~3.4.0": + version "3.4.2" + resolved "https://registry.yarnpkg.com/@types/luxon/-/luxon-3.4.2.tgz#e4fc7214a420173cea47739c33cdf10874694db7" + integrity sha512-TifLZlFudklWlMBfhubvgqTXRzLDI5pCbGa4P8a3wPyUQSW+1xQ5eDsreP9DWHX3tjq1ke96uYG/nwundroWcA== + "@types/mime@*": version "3.0.1" resolved "https://registry.yarnpkg.com/@types/mime/-/mime-3.0.1.tgz#5f8f2bca0a5863cb69bc0b0acd88c96cb1d4ae10" @@ -2705,6 +2710,14 @@ cron-parser@^4.6.0: dependencies: luxon "^3.2.1" +cron@^3.1.7: + version "3.1.7" + resolved "https://registry.yarnpkg.com/cron/-/cron-3.1.7.tgz#3423d618ba625e78458fff8cb67001672d49ba0d" + integrity sha512-tlBg7ARsAMQLzgwqVxy8AZl/qlTc5nibqYwtNGoCrd+cV+ugI+tvZC1oT/8dFH8W455YrywGykx/KMmAqOr7Jw== + dependencies: + "@types/luxon" "~3.4.0" + luxon "~3.4.0" + cross-fetch@^3.1.5: version "3.1.8" resolved "https://registry.yarnpkg.com/cross-fetch/-/cross-fetch-3.1.8.tgz#0327eba65fd68a7d119f8fb2bf9334a1a7956f82" @@ -5313,6 +5326,11 @@ luxon@^3.2.1: resolved "https://registry.yarnpkg.com/luxon/-/luxon-3.3.0.tgz#d73ab5b5d2b49a461c47cedbc7e73309b4805b48" integrity sha512-An0UCfG/rSiqtAIiBPO0Y9/zAnHUZxAMiCpTd5h2smgsj7GGmcenvrvww2cqNA8/4A5ZrD1gJpHN2mIHZQF+Mg== +luxon@~3.4.0: + version "3.4.4" + resolved "https://registry.yarnpkg.com/luxon/-/luxon-3.4.4.tgz#cf20dc27dc532ba41a169c43fdcc0063601577af" + integrity sha512-zobTr7akeGHnv7eBOXcRgMeCP6+uyYsczwmeRCauvpvaAltgNyTbLH/+VaEAPUeWBT+1GuNmz4wC/6jtQzbbVA== + m3u8stream@^0.8.6: version "0.8.6" resolved "https://registry.yarnpkg.com/m3u8stream/-/m3u8stream-0.8.6.tgz#0d6de4ce8ee69731734e6b616e7b05dd9d9a55b1" From 10f544b3d0673c0025d7c9e0919725fb7adda800 Mon Sep 17 00:00:00 2001 From: n4ze3m Date: Sat, 10 Aug 2024 20:49:16 +0530 Subject: [PATCH 6/6] chore: Update question and response templates with current date, time, and day --- server/src/chain/index.ts | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/server/src/chain/index.ts b/server/src/chain/index.ts index cf63f0c1..c8626695 100644 --- a/server/src/chain/index.ts +++ b/server/src/chain/index.ts @@ -21,6 +21,17 @@ type RetrievalChainInput = { question: string; }; +const updateTemplateVariables = (template: string) => { + // replace template {time} with current time + template = template.replace("{time}", new Date().toLocaleTimeString()); + // replace template {date} with current date + template = template.replace("{date}", new Date().toLocaleDateString()); + // replace template {day} with current day + template = template.replace("{day}", new Date().toLocaleString('en-us', { weekday: 'long' })); + + return template; +} + export function groupMessagesByConversation(messages: any[]) { // check if messages are in even numbers if not remove the last message if (messages.length % 2 !== 0) { @@ -109,12 +120,17 @@ export const createChain = ({ retriever, response_template, }: { - llm: BaseLanguageModel | BaseChatModel ; + llm: BaseLanguageModel | BaseChatModel; question_llm: BaseLanguageModel | BaseChatModel; retriever: Runnable; question_template: string; response_template: string; }) => { + + question_template = updateTemplateVariables(question_template); + + response_template = updateTemplateVariables(response_template); + const retrieverChain = createRetrieverChain( question_llm, retriever,