Skip to content

Commit

Permalink
Merge pull request #267 from n4ze3m/next
Browse files Browse the repository at this point in the history
v1.8.4
  • Loading branch information
n4ze3m authored Jun 1, 2024
2 parents 2e8c43b + c1f6c18 commit 19b3b28
Show file tree
Hide file tree
Showing 12 changed files with 113 additions and 89 deletions.
2 changes: 1 addition & 1 deletion app/ui/package.json
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
{
"name": "app",
"private": true,
"version": "1.8.3",
"version": "1.8.4",
"type": "module",
"scripts": {
"dev": "vite",
Expand Down
3 changes: 3 additions & 0 deletions app/ui/src/App.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -180,6 +180,9 @@ export default function App() {
theme={{
algorithm:
mode === "dark" ? theme.darkAlgorithm : theme.defaultAlgorithm,
token: {
fontFamily: "Inter",
},
}}
>
<StyleProvider hashPriority="high">
Expand Down
Binary file added app/ui/src/assets/Inter-Medium.ttf
Binary file not shown.
6 changes: 5 additions & 1 deletion app/ui/src/index.css
Original file line number Diff line number Diff line change
@@ -1,4 +1,8 @@
@import url("https://fonts.googleapis.com/css2?family=Inter:wght@500&display=swap");

@font-face {
font-family: "Inter";
src: url("./assets/Inter-Medium.ttf") format("truetype");
}

* {
font-family: "Inter", sans-serif !important;
Expand Down
2 changes: 1 addition & 1 deletion package.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"name": "dialoqbase",
"version": "1.8.3",
"version": "1.8.4",
"description": "Create chatbots with ease",
"scripts": {
"ui:dev": "pnpm run --filter ui dev",
Expand Down
4 changes: 2 additions & 2 deletions server/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -41,10 +41,10 @@
"@google-ai/generativelanguage": "^2.0.0",
"@grammyjs/files": "^1.0.4",
"@huggingface/inference": "1",
"@langchain/anthropic": "^0.1.4",
"@langchain/anthropic": "0.1.4",
"@langchain/cohere": "^0.0.6",
"@langchain/community": "^0.0.35",
"@langchain/google-genai": "^0.0.10",
"@langchain/google-genai": "^0.0.16",
"@langchain/openai": "^0.0.18",
"@prisma/client": "^5.9.1",
"@slack/bolt": "^3.13.2",
Expand Down
16 changes: 16 additions & 0 deletions server/prisma/seed.ts
Original file line number Diff line number Diff line change
Expand Up @@ -354,6 +354,22 @@ const LLMS: {
stream_available: true,
model_provider: "OpenAI",
config: "{}",
},
{
model_id: "gemini-1.5-flash-dbase",
name: "Gemini 1.5 Flash (Google)",
model_type: "chat",
stream_available: true,
model_provider: "Google",
config: "{}",
},
{
model_id: "gemini-1.5-pro-dbase",
name: "Gemini 1.5 Pro (Google)",
model_type: "chat",
stream_available: true,
model_provider: "Google",
config: "{}",
}
];

Expand Down
6 changes: 4 additions & 2 deletions server/src/chain/index.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import { BaseLanguageModel } from "@langchain/core/language_models/base";
import { BaseChatModel } from "@langchain/core/language_models/chat_models";
import { Document } from "@langchain/core/documents";
import {
ChatPromptTemplate,
Expand All @@ -14,6 +15,7 @@ import {
RunnableMap,
RunnableSequence,
} from "@langchain/core/runnables";

type RetrievalChainInput = {
chat_history: string;
question: string;
Expand Down Expand Up @@ -107,8 +109,8 @@ export const createChain = ({
retriever,
response_template,
}: {
llm: BaseLanguageModel;
question_llm: BaseLanguageModel;
llm: BaseLanguageModel<any> | BaseChatModel<any> ;
question_llm: BaseLanguageModel<any> | BaseChatModel<any>;
retriever: Runnable;
question_template: string;
response_template: string;
Expand Down
3 changes: 2 additions & 1 deletion server/src/queue/controllers/crawl.controller.ts
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,8 @@ const prisma = new PrismaClient();
export const crawlQueueController = async (source: QSource) => {
let maxDepth = source.maxDepth || 1;
let maxLinks = source.maxLinks || 1;
const links = Array.from(await crawl(source.content!, maxDepth, 0, maxLinks));
const data = await crawl(source.content!, maxDepth, maxLinks);
const links = Array.from(data?.links || []);

for (const link of links) {
const newSource = await prisma.botSource.create({
Expand Down
125 changes: 61 additions & 64 deletions server/src/utils/crawl.ts
Original file line number Diff line number Diff line change
@@ -1,85 +1,82 @@
import axios from "axios";
import { load } from "cheerio";

type CrawlResult = {
links: Set<string>;
errors: Set<string>;
};

const visitedLinks: Set<string> = new Set();
const errorLinks: Set<string> = new Set();
const queuedLinks: Set<string> = new Set();

export const crawl = async (
link: string,
startUrl: string,
maxDepth = 2,
currentDepth = 0,
maxLinks = 20,
): Promise<Set<string>> => {
const parentUrl = new URL(link);

if (currentDepth > maxDepth || visitedLinks.size >= maxLinks) {
return new Set();
}

if (visitedLinks.has(link)) {
return new Set();
}
maxLinks = 20
): Promise<CrawlResult> => {
const queue: { url: string; depth: number }[] = [{ url: startUrl, depth: 0 }];
const fetchedLinks: Set<string> = new Set();

visitedLinks.add(link);
while (queue.length > 0 && visitedLinks.size < maxLinks) {
const batch = queue.splice(0, Math.min(queue.length, maxLinks - visitedLinks.size));

await Promise.all(
batch.map(async ({ url, depth }) => {
if (visitedLinks.has(url) || depth > maxDepth) {
return;
}

try {
const response = await axios.get(link, {
headers: {
Accept: "text/html",
},
});

const contentType = response.headers["content-type"];
try {
const response = await axios.get(url, {
headers: { Accept: "text/html" },
});

if (!contentType.includes("text/html")) {
console.log(`Skipping ${link} (content type: ${contentType})`);
return new Set();
}
const contentType = response.headers['content-type'];
if (!contentType || !contentType.includes("text/html")) {
return;
}

const $ = load(response.data);
const links = $("a");
const fetchedLinks: Set<string> = new Set();
const $ = load(response.data);

for (let i = 0; i < links.length; i++) {
const href = $(links[i]).attr("href");
visitedLinks.add(url);
fetchedLinks.add(url);

if (!href) {
continue;
}
$("a").each((_, element) => {
const href = $(element).attr("href");
if (!href) {
return;
}

let absolute: string;
if (href.startsWith("/")) {
absolute = new URL(href, parentUrl.origin).href;
} else if (!isWebUrl(href)) {
absolute = new URL(href, parentUrl.origin).href;
} else {
absolute = href;
}
const absoluteUrl = normalizeUrl(new URL(href, url).href);
if (isSameDomain(absoluteUrl, startUrl) && !visitedLinks.has(absoluteUrl) && !queuedLinks.has(absoluteUrl)) {
queue.push({ url: absoluteUrl, depth: depth + 1 });
queuedLinks.add(absoluteUrl);
}
});
} catch (error: any) {
console.error(`Failed to fetch ${url}:`, error?.message || error);
errorLinks.add(url);
}
})
);
}

if (new URL(absolute).host !== parentUrl.host) {
continue;
}
return { links: fetchedLinks, errors: errorLinks };
};

const childLinks = await crawl(
absolute,
maxDepth,
currentDepth + 1,
maxLinks,
);
childLinks.forEach((childLink) => fetchedLinks.add(childLink));
}
fetchedLinks.add(link);
return fetchedLinks;
} catch (error: any) {
console.log(`Error crawling ${link}: ${error?.message}`);
return new Set();
}
/**
 * Reports whether two absolute URLs refer to the same host.
 *
 * Only the `hostname` component is compared, so differences in scheme, port,
 * path, query string or fragment do not matter. Both arguments are parsed with
 * the `URL` constructor (first `url1`, then `url2`), which throws when a
 * string is not a valid absolute URL.
 *
 * @param url1 - First absolute URL.
 * @param url2 - Second absolute URL.
 * @returns `true` when both URLs have an identical hostname.
 */
const isSameDomain = (url1: string, url2: string): boolean =>
  new URL(url1).hostname === new URL(url2).hostname;

function isWebUrl(url: string): boolean {
const normalizeUrl = (url: string): string => {
try {
new URL(url);
return true;
const urlObj = new URL(url);
urlObj.hash = '';
return urlObj.href;
} catch (error) {
return false;
return url;
}
}
};
4 changes: 2 additions & 2 deletions server/src/utils/models.ts
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ export const chatModelProvider = (
modelName: modelName,
temperature: temperature,
...otherFields,
});
}) as any;
case "google-bison":
return new ChatGooglePaLM({
temperature: temperature,
Expand Down Expand Up @@ -84,7 +84,7 @@ export const chatModelProvider = (
maxOutputTokens: 2048,
apiKey: process.env.GOOGLE_API_KEY,
...otherFields,
});
}) as any
case "ollama":
return new ChatOllama({
baseUrl: otherFields.baseURL,
Expand Down
31 changes: 16 additions & 15 deletions server/yarn.lock
Original file line number Diff line number Diff line change
Expand Up @@ -559,10 +559,10 @@
dependencies:
google-gax "^4.0.3"

"@google/generative-ai@^0.1.3":
version "0.1.3"
resolved "https://registry.yarnpkg.com/@google/generative-ai/-/generative-ai-0.1.3.tgz#8e529d4d86c85b64d297b4abf1a653d613a09a9f"
integrity sha512-Cm4uJX1sKarpm1mje/MiOIinM7zdUUrQp/5/qGPAgznbdd/B9zup5ehT6c1qGqycFcSopTA1J1HpqHS5kJR8hQ==
"@google/generative-ai@^0.7.0":
version "0.7.1"
resolved "https://registry.yarnpkg.com/@google/generative-ai/-/generative-ai-0.7.1.tgz#eb187c75080c0706245699dbc06816c830d8c6a7"
integrity sha512-WTjMLLYL/xfA5BW6xAycRPiAX7FNHKAxrid/ayqC1QMam0KAK0NbMeS9Lubw80gVg5xFMLE+H7pw4wdNzTOlxw==

"@grammyjs/files@^1.0.4":
version "1.0.4"
Expand Down Expand Up @@ -707,7 +707,7 @@
"@jridgewell/resolve-uri" "3.1.0"
"@jridgewell/sourcemap-codec" "1.4.14"

"@langchain/anthropic@^0.1.4":
"@langchain/anthropic@0.1.4":
version "0.1.4"
resolved "https://registry.yarnpkg.com/@langchain/anthropic/-/anthropic-0.1.4.tgz#49c2e4625860baea0b9b5035c4c7e93a81bed704"
integrity sha512-4i25R0dHx+8N7ofI0NGE02LKG9UkhRiAjFS5iNbRcByCSIoovAuTBvdEqpwbDnqn+NkORnP/Wyw3tqFeMtMgYA==
Expand Down Expand Up @@ -738,7 +738,7 @@
uuid "^9.0.0"
zod "^3.22.3"

"@langchain/core@0.1.43", "@langchain/core@~0.1", "@langchain/core@~0.1.36", "@langchain/core@~0.1.41", "@langchain/core@~0.1.5":
"@langchain/core@0.1.43", "@langchain/core@>0.1.5 <0.3.0", "@langchain/core@~0.1", "@langchain/core@~0.1.36", "@langchain/core@~0.1.41":
version "0.1.43"
resolved "https://registry.yarnpkg.com/@langchain/core/-/core-0.1.43.tgz#2d0af42817f8d431bba5252b2ff667a9cb3a25e5"
integrity sha512-owE+UU38e4TsUq5yoaKCF+ag6u0ppwgdaqEt2Q57pdcr9nEcy8/PgTunxB10Vksq4fTJgnwWEYf/wMGZnFlRow==
Expand All @@ -755,13 +755,14 @@
zod "^3.22.4"
zod-to-json-schema "^3.22.3"

"@langchain/google-genai@^0.0.10":
version "0.0.10"
resolved "https://registry.yarnpkg.com/@langchain/google-genai/-/google-genai-0.0.10.tgz#05459e668cd018f2e4b0fb639083014151b0ef08"
integrity sha512-neFuCoMew9t8IYM5srh6RVUFQsZxqPtAFVJ0mWtZqHXtb627MECs5FYr+xw1ptPKSbhIAN5H8sgdObqes4bN3A==
"@langchain/google-genai@^0.0.16":
version "0.0.16"
resolved "https://registry.yarnpkg.com/@langchain/google-genai/-/google-genai-0.0.16.tgz#aa1c580b27110f03ce9c5f896a3957419ba95489"
integrity sha512-aUHEeY7sTwxNqj7L5scvnOhNLOKPVSvf7HR6p1Y3M7BPyU63fXP7faB+qyuHmibtKU8pj+ApoXPpjRflYKSv4w==
dependencies:
"@google/generative-ai" "^0.1.3"
"@langchain/core" "~0.1.5"
"@google/generative-ai" "^0.7.0"
"@langchain/core" ">0.1.5 <0.3.0"
zod-to-json-schema "^3.22.4"

"@langchain/openai@^0.0.18", "@langchain/openai@~0.0.14":
version "0.0.18"
Expand Down Expand Up @@ -2996,9 +2997,9 @@ fast-uri@^2.0.0, fast-uri@^2.1.0:
integrity sha512-eel5UKGn369gGEWOqBShmFJWfq/xSJvsgDzgLYC845GneayWvXBf0lJCBn5qTABfewy1ZDPoaR5OZCP+kssfuw==

fast-xml-parser@^4.3.5:
version "4.3.5"
resolved "https://registry.yarnpkg.com/fast-xml-parser/-/fast-xml-parser-4.3.5.tgz#e2f2a2ae8377e9c3dc321b151e58f420ca7e5ccc"
integrity sha512-sWvP1Pl8H03B8oFJpFR3HE31HUfwtX7Rlf9BNsvdpujD4n7WMhfmu8h9wOV2u+c1k0ZilTADhPqypzx2J690ZQ==
version "4.4.0"
resolved "https://registry.yarnpkg.com/fast-xml-parser/-/fast-xml-parser-4.4.0.tgz#341cc98de71e9ba9e651a67f41f1752d1441a501"
integrity sha512-kLY3jFlwIYwBNDojclKsNAC12sfD6NwW74QB2CoNGPvtVxjliYehVunB3HYyNi+n4Tt1dAcgwYvmKF/Z18flqg==
dependencies:
strnum "^1.0.5"

Expand Down

0 comments on commit 19b3b28

Please sign in to comment.