diff --git a/builder/partnerproduct/src/config.yaml b/builder/partnerproduct/src/config.yaml index 637dc3a0..68c1a469 100644 --- a/builder/partnerproduct/src/config.yaml +++ b/builder/partnerproduct/src/config.yaml @@ -14,6 +14,7 @@ vector_store: numCandidates: 150 minScore: 0.1 vectorSearchIndexName: 'vector_index' + textSearchIndexName: 'text_index' llms: class_name: Fireworks model_name: 'accounts/fireworks/models/mixtral-8x22b-instruct' diff --git a/builder/partnerproduct/src/loader.ts b/builder/partnerproduct/src/loader.ts index 67f77371..23a5f46f 100644 --- a/builder/partnerproduct/src/loader.ts +++ b/builder/partnerproduct/src/loader.ts @@ -26,10 +26,11 @@ try { chunksAdded += chunks.entriesAdded; }); } - + if (chunksAdded > 0) { console.log(`\n Total documents added : ${chunksAdded} `) await llmApplication.createVectorIndex(); + await llmApplication.createTextIndex(); } else { console.log("\n-- Data not inserted, please retry --") diff --git a/builder/partnerproduct/src/semantic-search.ts b/builder/partnerproduct/src/semantic-search.ts index d95659f3..c6fb2ca2 100644 --- a/builder/partnerproduct/src/semantic-search.ts +++ b/builder/partnerproduct/src/semantic-search.ts @@ -14,7 +14,7 @@ const app = express(); const port = 9001; app.use(express.json()); -app.use(cors()); +app.use(cors()); const llmApplication = await new RAGApplicationBuilder() .setModel(getModelClass()) @@ -42,6 +42,30 @@ app.get('/semantic-search', async (req: Request, res: Response) => { } }); +app.get('/hybrid-search', async (req: Request, res: Response) => { + try { + const userQuery = asString(req.query.query); + const vectorWeight = asFloat(req.query.vectorWeight ?? 0.5); + const fullTextWeight = asFloat(req.query.fullTextWeight ?? 0.5); + + if (!userQuery) { + return res.status(400).send('Query is required'); + } + + llmApplication.hybridQuery(userQuery, vectorWeight, fullTextWeight).then((result) => { + console.log('Result:', result); + res.send(result); + }); + + } catch (error) { + console.error('Error during hybrid vector search:', error); + res.status(500).send('An error occurred while processing your request.'); + } +}); + app.listen(port, () => { console.log(`Server is running on http://localhost:${port}`); -}); \ No newline at end of file +}); + +function asString(value: any): string { return typeof value !== 'undefined' ? value.toString() : ''; } +function asFloat(value: any): number { return typeof value !== 'undefined' ? parseFloat(value.toString()) || 0 : 0; } diff --git a/builder/partnerproduct/ui/package-lock.json b/builder/partnerproduct/ui/package-lock.json index 0ffff7fe..ec5e6f74 100644 --- a/builder/partnerproduct/ui/package-lock.json +++ b/builder/partnerproduct/ui/package-lock.json @@ -11,6 +11,7 @@ "@leafygreen-ui/card": "^11.0.0", "@leafygreen-ui/icon": "^12.6.0", "@leafygreen-ui/loading-indicator": "^2.0.12", + "@leafygreen-ui/number-input": "^2.2.1", "@leafygreen-ui/search-input": "^3.1.2", "@leafygreen-ui/side-nav": "^14.1.3", "@leafygreen-ui/tabs": "^13.0.1", @@ -3892,9 +3893,10 @@ } }, "node_modules/@leafygreen-ui/form-field": { - "version": "1.2.3", - "resolved": "https://registry.npmjs.org/@leafygreen-ui/form-field/-/form-field-1.2.3.tgz", - "integrity": "sha512-LW2fM9oFgoQH8V4ZrWcrVlo6phNhULy7i5MTQDiHi+HvAkJQl67R2DWl2QuXgBNgkOkgrDM5ccb/J7fD37VO4A==", + "version": "1.2.5", + "resolved": "https://registry.npmjs.org/@leafygreen-ui/form-field/-/form-field-1.2.5.tgz", + "integrity": "sha512-XH7vJZbgn6wnS7Wv0DpNqcL8q0qPqxHsrVBnqk+iKlnGjCjo1GFzngjOIHODUymEfWRJERrxKO6z8FsSof0GsQ==", + "license": "Apache-2.0", "dependencies": { "@leafygreen-ui/emotion": "^4.0.8", "@leafygreen-ui/hooks": "^8.1.3", @@ -4160,6 +4162,58 @@ "@leafygreen-ui/leafygreen-provider": "^3.1.12" } }, + "node_modules/@leafygreen-ui/number-input": { + "version": "2.2.1", + "resolved": "https://registry.npmjs.org/@leafygreen-ui/number-input/-/number-input-2.2.1.tgz", + "integrity": "sha512-hfJW3llFkz06PR9QzzJataT+OqRYYOFztZOA4hKZYiRpU6GkybFD/1Rr428KVuY5URP5gZLzAoLn5nNNwq5LTA==", + "license": "Apache-2.0", + "dependencies": { + "@leafygreen-ui/a11y": "^1.4.13", + "@leafygreen-ui/button": "^21.2.0", + "@leafygreen-ui/emotion": "^4.0.8", + "@leafygreen-ui/form-field": "^1.2.4", + "@leafygreen-ui/hooks": "^8.1.3", + "@leafygreen-ui/icon": "^12.5.0", + "@leafygreen-ui/lib": "^13.5.0", + "@leafygreen-ui/palette": "^4.0.9", + "@leafygreen-ui/popover": "^11.4.0", + "@leafygreen-ui/select": "^12.1.4", + "@leafygreen-ui/tokens": "^2.8.0", + "@leafygreen-ui/tooltip": "^11.1.0", + "@leafygreen-ui/typography": "^19.1.0", + "lodash": "^4.17.21" + }, + "peerDependencies": { + "@leafygreen-ui/leafygreen-provider": "^3.1.12" + } + }, + "node_modules/@leafygreen-ui/number-input/node_modules/@leafygreen-ui/polymorphic": { + "version": "2.0.2", + "resolved": "https://registry.npmjs.org/@leafygreen-ui/polymorphic/-/polymorphic-2.0.2.tgz", + "integrity": "sha512-OjP+hPG/cwADShcGa1SZdm51G2wVpbNqpU0B3GonEAvGLcAvG4LDMXa7BWo3GDliNkPtVMS86w0eZzEDmLfKmQ==", + "license": "Apache-2.0", + "dependencies": { + "@leafygreen-ui/lib": "^13.6.0", + "lodash": "^4.17.21" + } + }, + "node_modules/@leafygreen-ui/number-input/node_modules/@leafygreen-ui/typography": { + "version": "19.3.0", + "resolved": "https://registry.npmjs.org/@leafygreen-ui/typography/-/typography-19.3.0.tgz", + "integrity": "sha512-pgTRcc4usW/S9nDDzkf5Ac/JPEybhWtOnDpmrp99mAJHM6tH48Pd1HjRNHWjn6bnh0nXWjwANXX1ZEe+8ggCNg==", + "license": "Apache-2.0", + "dependencies": { + "@leafygreen-ui/emotion": "^4.0.8", + "@leafygreen-ui/icon": "^12.6.0", + "@leafygreen-ui/lib": "^13.6.1", + "@leafygreen-ui/palette": "^4.0.10", + "@leafygreen-ui/polymorphic": "^2.0.0", + "@leafygreen-ui/tokens": "^2.9.0" + }, + "peerDependencies": { + "@leafygreen-ui/leafygreen-provider": "^3.1.12" + } + }, "node_modules/@leafygreen-ui/palette": { "version": "4.1.1", "resolved": "https://registry.npmjs.org/@leafygreen-ui/palette/-/palette-4.1.1.tgz", @@ -4279,21 +4333,22 @@ } }, "node_modules/@leafygreen-ui/select": { - "version": "12.1.0", - "resolved": "https://registry.npmjs.org/@leafygreen-ui/select/-/select-12.1.0.tgz", - "integrity": "sha512-4NaDvlYWciwg83snMwKrmUx9mvUrGzucqGRAIShsH9M4R21Bvdy2sXMWcjX/w8tZCwT500Jk6xWJll9op2Yx9w==", + "version": "12.1.4", + "resolved": "https://registry.npmjs.org/@leafygreen-ui/select/-/select-12.1.4.tgz", + "integrity": "sha512-FUb2y2UbytstILK11jOLCIH8am2rF8e/8QAoZ6D1SNgEgmOnOyn0krP8aM7B7P2YNy/oMKFYFwFIU/zsbG3g8w==", + "license": "Apache-2.0", "dependencies": { "@leafygreen-ui/button": "^21.2.0", "@leafygreen-ui/emotion": "^4.0.8", - "@leafygreen-ui/form-field": "^1.2.0", + "@leafygreen-ui/form-field": "^1.2.4", "@leafygreen-ui/hooks": "^8.1.3", - "@leafygreen-ui/icon": "^12.5.0", - "@leafygreen-ui/input-option": "^1.1.3", - "@leafygreen-ui/lib": "^13.5.0", + "@leafygreen-ui/icon": "^12.5.4", + "@leafygreen-ui/input-option": "^2.0.1", + "@leafygreen-ui/lib": "^13.6.1", "@leafygreen-ui/palette": "^4.0.10", "@leafygreen-ui/popover": "^11.4.0", - "@leafygreen-ui/tokens": "^2.8.0", - "@leafygreen-ui/typography": "^19.1.0", + "@leafygreen-ui/tokens": "^2.9.0", + "@leafygreen-ui/typography": "^19.2.1", "@lg-tools/test-harnesses": "^0.1.2", "@types/react-is": "^18.0.0", "lodash": "^4.17.21", @@ -4304,17 +4359,46 @@ "@leafygreen-ui/leafygreen-provider": "^3.1.12" } }, + "node_modules/@leafygreen-ui/select/node_modules/@leafygreen-ui/input-option": { + "version": "2.0.2", + "resolved": "https://registry.npmjs.org/@leafygreen-ui/input-option/-/input-option-2.0.2.tgz", + "integrity": "sha512-GD3TX/5uF6NMdlcOt89jg7NXrN43ZAm+TEg/84NT9Mpdik9pw44Nznhv/BD/jXaWpxPXlDQzq7ReAOi7WtUujg==", + "license": "Apache-2.0", + "dependencies": { + "@leafygreen-ui/a11y": "^1.5.0", + "@leafygreen-ui/emotion": "^4.0.8", + "@leafygreen-ui/lib": "^13.6.1", + "@leafygreen-ui/palette": "^4.0.9", + "@leafygreen-ui/polymorphic": "^2.0.0", + "@leafygreen-ui/tokens": "^2.9.0", + "@leafygreen-ui/typography": "^19.2.1" + }, + "peerDependencies": { + "@leafygreen-ui/leafygreen-provider": "^3.1.12" + } + }, + "node_modules/@leafygreen-ui/select/node_modules/@leafygreen-ui/polymorphic": { + "version": "2.0.2", + "resolved": "https://registry.npmjs.org/@leafygreen-ui/polymorphic/-/polymorphic-2.0.2.tgz", + "integrity": "sha512-OjP+hPG/cwADShcGa1SZdm51G2wVpbNqpU0B3GonEAvGLcAvG4LDMXa7BWo3GDliNkPtVMS86w0eZzEDmLfKmQ==", + "license": "Apache-2.0", + "dependencies": { + "@leafygreen-ui/lib": "^13.6.0", + "lodash": "^4.17.21" + } + }, "node_modules/@leafygreen-ui/select/node_modules/@leafygreen-ui/typography": { - "version": "19.1.2", - "resolved": "https://registry.npmjs.org/@leafygreen-ui/typography/-/typography-19.1.2.tgz", - "integrity": "sha512-Fd5NJWrH5yaUSylqDlh+j53AxWmbng1lbjXtCpJ6l6YS4YBgnhjGH3+wGh3XjPxhsiSQv9/eyMNZ1BIgH5V11w==", + "version": "19.3.0", + "resolved": "https://registry.npmjs.org/@leafygreen-ui/typography/-/typography-19.3.0.tgz", + "integrity": "sha512-pgTRcc4usW/S9nDDzkf5Ac/JPEybhWtOnDpmrp99mAJHM6tH48Pd1HjRNHWjn6bnh0nXWjwANXX1ZEe+8ggCNg==", + "license": "Apache-2.0", "dependencies": { "@leafygreen-ui/emotion": "^4.0.8", - "@leafygreen-ui/icon": "^12.5.2", - "@leafygreen-ui/lib": "^13.4.0", + "@leafygreen-ui/icon": "^12.6.0", + "@leafygreen-ui/lib": "^13.6.1", "@leafygreen-ui/palette": "^4.0.10", - "@leafygreen-ui/polymorphic": "^1.3.7", - "@leafygreen-ui/tokens": "^2.7.0" + "@leafygreen-ui/polymorphic": "^2.0.0", + "@leafygreen-ui/tokens": "^2.9.0" }, "peerDependencies": { "@leafygreen-ui/leafygreen-provider": "^3.1.12" @@ -20540,6 +20624,20 @@ "is-typedarray": "^1.0.0" } }, + "node_modules/typescript": { + "version": "4.9.5", + "resolved": "https://registry.npmjs.org/typescript/-/typescript-4.9.5.tgz", + "integrity": "sha512-1FXk9E2Hm+QzZQ7z+McJiHL4NW1F2EzMu9Nq9i3zAaGqibafqYwCVU6WyWAuyQRRzOlxou8xZSyXLEN8oKj24g==", + "license": "Apache-2.0", + "peer": true, + "bin": { + "tsc": "bin/tsc", + "tsserver": "bin/tsserver" + }, + "engines": { + "node": ">=4.2.0" + } + }, "node_modules/unbox-primitive": { "version": "1.0.2", "resolved": "https://registry.npmjs.org/unbox-primitive/-/unbox-primitive-1.0.2.tgz", diff --git a/builder/partnerproduct/ui/package.json b/builder/partnerproduct/ui/package.json index ed383c09..d23e5b28 100644 --- a/builder/partnerproduct/ui/package.json +++ b/builder/partnerproduct/ui/package.json @@ -6,6 +6,7 @@ "@leafygreen-ui/card": "^11.0.0", "@leafygreen-ui/icon": "^12.6.0", "@leafygreen-ui/loading-indicator": "^2.0.12", + "@leafygreen-ui/number-input": "^2.2.1", "@leafygreen-ui/search-input": "^3.1.2", "@leafygreen-ui/side-nav": "^14.1.3", "@leafygreen-ui/tabs": "^13.0.1", diff --git a/builder/partnerproduct/ui/src/App.js b/builder/partnerproduct/ui/src/App.js index 00e5b76e..6d3adb5f 100644 --- a/builder/partnerproduct/ui/src/App.js +++ b/builder/partnerproduct/ui/src/App.js @@ -49,6 +49,11 @@ function App() { Vector Search + + } > + Hybrid Search + +
@@ -56,6 +61,7 @@ function App() { } /> } /> } /> + } />
@@ -64,4 +70,4 @@ function App() { ); } -export default App; \ No newline at end of file +export default App; diff --git a/builder/partnerproduct/ui/src/modules/search/search.js b/builder/partnerproduct/ui/src/modules/search/search.js index 9bac2cdb..49fee737 100644 --- a/builder/partnerproduct/ui/src/modules/search/search.js +++ b/builder/partnerproduct/ui/src/modules/search/search.js @@ -6,9 +6,13 @@ import Card from '@leafygreen-ui/card'; import { PageLoader, Spinner } from "@leafygreen-ui/loading-indicator"; import './search.css'; import { H1, H2 } from '@leafygreen-ui/typography'; +import { NumberInput } from '@leafygreen-ui/number-input'; -function Search() { +function Search({ hybrid }) { + const isHybridSearch = hybrid const [searchValue, setSearchValue] = useState(''); + const [vectorWeight, setVectorWeight] = useState(0.5); + const [textWeight, setTextWeight] = useState(0.5); const [response, setResponse] = useState([]); const [loading, setLoading] = useState(false); @@ -16,11 +20,13 @@ function Search() { setSearchValue(event.target.value); }; + const searchEndpoint = `http://localhost:9001/${isHybridSearch ? 'hybrid-search' : 'semantic-search'}`; + const searchResults = () => { console.log(searchValue); setLoading(true); const query = encodeURIComponent(searchValue); - fetch(`http://localhost:9001/semantic-search?query=${query}`, { + fetch(`${searchEndpoint}?query=${query}&vectorWeight=${vectorWeight}&fullTextWeight=${textWeight}`, { method: 'GET', headers: { 'Content-Type': 'application/json' @@ -38,13 +44,29 @@ function Search() { return (
-

Vector Search

+

{ isHybridSearch ? "Hybrid Search" : "Vector Search"}

+ { isHybridSearch && <> + setVectorWeight(e.target.value)} + /> + setTextWeight(e.target.value)} + /> + }
<> @@ -52,6 +74,7 @@ function Search() { {response.map((item, index) => (

Score: {item.score}

+ { isHybridSearch &&
Text Score: {item.fts_score}, Vector Score: {item.vs_score}
}

Content: {item.pageContent}

Metadata:

@@ -66,4 +89,4 @@ function Search() { ); } -export default Search; \ No newline at end of file +export default Search; diff --git a/src/core/rag-application.ts b/src/core/rag-application.ts index 1d91a12e..c56a1db9 100644 --- a/src/core/rag-application.ts +++ b/src/core/rag-application.ts @@ -216,6 +216,12 @@ export class RAGApplication { return await this.vectorDb.similaritySearch(queryEmbedded, this.searchResultCount); } + public async hybridQuery(query: string, vectorWeight = 0.1, fullTextWeight = 0.9) { + const cleanQuery = cleanString(query); + const queryEmbedded = await RAGEmbedding.getEmbedding().embedQuery(cleanQuery); + return await this.vectorDb.hybridSearch(query, queryEmbedded, this.searchResultCount, vectorWeight, fullTextWeight); + } + public async getContext(query: string) { const cleanQuery = cleanString(query); const rawContext = await this.getEmbeddings(cleanQuery); @@ -224,7 +230,7 @@ export class RAGApplication { public async getQueryContext(cleanQuery: string, aggregatePipelineName: string) { - //TODO: Method override. Create a MQL query with user prompts using LLM. + //TODO: Method override. Create a MQL query with user prompts using LLM. // Generate output query with the user prompt and the context. let mqlQuery = await this.dbLookup.get(aggregatePipelineName).aggregateQuery; mqlQuery = JSON.stringify(mqlQuery); @@ -263,7 +269,7 @@ export class RAGApplication { // If there is not context lookup provided sources = []; } - + return { sources, result: await this.model.query(this.queryTemplate, userQuery, context, conversationId), @@ -274,6 +280,10 @@ export class RAGApplication { await this.vectorDb.createVectorIndex(RAGEmbedding.getEmbedding().getDimensions()); } + public async createTextIndex() { + await this.vectorDb.createTextIndex(); + } + public async docsCount() : Promise { return await this.vectorDb.docsCount(); } @@ -281,4 +291,4 @@ export class RAGApplication { public getDb(key: string): Map { return this.dbLookup.get(key); } -} \ No newline at end of file +} diff --git a/src/interfaces/base-db.ts b/src/interfaces/base-db.ts index ced439c4..2c96e9da 100644 --- a/src/interfaces/base-db.ts +++ b/src/interfaces/base-db.ts @@ -4,8 +4,10 @@ export interface BaseDb { init({}: { dimensions: number }): Promise; insertChunks(chunks: InsertChunkData[]): Promise; similaritySearch(query: number[], k: number): Promise; + hybridSearch(textQuery: string, query: number[], k: number, vectorWeight, fullTextWeight): Promise getVectorCount(): Promise; createVectorIndex(numDimensions: number): Promise; + createTextIndex(): Promise; docsCount(): Promise; deleteKeys(uniqueLoaderId: string): Promise; diff --git a/src/vectorDb/mongo-db-atlas.ts b/src/vectorDb/mongo-db-atlas.ts index 070a7abf..b651fcb8 100644 --- a/src/vectorDb/mongo-db-atlas.ts +++ b/src/vectorDb/mongo-db-atlas.ts @@ -10,6 +10,7 @@ export class MongoDBAtlas implements BaseDb { private static readonly INDEX_NAME = "vector_index"; private static readonly EMBEDDING_KEY = "embedding"; private static readonly TEXT_KEY = "text"; + private static readonly TEXT_INDEX_NAME = "text_index"; private readonly connectionString: string; private readonly dbName: string; private readonly collectionName: string; @@ -17,6 +18,7 @@ export class MongoDBAtlas implements BaseDb { private readonly embeddingKey: string; private readonly textKey: string; private readonly indexName: string; + private readonly textIndexName?: string; private similarityFunction: string; private collection: any; private numCandidates: number; @@ -32,7 +34,7 @@ export class MongoDBAtlas implements BaseDb { * @param numCandidates The number of candidates to consider during similarity search. Default is 100. * @param similarityFunction The similarity function to use during similarity search. */ - constructor({ connectionString, dbName, collectionName, embeddingKey = MongoDBAtlas.EMBEDDING_KEY, textKey = MongoDBAtlas.TEXT_KEY, indexName = MongoDBAtlas.INDEX_NAME, numCandidates = 100, similarityFunction, minScore = 0.1 }: { connectionString: string; dbName: string; collectionName: string; embeddingKey?: string; textKey?: string; indexName?: string; numCandidates: number; similarityFunction: string; minScore: number; } + constructor({ connectionString, dbName, collectionName, embeddingKey = MongoDBAtlas.EMBEDDING_KEY, textKey = MongoDBAtlas.TEXT_KEY, indexName = MongoDBAtlas.INDEX_NAME, numCandidates = 100, similarityFunction, minScore = 0.1, textIndexName = MongoDBAtlas.TEXT_INDEX_NAME }: { connectionString: string; dbName: string; collectionName: string; embeddingKey?: string; textKey?: string; indexName?: string; numCandidates: number; similarityFunction: string; minScore: number; textIndexName?: string;} ) { this.connectionString = connectionString; this.dbName = dbName; @@ -41,6 +43,7 @@ export class MongoDBAtlas implements BaseDb { this.embeddingKey = embeddingKey; this.textKey = textKey; this.indexName = indexName; + this.textIndexName = textIndexName; this.similarityFunction = similarityFunction; this.numCandidates = numCandidates; this.minScore = minScore; @@ -85,12 +88,144 @@ export class MongoDBAtlas implements BaseDb { const query_object = [await this.getVectorSearchQuery(query, k), { "$project": { "_id": 0, "score": { "$meta": "vectorSearchScore" }, "text": 1, "metadata": 1 } }, {"$match": {"score": {"$gt": this.minScore}}}]; const results = await this.collection.aggregate(query_object).toArray(); + const result = await results.map((result) => { + const pageContent = (result)[this.textKey]; + delete (result.metadata).pageContent; + + return { + score: result.score, + pageContent, + metadata: result.metadata, + } + }); + + return result + } + + /** + * Performs a hybrid search using reciprocal-rank-fusion for the given textQuery and vectorQuery. + * @see https://www.mongodb.com/docs/atlas/atlas-vector-search/tutorials/reciprocal-rank-fusion/ + * + * @param textQuery The query text for hybrid search. + * @param query The query vector. + * @param k The number of results to return. + * @returns A Promise that resolves to an array of ExtractChunkData objects representing the search results. + */ + async hybridSearch(textQuery: string, query: number[], k: number, vectorWeight = 0.1, fullTextWeight = 0.9): Promise { + const query_object = [ + await this.getVectorSearchQuery(query, k), + { + "$group": { + "_id": null, + "docs": {"$push": "$$ROOT"} + } + }, + { + "$unwind": { + "path": "$docs", + "includeArrayIndex": "rank" + } + }, + { + "$addFields": { + "_id": "$docs._id", + "vs_score": { + "$multiply": [ + vectorWeight, { + "$divide": [ + 1.0, { + "$add": ["$rank", 60] + } + ] + } + ] + } + } + }, + { + "$unionWith": { + "coll": this.collectionName, + "pipeline": [ + { + "$search": { + "index": this.textIndexName, + "text": { + "query": textQuery, + "path": this.textKey + } + } + }, { + "$limit": k + }, { + "$group": { + "_id": null, + "docs": {"$push": "$$ROOT"} + } + }, { + "$unwind": { + "path": "$docs", + "includeArrayIndex": "rank" + } + }, { + "$addFields": { + "_id": "$docs._id", + "fts_score": { + "$multiply": [ + fullTextWeight, { + "$divide": [ + 1.0, { + "$add": ["$rank", 60] + } + ] + } + ] + } + } + } + ] + } + }, + { + "$group": { + "_id": "$_id", + docs: { + $first: "$docs", + }, + "vs_score": {"$max": "$vs_score"}, + "fts_score": {"$max": "$fts_score"} + } + }, + { + "$addFields": { + "docs.vs_score": {"$ifNull": ["$vs_score", 0]}, + "docs.fts_score": {"$ifNull": ["$fts_score", 0]} + } + }, + { + "$addFields": { + "docs.score": {"$add": ["$docs.vs_score", "$docs.fts_score"]} + } + }, + { + $replaceRoot: {newRoot: "$docs"} + }, + { + "$sort": {"score": -1} + }, + { + "$limit": k + } + ]; + const results = await this.collection.aggregate(query_object).toArray(); + return results.map((result) => { const pageContent = (result)[this.textKey]; delete (result.metadata).pageContent; return { score: result.score, + vs_score: result.vs_score, + fts_score: result.fts_score, pageContent, metadata: result.metadata, } @@ -153,7 +288,7 @@ export class MongoDBAtlas implements BaseDb { */ async createVectorIndex(numDimensions: number, similarityFunction?: string): Promise { try { - + this.similarityFunction = similarityFunction ?? "cosine"; const index = { name: this.indexName, @@ -176,6 +311,33 @@ export class MongoDBAtlas implements BaseDb { } } + /** + * Creates a text search index in the collection. + * @returns A Promise that resolves when the text search index has been created. + */ + async createTextIndex(): Promise { + try { + const index = { + name: this.textIndexName, + type: "search", + definition: { + "mappings": { + "dynamic": false, + "fields": { + "text": [{ + "type": "string" + }] + } + } + } + } + await this.collection.createSearchIndex(index); + console.log("\n-- Text search index created --") + } catch (e) { + return Promise.reject(e.codeName); + } + } + async docsCount(): Promise { try { const docsCount = await this.client.db(this.dbName).collection(this.collectionName).estimatedDocumentCount(); @@ -185,4 +347,4 @@ export class MongoDBAtlas implements BaseDb { return 0; } } -} \ No newline at end of file +} diff --git a/src/yaml_parser/src/LoadYaml.ts b/src/yaml_parser/src/LoadYaml.ts index 59e83260..1407a8d7 100644 --- a/src/yaml_parser/src/LoadYaml.ts +++ b/src/yaml_parser/src/LoadYaml.ts @@ -34,8 +34,8 @@ function getDataFromYamlFile() { export function getSystemPrompt() { const parsedData = getDataFromYamlFile(); - const systemPrompt = readFileSync(parsedData.systemPromptPath, 'utf8'); - return systemPrompt; + const systemPrompt = readFileSync(parsedData.systemPromptPath, 'utf8'); + return systemPrompt; } export function getDatabaseConfig() { @@ -74,9 +74,9 @@ try { query: query, jsonSchema: jsonSchema }); - + } catch (error) { - console.log('Error reading aggregate operator query file:', error); + console.log('Error reading aggregate operator query file:', error); } } return aggregateOperatorConfigs; @@ -110,19 +110,21 @@ export function getConditionOpConfigs(){ export function getVBDConfigInfo() { const parsedData = getDataFromYamlFile(); const { - vector_store: { connectionString, dbName, collectionName, vectorSearchIndexName, minScore, numCandidates }, + vector_store: { connectionString, dbName, collectionName, vectorSearchIndexName, textSearchIndexName, minScore, numCandidates }, } = parsedData; assert(typeof connectionString === 'string', 'connectionString is required'); assert(typeof dbName === 'string', 'dbName is required'); assert(typeof collectionName === 'string', 'collectionName is required'); assert(typeof vectorSearchIndexName === 'string', 'vectorSearchIndexName is required'); + assert(typeof textSearchIndexName === 'string', 'textSearchIndexName is required'); return { connectionString, dbName, collectionName, vectorSearchIndexName, + textSearchIndexName, minScore, numCandidates }; @@ -194,7 +196,7 @@ export function getEmbeddingModel() { case 'Bedrock': return new BedrockEmbedding({ modelName: parsedData.embedding.model_name, dimension: parsedData.embedding.dimension}); case 'Fireworks': - return new FireworksEmbedding({ modelName: parsedData.embedding.model_name, dimension: parsedData.embedding.dimension}); + return new FireworksEmbedding({ modelName: parsedData.embedding.model_name, dimension: parsedData.embedding.dimension}); case 'Nomic-v1': return new NomicEmbeddingsv1(); case 'Nomic-v1.5': @@ -367,4 +369,4 @@ export function getStreamOptions() { return { stream: parsedData.stream_options.stream ?? false, }; -} \ No newline at end of file +}