feat: Add text splitting configuration options
This commit is contained in:
@@ -3,15 +3,13 @@ import { cleanUrl } from "~/libs/clean-url"
 import { getSearxngURL, isSearxngJSONMode, getIsSimpleInternetSearch, totalSearchResults } from "@/services/search"
 import { pageAssistEmbeddingModel } from "@/models/embedding"
 import type { Document } from "@langchain/core/documents"
-import { RecursiveCharacterTextSplitter } from "langchain/text_splitter"
 import { MemoryVectorStore } from "langchain/vectorstores/memory"
 import { PageAssistHtmlLoader } from "~/loader/html"
 import {
-  defaultEmbeddingChunkOverlap,
-  defaultEmbeddingChunkSize,
   defaultEmbeddingModelForRag,
   getOllamaURL
 } from "~/services/ollama"
+import { getPageAssistTextSplitter } from "@/utils/text-splitter"
 
 interface SearxNGJSONResult {
   title: string
@@ -73,13 +71,9 @@ export const searxngSearch = async (query: string) => {
     baseUrl: cleanUrl(ollamaUrl)
   })
-
-  const chunkSize = await defaultEmbeddingChunkSize()
-  const chunkOverlap = await defaultEmbeddingChunkOverlap()
-  const textSplitter = new RecursiveCharacterTextSplitter({
-    chunkSize,
-    chunkOverlap
-  })
+
+  const textSplitter = await getPageAssistTextSplitter();
+
   const chunks = await textSplitter.splitDocuments(docs)
   const store = new MemoryVectorStore(ollamaEmbedding)
   await store.addDocuments(chunks)
 
Reference in New Issue
Block a user