feat: Add text splitting configuration options
This commit is contained in:
@@ -3,8 +3,6 @@ import { urlRewriteRuntime } from "@/libs/runtime"
|
||||
import { PageAssistHtmlLoader } from "@/loader/html"
|
||||
import { pageAssistEmbeddingModel } from "@/models/embedding"
|
||||
import {
|
||||
defaultEmbeddingChunkOverlap,
|
||||
defaultEmbeddingChunkSize,
|
||||
defaultEmbeddingModelForRag,
|
||||
getOllamaURL
|
||||
} from "@/services/ollama"
|
||||
@@ -12,9 +10,9 @@ import {
|
||||
getIsSimpleInternetSearch,
|
||||
totalSearchResults
|
||||
} from "@/services/search"
|
||||
import { getPageAssistTextSplitter } from "@/utils/text-splitter"
|
||||
import type { Document } from "@langchain/core/documents"
|
||||
import * as cheerio from "cheerio"
|
||||
import { RecursiveCharacterTextSplitter } from "langchain/text_splitter"
|
||||
import { MemoryVectorStore } from "langchain/vectorstores/memory"
|
||||
const getCorrectTargeUrl = async (url: string) => {
|
||||
if (!url) return ""
|
||||
@@ -104,12 +102,7 @@ export const webSogouSearch = async (query: string) => {
|
||||
baseUrl: cleanUrl(ollamaUrl)
|
||||
})
|
||||
|
||||
const chunkSize = await defaultEmbeddingChunkSize()
|
||||
const chunkOverlap = await defaultEmbeddingChunkOverlap()
|
||||
const textSplitter = new RecursiveCharacterTextSplitter({
|
||||
chunkSize,
|
||||
chunkOverlap
|
||||
})
|
||||
const textSplitter = await getPageAssistTextSplitter()
|
||||
|
||||
const chunks = await textSplitter.splitDocuments(docs)
|
||||
|
||||
|
||||
Reference in New Issue
Block a user