Files
page-assist/src/web/web.ts

157 lines
4.6 KiB
TypeScript
Raw Normal View History

2024-03-23 14:44:05 +05:30
import { getWebSearchPrompt } from "~/services/ollama"
2024-04-28 00:36:33 +05:30
import { webGoogleSearch } from "./search-engines/google"
import { webDuckDuckGoSearch } from "./search-engines/duckduckgo"
import { getIsVisitSpecificWebsite, getSearchProvider } from "@/services/search"
2024-04-28 00:36:33 +05:30
import { webSogouSearch } from "./search-engines/sogou"
import { webBraveSearch } from "./search-engines/brave"
import { getWebsiteFromQuery, processSingleWebsite } from "./website"
import { searxngSearch } from "./search-engines/searxng"
2024-12-13 20:03:52 +05:30
import { braveAPISearch } from "./search-engines/brave-api"
2025-02-01 11:22:12 +05:30
import { webBaiduSearch } from "./search-engines/baidu"
2025-02-14 18:17:12 +08:00
import { searchIod } from "./iod"
import type { WebSearchResult } from "~/types/web"
import type { IodRegistryEntry } from "~/types/iod"
/**
 * Extracts the hostname portion of a URL string.
 *
 * @param url - The URL to parse.
 * @returns The hostname (e.g. `"www.example.com"`), or an empty string when
 *   `url` cannot be parsed by the `URL` constructor.
 */
const getHostName = (url: string) => {
  try {
    return new URL(url).hostname
  } catch {
    // `new URL` throws on malformed or relative input; callers only need a
    // display label, so fall back to an empty name instead of propagating.
    return ""
  }
}
/**
 * Runs `query` against the search engine selected by `provider` and tags each
 * result with a `name` derived from the hostname of its `url`.
 *
 * @param provider - Search provider identifier. Recognised values are
 *   `"duckduckgo"`, `"sogou"`, `"brave"`, `"searxng"`, `"brave-api"` and
 *   `"baidu"`; any other value falls back to Google.
 * @param query - The search query, passed unchanged to the provider.
 * @returns The provider's results, each with `name` set by `getHostName`
 *   (an empty string when the result URL cannot be parsed).
 */
async function searchWeb(
  provider: string,
  query: string
): Promise<WebSearchResult[]> {
  let results = []

  switch (provider) {
    case "duckduckgo":
      results = await webDuckDuckGoSearch(query)
      break
    case "sogou":
      results = await webSogouSearch(query)
      break
    case "brave":
      results = await webBraveSearch(query)
      break
    case "searxng":
      results = await searxngSearch(query)
      break
    case "brave-api":
      results = await braveAPISearch(query)
      break
    case "baidu":
      results = await webBaiduSearch(query)
      break
    default:
      // Unknown or unset providers use Google.
      results = await webGoogleSearch(query)
      break
  }

  return results.map((r) => ({ ...r, name: getHostName(r.url) }))
}
2025-02-14 18:17:12 +08:00
/**
 * Builds the system prompt used for search-assisted answers by gathering web
 * and/or IoD search results and substituting them into the template returned
 * by `getWebSearchPrompt`.
 *
 * @param query - The user's query. When web search is enabled, the
 *   "visit specific website" setting is on, and the query contains a URL,
 *   that single site is processed instead of running a search.
 * @param keywords - Extra keywords forwarded to `searchIod`.
 * @param webSearch - Whether to collect web results (default `true`).
 * @param iodSearch - Whether to collect IoD results (default `false`).
 * @returns An object with:
 *   - `prompt`: the filled-in template,
 *   - `webSources`: `{ url, name, type: "url" }` for each web result,
 *   - `iodSources`: the raw entries returned by `searchIod`,
 *   - `iodSearchResults`: the trimmed-down IoD entries placed in the prompt,
 *   - `iodTokenCount`: the summed length, in characters, of the IoD contents
 *     (no tokenizer is applied here).
 *   If anything throws, the error is logged and an object with an empty
 *   prompt, empty arrays and a zero count is returned instead.
 */
export const getSystemPromptForWeb = async (
  query: string,
  keywords: string[] = [],
  webSearch = true,
  iodSearch = false
) => {
  try {
    const websiteVisit = getWebsiteFromQuery(query)
    let webSearchResults: WebSearchResult[] = []

    if (webSearch) {
      const isVisitSpecificWebsite = await getIsVisitSpecificWebsite()

      if (isVisitSpecificWebsite && websiteVisit.hasUrl) {
        const url = websiteVisit.url
        const queryWithoutUrl = websiteVisit.queryWithouUrls
        webSearchResults = await processSingleWebsite(url, queryWithoutUrl)
      } else {
        const searchProvider = await getSearchProvider()
        webSearchResults = await searchWeb(searchProvider, query)
      }
    }

    let iodSearchResults: IodRegistryEntry[] = []
    if (iodSearch) {
      iodSearchResults = await searchIod(query, keywords)
    }

    const _iodSearchResults = iodSearchResults.map((res) => {
      // Prefer the full content, then the description. Normalise a missing
      // value to "" so the prompt never contains the text "undefined" and the
      // length arithmetic below cannot throw.
      const content = res.content || res.description || ""
      return {
        doId: res.doId,
        name: res.name,
        url: res.url,
        data_space: res.data_space,
        content,
        tokenCount: content.length
      }
    })

    // Result ids are 1-based in the prompt.
    const iod_search_results = _iodSearchResults
      .map(
        (result, idx) =>
          `<result doId="${result.doId}" name="${result.name}" source="${result.url}" id="${idx + 1}">${result.content}</result>`
      )
      .join("\n")
    console.log("iod_search_result: " + iod_search_results)

    const web_search_results = webSearchResults
      .map(
        (result, idx) =>
          `<result source="${result.url}" name="${result.name}" id="${idx + 1}">${result.content}</result>`
      )
      .join("\n")
    console.log("web_search_result: " + web_search_results)

    const current_date_time = new Date().toLocaleString()

    const system = await getWebSearchPrompt()

    // Replacer functions are used instead of replacement strings so that "$"
    // sequences inside search results (e.g. "$&", "$1", "$$") are inserted
    // literally rather than being interpreted as replacement patterns.
    const prompt = system
      .replace("{current_date_time}", () => current_date_time)
      .replace("{iod_search_results}", () => iod_search_results)
      .replace("{web_search_results}", () => web_search_results)

    return {
      prompt,
      webSources: webSearchResults.map((result) => {
        return {
          url: result.url,
          name: result.name,
          type: "url"
        }
      }),
      iodSources: iodSearchResults,
      iodSearchResults: _iodSearchResults,
      iodTokenCount: _iodSearchResults.reduce(
        (acc, cur) => acc + cur.tokenCount,
        0
      )
    }
  } catch (e) {
    console.error(e)
    return {
      prompt: "",
      webSources: [],
      iodSources: [],
      iodSearchResults: [],
      iodTokenCount: 0
    }
  }
}