feat: upgrade registry

This commit is contained in:
CaiHQ
2025-08-17 22:39:12 +08:00
parent 3fb66b4c36
commit ef0e315bdc
18 changed files with 3288 additions and 92 deletions

View File

@@ -3,6 +3,7 @@ import { PageAssistHtmlLoader } from "@/loader/html"
import { PageAssistPDFUrlLoader } from "@/loader/pdf-url"
import { pageAssistEmbeddingModel } from "@/models/embedding"
import { defaultEmbeddingModelForRag, getOllamaURL } from "@/services/ollama"
import {
getIsSimpleInternetSearch,
totalSearchResults
@@ -14,65 +15,84 @@ import type { IodRegistryEntry } from "~/types/iod"
import { PageAssitDatabase } from "@/db"
import exp from "constants"
import { Segment, useDefault, cnPOSTag, enPOSTag} from 'segmentit';
const segment = useDefault(new Segment());
/**
 * Splits free text into keyword candidates using the module-level `segment`
 * instance.
 *
 * Every segmented word is logged together with its part-of-speech tag
 * (rendered through `enPOSTag`) for debugging purposes.
 *
 * @param input Text to segment.
 * @returns The text of every segmented word that is longer than one character,
 *          in segmentation order.
 */
export const tokenizeInput = (input: string): string[] => {
  const segments = segment.doSegment(input, { simple: false });

  // Debug output: each word paired with its part-of-speech label.
  const tagged = segments.map((seg) => ({ w: seg.w, p: enPOSTag(seg.p) }));
  console.log(tagged);

  // Single-character words are dropped from the result.
  const tokens = [];
  for (const seg of segments) {
    if (seg.w.length > 1) {
      tokens.push(seg.w);
    }
  }
  return tokens;
}
//doipUrl = tcp://reg01.public.internetofdata.cn:21037
/**
 * Endpoints and identifiers used when building IoD requests in this module.
 *
 * - `gatewayUrl`: sent as the `doipUrl` field of every request built here.
 * - `registry`: identifier used as the `id` of registry search requests.
 * - `localRepository`: identifier of the local repository (not read by the
 *   code visible here — presumably used elsewhere; verify against callers).
 * - `doBrowser`: HTTP endpoint the request parameters are POSTed to.
 *
 * Each key appears exactly once: an object literal with a repeated key is
 * rejected by the TypeScript compiler, and at runtime only the last value
 * would be kept anyway.
 */
export const iodConfig = {
  "gatewayUrl": "tcp://127.0.0.1:21036",
  "registry":"bdware/Registry",
  "localRepository":"bdtest.local/myrepo1",
  "doBrowser":"http://127.0.0.1:21030/SCIDE/SCManager"
}
export const makeRegSearchParams = (count: number, keyword: string) => ({
action: "executeContract",
contractID: "BDBrowser",
operation: "sendRequestDirectly",
arg: {
id: iodConfig.registry,
doipUrl: iodConfig.gatewayUrl,
op: "Search",
attributes: {
offset: 0,
count,
bodyBase64Encoded: false,
searchMode: [
{
key: "data_type",
type: "MUST",
value: "paper"
},
// {
// key: "title",
// type: "MUST",
// value: keyword,
// },
{
key: "description",
type: "MUST",
value: keyword
}
]
},
body: ""
/**
 * Reports whether `str` occurs anywhere inside the pipe-separated word list
 * below.
 *
 * The check is a plain substring search over the joined list, so fragments of
 * a listed word, the `|` separator itself, and the empty string all count as
 * "in the list".
 *
 * @param str Candidate keyword.
 * @returns `true` when `str` is a substring of the list, `false` otherwise.
 */
function inGrepList(str: string){
  const grepList = "什么|问题|需要|合适|设计|考虑|合作|精度|传感器|最新|研究|药物";
  return grepList.includes(str);
}
export const makeRegSearchParams = function(count: number, keyword: string| string[]){
const searchMode = [];
if (typeof keyword === 'string') {
// 如果 keyword 是字符串,则直接添加一个 searchMode 条目
searchMode.push({
key: "description",
type: "MUST",
value: keyword
});
} else if (Array.isArray(keyword)) {
// 如果 keyword 是数组,则为每个元素添加一个 searchMode 条目
keyword.forEach(str => {
if (!inGrepList(str))
searchMode.push({
key: "description",
type: "SHOULD",
value: str
});
});
}
})
export const makeDOIPParams = (doId:string, op:string, attributes:Object, requestBody: string) => ({
return {
action: "executeContract",
contractID: "BDBrowser",
operation: "sendRequestDirectly",
arg: {
id: doId,
id: iodConfig.registry,
//doipUrl:"tcp://127.0.0.1:21039",
doipUrl: iodConfig.gatewayUrl,
op: op,
attributes: attributes,
body: requestBody
op: "Search",
vars:{
timeout:15000
},
attributes: {
offset: 0,
count,
bodyBase64Encoded: false,
searchMode:searchMode
},
body: ""
}
})
}
}
export const retrieveDoc = function(doId: string, traceId: string) : Promise<Document> {
console.log("retriveDoc:"+doId+" -> traceId:"+traceId)
/**
 * Builds the `executeContract` payload that asks the `BDBrowser` contract to
 * send a single request directly to the configured gateway.
 *
 * @param doId        Value placed in the request's `id` field.
 * @param op          Operation name placed in the request's `op` field.
 * @param attributes  Attributes object forwarded unchanged.
 * @param requestBody String placed in the request's `body` field.
 * @returns The payload object; `arg.doipUrl` is read from `iodConfig.gatewayUrl`.
 */
export const makeDOIPParams = (doId:string, op:string, attributes:Object, requestBody: string) => {
  // Inner request description handed to the contract.
  const arg = {
    id: doId,
    doipUrl: iodConfig.gatewayUrl,
    op,
    attributes,
    body: requestBody
  };

  return {
    action: "executeContract",
    contractID: "BDBrowser",
    operation: "sendRequestDirectly",
    arg
  };
}
export const retrieveDoc = function(doId: string) : Promise<Document> {
console.log("retriveDoc:"+doId)
const params = makeDOIPParams(doId,"Retrieve",{
"traceId": traceId,
bodyBase64Encoded: false
}, "");
const abortController = new AbortController()
@@ -88,7 +108,11 @@ export const retrieveDoc = function(doId: string, traceId: string) : Promise<Doc
.then((res) => {
console.log("res:");
console.log(res.result.body);
return res.result.body
//TODO
return {
metadata:{traceId:res.result.header.attributes?.traceId},
pageContent:res.result.body
}
})
}
@@ -119,8 +143,6 @@ export const updateDialog = async function(histroyId : string, botMessage: any):
if (userMessage.role=='user') break;
}
let updateBody:any = {};
console.log(userMessage)
console.log(botMessage)
// !!!IMPORTANT!!! traceId = histroyId+"/"+userMessage.id;
// Update traceId in retrieveDoc!
updateBody.traceId = histroyId+"/"+userMessage.id;
@@ -141,12 +163,14 @@ export const updateDialog = async function(histroyId : string, botMessage: any):
updateBody.webSources = botMessage.webSources?.map((r) => ({
url: r.url,
tokenCount: r.url.length,
content: r.url
content: r.url,
traceId: r?.traceId
})) ?? [];
updateBody.IoDSources = botMessage.iodSources?.map((r) => ({
id: r.doId,
tokenCount: r.description.length,
content: r.description
tokenCount: (r.content || r.description)?calculateTokenCount((r.content || r.description)):0,
content: r.content || r.description,
traceId: r?.traceId
})) ?? [];
console.log("updateBody:");
console.log(updateBody)
@@ -158,7 +182,47 @@ export async function localIodSearch(
keywords: string[]
): Promise<IodRegistryEntry[]> {
const TOTAL_SEARCH_RESULTS = await totalSearchResults()
const abortController = new AbortController();
setTimeout(() => abortController.abort(), 10000);
const params = makeRegSearchParams(TOTAL_SEARCH_RESULTS, keywords);
try {
const response = await fetch(iodConfig.doBrowser, {
method: "POST",
body: JSON.stringify(params),
signal: abortController.signal
});
const res = await response.json();
if (res.status !== "Success") {
console.log(res);
return [];
}
const body = JSON.parse(res.result.body);
if (body.code !== 0) {
console.log(body);
return [];
}
let results: IodRegistryEntry[] = body.data?.results || [];
for (const r of results) {
r.url = r.url || r.pdf_url;
}
for (const r of results) {
r.doId = r.doId || r.doid;
}
// results 根据 doId 去重
const map = new Map<string, IodRegistryEntry>();
for (const r of results) {
map.set(r.doId, r);
}
return Array.from(map.values());
} catch (e) {
console.log(e);
return [];
}
/*
const results = (
await Promise.all(
keywords.map(async (keyword) => {
@@ -187,6 +251,9 @@ export async function localIodSearch(
for (const r of results) {
r.url = r.url || r.pdf_url
}
for (const r of results) {
r.doId = r.doId || r.doid
}
return results
})
.catch((e) => {
@@ -202,8 +269,8 @@ export async function localIodSearch(
for (const r of results) {
map.set(r.doId, r)
}
console.log("result from IoD:"+JSON.stringify(map)+"--> kw:"+JSON.stringify(keywords));
return Array.from(map.values())
*/
}
const ARXIV_URL_PATTERN = /^https?:\/\/arxiv\.org\//
@@ -213,8 +280,7 @@ export const searchIod = async (query: string, keywords: string[]) => {
const searchResults = await localIodSearch(query, keywords)
const isSimpleMode = await getIsSimpleInternetSearch()
console.log("searchMode:"+isSimpleMode+" ->searchResult:\n"+JSON.stringify(searchResults))
console.log("searchMode:"+isSimpleMode+"\n kw:"+JSON.stringify(keywords)+"\n"+" ->searchResult:\n"+JSON.stringify(searchResults))
if (isSimpleMode) {
await getOllamaURL()
return searchResults
@@ -224,13 +290,13 @@ export const searchIod = async (query: string, keywords: string[]) => {
const resMap = new Map<string, IodRegistryEntry>()
for (const result of searchResults) {
const url = result.url
if (result.doId){
//TODO !!!!@Nex traceId should be the id of history/question!
const traceId = new Date().getTime() + "";
let docFromRetrieve = await retrieveDoc(result.doId, traceId);
let docFromRetrieve = await retrieveDoc(result.doId);
console.log("doc from Retrieve:"+result.doId+" -->"+JSON.stringify(docFromRetrieve))
docs.push(docFromRetrieve)
result.description = docFromRetrieve.pageContent;
result.traceId = docFromRetrieve.metadata?.traceId;
continue;
}
if (!url) {
@@ -296,6 +362,9 @@ export const searchIod = async (query: string, keywords: string[]) => {
}
}
}
return searchResults
/*
const ollamaUrl = await getOllamaURL()
const embeddingModle = await defaultEmbeddingModelForRag()
@@ -326,4 +395,11 @@ export const searchIod = async (query: string, keywords: string[]) => {
}).filter((r) => r)
return searchResult
*/
}
/**
 * Measures a string by the number of bytes in its UTF-8 encoding.
 *
 * Note that despite the name, the value is a byte count, not a count of
 * model tokens.
 *
 * @param str Text to measure.
 * @returns Length in bytes of `str` encoded as UTF-8.
 */
export const calculateTokenCount = (str: string): number =>
  new TextEncoder().encode(str).byteLength;

View File

@@ -11,6 +11,7 @@ import { webBaiduSearch } from "./search-engines/baidu"
import { searchIod } from "./iod"
import type { WebSearchResult } from "~/types/web"
import type { IodRegistryEntry } from "~/types/iod"
import {calculateTokenCount} from "./iod"
const getHostName = (url: string) => {
try {
@@ -100,18 +101,24 @@ export const getSystemPromptForWeb = async (
doId: res.doId,
name: res.name,
url: res.url,
data_type: res.data_type,
data_space: res.data_space,
content: res.content || res.description,
tokenCount: (res.content || res.description)?.length ?? 0,
tokenCount: (res.content || res.description)?calculateTokenCount((res.content || res.description)):0,
traceId:res?.traceId
}))
const iod_search_results = _iodSearchResults
.map(
(result, idx) =>
`<result doId="${result.doId}" name="${result.name}" source="${result.url}" id="${idx + 1}">${result.content}</result>`
(result, idx) =>{
// Renders one IoD search hit as a <result> element. `doId` is always
// emitted; every other attribute is included only when its field is truthy.
const nameAttr = result.name ? ` name="${result.name}"` : '';
const sourceAttr = result.url ? ` source="${result.url}"` : '';
const dataTypeAttr = result.data_type ? ` dataType="${result.data_type}"` : '';
const dataSourceAttr = result.data_space ?` 数据来源="${result.data_space}"`:''
// The opening tag closes directly after the last attribute; an extra quote
// character at this position would make the markup malformed.
return `<result doId="${result.doId}"${nameAttr}${sourceAttr}${dataTypeAttr}${dataSourceAttr}>${result.content}</result>`
}
)
.join("\n")
console.log("iod_search_result: " + iod_search_results)
const web_search_results = webSearchResults
.map(
@@ -119,7 +126,6 @@ export const getSystemPromptForWeb = async (
`<result source="${result.url}" name="${result.name}" id="${idx + 1}">${result.content}</result>`
)
.join("\n")
console.log("web_search_result: " + web_search_results)
const current_date_time = new Date().toLocaleString()