feat: upgrade registry
This commit is contained in:
188
src/web/iod.ts
188
src/web/iod.ts
@@ -3,6 +3,7 @@ import { PageAssistHtmlLoader } from "@/loader/html"
|
||||
import { PageAssistPDFUrlLoader } from "@/loader/pdf-url"
|
||||
import { pageAssistEmbeddingModel } from "@/models/embedding"
|
||||
import { defaultEmbeddingModelForRag, getOllamaURL } from "@/services/ollama"
|
||||
|
||||
import {
|
||||
getIsSimpleInternetSearch,
|
||||
totalSearchResults
|
||||
@@ -14,65 +15,84 @@ import type { IodRegistryEntry } from "~/types/iod"
|
||||
|
||||
|
||||
import { PageAssitDatabase } from "@/db"
|
||||
import exp from "constants"
|
||||
|
||||
import { Segment, useDefault, cnPOSTag, enPOSTag} from 'segmentit';
|
||||
const segment = useDefault(new Segment());

/**
 * Splits free-form input into word tokens using the shared `segmentit`
 * segmenter.
 *
 * Every segment is logged together with its part-of-speech tag (rendered
 * through `enPOSTag`) for debugging. Only segments whose text is longer than
 * one character are returned.
 *
 * @param input Raw text to segment.
 * @returns The text of each segment longer than one character, in segmenter order.
 */
export const tokenizeInput = function (input: string): string[] {
  const segments = segment.doSegment(input, { simple: false });

  // Debug aid: dump each segment with its part-of-speech tag.
  const tagged = segments.map(function (item) {
    return { w: item.w, p: enPOSTag(item.p) };
  });
  console.log(tagged);

  // Keep only segments longer than one character.
  const tokens: string[] = [];
  for (const item of segments) {
    if (item.w.length > 1) {
      tokens.push(item.w);
    }
  }
  return tokens;
}
|
||||
/**
 * Endpoints and identifiers used to build IoD requests in this module.
 *
 * - `gatewayUrl`: DOIP URL placed in the `doipUrl` field of outgoing requests.
 * - `registry`: identifier of the registry targeted by search requests.
 * - `localRepository`: identifier of the local repository.
 * - `doBrowser`: HTTP endpoint the request payloads are POSTed to.
 *
 * NOTE(review): the literal previously declared `gatewayUrl` twice
 * ("tcp://127.0.0.1:21051" followed by "tcp://127.0.0.1:21036"). The later
 * entry always won at runtime, so only that effective value is kept.
 *
 * Presumably an alternative public registry endpoint (confirm before use):
 * doipUrl = tcp://reg01.public.internetofdata.cn:21037
 */
export const iodConfig = {
  "gatewayUrl": "tcp://127.0.0.1:21036",
  "registry": "bdware/Registry",
  "localRepository": "bdtest.local/myrepo1",
  "doBrowser": "http://127.0.0.1:21030/SCIDE/SCManager"
}
|
||||
export const makeRegSearchParams = (count: number, keyword: string) => ({
|
||||
action: "executeContract",
|
||||
contractID: "BDBrowser",
|
||||
operation: "sendRequestDirectly",
|
||||
arg: {
|
||||
id: iodConfig.registry,
|
||||
doipUrl: iodConfig.gatewayUrl,
|
||||
op: "Search",
|
||||
attributes: {
|
||||
offset: 0,
|
||||
count,
|
||||
bodyBase64Encoded: false,
|
||||
searchMode: [
|
||||
{
|
||||
key: "data_type",
|
||||
type: "MUST",
|
||||
value: "paper"
|
||||
},
|
||||
// {
|
||||
// key: "title",
|
||||
// type: "MUST",
|
||||
// value: keyword,
|
||||
// },
|
||||
{
|
||||
key: "description",
|
||||
type: "MUST",
|
||||
value: keyword
|
||||
}
|
||||
]
|
||||
},
|
||||
body: ""
|
||||
// Words excluded from registry search clauses. Stored as a Set so that
// membership is an exact, whole-word comparison.
const grepListWords: ReadonlySet<string> = new Set(
  "什么|问题|需要|合适|设计|考虑|合作|精度|传感器|最新|研究|药物".split("|")
);

/**
 * Reports whether a token should be left out of the search clauses.
 *
 * The previous implementation searched for the token as a substring of the
 * "|"-joined word list, so fragments of a listed word (e.g. "传感"), the "|"
 * delimiter itself, and spans crossing two words were all reported as listed.
 * Matching is now exact.
 *
 * @param str Token to check.
 * @returns `true` when the token is empty or is exactly one of the listed words.
 */
function inGrepList(str: string): boolean {
  // An empty token was always reported as listed (indexOf("") is 0); keep that
  // so callers do not start emitting empty search values.
  if (str.length === 0) {
    return true;
  }
  return grepListWords.has(str);
}
|
||||
export const makeRegSearchParams = function(count: number, keyword: string| string[]){
|
||||
const searchMode = [];
|
||||
if (typeof keyword === 'string') {
|
||||
// 如果 keyword 是字符串,则直接添加一个 searchMode 条目
|
||||
searchMode.push({
|
||||
key: "description",
|
||||
type: "MUST",
|
||||
value: keyword
|
||||
});
|
||||
} else if (Array.isArray(keyword)) {
|
||||
// 如果 keyword 是数组,则为每个元素添加一个 searchMode 条目
|
||||
keyword.forEach(str => {
|
||||
if (!inGrepList(str))
|
||||
searchMode.push({
|
||||
key: "description",
|
||||
type: "SHOULD",
|
||||
value: str
|
||||
});
|
||||
});
|
||||
}
|
||||
})
|
||||
|
||||
|
||||
export const makeDOIPParams = (doId:string, op:string, attributes:Object, requestBody: string) => ({
|
||||
return {
|
||||
action: "executeContract",
|
||||
contractID: "BDBrowser",
|
||||
operation: "sendRequestDirectly",
|
||||
arg: {
|
||||
id: doId,
|
||||
id: iodConfig.registry,
|
||||
//doipUrl:"tcp://127.0.0.1:21039",
|
||||
doipUrl: iodConfig.gatewayUrl,
|
||||
op: op,
|
||||
attributes: attributes,
|
||||
body: requestBody
|
||||
op: "Search",
|
||||
vars:{
|
||||
timeout:15000
|
||||
},
|
||||
attributes: {
|
||||
offset: 0,
|
||||
count,
|
||||
bodyBase64Encoded: false,
|
||||
searchMode:searchMode
|
||||
},
|
||||
body: ""
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
export const retrieveDoc = function(doId: string, traceId: string) : Promise<Document> {
|
||||
console.log("retriveDoc:"+doId+" -> traceId:"+traceId)
|
||||
/**
 * Builds the "executeContract" payload that asks the `BDBrowser` contract to
 * forward a DOIP operation via `sendRequestDirectly`.
 *
 * @param doId Identifier placed in `arg.id`.
 * @param op DOIP operation name placed in `arg.op`.
 * @param attributes Operation attributes, passed through unchanged.
 * @param requestBody Request body, passed through unchanged as `arg.body`.
 * @returns A plain object with `action`, `contractID`, `operation` and `arg`;
 *          `arg.doipUrl` is always `iodConfig.gatewayUrl`.
 */
export const makeDOIPParams = (doId:string, op:string, attributes:Object, requestBody: string) => {
  // Inner DOIP request; key order is kept so serialized output is unchanged.
  const arg = {
    id: doId,
    doipUrl: iodConfig.gatewayUrl,
    op: op,
    attributes: attributes,
    body: requestBody
  };

  return {
    action: "executeContract",
    contractID: "BDBrowser",
    operation: "sendRequestDirectly",
    arg: arg
  };
}
|
||||
|
||||
export const retrieveDoc = function(doId: string) : Promise<Document> {
|
||||
console.log("retriveDoc:"+doId)
|
||||
const params = makeDOIPParams(doId,"Retrieve",{
|
||||
"traceId": traceId,
|
||||
bodyBase64Encoded: false
|
||||
}, "");
|
||||
const abortController = new AbortController()
|
||||
@@ -88,7 +108,11 @@ export const retrieveDoc = function(doId: string, traceId: string) : Promise<Doc
|
||||
.then((res) => {
|
||||
console.log("res:");
|
||||
console.log(res.result.body);
|
||||
return res.result.body
|
||||
//TODO
|
||||
return {
|
||||
metadata:{traceId:res.result.header.attributes?.traceId},
|
||||
pageContent:res.result.body
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
@@ -119,8 +143,6 @@ export const updateDialog = async function(histroyId : string, botMessage: any):
|
||||
if (userMessage.role=='user') break;
|
||||
}
|
||||
let updateBody:any = {};
|
||||
console.log(userMessage)
|
||||
console.log(botMessage)
|
||||
// !!!IMPORTANT!!! traceId = histroyId+"/"+userMessage.id;
|
||||
// Update traceId in retrieveDoc!
|
||||
updateBody.traceId = histroyId+"/"+userMessage.id;
|
||||
@@ -141,12 +163,14 @@ export const updateDialog = async function(histroyId : string, botMessage: any):
|
||||
updateBody.webSources = botMessage.webSources?.map((r) => ({
|
||||
url: r.url,
|
||||
tokenCount: r.url.length,
|
||||
content: r.url
|
||||
content: r.url,
|
||||
traceId: r?.traceId
|
||||
})) ?? [];
|
||||
updateBody.IoDSources = botMessage.iodSources?.map((r) => ({
|
||||
id: r.doId,
|
||||
tokenCount: r.description.length,
|
||||
content: r.description
|
||||
tokenCount: (r.content || r.description)?calculateTokenCount((r.content || r.description)):0,
|
||||
content: r.content || r.description,
|
||||
traceId: r?.traceId
|
||||
})) ?? [];
|
||||
console.log("updateBody:");
|
||||
console.log(updateBody)
|
||||
@@ -158,7 +182,47 @@ export async function localIodSearch(
|
||||
keywords: string[]
|
||||
): Promise<IodRegistryEntry[]> {
|
||||
const TOTAL_SEARCH_RESULTS = await totalSearchResults()
|
||||
const abortController = new AbortController();
|
||||
setTimeout(() => abortController.abort(), 10000);
|
||||
const params = makeRegSearchParams(TOTAL_SEARCH_RESULTS, keywords);
|
||||
try {
|
||||
const response = await fetch(iodConfig.doBrowser, {
|
||||
method: "POST",
|
||||
body: JSON.stringify(params),
|
||||
signal: abortController.signal
|
||||
});
|
||||
|
||||
const res = await response.json();
|
||||
if (res.status !== "Success") {
|
||||
console.log(res);
|
||||
return [];
|
||||
}
|
||||
|
||||
const body = JSON.parse(res.result.body);
|
||||
if (body.code !== 0) {
|
||||
console.log(body);
|
||||
return [];
|
||||
}
|
||||
|
||||
let results: IodRegistryEntry[] = body.data?.results || [];
|
||||
for (const r of results) {
|
||||
r.url = r.url || r.pdf_url;
|
||||
}
|
||||
for (const r of results) {
|
||||
r.doId = r.doId || r.doid;
|
||||
}
|
||||
|
||||
// results 根据 doId 去重
|
||||
const map = new Map<string, IodRegistryEntry>();
|
||||
for (const r of results) {
|
||||
map.set(r.doId, r);
|
||||
}
|
||||
return Array.from(map.values());
|
||||
} catch (e) {
|
||||
console.log(e);
|
||||
return [];
|
||||
}
|
||||
/*
|
||||
const results = (
|
||||
await Promise.all(
|
||||
keywords.map(async (keyword) => {
|
||||
@@ -187,6 +251,9 @@ export async function localIodSearch(
|
||||
for (const r of results) {
|
||||
r.url = r.url || r.pdf_url
|
||||
}
|
||||
for (const r of results) {
|
||||
r.doId = r.doId || r.doid
|
||||
}
|
||||
return results
|
||||
})
|
||||
.catch((e) => {
|
||||
@@ -202,8 +269,8 @@ export async function localIodSearch(
|
||||
for (const r of results) {
|
||||
map.set(r.doId, r)
|
||||
}
|
||||
console.log("result from IoD:"+JSON.stringify(map)+"--> kw:"+JSON.stringify(keywords));
|
||||
return Array.from(map.values())
|
||||
*/
|
||||
}
|
||||
|
||||
const ARXIV_URL_PATTERN = /^https?:\/\/arxiv\.org\//
|
||||
@@ -213,8 +280,7 @@ export const searchIod = async (query: string, keywords: string[]) => {
|
||||
const searchResults = await localIodSearch(query, keywords)
|
||||
|
||||
const isSimpleMode = await getIsSimpleInternetSearch()
|
||||
console.log("searchMode:"+isSimpleMode+" ->searchResult:\n"+JSON.stringify(searchResults))
|
||||
|
||||
console.log("searchMode:"+isSimpleMode+"\n kw:"+JSON.stringify(keywords)+"\n"+" ->searchResult:\n"+JSON.stringify(searchResults))
|
||||
if (isSimpleMode) {
|
||||
await getOllamaURL()
|
||||
return searchResults
|
||||
@@ -224,13 +290,13 @@ export const searchIod = async (query: string, keywords: string[]) => {
|
||||
const resMap = new Map<string, IodRegistryEntry>()
|
||||
for (const result of searchResults) {
|
||||
const url = result.url
|
||||
|
||||
if (result.doId){
|
||||
//TODO !!!!@Nex traceId should be the id of history/question!
|
||||
const traceId = new Date().getTime() + "";
|
||||
let docFromRetrieve = await retrieveDoc(result.doId, traceId);
|
||||
let docFromRetrieve = await retrieveDoc(result.doId);
|
||||
console.log("doc from Retrieve:"+result.doId+" -->"+JSON.stringify(docFromRetrieve))
|
||||
docs.push(docFromRetrieve)
|
||||
result.description = docFromRetrieve.pageContent;
|
||||
result.traceId = docFromRetrieve.metadata?.traceId;
|
||||
continue;
|
||||
}
|
||||
if (!url) {
|
||||
@@ -296,6 +362,9 @@ export const searchIod = async (query: string, keywords: string[]) => {
|
||||
}
|
||||
}
|
||||
}
|
||||
return searchResults
|
||||
|
||||
/*
|
||||
const ollamaUrl = await getOllamaURL()
|
||||
|
||||
const embeddingModle = await defaultEmbeddingModelForRag()
|
||||
@@ -326,4 +395,11 @@ export const searchIod = async (query: string, keywords: string[]) => {
|
||||
}).filter((r) => r)
|
||||
|
||||
return searchResult
|
||||
*/
|
||||
}
|
||||
|
||||
/**
 * Returns the size of `str` in bytes when encoded as UTF-8.
 *
 * Despite the name, this is a byte count, not a model-tokenizer count: ASCII
 * characters contribute 1 each, while other characters contribute 2 to 4.
 *
 * @param str Text to measure.
 * @returns Number of bytes in the UTF-8 encoding of `str`.
 */
export const calculateTokenCount = (str: string) => new TextEncoder().encode(str).length;
|
||||
|
||||
|
||||
Reference in New Issue
Block a user