Files
page-assist/py_server/handlers/chat.py

161 lines
4.8 KiB
Python
Raw Normal View History

2023-04-15 18:00:11 +05:30
"""Chat handlers for the PageAssist server.

Exposes two async handlers: one that chats over a website previously saved
to Supabase (`chat_app_handler`) and one that chats over raw HTML sent by
the browser extension (`chat_extension_handler`).
"""

from bs4 import BeautifulSoup
from langchain.chains import ConversationalRetrievalChain
from langchain.docstore.document import Document as LDocument
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.llms import OpenAI
from langchain.prompts.chat import (
    ChatPromptTemplate,
    HumanMessagePromptTemplate,
    SystemMessagePromptTemplate,
)
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import Chroma
from langchain.vectorstores.faiss import FAISS  # NOTE(review): unused in this module — confirm before removing

from db.supa import SupaService
from models import ChatAppBody, ChatBody

# Single shared Supabase client for all requests handled by this module.
supabase = SupaService()
async def chat_app_handler(body: ChatAppBody, jwt: str):
    """Answer a chat message against a website previously saved by the user.

    Authenticates the caller via *jwt*, loads the stored website HTML for
    ``body.id`` from Supabase, embeds it into an in-memory Chroma vector
    store, and runs a ConversationalRetrievalChain (GPT-3.5) over it using
    ``body.history`` as the conversation context.

    Args:
        body: request payload with ``id``, ``user_message`` and ``history``
            (a list of dicts with ``human_message``/``bot_response`` keys).
        jwt: Supabase access token identifying the user.

    Returns:
        dict with ``bot_response`` and ``human_message``. Failures are
        reported inside ``bot_response`` — this handler never raises.
    """
    try:
        user = supabase.get_user(jwt)
        if not user:
            return {
                "bot_response": "You are not logged in",
                "human_message": body.user_message,
            }
        user_id = user.user.id

        website_response = supabase.find_website(body.id, user_id)
        website = website_response.data
        if not website:  # empty result set: no such website for this user
            return {
                "bot_response": "Website not found",
                "human_message": body.user_message,
            }
        website = website[0]

        text = website["html"].strip()
        documents = [LDocument(page_content=text, metadata={"source": "test"})]
        token_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
        chunks = token_splitter.split_documents(documents)
        print(f'Number of documents: {len(chunks)}')

        # Fresh per-request vector store; nothing is persisted between calls.
        vectorstore = Chroma.from_documents(chunks, OpenAIEmbeddings())

        messages = [
            SystemMessagePromptTemplate.from_template("""You are PageAssist bot. Answer the question based on the following context from the webpage you are on.
Answer must be in markdown format.
-----------------
context:
{context}
"""),
            HumanMessagePromptTemplate.from_template("{question}"),
        ]
        prompt = ChatPromptTemplate.from_messages(messages)

        chat = ConversationalRetrievalChain.from_llm(
            OpenAI(temperature=0, model_name="gpt-3.5-turbo"),
            vectorstore.as_retriever(search_kwargs={"k": 1}),
            return_source_documents=True,
            qa_prompt=prompt,
        )

        history = [(d["human_message"], d["bot_response"]) for d in body.history]
        response = chat({
            "question": body.user_message,
            "chat_history": history,
        })

        answer = response["answer"]
        # Drop any "Answer:"-style prefix the model may emit; if there is no
        # colon, find() returns -1 so the slice keeps the whole string.
        answer = answer[answer.find(":") + 1:].strip()
        return {
            "bot_response": answer,
            "human_message": body.user_message,
        }
    except Exception as e:
        # Top-level boundary: never leak an exception to the client.
        print(e)
        return {
            "bot_response": "Something went wrong please try again later",
            "human_message": body.user_message,
        }
2023-04-11 15:19:39 +05:30
async def chat_extension_handler(body: ChatBody):
    """Answer a chat message against raw HTML sent by the browser extension.

    Strips the extension's own injected UI elements from ``body.html``,
    extracts the visible text, embeds it into an in-memory Chroma vector
    store, and runs a ConversationalRetrievalChain (GPT-3.5) over it using
    ``body.history`` as the conversation context.

    Args:
        body: request payload with ``html``, ``user_message`` and ``history``
            (a list of dicts with ``human_message``/``bot_response`` keys).

    Returns:
        dict with ``bot_response`` and ``human_message``. Failures are
        reported inside ``bot_response`` — this handler never raises.
    """
    try:
        soup = BeautifulSoup(body.html, 'lxml')

        # Remove elements injected by the extension itself so they do not
        # pollute the page text fed to the model.
        for tag_name, element_id in (
            ('iframe', 'pageassist-iframe'),
            ('div', 'pageassist-icon'),
            ('div', '__plasmo-loading__'),
        ):
            element = soup.find(tag_name, id=element_id)
            if element:
                element.decompose()

        text = soup.get_text().strip()
        documents = [LDocument(page_content=text, metadata={"source": "test"})]
        token_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
        chunks = token_splitter.split_documents(documents)
        print(f'Number of documents: {len(chunks)}')

        # Fresh per-request vector store; nothing is persisted between calls.
        vectorstore = Chroma.from_documents(chunks, OpenAIEmbeddings())

        messages = [
            SystemMessagePromptTemplate.from_template("""You are PageAssist bot. Answer the question based on the following context from the webpage you are on.
Answer must be in markdown format.
-----------------
context:
{context}
"""),
            HumanMessagePromptTemplate.from_template("{question}"),
        ]
        prompt = ChatPromptTemplate.from_messages(messages)

        chat = ConversationalRetrievalChain.from_llm(
            OpenAI(temperature=0, model_name="gpt-3.5-turbo"),
            vectorstore.as_retriever(search_kwargs={"k": 1}),
            return_source_documents=True,
            qa_prompt=prompt,
        )

        history = [(d["human_message"], d["bot_response"]) for d in body.history]
        response = chat({
            "question": body.user_message,
            "chat_history": history,
        })

        answer = response["answer"]
        # Drop any "Answer:"-style prefix the model may emit; if there is no
        # colon, find() returns -1 so the slice keeps the whole string.
        answer = answer[answer.find(":") + 1:].strip()
        return {
            "bot_response": answer,
            "human_message": body.user_message,
        }
    except Exception as e:
        # Top-level boundary: never leak an exception to the client.
        print(e)
        return {
            "bot_response": "Something went wrong please try again later",
            "human_message": body.user_message,
        }