Files
page-assist/py_server/handlers/chat.py

161 lines
4.8 KiB
Python
Raw Normal View History

2023-04-15 18:00:11 +05:30
"""Chat handlers for the PageAssist server.

Exposes two async handlers: one that chats over a website previously saved
to Supabase (`chat_app_handler`) and one that chats over raw HTML sent by
the browser extension (`chat_extension_handler`).
"""

from bs4 import BeautifulSoup
from langchain.chains import ConversationalRetrievalChain
from langchain.docstore.document import Document as LDocument
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.llms import OpenAI
from langchain.prompts.chat import (
    ChatPromptTemplate,
    HumanMessagePromptTemplate,
    SystemMessagePromptTemplate,
)
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import Chroma
from langchain.vectorstores.faiss import FAISS  # NOTE(review): unused in this module — confirm before removing

from db.supa import SupaService
from models import ChatAppBody, ChatBody

# Single shared Supabase client for all requests handled by this module.
supabase = SupaService()
async def chat_app_handler(body: ChatAppBody, jwt: str):
    """Answer a chat message against a website previously saved by the user.

    Authenticates the caller via *jwt*, loads the stored website HTML for
    ``body.id`` from Supabase, embeds it into an in-memory Chroma vector
    store, and runs a ConversationalRetrievalChain (GPT-3.5) over it using
    ``body.history`` as the conversation context.

    Args:
        body: request payload with ``id``, ``user_message`` and ``history``
            (a list of dicts with ``human_message``/``bot_response`` keys).
        jwt: Supabase access token identifying the user.

    Returns:
        dict with ``bot_response`` and ``human_message``. Failures are
        reported inside ``bot_response`` — this handler never raises.
    """
    try:
        user = supabase.get_user(jwt)
        if not user:
            return {
                "bot_response": "You are not logged in",
                "human_message": body.user_message,
            }
        user_id = user.user.id

        website_response = supabase.find_website(body.id, user_id)
        website = website_response.data
        if not website:  # empty result set: no such website for this user
            return {
                "bot_response": "Website not found",
                "human_message": body.user_message,
            }
        website = website[0]

        text = website["html"].strip()
        documents = [LDocument(page_content=text, metadata={"source": "test"})]
        token_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
        chunks = token_splitter.split_documents(documents)
        print(f'Number of documents: {len(chunks)}')

        # Fresh per-request vector store; nothing is persisted between calls.
        vectorstore = Chroma.from_documents(chunks, OpenAIEmbeddings())

        messages = [
            SystemMessagePromptTemplate.from_template("""You are PageAssist bot. Answer the question based on the following context from the webpage you are on.
Answer must be in markdown format.
-----------------
context:
{context}
"""),
            HumanMessagePromptTemplate.from_template("{question}"),
        ]
        prompt = ChatPromptTemplate.from_messages(messages)

        chat = ConversationalRetrievalChain.from_llm(
            OpenAI(temperature=0, model_name="gpt-3.5-turbo"),
            vectorstore.as_retriever(search_kwargs={"k": 1}),
            return_source_documents=True,
            qa_prompt=prompt,
        )

        history = [(d["human_message"], d["bot_response"]) for d in body.history]
        response = chat({
            "question": body.user_message,
            "chat_history": history,
        })

        answer = response["answer"]
        # Drop any "Answer:"-style prefix the model may emit; if there is no
        # colon, find() returns -1 so the slice keeps the whole string.
        answer = answer[answer.find(":") + 1:].strip()
        return {
            "bot_response": answer,
            "human_message": body.user_message,
        }
    except Exception as e:
        # Top-level boundary: never leak an exception to the client.
        print(e)
        return {
            "bot_response": "Something went wrong please try again later",
            "human_message": body.user_message,
        }
2023-04-11 15:19:39 +05:30
async def chat_extension_handler(body: ChatBody):
    """Answer a chat message against raw HTML sent by the browser extension.

    Strips the extension's own injected UI elements from ``body.html``,
    extracts the visible text, embeds it into an in-memory Chroma vector
    store, and runs a ConversationalRetrievalChain (GPT-3.5) over it using
    ``body.history`` as the conversation context.

    Args:
        body: request payload with ``html``, ``user_message`` and ``history``
            (a list of dicts with ``human_message``/``bot_response`` keys).

    Returns:
        dict with ``bot_response`` and ``human_message``. Failures are
        reported inside ``bot_response`` — this handler never raises.
    """
    try:
        soup = BeautifulSoup(body.html, 'lxml')

        # Remove elements injected by the extension itself so they do not
        # pollute the page text fed to the model.
        for tag_name, element_id in (
            ('iframe', 'pageassist-iframe'),
            ('div', 'pageassist-icon'),
            ('div', '__plasmo-loading__'),
        ):
            element = soup.find(tag_name, id=element_id)
            if element:
                element.decompose()

        text = soup.get_text().strip()
        documents = [LDocument(page_content=text, metadata={"source": "test"})]
        token_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
        chunks = token_splitter.split_documents(documents)
        print(f'Number of documents: {len(chunks)}')

        # Fresh per-request vector store; nothing is persisted between calls.
        vectorstore = Chroma.from_documents(chunks, OpenAIEmbeddings())

        messages = [
            SystemMessagePromptTemplate.from_template("""You are PageAssist bot. Answer the question based on the following context from the webpage you are on.
Answer must be in markdown format.
-----------------
context:
{context}
"""),
            HumanMessagePromptTemplate.from_template("{question}"),
        ]
        prompt = ChatPromptTemplate.from_messages(messages)

        chat = ConversationalRetrievalChain.from_llm(
            OpenAI(temperature=0, model_name="gpt-3.5-turbo"),
            vectorstore.as_retriever(search_kwargs={"k": 1}),
            return_source_documents=True,
            qa_prompt=prompt,
        )

        history = [(d["human_message"], d["bot_response"]) for d in body.history]
        response = chat({
            "question": body.user_message,
            "chat_history": history,
        })

        answer = response["answer"]
        # Drop any "Answer:"-style prefix the model may emit; if there is no
        # colon, find() returns -1 so the slice keeps the whole string.
        answer = answer[answer.find(":") + 1:].strip()
        return {
            "bot_response": answer,
            "human_message": body.user_message,
        }
    except Exception as e:
        # Top-level boundary: never leak an exception to the client.
        print(e)
        return {
            "bot_response": "Something went wrong please try again later",
            "human_message": body.user_message,
        }