import { useEffect, useState } from "react"
import { notification } from "antd"
import {
  getElevenLabsApiKey,
  getElevenLabsModel,
  getElevenLabsVoiceId,
  getRemoveReasoningTagTTS,
  getTTSProvider,
  getVoice,
  isSSMLEnabled
} from "@/services/tts"
import { markdownToSSML } from "@/utils/markdown-to-ssml"
import { generateSpeech } from "@/services/elevenlabs"
import { splitMessageContent } from "@/utils/tts"
import { removeReasoning } from "@/libs/reasoning"
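/** Options accepted by the `speak` function returned from `useTTS`. */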
export interface VoiceOptions {
  utterance: string
}
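/**
 * Hook that plays text through the configured TTS provider (browser
 * speech synthesis or ElevenLabs) and tracks playback state.
 */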
export const useTTS = () => {
  const [isSpeaking, setIsSpeaking] = useState(false)
  const [audioElement, setAudioElement] = useState<HTMLAudioElement | null>(
    null
  )
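  // Synthesize and play `utterance`, optionally stripping reasoning tags
  // and converting markdown to SSML before synthesis.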
  const speak = async ({ utterance }: VoiceOptions) => {
    try {
      const voice = await getVoice()
      const provider = await getTTSProvider()
      const isRemoveReasoning = await getRemoveReasoningTagTTS()

      if (isRemoveReasoning) {
        utterance = removeReasoning(utterance)
      }

      if (provider === "browser") {
        const isSSML = await isSSMLEnabled()
        if (isSSML) {
          utterance = markdownToSSML(utterance)
        }
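        // Chromium-based builds can use the richer chrome.tts API; other
        // browsers fall back to the Web Speech API.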
        if (
          import.meta.env.BROWSER === "chrome" ||
          import.meta.env.BROWSER === "edge"
        ) {
          chrome.tts.speak(utterance, {
            voiceName: voice,
            onEvent(event) {
              if (event.type === "start") {
                setIsSpeaking(true)
              } else if (event.type === "end") {
                setIsSpeaking(false)
              }
            }
          })
        } else {
          // Speak right away with the default voice; if the voice list is
          // still loading, re-speak with the configured voice once it loads.
          window.speechSynthesis.speak(new SpeechSynthesisUtterance(utterance))
          window.speechSynthesis.onvoiceschanged = () => {
            const voices = window.speechSynthesis.getVoices()
            // Renamed from `voice`: the original shadowed the outer variable
            // and compared it against itself, which throws at runtime.
            const selectedVoice = voices.find((v) => v.name === voice)
            const utter = new SpeechSynthesisUtterance(utterance)
            utter.voice = selectedVoice ?? null
            // Stop the default-voice playback before re-speaking
            window.speechSynthesis.cancel()
            window.speechSynthesis.speak(utter)
          }
        }
      } else if (provider === "elevenlabs") {
        const apiKey = await getElevenLabsApiKey()
        const modelId = await getElevenLabsModel()
        const voiceId = await getElevenLabsVoiceId()
        const sentences = splitMessageContent(utterance)
        // Holds prefetched audio for the upcoming sentence, if ready
        let nextAudioData: ArrayBuffer | null = null

        if (!apiKey || !modelId || !voiceId) {
          throw new Error("Missing ElevenLabs configuration")
        }
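        // Play sentences sequentially, prefetching the next sentence's
        // audio while the current one plays to hide synthesis latency.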
        for (let i = 0; i < sentences.length; i++) {
          setIsSpeaking(true)

          // Use the prefetched audio if it is ready; otherwise synthesize now
          const currentAudioData =
            nextAudioData ||
            (await generateSpeech(apiKey, sentences[i], voiceId, modelId))
          // Clear the consumed prefetch before scheduling the next one so a
          // late-arriving result is never replayed for the wrong sentence
          nextAudioData = null

          if (i < sentences.length - 1) {
            generateSpeech(apiKey, sentences[i + 1], voiceId, modelId)
              .then((data) => {
                // The callback parameter was originally also named
                // `nextAudioData`, so the assignment was a self-assignment
                // and the prefetched audio was silently discarded
                nextAudioData = data
              })
              .catch(console.error)
          }

          const blob = new Blob([currentAudioData], { type: "audio/mpeg" })
          const url = URL.createObjectURL(blob)
          const audio = new Audio(url)
          setAudioElement(audio)

          // Wait for playback to finish before moving to the next sentence
          await new Promise((resolve) => {
            audio.onended = resolve
            audio.play()
          })

          URL.revokeObjectURL(url)
        }

        setIsSpeaking(false)
        setAudioElement(null)
      }
    } catch (error) {
      setIsSpeaking(false)
      setAudioElement(null)
      notification.error({
        message: "Error",
        description: "Something went wrong while trying to play the audio"
      })
    }
  }
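  // Stop whichever provider is currently speaking and reset playback state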
  const cancel = () => {
    // ElevenLabs playback goes through an <audio> element
    if (audioElement) {
      audioElement.pause()
      audioElement.currentTime = 0
      setAudioElement(null)
      setIsSpeaking(false)
      return
    }

    if (
      import.meta.env.BROWSER === "chrome" ||
      import.meta.env.BROWSER === "edge"
    ) {
      chrome.tts.stop()
    } else {
      window.speechSynthesis.cancel()
    }
    setIsSpeaking(false)
  }

  // Cancel any ongoing speech when the component unmounts. Note that this
  // cleanup closes over the first render's `cancel`, where `audioElement`
  // is still null, so only the browser TTS providers are reliably stopped.
  useEffect(() => {
    return () => {
      cancel()
    }
  }, [])

  return {
    speak,
    cancel,
    isSpeaking
  }
}
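
// Usage sketch (hypothetical component, not part of the original module):
// toggles playback of a piece of text with the hook's returned helpers.
//
//   const SpeakButton = ({ text }: { text: string }) => {
//     const { speak, cancel, isSpeaking } = useTTS()
//     return (
//       <button
//         onClick={() => (isSpeaking ? cancel() : speak({ utterance: text }))}>
//         {isSpeaking ? "Stop" : "Speak"}
//       </button>
//     )
//   }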