2024-03-24 23:56:41 +05:30
|
|
|
import { pdfDist } from "./pdfjs"
|
2024-03-08 00:45:28 +05:30
|
|
|
|
2024-03-24 23:56:41 +05:30
|
|
|
/**
 * Loads a PDF document from raw bytes via `pdfDist.getDocument`.
 *
 * The loading task is configured with worker fetch and eval disabled and
 * system fonts enabled. An `onPassword` handler is installed that asks the
 * user for a password through `prompt`.
 *
 * @param data - The binary contents of the PDF file.
 * @returns The value that the loading task's `promise` resolves to.
 */
export const getPdf = async (data: ArrayBuffer) => {
  const loadingTask = pdfDist.getDocument({
    data,
    useWorkerFetch: false,
    isEvalSupported: false,
    useSystemFonts: true,
  });

  // NOTE(review): presumably invoked by pdf.js when the document is
  // encrypted — confirm against the pdf.js version in use.
  loadingTask.onPassword = (callback: any) => {
    const password = prompt("Enter the password: ")
    if (password) {
      callback(password);
      return;
    }
    // Cancelled prompt or empty input: abort instead of submitting nothing.
    throw new Error("Password required to open the PDF.");
  };

  return await loadingTask.promise;
}
|
|
|
|
|
|
|
|
|
|
/**
 * Captures the current document as a plain serializable object.
 *
 * This function is handed to `chrome.scripting.executeScript` as `func`, so
 * it only touches `window` and `document` and no module-scope names.
 *
 * @returns `{ url, content, type }`. When the document's content type is
 * `application/pdf`, `content` is empty and `type` is `"pdf"`. Otherwise
 * `content` is the document's outer HTML with the markup of each `<script>`
 * element removed and `type` is `"html"`.
 */
const _getHtml = async () => {
  const url = window.location.href

  const isPdf = document.contentType === "application/pdf"
  if (isPdf) {
    return { url, content: "", type: "pdf" }
  }

  const scriptElements = Array.from(document.querySelectorAll("script"))
  let markup = document.documentElement.outerHTML
  for (const scriptElement of scriptElements) {
    // String patterns replace only the first match; each element gets its
    // own pass, so repeated identical scripts are removed one by one.
    markup = markup.replace(scriptElement.outerHTML, "")
  }

  return { url, content: markup, type: "html" }
}
|
2024-03-25 23:17:43 +05:30
|
|
|
|
2024-03-24 23:56:41 +05:30
|
|
|
/**
 * Reads the content of the active tab in the current window.
 *
 * `_getHtml` is injected into the tab to obtain its URL, content and type.
 * For HTML pages that data is returned as-is with an empty `pdf` list. For
 * PDF tabs the file is fetched from the tab URL, opened with `getPdf`, and
 * the text of each page is extracted.
 *
 * @returns `{ url, content, type, pdf }`. For PDFs, `content` is `""` and
 * `pdf` holds one `{ content, page }` entry per page that has text items
 * (`page` is 1-based). For other pages, `pdf` is empty.
 * @throws Error when there is no active tab with an id, or when the injected
 * script yields no result. Failures from `chrome.scripting.executeScript`
 * are propagated. Previously all of these left the returned promise pending
 * forever.
 */
export const getDataFromCurrentTab = async () => {
  const result = new Promise((resolve, reject) => {
    chrome.tabs.query({ active: true, currentWindow: true }, async (tabs) => {
      // Nothing awaits this async callback, so every failure has to be
      // routed through `reject`; otherwise the outer promise never settles.
      try {
        const tab = tabs?.[0]
        if (tab?.id == null) {
          throw new Error("No active tab found in the current window.")
        }

        const data = await chrome.scripting.executeScript({
          target: { tabId: tab.id },
          func: _getHtml
        })

        const pageData = data?.[0]?.result
        if (!pageData) {
          throw new Error("Could not read the content of the current tab.")
        }

        resolve(pageData)
      } catch (error) {
        reject(error)
      }
    })
  }) as Promise<{
    url: string
    content: string
    type: string
  }>

  const { content, type, url } = await result

  if (type === "pdf") {
    const res = await fetch(url)
    const data = await res.arrayBuffer()

    const pdfHtml: {
      content: string
      page: number
    }[] = []

    const pdf = await getPdf(data)

    // pdf.js page numbers start at 1.
    for (let i = 1; i <= pdf.numPages; i += 1) {
      const page = await pdf.getPage(i);
      const content = await page.getTextContent();

      // Pages without any text items contribute nothing.
      if (content?.items.length === 0) {
        continue;
      }

      // One text item per line; NUL characters are stripped from the result.
      const text = content?.items.map((item: any) => item.str).join("\n")
        .replace(/\x00/g, "").trim();

      pdfHtml.push({
        content: text,
        page: i
      })
    }

    return {
      url,
      content: "",
      pdf: pdfHtml,
      type: "pdf"
    }
  }

  return { url, content, type, pdf: [] }
}
|
2024-03-08 00:45:28 +05:30
|
|
|
|