import {GlobalWorkerOptions, getDocument} from "pdfjs-dist";

/**
 * Extracts the text content of every page of a PDF document.
 *
 * The text items of each page are joined with single spaces, runs of
 * whitespace are collapsed to one space, and the result is trimmed.
 *
 * @param buffer - Raw bytes of the PDF file.
 * @returns One string per page, in page order.
 * @throws Rejects with the underlying pdf.js error when the document cannot
 * be loaded or a page cannot be read.
 */
export async function extractText(buffer: ArrayBuffer): Promise<string[]>
{
    // In a browser, hand pdf.js a module worker. Create it only when no port
    // has been installed yet: spawning a new Worker on every call would leak
    // the previous one, which is never terminated.
    if(typeof window === "object" && !GlobalWorkerOptions.workerPort)
    {
        const {href} = new URL("pdfjs-dist/build/pdf.worker.mjs", import.meta.url);
        GlobalWorkerOptions.workerPort = new Worker(href, {type: "module"});
    }
    const task = getDocument(buffer);
    try
    {
        const pdf = await task.promise;
        // Reads one page (1-based) and flattens its text items to a single line.
        const readPage = async (n: number): Promise<string> =>
        {
            const page = await pdf.getPage(n);
            const {items} = await page.getTextContent();
            // Marked-content items carry no `str`; they contribute an empty string.
            return items.map((item) => "str" in item ? item.str : "").join(" ").replace(/\s+/g, " ").trim();
        };
        // Start all page reads up front so they can proceed concurrently.
        return await Promise.all(Array.from({length: pdf.numPages}, (_, i) => readPage(i + 1)));
    }
    finally
    {
        // Release the document and its transport whether extraction succeeded
        // or failed; otherwise every call leaves a loaded document alive.
        await task.destroy();
    }
}