import {pdfjs} from "react-pdf";

// Point pdf.js at its worker script: the minified worker under
// `pdfjs-dist/build`, resolved to an absolute URL relative to this module.
// NOTE(review): presumably the bundler recognises this exact
// `new URL(..., import.meta.url)` form to emit the asset — confirm before
// restructuring the expression.
pdfjs.GlobalWorkerOptions.workerSrc = new URL(
  "pdfjs-dist/build/pdf.worker.min.js",
  import.meta.url
).toString();


/**
 * A run of output text paired with the style key used to group neighbouring
 * runs and to decide how the run is formatted.
 */
interface HtmlContentElement {
  /** Text to emit: taken from a PDF text item, or the `<br />` line-break markup. */
  text: string;
  /** Font name of the originating text item, or `break` for a line-break marker. */
  style: string;
}

/**
 * Escapes the characters that are significant inside HTML element content so
 * that text taken from a PDF is emitted literally instead of being parsed as
 * markup.
 */
function escapeHtml(text: string): string {
  // `&` must be replaced first so the entities produced below are not re-escaped.
  return text
    .replace(/&/g, "&amp;")
    .replace(/</g, "&lt;")
    .replace(/>/g, "&gt;");
}

/**
 * Extracts the text of every page of a PDF and returns it as one HTML string.
 *
 * Text items are HTML-escaped, end-of-line markers become `<br />`, and runs
 * whose font is not the first entry of the page's style table are wrapped in
 * `<strong>`. Pages are concatenated without any separator.
 *
 * @param document - Raw bytes of the PDF file.
 * @returns The HTML representation of the document's text.
 * @throws Whatever pdf.js rejects with when the document or a page cannot be
 *   loaded.
 */
export async function readPdfDocument(document: ArrayBuffer): Promise<string> {
  const pdf = await pdfjs.getDocument(new Uint8Array(document)).promise;
  const htmlParts: string[] = [];

  try {
    for (let pageNum = 1; pageNum <= pdf.numPages; pageNum++) {
      const page = await pdf.getPage(pageNum);
      const pdfContent = await page.getTextContent();
      const htmlContent: HtmlContentElement[] = [];

      for (const item of pdfContent.items) {
        // Consider string items only, and skip embedded images.
        if ("str" in item && !item.str.startsWith("data:image/")) {
          // Escape here so that only PDF text is escaped, never our own <br />.
          htmlContent.push({ text: escapeHtml(item.str), style: item.fontName });
          if (item.hasEOL) {
            htmlContent.push({ text: "<br />", style: "break" });
          }
        }
      }

      // The style table is per page; compute its key order once.
      const styleNames = Object.keys(pdfContent.styles);

      for (const element of mergeSameStyle(htmlContent)) {
        // Heuristic: the first style of the page is assumed to be the regular
        // font, so any other known style is rendered bold. The synthetic
        // `break` style is not in the table (index -1) and stays plain.
        const isBold = styleNames.indexOf(element.style) > 0;
        htmlParts.push(isBold ? `<strong>${element.text}</strong>` : element.text);
      }
    }
  } finally {
    // Release the document and its worker-side resources even on failure.
    await pdf.destroy();
  }

  return htmlParts.join("");
}

/**
 * Combines neighbouring elements that share the same style into one element
 * whose text is the concatenation of theirs, preserving order.
 *
 * The input array and its elements are left untouched; the result is always a
 * new array of new objects.
 *
 * @param elements - Elements in document order.
 * @returns The merged elements; empty when `elements` is empty.
 */
function mergeSameStyle(elements: HtmlContentElement[]): HtmlContentElement[] {
  const merged: HtmlContentElement[] = [];

  for (const element of elements) {
    const previous = merged[merged.length - 1];

    if (previous !== undefined && previous.style === element.style) {
      // `previous` is a copy owned by `merged`, so this never alters the input.
      previous.text += element.text;
    } else {
      merged.push({ ...element });
    }
  }

  return merged;
}
