/**
 * Drop everything that precedes the first `<p>` element of an HTML string.
 *
 * Every opening paragraph tag is used as a split point, so each of them is
 * re-emitted as a bare `<p>` in the result (attributes are dropped).
 * Only real paragraph tags count: the tag name must be exactly `p`, so tags
 * whose name merely starts with "p" (`<pre>`, `<picture>`, `<path>`, ...)
 * are left untouched.
 *
 * @param content - The HTML content to process.
 * @returns The HTML starting at the first `<p>` tag, or `content` unchanged
 *   when it contains no `<p>` tag.
 */
const skipHtmlTillFirstParagraph = (content: string): string => {
  // The lookahead requires whitespace, "/" or ">" right after "<p", which
  // rules out longer tag names such as <pre> while still allowing attributes.
  const [, ...afterParagraphTags] = content.split(/<p(?=[\s/>])[^>]*>/i);
  if (afterParagraphTags.length === 0) {
    // No <p> tags found; fall back to the full content.
    return content;
  }
  // Re-add the <p> tags that split() consumed.
  return `<p>${afterParagraphTags.join("<p>")}`;
};

/**
 * Convert an HTML string to plain text.
 *
 * The markup is parsed as `text/html` with the `DOMParser` global, the text
 * content of the resulting `<body>` is read, and every run of whitespace is
 * collapsed to a single space before the result is trimmed.
 *
 * @param content - The HTML content to process.
 * @returns The whitespace-normalised text of the content; an empty string
 *   when the body has no text content.
 */
const htmlToText = (content: string): string => {
  const parsed = new DOMParser().parseFromString(content, "text/html");
  const rawText = parsed.body.textContent || "";
  // Collapse whitespace runs (including newlines) and strip both ends.
  return rawText.replace(/\s+/g, " ").trim();
};

/**
 * Truncate text to roughly `targetLength` characters without cutting a word
 * in half, and finish it with an ellipsis.
 *
 * Text that already fits is returned unchanged. Otherwise the first
 * `targetLength` characters are taken; if that cut lands inside a word, the
 * partial word is dropped by backing up to the previous space. When the cut
 * falls exactly on a word boundary the last complete word is kept. All
 * trailing `.`, `!`, `?` and `,` characters are removed before `"..."` is
 * appended, so existing punctuation never stacks with the ellipsis.
 *
 * Note that the ellipsis is added on top of the kept text, so the result may
 * be up to three characters longer than `targetLength`.
 *
 * @param text - The text to truncate.
 * @param targetLength - The desired maximum length of the kept text.
 * @returns `text` itself when it fits, otherwise the shortened text followed
 *   by `"..."`.
 */
const truncateText = (text: string, targetLength: number): string => {
  if (text.length <= targetLength) {
    return text;
  }
  const head = text.substring(0, targetLength);
  // The cut splits a word only when non-whitespace sits on both sides of it.
  const cutsWord = /\S$/.test(head) && /\S/.test(text.charAt(targetLength));
  let truncated = head.trim();
  if (cutsWord) {
    // Back up to the last space so the partial word is dropped. With no
    // space available (one very long word) the hard cut is kept.
    const lastSpace = truncated.lastIndexOf(" ");
    if (lastSpace > 0) {
      truncated = truncated.substring(0, lastSpace);
    }
  }
  // Strip every trailing punctuation mark, not just one, so that text ending
  // in "..." or "?!" does not produce a run of extra dots.
  truncated = truncated.replace(/[.!?,]+$/, "");
  return `${truncated}...`;
};

/**
 * Build a plain-text excerpt from HTML content.
 *
 * The excerpt is preferably taken from the first `<p>` tag onwards
 * (`skipHtmlTillFirstParagraph` followed by `htmlToText`). If that text is
 * shorter than `targetLength`, the text of the whole content is used
 * instead. The chosen text is then shortened with `truncateText`.
 *
 * @param targetLength - The desired length of the excerpt.
 * @param content - The HTML content to process.
 * @returns The extracted and truncated excerpt.
 */
const extractExcerpt = (targetLength: number, content: string): string => {
  const fromFirstParagraph = htmlToText(skipHtmlTillFirstParagraph(content));
  // Too little text after the first paragraph tag: use the whole document.
  const excerptSource =
    fromFirstParagraph.length < targetLength
      ? htmlToText(content)
      : fromFirstParagraph;
  return truncateText(excerptSource, targetLength);
};

export default extractExcerpt;
