// Inspired by https://github.com/ianstormtaylor/slate/blob/main/site/examples/paste-html.tsx

import { Document, Node as NodeType } from "domhandler";
import { parseDocument } from "htmlparser2";
import type { Descendant } from "slate";

import { isDocumentNode, isElementNode, isTextNode } from "../../../lib/typing";

import { parseBlogHTML } from "./elements/blog";
import { parseHeadingHTML } from "./elements/heading";
import { parseHighlightHTML } from "./elements/highlight";
import { parseFigureHTML, parseImageHTML } from "./elements/image";
import { parseLinkHTML } from "./elements/link";
import { parsePensionProviderHTML } from "./elements/pensionProvider";
import { parseTableCell, parseTableHead } from "./elements/tables";
import { parseTypeformHTML } from "./elements/typeform";
import { parseVideoHTML } from "./elements/video";
import { BlockType, ElementType, HTMLParser } from "./types";

/**
 * Parsers for HTML tags that become Slate element nodes, keyed by tag name.
 *
 * `deserialize` calls the matching parser with the DOM element and its
 * already-deserialized children, then spreads the returned attributes into
 * the resulting node.
 */
const ELEMENT_TAGS: Record<string, HTMLParser> = {
  // Inline elements
  a: parseLinkHTML,

  // Text blocks
  p: () => ({ type: BlockType.paragraph }),
  blockquote: () => ({ type: BlockType.quote }),
  pre: () => ({ type: BlockType.code }),
  aside: parseHighlightHTML,
  hr: () => ({ type: ElementType.thematicBreak }),

  // Headings: one shared parser for every level
  h1: parseHeadingHTML,
  h2: parseHeadingHTML,
  h3: parseHeadingHTML,
  h4: parseHeadingHTML,
  h5: parseHeadingHTML,
  h6: parseHeadingHTML,

  // Lists
  ul: () => ({ type: BlockType.ulList }),
  ol: () => ({ type: BlockType.olList }),
  li: () => ({ type: ElementType.listItem }),

  // Tables
  table: () => ({ type: BlockType.table }),
  thead: parseTableHead,
  tbody: () => ({ type: BlockType.tableBody }),
  tr: () => ({ type: BlockType.tableRow }),
  th: parseTableCell(BlockType.tableHCell),
  td: parseTableCell(BlockType.tableCell),

  // Media
  figure: parseFigureHTML,
  img: parseImageHTML,
  video: parseVideoHTML,

  // Custom (non-standard HTML) tags
  blog: parseBlogHTML,
  pension: parsePensionProviderHTML,
  typeform: parseTypeformHTML,
};

/**
 * Parsers for HTML tags that become marks on text rather than elements,
 * keyed by tag name. `deserialize` merges the returned flags into every
 * child produced inside the tag.
 *
 * COMPAT: `<b>` is deliberately left out because Google Docs uses `<b>` in
 * weird ways.
 */
const TEXT_TAGS: Record<string, HTMLParser> = {
  // Emphasis
  em: () => ({ emphasis: true }),
  i: () => ({ emphasis: true }),

  // Strong
  strong: () => ({ strong: true }),

  // Strike-through
  del: () => ({ strikeThrough: true }),
  s: () => ({ strikeThrough: true }),

  // Inline code
  code: () => ({ inlineCode: true }),
};

/**
 * Returns the parser that `table` itself defines for `name`, or `undefined`.
 *
 * Only own properties count: a bare `table[name]` lookup on a plain object
 * also finds members inherited from `Object.prototype` (for example
 * `constructor`), which would then be called as if they were parsers.
 */
const findParser = (
  table: Record<string, HTMLParser>,
  name: string
): HTMLParser | undefined =>
  Object.prototype.hasOwnProperty.call(table, name) ? table[name] : undefined;

/**
 * Recursively converts a parsed HTML node into Slate descendants.
 *
 * - Text nodes become `{ text }` leaves; empty text and text that is exactly
 *   `"\n"` are dropped.
 * - Document nodes and `<body>` are transparent: their children are returned
 *   directly.
 * - `<br>` becomes a `"\n"` text leaf.
 * - Tags listed in `ELEMENT_TAGS` become a single element node.
 * - Tags listed in `TEXT_TAGS` add their marks to each of their children.
 * - Any other element is unwrapped to its children; any other node kind
 *   yields nothing.
 *
 * @param el - The node (or whole document) to convert.
 * @returns The Slate nodes produced for `el`, possibly empty.
 */
const deserialize = (el: NodeType | Document): Array<Descendant> => {
  if (isTextNode(el)) {
    // `!el.data` already covers the empty string.
    if (!el.data || el.data === "\n") {
      return [];
    }

    return [{ text: el.data }];
  }

  if (isDocumentNode(el)) {
    return el.children.flatMap((c) => deserialize(c));
  }

  if (!isElementNode(el)) {
    return [];
  }

  if (el.name === "br") {
    return [{ text: "\n" }];
  }

  const { childNodes, name } = el;
  const firstChild = childNodes[0];

  // For `<pre><code>…</code></pre>` read the content of the inner `<code>`,
  // so the wrapper is not also turned into an inline-code mark.
  const parent =
    name === "pre" && isElementNode(firstChild) && firstChild.name === "code"
      ? firstChild
      : el;

  let children: Array<Descendant> = parent.childNodes.flatMap((child) =>
    deserialize(child)
  );

  // Guarantee at least one (empty) text leaf as a child.
  if (children.length === 0) {
    children = [{ text: "" }];
  }

  if (name === "body") {
    return children;
  }

  const elementParser = findParser(ELEMENT_TAGS, name);

  if (elementParser) {
    const attrs = elementParser(el, children);

    if (name === "figure") {
      // A figure never keeps its deserialized children: it always ends up
      // with a single empty text leaf, overriding anything in `attrs`.
      return [{ ...attrs, children: [{ text: "" }] } as Descendant];
    }

    // `attrs` is spread last so a parser may supply its own `children`.
    return [{ children, ...attrs } as Descendant];
  }

  const textParser = findParser(TEXT_TAGS, name);

  if (textParser) {
    const attrs = textParser(el, children);

    // The child is spread last, so values it already carries win over the
    // mark defaults from this tag.
    return children.map((child) => ({ ...attrs, ...child } as Descendant));
  }

  return children;
};

/**
 * Parses an HTML string and converts the resulting document into Slate
 * descendants.
 *
 * @param html - The HTML markup to convert.
 * @returns The Slate nodes that `deserialize` produces for the parsed document.
 */
export const parseHTML = (html: string): Array<Descendant> =>
  deserialize(parseDocument(html));
