/**
 * Parses spec content using markdown AST for robust section extraction.
 *
 * The markdown is parsed into an mdast tree, split into sections by heading
 * text, decorated with clause anchors, and each section is rendered to HTML.
 */

import { unified } from "unified";
import remarkParse from "remark-parse";
import remarkRehype from "remark-rehype";
import rehypeStringify from "rehype-stringify";
import { getIconData, iconToSVG, iconToHTML } from "@iconify/utils";
import heroicons from "@iconify-json/heroicons/icons.json";
import type { Root, RootContent, Heading, List, ListItem, Html } from "mdast";
import type { Root as HastRoot } from "hast";

/** A single entry in the generated table of contents. */
export interface TocItem {
  id: string;
  title: string;
  level: number;
  clause?: string;
}

/** A question (level-3 heading) and its rendered HTML answer. */
export interface FAQItem {
  id: string;
  question: string;
  answer: string;
}

/** A top-level clause of the specification's ordered list. */
export interface SpecSection {
  id: string;
  title: string;
  content: string;
  clause: string;
}

/** The structured result of {@link parseSpecContent}; section bodies are HTML strings. */
export interface ParsedSpec {
  introduction: string;
  summary: string;
  terminology: string;
  terminologyTitle: string;
  specification: string;
  specificationTitle: string;
  specSections: SpecSection[];
  faq: FAQItem[];
  license: string;
  tocItems: TocItem[];
}

/** A FAQ question together with the mdast nodes that make up its answer. */
interface FaqEntry {
  id: string;
  question: string;
  answerNodes: RootContent[];
}

type MdastNode = Root | RootContent;

/**
 * Convert text to a URL-friendly ID.
 *
 * Lowercases, drops every character that is not a word character, whitespace
 * or hyphen, trims, and then turns each whitespace run into a single hyphen.
 * Trimming happens BEFORE the whitespace replacement: once whitespace has
 * become "-" there is nothing left for `trim()` to remove, which previously
 * produced slugs with stray leading/trailing hyphens.
 */
function slugify(text: string): string {
  return text
    .toLowerCase()
    .replace(/[^\w\s-]/g, "")
    .trim()
    .replace(/\s+/g, "-");
}

/**
 * Generate link icon SVG from heroicons icon set.
 *
 * @returns The SVG markup for the "link" icon, or an empty string when the
 * icon is not present in the icon set.
 */
function generateLinkIconSvg(): string {
  const iconData = getIconData(heroicons, "link");
  if (!iconData) {
    return "";
  }

  const result = iconToSVG(iconData);
  return iconToHTML(result.body, {
    ...result.attributes,
    class: "clause-link-icon",
    stroke: "currentColor",
    "stroke-width": "2",
  });
}

/** Narrow an mdast node to a heading (of any depth). */
function isHeading(node: RootContent): node is Heading {
  return node.type === "heading";
}

/**
 * Extract plain text from an mdast node tree.
 *
 * Literal nodes contribute their `value`; parent nodes contribute the
 * concatenation of their children's text; anything else contributes "".
 */
function extractText(node: MdastNode): string {
  if ("value" in node && typeof node.value === "string") {
    return node.value;
  }
  if ("children" in node && Array.isArray(node.children)) {
    return node.children.map((child) => extractText(child)).join("");
  }
  return "";
}

/**
 * Find index of heading containing specific text.
 *
 * @param nodes - Top-level nodes to search.
 * @param text - Text to look for (case-insensitive substring match).
 * @param depth - Heading depth that must match exactly.
 * @returns Index of the first matching heading, or -1 when there is none.
 */
function findHeadingIndex(
  nodes: RootContent[],
  text: string,
  depth: number = 2,
): number {
  const needle = text.toLowerCase();
  return nodes.findIndex(
    (node) =>
      isHeading(node) &&
      node.depth === depth &&
      extractText(node).toLowerCase().includes(needle),
  );
}

/**
 * Extract nodes between two headings.
 *
 * @returns The nodes following the matched heading up to (not including) the
 * next heading of the same or a higher level; empty when no heading matches.
 */
function extractSectionNodes(
  nodes: RootContent[],
  startText: string,
  depth: number = 2,
): RootContent[] {
  const startIdx = findHeadingIndex(nodes, startText, depth);
  if (startIdx === -1) return [];

  // Everything after the heading itself; cut at the next same-or-higher heading.
  const rest = nodes.slice(startIdx + 1);
  const stop = rest.findIndex(
    (node) => isHeading(node) && node.depth <= depth,
  );
  return stop === -1 ? rest : rest.slice(0, stop);
}

/**
 * Get the full heading text.
 *
 * @returns The complete text of the first heading containing `text`, or
 * `text` itself when no such heading exists.
 */
function getHeadingText(
  nodes: RootContent[],
  text: string,
  depth: number = 2,
): string {
  const heading = nodes[findHeadingIndex(nodes, text, depth)];
  return heading ? extractText(heading) : text;
}

/**
 * Convert mdast nodes to HTML string.
 *
 * Raw HTML nodes are passed through (`allowDangerousHtml`), which is what
 * lets the injected clause anchors survive rendering.
 */
async function nodesToHtml(nodes: RootContent[]): Promise<string> {
  if (nodes.length === 0) return "";

  // Create a root node with these children
  const root: Root = { type: "root", children: nodes };

  // One processor both transforms (mdast -> hast) and compiles (hast -> HTML).
  const processor = unified()
    .use(remarkRehype, { allowDangerousHtml: true })
    .use(rehypeStringify, { allowDangerousHtml: true });

  const hast = await processor.run(root);
  return processor.stringify(hast as HastRoot);
}

/**
 * Extract the title of a single list item: the first line of its first
 * paragraph (or of any inline content preceding it), ignoring nested lists.
 */
function extractListItemTitle(item: ListItem): string {
  let title = "";
  for (const child of item.children) {
    if (child.type === "list") break; // Stop at nested list
    if (child.type === "paragraph") {
      title = extractText(child);
      break;
    }
    // Handle any other content that precedes the first paragraph
    title += extractText(child);
  }
  return (title.split("\n")[0] ?? "").trim();
}

/**
 * Extract top-level list item titles from an ordered list.
 *
 * Items whose title is empty are omitted from the result.
 */
function extractListItemTitles(list: List): string[] {
  const titles: string[] = [];
  for (const item of list.children) {
    if (item.type !== "listItem") continue;
    const title = extractListItemTitle(item);
    if (title) {
      titles.push(title);
    }
  }
  return titles;
}

/** Return the first ordered list among `nodes`, if any. */
function findFirstOrderedList(nodes: RootContent[]): List | undefined {
  for (const node of nodes) {
    if (node.type === "list" && node.ordered) {
      return node;
    }
  }
  return undefined;
}

/**
 * Find the first ordered list in nodes and extract its structure.
 *
 * Clause numbers are derived from each item's position in the list, which is
 * the same numbering `addClauseAnchors` uses for the element IDs, so a
 * section's `id` always matches the anchor rendered for that item even when
 * an earlier item has no title and is skipped here.
 */
function findSpecSections(nodes: RootContent[]): SpecSection[] {
  const sections: SpecSection[] = [];
  const list = findFirstOrderedList(nodes);
  if (!list) return sections;

  list.children.forEach((item, index) => {
    if (item.type !== "listItem") return;
    const title = extractListItemTitle(item);
    if (!title) return;

    const clauseNum = index + 1;
    sections.push({
      id: `clause-${clauseNum}`,
      title,
      content: "",
      clause: `${clauseNum}.`,
    });
  });

  return sections;
}

/**
 * Add anchor IDs and links to ordered list items recursively.
 * Injects an anchor link before content for hover-to-reveal behavior.
 *
 * Mutates `list` in place: every item gets an `id` via `data.hProperties`
 * (e.g. `clause-1-2` for clause 1.2) and its first paragraph is prefixed with
 * a raw-HTML anchor pointing at that id.
 *
 * @param list - Ordered list to decorate.
 * @param prefix - Dotted clause number of the parent item ("" at top level).
 * @param linkIcon - Icon markup to embed in each anchor; generated once per
 * top-level call and reused for nested lists.
 */
function addClauseAnchors(
  list: List,
  prefix: string = "",
  linkIcon: string = generateLinkIconSvg(),
): void {
  list.children.forEach((item, index) => {
    if (item.type !== "listItem") return;

    // Calculate clause number and ID
    const clauseNum = prefix ? `${prefix}.${index + 1}` : `${index + 1}`;
    const clauseId = `clause-${clauseNum.replace(/\./g, "-")}`;

    // Add ID to the list item via hProperties, preserving any existing data.
    const target = item as ListItem & {
      data?: { hProperties?: { id?: string } };
    };
    target.data = {
      ...target.data,
      hProperties: { ...target.data?.hProperties, id: clauseId },
    };

    // Find the first paragraph in the item and prepend an anchor link
    for (const child of item.children) {
      if (child.type !== "paragraph") continue;

      // NOTE(review): in the reviewed source this template was empty even
      // though the icon was generated and the comments describe an anchor
      // with clause number text and link icon. The markup below is
      // reconstructed from those comments; the `clause-link` class name is an
      // assumption (mirroring `clause-link-icon`) — confirm against the
      // stylesheet.
      const anchorHtml: Html = {
        type: "html",
        value: `<a href="#${clauseId}" class="clause-link">${clauseNum}.${linkIcon}</a>`,
      };
      child.children.unshift(anchorHtml);
      break;
    }

    // Recursively process nested ordered lists
    for (const child of item.children) {
      if (child.type === "list" && child.ordered) {
        addClauseAnchors(child, clauseNum, linkIcon);
      }
    }
  });
}

/**
 * Split FAQ section nodes into questions and their answer nodes.
 *
 * Every level-3 heading with non-empty text starts a new entry; the nodes up
 * to the next level-3 heading belong to that entry. Nodes that appear before
 * the first question (or after a heading with no text) belong to no entry and
 * are dropped, so answers can never be attributed to the wrong question.
 */
function splitFaqEntries(nodes: RootContent[]): FaqEntry[] {
  const entries: FaqEntry[] = [];
  let current: FaqEntry | undefined;

  for (const node of nodes) {
    if (isHeading(node) && node.depth === 3) {
      const question = extractText(node);
      current = question
        ? {
            id: `faq-${slugify(question).slice(0, 50)}`,
            question,
            answerNodes: [],
          }
        : undefined;
      if (current) {
        entries.push(current);
      }
    } else if (current) {
      current.answerNodes.push(node);
    }
  }

  return entries;
}

/**
 * Extract FAQ items from FAQ section nodes.
 *
 * @returns One item per level-3 heading with non-empty text, in document
 * order. `answer` is left empty here; it is filled in by the caller once the
 * answer nodes have been rendered to HTML.
 */
function extractFAQFromNodes(nodes: RootContent[]): FAQItem[] {
  return splitFaqEntries(nodes).map(({ id, question }) => ({
    id,
    question,
    answer: "", // Placeholder, will be filled later
  }));
}

/**
 * Build table of contents from parsed sections.
 *
 * Emits a level-2 entry for the terminology and specification sections when
 * they have content, plus a level-3 entry per specification clause.
 */
function buildTocItems(parsed: Partial<ParsedSpec>): TocItem[] {
  const items: TocItem[] = [];

  if (parsed.terminology) {
    items.push({
      id: "terminology",
      title: parsed.terminologyTitle || "Terminology",
      level: 2,
    });
  }

  if (parsed.specification) {
    items.push({
      id: "specification",
      title: "Specification",
      level: 2,
    });

    for (const section of parsed.specSections ?? []) {
      items.push({
        id: section.id,
        title: section.title,
        level: 3,
        clause: section.clause,
      });
    }
  }

  return items;
}

/**
 * Main parsing function - takes markdown content and returns structured content.
 *
 * @param markdown - The full spec document as markdown.
 * @returns The document split into rendered HTML sections, FAQ items, spec
 * clauses and a table of contents.
 */
export async function parseSpecContent(
  markdown: string,
): Promise<ParsedSpec> {
  // Parse markdown to AST
  const tree = unified().use(remarkParse).parse(markdown) as Root;

  // Remove title (h1) from the tree - it's displayed separately in the Hero
  const nodes = tree.children.filter(
    (node) => !(isHeading(node) && node.depth === 1),
  );

  // Get heading titles
  const terminologyTitle = getHeadingText(nodes, "Terminology");
  const specificationTitle = getHeadingText(
    nodes,
    "Git Common-Flow Specification",
  );

  // Extract section nodes
  const introNodes = extractSectionNodes(nodes, "Introduction");
  const summaryNodes = extractSectionNodes(nodes, "Summary");
  const terminologyNodes = extractSectionNodes(nodes, "Terminology");
  const specNodes = extractSectionNodes(nodes, "Git Common-Flow Specification");
  const faqNodes = extractSectionNodes(nodes, "FAQ");
  const licenseNodes = extractSectionNodes(nodes, "License");

  // Extract spec sections from the first ordered list. This must happen
  // before anchors are injected so the anchor markup never leaks into titles.
  const specSections = findSpecSections(specNodes);

  // Add anchor IDs and links to spec list items
  const specList = findFirstOrderedList(specNodes);
  if (specList) {
    addClauseAnchors(specList);
  }

  // FAQ item structure and answer nodes come from the same split, so the
  // i-th answer always belongs to the i-th question.
  const faqItems = extractFAQFromNodes(faqNodes);
  const faqAnswerNodes = splitFaqEntries(faqNodes).map(
    (entry) => entry.answerNodes,
  );

  // Convert sections to HTML
  const [introduction, summary, terminology, specification, license] =
    await Promise.all([
      nodesToHtml(introNodes),
      nodesToHtml(summaryNodes),
      nodesToHtml(terminologyNodes),
      nodesToHtml(specNodes),
      nodesToHtml(licenseNodes),
    ]);

  // Convert FAQ answers to HTML
  const faqAnswers = await Promise.all(
    faqAnswerNodes.map((answerNodes) => nodesToHtml(answerNodes)),
  );

  // Assign FAQ answers
  const faq = faqItems.map((item, i) => ({
    ...item,
    answer: faqAnswers[i] ?? "",
  }));

  const parsed: ParsedSpec = {
    introduction,
    summary,
    terminology,
    terminologyTitle,
    specification,
    specificationTitle,
    specSections,
    faq,
    license,
    tocItems: [],
  };

  parsed.tocItems = buildTocItems(parsed);

  return parsed;
}