wip: improve spec parsing

2026-02-19 05:46:40 +00:00 · 2026-01-10 21:52:45 +00:00
parent f1fa264ed7
commit 208219ca2c
10 changed files with 324 additions and 255 deletions
--- a/docs/index.html
+++ b/docs/index.html
@@ -199,7 +199,7 @@ its merge target, allowing others to review, discuss and approve the changes.</l
 effectively just a git tag named after the version of the release.</li>
 <li><strong>Release Branches</strong> - Used both for short-term preparations of a release, and
 also for long-term maintenance of older versions.</li>
-</ul> </section> <!-- Main specification --> <section id="specification" data-astro-cid-6lwcykzv> <h2 data-astro-cid-6lwcykzv>Git Common-Flow Specification</h2> <p>The key words "MUST", "MUST NOT", "REQUIRED", "SHALL", "SHALL NOT", "SHOULD",
+</ul> </section> <!-- Main specification --> <section id="specification" data-astro-cid-6lwcykzv> <h2 data-astro-cid-6lwcykzv>Specification</h2> <p>The key words "MUST", "MUST NOT", "REQUIRED", "SHALL", "SHALL NOT", "SHOULD",
 "SHOULD NOT", "RECOMMENDED", "MAY", and "OPTIONAL" in this document are to be
 interpreted as described in <a href="https://tools.ietf.org/html/rfc2119">RFC 2119</a>.</p>
 <ol>
@@ -387,7 +387,7 @@ in question. Meaning it MUST always be in a non-broken state, MUST NOT be
 force pushed to, etc.</li>
 </ol>
 </li>
-<li>Bug Fixes &#x26; Rollback
+<li id="spec-bug-fixes-rollback">Bug Fixes &#x26; Rollback
 <ol>
 <li>You MUST NOT under any circumstances force push to the master branch or
 to long-term release branches.</li>
--- a/docs/spec/1.0.0-rc.1/index.html
+++ b/docs/spec/1.0.0-rc.1/index.html
--- a/docs/spec/1.0.0-rc.2/index.html
+++ b/docs/spec/1.0.0-rc.2/index.html
--- a/docs/spec/1.0.0-rc.3/index.html
+++ b/docs/spec/1.0.0-rc.3/index.html
--- a/docs/spec/1.0.0-rc.4/index.html
+++ b/docs/spec/1.0.0-rc.4/index.html
@@ -188,7 +188,7 @@ its merge target, allowing others to review, discuss and approve the changes.</l
 effectively just a git tag named after the version of the release.</li>
 <li><strong>Release Branches</strong> - Used both for short-term preparations of a release, and
 also for long-term maintenance of older versions.</li>
-</ul> </section> <!-- Main specification --> <section id="specification" data-astro-cid-6lwcykzv> <h2 data-astro-cid-6lwcykzv>Git Common-Flow Specification</h2> <p>The key words "MUST", "MUST NOT", "REQUIRED", "SHALL", "SHALL NOT", "SHOULD",
+</ul> </section> <!-- Main specification --> <section id="specification" data-astro-cid-6lwcykzv> <h2 data-astro-cid-6lwcykzv>Specification</h2> <p>The key words "MUST", "MUST NOT", "REQUIRED", "SHALL", "SHALL NOT", "SHOULD",
 "SHOULD NOT", "RECOMMENDED", "MAY", and "OPTIONAL" in this document are to be
 interpreted as described in <a href="https://tools.ietf.org/html/rfc2119">RFC 2119</a>.</p>
 <ol>
@@ -375,7 +375,7 @@ in question. Meaning it MUST always be in a non-broken state, MUST NOT be
 force pushed to, etc.</li>
 </ol>
 </li>
-<li>Bug Fixes &#x26; Rollback
+<li id="spec-bug-fixes-rollback">Bug Fixes &#x26; Rollback
 <ol>
 <li>You MUST NOT under any circumstances force push to the master branch or
 to long-term release branches.</li>
--- a/docs/spec/1.0.0-rc.5/index.html
+++ b/docs/spec/1.0.0-rc.5/index.html
@@ -199,7 +199,7 @@ its merge target, allowing others to review, discuss and approve the changes.</l
 effectively just a git tag named after the version of the release.</li>
 <li><strong>Release Branches</strong> - Used both for short-term preparations of a release, and
 also for long-term maintenance of older versions.</li>
-</ul> </section> <!-- Main specification --> <section id="specification" data-astro-cid-6lwcykzv> <h2 data-astro-cid-6lwcykzv>Git Common-Flow Specification</h2> <p>The key words "MUST", "MUST NOT", "REQUIRED", "SHALL", "SHALL NOT", "SHOULD",
+</ul> </section> <!-- Main specification --> <section id="specification" data-astro-cid-6lwcykzv> <h2 data-astro-cid-6lwcykzv>Specification</h2> <p>The key words "MUST", "MUST NOT", "REQUIRED", "SHALL", "SHALL NOT", "SHOULD",
 "SHOULD NOT", "RECOMMENDED", "MAY", and "OPTIONAL" in this document are to be
 interpreted as described in <a href="https://tools.ietf.org/html/rfc2119">RFC 2119</a>.</p>
 <ol>
@@ -387,7 +387,7 @@ in question. Meaning it MUST always be in a non-broken state, MUST NOT be
 force pushed to, etc.</li>
 </ol>
 </li>
-<li>Bug Fixes &#x26; Rollback
+<li id="spec-bug-fixes-rollback">Bug Fixes &#x26; Rollback
 <ol>
 <li>You MUST NOT under any circumstances force push to the master branch or
 to long-term release branches.</li>
--- a/src/components/SpecSection.astro
+++ b/src/components/SpecSection.astro
@@ -4,11 +4,12 @@ import type { TocItem } from "../utils/parseSpecContent";
 interface Props {
  terminology: string;
  terminologyTitle: string;
  specification: string;
  tocItems: TocItem[];
 }
-const { terminology, specification, tocItems } = Astro.props;
+const { terminology, terminologyTitle, specification, tocItems } = Astro.props;
 ---
 <section id="spec" class="py-20 sm:py-28">
@@ -33,13 +34,13 @@ const { terminology, specification, tocItems } = Astro.props;
        <article class="prose-spec spec-content">
          <!-- Terminology -->
          <section id="terminology">
-            <h2>Terminology</h2>
+            <h2>{terminologyTitle}</h2>
            <Fragment set:html={terminology} />
          </section>
          <!-- Main specification -->
          <section id="specification">
-            <h2>Git Common-Flow Specification</h2>
+            <h2>Specification</h2>
            <Fragment set:html={specification} />
          </section>
        </article>
--- a/src/pages/index.astro
+++ b/src/pages/index.astro
@@ -2,10 +2,6 @@
 import { getCollection } from "astro:content";
 import * as fs from "node:fs";
 import * as path from "node:path";
 import { unified } from "unified";
 import remarkParse from "remark-parse";
 import remarkRehype from "remark-rehype";
 import rehypeStringify from "rehype-stringify";
 import SinglePage from "../layouts/SinglePage.astro";
 import Header from "../components/Header.astro";
@@ -25,28 +21,15 @@ if (!spec) {
  throw new Error(`Spec version ${version} not found`);
 }
-// Read and process the markdown file
+// Read the markdown file
-const filePath = path.join(
+const filePath = path.join(process.cwd(), "src/content/spec", `${version}.md`);
  process.cwd(),
  "src/content/spec",
  `${version}.md`
 );
 const content = fs.readFileSync(filePath, "utf-8");
 // Remove frontmatter
-const body = content.replace(/^---[\s\S]*?---\n/, "");
+const markdown = content.replace(/^---[\s\S]*?---\n/, "");
-// Process markdown to HTML
+// Parse the content into sections (handles markdown -> HTML internally)
-const result = await unified()
+const parsed = await parseSpecContent(markdown, version);
  .use(remarkParse)
  .use(remarkRehype, { allowDangerousHtml: true })
  .use(rehypeStringify, { allowDangerousHtml: true })
  .process(body);
 const html = String(result);
 // Parse the content into sections
 const parsed = parseSpecContent(html, version);
 ---
 <SinglePage title={spec.data.title} version={version}>
@@ -64,6 +47,7 @@ const parsed = parseSpecContent(html, version);
    <SpecSection
      terminology={parsed.terminology}
      terminologyTitle={parsed.terminologyTitle}
      specification={parsed.specification}
      tocItems={parsed.tocItems}
    />
--- a/src/pages/spec/[version].astro
+++ b/src/pages/spec/[version].astro
@@ -2,10 +2,6 @@
 import { getCollection } from "astro:content";
 import * as fs from "node:fs";
 import * as path from "node:path";
 import { unified } from "unified";
 import remarkParse from "remark-parse";
 import remarkRehype from "remark-rehype";
 import rehypeStringify from "rehype-stringify";
 import SinglePage from "../../layouts/SinglePage.astro";
 import Header from "../../components/Header.astro";
@@ -27,28 +23,15 @@ export async function getStaticPaths() {
 const { spec } = Astro.props;
 const version = spec.data.version;
-// Read and process the markdown file
+// Read the markdown file
-const filePath = path.join(
+const filePath = path.join(process.cwd(), "src/content/spec", `${version}.md`);
  process.cwd(),
  "src/content/spec",
  `${version}.md`
 );
 const content = fs.readFileSync(filePath, "utf-8");
 // Remove frontmatter
-const body = content.replace(/^---[\s\S]*?---\n/, "");
+const markdown = content.replace(/^---[\s\S]*?---\n/, "");
-// Process markdown to HTML
+// Parse the content into sections (handles markdown -> HTML internally)
-const result = await unified()
+const parsed = await parseSpecContent(markdown, version);
  .use(remarkParse)
  .use(remarkRehype, { allowDangerousHtml: true })
  .use(rehypeStringify, { allowDangerousHtml: true })
  .process(body);
 const html = String(result);
 // Parse the content into sections
 const parsed = parseSpecContent(html, version);
 ---
 <SinglePage title={spec.data.title} version={version}>
@@ -66,6 +49,7 @@ const parsed = parseSpecContent(html, version);
    <SpecSection
      terminology={parsed.terminology}
      terminologyTitle={parsed.terminologyTitle}
      specification={parsed.specification}
      tocItems={parsed.tocItems}
    />
--- a/src/utils/parseSpecContent.ts
+++ b/src/utils/parseSpecContent.ts
@@ -1,8 +1,14 @@
 /**
- * Parses rendered spec HTML into structured sections for the single-page
+ * Parses spec content using markdown AST for robust section extraction.
 * layout.
 */
 import { unified } from "unified";
 import remarkParse from "remark-parse";
 import remarkRehype from "remark-rehype";
 import rehypeStringify from "rehype-stringify";
 import type { Root, RootContent, Heading, List, ListItem } from "mdast";
 import type { Root as HastRoot } from "hast";
 export interface TocItem {
  id: string;
  title: string;
@@ -26,7 +32,9 @@ export interface ParsedSpec {
  introduction: string;
  summary: string;
  terminology: string;
  terminologyTitle: string;
  specification: string;
  specificationTitle: string;
  specSections: SpecSection[];
  faq: FAQItem[];
  about: string;
@@ -35,7 +43,7 @@ export interface ParsedSpec {
 }
 /**
- * Convert a heading text to a URL-friendly ID
+ * Convert text to a URL-friendly ID
 */
 function slugify(text: string): string {
  return text
@@ -45,160 +53,212 @@ function slugify(text: string): string {
    .trim();
 }
 type MdastNode = Root | RootContent;
 /**
- * Extract content between two headings or to the end of the document
+ * Extract plain text from an mdast node tree
 */
-function extractSection(
+function extractText(node: MdastNode): string {
-  html: string,
+  if ("value" in node && typeof node.value === "string") {
-  startHeading: string,
+    return node.value;
-  endHeadings: string[] = []
+  }
-): string {
+  if ("children" in node && Array.isArray(node.children)) {
-  // Find the heading (h2) - use partial match to handle additional text
+    return node.children.map((child) => extractText(child)).join("");
-  // e.g., "Git Common-Flow Specification (Common-Flow)"
+  }
-  const headingPattern = new RegExp(
+  return "";
-    `<h2[^>]*>[^<]*${escapeRegex(startHeading)}[^<]*</h2>`,
+}
-    "i"
+
 /**
 * Find index of heading containing specific text
 */
 function findHeadingIndex(
  nodes: RootContent[],
  text: string,
  depth: number = 2
 ): number {
  return nodes.findIndex(
    (node) =>
      node.type === "heading" &&
      (node as Heading).depth === depth &&
      extractText(node).toLowerCase().includes(text.toLowerCase())
  );
  const match = html.match(headingPattern);
  if (!match || match.index === undefined) return "";
  const startIdx = match.index + match[0].length;
  // Find the next section heading
  let endIdx = html.length;
  for (const endHeading of endHeadings) {
    const endPattern = new RegExp(
      `<h2[^>]*>\\s*${escapeRegex(endHeading)}\\s*</h2>`,
      "i"
    );
    const endMatch = html.slice(startIdx).match(endPattern);
    if (endMatch && endMatch.index !== undefined) {
      const possibleEnd = startIdx + endMatch.index;
      if (possibleEnd < endIdx) {
        endIdx = possibleEnd;
      }
    }
  }
  // Also check for any h2 as a fallback
  const anyH2 = html.slice(startIdx).match(/<h2[^>]*>/i);
  if (anyH2 && anyH2.index !== undefined) {
    const possibleEnd = startIdx + anyH2.index;
    if (possibleEnd < endIdx) {
      endIdx = possibleEnd;
    }
  }
  return html.slice(startIdx, endIdx).trim();
 }
 /**
 * Backslash-escape every regular-expression metacharacter in `str` so the
 * result can be spliced into a `RegExp` source and match the input literally.
 */
 function escapeRegex(str: string): string {
  const metaChars = /[.*+?^${}()|[\]\\]/g;
  // "$&" stands for the matched character, so each hit is re-emitted with a
  // leading backslash.
  const escaped = str.replace(metaChars, "\\$&");
  return escaped;
 }
 // Spec section titles in order (used for both ToC and anchor injection).
 // Each title is slugified and prefixed with "spec-" by its consumers to form
 // the anchor ID of the matching top-level list item.
 // NOTE(review): in this diff the only readers of this list sit on removed
 // ("-") lines — confirm the constant is still referenced after the refactor.
 const SPEC_SECTION_TITLES = [
  "TL;DR",
  "The Master Branch",
  "Change Branches",
  "Pull Requests",
  "Versioning",
  "Releases",
  "Short-Term Release Branches",
  "Long-term Release Branches",
  "Bug Fixes & Rollback",
  "Git Best Practices",
 ];
 /**
- * Extract the numbered spec sections (1. TL;DR, 2. The Master Branch, etc.)
+ * Extract nodes between two headings
 */
-function extractSpecSections(specContent: string): SpecSection[] {
+function extractSectionNodes(
  nodes: RootContent[],
  startText: string,
  depth: number = 2
 ): RootContent[] {
  // Returns the body of the section opened by the heading found via
  // findHeadingIndex(nodes, startText, depth). When no such heading exists
  // the section is reported as empty instead of raising an error.
  const startIdx = findHeadingIndex(nodes, startText, depth);
  if (startIdx === -1) return [];
  // The section ends at the next heading of the same or a higher level
  // (numeric depth <= `depth`); deeper headings stay inside the section.
  // If no such heading follows, the section runs to the end of `nodes`.
  let endIdx = nodes.length;
  for (let i = startIdx + 1; i < nodes.length; i++) {
    const node = nodes[i];
    if (node.type === "heading" && (node as Heading).depth <= depth) {
      endIdx = i;
      break;
    }
  }
  // Return nodes after the heading (not including the heading itself).
  // slice() yields a new array; the node objects themselves are shared with
  // the input.
  return nodes.slice(startIdx + 1, endIdx);
 }
 /**
 * Resolve the complete text of the heading located by
 * `findHeadingIndex(nodes, text, depth)`.
 * When no heading is found, the search text itself is handed back so callers
 * always receive a usable title.
 */
 function getHeadingText(
  nodes: RootContent[],
  text: string,
  depth: number = 2
 ): string {
  const position = findHeadingIndex(nodes, text, depth);
  return position === -1 ? text : extractText(nodes[position]);
 }
 /**
 * Convert mdast nodes to HTML string.
 *
 * The nodes are wrapped in a synthetic mdast root, transformed to hast with
 * remark-rehype and then serialized with rehype-stringify.
 *
 * @param nodes mdast nodes to render; an empty array short-circuits to "".
 * @returns the serialized HTML for the given nodes.
 */
 async function nodesToHtml(nodes: RootContent[]): Promise<string> {
  if (nodes.length === 0) return "";
  // Create a root node with these children (the array is used as-is, not
  // copied)
  const root: Root = { type: "root", children: nodes };
  // Transform phase only: run() applies the registered transformers to the
  // tree and resolves with the transformed tree.
  // NOTE(review): rehypeStringify is registered here as well, but only run()
  // is called on this processor; serialization happens on the second
  // processor below — confirm the first registration is needed.
  // NOTE(review): allowDangerousHtml presumably keeps raw HTML embedded in
  // the markdown — confirm against the remark-rehype/rehype-stringify docs.
  const result = await unified()
    .use(remarkRehype, { allowDangerousHtml: true })
    .use(rehypeStringify, { allowDangerousHtml: true })
    .run(root);
  // Serialization phase: a fresh processor turns the transformed tree
  // (asserted to be a hast root) into an HTML string.
  const html = await unified()
    .use(rehypeStringify, { allowDangerousHtml: true })
    .stringify(result as HastRoot);
  return html;
 }
 /**
 * Collect the title of every direct item of `list`.
 * An item's title is the first line of its leading text, i.e. whatever comes
 * before any nested list; items that yield no text are left out.
 */
 function extractListItemTitles(list: List): string[] {
  const collected: string[] = [];
  list.children.forEach((entry) => {
    if (entry.type !== "listItem") return;
    // Gather text until the first paragraph (whose text replaces anything
    // gathered so far) or the first nested list (which ends the title).
    let raw = "";
    for (const part of entry.children) {
      if (part.type === "list") break;
      if (part.type === "paragraph") {
        raw = extractText(part);
        break;
      }
      // Non-paragraph content sitting directly inside the list item
      raw += extractText(part);
    }
    // Only the first line counts as the title
    const heading = raw.split("\n")[0].trim();
    if (heading) collected.push(heading);
  });
  return collected;
 }
 /**
 * Find the first ordered list in nodes and extract its structure
 */
 function findSpecSections(nodes: RootContent[]): SpecSection[] {
  const sections: SpecSection[] = [];
-  // The spec uses an ordered list with nested items
+  for (const node of nodes) {
-  // Each top-level li starts a new section
+    if (node.type === "list" && (node as List).ordered) {
-  const olMatch = specContent.match(/<ol[^>]*>([\s\S]*?)<\/ol>/i);
+      const titles = extractListItemTitles(node as List);
-  if (!olMatch) return sections;
+      for (const title of titles) {
-
+        sections.push({
-  // Find each section by looking for the title pattern
+          id: `spec-${slugify(title)}`,
-  for (const title of SPEC_SECTION_TITLES) {
+          title,
-    const id = slugify(title);
+          content: "",
-
+        });
-    // For the content, we'll just use the title for navigation
+      }
-    // The actual content stays in the main specification block
+      break; // Only process first ordered list
-    sections.push({
+    }
      id: `spec-${id}`,
      title,
      content: "", // Content handled inline
    });
  }
  return sections;
 }
 /**
- * Add anchor IDs to spec section list items.
+ * Add anchor IDs to list items in the spec ordered list
 * Finds top-level <li> elements that start with section titles and adds IDs.
 */
-function addSpecSectionAnchors(specContent: string): string {
+function addAnchorsToList(list: List, sections: SpecSection[]): void {
-  let result = specContent;
+  const titleMap = new Map(sections.map((s) => [s.title, s.id]));
-  for (const title of SPEC_SECTION_TITLES) {
+  for (const item of list.children) {
-    const id = `spec-${slugify(title)}`;
+    if (item.type !== "listItem") continue;
-    // Match <li> followed by the section title (possibly with whitespace)
+
-    // The title appears right after <li> in the rendered HTML
+    // Get the title of this item
-    const pattern = new RegExp(
+    let title = "";
-      `(<li>)(\\s*${escapeRegex(title)})`,
+    for (const child of item.children) {
-      "i"
+      if (child.type === "list") break;
-    );
+      if (child.type === "paragraph") {
-    result = result.replace(pattern, `<li id="${id}">$2`);
+        title = extractText(child).split("\n")[0].trim();
        break;
      }
      title += extractText(child);
    }
    title = title.split("\n")[0].trim();
    // Add ID as data attribute (will be processed by rehype)
    const id = titleMap.get(title);
    if (id) {
      // Add hProperties for rehype to convert to HTML id attribute
      (item as ListItem & { data?: { hProperties?: { id?: string } } }).data = {
        hProperties: { id },
      };
    }
  }
  return result;
 }
 /**
- * Extract FAQ items from the FAQ section HTML
+ * Extract FAQ items from FAQ section nodes
 */
-function extractFAQItems(faqContent: string): FAQItem[] {
+function extractFAQFromNodes(nodes: RootContent[]): FAQItem[] {
  const items: FAQItem[] = [];
  let currentQuestion = "";
  let currentId = "";
-  // Split by h3 headings
+  for (const node of nodes) {
-  const h3Pattern = /<h3[^>]*>([\s\S]*?)<\/h3>/gi;
+    if (node.type === "heading" && (node as Heading).depth === 3) {
-  let lastIndex = 0;
+      // Save previous FAQ item if we had one
-  let lastQuestion = "";
+      if (currentQuestion) {
-  let lastId = "";
+        items.push({
          id: currentId,
          question: currentQuestion,
          answer: "", // Placeholder, will be filled later
        });
      }
-  const matches = [...faqContent.matchAll(h3Pattern)];
+      currentQuestion = extractText(node);
-
+      currentId = `faq-${slugify(currentQuestion).slice(0, 50)}`;
  for (let i = 0; i < matches.length; i++) {
    const match = matches[i];
    const question = match[1].replace(/<[^>]+>/g, "").trim();
    const id = slugify(question).slice(0, 50);
    if (i > 0 && match.index !== undefined) {
      // Get content between previous h3 and this one
      const answer = faqContent.slice(lastIndex, match.index).trim();
      items.push({
        id: `faq-${lastId}`,
        question: lastQuestion,
        answer,
      });
    }
    lastQuestion = question;
    lastId = id;
    lastIndex = match.index! + match[0].length;
  }
-  // Don't forget the last FAQ item
+  // Don't forget the last item
-  if (lastQuestion) {
+  if (currentQuestion) {
    const answer = faqContent.slice(lastIndex).trim();
    items.push({
-      id: `faq-${lastId}`,
+      id: currentId,
-      question: lastQuestion,
+      question: currentQuestion,
-      answer,
+      answer: "",
    });
  }
@@ -206,20 +266,25 @@ function extractFAQItems(faqContent: string): FAQItem[] {
 }
 /**
- * Build table of contents from parsed sections.
+ * Build table of contents from parsed sections
 * Only includes sections rendered in SpecSection (Terminology + Specification).
 * Introduction/Summary are in AboutSection and excluded from this ToC.
 */
 function buildTocItems(parsed: Partial<ParsedSpec>): TocItem[] {
  const items: TocItem[] = [];
  if (parsed.terminology) {
-    items.push({ id: "terminology", title: "Terminology", level: 2 });
+    items.push({
      id: "terminology",
      title: parsed.terminologyTitle || "Terminology",
      level: 2,
    });
  }
  if (parsed.specification) {
-    items.push({ id: "specification", title: "Specification", level: 2 });
+    items.push({
      id: "specification",
      title: "Specification",
      level: 2,
    });
    // Add spec subsections
    if (parsed.specSections) {
      for (const section of parsed.specSections) {
        items.push({ id: section.id, title: section.title, level: 3 });
@@ -231,70 +296,106 @@ function buildTocItems(parsed: Partial<ParsedSpec>): TocItem[] {
 }
 /**
- * Main parsing function - takes rendered HTML and returns structured content
+ * Main parsing function - takes markdown content and returns structured content
 */
-export function parseSpecContent(html: string, version: string): ParsedSpec {
+export async function parseSpecContent(
  markdown: string,
  version: string
 ): Promise<ParsedSpec> {
  const svgPath = `/spec/${version}.svg`;
-  // Remove the title (h1) and SVG from the content for parsing
+  // Parse markdown to AST
-  let content = html;
+  const tree = unified().use(remarkParse).parse(markdown) as Root;
-  // Remove the h1 title
+  // Remove title (h1) and SVG image from the tree
-  content = content.replace(/<h1[^>]*>[\s\S]*?<\/h1>/i, "");
+  const nodes = tree.children.filter((node) => {
    if (node.type === "heading" && (node as Heading).depth === 1) return false;
    if (node.type === "paragraph") {
      const text = extractText(node);
      if (text.includes(".svg")) return false;
    }
    return true;
  });
-  // Remove the SVG img tag
+  // Get heading titles
-  content = content.replace(/<img[^>]*\.svg[^>]*>/i, "");
+  const terminologyTitle = getHeadingText(nodes, "Terminology");
-
+  const specificationTitle = getHeadingText(
-  // Extract each section
+    nodes,
-  const introduction = extractSection(content, "Introduction", [
+    "Git Common-Flow Specification"
    "Summary",
    "Terminology",
    "Git Common-Flow",
    "FAQ",
    "About",
    "License",
  ]);
  const summary = extractSection(content, "Summary", [
    "Terminology",
    "Git Common-Flow",
    "FAQ",
    "About",
    "License",
  ]);
  const terminology = extractSection(content, "Terminology", [
    "Git Common-Flow",
    "FAQ",
    "About",
    "License",
  ]);
  const specificationRaw = extractSection(
    content,
    "Git Common-Flow Specification",
    ["FAQ", "About", "License"]
  );
-  // Add anchor IDs to spec section list items for ToC navigation
+  // Extract section nodes
-  const specification = addSpecSectionAnchors(specificationRaw);
+  const introNodes = extractSectionNodes(nodes, "Introduction");
  const summaryNodes = extractSectionNodes(nodes, "Summary");
  const terminologyNodes = extractSectionNodes(nodes, "Terminology");
  const specNodes = extractSectionNodes(nodes, "Git Common-Flow Specification");
  const faqNodes = extractSectionNodes(nodes, "FAQ");
  const aboutNodes = extractSectionNodes(nodes, "About");
  const licenseNodes = extractSectionNodes(nodes, "License");
-  const faqContent = extractSection(content, "FAQ", ["About", "License"]);
+  // Extract spec sections from the first ordered list
  const specSections = findSpecSections(specNodes);
-  const about = extractSection(content, "About", ["License"]);
+  // Add anchor IDs to spec list items
  for (const node of specNodes) {
    if (node.type === "list" && (node as List).ordered) {
      addAnchorsToList(node as List, specSections);
      break;
    }
  }
-  const license = extractSection(content, "License", []);
+  // Extract FAQ items structure
  const faqItems = extractFAQFromNodes(faqNodes);
-  // Parse subsections
+  // Collect FAQ answer nodes for each item
-  const specSections = extractSpecSections(specificationRaw);
+  const faqAnswerNodes: RootContent[][] = [];
-  const faq = extractFAQItems(faqContent);
+  let currentAnswerNodes: RootContent[] = [];
  for (const node of faqNodes) {
    if (node.type === "heading" && (node as Heading).depth === 3) {
      if (currentAnswerNodes.length > 0) {
        faqAnswerNodes.push(currentAnswerNodes);
      }
      currentAnswerNodes = [];
    } else {
      currentAnswerNodes.push(node);
    }
  }
  // Don't forget the last answer
  if (currentAnswerNodes.length > 0) {
    faqAnswerNodes.push(currentAnswerNodes);
  }
  // Convert sections to HTML
  const [introduction, summary, terminology, specification, about, license] =
    await Promise.all([
      nodesToHtml(introNodes),
      nodesToHtml(summaryNodes),
      nodesToHtml(terminologyNodes),
      nodesToHtml(specNodes),
      nodesToHtml(aboutNodes),
      nodesToHtml(licenseNodes),
    ]);
  // Convert FAQ answers to HTML
  const faqAnswers = await Promise.all(
    faqAnswerNodes.map((nodes) => nodesToHtml(nodes))
  );
  // Assign FAQ answers
  const faq = faqItems.map((item, i) => ({
    ...item,
    answer: faqAnswers[i] || "",
  }));
  const parsed: ParsedSpec = {
    svgPath,
    introduction,
    summary,
    terminology,
    terminologyTitle,
    specification,
    specificationTitle,
    specSections,
    faq,
    about,
@@ -302,7 +403,6 @@ export function parseSpecContent(html: string, version: string): ParsedSpec {
    tocItems: [],
  };
  // Build TOC
  parsed.tocItems = buildTocItems(parsed);
  return parsed;