mirror of
https://github.com/jimeh/commonflow.org.git
synced 2026-02-19 05:46:40 +00:00
438 lines
11 KiB
TypeScript
438 lines
11 KiB
TypeScript
/**
|
|
* Parses spec content using markdown AST for robust section extraction.
|
|
*/
|
|
|
|
import { unified } from "unified";
|
|
import remarkParse from "remark-parse";
|
|
import remarkRehype from "remark-rehype";
|
|
import rehypeStringify from "rehype-stringify";
|
|
import { getIconData, iconToSVG, iconToHTML } from "@iconify/utils";
|
|
import heroicons from "@iconify-json/heroicons/icons.json";
|
|
import type { Root, RootContent, Heading, List, ListItem, Html } from "mdast";
|
|
import type { Root as HastRoot } from "hast";
|
|
|
|
/**
 * A single entry in the generated table of contents.
 */
export interface TocItem {
  /** Anchor ID the entry links to (e.g. "terminology" or "clause-1"). */
  id: string;
  /** Text shown for the entry. */
  title: string;
  /** Nesting level: 2 for top-level sections, 3 for specification clauses. */
  level: number;
  /** Clause label such as "1."; only present on specification clause entries. */
  clause?: string;
}
|
|
|
|
/**
 * One question/answer pair taken from the FAQ section.
 */
export interface FAQItem {
  /** Anchor ID: "faq-" followed by the slugified question, cut to 50 characters. */
  id: string;
  /** Plain text of the question heading. */
  question: string;
  /** Answer rendered as an HTML string; empty when no answer content exists. */
  answer: string;
}
|
|
|
|
/**
 * A top-level clause of the specification's ordered list.
 */
export interface SpecSection {
  /** Anchor ID of the clause, in the form "clause-N". */
  id: string;
  /** First line of the clause's list item text. */
  title: string;
  /** Clause content; findSpecSections in this file always leaves it empty. */
  content: string;
  /** Clause label, in the form "N.". */
  clause: string;
}
|
|
|
|
/**
 * Structured result of parsing the specification markdown.
 * Section bodies are HTML strings; a section that is not found is an empty string.
 */
export interface ParsedSpec {
  /** HTML of the "Introduction" section body. */
  introduction: string;
  /** HTML of the "Summary" section body. */
  summary: string;
  /** HTML of the "Terminology" section body. */
  terminology: string;
  /** Full text of the terminology heading (falls back to "Terminology"). */
  terminologyTitle: string;
  /** HTML of the specification section body, including clause anchors. */
  specification: string;
  /** Full text of the specification heading (falls back to the search text). */
  specificationTitle: string;
  /** Top-level clauses taken from the specification's first ordered list. */
  specSections: SpecSection[];
  /** Question/answer pairs from the FAQ section. */
  faq: FAQItem[];
  /** HTML of the "License" section body. */
  license: string;
  /** Table of contents entries derived from the fields above. */
  tocItems: TocItem[];
}
|
|
|
|
/**
 * Convert text to a URL-friendly ID.
 *
 * The text is lower-cased, every character that is not a word character,
 * whitespace or hyphen is removed, surrounding whitespace is trimmed, and each
 * remaining run of whitespace becomes a single hyphen.
 *
 * @param text - Arbitrary text, e.g. a heading.
 * @returns The slug; may be empty when nothing usable remains.
 */
function slugify(text: string): string {
  return (
    text
      .toLowerCase()
      .replace(/[^\w\s-]/g, "")
      // Trim before hyphenating: once whitespace has become "-", trim() can no
      // longer remove it, and stripped punctuation (e.g. "Why ?") would
      // otherwise leave a dangling hyphen.
      .trim()
      .replace(/\s+/g, "-")
  );
}
|
|
|
|
/**
 * Render the heroicons "link" icon as an inline SVG string.
 *
 * @returns SVG markup carrying the "clause-link-icon" class, or an empty
 *   string when the icon is not found in the icon set.
 */
function generateLinkIconSvg(): string {
  const linkIcon = getIconData(heroicons, "link");
  if (linkIcon) {
    const { body, attributes } = iconToSVG(linkIcon);
    // Styling attributes are listed after the generated ones so they win.
    return iconToHTML(body, {
      ...attributes,
      class: "clause-link-icon",
      stroke: "currentColor",
      "stroke-width": "2",
    });
  }
  return "";
}
|
|
|
|
/** Any node of the markdown AST: the root itself or one of its descendants. */
type MdastNode = Root | RootContent;

/**
 * Collect the plain text contained in an mdast node.
 *
 * @param node - Node to read.
 * @returns The node's own string `value` if it has one, otherwise the
 *   concatenated text of its children, otherwise an empty string.
 */
function extractText(node: MdastNode): string {
  // Nodes with a string `value` carry their text directly.
  if ("value" in node && typeof node.value === "string") {
    return node.value;
  }

  if (!("children" in node) || !Array.isArray(node.children)) {
    return "";
  }

  let text = "";
  for (const child of node.children) {
    text += extractText(child);
  }
  return text;
}
|
|
|
|
/**
 * Locate the first heading of a given depth whose text contains `text`.
 *
 * The comparison is a case-insensitive substring match.
 *
 * @param nodes - Nodes to search.
 * @param text - Text the heading must contain.
 * @param depth - Required heading depth (defaults to 2).
 * @returns Index of the matching heading in `nodes`, or -1 when none matches.
 */
function findHeadingIndex(
  nodes: RootContent[],
  text: string,
  depth: number = 2,
): number {
  const needle = text.toLowerCase();

  for (const [position, candidate] of nodes.entries()) {
    if (candidate.type !== "heading") continue;
    if ((candidate as Heading).depth !== depth) continue;
    if (extractText(candidate).toLowerCase().includes(needle)) {
      return position;
    }
  }

  return -1;
}
|
|
|
|
/**
 * Return the nodes that make up the body of a section.
 *
 * The section begins after the first heading matched by `findHeadingIndex`
 * and ends before the next heading of the same or a shallower depth (or at
 * the end of `nodes`). The heading itself is not included.
 *
 * @param nodes - Nodes to search.
 * @param startText - Text identifying the section heading.
 * @param depth - Depth of the section heading (defaults to 2).
 * @returns The section's body nodes, or an empty array when no heading matches.
 */
function extractSectionNodes(
  nodes: RootContent[],
  startText: string,
  depth: number = 2,
): RootContent[] {
  const headingIdx = findHeadingIndex(nodes, startText, depth);
  if (headingIdx === -1) return [];

  // Everything after the section heading...
  const following = nodes.slice(headingIdx + 1);

  // ...up to the next heading at the same or a higher level.
  const boundary = following.findIndex(
    (node) => node.type === "heading" && (node as Heading).depth <= depth,
  );

  return boundary === -1 ? following : following.slice(0, boundary);
}
|
|
|
|
/**
 * Get the complete text of the heading matched by `text`.
 *
 * @param nodes - Nodes to search.
 * @param text - Text the heading must contain.
 * @param depth - Required heading depth (defaults to 2).
 * @returns The full heading text, or `text` itself when no heading matches.
 */
function getHeadingText(
  nodes: RootContent[],
  text: string,
  depth: number = 2,
): string {
  const headingIdx = findHeadingIndex(nodes, text, depth);
  return headingIdx >= 0 ? extractText(nodes[headingIdx]) : text;
}
|
|
|
|
/**
 * Render a list of mdast nodes as an HTML string.
 *
 * Both the mdast-to-hast conversion and the serializer run with
 * `allowDangerousHtml` enabled.
 *
 * @param nodes - Nodes to render.
 * @returns The serialized HTML, or an empty string for an empty list.
 */
async function nodesToHtml(nodes: RootContent[]): Promise<string> {
  if (nodes.length === 0) return "";

  // Wrap the nodes in a synthetic root so they can be processed as one tree.
  const wrapper: Root = { type: "root", children: nodes };

  const processor = unified()
    .use(remarkRehype, { allowDangerousHtml: true })
    .use(rehypeStringify, { allowDangerousHtml: true });

  // Transform mdast -> hast, then serialize with the same processor.
  const hast = await processor.run(wrapper);
  return processor.stringify(hast as HastRoot);
}
|
|
|
|
/**
 * Collect a title for every direct item of a list.
 *
 * An item's title is the first line of its first paragraph. When other
 * content precedes the paragraph or a nested list, that content's text is
 * used instead. Items without any title text are skipped.
 *
 * @param list - List whose direct items are inspected.
 * @returns The trimmed titles, in list order.
 */
function extractListItemTitles(list: List): string[] {
  const collected: string[] = [];

  for (const entry of list.children) {
    if (entry.type !== "listItem") continue;

    let raw = "";
    for (const part of entry.children) {
      // A nested list marks the end of the title portion.
      if (part.type === "list") break;

      const partText = extractText(part);
      if (part.type === "paragraph") {
        // The first paragraph is the title and replaces anything gathered so far.
        raw = partText;
        break;
      }
      raw += partText;
    }

    // Only the first line counts as the title.
    const firstLine = raw.split("\n")[0].trim();
    if (firstLine) {
      collected.push(firstLine);
    }
  }

  return collected;
}
|
|
|
|
/**
 * Build the clause list from the first ordered list found in `nodes`.
 *
 * Each titled top-level item becomes a section numbered from 1, with ID
 * "clause-N", label "N." and empty content. Later ordered lists are ignored.
 *
 * @param nodes - Nodes of the specification section.
 * @returns One section per titled item, or an empty array when there is no
 *   ordered list.
 */
function findSpecSections(nodes: RootContent[]): SpecSection[] {
  const firstOrdered = nodes.find(
    (node) => node.type === "list" && Boolean((node as List).ordered),
  );
  if (!firstOrdered) return [];

  return extractListItemTitles(firstOrdered as List).map((title, position) => {
    const clauseNum = position + 1;
    return {
      id: `clause-${clauseNum}`,
      title,
      content: "",
      clause: `${clauseNum}.`,
    };
  });
}
|
|
|
|
/**
 * Add anchor IDs and links to ordered list items, recursively.
 *
 * Every item gets an `id` of the form "clause-1-2" (via `data.hProperties`),
 * and a link to that ID is prepended to the item's first paragraph so it can
 * be revealed on hover. Nested ordered lists are numbered hierarchically
 * ("1.2", "1.2.3", ...). The list is modified in place.
 *
 * @param list - Ordered list to annotate.
 * @param prefix - Clause number of the parent item; empty for the top level.
 */
function addClauseAnchors(list: List, prefix: string = ""): void {
  type AnchoredItem = ListItem & {
    data?: { hProperties?: { id?: string } };
  };

  // The icon markup is the same for every clause, so build it once per list.
  const linkIcon = generateLinkIconSvg();

  list.children.forEach((item, position) => {
    if (item.type !== "listItem") return;

    // Calculate clause number and ID
    const clauseNum = prefix ? `${prefix}.${position + 1}` : `${position + 1}`;
    const clauseId = `clause-${clauseNum.replace(/\./g, "-")}`;

    // Set the ID via hProperties, keeping any data or properties that are
    // already attached to the item instead of overwriting them.
    const anchored = item as AnchoredItem;
    anchored.data = {
      ...anchored.data,
      hProperties: { ...anchored.data?.hProperties, id: clauseId },
    };

    let linkInserted = false;
    for (const child of item.children) {
      if (child.type === "paragraph" && !linkInserted) {
        // Prepend the anchor link (icon + clause number) to the first paragraph only.
        const anchorHtml: Html = {
          type: "html",
          value: `<a href="#${clauseId}" class="clause-link" aria-hidden="true">${linkIcon}${clauseNum}.</a>`,
        };
        (child as { children: RootContent[] }).children.unshift(anchorHtml);
        linkInserted = true;
      } else if (child.type === "list" && (child as List).ordered) {
        // Nested ordered lists continue the numbering below this clause.
        addClauseAnchors(child as List, clauseNum);
      }
    }
  });
}
|
|
|
|
/**
 * Build the list of FAQ items from the FAQ section's nodes.
 *
 * Every depth-3 heading with non-empty text becomes one item. Answers are
 * left empty here and are expected to be filled in by the caller.
 *
 * @param nodes - Nodes of the FAQ section.
 * @returns FAQ items in document order.
 */
function extractFAQFromNodes(nodes: RootContent[]): FAQItem[] {
  const faqItems: FAQItem[] = [];

  for (const node of nodes) {
    const isQuestion =
      node.type === "heading" && (node as Heading).depth === 3;
    if (!isQuestion) continue;

    const question = extractText(node);
    // Headings without text do not produce an item.
    if (!question) continue;

    faqItems.push({
      id: `faq-${slugify(question).slice(0, 50)}`,
      question,
      answer: "", // Placeholder, filled in later
    });
  }

  return faqItems;
}
|
|
|
|
/**
 * Derive the table of contents from the parsed sections.
 *
 * A "terminology" entry is added when terminology content exists. A
 * "specification" entry, followed by one level-3 entry per clause, is added
 * when specification content exists.
 *
 * @param parsed - Parsed spec; only the fields that are present are used.
 * @returns Table of contents entries in display order.
 */
function buildTocItems(parsed: Partial<ParsedSpec>): TocItem[] {
  const toc: TocItem[] = [];

  if (parsed.terminology) {
    toc.push({
      id: "terminology",
      title: parsed.terminologyTitle || "Terminology",
      level: 2,
    });
  }

  // Without specification content there are no further entries.
  if (!parsed.specification) {
    return toc;
  }

  toc.push({ id: "specification", title: "Specification", level: 2 });

  for (const { id, title, clause } of parsed.specSections ?? []) {
    toc.push({ id, title, level: 3, clause });
  }

  return toc;
}
|
|
|
|
/**
 * Group the FAQ section's answer nodes by question.
 *
 * Exactly one group is produced for every depth-3 heading that has text, in
 * document order, so group `i` always belongs to FAQ item `i` returned by
 * `extractFAQFromNodes`. A question without answer content yields an empty
 * group. Nodes before the first question, or below a heading without text,
 * belong to no question and are left out.
 */
function collectFaqAnswerNodes(faqNodes: RootContent[]): RootContent[][] {
  const groups: RootContent[][] = [];
  let current: RootContent[] | null = null;

  for (const node of faqNodes) {
    if (node.type === "heading" && (node as Heading).depth === 3) {
      if (extractText(node)) {
        current = [];
        groups.push(current);
      } else {
        // No FAQ item exists for an empty heading, so it gets no group.
        current = null;
      }
    } else if (current) {
      current.push(node);
    }
  }

  return groups;
}

/**
 * Main parsing function - takes markdown content and returns structured content.
 *
 * The markdown is parsed to an AST, the document title (h1) is dropped, and
 * the sections identified by their level-2 headings are extracted and
 * rendered to HTML. Clause anchors are added to the specification's first
 * ordered list, FAQ questions are paired with their answers, and a table of
 * contents is derived from the result.
 *
 * @param markdown - Full markdown source of the specification.
 * @returns The structured, HTML-rendered specification content.
 */
export async function parseSpecContent(
  markdown: string,
): Promise<ParsedSpec> {
  // Parse markdown to AST
  const tree = unified().use(remarkParse).parse(markdown) as Root;

  // Remove title (h1) from the tree - it's displayed separately in the Hero
  const nodes = tree.children.filter(
    (node) => !(node.type === "heading" && (node as Heading).depth === 1),
  );

  // Get heading titles
  const terminologyTitle = getHeadingText(nodes, "Terminology");
  const specificationTitle = getHeadingText(
    nodes,
    "Git Common-Flow Specification",
  );

  // Extract section nodes
  const introNodes = extractSectionNodes(nodes, "Introduction");
  const summaryNodes = extractSectionNodes(nodes, "Summary");
  const terminologyNodes = extractSectionNodes(nodes, "Terminology");
  const specNodes = extractSectionNodes(nodes, "Git Common-Flow Specification");
  const faqNodes = extractSectionNodes(nodes, "FAQ");
  const licenseNodes = extractSectionNodes(nodes, "License");

  // Extract spec sections from the first ordered list. This must happen
  // before the anchors are added, because the anchors are inserted into the
  // same paragraphs the titles are read from.
  const specSections = findSpecSections(specNodes);

  // Add anchor IDs and links to the items of the first ordered list
  for (const node of specNodes) {
    if (node.type === "list" && (node as List).ordered) {
      addClauseAnchors(node as List);
      break;
    }
  }

  // FAQ questions and their answer nodes; both lists follow the same order,
  // with exactly one answer group per question.
  const faqItems = extractFAQFromNodes(faqNodes);
  const faqAnswerNodes = collectFaqAnswerNodes(faqNodes);

  // Convert sections to HTML
  const [introduction, summary, terminology, specification, license] =
    await Promise.all([
      nodesToHtml(introNodes),
      nodesToHtml(summaryNodes),
      nodesToHtml(terminologyNodes),
      nodesToHtml(specNodes),
      nodesToHtml(licenseNodes),
    ]);

  // Convert FAQ answers to HTML
  const faqAnswers = await Promise.all(
    faqAnswerNodes.map((answerNodes) => nodesToHtml(answerNodes)),
  );

  // Assign FAQ answers
  const faq = faqItems.map((item, i) => ({
    ...item,
    answer: faqAnswers[i] ?? "",
  }));

  const parsed: ParsedSpec = {
    introduction,
    summary,
    terminology,
    terminologyTitle,
    specification,
    specificationTitle,
    specSections,
    faq,
    license,
    tocItems: [],
  };

  // The table of contents is derived from the sections assembled above.
  parsed.tocItems = buildTocItems(parsed);

  return parsed;
}
|