Mirror of https://github.com/jimeh/commonflow.org.git
wip: improve spec parsing
@@ -199,7 +199,7 @@ its merge target, allowing others to review, discuss and approve the changes.</l
effectively just a git tag named after the version of the release.</li>
<li><strong>Release Branches</strong> - Used both for short-term preparations of a release, and
also for long-term maintenance of older version.</li>
</ul> </section> <!-- Main specification --> <section id="specification" data-astro-cid-6lwcykzv> <h2 data-astro-cid-6lwcykzv>Git Common-Flow Specification</h2> <p>The key words "MUST", "MUST NOT", "REQUIRED", "SHALL", "SHALL NOT", "SHOULD",
</ul> </section> <!-- Main specification --> <section id="specification" data-astro-cid-6lwcykzv> <h2 data-astro-cid-6lwcykzv>Specification</h2> <p>The key words "MUST", "MUST NOT", "REQUIRED", "SHALL", "SHALL NOT", "SHOULD",
"SHOULD NOT", "RECOMMENDED", "MAY", and "OPTIONAL" in this document are to be
interpreted as described in <a href="https://tools.ietf.org/html/rfc2119">RFC 2119</a>.</p>
<ol>
@@ -387,7 +387,7 @@ in question. Meaning it MUST always be in a non-broken state, MUST NOT be
force pushed to, etc.</li>
</ol>
</li>
<li>Bug Fixes & Rollback
<li id="spec-bug-fixes-rollback">Bug Fixes & Rollback
<ol>
<li>You MUST NOT under any circumstances force push to the master branch or
to long-term release branches.</li>
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
@@ -188,7 +188,7 @@ its merge target, allowing others to review, discuss and approve the changes.</l
effectively just a git tag named after the version of the release.</li>
<li><strong>Release Branches</strong> - Used both for short-term preparations of a release, and
also for long-term maintenance of older version.</li>
</ul> </section> <!-- Main specification --> <section id="specification" data-astro-cid-6lwcykzv> <h2 data-astro-cid-6lwcykzv>Git Common-Flow Specification</h2> <p>The key words "MUST", "MUST NOT", "REQUIRED", "SHALL", "SHALL NOT", "SHOULD",
</ul> </section> <!-- Main specification --> <section id="specification" data-astro-cid-6lwcykzv> <h2 data-astro-cid-6lwcykzv>Specification</h2> <p>The key words "MUST", "MUST NOT", "REQUIRED", "SHALL", "SHALL NOT", "SHOULD",
"SHOULD NOT", "RECOMMENDED", "MAY", and "OPTIONAL" in this document are to be
interpreted as described in <a href="https://tools.ietf.org/html/rfc2119">RFC 2119</a>.</p>
<ol>
@@ -375,7 +375,7 @@ in question. Meaning it MUST always be in a non-broken state, MUST NOT be
force pushed to, etc.</li>
</ol>
</li>
<li>Bug Fixes & Rollback
<li id="spec-bug-fixes-rollback">Bug Fixes & Rollback
<ol>
<li>You MUST NOT under any circumstances force push to the master branch or
to long-term release branches.</li>
@@ -199,7 +199,7 @@ its merge target, allowing others to review, discuss and approve the changes.</l
effectively just a git tag named after the version of the release.</li>
<li><strong>Release Branches</strong> - Used both for short-term preparations of a release, and
also for long-term maintenance of older version.</li>
</ul> </section> <!-- Main specification --> <section id="specification" data-astro-cid-6lwcykzv> <h2 data-astro-cid-6lwcykzv>Git Common-Flow Specification</h2> <p>The key words "MUST", "MUST NOT", "REQUIRED", "SHALL", "SHALL NOT", "SHOULD",
</ul> </section> <!-- Main specification --> <section id="specification" data-astro-cid-6lwcykzv> <h2 data-astro-cid-6lwcykzv>Specification</h2> <p>The key words "MUST", "MUST NOT", "REQUIRED", "SHALL", "SHALL NOT", "SHOULD",
"SHOULD NOT", "RECOMMENDED", "MAY", and "OPTIONAL" in this document are to be
interpreted as described in <a href="https://tools.ietf.org/html/rfc2119">RFC 2119</a>.</p>
<ol>
@@ -387,7 +387,7 @@ in question. Meaning it MUST always be in a non-broken state, MUST NOT be
force pushed to, etc.</li>
</ol>
</li>
<li>Bug Fixes & Rollback
<li id="spec-bug-fixes-rollback">Bug Fixes & Rollback
<ol>
<li>You MUST NOT under any circumstances force push to the master branch or
to long-term release branches.</li>
@@ -4,11 +4,12 @@ import type { TocItem } from "../utils/parseSpecContent";

interface Props {
terminology: string;
terminologyTitle: string;
specification: string;
tocItems: TocItem[];
}

const { terminology, specification, tocItems } = Astro.props;
const { terminology, terminologyTitle, specification, tocItems } = Astro.props;
---

<section id="spec" class="py-20 sm:py-28">
@@ -33,13 +34,13 @@ const { terminology, specification, tocItems } = Astro.props;
<article class="prose-spec spec-content">
<!-- Terminology -->
<section id="terminology">
<h2>Terminology</h2>
<h2>{terminologyTitle}</h2>
<Fragment set:html={terminology} />
</section>

<!-- Main specification -->
<section id="specification">
<h2>Git Common-Flow Specification</h2>
<h2>Specification</h2>
<Fragment set:html={specification} />
</section>
</article>
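Reviewer note: to make the new data contract easier to follow, here is a consolidated "after" view of the props this component now receives. The field list is copied from the hunk above and the ParsedSpec changes further down; the `SpecSectionProps` name itself is illustrative, not part of the commit.

```ts
// Consolidated sketch of the component's inputs after this commit (interface name is illustrative).
import type { TocItem } from "../utils/parseSpecContent";

interface SpecSectionProps {
  terminology: string; // rendered HTML for the terminology section
  terminologyTitle: string; // heading text parsed from the markdown (new in this commit)
  specification: string; // rendered HTML for the main specification
  tocItems: TocItem[]; // built by buildTocItems(), which now uses terminologyTitle
}
```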
@@ -2,10 +2,6 @@
import { getCollection } from "astro:content";
import * as fs from "node:fs";
import * as path from "node:path";
import { unified } from "unified";
import remarkParse from "remark-parse";
import remarkRehype from "remark-rehype";
import rehypeStringify from "rehype-stringify";

import SinglePage from "../layouts/SinglePage.astro";
import Header from "../components/Header.astro";
@@ -25,28 +21,15 @@ if (!spec) {
throw new Error(`Spec version ${version} not found`);
}

// Read and process the markdown file
const filePath = path.join(
process.cwd(),
"src/content/spec",
`${version}.md`
);
// Read the markdown file
const filePath = path.join(process.cwd(), "src/content/spec", `${version}.md`);
const content = fs.readFileSync(filePath, "utf-8");

// Remove frontmatter
const body = content.replace(/^---[\s\S]*?---\n/, "");
const markdown = content.replace(/^---[\s\S]*?---\n/, "");

// Process markdown to HTML
const result = await unified()
.use(remarkParse)
.use(remarkRehype, { allowDangerousHtml: true })
.use(rehypeStringify, { allowDangerousHtml: true })
.process(body);

const html = String(result);

// Parse the content into sections
const parsed = parseSpecContent(html, version);
// Parse the content into sections (handles markdown -> HTML internally)
const parsed = await parseSpecContent(markdown, version);
---

<SinglePage title={spec.data.title} version={version}>
@@ -64,6 +47,7 @@ const parsed = parseSpecContent(html, version);

<SpecSection
terminology={parsed.terminology}
terminologyTitle={parsed.terminologyTitle}
specification={parsed.specification}
tocItems={parsed.tocItems}
/>
@@ -2,10 +2,6 @@
import { getCollection } from "astro:content";
import * as fs from "node:fs";
import * as path from "node:path";
import { unified } from "unified";
import remarkParse from "remark-parse";
import remarkRehype from "remark-rehype";
import rehypeStringify from "rehype-stringify";

import SinglePage from "../../layouts/SinglePage.astro";
import Header from "../../components/Header.astro";
@@ -27,28 +23,15 @@ export async function getStaticPaths() {
const { spec } = Astro.props;
const version = spec.data.version;

// Read and process the markdown file
const filePath = path.join(
process.cwd(),
"src/content/spec",
`${version}.md`
);
// Read the markdown file
const filePath = path.join(process.cwd(), "src/content/spec", `${version}.md`);
const content = fs.readFileSync(filePath, "utf-8");

// Remove frontmatter
const body = content.replace(/^---[\s\S]*?---\n/, "");
const markdown = content.replace(/^---[\s\S]*?---\n/, "");

// Process markdown to HTML
const result = await unified()
.use(remarkParse)
.use(remarkRehype, { allowDangerousHtml: true })
.use(rehypeStringify, { allowDangerousHtml: true })
.process(body);

const html = String(result);

// Parse the content into sections
const parsed = parseSpecContent(html, version);
// Parse the content into sections (handles markdown -> HTML internally)
const parsed = await parseSpecContent(markdown, version);
---

<SinglePage title={spec.data.title} version={version}>
@@ -66,6 +49,7 @@ const parsed = parseSpecContent(html, version);

<SpecSection
terminology={parsed.terminology}
terminologyTitle={parsed.terminologyTitle}
specification={parsed.specification}
tocItems={parsed.tocItems}
/>
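Both page components end up with the same, shorter flow after this change: read the markdown file, strip frontmatter, and hand the raw markdown to the parser. A condensed sketch of that flow for review, assuming the paths and frontmatter-stripping regex shown in the hunks above (the `loadSpecPage` helper itself is illustrative, not part of this commit):

```ts
// Illustrative helper condensing the new page-side flow shared by both pages.
import * as fs from "node:fs";
import * as path from "node:path";
import { parseSpecContent } from "../utils/parseSpecContent";

async function loadSpecPage(version: string) {
  // Read the raw markdown for the requested spec version.
  const filePath = path.join(process.cwd(), "src/content/spec", `${version}.md`);
  const content = fs.readFileSync(filePath, "utf-8");

  // Strip frontmatter; markdown -> HTML conversion now happens inside parseSpecContent.
  const markdown = content.replace(/^---[\s\S]*?---\n/, "");
  return parseSpecContent(markdown, version);
}
```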
@@ -1,8 +1,14 @@
/**
* Parses rendered spec HTML into structured sections for the single-page
* layout.
* Parses spec content using markdown AST for robust section extraction.
*/

import { unified } from "unified";
import remarkParse from "remark-parse";
import remarkRehype from "remark-rehype";
import rehypeStringify from "rehype-stringify";
import type { Root, RootContent, Heading, List, ListItem } from "mdast";
import type { Root as HastRoot } from "hast";

export interface TocItem {
id: string;
title: string;
@@ -26,7 +32,9 @@ export interface ParsedSpec {
introduction: string;
summary: string;
terminology: string;
terminologyTitle: string;
specification: string;
specificationTitle: string;
specSections: SpecSection[];
faq: FAQItem[];
about: string;
@@ -35,7 +43,7 @@
}

/**
* Convert a heading text to a URL-friendly ID
* Convert text to a URL-friendly ID
*/
function slugify(text: string): string {
return text
@@ -45,160 +53,212 @@ function slugify(text: string): string {
.trim();
}

type MdastNode = Root | RootContent;

/**
* Extract content between two headings or to the end of the document
* Extract plain text from an mdast node tree
*/
function extractSection(
html: string,
startHeading: string,
endHeadings: string[] = []
): string {
// Find the heading (h2) - use partial match to handle additional text
// e.g., "Git Common-Flow Specification (Common-Flow)"
const headingPattern = new RegExp(
`<h2[^>]*>[^<]*${escapeRegex(startHeading)}[^<]*</h2>`,
"i"
function extractText(node: MdastNode): string {
if ("value" in node && typeof node.value === "string") {
return node.value;
}
if ("children" in node && Array.isArray(node.children)) {
return node.children.map((child) => extractText(child)).join("");
}
return "";
}

/**
* Find index of heading containing specific text
*/
function findHeadingIndex(
nodes: RootContent[],
text: string,
depth: number = 2
): number {
return nodes.findIndex(
(node) =>
node.type === "heading" &&
(node as Heading).depth === depth &&
extractText(node).toLowerCase().includes(text.toLowerCase())
);
const match = html.match(headingPattern);
if (!match || match.index === undefined) return "";

const startIdx = match.index + match[0].length;

// Find the next section heading
let endIdx = html.length;
for (const endHeading of endHeadings) {
const endPattern = new RegExp(
`<h2[^>]*>\\s*${escapeRegex(endHeading)}\\s*</h2>`,
"i"
);
const endMatch = html.slice(startIdx).match(endPattern);
if (endMatch && endMatch.index !== undefined) {
const possibleEnd = startIdx + endMatch.index;
if (possibleEnd < endIdx) {
endIdx = possibleEnd;
}
}
}

// Also check for any h2 as a fallback
const anyH2 = html.slice(startIdx).match(/<h2[^>]*>/i);
if (anyH2 && anyH2.index !== undefined) {
const possibleEnd = startIdx + anyH2.index;
if (possibleEnd < endIdx) {
endIdx = possibleEnd;
}
}

return html.slice(startIdx, endIdx).trim();
}

function escapeRegex(str: string): string {
return str.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
}
// Spec section titles in order (used for both ToC and anchor injection)
const SPEC_SECTION_TITLES = [
"TL;DR",
"The Master Branch",
"Change Branches",
"Pull Requests",
"Versioning",
"Releases",
"Short-Term Release Branches",
"Long-term Release Branches",
"Bug Fixes & Rollback",
"Git Best Practices",
];

/**
* Extract the numbered spec sections (1. TL;DR, 2. The Master Branch, etc.)
* Extract nodes between two headings
*/
function extractSpecSections(specContent: string): SpecSection[] {
function extractSectionNodes(
nodes: RootContent[],
startText: string,
depth: number = 2
): RootContent[] {
const startIdx = findHeadingIndex(nodes, startText, depth);
if (startIdx === -1) return [];

// Find the next heading of same or higher level
let endIdx = nodes.length;
for (let i = startIdx + 1; i < nodes.length; i++) {
const node = nodes[i];
if (node.type === "heading" && (node as Heading).depth <= depth) {
endIdx = i;
break;
}
}

// Return nodes after the heading (not including the heading itself)
return nodes.slice(startIdx + 1, endIdx);
}

/**
* Get the full heading text
*/
function getHeadingText(
nodes: RootContent[],
text: string,
depth: number = 2
): string {
const idx = findHeadingIndex(nodes, text, depth);
if (idx === -1) return text;
return extractText(nodes[idx]);
}

/**
* Convert mdast nodes to HTML string
*/
async function nodesToHtml(nodes: RootContent[]): Promise<string> {
if (nodes.length === 0) return "";

// Create a root node with these children
const root: Root = { type: "root", children: nodes };

const result = await unified()
.use(remarkRehype, { allowDangerousHtml: true })
.use(rehypeStringify, { allowDangerousHtml: true })
.run(root);

const html = await unified()
.use(rehypeStringify, { allowDangerousHtml: true })
.stringify(result as HastRoot);

return html;
}

/**
* Extract top-level list item titles from an ordered list
*/
function extractListItemTitles(list: List): string[] {
const titles: string[] = [];

for (const item of list.children) {
if (item.type !== "listItem") continue;

// Get the first paragraph or text content of the list item
// The title is the text before any nested list
let title = "";
for (const child of item.children) {
if (child.type === "list") break; // Stop at nested list
if (child.type === "paragraph") {
title = extractText(child);
break;
}
// Handle inline text directly in list item
title += extractText(child);
}

title = title.split("\n")[0].trim();
if (title) {
titles.push(title);
}
}

return titles;
}

/**
* Find the first ordered list in nodes and extract its structure
*/
function findSpecSections(nodes: RootContent[]): SpecSection[] {
const sections: SpecSection[] = [];

// The spec uses an ordered list with nested items
// Each top-level li starts a new section
const olMatch = specContent.match(/<ol[^>]*>([\s\S]*?)<\/ol>/i);
if (!olMatch) return sections;

// Find each section by looking for the title pattern
for (const title of SPEC_SECTION_TITLES) {
const id = slugify(title);

// For the content, we'll just use the title for navigation
// The actual content stays in the main specification block
sections.push({
id: `spec-${id}`,
title,
content: "", // Content handled inline
});
for (const node of nodes) {
if (node.type === "list" && (node as List).ordered) {
const titles = extractListItemTitles(node as List);
for (const title of titles) {
sections.push({
id: `spec-${slugify(title)}`,
title,
content: "",
});
}
break; // Only process first ordered list
}
}

return sections;
}
/**
* Add anchor IDs to spec section list items.
* Finds top-level <li> elements that start with section titles and adds IDs.
* Add anchor IDs to list items in the spec ordered list
*/
function addSpecSectionAnchors(specContent: string): string {
let result = specContent;
function addAnchorsToList(list: List, sections: SpecSection[]): void {
const titleMap = new Map(sections.map((s) => [s.title, s.id]));

for (const title of SPEC_SECTION_TITLES) {
const id = `spec-${slugify(title)}`;
// Match <li> followed by the section title (possibly with whitespace)
// The title appears right after <li> in the rendered HTML
const pattern = new RegExp(
`(<li>)(\\s*${escapeRegex(title)})`,
"i"
);
result = result.replace(pattern, `<li id="${id}">$2`);
for (const item of list.children) {
if (item.type !== "listItem") continue;

// Get the title of this item
let title = "";
for (const child of item.children) {
if (child.type === "list") break;
if (child.type === "paragraph") {
title = extractText(child).split("\n")[0].trim();
break;
}
title += extractText(child);
}
title = title.split("\n")[0].trim();

// Add ID as data attribute (will be processed by rehype)
const id = titleMap.get(title);
if (id) {
// Add hProperties for rehype to convert to HTML id attribute
(item as ListItem & { data?: { hProperties?: { id?: string } } }).data = {
hProperties: { id },
};
}
}

return result;
}
/**
* Extract FAQ items from the FAQ section HTML
* Extract FAQ items from FAQ section nodes
*/
function extractFAQItems(faqContent: string): FAQItem[] {
function extractFAQFromNodes(nodes: RootContent[]): FAQItem[] {
const items: FAQItem[] = [];
let currentQuestion = "";
let currentId = "";

// Split by h3 headings
const h3Pattern = /<h3[^>]*>([\s\S]*?)<\/h3>/gi;
let lastIndex = 0;
let lastQuestion = "";
let lastId = "";
for (const node of nodes) {
if (node.type === "heading" && (node as Heading).depth === 3) {
// Save previous FAQ item if we had one
if (currentQuestion) {
items.push({
id: currentId,
question: currentQuestion,
answer: "", // Placeholder, will be filled later
});
}

const matches = [...faqContent.matchAll(h3Pattern)];

for (let i = 0; i < matches.length; i++) {
const match = matches[i];
const question = match[1].replace(/<[^>]+>/g, "").trim();
const id = slugify(question).slice(0, 50);

if (i > 0 && match.index !== undefined) {
// Get content between previous h3 and this one
const answer = faqContent.slice(lastIndex, match.index).trim();
items.push({
id: `faq-${lastId}`,
question: lastQuestion,
answer,
});
currentQuestion = extractText(node);
currentId = `faq-${slugify(currentQuestion).slice(0, 50)}`;
}

lastQuestion = question;
lastId = id;
lastIndex = match.index! + match[0].length;
}

// Don't forget the last FAQ item
if (lastQuestion) {
const answer = faqContent.slice(lastIndex).trim();
// Don't forget the last item
if (currentQuestion) {
items.push({
id: `faq-${lastId}`,
question: lastQuestion,
answer,
id: currentId,
question: currentQuestion,
answer: "",
});
}
@@ -206,20 +266,25 @@ function extractFAQItems(faqContent: string): FAQItem[] {
}

/**
* Build table of contents from parsed sections.
* Only includes sections rendered in SpecSection (Terminology + Specification).
* Introduction/Summary are in AboutSection and excluded from this ToC.
* Build table of contents from parsed sections
*/
function buildTocItems(parsed: Partial<ParsedSpec>): TocItem[] {
const items: TocItem[] = [];

if (parsed.terminology) {
items.push({ id: "terminology", title: "Terminology", level: 2 });
items.push({
id: "terminology",
title: parsed.terminologyTitle || "Terminology",
level: 2,
});
}
if (parsed.specification) {
items.push({ id: "specification", title: "Specification", level: 2 });
items.push({
id: "specification",
title: "Specification",
level: 2,
});

// Add spec subsections
if (parsed.specSections) {
for (const section of parsed.specSections) {
items.push({ id: section.id, title: section.title, level: 3 });
@@ -231,70 +296,106 @@ function buildTocItems(parsed: Partial<ParsedSpec>): TocItem[] {
}

/**
* Main parsing function - takes rendered HTML and returns structured content
* Main parsing function - takes markdown content and returns structured content
*/
export function parseSpecContent(html: string, version: string): ParsedSpec {
export async function parseSpecContent(
markdown: string,
version: string
): Promise<ParsedSpec> {
const svgPath = `/spec/${version}.svg`;

// Remove the title (h1) and SVG from the content for parsing
let content = html;
// Parse markdown to AST
const tree = unified().use(remarkParse).parse(markdown) as Root;

// Remove the h1 title
content = content.replace(/<h1[^>]*>[\s\S]*?<\/h1>/i, "");
// Remove title (h1) and SVG image from the tree
const nodes = tree.children.filter((node) => {
if (node.type === "heading" && (node as Heading).depth === 1) return false;
if (node.type === "paragraph") {
const text = extractText(node);
if (text.includes(".svg")) return false;
}
return true;
});

// Remove the SVG img tag
content = content.replace(/<img[^>]*\.svg[^>]*>/i, "");

// Extract each section
const introduction = extractSection(content, "Introduction", [
"Summary",
"Terminology",
"Git Common-Flow",
"FAQ",
"About",
"License",
]);

const summary = extractSection(content, "Summary", [
"Terminology",
"Git Common-Flow",
"FAQ",
"About",
"License",
]);

const terminology = extractSection(content, "Terminology", [
"Git Common-Flow",
"FAQ",
"About",
"License",
]);

const specificationRaw = extractSection(
content,
"Git Common-Flow Specification",
["FAQ", "About", "License"]
// Get heading titles
const terminologyTitle = getHeadingText(nodes, "Terminology");
const specificationTitle = getHeadingText(
nodes,
"Git Common-Flow Specification"
);
// Add anchor IDs to spec section list items for ToC navigation
const specification = addSpecSectionAnchors(specificationRaw);
// Extract section nodes
const introNodes = extractSectionNodes(nodes, "Introduction");
const summaryNodes = extractSectionNodes(nodes, "Summary");
const terminologyNodes = extractSectionNodes(nodes, "Terminology");
const specNodes = extractSectionNodes(nodes, "Git Common-Flow Specification");
const faqNodes = extractSectionNodes(nodes, "FAQ");
const aboutNodes = extractSectionNodes(nodes, "About");
const licenseNodes = extractSectionNodes(nodes, "License");

const faqContent = extractSection(content, "FAQ", ["About", "License"]);
// Extract spec sections from the first ordered list
const specSections = findSpecSections(specNodes);

const about = extractSection(content, "About", ["License"]);
// Add anchor IDs to spec list items
for (const node of specNodes) {
if (node.type === "list" && (node as List).ordered) {
addAnchorsToList(node as List, specSections);
break;
}
}

const license = extractSection(content, "License", []);
// Extract FAQ items structure
const faqItems = extractFAQFromNodes(faqNodes);

// Parse subsections
const specSections = extractSpecSections(specificationRaw);
const faq = extractFAQItems(faqContent);
// Collect FAQ answer nodes for each item
const faqAnswerNodes: RootContent[][] = [];
let currentAnswerNodes: RootContent[] = [];

for (const node of faqNodes) {
if (node.type === "heading" && (node as Heading).depth === 3) {
if (currentAnswerNodes.length > 0) {
faqAnswerNodes.push(currentAnswerNodes);
}
currentAnswerNodes = [];
} else {
currentAnswerNodes.push(node);
}
}
// Don't forget the last answer
if (currentAnswerNodes.length > 0) {
faqAnswerNodes.push(currentAnswerNodes);
}

// Convert sections to HTML
const [introduction, summary, terminology, specification, about, license] =
await Promise.all([
nodesToHtml(introNodes),
nodesToHtml(summaryNodes),
nodesToHtml(terminologyNodes),
nodesToHtml(specNodes),
nodesToHtml(aboutNodes),
nodesToHtml(licenseNodes),
]);

// Convert FAQ answers to HTML
const faqAnswers = await Promise.all(
faqAnswerNodes.map((nodes) => nodesToHtml(nodes))
);

// Assign FAQ answers
const faq = faqItems.map((item, i) => ({
...item,
answer: faqAnswers[i] || "",
}));

const parsed: ParsedSpec = {
svgPath,
introduction,
summary,
terminology,
terminologyTitle,
specification,
specificationTitle,
specSections,
faq,
about,
@@ -302,7 +403,6 @@ export function parseSpecContent(html: string, version: string): ParsedSpec {
tocItems: [],
};

// Build TOC
parsed.tocItems = buildTocItems(parsed);

return parsed;
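For reviewing the reworked parser end to end, here is a minimal smoke-test sketch of the new async API. The call shape and the returned fields come from this diff; the sample markdown, the file path of the import, and the expected console output are assumptions for illustration only.

```ts
// Hypothetical smoke test for the AST-based parser (not part of this commit).
// Assumes the utility lives at src/utils/parseSpecContent as imported by the pages above.
import { parseSpecContent } from "./src/utils/parseSpecContent";

const sample = `# Git Common-Flow 1.2.3

## Terminology

- **Master Branch** - example term.

## Git Common-Flow Specification (Common-Flow)

1. TL;DR
   1. Example rule.
2. The Master Branch
   1. Another example rule.

## FAQ

### Why not just use GitHub Flow?

Example answer.
`;

const parsed = await parseSpecContent(sample, "1.2.3");

console.log(parsed.terminologyTitle); // "Terminology"
console.log(parsed.specSections.map((s) => s.title)); // ["TL;DR", "The Master Branch"]
console.log(parsed.faq[0]?.question); // "Why not just use GitHub Flow?"
console.log(parsed.tocItems); // h2 entries plus level-3 entries for each spec section
```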