mirror of
https://github.com/jimeh/commonflow.org.git
synced 2026-02-19 05:46:40 +00:00
wip: improve spec parsing
This commit is contained in:
@@ -199,7 +199,7 @@ its merge target, allowing others to review, discuss and approve the changes.</l
|
|||||||
effectively just a git tag named after the version of the release.</li>
|
effectively just a git tag named after the version of the release.</li>
|
||||||
<li><strong>Release Branches</strong> - Used both for short-term preparations of a release, and
|
<li><strong>Release Branches</strong> - Used both for short-term preparations of a release, and
|
||||||
also for long-term maintenance of older version.</li>
|
also for long-term maintenance of older version.</li>
|
||||||
</ul> </section> <!-- Main specification --> <section id="specification" data-astro-cid-6lwcykzv> <h2 data-astro-cid-6lwcykzv>Git Common-Flow Specification</h2> <p>The key words "MUST", "MUST NOT", "REQUIRED", "SHALL", "SHALL NOT", "SHOULD",
|
</ul> </section> <!-- Main specification --> <section id="specification" data-astro-cid-6lwcykzv> <h2 data-astro-cid-6lwcykzv>Specification</h2> <p>The key words "MUST", "MUST NOT", "REQUIRED", "SHALL", "SHALL NOT", "SHOULD",
|
||||||
"SHOULD NOT", "RECOMMENDED", "MAY", and "OPTIONAL" in this document are to be
|
"SHOULD NOT", "RECOMMENDED", "MAY", and "OPTIONAL" in this document are to be
|
||||||
interpreted as described in <a href="https://tools.ietf.org/html/rfc2119">RFC 2119</a>.</p>
|
interpreted as described in <a href="https://tools.ietf.org/html/rfc2119">RFC 2119</a>.</p>
|
||||||
<ol>
|
<ol>
|
||||||
@@ -387,7 +387,7 @@ in question. Meaning it MUST always be in a non-broken state, MUST NOT be
|
|||||||
force pushed to, etc.</li>
|
force pushed to, etc.</li>
|
||||||
</ol>
|
</ol>
|
||||||
</li>
|
</li>
|
||||||
<li>Bug Fixes & Rollback
|
<li id="spec-bug-fixes-rollback">Bug Fixes & Rollback
|
||||||
<ol>
|
<ol>
|
||||||
<li>You MUST NOT under any circumstances force push to the master branch or
|
<li>You MUST NOT under any circumstances force push to the master branch or
|
||||||
to long-term release branches.</li>
|
to long-term release branches.</li>
|
||||||
|
|||||||
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
@@ -188,7 +188,7 @@ its merge target, allowing others to review, discuss and approve the changes.</l
|
|||||||
effectively just a git tag named after the version of the release.</li>
|
effectively just a git tag named after the version of the release.</li>
|
||||||
<li><strong>Release Branches</strong> - Used both for short-term preparations of a release, and
|
<li><strong>Release Branches</strong> - Used both for short-term preparations of a release, and
|
||||||
also for long-term maintenance of older version.</li>
|
also for long-term maintenance of older version.</li>
|
||||||
</ul> </section> <!-- Main specification --> <section id="specification" data-astro-cid-6lwcykzv> <h2 data-astro-cid-6lwcykzv>Git Common-Flow Specification</h2> <p>The key words "MUST", "MUST NOT", "REQUIRED", "SHALL", "SHALL NOT", "SHOULD",
|
</ul> </section> <!-- Main specification --> <section id="specification" data-astro-cid-6lwcykzv> <h2 data-astro-cid-6lwcykzv>Specification</h2> <p>The key words "MUST", "MUST NOT", "REQUIRED", "SHALL", "SHALL NOT", "SHOULD",
|
||||||
"SHOULD NOT", "RECOMMENDED", "MAY", and "OPTIONAL" in this document are to be
|
"SHOULD NOT", "RECOMMENDED", "MAY", and "OPTIONAL" in this document are to be
|
||||||
interpreted as described in <a href="https://tools.ietf.org/html/rfc2119">RFC 2119</a>.</p>
|
interpreted as described in <a href="https://tools.ietf.org/html/rfc2119">RFC 2119</a>.</p>
|
||||||
<ol>
|
<ol>
|
||||||
@@ -375,7 +375,7 @@ in question. Meaning it MUST always be in a non-broken state, MUST NOT be
|
|||||||
force pushed to, etc.</li>
|
force pushed to, etc.</li>
|
||||||
</ol>
|
</ol>
|
||||||
</li>
|
</li>
|
||||||
<li>Bug Fixes & Rollback
|
<li id="spec-bug-fixes-rollback">Bug Fixes & Rollback
|
||||||
<ol>
|
<ol>
|
||||||
<li>You MUST NOT under any circumstances force push to the master branch or
|
<li>You MUST NOT under any circumstances force push to the master branch or
|
||||||
to long-term release branches.</li>
|
to long-term release branches.</li>
|
||||||
|
|||||||
@@ -199,7 +199,7 @@ its merge target, allowing others to review, discuss and approve the changes.</l
|
|||||||
effectively just a git tag named after the version of the release.</li>
|
effectively just a git tag named after the version of the release.</li>
|
||||||
<li><strong>Release Branches</strong> - Used both for short-term preparations of a release, and
|
<li><strong>Release Branches</strong> - Used both for short-term preparations of a release, and
|
||||||
also for long-term maintenance of older version.</li>
|
also for long-term maintenance of older version.</li>
|
||||||
</ul> </section> <!-- Main specification --> <section id="specification" data-astro-cid-6lwcykzv> <h2 data-astro-cid-6lwcykzv>Git Common-Flow Specification</h2> <p>The key words "MUST", "MUST NOT", "REQUIRED", "SHALL", "SHALL NOT", "SHOULD",
|
</ul> </section> <!-- Main specification --> <section id="specification" data-astro-cid-6lwcykzv> <h2 data-astro-cid-6lwcykzv>Specification</h2> <p>The key words "MUST", "MUST NOT", "REQUIRED", "SHALL", "SHALL NOT", "SHOULD",
|
||||||
"SHOULD NOT", "RECOMMENDED", "MAY", and "OPTIONAL" in this document are to be
|
"SHOULD NOT", "RECOMMENDED", "MAY", and "OPTIONAL" in this document are to be
|
||||||
interpreted as described in <a href="https://tools.ietf.org/html/rfc2119">RFC 2119</a>.</p>
|
interpreted as described in <a href="https://tools.ietf.org/html/rfc2119">RFC 2119</a>.</p>
|
||||||
<ol>
|
<ol>
|
||||||
@@ -387,7 +387,7 @@ in question. Meaning it MUST always be in a non-broken state, MUST NOT be
|
|||||||
force pushed to, etc.</li>
|
force pushed to, etc.</li>
|
||||||
</ol>
|
</ol>
|
||||||
</li>
|
</li>
|
||||||
<li>Bug Fixes & Rollback
|
<li id="spec-bug-fixes-rollback">Bug Fixes & Rollback
|
||||||
<ol>
|
<ol>
|
||||||
<li>You MUST NOT under any circumstances force push to the master branch or
|
<li>You MUST NOT under any circumstances force push to the master branch or
|
||||||
to long-term release branches.</li>
|
to long-term release branches.</li>
|
||||||
|
|||||||
@@ -4,11 +4,12 @@ import type { TocItem } from "../utils/parseSpecContent";
|
|||||||
|
|
||||||
interface Props {
|
interface Props {
|
||||||
terminology: string;
|
terminology: string;
|
||||||
|
terminologyTitle: string;
|
||||||
specification: string;
|
specification: string;
|
||||||
tocItems: TocItem[];
|
tocItems: TocItem[];
|
||||||
}
|
}
|
||||||
|
|
||||||
const { terminology, specification, tocItems } = Astro.props;
|
const { terminology, terminologyTitle, specification, tocItems } = Astro.props;
|
||||||
---
|
---
|
||||||
|
|
||||||
<section id="spec" class="py-20 sm:py-28">
|
<section id="spec" class="py-20 sm:py-28">
|
||||||
@@ -33,13 +34,13 @@ const { terminology, specification, tocItems } = Astro.props;
|
|||||||
<article class="prose-spec spec-content">
|
<article class="prose-spec spec-content">
|
||||||
<!-- Terminology -->
|
<!-- Terminology -->
|
||||||
<section id="terminology">
|
<section id="terminology">
|
||||||
<h2>Terminology</h2>
|
<h2>{terminologyTitle}</h2>
|
||||||
<Fragment set:html={terminology} />
|
<Fragment set:html={terminology} />
|
||||||
</section>
|
</section>
|
||||||
|
|
||||||
<!-- Main specification -->
|
<!-- Main specification -->
|
||||||
<section id="specification">
|
<section id="specification">
|
||||||
<h2>Git Common-Flow Specification</h2>
|
<h2>Specification</h2>
|
||||||
<Fragment set:html={specification} />
|
<Fragment set:html={specification} />
|
||||||
</section>
|
</section>
|
||||||
</article>
|
</article>
|
||||||
|
|||||||
@@ -2,10 +2,6 @@
|
|||||||
import { getCollection } from "astro:content";
|
import { getCollection } from "astro:content";
|
||||||
import * as fs from "node:fs";
|
import * as fs from "node:fs";
|
||||||
import * as path from "node:path";
|
import * as path from "node:path";
|
||||||
import { unified } from "unified";
|
|
||||||
import remarkParse from "remark-parse";
|
|
||||||
import remarkRehype from "remark-rehype";
|
|
||||||
import rehypeStringify from "rehype-stringify";
|
|
||||||
|
|
||||||
import SinglePage from "../layouts/SinglePage.astro";
|
import SinglePage from "../layouts/SinglePage.astro";
|
||||||
import Header from "../components/Header.astro";
|
import Header from "../components/Header.astro";
|
||||||
@@ -25,28 +21,15 @@ if (!spec) {
|
|||||||
throw new Error(`Spec version ${version} not found`);
|
throw new Error(`Spec version ${version} not found`);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Read and process the markdown file
|
// Read the markdown file
|
||||||
const filePath = path.join(
|
const filePath = path.join(process.cwd(), "src/content/spec", `${version}.md`);
|
||||||
process.cwd(),
|
|
||||||
"src/content/spec",
|
|
||||||
`${version}.md`
|
|
||||||
);
|
|
||||||
const content = fs.readFileSync(filePath, "utf-8");
|
const content = fs.readFileSync(filePath, "utf-8");
|
||||||
|
|
||||||
// Remove frontmatter
|
// Remove frontmatter
|
||||||
const body = content.replace(/^---[\s\S]*?---\n/, "");
|
const markdown = content.replace(/^---[\s\S]*?---\n/, "");
|
||||||
|
|
||||||
// Process markdown to HTML
|
// Parse the content into sections (handles markdown -> HTML internally)
|
||||||
const result = await unified()
|
const parsed = await parseSpecContent(markdown, version);
|
||||||
.use(remarkParse)
|
|
||||||
.use(remarkRehype, { allowDangerousHtml: true })
|
|
||||||
.use(rehypeStringify, { allowDangerousHtml: true })
|
|
||||||
.process(body);
|
|
||||||
|
|
||||||
const html = String(result);
|
|
||||||
|
|
||||||
// Parse the content into sections
|
|
||||||
const parsed = parseSpecContent(html, version);
|
|
||||||
---
|
---
|
||||||
|
|
||||||
<SinglePage title={spec.data.title} version={version}>
|
<SinglePage title={spec.data.title} version={version}>
|
||||||
@@ -64,6 +47,7 @@ const parsed = parseSpecContent(html, version);
|
|||||||
|
|
||||||
<SpecSection
|
<SpecSection
|
||||||
terminology={parsed.terminology}
|
terminology={parsed.terminology}
|
||||||
|
terminologyTitle={parsed.terminologyTitle}
|
||||||
specification={parsed.specification}
|
specification={parsed.specification}
|
||||||
tocItems={parsed.tocItems}
|
tocItems={parsed.tocItems}
|
||||||
/>
|
/>
|
||||||
|
|||||||
@@ -2,10 +2,6 @@
|
|||||||
import { getCollection } from "astro:content";
|
import { getCollection } from "astro:content";
|
||||||
import * as fs from "node:fs";
|
import * as fs from "node:fs";
|
||||||
import * as path from "node:path";
|
import * as path from "node:path";
|
||||||
import { unified } from "unified";
|
|
||||||
import remarkParse from "remark-parse";
|
|
||||||
import remarkRehype from "remark-rehype";
|
|
||||||
import rehypeStringify from "rehype-stringify";
|
|
||||||
|
|
||||||
import SinglePage from "../../layouts/SinglePage.astro";
|
import SinglePage from "../../layouts/SinglePage.astro";
|
||||||
import Header from "../../components/Header.astro";
|
import Header from "../../components/Header.astro";
|
||||||
@@ -27,28 +23,15 @@ export async function getStaticPaths() {
|
|||||||
const { spec } = Astro.props;
|
const { spec } = Astro.props;
|
||||||
const version = spec.data.version;
|
const version = spec.data.version;
|
||||||
|
|
||||||
// Read and process the markdown file
|
// Read the markdown file
|
||||||
const filePath = path.join(
|
const filePath = path.join(process.cwd(), "src/content/spec", `${version}.md`);
|
||||||
process.cwd(),
|
|
||||||
"src/content/spec",
|
|
||||||
`${version}.md`
|
|
||||||
);
|
|
||||||
const content = fs.readFileSync(filePath, "utf-8");
|
const content = fs.readFileSync(filePath, "utf-8");
|
||||||
|
|
||||||
// Remove frontmatter
|
// Remove frontmatter
|
||||||
const body = content.replace(/^---[\s\S]*?---\n/, "");
|
const markdown = content.replace(/^---[\s\S]*?---\n/, "");
|
||||||
|
|
||||||
// Process markdown to HTML
|
// Parse the content into sections (handles markdown -> HTML internally)
|
||||||
const result = await unified()
|
const parsed = await parseSpecContent(markdown, version);
|
||||||
.use(remarkParse)
|
|
||||||
.use(remarkRehype, { allowDangerousHtml: true })
|
|
||||||
.use(rehypeStringify, { allowDangerousHtml: true })
|
|
||||||
.process(body);
|
|
||||||
|
|
||||||
const html = String(result);
|
|
||||||
|
|
||||||
// Parse the content into sections
|
|
||||||
const parsed = parseSpecContent(html, version);
|
|
||||||
---
|
---
|
||||||
|
|
||||||
<SinglePage title={spec.data.title} version={version}>
|
<SinglePage title={spec.data.title} version={version}>
|
||||||
@@ -66,6 +49,7 @@ const parsed = parseSpecContent(html, version);
|
|||||||
|
|
||||||
<SpecSection
|
<SpecSection
|
||||||
terminology={parsed.terminology}
|
terminology={parsed.terminology}
|
||||||
|
terminologyTitle={parsed.terminologyTitle}
|
||||||
specification={parsed.specification}
|
specification={parsed.specification}
|
||||||
tocItems={parsed.tocItems}
|
tocItems={parsed.tocItems}
|
||||||
/>
|
/>
|
||||||
|
|||||||
@@ -1,8 +1,14 @@
|
|||||||
/**
|
/**
|
||||||
* Parses rendered spec HTML into structured sections for the single-page
|
* Parses spec content using markdown AST for robust section extraction.
|
||||||
* layout.
|
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
import { unified } from "unified";
|
||||||
|
import remarkParse from "remark-parse";
|
||||||
|
import remarkRehype from "remark-rehype";
|
||||||
|
import rehypeStringify from "rehype-stringify";
|
||||||
|
import type { Root, RootContent, Heading, List, ListItem } from "mdast";
|
||||||
|
import type { Root as HastRoot } from "hast";
|
||||||
|
|
||||||
export interface TocItem {
|
export interface TocItem {
|
||||||
id: string;
|
id: string;
|
||||||
title: string;
|
title: string;
|
||||||
@@ -26,7 +32,9 @@ export interface ParsedSpec {
|
|||||||
introduction: string;
|
introduction: string;
|
||||||
summary: string;
|
summary: string;
|
||||||
terminology: string;
|
terminology: string;
|
||||||
|
terminologyTitle: string;
|
||||||
specification: string;
|
specification: string;
|
||||||
|
specificationTitle: string;
|
||||||
specSections: SpecSection[];
|
specSections: SpecSection[];
|
||||||
faq: FAQItem[];
|
faq: FAQItem[];
|
||||||
about: string;
|
about: string;
|
||||||
@@ -35,7 +43,7 @@ export interface ParsedSpec {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Convert a heading text to a URL-friendly ID
|
* Convert text to a URL-friendly ID
|
||||||
*/
|
*/
|
||||||
function slugify(text: string): string {
|
function slugify(text: string): string {
|
||||||
return text
|
return text
|
||||||
@@ -45,160 +53,212 @@ function slugify(text: string): string {
|
|||||||
.trim();
|
.trim();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
type MdastNode = Root | RootContent;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Extract content between two headings or to the end of the document
|
* Extract plain text from an mdast node tree
|
||||||
*/
|
*/
|
||||||
function extractSection(
|
function extractText(node: MdastNode): string {
|
||||||
html: string,
|
if ("value" in node && typeof node.value === "string") {
|
||||||
startHeading: string,
|
return node.value;
|
||||||
endHeadings: string[] = []
|
}
|
||||||
): string {
|
if ("children" in node && Array.isArray(node.children)) {
|
||||||
// Find the heading (h2) - use partial match to handle additional text
|
return node.children.map((child) => extractText(child)).join("");
|
||||||
// e.g., "Git Common-Flow Specification (Common-Flow)"
|
}
|
||||||
const headingPattern = new RegExp(
|
return "";
|
||||||
`<h2[^>]*>[^<]*${escapeRegex(startHeading)}[^<]*</h2>`,
|
}
|
||||||
"i"
|
|
||||||
|
/**
|
||||||
|
* Find index of heading containing specific text
|
||||||
|
*/
|
||||||
|
function findHeadingIndex(
|
||||||
|
nodes: RootContent[],
|
||||||
|
text: string,
|
||||||
|
depth: number = 2
|
||||||
|
): number {
|
||||||
|
return nodes.findIndex(
|
||||||
|
(node) =>
|
||||||
|
node.type === "heading" &&
|
||||||
|
(node as Heading).depth === depth &&
|
||||||
|
extractText(node).toLowerCase().includes(text.toLowerCase())
|
||||||
);
|
);
|
||||||
const match = html.match(headingPattern);
|
|
||||||
if (!match || match.index === undefined) return "";
|
|
||||||
|
|
||||||
const startIdx = match.index + match[0].length;
|
|
||||||
|
|
||||||
// Find the next section heading
|
|
||||||
let endIdx = html.length;
|
|
||||||
for (const endHeading of endHeadings) {
|
|
||||||
const endPattern = new RegExp(
|
|
||||||
`<h2[^>]*>\\s*${escapeRegex(endHeading)}\\s*</h2>`,
|
|
||||||
"i"
|
|
||||||
);
|
|
||||||
const endMatch = html.slice(startIdx).match(endPattern);
|
|
||||||
if (endMatch && endMatch.index !== undefined) {
|
|
||||||
const possibleEnd = startIdx + endMatch.index;
|
|
||||||
if (possibleEnd < endIdx) {
|
|
||||||
endIdx = possibleEnd;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Also check for any h2 as a fallback
|
|
||||||
const anyH2 = html.slice(startIdx).match(/<h2[^>]*>/i);
|
|
||||||
if (anyH2 && anyH2.index !== undefined) {
|
|
||||||
const possibleEnd = startIdx + anyH2.index;
|
|
||||||
if (possibleEnd < endIdx) {
|
|
||||||
endIdx = possibleEnd;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return html.slice(startIdx, endIdx).trim();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
function escapeRegex(str: string): string {
|
|
||||||
return str.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
|
|
||||||
}
|
|
||||||
|
|
||||||
// Spec section titles in order (used for both ToC and anchor injection)
|
|
||||||
const SPEC_SECTION_TITLES = [
|
|
||||||
"TL;DR",
|
|
||||||
"The Master Branch",
|
|
||||||
"Change Branches",
|
|
||||||
"Pull Requests",
|
|
||||||
"Versioning",
|
|
||||||
"Releases",
|
|
||||||
"Short-Term Release Branches",
|
|
||||||
"Long-term Release Branches",
|
|
||||||
"Bug Fixes & Rollback",
|
|
||||||
"Git Best Practices",
|
|
||||||
];
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Extract the numbered spec sections (1. TL;DR, 2. The Master Branch, etc.)
|
* Extract nodes between two headings
|
||||||
*/
|
*/
|
||||||
function extractSpecSections(specContent: string): SpecSection[] {
|
function extractSectionNodes(
|
||||||
|
nodes: RootContent[],
|
||||||
|
startText: string,
|
||||||
|
depth: number = 2
|
||||||
|
): RootContent[] {
|
||||||
|
const startIdx = findHeadingIndex(nodes, startText, depth);
|
||||||
|
if (startIdx === -1) return [];
|
||||||
|
|
||||||
|
// Find the next heading of same or higher level
|
||||||
|
let endIdx = nodes.length;
|
||||||
|
for (let i = startIdx + 1; i < nodes.length; i++) {
|
||||||
|
const node = nodes[i];
|
||||||
|
if (node.type === "heading" && (node as Heading).depth <= depth) {
|
||||||
|
endIdx = i;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Return nodes after the heading (not including the heading itself)
|
||||||
|
return nodes.slice(startIdx + 1, endIdx);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Get the full heading text
|
||||||
|
*/
|
||||||
|
function getHeadingText(
|
||||||
|
nodes: RootContent[],
|
||||||
|
text: string,
|
||||||
|
depth: number = 2
|
||||||
|
): string {
|
||||||
|
const idx = findHeadingIndex(nodes, text, depth);
|
||||||
|
if (idx === -1) return text;
|
||||||
|
return extractText(nodes[idx]);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Convert mdast nodes to HTML string
|
||||||
|
*/
|
||||||
|
async function nodesToHtml(nodes: RootContent[]): Promise<string> {
|
||||||
|
if (nodes.length === 0) return "";
|
||||||
|
|
||||||
|
// Create a root node with these children
|
||||||
|
const root: Root = { type: "root", children: nodes };
|
||||||
|
|
||||||
|
const result = await unified()
|
||||||
|
.use(remarkRehype, { allowDangerousHtml: true })
|
||||||
|
.use(rehypeStringify, { allowDangerousHtml: true })
|
||||||
|
.run(root);
|
||||||
|
|
||||||
|
const html = await unified()
|
||||||
|
.use(rehypeStringify, { allowDangerousHtml: true })
|
||||||
|
.stringify(result as HastRoot);
|
||||||
|
|
||||||
|
return html;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Extract top-level list item titles from an ordered list
|
||||||
|
*/
|
||||||
|
function extractListItemTitles(list: List): string[] {
|
||||||
|
const titles: string[] = [];
|
||||||
|
|
||||||
|
for (const item of list.children) {
|
||||||
|
if (item.type !== "listItem") continue;
|
||||||
|
|
||||||
|
// Get the first paragraph or text content of the list item
|
||||||
|
// The title is the text before any nested list
|
||||||
|
let title = "";
|
||||||
|
for (const child of item.children) {
|
||||||
|
if (child.type === "list") break; // Stop at nested list
|
||||||
|
if (child.type === "paragraph") {
|
||||||
|
title = extractText(child);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
// Handle inline text directly in list item
|
||||||
|
title += extractText(child);
|
||||||
|
}
|
||||||
|
|
||||||
|
title = title.split("\n")[0].trim();
|
||||||
|
if (title) {
|
||||||
|
titles.push(title);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return titles;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Find the first ordered list in nodes and extract its structure
|
||||||
|
*/
|
||||||
|
function findSpecSections(nodes: RootContent[]): SpecSection[] {
|
||||||
const sections: SpecSection[] = [];
|
const sections: SpecSection[] = [];
|
||||||
|
|
||||||
// The spec uses an ordered list with nested items
|
for (const node of nodes) {
|
||||||
// Each top-level li starts a new section
|
if (node.type === "list" && (node as List).ordered) {
|
||||||
const olMatch = specContent.match(/<ol[^>]*>([\s\S]*?)<\/ol>/i);
|
const titles = extractListItemTitles(node as List);
|
||||||
if (!olMatch) return sections;
|
for (const title of titles) {
|
||||||
|
sections.push({
|
||||||
// Find each section by looking for the title pattern
|
id: `spec-${slugify(title)}`,
|
||||||
for (const title of SPEC_SECTION_TITLES) {
|
title,
|
||||||
const id = slugify(title);
|
content: "",
|
||||||
|
});
|
||||||
// For the content, we'll just use the title for navigation
|
}
|
||||||
// The actual content stays in the main specification block
|
break; // Only process first ordered list
|
||||||
sections.push({
|
}
|
||||||
id: `spec-${id}`,
|
|
||||||
title,
|
|
||||||
content: "", // Content handled inline
|
|
||||||
});
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return sections;
|
return sections;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Add anchor IDs to spec section list items.
|
* Add anchor IDs to list items in the spec ordered list
|
||||||
* Finds top-level <li> elements that start with section titles and adds IDs.
|
|
||||||
*/
|
*/
|
||||||
function addSpecSectionAnchors(specContent: string): string {
|
function addAnchorsToList(list: List, sections: SpecSection[]): void {
|
||||||
let result = specContent;
|
const titleMap = new Map(sections.map((s) => [s.title, s.id]));
|
||||||
|
|
||||||
for (const title of SPEC_SECTION_TITLES) {
|
for (const item of list.children) {
|
||||||
const id = `spec-${slugify(title)}`;
|
if (item.type !== "listItem") continue;
|
||||||
// Match <li> followed by the section title (possibly with whitespace)
|
|
||||||
// The title appears right after <li> in the rendered HTML
|
// Get the title of this item
|
||||||
const pattern = new RegExp(
|
let title = "";
|
||||||
`(<li>)(\\s*${escapeRegex(title)})`,
|
for (const child of item.children) {
|
||||||
"i"
|
if (child.type === "list") break;
|
||||||
);
|
if (child.type === "paragraph") {
|
||||||
result = result.replace(pattern, `<li id="${id}">$2`);
|
title = extractText(child).split("\n")[0].trim();
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
title += extractText(child);
|
||||||
|
}
|
||||||
|
title = title.split("\n")[0].trim();
|
||||||
|
|
||||||
|
// Add ID as data attribute (will be processed by rehype)
|
||||||
|
const id = titleMap.get(title);
|
||||||
|
if (id) {
|
||||||
|
// Add hProperties for rehype to convert to HTML id attribute
|
||||||
|
(item as ListItem & { data?: { hProperties?: { id?: string } } }).data = {
|
||||||
|
hProperties: { id },
|
||||||
|
};
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return result;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Extract FAQ items from the FAQ section HTML
|
* Extract FAQ items from FAQ section nodes
|
||||||
*/
|
*/
|
||||||
function extractFAQItems(faqContent: string): FAQItem[] {
|
function extractFAQFromNodes(nodes: RootContent[]): FAQItem[] {
|
||||||
const items: FAQItem[] = [];
|
const items: FAQItem[] = [];
|
||||||
|
let currentQuestion = "";
|
||||||
|
let currentId = "";
|
||||||
|
|
||||||
// Split by h3 headings
|
for (const node of nodes) {
|
||||||
const h3Pattern = /<h3[^>]*>([\s\S]*?)<\/h3>/gi;
|
if (node.type === "heading" && (node as Heading).depth === 3) {
|
||||||
let lastIndex = 0;
|
// Save previous FAQ item if we had one
|
||||||
let lastQuestion = "";
|
if (currentQuestion) {
|
||||||
let lastId = "";
|
items.push({
|
||||||
|
id: currentId,
|
||||||
|
question: currentQuestion,
|
||||||
|
answer: "", // Placeholder, will be filled later
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
const matches = [...faqContent.matchAll(h3Pattern)];
|
currentQuestion = extractText(node);
|
||||||
|
currentId = `faq-${slugify(currentQuestion).slice(0, 50)}`;
|
||||||
for (let i = 0; i < matches.length; i++) {
|
|
||||||
const match = matches[i];
|
|
||||||
const question = match[1].replace(/<[^>]+>/g, "").trim();
|
|
||||||
const id = slugify(question).slice(0, 50);
|
|
||||||
|
|
||||||
if (i > 0 && match.index !== undefined) {
|
|
||||||
// Get content between previous h3 and this one
|
|
||||||
const answer = faqContent.slice(lastIndex, match.index).trim();
|
|
||||||
items.push({
|
|
||||||
id: `faq-${lastId}`,
|
|
||||||
question: lastQuestion,
|
|
||||||
answer,
|
|
||||||
});
|
|
||||||
}
|
}
|
||||||
|
|
||||||
lastQuestion = question;
|
|
||||||
lastId = id;
|
|
||||||
lastIndex = match.index! + match[0].length;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Don't forget the last FAQ item
|
// Don't forget the last item
|
||||||
if (lastQuestion) {
|
if (currentQuestion) {
|
||||||
const answer = faqContent.slice(lastIndex).trim();
|
|
||||||
items.push({
|
items.push({
|
||||||
id: `faq-${lastId}`,
|
id: currentId,
|
||||||
question: lastQuestion,
|
question: currentQuestion,
|
||||||
answer,
|
answer: "",
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -206,20 +266,25 @@ function extractFAQItems(faqContent: string): FAQItem[] {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Build table of contents from parsed sections.
|
* Build table of contents from parsed sections
|
||||||
* Only includes sections rendered in SpecSection (Terminology + Specification).
|
|
||||||
* Introduction/Summary are in AboutSection and excluded from this ToC.
|
|
||||||
*/
|
*/
|
||||||
function buildTocItems(parsed: Partial<ParsedSpec>): TocItem[] {
|
function buildTocItems(parsed: Partial<ParsedSpec>): TocItem[] {
|
||||||
const items: TocItem[] = [];
|
const items: TocItem[] = [];
|
||||||
|
|
||||||
if (parsed.terminology) {
|
if (parsed.terminology) {
|
||||||
items.push({ id: "terminology", title: "Terminology", level: 2 });
|
items.push({
|
||||||
|
id: "terminology",
|
||||||
|
title: parsed.terminologyTitle || "Terminology",
|
||||||
|
level: 2,
|
||||||
|
});
|
||||||
}
|
}
|
||||||
if (parsed.specification) {
|
if (parsed.specification) {
|
||||||
items.push({ id: "specification", title: "Specification", level: 2 });
|
items.push({
|
||||||
|
id: "specification",
|
||||||
|
title: "Specification",
|
||||||
|
level: 2,
|
||||||
|
});
|
||||||
|
|
||||||
// Add spec subsections
|
|
||||||
if (parsed.specSections) {
|
if (parsed.specSections) {
|
||||||
for (const section of parsed.specSections) {
|
for (const section of parsed.specSections) {
|
||||||
items.push({ id: section.id, title: section.title, level: 3 });
|
items.push({ id: section.id, title: section.title, level: 3 });
|
||||||
@@ -231,70 +296,106 @@ function buildTocItems(parsed: Partial<ParsedSpec>): TocItem[] {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Main parsing function - takes rendered HTML and returns structured content
|
* Main parsing function - takes markdown content and returns structured content
|
||||||
*/
|
*/
|
||||||
export function parseSpecContent(html: string, version: string): ParsedSpec {
|
export async function parseSpecContent(
|
||||||
|
markdown: string,
|
||||||
|
version: string
|
||||||
|
): Promise<ParsedSpec> {
|
||||||
const svgPath = `/spec/${version}.svg`;
|
const svgPath = `/spec/${version}.svg`;
|
||||||
|
|
||||||
// Remove the title (h1) and SVG from the content for parsing
|
// Parse markdown to AST
|
||||||
let content = html;
|
const tree = unified().use(remarkParse).parse(markdown) as Root;
|
||||||
|
|
||||||
// Remove the h1 title
|
// Remove title (h1) and SVG image from the tree
|
||||||
content = content.replace(/<h1[^>]*>[\s\S]*?<\/h1>/i, "");
|
const nodes = tree.children.filter((node) => {
|
||||||
|
if (node.type === "heading" && (node as Heading).depth === 1) return false;
|
||||||
|
if (node.type === "paragraph") {
|
||||||
|
const text = extractText(node);
|
||||||
|
if (text.includes(".svg")) return false;
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
});
|
||||||
|
|
||||||
// Remove the SVG img tag
|
// Get heading titles
|
||||||
content = content.replace(/<img[^>]*\.svg[^>]*>/i, "");
|
const terminologyTitle = getHeadingText(nodes, "Terminology");
|
||||||
|
const specificationTitle = getHeadingText(
|
||||||
// Extract each section
|
nodes,
|
||||||
const introduction = extractSection(content, "Introduction", [
|
"Git Common-Flow Specification"
|
||||||
"Summary",
|
|
||||||
"Terminology",
|
|
||||||
"Git Common-Flow",
|
|
||||||
"FAQ",
|
|
||||||
"About",
|
|
||||||
"License",
|
|
||||||
]);
|
|
||||||
|
|
||||||
const summary = extractSection(content, "Summary", [
|
|
||||||
"Terminology",
|
|
||||||
"Git Common-Flow",
|
|
||||||
"FAQ",
|
|
||||||
"About",
|
|
||||||
"License",
|
|
||||||
]);
|
|
||||||
|
|
||||||
const terminology = extractSection(content, "Terminology", [
|
|
||||||
"Git Common-Flow",
|
|
||||||
"FAQ",
|
|
||||||
"About",
|
|
||||||
"License",
|
|
||||||
]);
|
|
||||||
|
|
||||||
const specificationRaw = extractSection(
|
|
||||||
content,
|
|
||||||
"Git Common-Flow Specification",
|
|
||||||
["FAQ", "About", "License"]
|
|
||||||
);
|
);
|
||||||
|
|
||||||
// Add anchor IDs to spec section list items for ToC navigation
|
// Extract section nodes
|
||||||
const specification = addSpecSectionAnchors(specificationRaw);
|
const introNodes = extractSectionNodes(nodes, "Introduction");
|
||||||
|
const summaryNodes = extractSectionNodes(nodes, "Summary");
|
||||||
|
const terminologyNodes = extractSectionNodes(nodes, "Terminology");
|
||||||
|
const specNodes = extractSectionNodes(nodes, "Git Common-Flow Specification");
|
||||||
|
const faqNodes = extractSectionNodes(nodes, "FAQ");
|
||||||
|
const aboutNodes = extractSectionNodes(nodes, "About");
|
||||||
|
const licenseNodes = extractSectionNodes(nodes, "License");
|
||||||
|
|
||||||
const faqContent = extractSection(content, "FAQ", ["About", "License"]);
|
// Extract spec sections from the first ordered list
|
||||||
|
const specSections = findSpecSections(specNodes);
|
||||||
|
|
||||||
const about = extractSection(content, "About", ["License"]);
|
// Add anchor IDs to spec list items
|
||||||
|
for (const node of specNodes) {
|
||||||
|
if (node.type === "list" && (node as List).ordered) {
|
||||||
|
addAnchorsToList(node as List, specSections);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
const license = extractSection(content, "License", []);
|
// Extract FAQ items structure
|
||||||
|
const faqItems = extractFAQFromNodes(faqNodes);
|
||||||
|
|
||||||
// Parse subsections
|
// Collect FAQ answer nodes for each item
|
||||||
const specSections = extractSpecSections(specificationRaw);
|
const faqAnswerNodes: RootContent[][] = [];
|
||||||
const faq = extractFAQItems(faqContent);
|
let currentAnswerNodes: RootContent[] = [];
|
||||||
|
|
||||||
|
for (const node of faqNodes) {
|
||||||
|
if (node.type === "heading" && (node as Heading).depth === 3) {
|
||||||
|
if (currentAnswerNodes.length > 0) {
|
||||||
|
faqAnswerNodes.push(currentAnswerNodes);
|
||||||
|
}
|
||||||
|
currentAnswerNodes = [];
|
||||||
|
} else {
|
||||||
|
currentAnswerNodes.push(node);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// Don't forget the last answer
|
||||||
|
if (currentAnswerNodes.length > 0) {
|
||||||
|
faqAnswerNodes.push(currentAnswerNodes);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Convert sections to HTML
|
||||||
|
const [introduction, summary, terminology, specification, about, license] =
|
||||||
|
await Promise.all([
|
||||||
|
nodesToHtml(introNodes),
|
||||||
|
nodesToHtml(summaryNodes),
|
||||||
|
nodesToHtml(terminologyNodes),
|
||||||
|
nodesToHtml(specNodes),
|
||||||
|
nodesToHtml(aboutNodes),
|
||||||
|
nodesToHtml(licenseNodes),
|
||||||
|
]);
|
||||||
|
|
||||||
|
// Convert FAQ answers to HTML
|
||||||
|
const faqAnswers = await Promise.all(
|
||||||
|
faqAnswerNodes.map((nodes) => nodesToHtml(nodes))
|
||||||
|
);
|
||||||
|
|
||||||
|
// Assign FAQ answers
|
||||||
|
const faq = faqItems.map((item, i) => ({
|
||||||
|
...item,
|
||||||
|
answer: faqAnswers[i] || "",
|
||||||
|
}));
|
||||||
|
|
||||||
const parsed: ParsedSpec = {
|
const parsed: ParsedSpec = {
|
||||||
svgPath,
|
svgPath,
|
||||||
introduction,
|
introduction,
|
||||||
summary,
|
summary,
|
||||||
terminology,
|
terminology,
|
||||||
|
terminologyTitle,
|
||||||
specification,
|
specification,
|
||||||
|
specificationTitle,
|
||||||
specSections,
|
specSections,
|
||||||
faq,
|
faq,
|
||||||
about,
|
about,
|
||||||
@@ -302,7 +403,6 @@ export function parseSpecContent(html: string, version: string): ParsedSpec {
|
|||||||
tocItems: [],
|
tocItems: [],
|
||||||
};
|
};
|
||||||
|
|
||||||
// Build TOC
|
|
||||||
parsed.tocItems = buildTocItems(parsed);
|
parsed.tocItems = buildTocItems(parsed);
|
||||||
|
|
||||||
return parsed;
|
return parsed;
|
||||||
|
|||||||
Reference in New Issue
Block a user