Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions src/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ add_library(cmark
latex.c
man.c
node.c
front_matter.c
references.c
render.c
scanners.c
Expand Down
17 changes: 17 additions & 0 deletions src/blocks.c
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
#include "cmark.h"
#include "node.h"
#include "references.h"
#include "front_matter.h"
#include "utf8.h"
#include "scanners.h"
#include "inlines.h"
Expand Down Expand Up @@ -97,6 +98,8 @@ cmark_parser *cmark_parser_new_with_mem_into_root(int options, cmark_mem *mem, c
cmark_strbuf_init(mem, &parser->curline, 256);
cmark_strbuf_init(mem, &parser->linebuf, 0);
cmark_strbuf_init(mem, &parser->content, 0);
cmark_strbuf_init(mem, &parser->front_matter_buf, 0);
cmark_strbuf_init(mem, &parser->front_matter_info, 0);

root->flags = CMARK_NODE__OPEN;

Expand Down Expand Up @@ -133,6 +136,8 @@ void cmark_parser_free(cmark_parser *parser) {
cmark_mem *mem = parser->mem;
cmark_strbuf_free(&parser->curline);
cmark_strbuf_free(&parser->linebuf);
cmark_strbuf_free(&parser->front_matter_buf);
cmark_strbuf_free(&parser->front_matter_info);
cmark_reference_map_free(parser->refmap);
mem->free(parser);
}
Expand Down Expand Up @@ -1301,6 +1306,10 @@ static void S_process_line(cmark_parser *parser, const unsigned char *buffer,

parser->line_number++;

if ((parser->options & CMARK_OPT_FRONT_MATTER) &&
cmark_front_matter_process_line(parser, &input))
goto finished;

last_matched_container = check_open_blocks(parser, &input, &all_matched);

if (!last_matched_container)
Expand Down Expand Up @@ -1334,6 +1343,14 @@ cmark_node *cmark_parser_finish(cmark_parser *parser) {

cmark_consolidate_text_nodes(parser->root);

// If front matter scanning was still active when the document ended, no
// closing delimiter was found. The entire document (after the opening ---)
// is treated as front matter.
if ((parser->options & CMARK_OPT_FRONT_MATTER) && parser->front_matter_scanning)
cmark_front_matter_process_line(parser, NULL);

cmark_strbuf_free(&parser->front_matter_buf);
cmark_strbuf_free(&parser->front_matter_info);
cmark_strbuf_free(&parser->curline);

#if CMARK_DEBUG_NODES
Expand Down
10 changes: 9 additions & 1 deletion src/cmark.h
Original file line number Diff line number Diff line change
Expand Up @@ -46,9 +46,10 @@ typedef enum {
CMARK_NODE_PARAGRAPH,
CMARK_NODE_HEADING,
CMARK_NODE_THEMATIC_BREAK,
CMARK_NODE_FRONT_MATTER,

CMARK_NODE_FIRST_BLOCK = CMARK_NODE_DOCUMENT,
CMARK_NODE_LAST_BLOCK = CMARK_NODE_THEMATIC_BREAK,
CMARK_NODE_LAST_BLOCK = CMARK_NODE_FRONT_MATTER,

/* Inline */
CMARK_NODE_TEXT,
Expand Down Expand Up @@ -641,6 +642,13 @@ char *cmark_render_latex(cmark_node *root, int options, int width);
*/
#define CMARK_OPT_SMART (1 << 10)

/** Parse front matter ("---" delimited block at the start of the document)
* and expose it as a CMARK_NODE_FRONT_MATTER node. The raw content between
* the delimiters is available via cmark_node_get_literal(); how it is
* interpreted (e.g. as YAML, TOML, JSON) is left to the caller.
*/
#define CMARK_OPT_FRONT_MATTER (1 << 11)

/**
* ## Version information
*/
Expand Down
13 changes: 13 additions & 0 deletions src/commonmark.c
Original file line number Diff line number Diff line change
Expand Up @@ -455,6 +455,19 @@ static int S_render_node(cmark_renderer *renderer, cmark_node *node,
}
break;

case CMARK_NODE_FRONT_MATTER:
if (entering) {
const char *info = cmark_node_get_fence_info(node);
BLANKLINE();
LIT("---");
if (info && *info) { LIT(" "); OUT(info, false, LITERAL); }
LIT("\n");
OUT(cmark_node_get_literal(node), false, LITERAL);
LIT("---\n");
BLANKLINE();
}
break;

default:
assert(false);
break;
Expand Down
132 changes: 132 additions & 0 deletions src/front_matter.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,132 @@
#include "front_matter.h"
#include "cmark.h"

#include <string.h>

// ---------------------------------------------------------------------------
// Delimiter and info string parsing
// ---------------------------------------------------------------------------

// Return true if `input` is an opening front matter delimiter: exactly three
// dashes at the very start of the line (no leading whitespace), followed by
// either the end of the line or whitespace and an optional info string,
// e.g. "---\n" or "--- yaml\n".
//
// The byte after "---" must be a space, tab, line ending, or the end of the
// chunk. Without that check a longer dash run such as "----" (an ordinary
// thematic break) or text such as "---foo" on the first line would be taken
// for a front matter opener, and the scanner would then consume every
// following line until a closing "---" (or the end of the document).
//
// Note: some tools (e.g. Jekyll) also accept "..." as a closing delimiter,
// derived from the YAML document-end marker. We intentionally do not support
// it here because this implementation is format-agnostic — the content between
// the delimiters may be YAML, TOML, JSON, or anything else. "..." has no
// meaning outside of YAML, so "---" is the only unambiguous delimiter.
static bool is_opening_delimiter(cmark_chunk *input) {
  const unsigned char *p = input->data;

  if (input->len < 3 || p[0] != '-' || p[1] != '-' || p[2] != '-')
    return false;

  // A bare "---" with nothing after it (no trailing newline) is still valid.
  if (input->len == 3)
    return true;

  // Require a separator so that "----" and "---foo" are rejected.
  return p[3] == ' ' || p[3] == '\t' || p[3] == '\n' || p[3] == '\r';
}

// Decide whether `input` closes a front matter block. The line must begin
// with "---"; after that only spaces and tabs may appear before the line
// ending (or the end of the chunk). Any other byte, including the start of
// an info string, disqualifies the line.
static bool is_closing_delimiter(cmark_chunk *input) {
  const unsigned char *bytes = input->data;
  int total = input->len;
  int pos = 3;

  if (total < 3 || memcmp(bytes, "---", 3) != 0)
    return false;

  while (pos < total) {
    switch (bytes[pos]) {
    case '\n':
    case '\r':
      // Reached the line ending with nothing but blanks in between.
      return true;
    case ' ':
    case '\t':
      pos++;
      break;
    default:
      return false;
    }
  }

  // Chunk ended without a line ending; only blanks followed the dashes.
  return true;
}

// Pull the optional info string out of an opening delimiter line: for
// "--- yaml\n" the result is "yaml". The first three bytes (the dashes) are
// skipped, then spaces/tabs are trimmed from the front and spaces, tabs and
// line-ending bytes from the back. The returned chunk points into `input`'s
// data (nothing is copied) and has length zero when no info string is present.
static cmark_chunk parse_info(cmark_chunk *input) {
  int start = 3;
  int stop = input->len;

  // Trim leading blanks after the dashes.
  while (start < stop &&
         (input->data[start] == ' ' || input->data[start] == '\t'))
    start++;

  // Trim trailing blanks and the line ending.
  while (stop > start) {
    unsigned char last = input->data[stop - 1];
    if (last != '\n' && last != '\r' && last != ' ' && last != '\t')
      break;
    stop--;
  }

  cmark_chunk info = {
      .data = (unsigned char *)(input->data + start),
      .len = (bufsize_t)(stop - start),
  };
  return info;
}

// ---------------------------------------------------------------------------
// Node creation
// ---------------------------------------------------------------------------

// Build a CMARK_NODE_FRONT_MATTER node from the text accumulated on the
// parser, place it as the first child of the document root, and reset the
// front matter scanning state.
static void create_front_matter_node(cmark_parser *parser) {
  // Empty buffers are represented as "" rather than by their raw pointer.
  const char *info_text = "";
  const char *body_text = "";

  if (parser->front_matter_info.size > 0)
    info_text = (const char *)parser->front_matter_info.ptr;
  if (parser->front_matter_buf.size > 0)
    body_text = (const char *)parser->front_matter_buf.ptr;

  cmark_node *fm =
      cmark_node_new_with_mem(CMARK_NODE_FRONT_MATTER, parser->mem);

  // Same storage layout as a code block: info string plus literal content.
  cmark_node_set_fence_info(fm, info_text);
  cmark_node_set_literal(fm, body_text);

  // The block always starts at the top of the document and ends on the
  // line currently being processed.
  // NOTE(review): end_column is fixed at 3 (the width of "---"); this looks
  // inexact when the document ends without a closing delimiter — confirm.
  fm->start_line = 1;
  fm->start_column = 1;
  fm->end_line = parser->line_number;
  fm->end_column = 3;

  // Put the node ahead of any existing children of the root.
  cmark_node *head = cmark_node_first_child(parser->root);
  if (head == NULL)
    cmark_node_append_child(parser->root, fm);
  else
    cmark_node_insert_before(head, fm);

  // Scanning is finished; drop the accumulated text.
  parser->front_matter_scanning = false;
  cmark_strbuf_clear(&parser->front_matter_buf);
  cmark_strbuf_clear(&parser->front_matter_info);
}

// ---------------------------------------------------------------------------
// State machine — called from S_process_line in blocks.c
// ---------------------------------------------------------------------------

// Feed one line to the front matter state machine.
//
// `input` is the current line, or NULL to signal end-of-document, in which
// case a front matter node is emitted from whatever has been accumulated.
// Returns true when the line was consumed as front matter and must not be
// handed to the normal block parser, false otherwise.
bool cmark_front_matter_process_line(cmark_parser *parser, cmark_chunk *input) {
  // End of document: everything collected so far becomes the front matter.
  if (input == NULL) {
    create_front_matter_node(parser);
    return true;
  }

  // View of the line past any offset already consumed (e.g. a UTF-8 BOM on
  // line 1).
  cmark_chunk line = {
      .data = input->data + parser->offset,
      .len = input->len - parser->offset,
  };

  // Only the first line may open a front matter block.
  if (parser->line_number == 1) {
    if (!is_opening_delimiter(&line))
      return parser->front_matter_scanning;

    parser->front_matter_scanning = true;

    // Remember the optional info string (e.g. "yaml" from "--- yaml\n").
    cmark_chunk info = parse_info(&line);
    if (info.len > 0)
      cmark_strbuf_put(&parser->front_matter_info, info.data, info.len);
    return true;
  }

  // Past line 1 and not inside front matter: leave the line alone.
  if (!parser->front_matter_scanning)
    return false;

  if (is_closing_delimiter(&line)) {
    create_front_matter_node(parser);
  } else {
    // Ordinary content line: append it to the accumulated front matter.
    cmark_strbuf_put(&parser->front_matter_buf, line.data, line.len);
  }
  return true;
}
24 changes: 24 additions & 0 deletions src/front_matter.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
#ifndef CMARK_FRONT_MATTER_H
#define CMARK_FRONT_MATTER_H

#ifdef __cplusplus
extern "C" {
#endif

#include "cmark.h"
#include "parser.h"
#include "chunk.h"

// Called from S_process_line in blocks.c for every line when
// CMARK_OPT_FRONT_MATTER is set. Drives the front matter state machine
// stored directly on the parser (front_matter_scanning / front_matter_buf /
// front_matter_info).
//
// `input` is the current line. Passing NULL signals end-of-document:
// cmark_parser_finish() does this when scanning is still active, and the
// content accumulated so far is then emitted as the front matter node.
//
// Returns true if the line was consumed by the front matter scanner and
// should not be passed to the normal block parser. A NULL `input` always
// returns true.
bool cmark_front_matter_process_line(cmark_parser *parser, cmark_chunk *input);

#ifdef __cplusplus
}
#endif

#endif
3 changes: 3 additions & 0 deletions src/html.c
Original file line number Diff line number Diff line change
Expand Up @@ -317,6 +317,9 @@ static int S_render_node(cmark_node *node, cmark_event_type ev_type,
}
break;

case CMARK_NODE_FRONT_MATTER:
break;

default:
assert(false);
break;
Expand Down
3 changes: 3 additions & 0 deletions src/latex.c
Original file line number Diff line number Diff line change
Expand Up @@ -443,6 +443,9 @@ static int S_render_node(cmark_renderer *renderer, cmark_node *node,
}
break;

case CMARK_NODE_FRONT_MATTER:
break;

default:
assert(false);
break;
Expand Down
3 changes: 3 additions & 0 deletions src/main.c
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ void print_usage(void) {
printf(" --safe Omit raw HTML and dangerous URLs\n");
printf(" --unsafe Render raw HTML and dangerous URLs\n");
printf(" --smart Use smart punctuation\n");
printf(" --front-matter Parse front matter (--- delimited block at start of document)\n");
printf(" --validate-utf8 Replace invalid UTF-8 sequences with U+FFFD\n");
printf(" --help, -h Print usage information\n");
printf(" --version Print version\n");
Expand Down Expand Up @@ -112,6 +113,8 @@ int main(int argc, char *argv[]) {
options |= CMARK_OPT_NOBREAKS;
} else if (strcmp(argv[i], "--smart") == 0) {
options |= CMARK_OPT_SMART;
} else if (strcmp(argv[i], "--front-matter") == 0) {
options |= CMARK_OPT_FRONT_MATTER;
} else if (strcmp(argv[i], "--safe") == 0) {
options |= CMARK_OPT_SAFE;
} else if (strcmp(argv[i], "--unsafe") == 0) {
Expand Down
3 changes: 3 additions & 0 deletions src/man.c
Original file line number Diff line number Diff line change
Expand Up @@ -268,6 +268,9 @@ static int S_render_node(cmark_renderer *renderer, cmark_node *node,
}
break;

case CMARK_NODE_FRONT_MATTER:
break;

default:
assert(false);
break;
Expand Down
11 changes: 9 additions & 2 deletions src/node.c
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,7 @@ static void S_free_nodes(cmark_node *e) {
while (e != NULL) {
switch (e->type) {
case CMARK_NODE_CODE_BLOCK:
case CMARK_NODE_FRONT_MATTER:
mem->free(e->data);
mem->free(e->as.code.info);
break;
Expand Down Expand Up @@ -199,6 +200,8 @@ const char *cmark_node_get_type_string(cmark_node *node) {
return "heading";
case CMARK_NODE_THEMATIC_BREAK:
return "thematic_break";
case CMARK_NODE_FRONT_MATTER:
return "front_matter";
case CMARK_NODE_TEXT:
return "text";
case CMARK_NODE_SOFTBREAK:
Expand Down Expand Up @@ -311,6 +314,7 @@ const char *cmark_node_get_literal(cmark_node *node) {
case CMARK_NODE_HTML_INLINE:
case CMARK_NODE_CODE:
case CMARK_NODE_CODE_BLOCK:
case CMARK_NODE_FRONT_MATTER:
return node->data ? (char *)node->data : "";

default:
Expand All @@ -331,6 +335,7 @@ int cmark_node_set_literal(cmark_node *node, const char *content) {
case CMARK_NODE_HTML_INLINE:
case CMARK_NODE_CODE:
case CMARK_NODE_CODE_BLOCK:
case CMARK_NODE_FRONT_MATTER:
node->len = cmark_set_cstr(node->mem, &node->data, content);
return 1;

Expand Down Expand Up @@ -487,7 +492,8 @@ const char *cmark_node_get_fence_info(cmark_node *node) {
return NULL;
}

if (node->type == CMARK_NODE_CODE_BLOCK) {
if (node->type == CMARK_NODE_CODE_BLOCK ||
node->type == CMARK_NODE_FRONT_MATTER) {
return node->as.code.info ? (char *)node->as.code.info : "";
} else {
return NULL;
Expand All @@ -499,7 +505,8 @@ int cmark_node_set_fence_info(cmark_node *node, const char *info) {
return 0;
}

if (node->type == CMARK_NODE_CODE_BLOCK) {
if (node->type == CMARK_NODE_CODE_BLOCK ||
node->type == CMARK_NODE_FRONT_MATTER) {
cmark_set_cstr(node->mem, &node->as.code.info, info);
return 1;
} else {
Expand Down
18 changes: 18 additions & 0 deletions src/parser.h
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,24 @@ struct cmark_parser {
int options;
bool last_buffer_ended_with_cr;
unsigned int total_size;

/* Front matter scanning state (CMARK_OPT_FRONT_MATTER).
*
* cmark_front_matter_process_line() is called from S_process_line() in
* blocks.c immediately after parser->line_number is incremented, so the
* first line of the document arrives with line_number == 1. The function
* relies on this: it uses line_number == 1 as the trigger to decide
* whether the document opens with a front matter block.
*
* front_matter_scanning is set to true when a valid opening "---" is seen
* on line 1 and remains true until the matching closing "---" is found or
* the document ends. While scanning, each content line is accumulated in
* front_matter_buf; the optional info string from the opening line is kept
* in front_matter_info. Both buffers are freed explicitly in
* cmark_parser_finish() and cmark_parser_free().
*/
bool front_matter_scanning;
cmark_strbuf front_matter_buf; /* accumulated content lines */
cmark_strbuf front_matter_info; /* optional format hint from opening "--- <info>" */
};

#ifdef __cplusplus
Expand Down
Loading
Loading