Refactor: Isolate dictionary of reference defintions.
This commit is contained in:
parent
07c5dba9ba
commit
ff524e9104
314
md4c/md4c.c
314
md4c/md4c.c
@ -75,7 +75,7 @@
|
||||
typedef struct MD_MARK_tag MD_MARK;
|
||||
typedef struct MD_BLOCK_tag MD_BLOCK;
|
||||
typedef struct MD_CONTAINER_tag MD_CONTAINER;
|
||||
typedef struct MD_LINK_REF_DEF_tag MD_LINK_REF_DEF;
|
||||
typedef struct MD_REF_DEF_tag MD_REF_DEF;
|
||||
|
||||
|
||||
/* During analyzes of inline marks, we need to manage some "mark chains",
|
||||
@ -101,10 +101,10 @@ struct MD_CTX_tag {
|
||||
CHAR* buffer;
|
||||
unsigned alloc_buffer;
|
||||
|
||||
/* Link reference definitions. */
|
||||
MD_LINK_REF_DEF* link_ref_defs;
|
||||
int n_link_ref_defs;
|
||||
int alloc_link_ref_defs;
|
||||
/* Reference definitions. */
|
||||
MD_REF_DEF* ref_defs;
|
||||
int n_ref_defs;
|
||||
int alloc_ref_defs;
|
||||
|
||||
/* Stack of inline/span markers.
|
||||
* This is only used for parsing a single block contents but by storing it
|
||||
@ -910,6 +910,22 @@ md_merge_lines_alloc(MD_CTX* ctx, OFF beg, OFF end, const MD_LINE* lines, int n_
|
||||
return 0;
|
||||
}
|
||||
|
||||
static OFF
|
||||
md_skip_unicode_whitespace(const CHAR* label, OFF off, SZ size)
|
||||
{
|
||||
SZ char_size;
|
||||
int codepoint;
|
||||
|
||||
while(off < size) {
|
||||
codepoint = md_decode_unicode(label, off, size, &char_size);
|
||||
if(!ISUNICODEWHITESPACE_(codepoint) && !ISNEWLINE_(label[off]))
|
||||
break;
|
||||
off += char_size;
|
||||
}
|
||||
|
||||
return off;
|
||||
}
|
||||
|
||||
|
||||
/******************************
|
||||
*** Recognizing raw HTML ***
|
||||
@ -1459,15 +1475,11 @@ md_free_attribute(MD_CTX* ctx, MD_ATTRIBUTE* attr)
|
||||
}
|
||||
|
||||
|
||||
/***************************
|
||||
*** Recognizing Links ***
|
||||
***************************/
|
||||
/*********************************************
|
||||
*** Dictionary of Reference Definitions ***
|
||||
*********************************************/
|
||||
|
||||
/* Note this code is partially shared between processing inlines and blocks
|
||||
* as link reference definitions and links share some helper parser functions.
|
||||
*/
|
||||
|
||||
struct MD_LINK_REF_DEF_tag {
|
||||
struct MD_REF_DEF_tag {
|
||||
CHAR* label;
|
||||
CHAR* title;
|
||||
SZ label_size : 24;
|
||||
@ -1478,6 +1490,113 @@ struct MD_LINK_REF_DEF_tag {
|
||||
OFF dest_end;
|
||||
};
|
||||
|
||||
static int
|
||||
md_link_label_cmp(const void* a, const void* b)
|
||||
{
|
||||
const CHAR* a_label = ((const MD_REF_DEF*)a)->label;
|
||||
const CHAR* b_label = ((const MD_REF_DEF*)b)->label;
|
||||
SZ a_size = ((const MD_REF_DEF*)a)->label_size;
|
||||
SZ b_size = ((const MD_REF_DEF*)b)->label_size;
|
||||
OFF a_off;
|
||||
OFF b_off;
|
||||
|
||||
/* The slow path, with Unicode case folding and Unicode whitespace collapsing. */
|
||||
a_off = md_skip_unicode_whitespace(a_label, 0, a_size);
|
||||
b_off = md_skip_unicode_whitespace(b_label, 0, b_size);
|
||||
while(a_off < a_size || b_off < b_size) {
|
||||
int a_codepoint, b_codepoint;
|
||||
SZ a_char_size, b_char_size;
|
||||
int a_is_whitespace, b_is_whitespace;
|
||||
|
||||
if(a_off < a_size) {
|
||||
a_codepoint = md_decode_unicode(a_label, a_off, a_size, &a_char_size);
|
||||
a_is_whitespace = ISUNICODEWHITESPACE_(a_codepoint) || ISNEWLINE_(a_label[a_off]);
|
||||
} else {
|
||||
/* Treat end of label as a whitespace. */
|
||||
a_codepoint = -1;
|
||||
a_is_whitespace = TRUE;
|
||||
}
|
||||
|
||||
if(b_off < b_size) {
|
||||
b_codepoint = md_decode_unicode(b_label, b_off, b_size, &b_char_size);
|
||||
b_is_whitespace = ISUNICODEWHITESPACE_(b_codepoint) || ISNEWLINE_(b_label[b_off]);
|
||||
} else {
|
||||
/* Treat end of label as a whitespace. */
|
||||
b_codepoint = -1;
|
||||
b_is_whitespace = TRUE;
|
||||
}
|
||||
|
||||
if(a_is_whitespace || b_is_whitespace) {
|
||||
if(!a_is_whitespace || !b_is_whitespace)
|
||||
return (a_is_whitespace ? -1 : +1);
|
||||
|
||||
a_off = md_skip_unicode_whitespace(a_label, a_off, a_size);
|
||||
b_off = md_skip_unicode_whitespace(b_label, b_off, b_size);
|
||||
} else {
|
||||
MD_UNICODE_FOLD_INFO a_fold_info, b_fold_info;
|
||||
int cmp;
|
||||
|
||||
md_get_unicode_fold_info(a_codepoint, &a_fold_info);
|
||||
md_get_unicode_fold_info(b_codepoint, &b_fold_info);
|
||||
|
||||
if(a_fold_info.n_codepoints != b_fold_info.n_codepoints)
|
||||
return (a_fold_info.n_codepoints - b_fold_info.n_codepoints);
|
||||
cmp = memcmp(a_fold_info.codepoints, b_fold_info.codepoints, a_fold_info.n_codepoints * sizeof(int));
|
||||
if(cmp != 0)
|
||||
return cmp;
|
||||
|
||||
a_off += a_char_size;
|
||||
b_off += b_char_size;
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int
|
||||
md_link_label_cmp_for_qsort(const void* a, const void* b)
|
||||
{
|
||||
int cmp;
|
||||
|
||||
cmp = md_link_label_cmp(a, b);
|
||||
if(cmp != 0)
|
||||
return cmp;
|
||||
|
||||
/* The specification requests that only first reference definition
|
||||
* with the same label is valid. So make sure we make a stable sort
|
||||
* from the qsort(). */
|
||||
return ((MD_REF_DEF*) a - (MD_REF_DEF*) b);
|
||||
}
|
||||
|
||||
static inline const MD_REF_DEF*
|
||||
md_lookup_ref_def(MD_CTX* ctx, const CHAR* label, SZ label_size)
|
||||
{
|
||||
MD_REF_DEF key;
|
||||
|
||||
key.label = (CHAR*) label;
|
||||
key.label_size = label_size;
|
||||
|
||||
return bsearch(&key, ctx->ref_defs, ctx->n_ref_defs,
|
||||
sizeof(MD_REF_DEF), md_link_label_cmp);
|
||||
}
|
||||
|
||||
static void
|
||||
md_build_ref_def_dict(MD_CTX* ctx)
|
||||
{
|
||||
/* Sort reference definitions so we can use bsearch() for their lookup. */
|
||||
qsort(ctx->ref_defs, ctx->n_ref_defs, sizeof(MD_REF_DEF),
|
||||
md_link_label_cmp_for_qsort);
|
||||
}
|
||||
|
||||
|
||||
/***************************
|
||||
*** Recognizing Links ***
|
||||
***************************/
|
||||
|
||||
/* Note this code is partially shared between processing inlines and blocks
|
||||
* as reference definitions and links share some helper parser functions.
|
||||
*/
|
||||
|
||||
typedef struct MD_LINK_ATTR_tag MD_LINK_ATTR;
|
||||
struct MD_LINK_ATTR_tag {
|
||||
OFF dest_beg;
|
||||
@ -1695,9 +1814,9 @@ md_is_link_title(MD_CTX* ctx, const MD_LINE* lines, int n_lines, OFF beg,
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
/* Returns 0 if it is not a link reference definition.
|
||||
/* Returns 0 if it is not a reference definition.
|
||||
*
|
||||
* Returns N > 0 if it is not a link reference definition (then N corresponds
|
||||
* Returns N > 0 if it is not a reference definition (then N corresponds
|
||||
* to the number of lines forming it). In this case the definition is stored
|
||||
* for resolving any links referring to it.
|
||||
*
|
||||
@ -1722,7 +1841,7 @@ md_is_link_reference_definition(MD_CTX* ctx, const MD_LINE* lines, int n_lines)
|
||||
OFF off;
|
||||
int line_index = 0;
|
||||
int tmp_line_index;
|
||||
MD_LINK_REF_DEF* def;
|
||||
MD_REF_DEF* def;
|
||||
int ret;
|
||||
|
||||
/* Link label. */
|
||||
@ -1786,23 +1905,23 @@ md_is_link_reference_definition(MD_CTX* ctx, const MD_LINE* lines, int n_lines)
|
||||
label_needs_free = TRUE;
|
||||
}
|
||||
|
||||
/* Store the link reference definition. */
|
||||
if(ctx->n_link_ref_defs >= ctx->alloc_link_ref_defs) {
|
||||
MD_LINK_REF_DEF* new_defs;
|
||||
/* Store the reference definition. */
|
||||
if(ctx->n_ref_defs >= ctx->alloc_ref_defs) {
|
||||
MD_REF_DEF* new_defs;
|
||||
|
||||
ctx->alloc_link_ref_defs = (ctx->alloc_link_ref_defs > 0 ? ctx->alloc_link_ref_defs * 2 : 16);
|
||||
new_defs = (MD_LINK_REF_DEF*) realloc(ctx->link_ref_defs, ctx->alloc_link_ref_defs * sizeof(MD_LINK_REF_DEF));
|
||||
ctx->alloc_ref_defs = (ctx->alloc_ref_defs > 0 ? ctx->alloc_ref_defs * 2 : 16);
|
||||
new_defs = (MD_REF_DEF*) realloc(ctx->ref_defs, ctx->alloc_ref_defs * sizeof(MD_REF_DEF));
|
||||
if(new_defs == NULL) {
|
||||
MD_LOG("realloc() failed.");
|
||||
ret = -1;
|
||||
goto abort;
|
||||
}
|
||||
|
||||
ctx->link_ref_defs = new_defs;
|
||||
ctx->ref_defs = new_defs;
|
||||
}
|
||||
|
||||
def = &ctx->link_ref_defs[ctx->n_link_ref_defs];
|
||||
memset(def, 0, sizeof(MD_LINK_REF_DEF));
|
||||
def = &ctx->ref_defs[ctx->n_ref_defs];
|
||||
memset(def, 0, sizeof(MD_REF_DEF));
|
||||
|
||||
def->label = label;
|
||||
def->label_size = label_size;
|
||||
@ -1825,7 +1944,7 @@ md_is_link_reference_definition(MD_CTX* ctx, const MD_LINE* lines, int n_lines)
|
||||
}
|
||||
|
||||
/* Success. */
|
||||
ctx->n_link_ref_defs++;
|
||||
ctx->n_ref_defs++;
|
||||
return line_index + 1;
|
||||
|
||||
abort:
|
||||
@ -1835,118 +1954,11 @@ abort:
|
||||
return -1;
|
||||
}
|
||||
|
||||
|
||||
static OFF
|
||||
md_skip_unicode_whitespace(const CHAR* label, OFF off, SZ size)
|
||||
{
|
||||
SZ char_size;
|
||||
int codepoint;
|
||||
|
||||
while(off < size) {
|
||||
codepoint = md_decode_unicode(label, off, size, &char_size);
|
||||
if(!ISUNICODEWHITESPACE_(codepoint) && !ISNEWLINE_(label[off]))
|
||||
break;
|
||||
off += char_size;
|
||||
}
|
||||
|
||||
return off;
|
||||
}
|
||||
|
||||
static int
|
||||
md_link_label_cmp(const void* a, const void* b)
|
||||
{
|
||||
const CHAR* a_label = ((const MD_LINK_REF_DEF*)a)->label;
|
||||
const CHAR* b_label = ((const MD_LINK_REF_DEF*)b)->label;
|
||||
SZ a_size = ((const MD_LINK_REF_DEF*)a)->label_size;
|
||||
SZ b_size = ((const MD_LINK_REF_DEF*)b)->label_size;
|
||||
OFF a_off;
|
||||
OFF b_off;
|
||||
|
||||
/* The slow path, with Unicode case folding and Unicode whitespace collapsing. */
|
||||
a_off = md_skip_unicode_whitespace(a_label, 0, a_size);
|
||||
b_off = md_skip_unicode_whitespace(b_label, 0, b_size);
|
||||
while(a_off < a_size || b_off < b_size) {
|
||||
int a_codepoint, b_codepoint;
|
||||
SZ a_char_size, b_char_size;
|
||||
int a_is_whitespace, b_is_whitespace;
|
||||
|
||||
if(a_off < a_size) {
|
||||
a_codepoint = md_decode_unicode(a_label, a_off, a_size, &a_char_size);
|
||||
a_is_whitespace = ISUNICODEWHITESPACE_(a_codepoint) || ISNEWLINE_(a_label[a_off]);
|
||||
} else {
|
||||
/* Treat end of label as a whitespace. */
|
||||
a_codepoint = -1;
|
||||
a_is_whitespace = TRUE;
|
||||
}
|
||||
|
||||
if(b_off < b_size) {
|
||||
b_codepoint = md_decode_unicode(b_label, b_off, b_size, &b_char_size);
|
||||
b_is_whitespace = ISUNICODEWHITESPACE_(b_codepoint) || ISNEWLINE_(b_label[b_off]);
|
||||
} else {
|
||||
/* Treat end of label as a whitespace. */
|
||||
b_codepoint = -1;
|
||||
b_is_whitespace = TRUE;
|
||||
}
|
||||
|
||||
if(a_is_whitespace || b_is_whitespace) {
|
||||
if(!a_is_whitespace || !b_is_whitespace)
|
||||
return (a_is_whitespace ? -1 : +1);
|
||||
|
||||
a_off = md_skip_unicode_whitespace(a_label, a_off, a_size);
|
||||
b_off = md_skip_unicode_whitespace(b_label, b_off, b_size);
|
||||
} else {
|
||||
MD_UNICODE_FOLD_INFO a_fold_info, b_fold_info;
|
||||
int cmp;
|
||||
|
||||
md_get_unicode_fold_info(a_codepoint, &a_fold_info);
|
||||
md_get_unicode_fold_info(b_codepoint, &b_fold_info);
|
||||
|
||||
if(a_fold_info.n_codepoints != b_fold_info.n_codepoints)
|
||||
return (a_fold_info.n_codepoints - b_fold_info.n_codepoints);
|
||||
cmp = memcmp(a_fold_info.codepoints, b_fold_info.codepoints, a_fold_info.n_codepoints * sizeof(int));
|
||||
if(cmp != 0)
|
||||
return cmp;
|
||||
|
||||
a_off += a_char_size;
|
||||
b_off += b_char_size;
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int
|
||||
md_link_label_cmp_for_qsort(const void* a, const void* b)
|
||||
{
|
||||
int cmp;
|
||||
|
||||
cmp = md_link_label_cmp(a, b);
|
||||
if(cmp != 0)
|
||||
return cmp;
|
||||
|
||||
/* The specification requests that only first link reference definition
|
||||
* with the same label is valid. So make sure we make a stable sort
|
||||
* from the qsort(). */
|
||||
return ((MD_LINK_REF_DEF*) a - (MD_LINK_REF_DEF*) b);
|
||||
}
|
||||
|
||||
static inline const MD_LINK_REF_DEF*
|
||||
md_lookup_link_ref_def(MD_CTX* ctx, const CHAR* label, SZ label_size)
|
||||
{
|
||||
MD_LINK_REF_DEF key;
|
||||
|
||||
key.label = (CHAR*) label;
|
||||
key.label_size = label_size;
|
||||
|
||||
return bsearch(&key, ctx->link_ref_defs, ctx->n_link_ref_defs,
|
||||
sizeof(MD_LINK_REF_DEF), md_link_label_cmp);
|
||||
}
|
||||
|
||||
static int
|
||||
md_is_link_reference(MD_CTX* ctx, const MD_LINE* lines, int n_lines,
|
||||
OFF beg, OFF end, MD_LINK_ATTR* attr)
|
||||
{
|
||||
const MD_LINK_REF_DEF* def;
|
||||
const MD_REF_DEF* def;
|
||||
const MD_LINE* beg_line;
|
||||
const MD_LINE* end_line;
|
||||
CHAR* label;
|
||||
@ -1978,7 +1990,7 @@ md_is_link_reference(MD_CTX* ctx, const MD_LINE* lines, int n_lines,
|
||||
label_size = end - beg;
|
||||
}
|
||||
|
||||
def = md_lookup_link_ref_def(ctx, label, label_size);
|
||||
def = md_lookup_ref_def(ctx, label, label_size);
|
||||
if(def != NULL) {
|
||||
attr->dest_beg = def->dest_beg;
|
||||
attr->dest_end = def->dest_end;
|
||||
@ -2084,12 +2096,12 @@ abort:
|
||||
}
|
||||
|
||||
static void
|
||||
md_free_link_ref_defs(MD_CTX* ctx)
|
||||
md_free_ref_defs(MD_CTX* ctx)
|
||||
{
|
||||
int i;
|
||||
|
||||
for(i = 0; i < ctx->n_link_ref_defs; i++) {
|
||||
MD_LINK_REF_DEF* def = &ctx->link_ref_defs[i];
|
||||
for(i = 0; i < ctx->n_ref_defs; i++) {
|
||||
MD_REF_DEF* def = &ctx->ref_defs[i];
|
||||
|
||||
if(def->label_needs_free)
|
||||
free(def->label);
|
||||
@ -2097,7 +2109,7 @@ md_free_link_ref_defs(MD_CTX* ctx)
|
||||
free(def->title);
|
||||
}
|
||||
|
||||
free(ctx->link_ref_defs);
|
||||
free(ctx->ref_defs);
|
||||
}
|
||||
|
||||
|
||||
@ -4335,10 +4347,10 @@ md_start_new_block(MD_CTX* ctx, const MD_LINE_ANALYSIS* line)
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Eat from start of current (textual) block any link reference definitions
|
||||
* and remember them so we can resolve any links referring to them.
|
||||
/* Eat from start of current (textual) block any reference definitions and
|
||||
* remember them so we can resolve any links referring to them.
|
||||
*
|
||||
* (Link reference definitions can only be at start of it as they cannot break
|
||||
* (Reference definitions can only be at start of it as they cannot break
|
||||
* a paragraph.)
|
||||
*/
|
||||
static int
|
||||
@ -4349,17 +4361,17 @@ md_consume_link_reference_definitions(MD_CTX* ctx)
|
||||
int n = 0;
|
||||
|
||||
/* Compute how many lines at the start of the block form one or more
|
||||
* link reference definitions. */
|
||||
* reference definitions. */
|
||||
while(n < n_lines) {
|
||||
int n_link_ref_lines;
|
||||
|
||||
n_link_ref_lines = md_is_link_reference_definition(ctx,
|
||||
lines + n, n_lines - n);
|
||||
/* Not a link reference definition? */
|
||||
/* Not a reference definition? */
|
||||
if(n_link_ref_lines == 0)
|
||||
break;
|
||||
|
||||
/* We fail if it is the link ref. def. but it could not be stored due
|
||||
/* We fail if it is the ref. def. but it could not be stored due
|
||||
* a memory allocation error. */
|
||||
if(n_link_ref_lines < 0)
|
||||
return -1;
|
||||
@ -4367,7 +4379,7 @@ md_consume_link_reference_definitions(MD_CTX* ctx)
|
||||
n += n_link_ref_lines;
|
||||
}
|
||||
|
||||
/* If there was at least one link reference definition, we need to remove
|
||||
/* If there was at least one reference definition, we need to remove
|
||||
* its lines from the block, or perhaps even the whole block. */
|
||||
if(n > 0) {
|
||||
if(n == n_lines) {
|
||||
@ -4393,9 +4405,9 @@ md_end_current_block(MD_CTX* ctx)
|
||||
if(ctx->current_block == NULL)
|
||||
return ret;
|
||||
|
||||
/* Check whether there is a link reference definition. (We do this here
|
||||
* instead of in md_analyze_line() because link reference definition can
|
||||
* take multiple lines.) */
|
||||
/* Check whether there is a reference definition. (We do this here instead
|
||||
* of in md_analyze_line() because reference definition can take multiple
|
||||
* lines.) */
|
||||
if(ctx->current_block->type == MD_BLOCK_P) {
|
||||
MD_LINE* lines = (MD_LINE*) (ctx->current_block + 1);
|
||||
if(CH(lines[0].beg) == _T('['))
|
||||
@ -5556,9 +5568,7 @@ md_process_doc(MD_CTX *ctx)
|
||||
|
||||
md_end_current_block(ctx);
|
||||
|
||||
/* Sort link reference definitions so we can use bsearch() for their lookup. */
|
||||
qsort(ctx->link_ref_defs, ctx->n_link_ref_defs, sizeof(MD_LINK_REF_DEF),
|
||||
md_link_label_cmp_for_qsort);
|
||||
md_build_ref_def_dict(ctx);
|
||||
|
||||
/* Process all blocks. */
|
||||
MD_CHECK(md_leave_child_containers(ctx, 0));
|
||||
@ -5626,7 +5636,7 @@ md_parse(const MD_CHAR* text, MD_SIZE size, const MD_RENDERER* renderer, void* u
|
||||
ret = md_process_doc(&ctx);
|
||||
|
||||
/* Clean-up. */
|
||||
md_free_link_ref_defs(&ctx);
|
||||
md_free_ref_defs(&ctx);
|
||||
free(ctx.buffer);
|
||||
free(ctx.marks);
|
||||
free(ctx.block_bytes);
|
||||
|
Loading…
Reference in New Issue
Block a user