Initial commit.
This commit is contained in:
commit
efed58af8e
32
CMakeLists.txt
Normal file
32
CMakeLists.txt
Normal file
@ -0,0 +1,32 @@
|
||||
|
||||
# Top-level build script of MD4C: sets the global compiler flags and then
# descends into the library (md4c) and the command-line utility (md2html).

cmake_minimum_required(VERSION 2.8)
project(MD4C C)


set(CMAKE_CONFIGURATION_TYPES Debug Release RelWithDebInfo MinSizeRel)

# When no build type was given on the command line, fall back to the
# CMAKE_BUILD_TYPE environment variable (read at configure time).
if("${CMAKE_BUILD_TYPE}" STREQUAL "")
    set(CMAKE_BUILD_TYPE $ENV{CMAKE_BUILD_TYPE})
endif()


if(CMAKE_COMPILER_IS_GNUCC)
    set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Wall")

    # By default, CMake uses -O3 for Release builds. Let's stick with safer -O2:
    string(REGEX REPLACE "(^| )-O[0-9a-z]+" "" CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE}")
    set(CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE} -O2")
elseif(MSVC)
    # Disable warnings about the so-called unsecured functions:
    add_definitions(/D_CRT_SECURE_NO_WARNINGS)

    # Specify proper C runtime library. Each configuration extends its OWN
    # flag set; deriving RelWithDebInfo/MinSizeRel from the Release flags
    # would drop their own per-configuration flags and add /MT twice.
    set(CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG} /MTd")
    set(CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE} /MT")
    set(CMAKE_C_FLAGS_RELWITHDEBINFO "${CMAKE_C_FLAGS_RELWITHDEBINFO} /MT")
    set(CMAKE_C_FLAGS_MINSIZEREL "${CMAKE_C_FLAGS_MINSIZEREL} /MT")
endif()


# Put built executables directly into the top-level build directory.
set(EXECUTABLE_OUTPUT_PATH "${PROJECT_BINARY_DIR}")

add_subdirectory(md4c)
add_subdirectory(md2html)
|
25
LICENSE.md
Normal file
25
LICENSE.md
Normal file
@ -0,0 +1,25 @@
|
||||
|
||||
MD4C is licensed under the MIT License.
|
||||
|
||||
> Copyright (c) 2016: Martin Mitáš and other contributors:
|
||||
>
|
||||
> https://github.com/mity/md4c/contributors
|
||||
>
|
||||
> Permission is hereby granted, free of charge, to any person obtaining
|
||||
> a copy of this software and associated documentation files (the
|
||||
> "Software"), to deal in the Software without restriction, including
|
||||
> without limitation the rights to use, copy, modify, merge, publish,
|
||||
> distribute, sublicense, and/or sell copies of the Software, and to
|
||||
> permit persons to whom the Software is furnished to do so, subject to
|
||||
> the following conditions:
|
||||
>
|
||||
> The above copyright notice and this permission notice shall be
|
||||
> included in all copies or substantial portions of the Software.
|
||||
>
|
||||
> THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
> EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
> MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
> NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
||||
> LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
||||
> OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
||||
> WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
155
README.md
Normal file
155
README.md
Normal file
@ -0,0 +1,155 @@
|
||||
|
||||
# MD4C Readme
|
||||
|
||||
Home: http://github.com/mity/md4c
|
||||
|
||||
|
||||
**Warning:** This project is very young (read "immature") and work in progress.
|
||||
Most important features are not yet implemented. See the current status below.
|
||||
And there may be bugs.
|
||||
|
||||
|
||||
## What is Markdown
|
||||
|
||||
In short, Markdown is the markup language this `README.md` file is written in.
|
||||
|
||||
The following resources can explain more if you are unfamiliar with it:
|
||||
* [Wikipedia article](http://en.wikipedia.org/wiki/Markdown)
|
||||
* [CommonMark site](http://commonmark.org)
|
||||
|
||||
|
||||
## What is MD4C
|
||||
|
||||
MD4C stands for "MarkDown for C" and, unsurprisingly, it is a C Markdown parser
|
||||
implementation.
|
||||
|
||||
Main features:
|
||||
* **Compactness:** MD4C is implemented in one source file and one header file.
|
||||
* **Flexibility:** Flags allow tuning the desired dialect of the Markdown
|
||||
parser.
|
||||
* **Encoding agnosticism:** As much as possible, MD4C by design does not care
|
||||
about input text encoding, relying only on the Markdown control characters
|
||||
being ASCII compatible. (The actual text data are propagated back to the
|
||||
caller in the same encoding unchanged.)
|
||||
* **UTF-16LE support:** On Windows, MD4C may be built to consume (and produce)
|
||||
wide strings (`WCHAR*` instead of `char*`).
|
||||
* **Easily embeddable:** MD4C depends only on a few functions of the C standard
|
||||
library.
|
||||
* **Portability:** MD4C builds and works on Windows and Linux, and it should
|
||||
be fairly trivial to build it also on other systems.
|
||||
* **Permissive license:** MD4C is available under the MIT license.
|
||||
|
||||
|
||||
## Using MD4C
|
||||
|
||||
The parser is implemented in a single C source file `md4c.c` and its
|
||||
accompanying header `md4c.h`.
|
||||
|
||||
The main provided function is `md_parse()`. It takes a text in Markdown syntax
|
||||
as an input and a renderer structure which holds pointers to a few callback
|
||||
functions. As `md_parse()` eats the input, it calls appropriate callbacks
|
||||
allowing application to convert it into another format or render it onto
|
||||
the screen.
|
||||
|
||||
Refer to the header file for more details; the API is mostly self-explanatory
|
||||
and there are some explanatory comments.
|
||||
|
||||
An example implementation of a simple renderer is available in the `md2html`
|
||||
directory which implements a conversion utility from Markdown to HTML.
|
||||
|
||||
|
||||
## Current status ##
|
||||
|
||||
### CommonMark Specification ###
|
||||
|
||||
The goal is to be compliant with the latest version of
|
||||
[CommonMark specification](http://spec.commonmark.org/).
|
||||
|
||||
The list below corresponds to chapters of the specification version 0.26 and
|
||||
more or less forms our to do list.
|
||||
|
||||
- **Preliminaries:**
|
||||
- [ ] 2.1 Characters and lines
|
||||
- [ ] 2.2 Tabs
|
||||
- [ ] 2.3 Insecure characters
|
||||
|
||||
- **Blocks and Inlines:**
|
||||
- [ ] 3.1 Precedence
|
||||
- [ ] 3.2 Container blocks and leaf blocks
|
||||
|
||||
- **Leaf Blocks:**
|
||||
- [ ] 4.1 Thematic breaks
|
||||
- [ ] 4.2 ATX headings
|
||||
- [ ] 4.3 Setext headings
|
||||
- [ ] 4.4 Indented code blocks
|
||||
- [ ] 4.5 Fenced code blocks
|
||||
- [ ] 4.6 HTML blocks
|
||||
- [ ] 4.7 Link reference definitions
|
||||
- [x] 4.8 Paragraphs
|
||||
- [x] 4.9 Blank lines
|
||||
|
||||
- **Container Blocks:**
|
||||
- [ ] 5.1 Block quotes
|
||||
- [ ] 5.2 List items
|
||||
- [ ] 5.3 Lists
|
||||
|
||||
- **Inlines:**
|
||||
- [ ] 6.1 Backslash escapes
|
||||
- [ ] 6.2 Entity and numeric character references
|
||||
- [ ] 6.3 Code spans
|
||||
- [ ] 6.4 Emphasis and strong emphasis
|
||||
- [ ] 6.5 Links
|
||||
- [ ] 6.6 Images
|
||||
- [ ] 6.7 Autolinks
|
||||
- [ ] 6.8 Raw HTML
|
||||
- [ ] 6.9 Hard line breaks
|
||||
- [ ] 6.10 Soft line breaks
|
||||
- [x] 6.11 Textual content
|
||||
|
||||
|
||||
### Considered Extensions ###
|
||||
|
||||
Aside from CommonMark features, various Markdown implementations out there support
|
||||
various extensions and/or some deviations from the CommonMark specification
|
||||
which may be found desired or useful in some situations.
|
||||
|
||||
Therefore some extensions or deviations from the CommonMark specification may
|
||||
be considered and implemented. However, such extensions and deviations from the
|
||||
standard shall be enabled only if explicitly enabled by the application.
|
||||
|
||||
Default behavior shall stick to the CommonMark specification.
|
||||
|
||||
The list below is an incomplete list of extensions I see as worthy of
|
||||
consideration.
|
||||
|
||||
- **Block Extensions:**
|
||||
- [ ] Tables
|
||||
- [ ] Header anchors: `## Chapter {#anchor}`
|
||||
(allowing fragment links pointing to it, e.g. `[link text](#anchor)`)
|
||||
|
||||
- **Inline Extensions:**
|
||||
- [ ] Underline: `__foo bar__`
|
||||
- [ ] Strikethrough: `~~foo bar~~`
|
||||
- [ ] Highlight: `==foo bar==`
|
||||
- [ ] Quote: `"foo bar"`
|
||||
- [ ] Superscript: `a^2^ + b^2^ = c^2^`
|
||||
- [ ] Subscript: `matrix A~i,j~`
|
||||
|
||||
- **Miscellaneous:**
|
||||
- [ ] Permissive ATX headers: `###Header` (without space)
|
||||
- [ ] Permissive autolinks: `http://google.com` (without `<`...`>`)
|
||||
- [ ] Disabling indented code blocks
|
||||
- [ ] Disabling raw HTML blocks/spans
|
||||
|
||||
|
||||
## License
|
||||
|
||||
MD4C is covered by the MIT license, see the file `LICENSE.md`.
|
||||
|
||||
|
||||
## Reporting Bugs
|
||||
|
||||
If you encounter any bug, please be so kind and report it. Unheard bugs cannot
|
||||
get fixed. You can submit bug reports here:
|
||||
|
||||
* http://github.com/mity/md4c/issues
|
5
md2html/CMakeLists.txt
Normal file
5
md2html/CMakeLists.txt
Normal file
@ -0,0 +1,5 @@
|
||||
|
||||
# Build script of the md2html utility.

# md2html.c includes "md4c.h", which lives in the sibling md4c directory.
include_directories("${PROJECT_SOURCE_DIR}/md4c")

add_executable(md2html
    cmdline.c
    cmdline.h
    md2html.c
)

# Link the utility against the md4c library target.
target_link_libraries(md2html md4c)
|
296
md2html/cmdline.c
Normal file
296
md2html/cmdline.c
Normal file
@ -0,0 +1,296 @@
|
||||
/* cmdline.c: a reentrant version of getopt(). Written 2006 by Brian
|
||||
* Raiter. This code is in the public domain.
|
||||
*/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <ctype.h>
|
||||
#include "cmdline.h"
|
||||
|
||||
/* Call the user callback with (opt, val, data) and keep its result in r.
 * A nonzero result is returned from the enclosing function immediately.
 * The names callback, data and r must be in scope where the macro is used.
 */
#define docallback(opt, val)                    \
    do {                                        \
        r = callback(opt, val, data);           \
        if (r != 0)                             \
            return r;                           \
    } while (0)
|
||||
|
||||
/* Parse the given cmdline arguments.
|
||||
*/
|
||||
int readoptions(option const* list, int argc, char **argv,
|
||||
int (*callback)(int, char const*, void*), void *data)
|
||||
{
|
||||
char argstring[] = "--";
|
||||
option const *opt;
|
||||
char const *val;
|
||||
char const *p;
|
||||
int stop = 0;
|
||||
int argi, len, r;
|
||||
|
||||
if (!list || !callback)
|
||||
return -1;
|
||||
|
||||
for (argi = 1 ; argi < argc ; ++argi)
|
||||
{
|
||||
/* First, check for "--", which forces all remaining arguments
|
||||
* to be treated as non-options.
|
||||
*/
|
||||
if (!stop && argv[argi][0] == '-' && argv[argi][1] == '-'
|
||||
&& argv[argi][2] == '\0') {
|
||||
stop = 1;
|
||||
continue;
|
||||
}
|
||||
|
||||
/* Arguments that do not begin with '-' (or are only "-") are
|
||||
* not options.
|
||||
*/
|
||||
if (stop || argv[argi][0] != '-' || argv[argi][1] == '\0') {
|
||||
docallback(0, argv[argi]);
|
||||
continue;
|
||||
}
|
||||
|
||||
if (argv[argi][1] == '-')
|
||||
{
|
||||
/* Arguments that begin with a double-dash are long
|
||||
* options.
|
||||
*/
|
||||
p = argv[argi] + 2;
|
||||
val = strchr(p, '=');
|
||||
if (val)
|
||||
len = val++ - p;
|
||||
else
|
||||
len = strlen(p);
|
||||
|
||||
/* Is it on the list of valid options? If so, does it
|
||||
* expect a parameter?
|
||||
*/
|
||||
for (opt = list ; opt->optval ; ++opt)
|
||||
if (opt->name && !strncmp(p, opt->name, len)
|
||||
&& !opt->name[len])
|
||||
break;
|
||||
if (!opt->optval) {
|
||||
docallback('?', argv[argi]);
|
||||
} else if (!val && opt->arg == 1) {
|
||||
docallback(':', argv[argi]);
|
||||
} else if (val && opt->arg == 0) {
|
||||
docallback('=', argv[argi]);
|
||||
} else {
|
||||
docallback(opt->optval, val);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
/* Arguments that begin with a single dash contain one or
|
||||
* more short options. Each character in the argument is
|
||||
* examined in turn, unless a parameter consumes the rest
|
||||
* of the argument (or possibly even the following
|
||||
* argument).
|
||||
*/
|
||||
for (p = argv[argi] + 1 ; *p ; ++p) {
|
||||
for (opt = list ; opt->optval ; ++opt)
|
||||
if (opt->chname == *p)
|
||||
break;
|
||||
if (!opt->optval) {
|
||||
argstring[1] = *p;
|
||||
docallback('?', argstring);
|
||||
continue;
|
||||
} else if (opt->arg == 0) {
|
||||
docallback(opt->optval, NULL);
|
||||
continue;
|
||||
} else if (p[1]) {
|
||||
docallback(opt->optval, p + 1);
|
||||
break;
|
||||
} else if (argi + 1 < argc && strcmp(argv[argi + 1], "--")) {
|
||||
++argi;
|
||||
docallback(opt->optval, argv[argi]);
|
||||
break;
|
||||
} else if (opt->arg == 2) {
|
||||
docallback(opt->optval, NULL);
|
||||
continue;
|
||||
} else {
|
||||
argstring[1] = *p;
|
||||
docallback(':', argstring);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Verify that str points to an ASCII zero or one (optionally with
 * whitespace) and return the value present, or -1 if str's contents
 * are anything else.
 */
static int readboolvalue(char const *str)
{
    char d;

    /* isspace() is only defined for values representable as unsigned char
     * (or EOF), hence the casts: plain char may be negative for bytes with
     * the high bit set.
     */
    while (isspace((unsigned char)*str))
        ++str;
    if (!*str)
        return -1;

    d = *str++;

    while (isspace((unsigned char)*str))
        ++str;
    if (*str)
        return -1;      /* more than a single non-space character */

    if (d == '0')
        return 0;
    else if (d == '1')
        return 1;
    else
        return -1;
}
|
||||
|
||||
/* Parse a configuration file.
|
||||
*/
|
||||
int readcfgfile(option const* list, FILE *fp,
|
||||
int (*callback)(int, char const*, void*), void *data)
|
||||
{
|
||||
char buf[1024];
|
||||
option const *opt;
|
||||
char *name, *val, *p;
|
||||
int len, f, r;
|
||||
|
||||
while (fgets(buf, sizeof buf, fp) != NULL)
|
||||
{
|
||||
/* Strip off the trailing newline and any leading whitespace.
|
||||
* If the line begins with a hash sign, skip it entirely.
|
||||
*/
|
||||
len = strlen(buf);
|
||||
if (len && buf[len - 1] == '\n')
|
||||
buf[--len] = '\0';
|
||||
for (p = buf ; isspace(*p) ; ++p) ;
|
||||
if (!*p || *p == '#')
|
||||
continue;
|
||||
|
||||
/* Find the end of the option's name and the beginning of the
|
||||
* parameter, if any.
|
||||
*/
|
||||
for (name = p ; *p && *p != '=' && !isspace(*p) ; ++p) ;
|
||||
len = p - name;
|
||||
for ( ; *p == '=' || isspace(*p) ; ++p) ;
|
||||
val = p;
|
||||
|
||||
/* Is it on the list of valid options? Does it take a
|
||||
* full parameter, or just an optional boolean?
|
||||
*/
|
||||
for (opt = list ; opt->optval ; ++opt)
|
||||
if (opt->name && !strncmp(name, opt->name, len)
|
||||
&& !opt->name[len])
|
||||
break;
|
||||
if (!opt->optval) {
|
||||
docallback('?', name);
|
||||
} else if (!*val && opt->arg == 1) {
|
||||
docallback(':', name);
|
||||
} else if (*val && opt->arg == 0) {
|
||||
f = readboolvalue(val);
|
||||
if (f < 0)
|
||||
docallback('=', name);
|
||||
else if (f == 1)
|
||||
docallback(opt->optval, NULL);
|
||||
} else {
|
||||
docallback(opt->optval, val);
|
||||
}
|
||||
}
|
||||
return ferror(fp) ? -1 : 0;
|
||||
}
|
||||
|
||||
/* Turn a string containing a cmdline into an argc-argv pair.
 *
 * On success 1 is returned, *argcp holds the number of arguments plus one
 * (argv[0] is always NULL, the first argument is argv[1]) and, if argvp is
 * not NULL, *argvp points to a single malloc()ed block holding both the
 * pointer array (NULL-terminated) and the argument strings. Returns 0 if
 * cmdline is NULL or if the allocation fails.
 */
int makecmdline(char const *cmdline, int *argcp, char ***argvp)
{
    char **argv;
    int argc;
    char const *s;
    int n, quoted;

    if (!cmdline)
        return 0;

    /* Calculate argc by counting the number of "clumps" of non-spaces.
     * (The unsigned char casts keep isspace() well-defined for bytes with
     * the high bit set.)
     */
    for (s = cmdline ; isspace((unsigned char)*s) ; ++s) ;
    if (!*s) {
        /* Nothing but whitespace: an argv holding only the NULL argv[0]. */
        *argcp = 1;
        if (argvp) {
            *argvp = malloc(2 * sizeof(char*));
            if (!*argvp)
                return 0;
            (*argvp)[0] = NULL;
            (*argvp)[1] = NULL;
        }
        return 1;
    }
    for (argc = 2, quoted = 0 ; *s ; ++s) {
        if (quoted == '"') {
            if (*s == '"')
                quoted = 0;
            else if (*s == '\\' && s[1])
                ++s;
        } else if (quoted == '\'') {
            if (*s == '\'')
                quoted = 0;
        } else {
            if (isspace((unsigned char)*s)) {
                for ( ; isspace((unsigned char)s[1]) ; ++s) ;
                if (!s[1])
                    break;
                ++argc;
            } else if (*s == '"' || *s == '\'') {
                quoted = *s;
            }
        }
    }

    *argcp = argc;
    if (!argvp)
        return 1;

    /* Allocate space for all the arguments and their pointers.
     */
    argv = malloc((argc + 1) * sizeof(char*) + strlen(cmdline) + 1);
    *argvp = argv;
    if (!argv)
        return 0;
    argv[0] = NULL;
    argv[1] = (char*)(argv + argc + 1);

    /* Copy the string into the allocated memory immediately after the
     * argv array. Where spaces immediately follow a nonspace,
     * replace them with a \0. Where a nonspace immediately follows
     * spaces, store a pointer to it. (Except, of course, when the
     * space-nonspace transitions occur within quotes.)
     */
    for (s = cmdline ; isspace((unsigned char)*s) ; ++s) ;
    for (argc = 1, n = 0, quoted = 0 ; *s ; ++s) {
        if (quoted == '"') {
            if (*s == '"') {
                quoted = 0;
            } else {
                if (*s == '\\' && s[1])
                    ++s;
                argv[argc][n++] = *s;
            }
        } else if (quoted == '\'') {
            if (*s == '\'')
                quoted = 0;
            else
                argv[argc][n++] = *s;
        } else {
            if (isspace((unsigned char)*s)) {
                argv[argc][n] = '\0';
                for ( ; isspace((unsigned char)s[1]) ; ++s) ;
                if (!s[1])
                    break;
                argv[argc + 1] = argv[argc] + n + 1;
                ++argc;
                n = 0;
            } else {
                if (*s == '"' || *s == '\'')
                    quoted = *s;
                else
                    argv[argc][n++] = *s;
            }
        }
    }

    /* Terminate the last argument. The loop above writes the terminator
     * only when whitespace follows an argument, so without this an input
     * that does not end in whitespace leaves the final string unterminated.
     */
    argv[argc][n] = '\0';
    argv[argc + 1] = NULL;
    return 1;
}
|
86
md2html/cmdline.h
Normal file
86
md2html/cmdline.h
Normal file
@ -0,0 +1,86 @@
|
||||
/* cmdline.h: a reentrant version of getopt(). Written 2006 by Brian
|
||||
* Raiter. This code is in the public domain.
|
||||
*/
|
||||
|
||||
#ifndef _cmdline_h_
|
||||
#define _cmdline_h_
|
||||
|
||||
/* Description of one command line option. An array of these, ended by an
 * entry whose optval is zero, is what readoptions() and readcfgfile()
 * search.
 */
typedef struct option {
    /* Long name, matched against the text following "--"; NULL or ""
     * if the option has no long form. */
    char const *name;
    /* Single-character name, matched against the characters following a
     * single "-"; zero if the option has no short form. */
    char        chname;
    /* Nonzero value handed to the callback when this option is found. */
    int         optval;
    /* 0 = takes no parameter, 1 = parameter required, 2 = optional. */
    int         arg;
} option;
|
||||
|
||||
/* Parse the given cmdline arguments. list is an array of option
|
||||
* structs, each entry specifying a valid option. The last struct in
|
||||
* the array must have optval set to zero. argc and argv give the
|
||||
* cmdline to parse. callback is the function to call for each option
|
||||
* and non-option found on the cmdline. data is a pointer that is
|
||||
* passed to each invocation of callback. The return value of callback
|
||||
* should be zero to continue processing the cmdline, or any other
|
||||
* value to abort. The return value of readoptions() is the value
|
||||
* returned from the last callback, or zero if no arguments were
|
||||
* found, or -1 if an error occurred.
|
||||
*
|
||||
* When readoptions() encounters a regular cmdline argument (i.e. a
|
||||
* non-option argument), callback() is invoked with opt equal to zero
|
||||
* and val pointing to the argument. When an option is found,
|
||||
* callback() is invoked with opt equal to the optval field in the
|
||||
* option struct corresponding to that option, and val points to the
|
||||
* option's parameter, or is NULL if the option does not take a
|
||||
* parameter. If readoptions() finds an option that does not appear in
|
||||
* the list of valid options, callback() is invoked with opt equal to
|
||||
* '?'. If readoptions() encounters an option that is missing its
|
||||
* required parameter, callback() is invoked with opt equal to ':'. If
|
||||
* readoptions() finds a parameter on a long option that does not
|
||||
* admit a parameter, callback() is invoked with opt equal to '='. In
|
||||
* each of these cases, val will point to the erroneous option
|
||||
* argument.
|
||||
*/
|
||||
extern int readoptions(option const* list, int argc, char **argv,
|
||||
int (*callback)(int opt, char const *val, void *data),
|
||||
void *data);
|
||||
|
||||
/* Parse the given file. list is an array of option structs, in the
|
||||
* same form as taken by readoptions(). fp is a pointer to an open
|
||||
* text file. callback is the function to call for each line found in
|
||||
* the configuration file. data is a pointer that is passed to each
|
||||
* invocation of callback. The return value of readcfgfile() is the
|
||||
* value returned from the last callback, or zero if no arguments were
|
||||
* found, or -1 if an error occurred while reading the file.
|
||||
*
|
||||
* The function will ignore lines that contain only whitespace, or
|
||||
* lines that begin with a hash sign. All other lines should be of the
|
||||
* form "OPTION=VALUE", where OPTION is one of the long options in
|
||||
* list. Whitespace around the equal sign is permitted. An option that
|
||||
* takes no arguments can either have a VALUE of 0 or 1, or omit the
|
||||
* "=VALUE" entirely. (A VALUE of 0 will behave the same as if the
|
||||
* line was not present.)
|
||||
*/
|
||||
extern int readcfgfile(option const* list, FILE *fp,
|
||||
int (*callback)(int opt, char const *val, void *data),
|
||||
void *data);
|
||||
|
||||
|
||||
/* Create an argc-argv pair from a string containing a command line.
|
||||
* cmdline is the string to be parsed. argcp points to the variable to
|
||||
* receive the argc value, and argvp points to the variable to receive
|
||||
* the argv value. argvp can be NULL if the caller just wants to get
|
||||
* argc. Zero is returned on failure. This function allocates memory
|
||||
* on behalf of the caller. The memory is allocated as a single block,
|
||||
* so it is sufficient to simply free() the pointer returned through
|
||||
* argvp. Note that argv[0] will always be initialized to NULL; the
|
||||
* first argument will be stored in argv[1]. The string is parsed by
|
||||
* separating arguments on whitespace boundaries. Space within
|
||||
* substrings enclosed in single-quotes is ignored. A substring
|
||||
* enclosed in double-quotes is treated the same, except that the
|
||||
* backslash is recognized as an escape character within such a
|
||||
* substring. Enclosing quotes and escaping backslashes are not copied
|
||||
* into the argv values.
|
||||
*/
|
||||
extern int makecmdline(char const *cmdline, int *argcp, char ***argvp);
|
||||
|
||||
#endif
|
367
md2html/md2html.c
Normal file
367
md2html/md2html.c
Normal file
@ -0,0 +1,367 @@
|
||||
/*
|
||||
* MD4C: Markdown parser for C
|
||||
* (http://github.com/mity/md4c)
|
||||
*
|
||||
* Copyright (c) 2016 Martin Mitas
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
||||
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
* IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdint.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <time.h>
|
||||
|
||||
#include "md4c.h"
|
||||
#include "cmdline.h"
|
||||
|
||||
|
||||
/********************************
|
||||
*** Simple growable buffer ***
|
||||
********************************/
|
||||
|
||||
/* We render to a memory buffer instead of directly outputting the rendered
|
||||
* documents, as this allows using this utility for evaluating performance
|
||||
* of MD4C (--stat option). This allows us to measure just time of the parser,
|
||||
* without the I/O.
|
||||
*/
|
||||
|
||||
struct membuffer {
|
||||
char* data;
|
||||
MD_SIZE asize;
|
||||
MD_SIZE size;
|
||||
};
|
||||
|
||||
static void
|
||||
membuf_init(struct membuffer* buf, MD_SIZE new_asize)
|
||||
{
|
||||
buf->size = 0;
|
||||
buf->asize = new_asize;
|
||||
buf->data = malloc(buf->asize);
|
||||
if(buf->data == NULL) {
|
||||
fprintf(stderr, "membuf_init: malloc() failed.");
|
||||
exit(1);
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
membuf_fini(struct membuffer* buf)
|
||||
{
|
||||
if(buf->data)
|
||||
free(buf->data);
|
||||
}
|
||||
|
||||
static void
|
||||
membuf_grow(struct membuffer* buf, MD_SIZE new_asize)
|
||||
{
|
||||
buf->data = realloc(buf->data, new_asize);
|
||||
if(buf->data == NULL) {
|
||||
fprintf(stderr, "membuf_grow: realloc() failed.");
|
||||
exit(1);
|
||||
}
|
||||
buf->asize = new_asize;
|
||||
}
|
||||
|
||||
static void
|
||||
membuf_append(struct membuffer* buf, const char* data, MD_SIZE size)
|
||||
{
|
||||
if(buf->asize < buf->size + size)
|
||||
membuf_grow(buf, (buf->size + size) * 2);
|
||||
memcpy(buf->data + buf->size, data, size);
|
||||
buf->size += size;
|
||||
}
|
||||
|
||||
/* Append a NUL-terminated string (without its terminator) to a membuffer. */
#define MEMBUF_APPEND_LITERAL(buf, literal)                             \
            membuf_append((buf), (literal), strlen(literal))

/* Nonzero for the four characters that membuf_append_escaped() does not
 * copy verbatim. */
#define HTML_NEED_ESCAPE(ch)                                            \
            ((ch) == '&' || (ch) == '<' || (ch) == '>' || (ch) == '"')
|
||||
|
||||
static void
|
||||
membuf_append_escaped(struct membuffer* buf, const char* data, MD_SIZE size)
|
||||
{
|
||||
MD_OFFSET beg = 0;
|
||||
MD_OFFSET off = 0;
|
||||
|
||||
/* Some characters need to be escaped in normal HTML text. */
|
||||
|
||||
while(1) {
|
||||
while(off < size && !HTML_NEED_ESCAPE(data[off]))
|
||||
off++;
|
||||
if(off > beg)
|
||||
membuf_append(buf, data + beg, off - beg);
|
||||
|
||||
if(off < size) {
|
||||
switch(data[off]) {
|
||||
case '&': MEMBUF_APPEND_LITERAL(buf, "&"); break;
|
||||
case '<': MEMBUF_APPEND_LITERAL(buf, "<"); break;
|
||||
case '>': MEMBUF_APPEND_LITERAL(buf, ">"); break;
|
||||
case '"': MEMBUF_APPEND_LITERAL(buf, """); break;
|
||||
}
|
||||
off++;
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
beg = off;
|
||||
}
|
||||
}
|
||||
|
||||
/**************************************
|
||||
*** HTML renderer implementation ***
|
||||
**************************************/
|
||||
|
||||
static int
|
||||
enter_block_callback(MD_BLOCKTYPE type, void* detail, void* userdata)
|
||||
{
|
||||
struct membuffer* out = (struct membuffer*) userdata;
|
||||
|
||||
switch(type) {
|
||||
case MD_BLOCK_DOC: /* noop */ break;
|
||||
case MD_BLOCK_P: MEMBUF_APPEND_LITERAL(out, "<p>"); break;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int
|
||||
leave_block_callback(MD_BLOCKTYPE type, void* detail, void* userdata)
|
||||
{
|
||||
struct membuffer* out = (struct membuffer*) userdata;
|
||||
|
||||
switch(type) {
|
||||
case MD_BLOCK_DOC: /*noop*/ break;
|
||||
case MD_BLOCK_P: MEMBUF_APPEND_LITERAL(out, "</p>\n"); break;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int
|
||||
enter_span_callback(MD_SPANTYPE type, void* detail, void* userdata)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int
|
||||
leave_span_callback(MD_SPANTYPE type, void* detail, void* userdata)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int
|
||||
text_callback(MD_TEXTTYPE type, const MD_CHAR* text, MD_SIZE size, void* userdata)
|
||||
{
|
||||
struct membuffer* out = (struct membuffer*) userdata;
|
||||
|
||||
switch(type) {
|
||||
default: membuf_append_escaped(out, text, size); break;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Debug-log callback: prints msg as an error line on standard error. */
static void
debug_log_callback(const char* msg, void* userdata)
{
    (void) userdata;

    fprintf(stderr, "Error:%s\n", msg);
}
|
||||
|
||||
|
||||
/**********************
|
||||
*** Main program ***
|
||||
**********************/
|
||||
|
||||
static int
|
||||
process_file(FILE* in, FILE* out, unsigned flags, int fullhtml, int print_stat)
|
||||
{
|
||||
MD_RENDERER renderer = {
|
||||
enter_block_callback,
|
||||
leave_block_callback,
|
||||
enter_span_callback,
|
||||
leave_span_callback,
|
||||
text_callback,
|
||||
debug_log_callback,
|
||||
flags
|
||||
};
|
||||
|
||||
MD_SIZE n;
|
||||
struct membuffer buf_in = {0};
|
||||
struct membuffer buf_out = {0};
|
||||
int ret = -1;
|
||||
clock_t t0, t1;
|
||||
|
||||
membuf_init(&buf_in, 32 * 1024);
|
||||
|
||||
/* Read the input file into a buffer. */
|
||||
while(1) {
|
||||
if(buf_in.size >= buf_in.asize)
|
||||
membuf_grow(&buf_in, 2 * buf_in.asize);
|
||||
|
||||
n = fread(buf_in.data + buf_in.size, 1, buf_in.asize - buf_in.size, in);
|
||||
if(n == 0)
|
||||
break;
|
||||
buf_in.size += n;
|
||||
}
|
||||
|
||||
/* Input size is good estimation of output size. Add some more reserve to
|
||||
* deal with the HTML header/footer and tags. */
|
||||
membuf_init(&buf_out, buf_in.size + buf_in.size/8 + 64);
|
||||
|
||||
/* Parse the document. This shall call our callbacks provided via the
|
||||
* md_renderer_t structure. */
|
||||
t0 = clock();
|
||||
ret = md_parse(buf_in.data, buf_in.size, &renderer, (void*) &buf_out);
|
||||
t1 = clock();
|
||||
if(ret != 0) {
|
||||
fprintf(stderr, "Parsing failed.\n");
|
||||
goto out;
|
||||
}
|
||||
|
||||
/* Write down the document in the HTML format. */
|
||||
if(fullhtml) {
|
||||
fprintf(out, "<html>\n");
|
||||
fprintf(out, "<head>\n");
|
||||
fprintf(out, "<title></title>\n");
|
||||
fprintf(out, "<meta name=\"generator\" content=\"md2html\">\n");
|
||||
fprintf(out, "</head>\n");
|
||||
fprintf(out, "<body>\n");
|
||||
}
|
||||
|
||||
fwrite(buf_out.data, 1, buf_out.size, out);
|
||||
|
||||
if(fullhtml) {
|
||||
fprintf(out, "</body>\n");
|
||||
fprintf(out, "</html>\n");
|
||||
}
|
||||
|
||||
if(print_stat) {
|
||||
if(t0 != (clock_t)-1 && t1 != (clock_t)-1) {
|
||||
double elapsed = (double)(t1 - t0) / CLOCKS_PER_SEC;
|
||||
if (elapsed < 1)
|
||||
fprintf(stderr, "Time spent on parsing: %7.2f ms.\n", elapsed*1e3);
|
||||
else
|
||||
fprintf(stderr, "Time spent on parsing: %6.3f s.\n", elapsed);
|
||||
}
|
||||
}
|
||||
|
||||
/* Success if we have reached here. */
|
||||
ret = 0;
|
||||
|
||||
out:
|
||||
membuf_fini(&buf_in);
|
||||
membuf_fini(&buf_out);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
||||
#define OPTION_ARG_NONE 0
|
||||
#define OPTION_ARG_REQUIRED 1
|
||||
#define OPTION_ARG_OPTIONAL 2
|
||||
|
||||
static const option cmdline_options[] = {
|
||||
{ "output", 'o', 'o', OPTION_ARG_REQUIRED },
|
||||
{ "full-html", 'f', 'f', OPTION_ARG_NONE },
|
||||
{ "stat", 's', 's', OPTION_ARG_NONE },
|
||||
{ "help", 'h', 'h', OPTION_ARG_NONE },
|
||||
{ 0 }
|
||||
};
|
||||
|
||||
/* Print the command line help to standard output. */
static void
usage(void)
{
    static const char help_text[] =
        "Usage: md2html [OPTION]... [FILE]\n"
        "Convert input FILE (or standard input) in Markdown format to HTML.\n"
        "\n"
        "General options:\n"
        " -o --output=FILE output file (default is standard output)\n"
        " -f, --full-html generate full HTML document, including header\n"
        " -s, --stat measure time of input parsing\n"
        " -h, --help display this help and exit\n";

    fputs(help_text, stdout);
}
|
||||
|
||||
/* Settings collected from the command line by cmdline_callback(). */
static const char* input_path = NULL;   /* input file; NULL if none was given */
static const char* output_path = NULL;  /* value of -o/--output; NULL if absent */
static int want_fullhtml = 0;           /* set by -f/--full-html */
static int want_stat = 0;               /* set by -s/--stat */
|
||||
|
||||
static int
|
||||
cmdline_callback(int opt, char const* value, void* data)
|
||||
{
|
||||
switch(opt) {
|
||||
case 0:
|
||||
if(input_path) {
|
||||
fprintf(stderr, "Too many arguments. Only one input file can be specified.\n");
|
||||
fprintf(stderr, "Use --help for more info.\n");
|
||||
exit(1);
|
||||
}
|
||||
input_path = value;
|
||||
break;
|
||||
|
||||
case 'o': output_path = value; break;
|
||||
case 'f': want_fullhtml = 1; break;
|
||||
case 's': want_stat = 1; break;
|
||||
case 'h': usage(); exit(0); break;
|
||||
|
||||
default:
|
||||
fprintf(stderr, "Illegal option: %s\n", value);
|
||||
fprintf(stderr, "Use --help for more info.\n");
|
||||
exit(1);
|
||||
break;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int
|
||||
main(int argc, char** argv)
|
||||
{
|
||||
FILE* in = stdin;
|
||||
FILE* out = stdout;
|
||||
int ret = 0;
|
||||
|
||||
if(readoptions(cmdline_options, argc, argv, cmdline_callback, NULL) < 0) {
|
||||
usage();
|
||||
exit(1);
|
||||
}
|
||||
|
||||
if(input_path != NULL && strcmp(input_path, "-") != 0) {
|
||||
in = fopen(input_path, "rb");
|
||||
if(in == NULL) {
|
||||
fprintf(stderr, "Cannot open %s.\n", input_path);
|
||||
exit(1);
|
||||
}
|
||||
}
|
||||
if(output_path != NULL && strcmp(output_path, "-") != 0) {
|
||||
out = fopen(output_path, "wt");
|
||||
if(out == NULL) {
|
||||
fprintf(stderr, "Cannot open %s.\n", input_path);
|
||||
exit(1);
|
||||
}
|
||||
}
|
||||
|
||||
ret = process_file(in, out, 0, want_fullhtml, want_stat);
|
||||
if(in != stdin)
|
||||
fclose(in);
|
||||
if(out != stdout)
|
||||
fclose(out);
|
||||
|
||||
return ret;
|
||||
}
|
2
md4c/CMakeLists.txt
Normal file
2
md4c/CMakeLists.txt
Normal file
@ -0,0 +1,2 @@
|
||||
|
||||
# The Markdown parser itself, built as a static library from a single
# source file and its public header.
add_library(md4c STATIC md4c.c md4c.h)
|
406
md4c/md4c.c
Normal file
406
md4c/md4c.c
Normal file
@ -0,0 +1,406 @@
|
||||
/*
|
||||
* MD4C: Markdown parser for C
|
||||
* (http://github.com/mity/md4c)
|
||||
*
|
||||
* Copyright (c) 2016 Martin Mitas
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
||||
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
* IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "md4c.h"
|
||||
|
||||
#include <stdarg.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
|
||||
/*****************************
|
||||
*** Miscellaneous Stuff ***
|
||||
*****************************/
|
||||
|
||||
/* Magic to support UTF16-LE (i.e. what is called Unicode among Windows
 * developers) input/output on Windows.
 *
 * _T(x) wraps a character or string literal so that it gets the L prefix
 * when md4c.h has switched MD_CHAR to WCHAR. md4c.h announces that by
 * defining MD4C_USE_WIN_UNICODE, so that is the macro which has to be
 * tested here. */
#ifdef _T
    #undef _T
#endif
#if defined _WIN32 && defined MD4C_USE_WIN_UNICODE
    #define _T(x)           L##x
#else
    #define _T(x)           x
#endif
|
||||
|
||||
/* Misc. macros. */

/* Number of elements of a true array (not of a pointer). The argument is
 * parenthesized so that any array-valued expression can be passed safely. */
#define SIZEOF_ARRAY(a)     (sizeof(a) / sizeof((a)[0]))
|
||||
|
||||
|
||||
/************************
|
||||
*** Internal Types ***
|
||||
************************/
|
||||
|
||||
/* These are omnipresent so lets save some typing. */
|
||||
typedef MD_CHAR CHAR;
|
||||
typedef MD_SIZE SZ;
|
||||
typedef MD_OFFSET OFF;
|
||||
|
||||
/* Context propagated through all the parsing. */
|
||||
typedef struct MD_CTX_tag MD_CTX;
|
||||
struct MD_CTX_tag {
|
||||
/* Immutables (parameters of md_parse()). */
|
||||
const CHAR* text;
|
||||
SZ size;
|
||||
MD_RENDERER r;
|
||||
void* userdata;
|
||||
};
|
||||
|
||||
/* Type of a single line as determined by md_analyze_line().
 *
 * The enumeration is defined before the typedef refers to it: ISO C does not
 * allow naming an enum type ahead of its definition. */
enum MD_LINETYPE_tag {
    MD_LINE_BLANK,      /* Empty line or line made only of blanks. */
    MD_LINE_TEXT        /* Line with some (normal) text. */
};
typedef enum MD_LINETYPE_tag MD_LINETYPE;
|
||||
|
||||
typedef struct MD_LINE_tag MD_LINE;
|
||||
struct MD_LINE_tag {
|
||||
MD_LINETYPE type;
|
||||
OFF beg;
|
||||
OFF end;
|
||||
};
|
||||
|
||||
|
||||
/*******************
|
||||
*** Debugging ***
|
||||
*******************/
|
||||
|
||||
static void
|
||||
md_log(MD_CTX* ctx, const char* fmt, ...)
|
||||
{
|
||||
char buffer[256];
|
||||
va_list args;
|
||||
|
||||
if(ctx->r.debug_log == NULL)
|
||||
return;
|
||||
|
||||
va_start(args, fmt);
|
||||
vsnprintf(buffer, sizeof(buffer), fmt, args);
|
||||
va_end(args);
|
||||
buffer[sizeof(buffer) - 1] = '\0';
|
||||
ctx->r.debug_log(buffer, ctx->userdata);
|
||||
}
|
||||
|
||||
/* MD_ASSERT(cond)
 *
 * In DEBUG builds a failed assertion is logged via md_log(), 'ret' is set
 * to -2 and control jumps to the label 'abort'. Hence the macro may only be
 * used in functions which provide 'ctx', 'ret' and 'abort'. The expression
 * text is passed as an argument of "%s" so that a '%' inside of it cannot be
 * misinterpreted as a format directive.
 *
 * In other builds the condition is only used as an optimization hint for
 * compilers which support that; otherwise the macro expands to nothing.
 */
#ifdef DEBUG
    #define MD_ASSERT(cond)                                             \
        do {                                                            \
            if(!(cond)) {                                               \
                md_log(ctx, "%s:%d: Assertion '%s' failed.",            \
                        __FILE__, (int)__LINE__, #cond);                \
                ret = -2;                                               \
                goto abort;                                             \
            }                                                           \
        } while(0)
#else
    /* __builtin_unreachable() is available since gcc 4.5; clang mimics an
     * older gcc version but does provide it as well. */
    #if defined __clang__ || (defined __GNUC__ && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 5)))
        #define MD_ASSERT(cond)     do { if(!(cond)) __builtin_unreachable(); } while(0)
    /* NOTE(review): _MSC_VER is well above 120 for any MSVC release, so this
     * test is effectively 'defined _MSC_VER'; confirm whether 1200 was meant. */
    #elif defined _MSC_VER && _MSC_VER > 120
        #define MD_ASSERT(cond)     do { __assume(cond); } while(0)
    #else
        #define MD_ASSERT(cond)     do {} while(0)
    #endif
#endif

/* Marks a place in the code which should never be reached. */
#define MD_UNREACHABLE()        MD_ASSERT(1 == 0)
|
||||
|
||||
|
||||
/*****************
|
||||
*** Helpers ***
|
||||
*****************/
|
||||
|
||||
/* Character accessors. Both expect a variable 'ctx' (pointer to MD_CTX) to
 * be in scope. CH() yields the character at the given offset of the input,
 * STR() a pointer to it. */
#define CH(off)                 (ctx->text[(off)])
#define STR(off)                (ctx->text + (off))

/* Character classification.
 * Note we assume ASCII compatibility of code points < 128 here.
 *
 * The variants with the trailing underscore take a character, the variants
 * without it take an offset into the input document.
 *
 * The comparisons against a plain upper bound cast the character to unsigned
 * first, so that bytes >= 128 stored in a signed char (negative values) are
 * not mistaken for ASCII or control characters. */
#define ISASCII_(ch)            ((unsigned)(ch) <= 127)
#define ISBLANK_(ch)            ((ch) == _T(' ') || (ch) == _T('\t'))
#define ISNEWLINE_(ch)          ((ch) == _T('\r') || (ch) == _T('\n'))
#define ISWHITESPACE_(ch)       (ISBLANK_(ch) || (ch) == _T('\v') || (ch) == _T('\f'))
#define ISCNTRL_(ch)            ((unsigned)(ch) <= 31 || (ch) == 127)
#define ISPUNCT_(ch)            ((33 <= (ch) && (ch) <= 47) || (58 <= (ch) && (ch) <= 64) || (91 <= (ch) && (ch) <= 96) || (123 <= (ch) && (ch) <= 126))
#define ISUPPER_(ch)            (_T('A') <= (ch) && (ch) <= _T('Z'))
#define ISLOWER_(ch)            (_T('a') <= (ch) && (ch) <= _T('z'))
#define ISALPHA_(ch)            (ISUPPER_(ch) || ISLOWER_(ch))
#define ISDIGIT_(ch)            (_T('0') <= (ch) && (ch) <= _T('9'))
/* Hexadecimal digits include both boundary letters, i.e. [0-9a-fA-F]. */
#define ISXDIGIT_(ch)           (ISDIGIT_(ch) || (_T('a') <= (ch) && (ch) <= _T('f')) || (_T('A') <= (ch) && (ch) <= _T('F')))
#define ISALNUM_(ch)            (ISALPHA_(ch) || ISDIGIT_(ch))

#define ISASCII(off)            ISASCII_(CH(off))
#define ISBLANK(off)            ISBLANK_(CH(off))
#define ISNEWLINE(off)          ISNEWLINE_(CH(off))
#define ISWHITESPACE(off)       ISWHITESPACE_(CH(off))
#define ISCNTRL(off)            ISCNTRL_(CH(off))
#define ISPUNCT(off)            ISPUNCT_(CH(off))
#define ISUPPER(off)            ISUPPER_(CH(off))
#define ISLOWER(off)            ISLOWER_(CH(off))
#define ISALPHA(off)            ISALPHA_(CH(off))
#define ISDIGIT(off)            ISDIGIT_(CH(off))
#define ISXDIGIT(off)           ISXDIGIT_(CH(off))
#define ISALNUM(off)            ISALNUM_(CH(off))
|
||||
|
||||
|
||||
/* Wrappers of the renderer callbacks.
 *
 * Each of them stores the return value of the callback into the variable
 * 'ret' and, if it is non-zero, logs a message and jumps to the label
 * 'abort'. Hence they may only be used in functions which provide 'ctx',
 * 'ret' and 'abort'. */

#define MD_ENTER_BLOCK(type, arg)                                       \
    do {                                                                \
        ret = ctx->r.enter_block((type), (arg), ctx->userdata);         \
        if(ret != 0) {                                                  \
            md_log(ctx, "Aborted from enter_block() callback.");        \
            goto abort;                                                 \
        }                                                               \
    } while(0)

#define MD_LEAVE_BLOCK(type, arg)                                       \
    do {                                                                \
        ret = ctx->r.leave_block((type), (arg), ctx->userdata);         \
        if(ret != 0) {                                                  \
            md_log(ctx, "Aborted from leave_block() callback.");        \
            goto abort;                                                 \
        }                                                               \
    } while(0)

#define MD_ENTER_SPAN(type, arg)                                        \
    do {                                                                \
        ret = ctx->r.enter_span((type), (arg), ctx->userdata);          \
        if(ret != 0) {                                                  \
            md_log(ctx, "Aborted from enter_span() callback.");         \
            goto abort;                                                 \
        }                                                               \
    } while(0)

#define MD_LEAVE_SPAN(type, arg)                                        \
    do {                                                                \
        ret = ctx->r.leave_span((type), (arg), ctx->userdata);          \
        if(ret != 0) {                                                  \
            md_log(ctx, "Aborted from leave_span() callback.");         \
            goto abort;                                                 \
        }                                                               \
    } while(0)

/* The text() callback is not called at all for an empty text. The 'size'
 * argument is parenthesized so that an arbitrary expression can be passed
 * without any operator precedence surprises. */
#define MD_TEXT(type, str, size)                                        \
    do {                                                                \
        if((size) > 0) {                                                \
            ret = ctx->r.text((type), (str), (size), ctx->userdata);    \
            if(ret != 0) {                                              \
                md_log(ctx, "Aborted from text() callback.");           \
                goto abort;                                             \
            }                                                           \
        }                                                               \
    } while(0)
|
||||
|
||||
|
||||
/******************************************
|
||||
*** Processing Single Block Contents ***
|
||||
******************************************/
|
||||
|
||||
static int
|
||||
md_process_normal_block(MD_CTX* ctx, const MD_LINE* lines, int n_lines)
|
||||
{
|
||||
int i;
|
||||
int ret = 0;
|
||||
|
||||
for(i = 0; i < n_lines; i++) {
|
||||
MD_TEXT(MD_TEXT_NORMAL, STR(lines[i].beg), lines[i].end - lines[i].beg);
|
||||
MD_TEXT(MD_TEXT_NORMAL, _T("\n"), 1);
|
||||
}
|
||||
|
||||
abort:
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
||||
/***************************************
|
||||
*** Breaking Document into Blocks ***
|
||||
***************************************/
|
||||
|
||||
/* Analyze type of the line and find some its properties. This serves as a
|
||||
* main input for determining type and boundaries of a block. */
|
||||
static void
|
||||
md_analyze_line(MD_CTX* ctx, OFF beg, OFF* p_end, const MD_LINE* pivot_line, MD_LINE* line)
|
||||
{
|
||||
OFF off = beg;
|
||||
|
||||
line->type = MD_LINE_BLANK;
|
||||
|
||||
/* Eat indentation. */
|
||||
while(off < ctx->size && ISBLANK(off)) {
|
||||
off++;
|
||||
}
|
||||
|
||||
line->beg = off;
|
||||
|
||||
/* Check whether we are blank line. Note we fall here even if we are beyond
|
||||
* the document end. */
|
||||
if(off >= ctx->size || ISNEWLINE(off)) {
|
||||
line->type = MD_LINE_BLANK;
|
||||
goto done;
|
||||
}
|
||||
|
||||
/* By default, we are normal text line. */
|
||||
line->type = MD_LINE_TEXT;
|
||||
|
||||
done:
|
||||
/* Eat rest of the line contents */
|
||||
while(off < ctx->size && !ISNEWLINE(off))
|
||||
off++;
|
||||
|
||||
/* Set end of the line. */
|
||||
line->end = off;
|
||||
|
||||
/* Eat also the new line. */
|
||||
if(off < ctx->size && CH(off) == _T('\r'))
|
||||
off++;
|
||||
if(off < ctx->size && CH(off) == _T('\n'))
|
||||
off++;
|
||||
|
||||
*p_end = off;
|
||||
}
|
||||
|
||||
/* Determine type of the block (from type of its 1st line and some context),
|
||||
* call block_enter() callback, then appropriate function to parse contents
|
||||
* of the block, and finally block_leave() callback.
|
||||
*/
|
||||
static int
|
||||
md_process_block(MD_CTX* ctx, const MD_LINE* lines, int n_lines)
|
||||
{
|
||||
MD_BLOCKTYPE block_type;
|
||||
int ret = 0;
|
||||
|
||||
if(n_lines == 0)
|
||||
return 0;
|
||||
|
||||
/* Derive block type from type of the first line. */
|
||||
switch(lines[0].type) {
|
||||
case MD_LINE_BLANK:
|
||||
return 0;
|
||||
|
||||
case MD_LINE_TEXT:
|
||||
block_type = MD_BLOCK_P;
|
||||
break;
|
||||
}
|
||||
|
||||
/* Process the block accordingly to is type. */
|
||||
MD_ENTER_BLOCK(block_type, NULL);
|
||||
ret = md_process_normal_block(ctx, lines, n_lines);
|
||||
if(ret != 0)
|
||||
goto abort;
|
||||
MD_LEAVE_BLOCK(block_type, NULL);
|
||||
|
||||
abort:
|
||||
return ret;
|
||||
}
|
||||
|
||||
/* Go through the document, analyze each line, on the fly identify block
|
||||
* boundaries and call md_process_block() for sequence of MD_LINE composing
|
||||
* the block.
|
||||
*/
|
||||
static int
|
||||
md_process_doc(MD_CTX *ctx)
|
||||
{
|
||||
static const MD_LINE dummy_line = { MD_LINE_BLANK, 0 };
|
||||
const MD_LINE* pivot_line = &dummy_line;
|
||||
MD_LINE* line;
|
||||
MD_LINE* lines = NULL;
|
||||
int alloc_lines = 0;
|
||||
int n_lines = 0;
|
||||
OFF off = 0;
|
||||
int ret = 0;
|
||||
|
||||
MD_ENTER_BLOCK(MD_BLOCK_DOC, NULL);
|
||||
|
||||
while(off < ctx->size) {
|
||||
if(n_lines >= alloc_lines) {
|
||||
MD_LINE* new_lines;
|
||||
|
||||
alloc_lines = (alloc_lines == 0 ? 32 : alloc_lines * 2);
|
||||
new_lines = (MD_LINE*) realloc(lines, alloc_lines * sizeof(MD_LINE));
|
||||
if(new_lines == NULL) {
|
||||
md_log(ctx, "realloc() failed.");
|
||||
ret = -1;
|
||||
goto abort;
|
||||
}
|
||||
|
||||
lines = new_lines;
|
||||
}
|
||||
|
||||
md_analyze_line(ctx, off, &off, pivot_line, &lines[n_lines]);
|
||||
line = &lines[n_lines];
|
||||
|
||||
/* The same block continues as long lines are of the same type. */
|
||||
if(line->type == pivot_line->type) {
|
||||
/* Do not grow the 'lines' because of blank lines. Semantically
|
||||
* one blank line is equivalent to many. */
|
||||
if(line->type != MD_LINE_BLANK)
|
||||
n_lines++;
|
||||
|
||||
continue;
|
||||
}
|
||||
|
||||
/* Otherwise the old block is complete and we have to process it. */
|
||||
ret = md_process_block(ctx, lines, n_lines);
|
||||
if(ret != 0)
|
||||
goto abort;
|
||||
|
||||
/* Keep the current line as the new pivot. */
|
||||
if(line != &lines[0])
|
||||
memcpy(&lines[0], line, sizeof(MD_LINE));
|
||||
pivot_line = &lines[0];
|
||||
n_lines = 1;
|
||||
}
|
||||
|
||||
/* Process also the last block. */
|
||||
if(pivot_line->type != MD_LINE_BLANK) {
|
||||
ret = md_process_block(ctx, lines, n_lines);
|
||||
if(ret != 0)
|
||||
goto abort;
|
||||
}
|
||||
|
||||
MD_LEAVE_BLOCK(MD_BLOCK_DOC, NULL);
|
||||
|
||||
abort:
|
||||
free(lines);
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
||||
/********************
|
||||
*** Public API ***
|
||||
********************/
|
||||
|
||||
int
|
||||
md_parse(const MD_CHAR* text, MD_SIZE size, const MD_RENDERER* renderer, void* userdata)
|
||||
{
|
||||
MD_CTX ctx;
|
||||
|
||||
/* Setup context structure. */
|
||||
memset(&ctx, 0, sizeof(MD_CTX));
|
||||
ctx.text = text;
|
||||
ctx.size = size;
|
||||
memcpy(&ctx.r, renderer, sizeof(MD_RENDERER));
|
||||
ctx.userdata = userdata;
|
||||
|
||||
/* Doo all the hard work. */
|
||||
return md_process_doc(&ctx);
|
||||
}
|
136
md4c/md4c.h
Normal file
136
md4c/md4c.h
Normal file
@ -0,0 +1,136 @@
|
||||
/*
|
||||
* MD4C: Markdown parser for C
|
||||
* (http://github.com/mity/md4c)
|
||||
*
|
||||
* Copyright (c) 2016 Martin Mitas
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
||||
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
* IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef MD4C_MARKDOWN_H
|
||||
#define MD4C_MARKDOWN_H
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
|
||||
/* Magic to support UTF16-LE (i.e. what is called Unicode among Windows
|
||||
* developers) input/output on Windows.
|
||||
*
|
||||
* On most platforms, we handle char strings and do not care about encoding
|
||||
* as far as the controlling Markdown syntax is actually ASCII-friendly.
|
||||
* The actual text is provided into callbacks as it is.
|
||||
*
|
||||
* On Windows, when UNICODE is defined, we by default switch to WCHAR.
|
||||
* This behavior may be disabled by predefining MD4C_DISABLE_WIN_UNICODE.
|
||||
*/
|
||||
#if defined _WIN32 && defined UNICODE && !defined MD4C_DISABLE_WIN_UNICODE
|
||||
#include <windows.h>
|
||||
|
||||
#define MD4C_USE_WIN_UNICODE
|
||||
typedef WCHAR MD_CHAR;
|
||||
#else
|
||||
typedef char MD_CHAR;
|
||||
#endif
|
||||
|
||||
typedef unsigned MD_SIZE;
|
||||
typedef unsigned MD_OFFSET;
|
||||
|
||||
|
||||
/* Block represents a part of document hierarchy structure like a paragraph
 * or list item.
 *
 * The enumeration is defined before the typedef refers to it: naming an enum
 * type ahead of its definition is not allowed in ISO C and is an error in
 * C++, which this header is meant to support as well. */
enum MD_BLOCKTYPE_tag {
    /* <body>...</body> */
    MD_BLOCK_DOC = 0,

    /* <p>...</p> */
    MD_BLOCK_P
};
typedef enum MD_BLOCKTYPE_tag MD_BLOCKTYPE;
|
||||
|
||||
|
||||
/* Span represents an in-line piece of a document which should be rendered with
 * the same font, color and other attributes. A sequence of spans forms a block
 * like paragraph or list item.
 *
 * The enumeration is defined before the typedef refers to it, so that the
 * header is valid both in ISO C and in C++. */
enum MD_SPANTYPE_tag {
    MD_SPAN_DUMMY = 0       /* not yet used... */
};
typedef enum MD_SPANTYPE_tag MD_SPANTYPE;
|
||||
|
||||
|
||||
/* Text is the actual textual contents of span.
 *
 * The enumeration is defined before the typedef refers to it, so that the
 * header is valid both in ISO C and in C++. */
enum MD_TEXTTYPE_tag {
    /* Normal text. */
    MD_TEXT_NORMAL = 0
};
typedef enum MD_TEXTTYPE_tag MD_TEXTTYPE;
|
||||
|
||||
|
||||
/* Caller-provided callbacks.
|
||||
*
|
||||
* For some block/span types, more detailed information is provided in a
|
||||
* type-specific structure pointed by the argument 'detail'.
|
||||
*
|
||||
* The last argument of all callbacks, 'userdata', is just propagated from
|
||||
* md_parse() and is available for ue by the caller.
|
||||
*
|
||||
* Callbacks may abort further parsing of the document by returning non-zero.
|
||||
*/
|
||||
typedef struct MD_RENDERER_tag MD_RENDERER;
|
||||
struct MD_RENDERER_tag {
|
||||
int (*enter_block)(MD_BLOCKTYPE /*type*/, void* /*detail*/, void* /*userdata*/);
|
||||
int (*leave_block)(MD_BLOCKTYPE /*type*/, void* /*detail*/, void* /*userdata*/);
|
||||
|
||||
int (*enter_span)(MD_SPANTYPE /*type*/, void* /*detail*/, void* /*userdata*/);
|
||||
int (*leave_span)(MD_SPANTYPE /*type*/, void* /*detail*/, void* /*userdata*/);
|
||||
|
||||
int (*text)(MD_TEXTTYPE /*type*/, const MD_CHAR* /*text*/, MD_SIZE /*size*/, void* /*userdata*/);
|
||||
|
||||
/* If not NULL and something goes wrong, this function gets called.
|
||||
* This is intended for debugging and problem diagnosis for developers;
|
||||
* it is not intended to provide any errors suitable for displaying to an
|
||||
* end user.
|
||||
*/
|
||||
void (*debug_log)(const char* /*msg*/, void* /*userdata*/);
|
||||
|
||||
/* Dialect options. */
|
||||
unsigned flags;
|
||||
};
|
||||
|
||||
|
||||
/* Parse the Markdown document stored in the string 'text' of size 'size'.
|
||||
* The renderer provides callbacks to be called during the parsing so the
|
||||
* caller can render the document on the screen or convert the Markdown
|
||||
* to another format.
|
||||
*
|
||||
* Zero is returned on success. If a runtime error occurs (e.g. a memory
|
||||
* allocation fails), -1 is returned. If an internal error occurs (i.e. an internal
|
||||
* assertion fails, implying there is a bug in MD4C), then -2 is returned.
|
||||
* If the processing is aborted due to any callback returning non-zero,
|
||||
* md_parse() returns return value of the callback.
|
||||
*/
|
||||
int md_parse(const MD_CHAR* text, MD_SIZE size, const MD_RENDERER* renderer, void* userdata);
|
||||
|
||||
|
||||
#ifdef __cplusplus
|
||||
} /* extern "C" { */
|
||||
#endif
|
||||
|
||||
#endif /* MD4C_MARKDOWN_H */
|
Loading…
Reference in New Issue
Block a user