/* ** Copyright (c) 2012 D. Richard Hipp ** ** This program is free software; you can redistribute it and/or ** modify it under the terms of the Simplified BSD License (also ** known as the "2-Clause License" or "FreeBSD License".) ** This program is distributed in the hope that it will be useful, ** but without any warranty; without even the implied warranty of ** merchantability or fitness for a particular purpose. ** ** Author contact information: ** drh@hwaci.com ** http://www.hwaci.com/drh/ ** ******************************************************************************* ** ** This file contains code to parse a blob containing markdown text, ** using an external renderer. */ #include "config.h" #include "markdown.h" #include #include #include #define MKD_LI_END 8 /* internal list flag */ /******************** * TYPE DEFINITIONS * ********************/ #if INTERFACE /* mkd_autolink -- type of autolink */ enum mkd_autolink { MKDA_NOT_AUTOLINK, /* used internally when it is not an autolink */ MKDA_NORMAL, /* normal http/https/ftp link */ MKDA_EXPLICIT_EMAIL, /* e-mail link with explicit mailto: */ MKDA_IMPLICIT_EMAIL /* e-mail link without mailto: */ }; /* mkd_renderer -- functions for rendering parsed data */ struct mkd_renderer { /* document level callbacks */ void (*prolog)(struct Blob *ob, void *opaque); void (*epilog)(struct Blob *ob, void *opaque); /* block level callbacks - NULL skips the block */ void (*blockcode)(struct Blob *ob, struct Blob *text, void *opaque); void (*blockquote)(struct Blob *ob, struct Blob *text, void *opaque); void (*blockhtml)(struct Blob *ob, struct Blob *text, void *opaque); void (*header)(struct Blob *ob, struct Blob *text, int level, void *opaque); void (*hrule)(struct Blob *ob, void *opaque); void (*list)(struct Blob *ob, struct Blob *text, int flags, void *opaque); void (*listitem)(struct Blob *ob, struct Blob *text, int flags, void *opaque); void (*paragraph)(struct Blob *ob, struct Blob *text, void *opaque); void
(*table)(struct Blob *ob, struct Blob *head_row, struct Blob *rows, void *opaque); void (*table_cell)(struct Blob *ob, struct Blob *text, int flags, void *opaque); void (*table_row)(struct Blob *ob, struct Blob *cells, int flags, void *opaque); /* span level callbacks - NULL or return 0 prints the span verbatim */ int (*autolink)(struct Blob *ob, struct Blob *link, enum mkd_autolink type, void *opaque); int (*codespan)(struct Blob *ob, struct Blob *text, void *opaque); int (*double_emphasis)(struct Blob *ob, struct Blob *text, char c, void *opaque); int (*emphasis)(struct Blob *ob, struct Blob *text, char c,void*opaque); int (*image)(struct Blob *ob, struct Blob *link, struct Blob *title, struct Blob *alt, void *opaque); int (*linebreak)(struct Blob *ob, void *opaque); int (*link)(struct Blob *ob, struct Blob *link, struct Blob *title, struct Blob *content, void *opaque); int (*raw_html_tag)(struct Blob *ob, struct Blob *tag, void *opaque); int (*triple_emphasis)(struct Blob *ob, struct Blob *text, char c, void *opaque); /* low level callbacks - NULL copies input directly into the output */ void (*entity)(struct Blob *ob, struct Blob *entity, void *opaque); void (*normal_text)(struct Blob *ob, struct Blob *text, void *opaque); /* renderer data */ int max_work_stack; /* prevent arbitrarily deep recursion, cf README */ const char *emph_chars; /* chars that trigger emphasis rendering */ void *opaque; /* opaque data sent to every rendering callback */ }; /********* * FLAGS * *********/ /* list/listitem flags */ #define MKD_LIST_ORDERED 1 #define MKD_LI_BLOCK 2 /* list item

containing block data */ /* table cell flags */ #define MKD_CELL_ALIGN_DEFAULT 0 #define MKD_CELL_ALIGN_LEFT 1 #define MKD_CELL_ALIGN_RIGHT 2 #define MKD_CELL_ALIGN_CENTER 3 /* LEFT | RIGHT */ #define MKD_CELL_ALIGN_MASK 3 #define MKD_CELL_HEAD 4 /********************** * EXPORTED FUNCTIONS * **********************/ /* markdown -- parses the input buffer and renders it into the output buffer */ void markdown( struct Blob *ob, struct Blob *ib, const struct mkd_renderer *rndr); #endif /* INTERFACE */ /*************** * LOCAL TYPES * ***************/ /* link_ref -- reference to a link */ struct link_ref { struct Blob id; struct Blob link; struct Blob title; }; /* char_trigger -- function pointer to render active chars */ /* returns the number of chars taken care of */ /* data is the pointer to the beginning of the span */ /* offset is the number of valid chars before data */ struct render; typedef size_t (*char_trigger)( struct Blob *ob, struct render *rndr, char *data, size_t offset, size_t size); /* render -- structure containing one particular render */ struct render { struct mkd_renderer make; /* private copy of the caller-supplied callbacks */ struct Blob refs; /* blob used as an array of struct link_ref, searched with bsearch() */ char_trigger active_char[256]; /* trigger indexed by byte value, 0 when the char is not active */ int work_active; /* number of work buffers currently handed out */ struct Blob *work; /* array of work buffers, handed out in stack order */ }; /* html_tag -- structure for quick HTML tag search (inspired by discount) */ struct html_tag { const char *text; int size; }; /******************** * GLOBAL VARIABLES * ********************/ /* block_tags -- recognised block tags, sorted by cmp_html_tag */ /* (that is by size first, then case-insensitively by text) */ static const struct html_tag block_tags[] = { { "p", 1 }, { "dl", 2 }, { "h1", 2 }, { "h2", 2 }, { "h3", 2 }, { "h4", 2 }, { "h5", 2 }, { "h6", 2 }, { "ol", 2 }, { "ul", 2 }, { "del", 3 }, { "div", 3 }, { "ins", 3 }, { "pre", 3 }, { "form", 4 }, { "math", 4 }, { "table", 5 }, { "iframe", 6 }, { "script", 6 }, { "fieldset", 8 }, { "noscript", 8 }, { "blockquote", 10 } }; /* the two offsets below must track the positions of ins and del in block_tags */ #define INS_TAG (block_tags + 12) #define DEL_TAG (block_tags + 10) /*************************** * STATIC HELPER FUNCTIONS * ***************************/ /*
build_ref_id -- collapse whitespace from input text to make it a ref id */ static int build_ref_id(struct Blob *id, const char *data, size_t size){ size_t beg, i; char *id_data; /* skip leading whitespace */ while( size>0 && (data[0]==' ' || data[0]=='\t' || data[0]=='\n') ){ data++; size--; } /* skip trailing whitespace */ while( size>0 && (data[size-1]==' ' || data[size-1]=='\t' || data[size-1]=='\n') ){ size--; } if( size==0 ) return -1; /* making the ref id */ i = 0; blob_reset(id); while( i='A' && id_data[i]<='Z' ) id_data[i] += 'a' - 'A'; } return 0; } /* cmp_link_ref -- comparison function for link_ref sorted arrays */ static int cmp_link_ref(const void *key, const void *array_entry){ struct link_ref *lr = (void *)array_entry; return blob_compare((void *)key, &lr->id); } /* cmp_link_ref_sort -- comparison function for link_ref qsort */ static int cmp_link_ref_sort(const void *a, const void *b){ struct link_ref *lra = (void *)a; struct link_ref *lrb = (void *)b; return blob_compare(&lra->id, &lrb->id); } /* cmp_html_tag -- comparison function for bsearch() (stolen from discount) */ static int cmp_html_tag(const void *a, const void *b){ const struct html_tag *hta = a; const struct html_tag *htb = b; if( hta->size!=htb->size ) return hta->size-htb->size; return fossil_strnicmp(hta->text, htb->text, hta->size); } /* find_block_tag -- returns the current block tag */ static const struct html_tag *find_block_tag(const char *data, size_t size){ size_t i = 0; struct html_tag key; /* looking for the word end */ while( i='0' && data[i]<='9') || (data[i]>='A' && data[i]<='Z') || (data[i]>='a' && data[i]<='z')) ){ i++; } if( i>=size ) return 0; /* binary search of the tag */ key.text = data; key.size = i; return bsearch(&key, block_tags, count(block_tags), sizeof block_tags[0], cmp_html_tag); } /* new_work_buffer -- get a new working buffer from the stack or create one */ static struct Blob *new_work_buffer(struct render *rndr){ struct Blob *ret = 0; if( 
rndr->work_active < rndr->make.max_work_stack ){ ret = rndr->work + rndr->work_active; rndr->work_active += 1; blob_reset(ret); } return ret; } /* release_work_buffer -- release the given working buffer */ static void release_work_buffer(struct render *rndr, struct Blob *buf){ if( !buf ) return; assert(rndr->work_active>0 && buf==(rndr->work+rndr->work_active-1)); rndr->work_active -= 1; } /**************************** * INLINE PARSING FUNCTIONS * ****************************/ /* is_mail_autolink -- looks for the address part of a mail autolink and '>' */ /* this is less strict than the original markdown e-mail address matching */ static size_t is_mail_autolink(char *data, size_t size){ size_t i = 0, nb = 0; /* address is assumed to be: [-@._a-zA-Z0-9]+ with exactly one '@' */ while( i='a' && data[i]<='z') || (data[i]>='A' && data[i]<='Z') || (data[i]>='0' && data[i]<='9')) ){ if( data[i]=='@' ) nb++; i++; } if( i>=size || data[i]!='>' || nb!=1 ) return 0; return i+1; } /* tag_length -- returns the length of the given tag, or 0 if it's not valid */ static size_t tag_length(char *data, size_t size, enum mkd_autolink *autolink){ size_t i, j; /* a valid tag can't be shorter than 3 chars */ if( size<3 ) return 0; /* begins with a '<' optionally followed by '/', followed by letter */ if( data[0]!='<' ) return 0; i = (data[1]=='/') ? 2 : 1; if( (data[i]<'a' || data[i]>'z') && (data[i]<'A' || data[i]>'Z') ){ return 0; } /* scheme test */ *autolink = MKDA_NOT_AUTOLINK; if( size>6 && fossil_strnicmp(data+1, "http", 4)==0 && (data[5]==':' || ((data[5]=='s' || data[5]=='S') && data[6]==':')) ){ i = (data[5]==':') ? 
6 : 7; *autolink = MKDA_NORMAL; }else if( size>5 && fossil_strnicmp(data+1, "ftp:", 4)==0 ){ i = 5; *autolink = MKDA_NORMAL; }else if( size>7 && fossil_strnicmp(data+1, "mailto:", 7)==0 ){ i = 8; /* not changing *autolink to go to the address test */ } /* completing autolink test: no whitespace or ' or " */ /* NOTE(review): i is an index here, so comparing it with the character code of a closing angle bracket looks like a typo for data[i] -- confirm before changing */ if( i>=size || i=='>' ){ *autolink = MKDA_NOT_AUTOLINK; }else if( *autolink ){ j = i; while( i=size ) return 0; if( i>j && data[i]=='>' ) return i+1; /* one of the forbidden chars has been found */ *autolink = MKDA_NOT_AUTOLINK; }else if( (j = is_mail_autolink(data+i, size-i))!=0 ){ /* i is 8 only when the explicit mailto: scheme was matched above */ *autolink = (i==8) ? MKDA_EXPLICIT_EMAIL : MKDA_IMPLICIT_EMAIL; return i+j; } /* looking for something looking like a tag end */ while( i=size ) return 0; return i+1; } /* parse_inline -- parses inline markdown elements */ static void parse_inline( struct Blob *ob, struct render *rndr, char *data, size_t size ){ size_t i = 0, end = 0; char_trigger action = 0; struct Blob work = BLOB_INITIALIZER; while( iactive_char[(unsigned char)data[end]])==0 ){ end++; } if( end>i ){ if( rndr->make.normal_text ){ blob_init(&work, data+i, end-i); rndr->make.normal_text(ob, &work, rndr->make.opaque); }else{ blob_append(ob, data+i, end-i); } } if( end>=size ) break; i = end; /* calling the trigger */ end = action(ob, rndr, data+i, i, size-i); if( !end ){ /* no action from the callback */ end = i+1; }else{ i += end; end = i; } } } /* find_emph_char -- looks for the next emph char, skipping other constructs */ static size_t find_emph_char(char *data, size_t size, char c){ size_t i = 1; while( i=size ) return 0; /* not counting escaped chars */ if( i && data[i-1]=='\\' ){ i++; continue; } if( data[i]==c ) return i; /* skipping a code span */ if( data[i]=='`' ){ size_t span_nb = 0, bt; size_t tmp_i = 0; /* counting the number of opening backticks */ while( i=size ) return 0; /* finding the matching closing sequence */ bt = 0; while( i=size ) return tmp_i; i++; /* skipping a link */ }else if( data[i]=='['
){ size_t tmp_i = 0; char cc; i++; while( i=size ) return tmp_i; if( data[i]!='[' && data[i]!='(' ){ /* not a link */ if( tmp_i ) return tmp_i; else continue; } cc = data[i]; i++; while( i=size ) return tmp_i; i++; } } return 0; } /* parse_emph1 -- parsing single emphasis */ /* closed by a symbol not preceded by whitespace and not followed by symbol */ static size_t parse_emph1( struct Blob *ob, struct render *rndr, char *data, size_t size, char c ){ size_t i = 0, len; struct Blob *work = 0; int r; if( !rndr->make.emphasis ) return 0; /* skipping one symbol if coming from emph3 */ if( size>1 && data[0]==c && data[1]==c ) i = 1; while( i=size ) return 0; if( i+1make.emphasis(ob, work, c, rndr->make.opaque); release_work_buffer(rndr, work); return r ? i+1 : 0; } } return 0; } /* parse_emph2 -- parsing double emphasis */ /* returns 0 when no double_emphasis callback is set or when the callback returns 0 */ static size_t parse_emph2( struct Blob *ob, struct render *rndr, char *data, size_t size, char c ){ size_t i = 0, len; struct Blob *work = 0; int r; if( !rndr->make.double_emphasis ) return 0; while( imake.double_emphasis(ob, work, c, rndr->make.opaque); release_work_buffer(rndr, work); return r ? i+2 : 0; } i++; } return 0; } /* parse_emph3 -- parsing triple emphasis */ /* finds the first closing tag, and delegates to the other emph */ static size_t parse_emph3( struct Blob *ob, struct render *rndr, char *data, size_t size, char c ){ size_t i = 0, len; int r; while( imake.triple_emphasis ){ /* triple symbol found */ struct Blob *work = new_work_buffer(rndr); if( !work ) return 0; parse_inline(work, rndr, data, i); r = rndr->make.triple_emphasis(ob, work, c, rndr->make.opaque); release_work_buffer(rndr, work); return r ?
i+3 : 0; }else if( i+12 && data[1]!=c ){ /* whitespace cannot follow an opening emphasis */ if( data[1]==' ' || data[1]=='\t' || data[1]=='\n' || (ret = parse_emph1(ob, rndr, data+1, size-1, c))==0 ){ return 0; } return ret+1; } if( size>3 && data[1]==c && data[2]!=c ){ if( data[2]==' ' || data[2]=='\t' || data[2]=='\n' || (ret = parse_emph2(ob, rndr, data+2, size-2, c))==0 ){ return 0; } return ret+2; } if( size>4 && data[1]==c && data[2]==c && data[3]!=c ){ if( data[3]==' ' || data[3]=='\t' || data[3]=='\n' || (ret = parse_emph3(ob, rndr, data+3, size-3, c))==0 ){ return 0; } return ret+3; } return 0; } /* char_linebreak -- '\n' preceded by two spaces (assuming linebreak != 0) */ static size_t char_linebreak( struct Blob *ob, struct render *rndr, char *data, size_t offset, size_t size ){ if( offset<2 || data[-1]!=' ' || data[-2]!=' ' ) return 0; /* removing the last space from ob and rendering */ if( blob_size(ob)>0 && blob_buffer(ob)[blob_size(ob)-1]==' ' ) ob->nUsed--; return rndr->make.linebreak(ob, rndr->make.opaque) ? 
1 : 0; } /* char_codespan -- '`' parsing a code span (assuming codespan != 0) */ static size_t char_codespan( struct Blob *ob, struct render *rndr, char *data, size_t offset, size_t size ){ size_t end, nb = 0, i, f_begin, f_end; /* counting the number of backticks in the delimiter */ while( nb=size ) return 0; /* no matching delimiter */ /* trimming outside whitespaces */ f_begin = nb; while( f_beginnb && (data[f_end-1]==' ' || data[f_end-1]=='\t') ){ f_end--; } /* real code span */ if( f_beginmake.codespan(ob, &work, rndr->make.opaque) ) end = 0; }else{ if( !rndr->make.codespan(ob, 0, rndr->make.opaque) ) end = 0; } return end; } /* char_escape -- '\\' backslash escape */ static size_t char_escape( struct Blob *ob, struct render *rndr, char *data, size_t offset, size_t size ){ struct Blob work = BLOB_INITIALIZER; if( size>1 ){ if( rndr->make.normal_text ){ blob_init(&work, data+1,1); rndr->make.normal_text(ob, &work, rndr->make.opaque); }else{ blob_append(ob, data+1, 1); } } return 2; } /* char_entity -- '&' escaped when it doesn't belong to an entity */ /* valid entities are assumed to be anything matching &#?[A-Za-z0-9]+; */ static size_t char_entity( struct Blob *ob, struct render *rndr, char *data, size_t offset, size_t size ){ size_t end = 1; struct Blob work = BLOB_INITIALIZER; if( end='0' && data[end]<='9') || (data[end]>='a' && data[end]<='z') || (data[end]>='A' && data[end]<='Z')) ){ end++; } if( endmake.entity ){ blob_init(&work, data, end); rndr->make.entity(ob, &work, rndr->make.opaque); }else{ blob_append(ob, data, end); } return end; } /* char_langle_tag -- '<' when tags or autolinks are allowed */ static size_t char_langle_tag( struct Blob *ob, struct render *rndr, char *data, size_t offset, size_t size ){ enum mkd_autolink altype = MKDA_NOT_AUTOLINK; size_t end = tag_length(data, size, &altype); struct Blob work = BLOB_INITIALIZER; int ret = 0; if( end ){ if( rndr->make.autolink && altype!=MKDA_NOT_AUTOLINK ){ blob_init(&work, data+1, end-2); ret = 
rndr->make.autolink(ob, &work, altype, rndr->make.opaque); }else if( rndr->make.raw_html_tag ){ blob_init(&work, data, end); ret = rndr->make.raw_html_tag(ob, &work, rndr->make.opaque); } } if( !ret ){ return 0; }else{ return end; } } /* get_link_inline -- extract inline-style link and title from ** parenthesized data */ static int get_link_inline( struct Blob *link, struct Blob *title, char *data, size_t size ){ size_t i = 0, mark; size_t link_b, link_e; size_t title_b = 0, title_e = 0; /* skipping initial whitespace */ while( ititle_b && (data[title_e]==' ' || data[title_e]=='\t' || data[title_e]=='\n') ){ title_e--; } /* checking for closing quote presence */ if (data[title_e] != '\'' && data[title_e] != '"') { title_b = title_e = 0; link_e = i; } } /* remove whitespace at the end of the link */ while( link_e>link_b && (data[link_e-1]==' ' || data[link_e-1]=='\t' || data[link_e-1]=='\n') ){ link_e--; } /* remove optional angle brackets around the link */ if( data[link_b]=='<' ) link_b += 1; if( data[link_e-1]=='>' ) link_e -= 1; /* escape backslashed character from link */ blob_reset(link); i = link_b; while( ititle_b ) blob_append(title, data+title_b, title_e-title_b); /* this function always succeeds */ return 0; } /* get_link_ref -- extract referenced link and title from id */ /* returns 0 on success, -1 when no id can be built from the data or when no reference matches it */ static int get_link_ref( struct render *rndr, struct Blob *link, struct Blob *title, char *data, size_t size ){ struct link_ref *lr; /* find the link from its id (stored temporarily in link) */ blob_reset(link); if( build_ref_id(link, data, size)<0 ) return -1; lr = bsearch(link, blob_buffer(&rndr->refs), blob_size(&rndr->refs)/sizeof(struct link_ref), sizeof (struct link_ref), cmp_link_ref); if( !lr ) return -1; /* fill the output buffers */ blob_reset(link); blob_reset(title); blob_append(link, blob_buffer(&lr->link), blob_size(&lr->link)); blob_append(title, blob_buffer(&lr->title), blob_size(&lr->title)); return 0; } /* char_link -- '[': parsing a link or an image */ static size_t
char_link( struct Blob *ob, struct render *rndr, char *data, size_t offset, size_t size ){ int is_img = (offset && data[-1] == '!'), level; size_t i = 1, txt_e; struct Blob *content = 0; struct Blob *link = 0; struct Blob *title = 0; int ret; /* checking whether the correct renderer exists */ if( (is_img && !rndr->make.image) || (!is_img && !rndr->make.link) ){ return 0; } /* looking for the matching closing bracket */ for(level=1; i=size ) return 0; txt_e = i; i++; /* skip any amount of whitespace or newline */ /* (this is much more lax than the original markdown syntax) */ while( i=size || get_link_inline(link, title, data+i+1, span_end-(i+1))<0 ){ goto char_link_cleanup; } i = span_end+1; /* reference style link */ }else if( i=size ) goto char_link_cleanup; if( i+1==id_end ){ /* implicit id - use the contents */ id_data = data+1; id_size = txt_e-1; }else{ /* explicit id - between brackets */ id_data = data+i+1; id_size = id_end-(i+1); } if( get_link_ref(rndr, link, title, id_data, id_size)<0 ){ goto char_link_cleanup; } i = id_end+1; /* shortcut reference style link */ }else{ if( get_link_ref(rndr, link, title, data+1, txt_e-1)<0 ){ goto char_link_cleanup; } /* rewinding the whitespace */ i = txt_e+1; } /* building content: img alt is copied verbatim, link content is parsed */ if( txt_e>1 ){ if( is_img ) blob_append(content, data+1, txt_e-1); else parse_inline(content, rndr, data+1, txt_e-1); } /* calling the relevant rendering function */ if( is_img ){ /* drop the exclamation mark already copied to the output before the bracket */ if( blob_size(ob)>0 && blob_buffer(ob)[blob_size(ob)-1]=='!' ) ob->nUsed--; ret = rndr->make.image(ob, link, title, content, rndr->make.opaque); }else{ ret = rndr->make.link(ob, link, title, content, rndr->make.opaque); } /* cleanup */ /* release_work_buffer asserts that the most recently obtained buffer is released first, hence this order */ char_link_cleanup: release_work_buffer(rndr, title); release_work_buffer(rndr, link); release_work_buffer(rndr, content); return ret ?
i : 0; } /********************************* * BLOCK-LEVEL PARSING FUNCTIONS * *********************************/ /* is_empty -- returns the line length when it is empty, 0 otherwise */ static size_t is_empty(const char *data, size_t size){ size_t i; for(i=0; i=size || (data[i]!='*' && data[i]!='-' && data[i]!='_') ) return 0; c = data[i]; /* the whole line must be the char or whitespace */ while (i < size && data[i] != '\n') { if( data[i]==c ){ n += 1; }else if( data[i]!=' ' && data[i]!='\t' ){ return 0; } i++; } return n>=3; } /* is_headerline -- tests whether the line is a setext-style hdr underline */ /* returns the header level (1 for an equal-sign line, 2 for a dash line), 0 when it is not an underline */ static int is_headerline(char *data, size_t size){ size_t i = 0; /* test of level 1 header */ if( data[i]=='=' ){ for(i=1; i=size || data[i]=='\n') ? 1 : 0; } /* test of level 2 header */ if( data[i]=='-' ){ for(i=1; i=size || data[i]=='\n') ? 2 : 0; } return 0; } /* is_table_sep -- returns whether there is a table separator at pos */ /* (a vertical bar that is not preceded by a backslash) */ static int is_table_sep(char *data, size_t pos){ return data[pos]=='|' && (pos==0 || data[pos-1]!='\\'); } /* is_tableline -- returns the number of table columns in the given line */ static int is_tableline(char *data, size_t size){ size_t i = 0; int n_sep = 0, outer_sep = 0; /* skip initial blanks */ while( i0) ? (n_sep-outer_sep+1) : 0; } /* prefix_quote -- returns blockquote prefix length */ static size_t prefix_quote(char *data, size_t size){ size_t i = 0; if( i' ){ if( i+10 && data[0]=='\t' ) return 1; if( size>3 && data[0]==' ' && data[1]==' ' && data[2]==' ' && data[3]==' ' ){ return 4; } return 0; } /* prefix_oli -- returns ordered list item prefix */ static size_t prefix_oli(char *data, size_t size){ size_t i = 0; if( i=size || data[i]<'0' || data[i]>'9' ) return 0; while( i='0' && data[i]<='9' ){ i++; } if( i+1>=size || data[i]!='.'
|| (data[i+1]!=' ' && data[i+1]!='\t') ){ return 0; } i = i+2; while( i=size || (data[i]!='*' && data[i]!='+' && data[i]!='-') || (data[i+1]!=' ' && data[i+1]!='\t') ){ return 0; } i = i+2; while( i=size || (prefix_quote(data+end, size-end)==0 && !is_empty(data+end, size-end))) ){ /* empty line followed by non-quote line */ break; } if( begmake.blockquote ){ struct Blob fallback = BLOB_INITIALIZER; if( out ){ parse_block(out, rndr, work_data, work_size); }else{ blob_init(&fallback, work_data, work_size); } rndr->make.blockquote(ob, out ? out : &fallback, rndr->make.opaque); } release_work_buffer(rndr, out); return end; } /* parse_paragraph -- handles parsing of a regular paragraph */ static size_t parse_paragraph( struct Blob *ob, struct render *rndr, char *data, size_t size ){ size_t i = 0, end = 0; int level = 0; char *work_data = data; size_t work_size = 0; struct Blob fallback = BLOB_INITIALIZER; while( imake.paragraph ){ struct Blob *tmp = new_work_buffer(rndr); if( tmp ){ parse_inline(tmp, rndr, work_data, work_size); }else{ blob_init(&fallback, work_data, work_size); } rndr->make.paragraph(ob, tmp ? tmp : &fallback, rndr->make.opaque); release_work_buffer(rndr, tmp); } }else{ if( work_size ){ size_t beg; i = work_size; work_size -= 1; while( work_size && data[work_size]!='\n' ){ work_size--; } beg = work_size+1; while( work_size && data[work_size-1]=='\n'){ work_size--; } if( work_size ){ struct Blob *tmp = new_work_buffer(rndr); if( tmp ){ parse_inline(tmp, rndr, work_data, work_size); }else{ blob_init (&fallback, work_data, work_size); } if( rndr->make.paragraph ){ rndr->make.paragraph(ob, tmp ? 
tmp : &fallback, rndr->make.opaque); } release_work_buffer(rndr, tmp); work_data += beg; work_size = i - beg; }else{ work_size = i; } } if( rndr->make.header ){ struct Blob *span = new_work_buffer(rndr); if( span ){ parse_inline(span, rndr, work_data, work_size); rndr->make.header(ob, span, level, rndr->make.opaque); }else{ blob_init(&fallback, work_data, work_size); rndr->make.header(ob, &fallback, level, rndr->make.opaque); } release_work_buffer(rndr, span); } } return end; } /* parse_blockcode -- handles parsing of a block-level code fragment */ static size_t parse_blockcode( struct Blob *ob, struct render *rndr, char *data, size_t size ){ size_t beg, end, pre; struct Blob *work = new_work_buffer(rndr); if( !work ) work = ob; beg = 0; while( beg0 && blob_buffer(work)[end-1]=='\n' ){ end--; } work->nUsed = end; blob_append(work, "\n", 1); if( work!=ob ){ if( rndr->make.blockcode ){ rndr->make.blockcode(ob, work, rndr->make.opaque); } release_work_buffer(rndr, work); } return beg; } /* parse_listitem -- parsing of a single list item */ /* assuming initial prefix is already removed */ static size_t parse_listitem( struct Blob *ob, struct render *rndr, char *data, size_t size, int *flags ){ struct Blob fallback = BLOB_INITIALIZER; struct Blob *work = 0, *inter = 0; size_t beg = 0, end, pre, sublist = 0, orgpre = 0, i; int in_empty = 0, has_inside_empty = 0; /* keeping track of the first indentation prefix */ if( size>1 && data[0]==' ' ){ orgpre = 1; if( size>2 && data[1]==' ' ){ orgpre = 2; if( size>3 && data[2]==' ' ){ orgpre = 3; } } } beg = prefix_uli(data, size); if( !beg ) beg = prefix_oli(data, size); if( !beg ) return 0; /* skipping to the beginning of the following line */ end = beg; while( end1 && data[beg]==' ' ){ i = 1; if( end-beg>2 && data[beg+1]==' ' ){ i = 2; if( end-beg>3 && data[beg+2]==' ' ){ i = 3; if( end-beg>3 && data[beg+3]==' ' ){ i = 4; } } } } pre = i; if( data[beg]=='\t' ){ i = 1; pre = 8; } /* checking for a new item */ if( 
(prefix_uli(data+beg+i, end-beg-i) && !is_hrule(data+beg+i, end-beg-i)) || prefix_oli(data+beg+i, end-beg-i) ){ if( in_empty ) has_inside_empty = 1; if( pre == orgpre ){ /* the following item must have */ break; /* the same indentation */ } if( !sublist ) sublist = blob_size(work); /* joining only indented stuff after empty lines */ }else if( in_empty && i<4 && data[beg]!='\t' ){ *flags |= MKD_LI_END; break; }else if( in_empty ){ blob_append(work, "\n", 1); has_inside_empty = 1; } in_empty = 0; /* adding the line without prefix into the working buffer */ blob_append(work, data+beg+i, end-beg-i); beg = end; } /* non-recursive fallback when working buffer stack is full */ if( !inter ){ if( rndr->make.listitem ){ rndr->make.listitem(ob, work, *flags, rndr->make.opaque); } if( work!=&fallback ) release_work_buffer(rndr, work); blob_reset(&fallback); return beg; } /* render of li contents */ if( has_inside_empty ) *flags |= MKD_LI_BLOCK; if( *flags & MKD_LI_BLOCK ){ /* intermediate render of block li */ if( sublist && sublistmake.listitem ){ rndr->make.listitem(ob, inter, *flags, rndr->make.opaque); } release_work_buffer(rndr, inter); if( work!=&fallback ) release_work_buffer(rndr, work); blob_reset(&fallback); return beg; } /* parse_list -- parsing ordered or unordered list block */ static size_t parse_list( struct Blob *ob, struct render *rndr, char *data, size_t size, int flags ){ struct Blob fallback = BLOB_INITIALIZER; struct Blob *work = new_work_buffer(rndr); size_t i = 0, j; if( !work ) work = &fallback; while( imake.list ) rndr->make.list(ob, work, flags, rndr->make.opaque); if( work!=&fallback ) release_work_buffer(rndr, work); blob_reset(&fallback); return i; } /* parse_atxheader -- parsing of atx-style headers */ static size_t parse_atxheader( struct Blob *ob, struct render *rndr, char *data, size_t size ){ int level = 0; size_t i, end, skip, span_beg, span_size; if( !size || data[0]!='#' ) return 0; while( levelmake.header ){ struct Blob fallback = 
BLOB_INITIALIZER; struct Blob *span = new_work_buffer(rndr); if( span ){ parse_inline(span, rndr, data+span_beg, span_size); }else{ blob_init(&fallback, data+span_beg, span_size); } rndr->make.header(ob, span ? span : &fallback, level, rndr->make.opaque); release_work_buffer(rndr, span); } return skip; } /* htmlblock_end -- checking end of HTML block : [ \t]*\n[ \t*]\n */ /* returns the length on match, 0 otherwise */ static size_t htmlblock_end( const struct html_tag *tag, const char *data, size_t size ){ size_t i, w; /* assuming data[0]=='<' && data[1]=='/' already tested */ /* checking tag is a match */ if( (tag->size+3)>=size || fossil_strnicmp(data+2, tag->text, tag->size) || data[tag->size+2]!='>' ){ return 0; } /* checking white lines */ i = tag->size + 3; w = 0; if( i5 && data[1]=='!' && data[2]=='-' && data[3]=='-' ){ i = 5; while( i') ){ i++; } i++; if( imake.blockhtml ) return work_size; blob_init(&work, data, work_size); rndr->make.blockhtml(ob, &work, rndr->make.opaque); return work_size; } } } /* HR, which is the only self-closing block tag considered */ if( size>4 && (data[1]=='h' || data[1]=='H') && (data[2]=='r' || data[2]=='R') ){ i = 3; while( imake.blockhtml ) return work_size; blob_init(&work, data, work_size); rndr->make.blockhtml(ob, &work, rndr->make.opaque); return work_size; } } } /* no special case recognised */ return 0; } /* looking for an unindented matching closing tag */ /* followed by a blank line */ i = 1; found = 0; #if 0 while( isize)>=size ) break; j = htmlblock_end(curtag, data+i-1, size-i+1); if (j) { i += j-1; found = 1; break; } } #endif /* if not found, trying a second pass looking for indented match */ /* but not if tag is "ins" or "del" (following original Markdown.pl) */ if( !found && curtag!=INS_TAG && curtag!=DEL_TAG ){ i = 1; while( isize)>=size ) break; j = htmlblock_end(curtag, data+i-1, size-i+1); if (j) { i += j-1; found = 1; break; } } } if( !found ) return 0; /* the end of the block has been found */ 
blob_init(&work, data, i); if( rndr->make.blockhtml ){ rndr->make.blockhtml(ob, &work, rndr->make.opaque); } return i; } /* parse_table_cell -- parse a cell inside a table */ static void parse_table_cell( struct Blob *ob, /* output blob */ struct render *rndr, /* renderer description */ char *data, /* input text */ size_t size, /* input text size */ int flags /* table flags */ ){ struct Blob fallback = BLOB_INITIALIZER; struct Blob *span = new_work_buffer(rndr); if( span ){ parse_inline(span, rndr, data, size); }else{ blob_init(&fallback, data, size); } rndr->make.table_cell(ob, span ? span : &fallback, flags, rndr->make.opaque); release_work_buffer(rndr, span); } /* parse_table_row -- parse an input line into a table row */ static size_t parse_table_row( struct Blob *ob, /* output blob for rendering */ struct render *rndr, /* renderer description */ char *data, /* input text */ size_t size, /* input text size */ int *aligns, /* array of default alignment for columns */ size_t align_size, /* number of columns with default alignment */ int flags /* table flags */ ){ size_t i = 0, col = 0; size_t beg, end, total = 0; struct Blob *cells = new_work_buffer(rndr); int align; /* skip leading blanks and separator */ while( ibeg && data[end-1]==':' ){ align |= MKD_CELL_ALIGN_RIGHT; end--; } /* remove trailing blanks */ while( end>beg && (data[end-1]==' ' || data[end-1]=='\t') ){ end--; } /* skip the last cell if it was only blanks */ /* (because it is only the optional end separator) */ if( total && end<=beg ) continue; /* fallback on default alignment if not explicit */ if( align==0 && aligns && colmake.table_row(ob, cells, flags, rndr->make.opaque); }else{ struct Blob fallback = BLOB_INITIALIZER; blob_init(&fallback, data, total ? total : size); rndr->make.table_row(ob, &fallback, flags, rndr->make.opaque); } release_work_buffer(rndr, cells); return total ? 
total : size; } /* parse_table -- parsing of a whole table */ /* NOTE(review): unlike parse_list, neither visible exit path calls blob_reset() on fallback; this looks like a leak when the work buffer stack is full -- confirm */ static size_t parse_table( struct Blob *ob, struct render *rndr, char *data, size_t size ){ size_t i = 0, head_end, col; size_t align_size = 0; int *aligns = 0; struct Blob fallback = BLOB_INITIALIZER; struct Blob *head = 0; struct Blob *rows = new_work_buffer(rndr); if( !rows ) rows = &fallback; /* skip the first (presumably header) line */ while( i=size ){ parse_table_row(rows, rndr, data, size, 0, 0, 0); rndr->make.table(ob, 0, rows, rndr->make.opaque); if( rows!=&fallback ) release_work_buffer(rndr, rows); return i; } /* attempt to parse a table rule, i.e. blanks, dash, colons and sep */ i++; col = 0; while( imake.table(ob, head, rows, rndr->make.opaque); /* cleanup */ if( head ) release_work_buffer(rndr, head); if( rows!=&fallback ) release_work_buffer(rndr, rows); fossil_free(aligns); return i; } /* parse_block -- parsing of the block-level constructs found in the input text */ /* (the function is void: output goes to ob through the renderer callbacks) */ static void parse_block( struct Blob *ob, /* output blob */ struct render *rndr, /* renderer internal state */ char *data, /* input text */ size_t size /* input text size */ ){ size_t beg, end, i; char *txt_data; /* tables are only attempted when all three table callbacks are set */ int has_table = (rndr->make.table && rndr->make.table_row && rndr->make.table_cell); beg = 0; while( begmake.blockhtml && (i = parse_htmlblock(ob, rndr, txt_data, end))!=0 ){ beg += i; }else if( (i=is_empty(txt_data, end))!=0 ){ beg += i; }else if( is_hrule(txt_data, end) ){ if( rndr->make.hrule ) rndr->make.hrule(ob, rndr->make.opaque); while( beg=end ) return 0; if( data[beg]==' ' ){ i = 1; if( data[beg+1]==' ' ){ i = 2; if( data[beg+2]==' ' ){ i = 3; if( data[beg+3]==' ' ) return 0; } } } i += beg; /* id part: anything but a newline between brackets */ if( data[i]!='[' ) return 0; i++; id_offset = i; while( i=end || data[i]!=']' ) return 0; id_end = i; /* spacer: colon (space | tab)* newline?
(space | tab)* */ i++; if( i>=end || data[i]!=':' ) return 0; i++; while( i=end ) return 0; /* link: whitespace-free sequence, optionally between angle brackets */ if( data[i]=='<' ) i++; link_offset = i; while( i' ) link_end = i-1; else link_end = i; /* optional spacer: (space | tab)* (newline | '\'' | '"' | '(' ) */ while( i=end || data[i]=='\r' || data[i]=='\n' ) line_end = i; if( i+1title_offset && (data[i]==' ' || data[i]=='\t') ){ i--; } if( i>title_offset && (data[i]=='\'' || data[i]=='"' || data[i]==')') ){ line_end = title_end; title_end = i; } } if( !line_end ) return 0; /* garbage after the link */ /* a valid ref has been found, filling-in return structures */ if( last ) *last = line_end; if( !refs ) return 1; if( build_ref_id(&lr.id, data+id_offset, id_end-id_offset)<0 ) return 0; blob_append(&lr.link, data+link_offset, link_end-link_offset); if( title_end>title_offset ){ blob_append(&lr.title, data+title_offset, title_end-title_offset); } blob_append(refs, (char *)&lr, sizeof lr); return 1; } /********************** * EXPORTED FUNCTIONS * **********************/ /* markdown -- parses the input buffer and renders it into the output buffer */ void markdown( struct Blob *ob, /* output blob for rendered text */ struct Blob *ib, /* input blob in markdown */ const struct mkd_renderer *rndrer /* renderer descriptor (callbacks) */ ){ struct link_ref *lr; struct Blob text = BLOB_INITIALIZER; size_t i, beg, end = 0; struct render rndr; char *ib_data; /* filling the render structure */ if( !rndrer ) return; rndr.make = *rndrer; if( rndr.make.max_work_stack<1 ) rndr.make.max_work_stack = 1; rndr.work_active = 0; rndr.work = fossil_malloc(rndr.make.max_work_stack * sizeof *rndr.work); for(i=0; ibeg ) blob_append(&text, ib_data + beg, end - beg); while( end