commit 32c44488c70912a8ea50e091e6e3e2477139cf1b from: Omar Polo date: Wed Jun 05 23:04:38 2024 UTC rework the gemtext parser it was a bit too over-engineered. use external routines only when it makes sense, and implement only the parseline method. commit - 70afc8d42ff0d8e61236c77fd36e33d1df8641cd commit + 32c44488c70912a8ea50e091e6e3e2477139cf1b blob - 715827e7ab3aa47e8a2696784fd235b56eef4c20 blob + 2257c92e64e7cd1afb5e8c82a8a5a27aa7c519f7 --- parser_gemtext.c +++ parser_gemtext.c @@ -32,43 +32,21 @@ #include "parser.h" #include "utf8.h" -static int gemtext_parse(struct parser *, const char *, size_t); -static int gemtext_foreach_line(struct parser *, const char *, size_t); +static int gemtext_parse_line(struct parser *, const char *, size_t); static int gemtext_free(struct parser *); static int gemtext_serialize(struct parser *, FILE *); -static int parse_text(struct parser*, enum line_type, const char*, size_t); -static int parse_link(struct parser*, enum line_type, const char*, size_t); -static int parse_title(struct parser*, enum line_type, const char*, size_t); -static int parse_item(struct parser*, enum line_type, const char*, size_t); -static int parse_quote(struct parser*, enum line_type, const char*, size_t); -static int parse_pre_start(struct parser*, enum line_type, const char*, size_t); -static int parse_pre_cnt(struct parser*, enum line_type, const char*, size_t); -static int parse_pre_end(struct parser*, enum line_type, const char*, size_t); -static void search_title(struct parser*, enum line_type); +static int parse_link(struct parser *, const char*, size_t); +static int parse_title(struct parser *, const char*, size_t); +static void search_title(struct parser *, enum line_type); -typedef int (plinefn)(struct parser*, enum line_type, const char*, size_t); - -static plinefn *parsers[] = { - [LINE_TEXT] = parse_text, - [LINE_LINK] = parse_link, - [LINE_TITLE_1] = parse_title, - [LINE_TITLE_2] = parse_title, - [LINE_TITLE_3] = parse_title, - [LINE_ITEM] = parse_item, - [LINE_QUOTE] = parse_quote, - [LINE_PRE_START] = parse_pre_start, - [LINE_PRE_CONTENT] = parse_pre_cnt, - [LINE_PRE_END] = parse_pre_end, -}; - void gemtext_initparser(struct parser *p) { memset(p, 0, sizeof(*p)); p->name = "text/gemini"; - p->parse = &gemtext_parse; + p->parseline = &gemtext_parse_line; p->free = &gemtext_free; p->serialize = &gemtext_serialize; @@ -118,285 +96,149 @@ emit_line(struct parser *p, enum line_type type, char } static int -parse_text(struct parser *p, enum line_type t, const char *buf, size_t len) +parse_link(struct parser *p, const char *line, size_t len) { - char *l; + char *label, *url; + const char *start; - if ((l = calloc(1, len+1)) == NULL) - return 0; - memcpy(l, buf, len); - return emit_line(p, t, l, NULL); -} - -static int -parse_link(struct parser *p, enum line_type t, const char *buf, size_t len) -{ - char *l, *u; - const char *url_start; - if (len <= 2) return emit_line(p, LINE_TEXT, NULL, NULL); - buf += 2; - len -= 2; - while (len > 0 && isspace(buf[0])) { - buf++; - len--; - } + line += 2, len -= 2; + while (len > 0 && isspace((unsigned char)line[0])) + line++, len--; if (len == 0) return emit_line(p, LINE_TEXT, NULL, NULL); - url_start = buf; - while (len > 0 && !isspace(buf[0])) { - buf++; - len--; - } + start = line; + while (len > 0 && !isspace((unsigned char)line[0])) + line++, len--; - if ((u = calloc(1, buf - url_start + 1)) == NULL) + if ((url = strndup(start, line - start)) == NULL) return 0; - memcpy(u, url_start, buf - url_start); - if (len == 0) - goto nolabel; + while (len > 0 && isspace(line[0])) + line++, len--; - while (len > 0 && isspace(buf[0])) { - buf++; - len--; + if (len == 0) { + if ((label = strdup(url)) == NULL) + return 0; + } else { + if ((label = strndup(line, len)) == NULL) + return 0; } - if (len == 0) - goto nolabel; - - if ((l = calloc(1, len + 1)) == NULL) - return 0; - - memcpy(l, buf, len); - return emit_line(p, t, l, u); - -nolabel: - if ((l = strdup(u)) == NULL) - return 0; - return emit_line(p, t, l, u); + return emit_line(p, LINE_LINK, label, url); } static int -parse_title(struct parser *p, enum line_type t, const char *buf, size_t len) +parse_title(struct parser *p, const char *line, size_t len) { + enum line_type t = LINE_TITLE_1; char *l; - switch (t) { - case LINE_TITLE_1: - if (len <= 1) - return emit_line(p, t, NULL, NULL); - buf++; - len--; - break; - case LINE_TITLE_2: - if (len <= 2) - return emit_line(p, t, NULL, NULL); - buf += 2; - len -= 2; - break; - case LINE_TITLE_3: - if (len <= 3) - return emit_line(p, t, NULL, NULL); - buf += 3; - len -= 3; - break; - default: - /* unreachable */ - abort(); + while (len > 0 && *line == '#') { + line++, len--; + t++; + if (t == LINE_TITLE_3) + break; } - while (len > 0 && isspace(buf[0])) { - buf++; - len--; - } + while (len > 0 && isspace((unsigned char)*line)) + line++, len--; if (len == 0) return emit_line(p, t, NULL, NULL); if (t == LINE_TITLE_1 && *p->title == '\0') - strncpy(p->title, buf, MIN(sizeof(p->title)-1, len)); + strncpy(p->title, line, MIN(sizeof(p->title)-1, len)); - if ((l = calloc(1, len+1)) == NULL) + if ((l = strndup(line, len)) == NULL) return 0; - memcpy(l, buf, len); return emit_line(p, t, l, NULL); } static int -parse_item(struct parser *p, enum line_type t, const char *buf, size_t len) +gemtext_parse_line(struct parser *p, const char *line, size_t len) { char *l; - if (len == 1) - return emit_line(p, t, NULL, NULL); + if (p->flags & PARSER_IN_PRE) { + if (len >= 3 && !strncmp(line, "```", 3)) { + p->flags ^= PARSER_IN_PRE; + return emit_line(p, LINE_PRE_END, NULL, NULL); + } - buf++; - len--; - - while (len > 0 && isspace(buf[0])) { - buf++; - len--; + if (len == 0) + return emit_line(p, LINE_PRE_CONTENT, NULL, NULL); + if ((l = strndup(line, len)) == NULL) + return 0; + return emit_line(p, LINE_PRE_CONTENT, l, NULL); } if (len == 0) - return emit_line(p, t, NULL, NULL); + return emit_line(p, LINE_TEXT, NULL, NULL); - if ((l = calloc(1, len+1)) == NULL) - return 0; - memcpy(l, buf, len); - return emit_line(p, t, l, NULL); -} + switch (*line) { + case '*': + if (len < 1 || line[1] != ' ') + break; -static int -parse_quote(struct parser *p, enum line_type t, const char *buf, size_t len) -{ - char *l; + line += 2, len -= 2; + while (len > 0 && isspace((unsigned char)*line)) + line++, len--; + if (len == 0) + return emit_line(p, LINE_ITEM, NULL, NULL); + if ((l = strndup(line, len)) == NULL) + return 0; + return emit_line(p, LINE_ITEM, l, NULL); - if (len == 1) - return emit_line(p, t, NULL, NULL); + case '>': + line++, len--; + while (len > 0 && isspace((unsigned char)*line)) + line++, len--; + if (len == 0) + return emit_line(p, LINE_QUOTE, NULL, NULL); + if ((l = strndup(line, len)) == NULL) + return 0; + return emit_line(p, LINE_QUOTE, l, NULL); - buf++; - len--; + case '=': + if (len > 1 && line[1] == '>') + return parse_link(p, line, len); + break; - while (len > 0 && isspace(buf[0])) { - buf++; - len--; - } + case '#': + return parse_title(p, line, len); - if (len == 0) - return emit_line(p, t, NULL, NULL); + case '`': + if (len < 3 || strncmp(line, "```", 3) != 0) + break; - if ((l = calloc(1, len+1)) == NULL) - return 0; - memcpy(l, buf, len); - return emit_line(p, t, l, NULL); -} - -static int -parse_pre_start(struct parser *p, enum line_type t, const char *buf, size_t len) -{ - char *l; - - if (len <= 3) - return emit_line(p, t, NULL, NULL); - - buf += 3; - len -= 3; - - while (len > 0 && isspace(buf[0])) { - buf++; - len--; + p->flags |= PARSER_IN_PRE; + line += 3, len -= 3; + while (len > 0 && isspace((unsigned char)*line)) + line++, len--; + if (len == 0) + return emit_line(p, LINE_PRE_START, + NULL, NULL); + if ((l = strndup(line, len)) == NULL) + return 0; + return emit_line(p, LINE_PRE_START, l, NULL); } - if (len == 0) - return emit_line(p, t, NULL, NULL); - - if ((l = calloc(1, len+1)) == NULL) + if ((l = strndup(line, len)) == NULL) return 0; - - memcpy(l, buf, len); - return emit_line(p, t, l, NULL); + return emit_line(p, LINE_TEXT, l, NULL); } static int -parse_pre_cnt(struct parser *p, enum line_type t, const char *buf, size_t len) -{ - char *l; - - if (len == 0) - return emit_line(p, t, NULL, NULL); - - if ((l = calloc(1, len+1)) == NULL) - return 0; - memcpy(l, buf, len); - return emit_line(p, t, l, NULL); -} - -static int -parse_pre_end(struct parser *p, enum line_type t, const char *buf, size_t len) -{ - return emit_line(p, t, NULL, NULL); -} - -static inline enum line_type -detect_line_type(const char *buf, size_t len, int in_pre) -{ - if (in_pre) { - if (len >= 3 && - buf[0] == '`' && buf[1] == '`' && buf[2] == '`') - return LINE_PRE_END; - else - return LINE_PRE_CONTENT; - } - - if (len == 0) - return LINE_TEXT; - - switch (*buf) { - case '*': - if (len > 1 && buf[1] == ' ') - return LINE_ITEM; - break; - case '>': return LINE_QUOTE; - case '=': - if (len >= 1 && buf[1] == '>') - return LINE_LINK; - break; - case '#': - if (len == 1) - return LINE_TEXT; - if (buf[1] != '#') - return LINE_TITLE_1; - if (len == 2) - return LINE_TEXT; - if (buf[2] != '#') - return LINE_TITLE_2; - if (len == 3) - return LINE_TEXT; - return LINE_TITLE_3; - case '`': - if (len < 3) - return LINE_TEXT; - if (buf[0] == '`' && buf[1] == '`' && buf[2] == '`') - return LINE_PRE_START; - break; - } - - return LINE_TEXT; -} - -static int -gemtext_parse(struct parser *p, const char *buf, size_t size) -{ - return parser_foreach_line(p, buf, size, gemtext_foreach_line); -} - -static int -gemtext_foreach_line(struct parser *p, const char *line, size_t linelen) -{ - enum line_type t; - - t = detect_line_type(line, linelen, p->flags & PARSER_IN_PRE); - if (t == LINE_PRE_START) - p->flags ^= PARSER_IN_PRE; - if (t == LINE_PRE_END) - p->flags ^= PARSER_IN_PRE; - return parsers[t](p, t, line, linelen); -} - -static int gemtext_free(struct parser *p) { - enum line_type t; - /* flush the buffer */ if (p->len != 0) { - t = detect_line_type(p->buf, p->len, p->flags & PARSER_IN_PRE); - if (!parsers[t](p, t, p->buf, p->len)) + if (!gemtext_parse_line(p, p->buf, p->len)) return 0; if ((p->flags & PARSER_IN_PRE) && !emit_line(p, LINE_PRE_END, NULL, NULL))