Commit Diff


commit - 70afc8d42ff0d8e61236c77fd36e33d1df8641cd
commit + 32c44488c70912a8ea50e091e6e3e2477139cf1b
blob - 715827e7ab3aa47e8a2696784fd235b56eef4c20
blob + 2257c92e64e7cd1afb5e8c82a8a5a27aa7c519f7
--- parser_gemtext.c
+++ parser_gemtext.c
@@ -32,43 +32,21 @@
 #include "parser.h"
 #include "utf8.h"
 
-static int	gemtext_parse(struct parser *, const char *, size_t);
-static int	gemtext_foreach_line(struct parser *, const char *, size_t);
+static int	gemtext_parse_line(struct parser *, const char *, size_t);
 static int	gemtext_free(struct parser *);
 static int	gemtext_serialize(struct parser *, FILE *);
 
-static int	parse_text(struct parser*, enum line_type, const char*, size_t);
-static int	parse_link(struct parser*, enum line_type, const char*, size_t);
-static int	parse_title(struct parser*, enum line_type, const char*, size_t);
-static int	parse_item(struct parser*, enum line_type, const char*, size_t);
-static int	parse_quote(struct parser*, enum line_type, const char*, size_t);
-static int	parse_pre_start(struct parser*, enum line_type, const char*, size_t);
-static int	parse_pre_cnt(struct parser*, enum line_type, const char*, size_t);
-static int	parse_pre_end(struct parser*, enum line_type, const char*, size_t);
-static void	search_title(struct parser*, enum line_type);
+static int	parse_link(struct parser *, const char*, size_t);
+static int	parse_title(struct parser *, const char*, size_t);
+static void	search_title(struct parser *, enum line_type);
 
-typedef int (plinefn)(struct parser*, enum line_type, const char*, size_t);
-
-static plinefn *parsers[] = {
-	[LINE_TEXT]		= parse_text,
-	[LINE_LINK]		= parse_link,
-	[LINE_TITLE_1]		= parse_title,
-	[LINE_TITLE_2]		= parse_title,
-	[LINE_TITLE_3]		= parse_title,
-	[LINE_ITEM]		= parse_item,
-	[LINE_QUOTE]		= parse_quote,
-	[LINE_PRE_START]	= parse_pre_start,
-	[LINE_PRE_CONTENT]	= parse_pre_cnt,
-	[LINE_PRE_END]		= parse_pre_end,
-};
-
 void
 gemtext_initparser(struct parser *p)
 {
 	memset(p, 0, sizeof(*p));
 
 	p->name = "text/gemini";
-	p->parse = &gemtext_parse;
+	p->parseline = &gemtext_parse_line;
 	p->free  = &gemtext_free;
 	p->serialize = &gemtext_serialize;
 
@@ -118,285 +96,149 @@ emit_line(struct parser *p, enum line_type type, char 
 }
 
 static int
-parse_text(struct parser *p, enum line_type t, const char *buf, size_t len)
+parse_link(struct parser *p, const char *line, size_t len)
 {
-	char *l;
+	char *label, *url;
+	const char *start;
 
-	if ((l = calloc(1, len+1)) == NULL)
-		return 0;
-	memcpy(l, buf, len);
-	return emit_line(p, t, l, NULL);
-}
-
-static int
-parse_link(struct parser *p, enum line_type t, const char *buf, size_t len)
-{
-	char *l, *u;
-	const char *url_start;
-
 	if (len <= 2)
 		return emit_line(p, LINE_TEXT, NULL, NULL);
-	buf += 2;
-	len -= 2;
 
-	while (len > 0 && isspace(buf[0])) {
-		buf++;
-		len--;
-	}
+	line += 2, len -= 2;
+	while (len > 0 && isspace((unsigned char)line[0]))
+		line++, len--;
 
 	if (len == 0)
 		return emit_line(p, LINE_TEXT, NULL, NULL);
 
-	url_start = buf;
-	while (len > 0 && !isspace(buf[0])) {
-		buf++;
-		len--;
-	}
+	start = line;
+	while (len > 0 && !isspace((unsigned char)line[0]))
+		line++, len--;
 
-	if ((u = calloc(1, buf - url_start + 1)) == NULL)
+	if ((url = strndup(start, line - start)) == NULL)
 		return 0;
-	memcpy(u, url_start, buf - url_start);
 
-	if (len == 0)
-		goto nolabel;
+	while (len > 0 && isspace((unsigned char)line[0]))
+		line++, len--;
 
-	while (len > 0 && isspace(buf[0])) {
-		buf++;
-		len--;
+	/* a link without a label is shown using its URL */
+	label = len == 0 ? strdup(url) : strndup(line, len);
+	if (label == NULL) {
+		/* url was never handed to emit_line: don't leak it */
+		free(url);
+		return 0;
 	}
 
-	if (len == 0)
-		goto nolabel;
-
-	if ((l = calloc(1, len + 1)) == NULL)
-		return 0;
-
-	memcpy(l, buf, len);
-	return emit_line(p, t, l, u);
-
-nolabel:
-	if ((l = strdup(u)) == NULL)
-		return 0;
-	return emit_line(p, t, l, u);
+	return emit_line(p, LINE_LINK, label, url);
 }
 
 static int
-parse_title(struct parser *p, enum line_type t, const char *buf, size_t len)
+parse_title(struct parser *p, const char *line, size_t len)
 {
+	enum line_type t = LINE_TITLE_1;
 	char *l;
 
-	switch (t) {
-	case LINE_TITLE_1:
-		if (len <= 1)
-			return emit_line(p, t, NULL, NULL);
-		buf++;
-		len--;
-		break;
-	case LINE_TITLE_2:
-		if (len <= 2)
-			return emit_line(p, t, NULL, NULL);
-		buf += 2;
-		len -= 2;
-		break;
-	case LINE_TITLE_3:
-		if (len <= 3)
-			return emit_line(p, t, NULL, NULL);
-		buf += 3;
-		len -= 3;
-		break;
-	default:
-		/* unreachable */
-		abort();
+	/* the leading '#' is LINE_TITLE_1; each extra one goes a level deeper */
+	line++, len--;
+	while (len > 0 && *line == '#' && t != LINE_TITLE_3) {
+		line++, len--;
+		t++;
 	}
 
-	while (len > 0 && isspace(buf[0])) {
-		buf++;
-		len--;
-	}
+	while (len > 0 && isspace((unsigned char)*line))
+		line++, len--;
 
 	if (len == 0)
 		return emit_line(p, t, NULL, NULL);
 
 	if (t == LINE_TITLE_1 && *p->title == '\0')
-		strncpy(p->title, buf, MIN(sizeof(p->title)-1, len));
+		strncpy(p->title, line, MIN(sizeof(p->title)-1, len));
 
-	if ((l = calloc(1, len+1)) == NULL)
+	if ((l = strndup(line, len)) == NULL)
 		return 0;
-	memcpy(l, buf, len);
 	return emit_line(p, t, l, NULL);
 }
 
 static int
-parse_item(struct parser *p, enum line_type t, const char *buf, size_t len)
+gemtext_parse_line(struct parser *p, const char *line, size_t len)
 {
 	char *l;
 
-	if (len == 1)
-		return emit_line(p, t, NULL, NULL);
+	if (p->flags & PARSER_IN_PRE) {
+		if (len >= 3 && !strncmp(line, "```", 3)) {
+			p->flags ^= PARSER_IN_PRE;
+			return emit_line(p, LINE_PRE_END, NULL, NULL);
+		}
 
-	buf++;
-	len--;
-
-	while (len > 0 && isspace(buf[0])) {
-		buf++;
-		len--;
+		if (len == 0)
+			return emit_line(p, LINE_PRE_CONTENT, NULL, NULL);
+		if ((l = strndup(line, len)) == NULL)
+			return 0;
+		return emit_line(p, LINE_PRE_CONTENT, l, NULL);
 	}
 
 	if (len == 0)
-		return emit_line(p, t, NULL, NULL);
+		return emit_line(p, LINE_TEXT, NULL, NULL);
 
-	if ((l = calloc(1, len+1)) == NULL)
-		return 0;
-	memcpy(l, buf, len);
-	return emit_line(p, t, l, NULL);
-}
+	switch (*line) {
+	case '*':
+		if (len < 2 || line[1] != ' ')
+			break;
 
-static int
-parse_quote(struct parser *p, enum line_type t, const char *buf, size_t len)
-{
-	char *l;
+		line += 2, len -= 2;
+		while (len > 0 && isspace((unsigned char)*line))
+			line++, len--;
+		if (len == 0)
+			return emit_line(p, LINE_ITEM, NULL, NULL);
+		if ((l = strndup(line, len)) == NULL)
+			return 0;
+		return emit_line(p, LINE_ITEM, l, NULL);
 
-	if (len == 1)
-		return emit_line(p, t, NULL, NULL);
+	case '>':
+		line++, len--;
+		while (len > 0 && isspace((unsigned char)*line))
+			line++, len--;
+		if (len == 0)
+			return emit_line(p, LINE_QUOTE, NULL, NULL);
+		if ((l = strndup(line, len)) == NULL)
+			return 0;
+		return emit_line(p, LINE_QUOTE, l, NULL);
 
-	buf++;
-	len--;
+	case '=':
+		if (len > 1 && line[1] == '>')
+			return parse_link(p, line, len);
+		break;
 
-	while (len > 0 && isspace(buf[0])) {
-		buf++;
-		len--;
-	}
+	case '#':
+		return parse_title(p, line, len);
 
-	if (len == 0)
-		return emit_line(p, t, NULL, NULL);
+	case '`':
+		if (len < 3 || strncmp(line, "```", 3) != 0)
+			break;
 
-	if ((l = calloc(1, len+1)) == NULL)
-		return 0;
-	memcpy(l, buf, len);
-	return emit_line(p, t, l, NULL);
-}
-
-static int
-parse_pre_start(struct parser *p, enum line_type t, const char *buf, size_t len)
-{
-	char *l;
-
-	if (len <= 3)
-		return emit_line(p, t, NULL, NULL);
-
-	buf += 3;
-	len -= 3;
-
-	while (len > 0 && isspace(buf[0])) {
-		buf++;
-		len--;
+		p->flags |= PARSER_IN_PRE;
+		line += 3, len -= 3;
+		while (len > 0 && isspace((unsigned char)*line))
+			line++, len--;
+		if (len == 0)
+			return emit_line(p, LINE_PRE_START,
+			    NULL, NULL);
+		if ((l = strndup(line, len)) == NULL)
+			return 0;
+		return emit_line(p, LINE_PRE_START, l, NULL);
 	}
 
-	if (len == 0)
-		return emit_line(p, t, NULL, NULL);
-
-	if ((l = calloc(1, len+1)) == NULL)
+	if ((l = strndup(line, len)) == NULL)
 		return 0;
-
-	memcpy(l, buf, len);
-	return emit_line(p, t, l, NULL);
+	return emit_line(p, LINE_TEXT, l, NULL);
 }
 
 static int
-parse_pre_cnt(struct parser *p, enum line_type t, const char *buf, size_t len)
-{
-	char *l;
-
-	if (len == 0)
-		return emit_line(p, t, NULL, NULL);
-
-	if ((l = calloc(1, len+1)) == NULL)
-		return 0;
-	memcpy(l, buf, len);
-	return emit_line(p, t, l, NULL);
-}
-
-static int
-parse_pre_end(struct parser *p, enum line_type t, const char *buf, size_t len)
-{
-	return emit_line(p, t, NULL, NULL);
-}
-
-static inline enum line_type
-detect_line_type(const char *buf, size_t len, int in_pre)
-{
-	if (in_pre) {
-		if (len >= 3 &&
-		    buf[0] == '`' && buf[1] == '`' && buf[2] == '`')
-			return LINE_PRE_END;
-		else
-			return LINE_PRE_CONTENT;
-	}
-
-	if (len == 0)
-		return LINE_TEXT;
-
-	switch (*buf) {
-	case '*':
-		if (len > 1 && buf[1] == ' ')
-			return LINE_ITEM;
-		break;
-	case '>': return LINE_QUOTE;
-	case '=':
-		if (len >= 1 && buf[1] == '>')
-			return LINE_LINK;
-		break;
-	case '#':
-		if (len == 1)
-			return LINE_TEXT;
-		if (buf[1] != '#')
-			return LINE_TITLE_1;
-		if (len == 2)
-			return LINE_TEXT;
-		if (buf[2] != '#')
-			return LINE_TITLE_2;
-		if (len == 3)
-			return LINE_TEXT;
-		return LINE_TITLE_3;
-	case '`':
-		if (len < 3)
-			return LINE_TEXT;
-		if (buf[0] == '`' && buf[1] == '`' && buf[2] == '`')
-			return LINE_PRE_START;
-		break;
-	}
-
-	return LINE_TEXT;
-}
-
-static int
-gemtext_parse(struct parser *p, const char *buf, size_t size)
-{
-	return parser_foreach_line(p, buf, size, gemtext_foreach_line);
-}
-
-static int
-gemtext_foreach_line(struct parser *p, const char *line, size_t linelen)
-{
-	enum line_type t;
-
-	t = detect_line_type(line, linelen, p->flags & PARSER_IN_PRE);
-	if (t == LINE_PRE_START)
-		p->flags ^= PARSER_IN_PRE;
-	if (t == LINE_PRE_END)
-		p->flags ^= PARSER_IN_PRE;
-	return parsers[t](p, t, line, linelen);
-}
-
-static int
 gemtext_free(struct parser *p)
 {
-	enum line_type	t;
-
 	/* flush the buffer */
 	if (p->len != 0) {
-		t = detect_line_type(p->buf, p->len, p->flags & PARSER_IN_PRE);
-		if (!parsers[t](p, t, p->buf, p->len))
+		if (!gemtext_parse_line(p, p->buf, p->len))
 			return 0;
 		if ((p->flags & PARSER_IN_PRE) &&
 		    !emit_line(p, LINE_PRE_END, NULL, NULL))