Blob


1 /*
2 * Copyright (c) 2021, 2022 Omar Polo <op@omarpolo.com>
3 *
4 * Permission to use, copy, modify, and distribute this software for any
5 * purpose with or without fee is hereby granted, provided that the above
6 * copyright notice and this permission notice appear in all copies.
7 *
8 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
9 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
10 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
11 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
12 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
13 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
14 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
15 */
/*
 * A streaming gemtext parser.
 *
 * TODO:
 *  - handle NULs
 *  - UTF-8
 */
25 #include "compat.h"
27 #include <ctype.h>
28 #include <string.h>
29 #include <stdlib.h>
31 #include "defaults.h"
32 #include "parser.h"
33 #include "telescope.h"
34 #include "utf8.h"
35 #include "xwrapper.h"
/* callbacks exported through gemtext_parser */
static int	gemtext_parse_line(struct buffer *b, const char *line, size_t len);
static int	gemtext_free(struct buffer *b);
static int	gemtext_serialize(struct buffer *b, FILE *fp);

/* internal helpers */
static int	parse_link(struct buffer *b, const char *line, size_t len);
static int	parse_title(struct buffer *b, const char *line, size_t len);
static void	search_title(struct buffer *b, enum line_type level);
45 const struct parser gemtext_parser = {
46 .name = "text/gemini",
47 .parseline = &gemtext_parse_line,
48 .free = &gemtext_free,
49 .serialize = &gemtext_serialize,
50 };
52 static inline int
53 emit_line(struct buffer *b, enum line_type type, char *line, char *alt)
54 {
55 struct line *l;
57 l = xcalloc(1, sizeof(*l));
59 l->type = type;
60 l->line = line;
61 l->alt = alt;
63 switch (l->type) {
64 case LINE_PRE_START:
65 case LINE_PRE_END:
66 if (hide_pre_context)
67 l->flags = L_HIDDEN;
68 if (l->type == LINE_PRE_END &&
69 hide_pre_closing_line)
70 l->flags = L_HIDDEN;
71 break;
72 case LINE_PRE_CONTENT:
73 if (hide_pre_blocks)
74 l->flags = L_HIDDEN;
75 break;
76 case LINE_LINK:
77 if (emojify_link &&
78 !emojied_line(line, (const char **)&l->data))
79 l->data = NULL;
80 break;
81 default:
82 break;
83 }
85 if (dont_apply_styling)
86 l->flags &= ~L_HIDDEN;
88 TAILQ_INSERT_TAIL(&b->head, l, lines);
90 return 1;
91 }
93 static int
94 parse_link(struct buffer *b, const char *line, size_t len)
95 {
96 char *label, *url;
97 const char *start;
99 if (len <= 2)
100 return emit_line(b, LINE_TEXT, NULL, NULL);
102 line += 2, len -= 2;
103 while (len > 0 && isspace((unsigned char)line[0]))
104 line++, len--;
106 if (len == 0)
107 return emit_line(b, LINE_TEXT, NULL, NULL);
109 start = line;
110 while (len > 0 && !isspace((unsigned char)line[0]))
111 line++, len--;
113 url = xstrndup(start, line - start);
115 while (len > 0 && isspace(line[0]))
116 line++, len--;
118 if (len == 0) {
119 label = xstrdup(url);
120 } else {
121 label = xstrndup(line, len);
124 return emit_line(b, LINE_LINK, label, url);
127 static int
128 parse_title(struct buffer *b, const char *line, size_t len)
130 enum line_type t = LINE_TITLE_1;
131 char *l;
133 line++, len--;
134 while (len > 0 && *line == '#') {
135 line++, len--;
136 t++;
137 if (t == LINE_TITLE_3)
138 break;
141 while (len > 0 && isspace((unsigned char)*line))
142 line++, len--;
144 if (len == 0)
145 return emit_line(b, t, NULL, NULL);
147 if (t == LINE_TITLE_1 && *b->title == '\0')
148 strncpy(b->title, line, MIN(sizeof(b->title)-1, len));
150 l = xstrndup(line, len);
151 return emit_line(b, t, l, NULL);
154 static int
155 gemtext_parse_line(struct buffer *b, const char *line, size_t len)
157 char *l;
159 if (b->parser_flags & PARSER_IN_PRE) {
160 if (len >= 3 && !strncmp(line, "```", 3)) {
161 b->parser_flags ^= PARSER_IN_PRE;
162 return emit_line(b, LINE_PRE_END, NULL, NULL);
165 if (len == 0)
166 return emit_line(b, LINE_PRE_CONTENT, NULL, NULL);
167 l = xstrndup(line, len);
168 return emit_line(b, LINE_PRE_CONTENT, l, NULL);
171 if (len == 0)
172 return emit_line(b, LINE_TEXT, NULL, NULL);
174 switch (*line) {
175 case '*':
176 if (len < 1 || line[1] != ' ')
177 break;
179 line += 2, len -= 2;
180 while (len > 0 && isspace((unsigned char)*line))
181 line++, len--;
182 if (len == 0)
183 return emit_line(b, LINE_ITEM, NULL, NULL);
184 l = xstrndup(line, len);
185 return emit_line(b, LINE_ITEM, l, NULL);
187 case '>':
188 line++, len--;
189 while (len > 0 && isspace((unsigned char)*line))
190 line++, len--;
191 if (len == 0)
192 return emit_line(b, LINE_QUOTE, NULL, NULL);
193 l = xstrndup(line, len);
194 return emit_line(b, LINE_QUOTE, l, NULL);
196 case '=':
197 if (len > 1 && line[1] == '>')
198 return parse_link(b, line, len);
199 break;
201 case '#':
202 return parse_title(b, line, len);
204 case '`':
205 if (len < 3 || strncmp(line, "```", 3) != 0)
206 break;
208 b->parser_flags |= PARSER_IN_PRE;
209 line += 3, len -= 3;
210 while (len > 0 && isspace((unsigned char)*line))
211 line++, len--;
212 if (len == 0)
213 return emit_line(b, LINE_PRE_START,
214 NULL, NULL);
215 l = xstrndup(line, len);
216 return emit_line(b, LINE_PRE_START, l, NULL);
219 l = xstrndup(line, len);
220 return emit_line(b, LINE_TEXT, l, NULL);
223 static int
224 gemtext_free(struct buffer *b)
226 /* flush the buffer */
227 if (b->len != 0) {
228 if (!gemtext_parse_line(b, b->buf, b->len))
229 return 0;
230 if ((b->parser_flags & PARSER_IN_PRE) &&
231 !emit_line(b, LINE_PRE_END, NULL, NULL))
232 return 0;
235 /*
236 * use the first level 2 or 3 header as page title if none
237 * found yet.
238 */
239 if (*b->title == '\0')
240 search_title(b, LINE_TITLE_2);
241 if (*b->title == '\0')
242 search_title(b, LINE_TITLE_3);
244 return 1;
247 static void
248 search_title(struct buffer *b, enum line_type level)
250 struct line *l;
252 TAILQ_FOREACH(l, &b->head, lines) {
253 if (l->type == level) {
254 if (l->line == NULL)
255 continue;
256 strlcpy(b->title, l->line, sizeof(b->title));
257 break;
262 static const char *gemtext_prefixes[] = {
263 [LINE_TEXT] = "",
264 [LINE_TITLE_1] = "# ",
265 [LINE_TITLE_2] = "## ",
266 [LINE_TITLE_3] = "### ",
267 [LINE_ITEM] = "* ",
268 [LINE_QUOTE] = "> ",
269 [LINE_PRE_START] = "``` ",
270 [LINE_PRE_CONTENT] = "",
271 [LINE_PRE_END] = "```",
272 };
274 static int
275 gemtext_serialize(struct buffer *b, FILE *fp)
277 struct line *line;
278 const char *text;
279 const char *alt;
280 int r;
282 TAILQ_FOREACH(line, &b->head, lines) {
283 if ((text = line->line) == NULL)
284 text = "";
286 if ((alt = line->alt) == NULL)
287 alt = "";
289 switch (line->type) {
290 case LINE_TEXT:
291 case LINE_TITLE_1:
292 case LINE_TITLE_2:
293 case LINE_TITLE_3:
294 case LINE_ITEM:
295 case LINE_QUOTE:
296 case LINE_PRE_START:
297 case LINE_PRE_CONTENT:
298 case LINE_PRE_END:
299 r = fprintf(fp, "%s%s\n", gemtext_prefixes[line->type],
300 text);
301 break;
303 case LINE_LINK:
304 r = fprintf(fp, "=> %s %s\n", alt, text);
305 break;
307 default:
308 /* not reached */
309 abort();
312 if (r == -1)
313 return 0;
316 return 1;