Blame


1 1ac119fb 2024-01-23 op /*
2 1ac119fb 2024-01-23 op * Copyright (c) 2021, 2022 Omar Polo <op@omarpolo.com>
3 1ac119fb 2024-01-23 op *
4 1ac119fb 2024-01-23 op * Permission to use, copy, modify, and distribute this software for any
5 1ac119fb 2024-01-23 op * purpose with or without fee is hereby granted, provided that the above
6 1ac119fb 2024-01-23 op * copyright notice and this permission notice appear in all copies.
7 1ac119fb 2024-01-23 op *
8 1ac119fb 2024-01-23 op * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
9 1ac119fb 2024-01-23 op * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
10 1ac119fb 2024-01-23 op * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
11 1ac119fb 2024-01-23 op * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
12 1ac119fb 2024-01-23 op * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
13 1ac119fb 2024-01-23 op * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
14 1ac119fb 2024-01-23 op * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
15 1ac119fb 2024-01-23 op */
16 1ac119fb 2024-01-23 op
17 1ac119fb 2024-01-23 op /*
18 1ac119fb 2024-01-23 op * A streaming gemtext parser.
19 1ac119fb 2024-01-23 op *
20 1ac119fb 2024-01-23 op * TODO:
21 1ac119fb 2024-01-23 op * - handle NULs
22 1ac119fb 2024-01-23 op * - UTF8
23 1ac119fb 2024-01-23 op */
24 1ac119fb 2024-01-23 op
25 1ac119fb 2024-01-23 op #include "compat.h"
26 1ac119fb 2024-01-23 op
27 1ac119fb 2024-01-23 op #include <ctype.h>
28 1ac119fb 2024-01-23 op #include <string.h>
29 1ac119fb 2024-01-23 op #include <stdlib.h>
30 1ac119fb 2024-01-23 op
31 1ac119fb 2024-01-23 op #include "defaults.h"
32 1ac119fb 2024-01-23 op #include "parser.h"
33 c1d27b0e 2024-06-14 op #include "telescope.h"
34 1ac119fb 2024-01-23 op #include "utf8.h"
35 3d89457c 2024-06-18 thomas.ad #include "xwrapper.h"
36 1ac119fb 2024-01-23 op
37 c1d27b0e 2024-06-14 op static int gemtext_parse_line(struct buffer *, const char *, size_t);
38 c1d27b0e 2024-06-14 op static int gemtext_free(struct buffer *);
39 c1d27b0e 2024-06-14 op static int gemtext_serialize(struct buffer *, FILE *);
40 1ac119fb 2024-01-23 op
41 c1d27b0e 2024-06-14 op static int parse_link(struct buffer *, const char*, size_t);
42 c1d27b0e 2024-06-14 op static int parse_title(struct buffer *, const char*, size_t);
43 c1d27b0e 2024-06-14 op static void search_title(struct buffer *, enum line_type);
44 1ac119fb 2024-01-23 op
45 fd1c80ce 2024-06-14 op const struct parser gemtext_parser = {
46 c1d27b0e 2024-06-14 op .name = "text/gemini",
47 c1d27b0e 2024-06-14 op .parseline = &gemtext_parse_line,
48 c1d27b0e 2024-06-14 op .free = &gemtext_free,
49 c1d27b0e 2024-06-14 op .serialize = &gemtext_serialize,
50 c1d27b0e 2024-06-14 op };
51 1ac119fb 2024-01-23 op
52 1ac119fb 2024-01-23 op static inline int
53 c1d27b0e 2024-06-14 op emit_line(struct buffer *b, enum line_type type, char *line, char *alt)
54 1ac119fb 2024-01-23 op {
55 1ac119fb 2024-01-23 op struct line *l;
56 1ac119fb 2024-01-23 op
57 3d89457c 2024-06-18 thomas.ad l = xcalloc(1, sizeof(*l));
58 1ac119fb 2024-01-23 op
59 1ac119fb 2024-01-23 op l->type = type;
60 1ac119fb 2024-01-23 op l->line = line;
61 1ac119fb 2024-01-23 op l->alt = alt;
62 1ac119fb 2024-01-23 op
63 1ac119fb 2024-01-23 op switch (l->type) {
64 1ac119fb 2024-01-23 op case LINE_PRE_START:
65 1ac119fb 2024-01-23 op case LINE_PRE_END:
66 1ac119fb 2024-01-23 op if (hide_pre_context)
67 1ac119fb 2024-01-23 op l->flags = L_HIDDEN;
68 1ac119fb 2024-01-23 op if (l->type == LINE_PRE_END &&
69 1ac119fb 2024-01-23 op hide_pre_closing_line)
70 1ac119fb 2024-01-23 op l->flags = L_HIDDEN;
71 1ac119fb 2024-01-23 op break;
72 1ac119fb 2024-01-23 op case LINE_PRE_CONTENT:
73 1ac119fb 2024-01-23 op if (hide_pre_blocks)
74 1ac119fb 2024-01-23 op l->flags = L_HIDDEN;
75 1ac119fb 2024-01-23 op break;
76 1ac119fb 2024-01-23 op case LINE_LINK:
77 1ac119fb 2024-01-23 op if (emojify_link &&
78 1ac119fb 2024-01-23 op !emojied_line(line, (const char **)&l->data))
79 1ac119fb 2024-01-23 op l->data = NULL;
80 1ac119fb 2024-01-23 op break;
81 1ac119fb 2024-01-23 op default:
82 1ac119fb 2024-01-23 op break;
83 1ac119fb 2024-01-23 op }
84 1ac119fb 2024-01-23 op
85 f350bc73 2024-05-27 thomas.ad if (dont_apply_styling)
86 f350bc73 2024-05-27 thomas.ad l->flags &= ~L_HIDDEN;
87 f350bc73 2024-05-27 thomas.ad
88 c1d27b0e 2024-06-14 op TAILQ_INSERT_TAIL(&b->head, l, lines);
89 1ac119fb 2024-01-23 op
90 1ac119fb 2024-01-23 op return 1;
91 1ac119fb 2024-01-23 op }
92 1ac119fb 2024-01-23 op
93 1ac119fb 2024-01-23 op static int
94 c1d27b0e 2024-06-14 op parse_link(struct buffer *b, const char *line, size_t len)
95 1ac119fb 2024-01-23 op {
96 32c44488 2024-06-05 op char *label, *url;
97 32c44488 2024-06-05 op const char *start;
98 1ac119fb 2024-01-23 op
99 1ac119fb 2024-01-23 op if (len <= 2)
100 c1d27b0e 2024-06-14 op return emit_line(b, LINE_TEXT, NULL, NULL);
101 1ac119fb 2024-01-23 op
102 32c44488 2024-06-05 op line += 2, len -= 2;
103 32c44488 2024-06-05 op while (len > 0 && isspace((unsigned char)line[0]))
104 32c44488 2024-06-05 op line++, len--;
105 1ac119fb 2024-01-23 op
106 1ac119fb 2024-01-23 op if (len == 0)
107 c1d27b0e 2024-06-14 op return emit_line(b, LINE_TEXT, NULL, NULL);
108 1ac119fb 2024-01-23 op
109 32c44488 2024-06-05 op start = line;
110 32c44488 2024-06-05 op while (len > 0 && !isspace((unsigned char)line[0]))
111 32c44488 2024-06-05 op line++, len--;
112 1ac119fb 2024-01-23 op
113 3d89457c 2024-06-18 thomas.ad url = xstrndup(start, line - start);
114 1ac119fb 2024-01-23 op
115 32c44488 2024-06-05 op while (len > 0 && isspace(line[0]))
116 32c44488 2024-06-05 op line++, len--;
117 1ac119fb 2024-01-23 op
118 32c44488 2024-06-05 op if (len == 0) {
119 3d89457c 2024-06-18 thomas.ad label = xstrdup(url);
120 32c44488 2024-06-05 op } else {
121 3d89457c 2024-06-18 thomas.ad label = xstrndup(line, len);
122 1ac119fb 2024-01-23 op }
123 1ac119fb 2024-01-23 op
124 c1d27b0e 2024-06-14 op return emit_line(b, LINE_LINK, label, url);
125 1ac119fb 2024-01-23 op }
126 1ac119fb 2024-01-23 op
127 1ac119fb 2024-01-23 op static int
128 c1d27b0e 2024-06-14 op parse_title(struct buffer *b, const char *line, size_t len)
129 1ac119fb 2024-01-23 op {
130 32c44488 2024-06-05 op enum line_type t = LINE_TITLE_1;
131 1ac119fb 2024-01-23 op char *l;
132 1ac119fb 2024-01-23 op
133 dfc5d927 2024-06-05 op line++, len--;
134 32c44488 2024-06-05 op while (len > 0 && *line == '#') {
135 32c44488 2024-06-05 op line++, len--;
136 32c44488 2024-06-05 op t++;
137 32c44488 2024-06-05 op if (t == LINE_TITLE_3)
138 32c44488 2024-06-05 op break;
139 1ac119fb 2024-01-23 op }
140 1ac119fb 2024-01-23 op
141 32c44488 2024-06-05 op while (len > 0 && isspace((unsigned char)*line))
142 32c44488 2024-06-05 op line++, len--;
143 1ac119fb 2024-01-23 op
144 1ac119fb 2024-01-23 op if (len == 0)
145 c1d27b0e 2024-06-14 op return emit_line(b, t, NULL, NULL);
146 1ac119fb 2024-01-23 op
147 c1d27b0e 2024-06-14 op if (t == LINE_TITLE_1 && *b->title == '\0')
148 c1d27b0e 2024-06-14 op strncpy(b->title, line, MIN(sizeof(b->title)-1, len));
149 1ac119fb 2024-01-23 op
150 3d89457c 2024-06-18 thomas.ad l = xstrndup(line, len);
151 c1d27b0e 2024-06-14 op return emit_line(b, t, l, NULL);
152 1ac119fb 2024-01-23 op }
153 1ac119fb 2024-01-23 op
154 1ac119fb 2024-01-23 op static int
155 c1d27b0e 2024-06-14 op gemtext_parse_line(struct buffer *b, const char *line, size_t len)
156 1ac119fb 2024-01-23 op {
157 1ac119fb 2024-01-23 op char *l;
158 1ac119fb 2024-01-23 op
159 c1d27b0e 2024-06-14 op if (b->parser_flags & PARSER_IN_PRE) {
160 32c44488 2024-06-05 op if (len >= 3 && !strncmp(line, "```", 3)) {
161 c1d27b0e 2024-06-14 op b->parser_flags ^= PARSER_IN_PRE;
162 c1d27b0e 2024-06-14 op return emit_line(b, LINE_PRE_END, NULL, NULL);
163 32c44488 2024-06-05 op }
164 1ac119fb 2024-01-23 op
165 32c44488 2024-06-05 op if (len == 0)
166 c1d27b0e 2024-06-14 op return emit_line(b, LINE_PRE_CONTENT, NULL, NULL);
167 3d89457c 2024-06-18 thomas.ad l = xstrndup(line, len);
168 c1d27b0e 2024-06-14 op return emit_line(b, LINE_PRE_CONTENT, l, NULL);
169 1ac119fb 2024-01-23 op }
170 1ac119fb 2024-01-23 op
171 1ac119fb 2024-01-23 op if (len == 0)
172 c1d27b0e 2024-06-14 op return emit_line(b, LINE_TEXT, NULL, NULL);
173 1ac119fb 2024-01-23 op
174 32c44488 2024-06-05 op switch (*line) {
175 32c44488 2024-06-05 op case '*':
176 32c44488 2024-06-05 op if (len < 1 || line[1] != ' ')
177 32c44488 2024-06-05 op break;
178 1ac119fb 2024-01-23 op
179 32c44488 2024-06-05 op line += 2, len -= 2;
180 32c44488 2024-06-05 op while (len > 0 && isspace((unsigned char)*line))
181 32c44488 2024-06-05 op line++, len--;
182 32c44488 2024-06-05 op if (len == 0)
183 c1d27b0e 2024-06-14 op return emit_line(b, LINE_ITEM, NULL, NULL);
184 3d89457c 2024-06-18 thomas.ad l = xstrndup(line, len);
185 c1d27b0e 2024-06-14 op return emit_line(b, LINE_ITEM, l, NULL);
186 1ac119fb 2024-01-23 op
187 32c44488 2024-06-05 op case '>':
188 32c44488 2024-06-05 op line++, len--;
189 32c44488 2024-06-05 op while (len > 0 && isspace((unsigned char)*line))
190 32c44488 2024-06-05 op line++, len--;
191 32c44488 2024-06-05 op if (len == 0)
192 c1d27b0e 2024-06-14 op return emit_line(b, LINE_QUOTE, NULL, NULL);
193 3d89457c 2024-06-18 thomas.ad l = xstrndup(line, len);
194 c1d27b0e 2024-06-14 op return emit_line(b, LINE_QUOTE, l, NULL);
195 1ac119fb 2024-01-23 op
196 32c44488 2024-06-05 op case '=':
197 32c44488 2024-06-05 op if (len > 1 && line[1] == '>')
198 c1d27b0e 2024-06-14 op return parse_link(b, line, len);
199 32c44488 2024-06-05 op break;
200 1ac119fb 2024-01-23 op
201 32c44488 2024-06-05 op case '#':
202 c1d27b0e 2024-06-14 op return parse_title(b, line, len);
203 1ac119fb 2024-01-23 op
204 32c44488 2024-06-05 op case '`':
205 32c44488 2024-06-05 op if (len < 3 || strncmp(line, "```", 3) != 0)
206 32c44488 2024-06-05 op break;
207 1ac119fb 2024-01-23 op
208 c1d27b0e 2024-06-14 op b->parser_flags |= PARSER_IN_PRE;
209 32c44488 2024-06-05 op line += 3, len -= 3;
210 32c44488 2024-06-05 op while (len > 0 && isspace((unsigned char)*line))
211 32c44488 2024-06-05 op line++, len--;
212 32c44488 2024-06-05 op if (len == 0)
213 c1d27b0e 2024-06-14 op return emit_line(b, LINE_PRE_START,
214 32c44488 2024-06-05 op NULL, NULL);
215 3d89457c 2024-06-18 thomas.ad l = xstrndup(line, len);
216 c1d27b0e 2024-06-14 op return emit_line(b, LINE_PRE_START, l, NULL);
217 1ac119fb 2024-01-23 op }
218 1ac119fb 2024-01-23 op
219 3d89457c 2024-06-18 thomas.ad l = xstrndup(line, len);
220 c1d27b0e 2024-06-14 op return emit_line(b, LINE_TEXT, l, NULL);
221 1ac119fb 2024-01-23 op }
222 1ac119fb 2024-01-23 op
223 1ac119fb 2024-01-23 op static int
224 c1d27b0e 2024-06-14 op gemtext_free(struct buffer *b)
225 1ac119fb 2024-01-23 op {
226 1ac119fb 2024-01-23 op /* flush the buffer */
227 c1d27b0e 2024-06-14 op if (b->len != 0) {
228 c1d27b0e 2024-06-14 op if (!gemtext_parse_line(b, b->buf, b->len))
229 1ac119fb 2024-01-23 op return 0;
230 c1d27b0e 2024-06-14 op if ((b->parser_flags & PARSER_IN_PRE) &&
231 c1d27b0e 2024-06-14 op !emit_line(b, LINE_PRE_END, NULL, NULL))
232 1ac119fb 2024-01-23 op return 0;
233 1ac119fb 2024-01-23 op }
234 1ac119fb 2024-01-23 op
235 1ac119fb 2024-01-23 op /*
236 1ac119fb 2024-01-23 op * use the first level 2 or 3 header as page title if none
237 1ac119fb 2024-01-23 op * found yet.
238 1ac119fb 2024-01-23 op */
239 c1d27b0e 2024-06-14 op if (*b->title == '\0')
240 c1d27b0e 2024-06-14 op search_title(b, LINE_TITLE_2);
241 c1d27b0e 2024-06-14 op if (*b->title == '\0')
242 c1d27b0e 2024-06-14 op search_title(b, LINE_TITLE_3);
243 1ac119fb 2024-01-23 op
244 1ac119fb 2024-01-23 op return 1;
245 1ac119fb 2024-01-23 op }
246 1ac119fb 2024-01-23 op
247 1ac119fb 2024-01-23 op static void
248 c1d27b0e 2024-06-14 op search_title(struct buffer *b, enum line_type level)
249 1ac119fb 2024-01-23 op {
250 1ac119fb 2024-01-23 op struct line *l;
251 1ac119fb 2024-01-23 op
252 c1d27b0e 2024-06-14 op TAILQ_FOREACH(l, &b->head, lines) {
253 1ac119fb 2024-01-23 op if (l->type == level) {
254 1ac119fb 2024-01-23 op if (l->line == NULL)
255 1ac119fb 2024-01-23 op continue;
256 c1d27b0e 2024-06-14 op strlcpy(b->title, l->line, sizeof(b->title));
257 1ac119fb 2024-01-23 op break;
258 1ac119fb 2024-01-23 op }
259 1ac119fb 2024-01-23 op }
260 1ac119fb 2024-01-23 op }
261 1ac119fb 2024-01-23 op
262 1ac119fb 2024-01-23 op static const char *gemtext_prefixes[] = {
263 1ac119fb 2024-01-23 op [LINE_TEXT] = "",
264 1ac119fb 2024-01-23 op [LINE_TITLE_1] = "# ",
265 1ac119fb 2024-01-23 op [LINE_TITLE_2] = "## ",
266 1ac119fb 2024-01-23 op [LINE_TITLE_3] = "### ",
267 1ac119fb 2024-01-23 op [LINE_ITEM] = "* ",
268 1ac119fb 2024-01-23 op [LINE_QUOTE] = "> ",
269 1ac119fb 2024-01-23 op [LINE_PRE_START] = "``` ",
270 1ac119fb 2024-01-23 op [LINE_PRE_CONTENT] = "",
271 1ac119fb 2024-01-23 op [LINE_PRE_END] = "```",
272 1ac119fb 2024-01-23 op };
273 1ac119fb 2024-01-23 op
274 1ac119fb 2024-01-23 op static int
275 c1d27b0e 2024-06-14 op gemtext_serialize(struct buffer *b, FILE *fp)
276 1ac119fb 2024-01-23 op {
277 1ac119fb 2024-01-23 op struct line *line;
278 1ac119fb 2024-01-23 op const char *text;
279 1ac119fb 2024-01-23 op const char *alt;
280 1ac119fb 2024-01-23 op int r;
281 1ac119fb 2024-01-23 op
282 c1d27b0e 2024-06-14 op TAILQ_FOREACH(line, &b->head, lines) {
283 1ac119fb 2024-01-23 op if ((text = line->line) == NULL)
284 1ac119fb 2024-01-23 op text = "";
285 1ac119fb 2024-01-23 op
286 1ac119fb 2024-01-23 op if ((alt = line->alt) == NULL)
287 1ac119fb 2024-01-23 op alt = "";
288 1ac119fb 2024-01-23 op
289 1ac119fb 2024-01-23 op switch (line->type) {
290 1ac119fb 2024-01-23 op case LINE_TEXT:
291 1ac119fb 2024-01-23 op case LINE_TITLE_1:
292 1ac119fb 2024-01-23 op case LINE_TITLE_2:
293 1ac119fb 2024-01-23 op case LINE_TITLE_3:
294 1ac119fb 2024-01-23 op case LINE_ITEM:
295 1ac119fb 2024-01-23 op case LINE_QUOTE:
296 1ac119fb 2024-01-23 op case LINE_PRE_START:
297 1ac119fb 2024-01-23 op case LINE_PRE_CONTENT:
298 1ac119fb 2024-01-23 op case LINE_PRE_END:
299 1ac119fb 2024-01-23 op r = fprintf(fp, "%s%s\n", gemtext_prefixes[line->type],
300 1ac119fb 2024-01-23 op text);
301 1ac119fb 2024-01-23 op break;
302 1ac119fb 2024-01-23 op
303 1ac119fb 2024-01-23 op case LINE_LINK:
304 1ac119fb 2024-01-23 op r = fprintf(fp, "=> %s %s\n", alt, text);
305 1ac119fb 2024-01-23 op break;
306 1ac119fb 2024-01-23 op
307 1ac119fb 2024-01-23 op default:
308 1ac119fb 2024-01-23 op /* not reached */
309 1ac119fb 2024-01-23 op abort();
310 1ac119fb 2024-01-23 op }
311 1ac119fb 2024-01-23 op
312 1ac119fb 2024-01-23 op if (r == -1)
313 1ac119fb 2024-01-23 op return 0;
314 1ac119fb 2024-01-23 op }
315 1ac119fb 2024-01-23 op
316 1ac119fb 2024-01-23 op return 1;
317 1ac119fb 2024-01-23 op }