2 1ac119fb 2024-01-23 op * Copyright (c) 2021 Omar Polo <op@omarpolo.com>
4 1ac119fb 2024-01-23 op * Permission to use, copy, modify, and distribute this software for any
5 1ac119fb 2024-01-23 op * purpose with or without fee is hereby granted, provided that the above
6 1ac119fb 2024-01-23 op * copyright notice and this permission notice appear in all copies.
8 1ac119fb 2024-01-23 op * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
9 1ac119fb 2024-01-23 op * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
10 1ac119fb 2024-01-23 op * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
11 1ac119fb 2024-01-23 op * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
12 1ac119fb 2024-01-23 op * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
13 1ac119fb 2024-01-23 op * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
14 1ac119fb 2024-01-23 op * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17 1ac119fb 2024-01-23 op #include "compat.h"
19 1ac119fb 2024-01-23 op #include <stdlib.h>
20 1ac119fb 2024-01-23 op #include <string.h>
22 1ac119fb 2024-01-23 op #include "hist.h"
23 1ac119fb 2024-01-23 op #include "parser.h"
24 1ac119fb 2024-01-23 op #include "telescope.h"
25 3d89457c 2024-06-18 thomas.ad #include "xwrapper.h"
/*
 * Forward declaration of the file-local, line-oriented parsing routine.
 * It is defined further down in this file and is called from
 * parser_parse() and parser_free().
 */
27 c1d27b0e 2024-06-14 op static int parser_foreach_line(struct buffer *, const char *, size_t);
/*
 * parser_init: attach the parser `p' to `buffer'.
 *
 * erase_buffer() is called first (presumably dropping the previous
 * content -- confirm), the title is zeroed, and the buffer then records
 * the parser, takes the parser's name as its mode and starts from the
 * parser's initial flags.
 *
 * NOTE(review): the return type and the braces of this function are not
 * part of this listing.
 */
30 fd1c80ce 2024-06-14 op parser_init(struct buffer *buffer, const struct parser *p)
32 c1d27b0e 2024-06-14 op erase_buffer(buffer);
/* wipe the whole title array, not just its first byte */
34 c1d27b0e 2024-06-14 op memset(buffer->title, 0, sizeof(buffer->title));
35 c1d27b0e 2024-06-14 op buffer->parser = p;
36 c1d27b0e 2024-06-14 op buffer->mode = p->name;
37 c1d27b0e 2024-06-14 op buffer->parser_flags = p->initflags;
/*
 * parser_parse: feed `len' bytes starting at `chunk' to the parser
 * attached to `buffer'.
 *
 * Two exits are visible: the parser's own parse callback, or the
 * line-oriented parser_foreach_line().  The value of whichever is
 * called is returned unchanged.
 *
 * NOTE(review): the condition selecting between the two returns is not
 * part of this listing; presumably the callback is used when it is set
 * -- confirm against the full file.
 */
41 c1d27b0e 2024-06-14 op parser_parse(struct buffer *buffer, const char *chunk, size_t len)
43 fd1c80ce 2024-06-14 op const struct parser *p = buffer->parser;
46 c1d27b0e 2024-06-14 op return p->parse(buffer, chunk, len);
47 c1d27b0e 2024-06-14 op return parser_foreach_line(buffer, chunk, len);
/*
 * parser_parsef: printf-like front end to parser_parse().
 *
 * The arguments are formatted according to `fmt' into a string
 * allocated by vasprintf(), and that string (strlen(s) bytes, i.e.
 * without the terminating NUL) is handed to parser_parse().
 *
 * NOTE(review): the declarations, the check of vasprintf()'s result,
 * va_end(), the release of `s' and the return statement are not part of
 * this listing -- confirm against the full file.
 */
51 c1d27b0e 2024-06-14 op parser_parsef(struct buffer *buffer, const char *fmt, ...)
57 1ac119fb 2024-01-23 op va_start(ap, fmt);
58 1ac119fb 2024-01-23 op r = vasprintf(&s, fmt, ap);
/* r is reused here to hold the result of the parse */
64 c1d27b0e 2024-06-14 op r = parser_parse(buffer, s, strlen(s));
/*
 * parser_free: finish parsing for the buffer of `tab' and release the
 * bytes that were still pending.
 *
 * If the parser provides a free callback, its result is stored in r.
 * Otherwise, when buffer->len is non-zero, the leftover bytes are pushed
 * through p->parse or parser_foreach_line().  buffer->buf is then freed
 * and reset together with buffer->len.  The tail of the function fills
 * in a title taken from the current history entry or from the host of
 * the tab's IRI.
 *
 * NOTE(review): several lines are not part of this listing (the
 * declaration of r, the test choosing between p->parse and
 * parser_foreach_line, the statement guarded by the title check, the
 * line introducing the host fallback and the final return) -- confirm
 * the exact control flow against the full file.
 */
70 1ac119fb 2024-01-23 op parser_free(struct tab *tab)
72 c1d27b0e 2024-06-14 op struct buffer *buffer = &tab->buffer;
73 fd1c80ce 2024-06-14 op const struct parser *p = buffer->parser;
75 c1d27b0e 2024-06-14 op char *tilde, *slash;
77 0fe9ac22 2024-06-03 op if (p->free) {
78 c1d27b0e 2024-06-14 op r = p->free(buffer);
79 c1d27b0e 2024-06-14 op } else if (buffer->len != 0) {
/* flush what is still sitting in the pending buffer */
81 c1d27b0e 2024-06-14 op r = p->parse(buffer, buffer->buf, buffer->len);
83 c1d27b0e 2024-06-14 op r = parser_foreach_line(buffer, buffer->buf,
/* the pending bytes are no longer needed; leave no dangling pointer */
87 c1d27b0e 2024-06-14 op free(buffer->buf);
88 c1d27b0e 2024-06-14 op buffer->buf = NULL;
89 c1d27b0e 2024-06-14 op buffer->len = 0;
/*
 * NOTE(review): the statement guarded by this test is elided; presumably
 * an early return when a title has already been set -- confirm.
 */
91 c1d27b0e 2024-06-14 op if (*buffer->title != '\0')
95 1ac119fb 2024-01-23 op * heuristic: see if there is a "tilde user" and use that as
96 1ac119fb 2024-01-23 op * page title, using the full domain name as fallback.
98 1ac119fb 2024-01-23 op if ((tilde = strstr(hist_cur(tab->hist), "/~")) != NULL) {
/* tilde points at the '/', so tilde+1 makes the copy start at '~' */
99 c1d27b0e 2024-06-14 op strlcpy(buffer->title, tilde+1, sizeof(buffer->title));
/* cut the copy at the next '/', keeping only the "~user" component */
101 c1d27b0e 2024-06-14 op if ((slash = strchr(buffer->title, '/')) != NULL)
102 1ac119fb 2024-01-23 op *slash = '\0';
/*
 * host name as title.  NOTE(review): presumably the else branch of the
 * tilde test; the line introducing it is elided.
 */
104 c1d27b0e 2024-06-14 op strlcpy(buffer->title, tab->iri.iri_host,
105 c1d27b0e 2024-06-14 op sizeof(buffer->title));
/*
 * parser_serialize: write the content of `b' to the stream `fp'.
 *
 * When the parser has a serialize callback, it does all the work and
 * its return value is passed through.  Otherwise the lines of the
 * buffer are walked in order and the text of each one is printed
 * followed by a newline.
 *
 * NOTE(review): the declaration of r, the statement guarded by the
 * NULL-text test (presumably skipping that line), the handling of
 * fprintf()'s result and the final return are not part of this listing.
 */
111 c1d27b0e 2024-06-14 op parser_serialize(struct buffer *b, FILE *fp)
113 fd1c80ce 2024-06-14 op const struct parser *p = b->parser;
114 fd1c80ce 2024-06-14 op struct line *line;
115 fd1c80ce 2024-06-14 op const char *text;
118 c1d27b0e 2024-06-14 op if (p->serialize != NULL)
119 c1d27b0e 2024-06-14 op return p->serialize(b, fp);
121 1ac119fb 2024-01-23 op /* a default implementation good enough for plain text */
122 c1d27b0e 2024-06-14 op TAILQ_FOREACH(line, &b->head, lines) {
/* a line may carry no text at all */
123 1ac119fb 2024-01-23 op if ((text = line->line) == NULL)
126 1ac119fb 2024-01-23 op r = fprintf(fp, "%s\n", text);
/*
 * parser_append: add `len' bytes from `buf' to the pending data kept in
 * b->buf.
 *
 * A new block of b->len + len bytes is obtained from xcalloc(), the old
 * pending bytes are copied first and the new ones right after them,
 * then the old block is freed and b->len is updated.
 *
 * NOTE(review): the return type, the declaration of t, the statement
 * guarded by the len == 0 test, the assignment of t to b->buf and the
 * return are not part of this listing.
 * NOTE(review): len + b->len is not checked for wrap-around.
 * NOTE(review): b->buf is set to NULL elsewhere in this file; if it is
 * NULL here the first memcpy() receives a null source with a zero
 * length, which the C standard does not allow even for zero bytes --
 * consider guarding it.
 */
135 c1d27b0e 2024-06-14 op parser_append(struct buffer *b, const char *buf, size_t len)
137 1ac119fb 2024-01-23 op size_t newlen;
/* nothing to add; the guarded statement is elided from this listing */
140 d92ddc8d 2024-06-28 op if (len == 0)
143 c1d27b0e 2024-06-14 op newlen = len + b->len;
144 3d89457c 2024-06-18 thomas.ad t = xcalloc(1, newlen);
/* old pending bytes first, the new chunk right behind them */
145 c1d27b0e 2024-06-14 op memcpy(t, b->buf, b->len);
146 c1d27b0e 2024-06-14 op memcpy(t + b->len, buf, len);
147 c1d27b0e 2024-06-14 op free(b->buf);
149 c1d27b0e 2024-06-14 op b->len = newlen;
/*
 * parser_set_buf: replace the pending data of `b' with a private copy
 * of the `len' bytes at `buf'.
 *
 * With len == 0 the old block is simply freed and b->buf reset to NULL.
 * Otherwise the bytes are copied into a block obtained from xcalloc()
 * BEFORE the old block is freed: `buf' may point inside b->buf (the
 * caller in this file passes a pointer into it), so freeing first would
 * read released memory.
 *
 * NOTE(review): the return type, the declaration of tmp, the remaining
 * statements of the len == 0 branch and the final return are not part
 * of this listing.
 */
154 c1d27b0e 2024-06-14 op parser_set_buf(struct buffer *b, const char *buf, size_t len)
158 1ac119fb 2024-01-23 op if (len == 0) {
160 c1d27b0e 2024-06-14 op free(b->buf);
161 c1d27b0e 2024-06-14 op b->buf = NULL;
166 1ac119fb 2024-01-23 op * p->buf and buf can (and probably almost always will)
/* copy first, free afterwards: see the aliasing remark above */
170 3d89457c 2024-06-18 thomas.ad tmp = xcalloc(1, len);
171 1ac119fb 2024-01-23 op memcpy(tmp, buf, len);
172 c1d27b0e 2024-06-14 op free(b->buf);
173 c1d27b0e 2024-06-14 op b->buf = tmp;
174 c1d27b0e 2024-06-14 op b->len = len;
/*
 * parser_foreach_line: line-oriented driver built on the parser's
 * parseline callback.
 *
 * The chunk is first appended to the pending data of `b'.  On the data
 * seen before PARSER_IN_BODY is set, a leading UTF-8 byte order mark is
 * looked for.  Control characters other than newline and tab, and DEL,
 * are then squeezed out of the data.  The result is split on '\n' and
 * every complete line is handed to p->parseline with a length that does
 * not include the newline.  Whatever remains is stored back with
 * parser_set_buf(), whose result is returned.
 *
 * NOTE(review): many lines are not part of this listing (the return
 * type, the statements guarded by the failure and len < 3 tests, the
 * body of the BOM test, the load of ch and the index/length updates in
 * the filter loop, the exits of the line loop and the pointer
 * advances) -- confirm the details against the full file.
 */
179 c1d27b0e 2024-06-14 op parser_foreach_line(struct buffer *b, const char *buf, size_t size)
181 fd1c80ce 2024-06-14 op const struct parser *p = b->parser;
182 fd1c80ce 2024-06-14 op char *beg, *end;
183 fd1c80ce 2024-06-14 op unsigned int ch;
184 fd1c80ce 2024-06-14 op size_t i, l, len;
186 c1d27b0e 2024-06-14 op if (!parser_append(b, buf, size))
/* from here on work on the accumulated data, not on the new chunk */
188 c1d27b0e 2024-06-14 op beg = b->buf;
189 c1d27b0e 2024-06-14 op len = b->len;
/*
 * fewer than 3 bytes cannot hold the 3-byte BOM tested below; the
 * guarded statement is elided (presumably: wait for more data --
 * confirm)
 */
191 c1d27b0e 2024-06-14 op if (!(b->parser_flags & PARSER_IN_BODY) && len < 3)
194 c1d27b0e 2024-06-14 op if (!(b->parser_flags & PARSER_IN_BODY)) {
/* set the flag so that this block is not entered again */
195 c1d27b0e 2024-06-14 op b->parser_flags |= PARSER_IN_BODY;
198 1ac119fb 2024-01-23 op * drop the BOM: only UTF-8 is supported, and there
199 1ac119fb 2024-01-23 op * it's useless; some editors may still add one
/*
 * NOTE(review): memmem() searches the whole buffer although only a match
 * at offset 0 is of interest; a memcmp() on the first three bytes would
 * express the prefix test directly.
 */
202 c1d27b0e 2024-06-14 op if (memmem(beg, len, "\xEF\xBB\xBF", 3) == beg) {
208 1ac119fb 2024-01-23 op /* drop every "funny" ASCII character */
209 1ac119fb 2024-01-23 op for (i = 0; i < len; ) {
/* bytes satisfying this test are kept: >= space, newline or tab, never DEL */
211 1ac119fb 2024-01-23 op if ((ch >= ' ' || ch == '\n' || ch == '\t')
212 1ac119fb 2024-01-23 op && ch != 127) { /* del */
/*
 * shift the tail one position to the left, overwriting byte i.
 * NOTE(review): one memmove() per dropped byte is quadratic on input
 * with many control characters.
 */
216 c1d27b0e 2024-06-14 op memmove(&beg[i], &beg[i+1], len - i - 1);
220 1ac119fb 2024-01-23 op while (len > 0) {
/* look for the end of the next line; the guarded statement is elided */
221 c1d27b0e 2024-06-14 op if ((end = memmem((char*)beg, len, "\n", 1)) == NULL)
/* end points at the newline, so l is the line length without it */
223 c1d27b0e 2024-06-14 op l = end - beg;
225 c1d27b0e 2024-06-14 op if (!p->parseline(b, beg, l))
231 1ac119fb 2024-01-23 op if (len > 0) {
232 1ac119fb 2024-01-23 op /* skip \n */
/* beg points into b->buf here; parser_set_buf() copies before freeing */
238 c1d27b0e 2024-06-14 op return parser_set_buf(b, beg, len);