Blame


1 1ac119fb 2024-01-23 op /*
2 1ac119fb 2024-01-23 op * Copyright (c) 2021 Omar Polo <op@omarpolo.com>
3 1ac119fb 2024-01-23 op *
4 1ac119fb 2024-01-23 op * Permission to use, copy, modify, and distribute this software for any
5 1ac119fb 2024-01-23 op * purpose with or without fee is hereby granted, provided that the above
6 1ac119fb 2024-01-23 op * copyright notice and this permission notice appear in all copies.
7 1ac119fb 2024-01-23 op *
8 1ac119fb 2024-01-23 op * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
9 1ac119fb 2024-01-23 op * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
10 1ac119fb 2024-01-23 op * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
11 1ac119fb 2024-01-23 op * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
12 1ac119fb 2024-01-23 op * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
13 1ac119fb 2024-01-23 op * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
14 1ac119fb 2024-01-23 op * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
15 1ac119fb 2024-01-23 op */
16 1ac119fb 2024-01-23 op
17 1ac119fb 2024-01-23 op #include "compat.h"
18 1ac119fb 2024-01-23 op
19 1ac119fb 2024-01-23 op #include <stdlib.h>
20 1ac119fb 2024-01-23 op #include <string.h>
21 1ac119fb 2024-01-23 op
22 1ac119fb 2024-01-23 op #include "hist.h"
23 1ac119fb 2024-01-23 op #include "parser.h"
24 1ac119fb 2024-01-23 op #include "telescope.h"
25 3d89457c 2024-06-18 thomas.ad #include "xwrapper.h"
26 1ac119fb 2024-01-23 op
/* generic line-splitting driver; defined at the bottom of this file */
static int	 parser_foreach_line(struct buffer *b, const char *buf,
		    size_t size);
28 820e3e9a 2024-06-14 op
29 1ac119fb 2024-01-23 op void
30 fd1c80ce 2024-06-14 op parser_init(struct buffer *buffer, const struct parser *p)
31 1ac119fb 2024-01-23 op {
32 c1d27b0e 2024-06-14 op erase_buffer(buffer);
33 c1d27b0e 2024-06-14 op
34 c1d27b0e 2024-06-14 op memset(buffer->title, 0, sizeof(buffer->title));
35 c1d27b0e 2024-06-14 op buffer->parser = p;
36 c1d27b0e 2024-06-14 op buffer->mode = p->name;
37 c1d27b0e 2024-06-14 op buffer->parser_flags = p->initflags;
38 1ac119fb 2024-01-23 op }
39 1ac119fb 2024-01-23 op
40 1ac119fb 2024-01-23 op int
41 c1d27b0e 2024-06-14 op parser_parse(struct buffer *buffer, const char *chunk, size_t len)
42 1ac119fb 2024-01-23 op {
43 fd1c80ce 2024-06-14 op const struct parser *p = buffer->parser;
44 0fe9ac22 2024-06-03 op
45 0fe9ac22 2024-06-03 op if (p->parse)
46 c1d27b0e 2024-06-14 op return p->parse(buffer, chunk, len);
47 c1d27b0e 2024-06-14 op return parser_foreach_line(buffer, chunk, len);
48 1ac119fb 2024-01-23 op }
49 1ac119fb 2024-01-23 op
/*
 * printf-like front-end to parser_parse(): format the arguments into
 * a temporary heap string and parse it.
 *
 * Returns 0 if the string could not be formatted, otherwise the
 * result of parser_parse().
 */
int
parser_parsef(struct buffer *buffer, const char *fmt, ...)
{
	va_list	 args;
	char	*text;
	int	 ret;

	va_start(args, fmt);
	ret = vasprintf(&text, fmt, args);
	va_end(args);

	if (ret != -1) {
		ret = parser_parse(buffer, text, strlen(text));
		free(text);
		return ret;
	}

	/* vasprintf failed: there is nothing to parse nor to free */
	return 0;
}
68 1ac119fb 2024-01-23 op
69 1ac119fb 2024-01-23 op int
70 1ac119fb 2024-01-23 op parser_free(struct tab *tab)
71 1ac119fb 2024-01-23 op {
72 c1d27b0e 2024-06-14 op struct buffer *buffer = &tab->buffer;
73 fd1c80ce 2024-06-14 op const struct parser *p = buffer->parser;
74 c1d27b0e 2024-06-14 op int r = 1;
75 c1d27b0e 2024-06-14 op char *tilde, *slash;
76 1ac119fb 2024-01-23 op
77 0fe9ac22 2024-06-03 op if (p->free) {
78 c1d27b0e 2024-06-14 op r = p->free(buffer);
79 c1d27b0e 2024-06-14 op } else if (buffer->len != 0) {
80 0fe9ac22 2024-06-03 op if (p->parse)
81 c1d27b0e 2024-06-14 op r = p->parse(buffer, buffer->buf, buffer->len);
82 0fe9ac22 2024-06-03 op else
83 c1d27b0e 2024-06-14 op r = parser_foreach_line(buffer, buffer->buf,
84 c1d27b0e 2024-06-14 op buffer->len);
85 0fe9ac22 2024-06-03 op }
86 1ac119fb 2024-01-23 op
87 c1d27b0e 2024-06-14 op free(buffer->buf);
88 c1d27b0e 2024-06-14 op buffer->buf = NULL;
89 c1d27b0e 2024-06-14 op buffer->len = 0;
90 70afc8d4 2024-06-05 op
91 c1d27b0e 2024-06-14 op if (*buffer->title != '\0')
92 1ac119fb 2024-01-23 op return r;
93 1ac119fb 2024-01-23 op
94 1ac119fb 2024-01-23 op /*
95 1ac119fb 2024-01-23 op * heuristic: see if there is a "tilde user" and use that as
96 1ac119fb 2024-01-23 op * page title, using the full domain name as fallback.
97 1ac119fb 2024-01-23 op */
98 1ac119fb 2024-01-23 op if ((tilde = strstr(hist_cur(tab->hist), "/~")) != NULL) {
99 c1d27b0e 2024-06-14 op strlcpy(buffer->title, tilde+1, sizeof(buffer->title));
100 1ac119fb 2024-01-23 op
101 c1d27b0e 2024-06-14 op if ((slash = strchr(buffer->title, '/')) != NULL)
102 1ac119fb 2024-01-23 op *slash = '\0';
103 1ac119fb 2024-01-23 op } else
104 c1d27b0e 2024-06-14 op strlcpy(buffer->title, tab->iri.iri_host,
105 c1d27b0e 2024-06-14 op sizeof(buffer->title));
106 1ac119fb 2024-01-23 op
107 1ac119fb 2024-01-23 op return r;
108 1ac119fb 2024-01-23 op }
109 1ac119fb 2024-01-23 op
110 1ac119fb 2024-01-23 op int
111 c1d27b0e 2024-06-14 op parser_serialize(struct buffer *b, FILE *fp)
112 1ac119fb 2024-01-23 op {
113 fd1c80ce 2024-06-14 op const struct parser *p = b->parser;
114 fd1c80ce 2024-06-14 op struct line *line;
115 fd1c80ce 2024-06-14 op const char *text;
116 fd1c80ce 2024-06-14 op int r;
117 1ac119fb 2024-01-23 op
118 c1d27b0e 2024-06-14 op if (p->serialize != NULL)
119 c1d27b0e 2024-06-14 op return p->serialize(b, fp);
120 1ac119fb 2024-01-23 op
121 1ac119fb 2024-01-23 op /* a default implementation good enough for plain text */
122 c1d27b0e 2024-06-14 op TAILQ_FOREACH(line, &b->head, lines) {
123 1ac119fb 2024-01-23 op if ((text = line->line) == NULL)
124 1ac119fb 2024-01-23 op text = "";
125 1ac119fb 2024-01-23 op
126 1ac119fb 2024-01-23 op r = fprintf(fp, "%s\n", text);
127 1ac119fb 2024-01-23 op if (r == -1)
128 1ac119fb 2024-01-23 op return 0;
129 1ac119fb 2024-01-23 op }
130 1ac119fb 2024-01-23 op
131 1ac119fb 2024-01-23 op return 1;
132 1ac119fb 2024-01-23 op }
133 1ac119fb 2024-01-23 op
134 8aa8d060 2024-06-28 op static int
135 c1d27b0e 2024-06-14 op parser_append(struct buffer *b, const char *buf, size_t len)
136 1ac119fb 2024-01-23 op {
137 1ac119fb 2024-01-23 op size_t newlen;
138 1ac119fb 2024-01-23 op char *t;
139 1ac119fb 2024-01-23 op
140 d92ddc8d 2024-06-28 op if (len == 0)
141 d92ddc8d 2024-06-28 op return (1);
142 d92ddc8d 2024-06-28 op
143 c1d27b0e 2024-06-14 op newlen = len + b->len;
144 3d89457c 2024-06-18 thomas.ad t = xcalloc(1, newlen);
145 c1d27b0e 2024-06-14 op memcpy(t, b->buf, b->len);
146 c1d27b0e 2024-06-14 op memcpy(t + b->len, buf, len);
147 c1d27b0e 2024-06-14 op free(b->buf);
148 c1d27b0e 2024-06-14 op b->buf = t;
149 c1d27b0e 2024-06-14 op b->len = newlen;
150 1ac119fb 2024-01-23 op return 1;
151 1ac119fb 2024-01-23 op }
152 1ac119fb 2024-01-23 op
153 a67ab277 2024-06-29 op static int
154 c1d27b0e 2024-06-14 op parser_set_buf(struct buffer *b, const char *buf, size_t len)
155 1ac119fb 2024-01-23 op {
156 1ac119fb 2024-01-23 op char *tmp;
157 1ac119fb 2024-01-23 op
158 1ac119fb 2024-01-23 op if (len == 0) {
159 c1d27b0e 2024-06-14 op b->len = 0;
160 c1d27b0e 2024-06-14 op free(b->buf);
161 c1d27b0e 2024-06-14 op b->buf = NULL;
162 1ac119fb 2024-01-23 op return 1;
163 1ac119fb 2024-01-23 op }
164 1ac119fb 2024-01-23 op
165 1ac119fb 2024-01-23 op /*
166 1ac119fb 2024-01-23 op * p->buf and buf can (and probably almost always will)
167 1ac119fb 2024-01-23 op * overlap!
168 1ac119fb 2024-01-23 op */
169 1ac119fb 2024-01-23 op
170 3d89457c 2024-06-18 thomas.ad tmp = xcalloc(1, len);
171 1ac119fb 2024-01-23 op memcpy(tmp, buf, len);
172 c1d27b0e 2024-06-14 op free(b->buf);
173 c1d27b0e 2024-06-14 op b->buf = tmp;
174 c1d27b0e 2024-06-14 op b->len = len;
175 1ac119fb 2024-01-23 op return 1;
176 1ac119fb 2024-01-23 op }
177 1ac119fb 2024-01-23 op
178 820e3e9a 2024-06-14 op static int
179 c1d27b0e 2024-06-14 op parser_foreach_line(struct buffer *b, const char *buf, size_t size)
180 1ac119fb 2024-01-23 op {
181 fd1c80ce 2024-06-14 op const struct parser *p = b->parser;
182 fd1c80ce 2024-06-14 op char *beg, *end;
183 fd1c80ce 2024-06-14 op unsigned int ch;
184 fd1c80ce 2024-06-14 op size_t i, l, len;
185 1ac119fb 2024-01-23 op
186 c1d27b0e 2024-06-14 op if (!parser_append(b, buf, size))
187 1ac119fb 2024-01-23 op return 0;
188 c1d27b0e 2024-06-14 op beg = b->buf;
189 c1d27b0e 2024-06-14 op len = b->len;
190 1ac119fb 2024-01-23 op
191 c1d27b0e 2024-06-14 op if (!(b->parser_flags & PARSER_IN_BODY) && len < 3)
192 1ac119fb 2024-01-23 op return 1;
193 1ac119fb 2024-01-23 op
194 c1d27b0e 2024-06-14 op if (!(b->parser_flags & PARSER_IN_BODY)) {
195 c1d27b0e 2024-06-14 op b->parser_flags |= PARSER_IN_BODY;
196 1ac119fb 2024-01-23 op
197 1ac119fb 2024-01-23 op /*
198 1ac119fb 2024-01-23 op * drop the BOM: only UTF-8 is supported, and there
199 1ac119fb 2024-01-23 op * it's useless; some editors may still add one
200 1ac119fb 2024-01-23 op * though.
201 1ac119fb 2024-01-23 op */
202 c1d27b0e 2024-06-14 op if (memmem(beg, len, "\xEF\xBB\xBF", 3) == beg) {
203 1ac119fb 2024-01-23 op b += 3;
204 1ac119fb 2024-01-23 op len -= 3;
205 1ac119fb 2024-01-23 op }
206 1ac119fb 2024-01-23 op }
207 1ac119fb 2024-01-23 op
208 1ac119fb 2024-01-23 op /* drop every "funny" ASCII character */
209 1ac119fb 2024-01-23 op for (i = 0; i < len; ) {
210 c1d27b0e 2024-06-14 op ch = beg[i];
211 1ac119fb 2024-01-23 op if ((ch >= ' ' || ch == '\n' || ch == '\t')
212 1ac119fb 2024-01-23 op && ch != 127) { /* del */
213 1ac119fb 2024-01-23 op ++i;
214 1ac119fb 2024-01-23 op continue;
215 1ac119fb 2024-01-23 op }
216 c1d27b0e 2024-06-14 op memmove(&beg[i], &beg[i+1], len - i - 1);
217 1ac119fb 2024-01-23 op len--;
218 1ac119fb 2024-01-23 op }
219 1ac119fb 2024-01-23 op
220 1ac119fb 2024-01-23 op while (len > 0) {
221 c1d27b0e 2024-06-14 op if ((end = memmem((char*)beg, len, "\n", 1)) == NULL)
222 1ac119fb 2024-01-23 op break;
223 c1d27b0e 2024-06-14 op l = end - beg;
224 1ac119fb 2024-01-23 op
225 c1d27b0e 2024-06-14 op if (!p->parseline(b, beg, l))
226 1ac119fb 2024-01-23 op return 0;
227 1ac119fb 2024-01-23 op
228 1ac119fb 2024-01-23 op len -= l;
229 c1d27b0e 2024-06-14 op beg += l;
230 1ac119fb 2024-01-23 op
231 1ac119fb 2024-01-23 op if (len > 0) {
232 1ac119fb 2024-01-23 op /* skip \n */
233 1ac119fb 2024-01-23 op len--;
234 c1d27b0e 2024-06-14 op beg++;
235 1ac119fb 2024-01-23 op }
236 1ac119fb 2024-01-23 op }
237 1ac119fb 2024-01-23 op
238 c1d27b0e 2024-06-14 op return parser_set_buf(b, beg, len);
239 1ac119fb 2024-01-23 op }