Line data Source code
1 : /*
2 : This file is part of TALER
3 : (C) 2025 Taler Systems SA
4 :
5 : TALER is free software; you can redistribute it and/or modify it under the
6 : terms of the GNU Lesser General Public License as published by the Free Software
7 : Foundation; either version 3, or (at your option) any later version.
8 :
9 : TALER is distributed in the hope that it will be useful, but WITHOUT ANY
10 : WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
11 : A PARTICULAR PURPOSE. See the GNU General Public License for more details.
12 :
13 : You should have received a copy of the GNU General Public License along with
14 : TALER; see the file COPYING. If not, see <http://www.gnu.org/licenses/>
15 : */
16 : /**
17 : * @file validators.c
18 : * @brief Input validators
19 : * @author Christian Grothoff
20 : */
21 : #include "taler/platform.h"
22 : #include <gnunet/gnunet_util_lib.h>
23 : #include <gnunet/gnunet_db_lib.h>
24 : #include <taler/taler_json_lib.h>
25 : #include "taler/taler_merchant_util.h"
26 : #include <regex.h>
27 :
/**
 * Check whether @a image_data_url is acceptable as an inline image.
 *
 * The empty string is accepted without further inspection.  Any other
 * value must (1) begin with "data:image/" (compared case-insensitively),
 * (2) contain the marker ";base64," somewhere in the string, and
 * (3) pass TALER_url_valid_charset().
 *
 * @param image_data_url 0-terminated string to check; it is dereferenced
 *        unconditionally, so it must not be NULL
 * @return true if the string is empty or passes all three checks,
 *         false otherwise
 */
bool
TALER_MERCHANT_image_data_url_valid (const char *image_data_url)
{
  static const char scheme_prefix[] = "data:image/";
  static const char encoding_marker[] = ";base64,";

  /* An empty string is considered valid */
  if ('\0' == image_data_url[0])
    return true;

  /* Each failed check gets its own GNUNET_break_op() call site */
  if (0 != strncasecmp (scheme_prefix,
                        image_data_url,
                        sizeof (scheme_prefix) - 1))
  {
    GNUNET_break_op (0);
    return false;
  }
  if (NULL == strstr (image_data_url,
                      encoding_marker))
  {
    GNUNET_break_op (0);
    return false;
  }
  if (! TALER_url_valid_charset (image_data_url))
  {
    GNUNET_break_op (0);
    return false;
  }
  return true;
}
54 :
55 :
/**
 * Check whether @a email is syntactically acceptable as an e-mail address.
 *
 * The address is matched against a POSIX extended regular expression
 * (compiled on every call) of the form LOCAL "@" DOMAIN, where
 *
 * LOCAL is one of:
 *   - dot-atom: one or more runs of
 *       [a-zA-Z0-9!#$%&'*+/=?^_`{|}~-]
 *     joined by single dots (no leading, trailing or consecutive dots)
 *   - quoted-string: a double-quoted text in which a backslash escapes
 *     the following character
 *
 * DOMAIN is one of:
 *   - two or more dot-separated labels (at least one dot is required);
 *     each label is 1-63 alphanumeric characters or hyphens and neither
 *     starts nor ends with a hyphen
 *   - "[a.b.c.d]" IPv4 literal with every octet in the range 0-255
 *   - "[IPv6:...]" literal consisting of hex digits and colons and
 *     ending in a hex digit
 *
 * @param email 0-terminated string to check; it is dereferenced
 *        unconditionally, so it must not be NULL
 * @return true if @a email is non-empty, at most 254 bytes long and
 *         matches the pattern; false otherwise
 */
bool
TALER_MERCHANT_email_valid (const char *email)
{
  static const char *const pattern =
    "^("
    /* Local part, dot-atom: atext runs joined by single dots.
       Written as X+(\.X+)* so that every input has exactly one
       parse; the nested form (X+\.?)*X+ accepts the same strings
       but is ambiguous. */
    "[a-zA-Z0-9!#$%&'*+/=?^_`{|}~-]+(\\.[a-zA-Z0-9!#$%&'*+/=?^_`{|}~-]+)*"
    "|"
    /* Local part, quoted-string */
    "\"([^\"\\\\]|\\\\.)*\""
    ")"
    "@"
    "("
    /* Domain: domain labels (with at least one dot) or IP literal */
    "([a-zA-Z0-9]([a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(\\.[a-zA-Z0-9]([a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)+)"
    "|"
    /* IPv4 literal, each octet 0-255 */
    "\\[((([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])\\.){3}"
    "([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5]))\\]"
    "|"
    /* IPv6 literal, must end in a hex digit */
    "\\[IPv6:[0-9a-fA-F:]*[0-9a-fA-F]\\]"
    ")$";
  regex_t regex;
  size_t len;
  int rc;
  bool is_valid;

  len = strlen (email);
  if (0 == len)
    return false;
  /* Maximum email length per RFC 5321 */
  if (len > 254)
    return false;

  /* The pattern is a compile-time constant, so failing to compile
     it is a programming error and not an input error */
  rc = regcomp (&regex,
                pattern,
                REG_EXTENDED | REG_NOSUB);
  GNUNET_assert (0 == rc);
  is_valid = (0 ==
              regexec (&regex,
                       email,
                       0,
                       NULL,
                       0));
  regfree (&regex);
  return is_valid;
}
133 :
134 :
/**
 * Map a letter to the digit it shares a key with on a telephone keypad
 * (2: ABC, 3: DEF, 4: GHI, 5: JKL, 6: MNO, 7: PQRS, 8: TUV, 9: WXYZ).
 *
 * @param letter character to map (any case); the table lookup relies on
 *        'A'..'Z' being contiguous in the execution character set
 * @return the keypad digit as a character, or '0' if @a letter does not
 *         upper-case to something in 'A'..'Z'
 */
static char
phone_letter_to_digit (unsigned char letter)
{
  static const char keypad[] = "22233344455566677778889999";
  int upper = toupper (letter);

  if ( (upper < 'A') ||
       (upper > 'Z') )
    return '0'; /* Fallback (shouldn't happen) */
  return keypad[upper - 'A'];
}


/**
 * Validate a phone number written in international "+CC ..." notation
 * and return it in normalized form.
 *
 * Accepted syntax (matched case-insensitively):
 *   - '+' followed by a 1-3 digit country code
 *   - an optional single separator ('-', '.' or ' ')
 *   - an optional parenthesized group of 1-4 digits, optionally followed
 *     by a separator
 *   - one or more digits (or letters, if @a allow_letters is set), each
 *     optionally preceded by a single separator
 *   - an optional extension: "x", "ext" or "extension" followed by
 *     1-6 digits, with an optional space on either side of the keyword
 *
 * Examples of accepted input:
 *   +1-202-555-0173
 *   +33 1 42 68 53 00
 *   +44.20.7946.0958
 *   +1 (202) 555-0173
 *   +886 2 2345 6789
 *   +1-800-CALL-NOW        (only if @a allow_letters is true)
 *   +49-30-12345678x123
 *
 * The normalized form is '+' followed by every digit of the input in
 * order.  If @a allow_letters is true, every letter is replaced by its
 * keypad digit (so "+1-800-CALL-NOW" becomes "+18002255669"); otherwise
 * letters are dropped.  All other characters are dropped.
 *
 * NOTE(review): the extension is not treated specially when normalizing:
 * its digits are copied like any other digit, and with @a allow_letters
 * the letters of the extension keyword are themselves mapped to digits.
 * Confirm that this is the intended result for numbers with extensions.
 *
 * @param phone 0-terminated input; it is dereferenced unconditionally,
 *        so it must not be NULL
 * @param allow_letters true to accept letters in the national number
 *        and translate them to keypad digits
 * @return NULL if @a phone is empty, longer than 30 bytes or does not
 *         match the syntax above; otherwise a 0-terminated string
 *         allocated with GNUNET_malloc() (presumably to be released by
 *         the caller with GNUNET_free() -- confirm against the header)
 */
char *
TALER_MERCHANT_phone_validate_normalize (const char *phone,
                                         bool allow_letters)
{
  /* Digits-only syntax; see the function comment for the breakdown */
  static const char *const pattern_digits =
    "^\\+[0-9]{1,3}"          /* Plus and country code (1-3 digits) */
    "[-. ]?"                  /* Optional single separator */
    "("                       /* Optional area code group */
    "\\([0-9]{1,4}\\)"        /* Area code in parens */
    "[-. ]?"                  /* Optional separator after parens */
    ")?"
    "[0-9]"                   /* National number starts with a digit */
    "("                       /* Remainder of the national number */
    "[-. ]?[0-9]"             /* Optional separator, then a digit */
    ")*"
    "([ ]?(x|ext|extension)[ ]?[0-9]{1,6})?"  /* Optional extension */
    "$";
  /* Same syntax, but letters may stand in for national-number digits */
  static const char *const pattern_with_letters =
    "^\\+[0-9]{1,3}"          /* Plus and country code (1-3 digits) */
    "[-. ]?"                  /* Optional single separator */
    "("                       /* Optional area code group */
    "\\([0-9]{1,4}\\)"        /* Area code in parens */
    "[-. ]?"                  /* Optional separator after parens */
    ")?"
    "[0-9A-Z]"                /* National number starts with digit/letter */
    "("                       /* Remainder of the national number */
    "[-. ]?[0-9A-Z]"          /* Optional separator, then digit/letter */
    ")*"
    "([ ]?(x|ext|extension)[ ]?[0-9]{1,6})?"  /* Optional extension */
    "$";
  size_t len;

  len = strlen (phone);
  if (0 == len)
    return NULL;
  /* Maximum phone length (reasonable practical limit) */
  if (len > 30)
    return NULL;

  /* Step 1: syntax check */
  {
    regex_t regex;
    int compiled;
    int matched;

    compiled = regcomp (&regex,
                        allow_letters
                        ? pattern_with_letters
                        : pattern_digits,
                        REG_EXTENDED | REG_NOSUB | REG_ICASE);
    GNUNET_assert (0 == compiled);
    matched = regexec (&regex,
                       phone,
                       0,
                       NULL,
                       0);
    regfree (&regex);
    if (0 != matched)
      return NULL; /* invalid number */
  }

  /* Step 2: normalization.  The output has at most as many characters
     as the input: the leading '+' written below takes the place of the
     input's own '+', which the loop drops, and every other input
     character produces at most one output character. */
  {
    char *normalized = GNUNET_malloc (len + 1);
    size_t used = 0;

    normalized[used++] = '+';
    for (size_t i = 0; i < len; i++)
    {
      unsigned char c = (unsigned char) phone[i];

      if (isdigit (c))
        normalized[used++] = (char) c;
      else if (allow_letters && isalpha (c))
        normalized[used++] = phone_letter_to_digit (c);
      /* anything else (separators, parentheses, spaces, and letters
         when !allow_letters) produces no output */
    }
    normalized[used] = '\0'; /* redundant, but helps analyzers... */
    return normalized;
  }
}
|