]> diplodocus.org Git - nmh/blob - uip/mhfixmsg.c
read_yes_or_no_if_tty.c: Move interface to own file.
[nmh] / uip / mhfixmsg.c
1 /* mhfixmsg.c -- rewrite a message with various transformations
2 *
3 * This code is Copyright (c) 2002 and 2013, by the authors of nmh.
4 * See the COPYRIGHT file in the root directory of the nmh
5 * distribution for complete copyright information.
6 */
7
8 #include "h/mh.h"
9 #include "sbr/concat.h"
10 #include "sbr/seq_setprev.h"
11 #include "sbr/seq_setcur.h"
12 #include "sbr/seq_save.h"
13 #include "sbr/smatch.h"
14 #include "sbr/fmt_rfc2047.h"
15 #include "sbr/cpydata.h"
16 #include "sbr/trimcpy.h"
17 #include "sbr/m_convert.h"
18 #include "sbr/m_backup.h"
19 #include "sbr/getfolder.h"
20 #include "sbr/folder_read.h"
21 #include "sbr/context_save.h"
22 #include "sbr/context_replace.h"
23 #include "sbr/context_find.h"
24 #include "sbr/readconfig.h"
25 #include "sbr/ambigsw.h"
26 #include "sbr/path.h"
27 #include "sbr/print_version.h"
28 #include "sbr/print_help.h"
29 #include "sbr/error.h"
30 #include "h/fmt_scan.h"
31 #include "h/mime.h"
32 #include "h/mhparse.h"
33 #include "h/done.h"
34 #include "h/utils.h"
35 #include "h/signals.h"
36 #include "sbr/m_maildir.h"
37 #include "sbr/m_mktemp.h"
38 #include "sbr/mime_type.h"
39 #include "mhmisc.h"
40 #include "mhfree.h"
41 #include "mhoutsbr.h"
42 #include "mhshowsbr.h"
43 #include <fcntl.h>
44
/*
 * Command-line switches understood by mhfixmsg.
 *
 * Each X() entry supplies the switch text (followed by a usage hint when
 * the switch takes an argument), a minchars value, and the enumerator
 * that main() dispatches on.  The list is expanded twice below: once into
 * an enum of switch ids and once into the "switches" table that is handed
 * to smatch(), ambigsw() and print_help().
 */
#define MHFIXMSG_SWITCHES \
    X("decodetext 8bit|7bit|binary", 0, DECODETEXTSW) \
    X("nodecodetext", 0, NDECODETEXTSW) \
    X("decodetypes", 0, DECODETYPESW) \
    X("crlflinebreaks", 0, CRLFLINEBREAKSSW) \
    X("nocrlflinebreaks", 0, NCRLFLINEBREAKSSW) \
    X("textcharset", 0, TEXTCHARSETSW) \
    X("notextcharset", 0, NTEXTCHARSETSW) \
    X("reformat", 0, REFORMATSW) \
    X("noreformat", 0, NREFORMATSW) \
    X("replacetextplain", 0, REPLACETEXTPLAINSW) \
    X("noreplacetextplain", 0, NREPLACETEXTPLAINSW) \
    X("fixboundary", 0, FIXBOUNDARYSW) \
    X("nofixboundary", 0, NFIXBOUNDARYSW) \
    X("fixcte", 0, FIXCOMPOSITECTESW) \
    X("nofixcte", 0, NFIXCOMPOSITECTESW) \
    X("fixtype mimetype", 0, FIXTYPESW) \
    X("file file", 0, FILESW) \
    X("outfile file", 0, OUTFILESW) \
    X("rmmproc program", 0, RPROCSW) \
    X("normmproc", 0, NRPRCSW) \
    X("changecur", 0, CHGSW) \
    X("nochangecur", 0, NCHGSW) \
    X("verbose", 0, VERBSW) \
    X("noverbose", 0, NVERBSW) \
    X("version", 0, VERSIONSW) \
    X("help", 0, HELPSW) \

/* First expansion: one enumerator per switch. */
#define X(sw, minchars, id) id,
DEFINE_SWITCH_ENUM(MHFIXMSG);
#undef X

/* Second expansion: the table entry for each switch. */
#define X(sw, minchars, id) { sw, minchars, id },
DEFINE_SWITCH_ARRAY(MHFIXMSG, switches);
#undef X
80
81
/* Set to 1 by -verbose and to 0 by -noverbose; checked before report(). */
int verbosw;

/* Needed by mhparse.c. */
int debugsw;

/* SIGQUIT is routed to the same handler as SIGPIPE. */
#define quitser pipeser
86
87 /*
88 * static prototypes
89 */
90 typedef struct fix_transformations {
91 int fixboundary;
92 int fixcompositecte;
93 svector_t fixtypes;
94 int reformat;
95 int replacetextplain;
96 int decodetext;
97 char *decodetypes;
98 /* Whether to use CRLF linebreaks, per RFC 2046 Sec. 4.1.1, par.1. */
99 int lf_line_endings;
100 char *textcharset;
101 } fix_transformations;
102
103 static int mhfixmsgsbr (CT *, char *, const fix_transformations *,
104 FILE **, char *, FILE **);
105 static int fix_boundary (CT *, int *);
106 static int copy_input_to_output (const char *, FILE *, const char *, FILE *);
107 static int get_multipart_boundary (CT, char **);
108 static int replace_boundary (CT, char *, char *);
109 static int fix_types (CT, svector_t, int *);
110 static char *replace_substring (char **, const char *, const char *);
111 static char *remove_parameter (char *, const char *);
112 static int fix_composite_cte (CT, int *);
113 static int set_ce (CT, int);
114 static int ensure_text_plain (CT *, CT, int *, int);
115 static int find_textplain_sibling (CT, int, int *);
116 static int insert_new_text_plain_part (CT, int, CT);
117 static CT build_text_plain_part (CT);
118 static int insert_into_new_mp_alt (CT *, int *);
119 static CT divide_part (CT);
120 static void copy_ctinfo (CI, CI);
121 static int decode_part (CT);
122 static int reformat_part (CT, char *, char *, char *, int);
123 static CT build_multipart_alt (CT, CT, int, int);
124 static int boundary_in_content (FILE **, char *, const char *);
125 static void transfer_noncontent_headers (CT, CT);
126 static int set_ct_type (CT, int type, int subtype, int encoding);
127 static int decode_text_parts (CT, int, const char *, int *);
128 static int should_decode(const char *, const char *, const char *);
129 static int content_encoding (CT, const char **);
130 static int strip_crs (CT, int *);
131 static void update_cte (CT);
132 static int least_restrictive_encoding (CT) PURE;
133 static int less_restrictive (int, int);
134 static int convert_charsets (CT, char *, int *);
135 static int fix_always (CT, int *);
136 static int fix_filename_param (char *, char *, PM *, PM *);
137 static int fix_filename_encoding (CT);
138 static int write_content (CT, const char *, char *, FILE *, int, int);
139 static void set_text_ctparams(CT, char *, int);
140 static int remove_file (const char *);
141 static void report (char *, char *, char *, char *, ...)
142 CHECK_PRINTF(4, 5);
143 static void pipeser (int);
144
145
146 int
147 main (int argc, char **argv)
148 {
149 int msgnum;
150 char *cp, *file = NULL, *folder = NULL;
151 char *maildir = NULL, buf[100], *outfile = NULL;
152 char **argp, **arguments;
153 struct msgs_array msgs = { 0, 0, NULL };
154 struct msgs *mp = NULL;
155 CT *ctp;
156 FILE *fp, *infp = NULL, *outfp = NULL;
157 bool using_stdin = false;
158 bool chgflag = true;
159 int status = OK;
160 fix_transformations fx;
161 fx.reformat = fx.fixcompositecte = fx.fixboundary = 1;
162 fx.fixtypes = NULL;
163 fx.replacetextplain = 0;
164 fx.decodetext = CE_8BIT;
165 fx.decodetypes = "text,application/ics"; /* Default, per man page. */
166 fx.lf_line_endings = 0;
167 fx.textcharset = NULL;
168
169 if (nmh_init(argv[0], true, false)) { return 1; }
170
171 arguments = getarguments (invo_name, argc, argv, 1);
172 argp = arguments;
173
174 /*
175 * Parse arguments
176 */
177 while ((cp = *argp++)) {
178 if (*cp == '-') {
179 switch (smatch (++cp, switches)) {
180 case AMBIGSW:
181 ambigsw (cp, switches);
182 done (1);
183 case UNKWNSW:
184 die("-%s unknown", cp);
185
186 case HELPSW:
187 snprintf (buf, sizeof buf, "%s [+folder] [msgs] [switches]",
188 invo_name);
189 print_help (buf, switches, 1);
190 done (0);
191 case VERSIONSW:
192 print_version(invo_name);
193 done (0);
194
195 case DECODETEXTSW:
196 if (! (cp = *argp++) || *cp == '-') {
197 die("missing argument to %s", argp[-2]);
198 }
199 if (! strcasecmp (cp, "8bit")) {
200 fx.decodetext = CE_8BIT;
201 } else if (! strcasecmp (cp, "7bit")) {
202 fx.decodetext = CE_7BIT;
203 } else if (! strcasecmp (cp, "binary")) {
204 fx.decodetext = CE_BINARY;
205 } else {
206 die("invalid argument to %s", argp[-2]);
207 }
208 continue;
209 case NDECODETEXTSW:
210 fx.decodetext = 0;
211 continue;
212 case DECODETYPESW:
213 if (! (cp = *argp++) || *cp == '-') {
214 die("missing argument to %s", argp[-2]);
215 }
216 fx.decodetypes = cp;
217 continue;
218 case CRLFLINEBREAKSSW:
219 fx.lf_line_endings = 0;
220 continue;
221 case NCRLFLINEBREAKSSW:
222 fx.lf_line_endings = 1;
223 continue;
224 case TEXTCHARSETSW:
225 if (! (cp = *argp++) || (*cp == '-' && cp[1])) {
226 die("missing argument to %s", argp[-2]);
227 }
228 fx.textcharset = cp;
229 continue;
230 case NTEXTCHARSETSW:
231 fx.textcharset = 0;
232 continue;
233 case FIXBOUNDARYSW:
234 fx.fixboundary = 1;
235 continue;
236 case NFIXBOUNDARYSW:
237 fx.fixboundary = 0;
238 continue;
239 case FIXCOMPOSITECTESW:
240 fx.fixcompositecte = 1;
241 continue;
242 case NFIXCOMPOSITECTESW:
243 fx.fixcompositecte = 0;
244 continue;
245 case FIXTYPESW:
246 if (! (cp = *argp++) || (*cp == '-' && cp[1])) {
247 die("missing argument to %s", argp[-2]);
248 }
249 if (! strncasecmp (cp, "multipart/", 10) ||
250 ! strncasecmp (cp, "message/", 8))
251 die("-fixtype %s not allowed", cp);
252 if (! strchr (cp, '/'))
253 die("-fixtype requires type/subtype");
254 if (fx.fixtypes == NULL) { fx.fixtypes = svector_create (10); }
255 svector_push_back (fx.fixtypes, cp);
256 continue;
257 case REFORMATSW:
258 fx.reformat = 1;
259 continue;
260 case NREFORMATSW:
261 fx.reformat = 0;
262 continue;
263 case REPLACETEXTPLAINSW:
264 fx.replacetextplain = 1;
265 continue;
266 case NREPLACETEXTPLAINSW:
267 fx.replacetextplain = 0;
268 continue;
269 case FILESW:
270 if (! (cp = *argp++) || (*cp == '-' && cp[1])) {
271 die("missing argument to %s", argp[-2]);
272 }
273 file = *cp == '-' ? mh_xstrdup (cp) : path (cp, TFILE);
274 continue;
275 case OUTFILESW:
276 if (! (cp = *argp++) || (*cp == '-' && cp[1])) {
277 die("missing argument to %s", argp[-2]);
278 }
279 outfile = *cp == '-' ? mh_xstrdup (cp) : path (cp, TFILE);
280 continue;
281 case RPROCSW:
282 if (!(rmmproc = *argp++) || *rmmproc == '-') {
283 die("missing argument to %s", argp[-2]);
284 }
285 continue;
286 case NRPRCSW:
287 rmmproc = NULL;
288 continue;
289 case CHGSW:
290 chgflag = true;
291 continue;
292 case NCHGSW:
293 chgflag = false;
294 continue;
295 case VERBSW:
296 verbosw = 1;
297 continue;
298 case NVERBSW:
299 verbosw = 0;
300 continue;
301 }
302 }
303 if (*cp == '+' || *cp == '@') {
304 if (folder)
305 die("only one folder at a time!");
306 folder = pluspath (cp);
307 } else {
308 if (*cp == '/') {
309 /* Interpret a full path as a filename, not a message. */
310 file = mh_xstrdup (cp);
311 } else {
312 app_msgarg (&msgs, cp);
313 }
314 }
315 }
316
317 SIGNAL (SIGQUIT, quitser);
318 SIGNAL (SIGPIPE, pipeser);
319
320 /*
321 * Read the standard profile setup
322 */
323 if ((fp = fopen (cp = etcpath ("mhn.defaults"), "r"))) {
324 readconfig(NULL, fp, cp, 0);
325 fclose (fp);
326 }
327
328 suppress_bogus_mp_content_warning = skip_mp_cte_check = true;
329 suppress_extraneous_trailing_semicolon_warning = true;
330
331 if (! context_find ("path")) {
332 free (path ("./", TFOLDER));
333 }
334
335 if (file && msgs.size) {
336 die("cannot specify msg and file at same time!");
337 }
338
339 if (outfile) {
340 /* Open the outfile now, so we don't have to risk opening it
341 after running out of fds. */
342 if (strcmp (outfile, "-") == 0) {
343 outfp = stdout;
344 } else if ((outfp = fopen (outfile, "w")) == NULL) {
345 adios (outfile, "unable to open for writing");
346 }
347 }
348
349 /*
350 * check if message is coming from file
351 */
352 if (file) {
353 /* If file is stdin, create a tmp file name before parse_mime()
354 has a chance, because it might put in on a different
355 filesystem than the output file. Instead, put it in the
356 user's preferred tmp directory. */
357 CT ct;
358
359 if (! strcmp ("-", file)) {
360 int fd;
361 char *cp;
362
363 using_stdin = true;
364
365 if ((cp = m_mktemp2 (NULL, invo_name, &fd, NULL)) == NULL) {
366 die("unable to create temporary file in %s",
367 get_temp_dir());
368 } else {
369 free (file);
370 file = mh_xstrdup (cp);
371 cpydata (STDIN_FILENO, fd, "-", file);
372 }
373
374 if (close (fd)) {
375 (void) m_unlink (file);
376 die("failed to write temporary file");
377 }
378 }
379
380 cts = mh_xcalloc(2, sizeof *cts);
381 ctp = cts;
382
383 if ((ct = parse_mime (file))) {
384 set_text_ctparams(ct, fx.decodetypes, fx.lf_line_endings);
385 *ctp++ = ct;
386 } else {
387 inform("unable to parse message from file %s", file);
388 status = NOTOK;
389
390 /* If there's an outfile, pass the input message unchanged, so the
391 message won't get dropped from a pipeline. */
392 if (outfile) {
393 /* Something went wrong. Output might be expected, such as if
394 this were run as a filter. Just copy the input to the
395 output. */
396 if ((infp = fopen (file, "r")) == NULL) {
397 adios (file, "unable to open for reading");
398 }
399
400 if (copy_input_to_output (file, infp, outfile, outfp) != OK) {
401 inform("unable to copy message to %s, "
402 "it might be lost\n", outfile);
403 }
404
405 fclose (infp);
406 infp = NULL;
407 }
408 }
409 } else {
410 /*
411 * message(s) are coming from a folder
412 */
413 CT ct;
414
415 if (! msgs.size) {
416 app_msgarg(&msgs, "cur");
417 }
418 if (! folder) {
419 folder = getfolder (1);
420 }
421 maildir = mh_xstrdup(m_maildir (folder));
422
423 /* chdir so that error messages, esp. from MIME parser, just
424 refer to the message and not its path. */
425 if (chdir (maildir) == NOTOK) {
426 adios (maildir, "unable to change directory to");
427 }
428
429 /* read folder and create message structure */
430 if (! (mp = folder_read (folder, 1))) {
431 die("unable to read folder %s", folder);
432 }
433
434 /* check for empty folder */
435 if (mp->nummsg == 0) {
436 die("no messages in %s", folder);
437 }
438
439 /* parse all the message ranges/sequences and set SELECTED */
440 for (msgnum = 0; msgnum < msgs.size; msgnum++)
441 if (! m_convert (mp, msgs.msgs[msgnum])) {
442 done (1);
443 }
444 seq_setprev (mp); /* set the previous-sequence */
445
446 cts = mh_xcalloc(mp->numsel + 1, sizeof *cts);
447 ctp = cts;
448
449 for (msgnum = mp->lowsel; msgnum <= mp->hghsel; msgnum++) {
450 if (is_selected(mp, msgnum)) {
451 char *msgnam = m_name (msgnum);
452
453 if ((ct = parse_mime (msgnam))) {
454 set_text_ctparams(ct, fx.decodetypes, fx.lf_line_endings);
455 *ctp++ = ct;
456 } else {
457 inform("unable to parse message %s", msgnam);
458 status = NOTOK;
459
460 /* If there's an outfile, pass the input message
461 unchanged, so the message won't get dropped from a
462 pipeline. */
463 if (outfile) {
464 /* Something went wrong. Output might be expected,
465 such as if this were run as a filter. Just copy
466 the input to the output. */
467 /* Can't use path() here because 1) it might have been
468 called before and it caches the pwd, and 2) we call
469 chdir() after that. */
470 char *input_filename =
471 concat (maildir, "/", msgnam, NULL);
472
473 if ((infp = fopen (input_filename, "r")) == NULL) {
474 adios (input_filename,
475 "unable to open for reading");
476 }
477
478 if (copy_input_to_output (input_filename, infp,
479 outfile, outfp) != OK) {
480 inform("unable to copy message to %s, "
481 "it might be lost\n", outfile);
482 }
483
484 fclose (infp);
485 infp = NULL;
486 free (input_filename);
487 }
488 }
489 }
490 }
491
492 if (chgflag) {
493 seq_setcur (mp, mp->hghsel); /* update current message */
494 }
495 seq_save (mp); /* synchronize sequences */
496 context_replace (pfolder, folder);/* update current folder */
497 context_save (); /* save the context file */
498 }
499
500 if (*cts) {
501 for (ctp = cts; *ctp; ++ctp) {
502 status =
503 mhfixmsgsbr (ctp, maildir, &fx, &infp, outfile, &outfp) == OK
504 ? 0
505 : 1;
506 free_content (*ctp);
507
508 if (using_stdin) {
509 (void) m_unlink (file);
510
511 if (! outfile) {
512 /* Just calling m_backup() unlinks the backup file. */
513 (void) m_backup (file);
514 }
515 }
516 }
517 } else {
518 status = 1;
519 }
520
521 free(maildir);
522 free (cts);
523
524 if (fx.fixtypes != NULL) { svector_free (fx.fixtypes); }
525 if (infp) { fclose (infp); } /* even if stdin */
526 if (outfp) { fclose (outfp); } /* even if stdout */
527 free (outfile);
528 free (file);
529 free (folder);
530 free (arguments);
531
532 done (status == OK ? 0 : 1);
533 return NOTOK;
534 }
535
536
537 /*
538 * Apply transformations to one message.
539 */
540 static int
541 mhfixmsgsbr (CT *ctp, char *maildir, const fix_transformations *fx,
542 FILE **infp, char *outfile, FILE **outfp)
543 {
544 /* Store input filename in case one of the transformations, i.e.,
545 fix_boundary(), rewrites to a tmp file. */
546 char *input_filename = maildir
547 ? concat (maildir, "/", (*ctp)->c_file, NULL)
548 : mh_xstrdup ((*ctp)->c_file);
549 bool modify_inplace = false;
550 int message_mods = 0;
551 int status = OK;
552
553 /* Though the input file won't need to be opened if everything goes
554 well, do it here just in case there's a failure, and that failure is
555 running out of file descriptors. */
556 if ((*infp = fopen (input_filename, "r")) == NULL) {
557 adios (input_filename, "unable to open for reading");
558 }
559
560 if (outfile == NULL) {
561 modify_inplace = true;
562
563 if ((*ctp)->c_file) {
564 char *tempfile;
565 /* outfp will be closed by the caller */
566 if ((tempfile = m_mktemp2 (NULL, invo_name, NULL, outfp)) ==
567 NULL) {
568 die("unable to create temporary file in %s",
569 get_temp_dir());
570 }
571 outfile = mh_xstrdup (tempfile);
572 } else {
573 die("missing both input and output filenames\n");
574 }
575 } /* else *outfp was defined by caller */
576
577 reverse_alternative_parts (*ctp);
578 status = fix_always (*ctp, &message_mods);
579 if (status == OK && fx->fixboundary) {
580 status = fix_boundary (ctp, &message_mods);
581 }
582 if (status == OK && fx->fixtypes != NULL) {
583 status = fix_types (*ctp, fx->fixtypes, &message_mods);
584 }
585 if (status == OK && fx->fixcompositecte) {
586 status = fix_composite_cte (*ctp, &message_mods);
587 }
588 if (status == OK && fx->reformat) {
589 status =
590 ensure_text_plain (ctp, NULL, &message_mods, fx->replacetextplain);
591 }
592 if (status == OK && fx->decodetext) {
593 status = decode_text_parts (*ctp, fx->decodetext, fx->decodetypes,
594 &message_mods);
595 update_cte (*ctp);
596 }
597 if (status == OK && fx->textcharset != NULL) {
598 status = convert_charsets (*ctp, fx->textcharset, &message_mods);
599 }
600
601 if (status == OK && ! (*ctp)->c_umask) {
602 /* Set the umask for the contents file. This currently
603 isn't used but just in case it is in the future. */
604 struct stat st;
605
606 if (stat ((*ctp)->c_file, &st) != NOTOK) {
607 (*ctp)->c_umask = ~(st.st_mode & 0777);
608 } else {
609 (*ctp)->c_umask = ~m_gmprot();
610 }
611 }
612
613 /*
614 * Write the content to a file
615 */
616 if (status == OK) {
617 status = write_content (*ctp, input_filename, outfile, *outfp,
618 modify_inplace, message_mods);
619 } else if (! modify_inplace) {
620 /* Something went wrong. Output might be expected, such
621 as if this were run as a filter. Just copy the input
622 to the output. */
623 if (copy_input_to_output (input_filename, *infp, outfile,
624 *outfp) != OK) {
625 inform("unable to copy message to %s, it might be lost\n",
626 outfile);
627 }
628 }
629
630 if (modify_inplace) {
631 if (status != OK) { (void) m_unlink (outfile); }
632 free (outfile);
633 outfile = NULL;
634 }
635
636 fclose (*infp);
637 *infp = NULL;
638 free (input_filename);
639
640 return status;
641 }
642
643
644 /*
645 * Copy input message to output. Assumes not modifying in place, so this
646 * might be running as part of a pipeline.
647 */
648 static int
649 copy_input_to_output (const char *input_filename, FILE *infp,
650 const char *output_filename, FILE *outfp)
651 {
652 int in = fileno (infp);
653 int out = fileno (outfp);
654 int status = OK;
655
656 if (in != -1 && out != -1) {
657 cpydata (in, out, input_filename, output_filename);
658 } else {
659 status = NOTOK;
660 }
661
662 return status;
663 }
664
665
666 /*
667 * Fix mismatched outer level boundary.
668 */
669 static int
670 fix_boundary (CT *ct, int *message_mods)
671 {
672 struct multipart *mp;
673 int status = OK;
674
675 if (ct && (*ct)->c_type == CT_MULTIPART && bogus_mp_content) {
676 mp = (struct multipart *) (*ct)->c_ctparams;
677
678 /*
679 * 1) Get boundary at end of part.
680 * 2) Get boundary at beginning of part and compare to the end-of-part
681 * boundary.
682 * 3) Write out contents of ct to tmp file, replacing boundary in
683 * header with boundary from part. Set c_unlink to 1.
684 * 4) Free ct.
685 * 5) Call parse_mime() on the tmp file, replacing ct.
686 */
687
688 if (mp && mp->mp_start) {
689 char *part_boundary;
690
691 if (get_multipart_boundary (*ct, &part_boundary) == OK) {
692 char *fixed;
693
694 if ((fixed = m_mktemp2 (NULL, invo_name, NULL, &(*ct)->c_fp))) {
695 if (replace_boundary (*ct, fixed, part_boundary) == OK) {
696 char *filename = mh_xstrdup ((*ct)->c_file);
697 CT fixed_ct;
698
699 free_content (*ct);
700 if ((fixed_ct = parse_mime (fixed))) {
701 *ct = fixed_ct;
702 (*ct)->c_unlink = 1;
703
704 ++*message_mods;
705 if (verbosw) {
706 report (NULL, NULL, filename,
707 "fix multipart boundary");
708 }
709 } else {
710 *ct = NULL;
711 inform("unable to parse fixed part");
712 status = NOTOK;
713 }
714 free (filename);
715 } else {
716 inform("unable to replace broken boundary");
717 status = NOTOK;
718 }
719 } else {
720 inform("unable to create temporary file in %s",
721 get_temp_dir());
722 status = NOTOK;
723 }
724
725 free (part_boundary);
726 } else {
727 /* Couldn't fix the boundary. Report failure so that mhfixmsg
728 doesn't modify the message. */
729 status = NOTOK;
730 }
731 } else {
732 /* No multipart struct, even though the content type is
733 CT_MULTIPART. Report failure so that mhfixmsg doesn't modify
734 the message. */
735 status = NOTOK;
736 }
737 }
738
739 return status;
740 }
741
742
743 /*
744 * Find boundary at end of multipart.
745 */
746 static int
747 get_multipart_boundary (CT ct, char **part_boundary)
748 {
749 char buffer[NMH_BUFSIZ];
750 char *end_boundary = NULL;
751 off_t begin = (off_t) ct->c_end > (off_t) (ct->c_begin + sizeof buffer)
752 ? (off_t) (ct->c_end - sizeof buffer)
753 : (off_t) ct->c_begin;
754 size_t bytes_read;
755 int status = OK;
756
757 /* This will fail if the boundary spans fread() calls. NMH_BUFSIZ should
758 be big enough, even if it's just 1024, to make that unlikely. */
759
760 /* free_content() will close ct->c_fp if bogus MP boundary is fixed. */
761 if (! ct->c_fp && (ct->c_fp = fopen (ct->c_file, "r")) == NULL) {
762 advise (ct->c_file, "unable to open for reading");
763 return NOTOK;
764 }
765
766 /* Get boundary at end of multipart. */
767 while (begin >= (off_t) ct->c_begin) {
768 fseeko (ct->c_fp, begin, SEEK_SET);
769 while ((bytes_read = fread (buffer, 1, sizeof buffer, ct->c_fp)) > 0) {
770 char *cp = rfind_str (buffer, bytes_read, "--");
771
772 if (cp) {
773 char *end;
774
775 /* Trim off trailing "--" and anything beyond. */
776 *cp-- = '\0';
777 if ((end = rfind_str (buffer, cp - buffer, "\n"))) {
778 if (strlen (end) > 3 && *end++ == '\n' &&
779 *end++ == '-' && *end++ == '-') {
780 end_boundary = mh_xstrdup (end);
781 break;
782 }
783 }
784 }
785 }
786
787 if (end_boundary || begin <= (off_t) (ct->c_begin + sizeof buffer))
788 break;
789 begin -= sizeof buffer;
790 }
791
792 /* Get boundary at beginning of multipart. */
793 if (end_boundary) {
794 fseeko (ct->c_fp, ct->c_begin, SEEK_SET);
795 while ((bytes_read = fread (buffer, 1, sizeof buffer, ct->c_fp)) > 0) {
796 if (bytes_read >= strlen (end_boundary)) {
797 char *cp = find_str (buffer, bytes_read, end_boundary);
798
799 if (cp && cp - buffer >= 2 && *--cp == '-' &&
800 *--cp == '-' && (cp > buffer && *--cp == '\n')) {
801 status = OK;
802 break;
803 }
804 } else {
805 /* The start and end boundaries didn't match, or the
806 start boundary doesn't begin with "\n--" (or "--"
807 if at the beginning of buffer). Keep trying. */
808 status = NOTOK;
809 }
810 }
811 } else {
812 status = NOTOK;
813 }
814
815 if (ct->c_fp) {
816 fclose (ct->c_fp);
817 ct->c_fp = NULL;
818 }
819
820 if (status == OK) {
821 *part_boundary = end_boundary;
822 } else {
823 *part_boundary = NULL;
824 free (end_boundary);
825 }
826
827 return status;
828 }
829
830
831 /*
832 * Open and copy ct->c_file to file, replacing the multipart boundary.
833 */
834 static int
835 replace_boundary (CT ct, char *file, char *boundary)
836 {
837 FILE *fpin, *fpout;
838 int compnum, state;
839 char buf[NMH_BUFSIZ], name[NAMESZ];
840 char *np, *vp;
841 m_getfld_state_t gstate;
842 int status = OK;
843
844 if (ct->c_file == NULL) {
845 inform("missing input filename");
846 return NOTOK;
847 }
848
849 if ((fpin = fopen (ct->c_file, "r")) == NULL) {
850 advise (ct->c_file, "unable to open for reading");
851 return NOTOK;
852 }
853
854 if ((fpout = fopen (file, "w")) == NULL) {
855 fclose (fpin);
856 advise (file, "unable to open for writing");
857 return NOTOK;
858 }
859
860 gstate = m_getfld_state_init(fpin);
861 for (compnum = 1;;) {
862 int bufsz = (int) sizeof buf;
863
864 switch (state = m_getfld2(&gstate, name, buf, &bufsz)) {
865 case FLD:
866 case FLDPLUS:
867 compnum++;
868
869 /* get copies of the buffers */
870 np = mh_xstrdup (name);
871 vp = mh_xstrdup (buf);
872
873 /* if necessary, get rest of field */
874 while (state == FLDPLUS) {
875 bufsz = sizeof buf;
876 state = m_getfld2(&gstate, name, buf, &bufsz);
877 vp = add (buf, vp); /* add to previous value */
878 }
879
880 if (strcasecmp (TYPE_FIELD, np)) {
881 fprintf (fpout, "%s:%s", np, vp);
882 } else {
883 char *new_ctline, *new_params;
884
885 replace_param(&ct->c_ctinfo.ci_first_pm,
886 &ct->c_ctinfo.ci_last_pm, "boundary",
887 boundary, 0);
888
889 new_ctline = concat(" ", ct->c_ctinfo.ci_type, "/",
890 ct->c_ctinfo.ci_subtype, NULL);
891 new_params = output_params(LEN(TYPE_FIELD) +
892 strlen(new_ctline) + 1,
893 ct->c_ctinfo.ci_first_pm, NULL, 0);
894 fprintf (fpout, "%s:%s%s\n", np, new_ctline,
895 FENDNULL(new_params));
896 free(new_ctline);
897 free(new_params);
898 }
899
900 free (vp);
901 free (np);
902
903 continue;
904
905 case BODY:
906 putc('\n', fpout);
907 /* buf will have a terminating NULL, skip it. */
908 if ((int) fwrite (buf, 1, bufsz-1, fpout) < bufsz-1) {
909 advise (file, "fwrite");
910 }
911 continue;
912
913 case FILEEOF:
914 break;
915
916 case LENERR:
917 case FMTERR:
918 inform("message format error in component #%d", compnum);
919 status = NOTOK;
920 break;
921
922 default:
923 inform("getfld() returned %d", state);
924 status = NOTOK;
925 break;
926 }
927
928 break;
929 }
930
931 m_getfld_state_destroy (&gstate);
932 fclose (fpout);
933 fclose (fpin);
934
935 return status;
936 }
937
938
939 /*
940 * Fix Content-Type header to reflect the content of its part.
941 */
942 static int
943 fix_types (CT ct, svector_t fixtypes, int *message_mods)
944 {
945 int status = OK;
946
947 switch (ct->c_type) {
948 case CT_MULTIPART: {
949 struct multipart *m = (struct multipart *) ct->c_ctparams;
950 struct part *part;
951
952 for (part = m->mp_parts; status == OK && part; part = part->mp_next) {
953 status = fix_types (part->mp_part, fixtypes, message_mods);
954 }
955 break;
956 }
957
958 case CT_MESSAGE:
959 if (ct->c_subtype == MESSAGE_EXTERNAL) {
960 struct exbody *e = (struct exbody *) ct->c_ctparams;
961
962 status = fix_types (e->eb_content, fixtypes, message_mods);
963 }
964 break;
965
966 default: {
967 char **typep, *type;
968
969 if (ct->c_ctinfo.ci_type && ct->c_ctinfo.ci_subtype) {
970 for (typep = svector_strs (fixtypes);
971 typep && (type = *typep);
972 ++typep) {
973 char *type_subtype =
974 concat (ct->c_ctinfo.ci_type, "/", ct->c_ctinfo.ci_subtype,
975 NULL);
976
977 if (! strcasecmp (type, type_subtype) &&
978 decode_part (ct) == OK &&
979 ct->c_cefile.ce_file != NULL) {
980 char *ct_type_subtype = mime_type (ct->c_cefile.ce_file);
981 char *cp;
982
983 if ((cp = strchr (ct_type_subtype, ';'))) {
984 /* Truncate to remove any parameter list from
985 mime_type () result. */
986 *cp = '\0';
987 }
988
989 if (strcasecmp (type, ct_type_subtype)) {
990 char *ct_type, *ct_subtype;
991 HF hf;
992
993 /* The Content-Type header does not match the
994 content, so update these struct Content
995 fields to match:
996 * c_type, c_subtype
997 * c_ctinfo.ci_type, c_ctinfo.ci_subtype
998 * c_ctline
999 */
1000 /* Extract type and subtype from type/subtype. */
1001 ct_type = mh_xstrdup(ct_type_subtype);
1002 if ((cp = strchr (ct_type, '/'))) {
1003 *cp = '\0';
1004 ct_subtype = mh_xstrdup(++cp);
1005 } else {
1006 inform("missing / in MIME type of %s %s",
1007 ct->c_file, ct->c_partno);
1008 free (ct_type);
1009 return NOTOK;
1010 }
1011
1012 ct->c_type = ct_str_type (ct_type);
1013 ct->c_subtype = ct_str_subtype (ct->c_type, ct_subtype);
1014
1015 free (ct->c_ctinfo.ci_type);
1016 ct->c_ctinfo.ci_type = ct_type;
1017 free (ct->c_ctinfo.ci_subtype);
1018 ct->c_ctinfo.ci_subtype = ct_subtype;
1019 if (! replace_substring (&ct->c_ctline, type,
1020 ct_type_subtype)) {
1021 inform("did not find %s in %s",
1022 type, ct->c_ctline);
1023 }
1024
1025 /* Update Content-Type header field. */
1026 for (hf = ct->c_first_hf; hf; hf = hf->next) {
1027 if (! strcasecmp (TYPE_FIELD, hf->name)) {
1028 if (replace_substring (&hf->value, type,
1029 ct_type_subtype)) {
1030 ++*message_mods;
1031 if (verbosw) {
1032 report (NULL, ct->c_partno, ct->c_file,
1033 "change Content-Type in header "
1034 "from %s to %s",
1035 type, ct_type_subtype);
1036 }
1037 break;
1038 }
1039 inform("did not find %s in %s", type, hf->value);
1040 }
1041 }
1042 }
1043 free (ct_type_subtype);
1044 }
1045 free (type_subtype);
1046 }
1047 }
1048 }}
1049
1050 return status;
1051 }
1052
1053
/*
 * Replace a substring, allocating space to hold the new one.
 *
 * Replaces the first occurrence of old in *str with new.  On success the
 * old *str is freed, *str is pointed at the newly allocated result, and
 * that result is returned.  Returns NULL, leaving *str untouched, if
 * *str is NULL or does not contain old.
 */
static char *
replace_substring (char **str, const char *old, const char *new)
{
    char *match, *prefix, *new_str;

    if (*str == NULL  ||  (match = strstr (*str, old)) == NULL) {
        return NULL;
    }

    /* Everything before the match; empty if the match is at the start. */
    prefix = mh_xstrdup (*str);
    prefix[match - *str] = '\0';

    new_str = concat (prefix, new, match + strlen (old), NULL);

    free (prefix);
    free (*str);

    return *str = new_str;
}
1082
1083
/*
 * Remove a name=value parameter, given just its name, from a header value.
 *
 * The first parameter called name is cut out of str in place, together
 * with the whitespace and the semicolon in front of it.  If there was no
 * semicolon in front, a semicolon directly after the value is removed
 * instead, so that no stray separator is left behind.  The value may be a
 * quoted string or a token; a token ends at whitespace, at a semicolon,
 * or at the end of the string.
 *
 * The name has to start a parameter, i.e. be at the beginning of str or
 * follow whitespace or a semicolon, so asking for "name" leaves
 * "filename=..." alone.  The comparison is case-sensitive.
 *
 * Always returns str.
 */
static char *
remove_parameter (char *str, const char *name)
{
    /* It looks to me, based on the BNF in RFC 2045, than there can't
       be whitespace between the parameter name and the "=", or
       between the "=" and the parameter value. */
    const size_t name_len = strlen (name);
    bool dropped_semicolon = false;
    char *cp, *start, *end;

    if (name_len == 0) {
        return str;
    }

    /* Find "name=" where a parameter can begin. */
    for (cp = strstr (str, name); cp; cp = strstr (cp + 1, name)) {
        if (cp[name_len] == '='  &&
            (cp == str  ||  cp[-1] == ';'  ||
             isspace ((unsigned char) cp[-1]))) {
            break;
        }
    }
    if (cp == NULL) {
        return str;
    }

    /* Remove any leading spaces, before the parameter name. */
    for (start = cp;
         start > str  &&  isspace ((unsigned char) start[-1]);
         --start) {
        continue;
    }
    /* Remove a leading semicolon. */
    if (start > str  &&  start[-1] == ';') {
        --start;
        dropped_semicolon = true;
    }

    end = cp + name_len + 1;
    if (*end == '"') {
        /* Skip past the quoted value, and then the final quote if
           there is one. */
        for (++end; *end  &&  *end != '"'; ++end) { continue; }
        if (*end == '"') { ++end; }
    } else {
        /* Skip past the value, which may be empty. */
        while (*end  &&  *end != ';'  &&  ! isspace ((unsigned char) *end)) {
            ++end;
        }
    }

    /* Exactly one separator goes away with the parameter. */
    if (! dropped_semicolon  &&  *end == ';') {
        ++end;
    }

    /* Close the gap, bringing the terminating null along. */
    (void) memmove (start, end, strlen (end) + 1);

    return str;
}
1130
1131
1132 /*
1133 * Fix Content-Transfer-Encoding of composite,, e.g., message or multipart, part.
1134 * According to RFC 2045 Sec. 6.4, it must be 7bit, 8bit, or binary. Set it to
1135 * 8 bit.
1136 */
1137 static int
1138 fix_composite_cte (CT ct, int *message_mods)
1139 {
1140 int status = OK;
1141
1142 if (ct->c_type == CT_MESSAGE || ct->c_type == CT_MULTIPART) {
1143 if (ct->c_encoding != CE_7BIT && ct->c_encoding != CE_8BIT &&
1144 ct->c_encoding != CE_BINARY) {
1145 HF hf;
1146
1147 for (hf = ct->c_first_hf; hf; hf = hf->next) {
1148 char *name = hf->name;
1149 for (; isspace((unsigned char)*name); ++name) {
1150 continue;
1151 }
1152
1153 if (! strncasecmp (name, ENCODING_FIELD,
1154 LEN(ENCODING_FIELD))) {
1155 char *prefix = "Nmh-REPLACED-INVALID-";
1156 HF h;
1157
1158 NEW(h);
1159 h->name = mh_xstrdup (hf->name);
1160 h->hf_encoding = hf->hf_encoding;
1161 h->next = hf->next;
1162 hf->next = h;
1163
1164 /* Retain old header but prefix its name. */
1165 free (hf->name);
1166 hf->name = concat (prefix, h->name, NULL);
1167
1168 ++*message_mods;
1169 if (verbosw) {
1170 char *encoding = cpytrim (hf->value);
1171 report (NULL, ct->c_partno, ct->c_file,
1172 "replace Content-Transfer-Encoding of %s "
1173 "with 8 bit", encoding);
1174 free (encoding);
1175 }
1176
1177 h->value = mh_xstrdup (" 8bit\n");
1178
1179 /* Don't need to warn for multiple C-T-E header
1180 fields, parse_mime() already does that. But
1181 if there are any, fix them all as necessary. */
1182 hf = h;
1183 }
1184 }
1185
1186 set_ce (ct, CE_8BIT);
1187 }
1188
1189 if (ct->c_type == CT_MULTIPART) {
1190 struct multipart *m;
1191 struct part *part;
1192
1193 m = (struct multipart *) ct->c_ctparams;
1194 for (part = m->mp_parts; part; part = part->mp_next) {
1195 if (fix_composite_cte (part->mp_part, message_mods) != OK) {
1196 status = NOTOK;
1197 break;
1198 }
1199 }
1200 }
1201 }
1202
1203 return status;
1204 }
1205
1206
1207 /*
1208 * Set content encoding.
1209 */
1210 static int
1211 set_ce (CT ct, int encoding)
1212 {
1213 const char *ce = ce_str (encoding);
1214 const struct str2init *ctinit = get_ce_method (ce);
1215
1216 if (ctinit) {
1217 char *cte = concat (" ", ce, "\n", NULL);
1218 bool found_cte = false;
1219 HF hf;
1220 /* Decoded contents might be in ct->c_cefile.ce_file, if the
1221 caller is decode_text_parts (). Save because we'll
1222 overwrite below. */
1223 struct cefile decoded_content_info = ct->c_cefile;
1224
1225 ct->c_encoding = encoding;
1226
1227 ct->c_ctinitfnx = ctinit->si_init;
1228 /* This will assign ct->c_cefile with an all-0 struct, which
1229 is what we want. */
1230 (*ctinit->si_init) (ct);
1231 /* After returning, the caller should set
1232 ct->c_cefile.ce_file to the name of the file containing
1233 the contents. */
1234
1235 if (ct->c_ceclosefnx) {
1236 (*ct->c_ceclosefnx) (ct);
1237 }
1238
1239 /* Restore the cefile. */
1240 ct->c_cefile = decoded_content_info;
1241
1242 /* Update/add Content-Transfer-Encoding header field. */
1243 for (hf = ct->c_first_hf; hf; hf = hf->next) {
1244 if (! strcasecmp (ENCODING_FIELD, hf->name)) {
1245 found_cte = true;
1246 free (hf->value);
1247 hf->value = cte;
1248 }
1249 }
1250 if (! found_cte) {
1251 add_header (ct, mh_xstrdup (ENCODING_FIELD), cte);
1252 }
1253
1254 /* Update c_celine. It's used only by mhlist -debug. */
1255 free (ct->c_celine);
1256 ct->c_celine = mh_xstrdup (cte);
1257
1258 return OK;
1259 }
1260
1261 return NOTOK;
1262 }
1263
1264
1265 /*
1266 * Make sure each text part has a corresponding text/plain part.
1267 */
1268 static int
1269 ensure_text_plain (CT *ct, CT parent, int *message_mods, int replacetextplain)
1270 {
1271 int status = OK;
1272
1273 switch ((*ct)->c_type) {
1274 case CT_TEXT: {
1275 /* Nothing to do for text/plain. */
1276 if ((*ct)->c_subtype == TEXT_PLAIN) { return OK; }
1277
1278 if (parent && parent->c_type == CT_MULTIPART &&
1279 parent->c_subtype == MULTI_ALTERNATE) {
1280 int new_subpart_number = 1;
1281 int has_text_plain =
1282 find_textplain_sibling (parent, replacetextplain,
1283 &new_subpart_number);
1284
1285 if (! has_text_plain) {
1286 /* Parent is a multipart/alternative. Insert a new
1287 text/plain subpart. */
1288 const int inserted =
1289 insert_new_text_plain_part (*ct, new_subpart_number,
1290 parent);
1291 if (inserted) {
1292 ++*message_mods;
1293 if (verbosw) {
1294 report (NULL, parent->c_partno, parent->c_file,
1295 "insert text/plain part");
1296 }
1297 } else {
1298 status = NOTOK;
1299 }
1300 }
1301 } else if (parent && parent->c_type == CT_MULTIPART &&
1302 parent->c_subtype == MULTI_RELATED) {
1303 char *type_subtype =
1304 concat ((*ct)->c_ctinfo.ci_type, "/",
1305 (*ct)->c_ctinfo.ci_subtype, NULL);
1306 const char *parent_type =
1307 get_param (parent->c_ctinfo.ci_first_pm, "type", '?', 1);
1308 int new_subpart_number = 1;
1309 int has_text_plain = 0;
1310
1311 /* Have to do string comparison on the subtype because we
1312 don't enumerate all of them in c_subtype values.
1313 parent_type will be NULL if the multipart/related part
1314 doesn't have a type parameter. The type parameter must
1315 be specified according to RFC 2387 Sec. 3.1 but not all
1316 messages comply. */
1317 if (parent_type && strcasecmp (type_subtype, parent_type) == 0) {
1318 /* The type of this part matches the root type of the
1319 parent multipart/related. Look to see if there's
1320 text/plain sibling. */
1321 has_text_plain =
1322 find_textplain_sibling (parent, replacetextplain,
1323 &new_subpart_number);
1324 }
1325
1326 free (type_subtype);
1327
1328 if (! has_text_plain) {
1329 struct multipart *mp = (struct multipart *) parent->c_ctparams;
1330 struct part *part;
1331 int siblings = 0;
1332
1333 for (part = mp->mp_parts; part; part = part->mp_next) {
1334 if (*ct != part->mp_part) {
1335 ++siblings;
1336 }
1337 }
1338
1339 if (siblings) {
1340 /* Parent is a multipart/related. Insert a new
1341 text/plain subpart in a new multipart/alternative. */
1342 if (insert_into_new_mp_alt (ct, message_mods)) {
1343 /* Not an error if text/plain couldn't be added. */
1344 }
1345 } else {
1346 /* There are no siblings, so insert a new text/plain
1347 subpart, and change the parent type from
1348 multipart/related to multipart/alternative. */
1349 const int inserted =
1350 insert_new_text_plain_part (*ct, new_subpart_number,
1351 parent);
1352
1353 if (inserted) {
1354 HF hf;
1355
1356 parent->c_subtype = MULTI_ALTERNATE;
1357 free (parent->c_ctinfo.ci_subtype);
1358 parent->c_ctinfo.ci_subtype = mh_xstrdup("alternative");
1359 if (! replace_substring (&parent->c_ctline, "/related",
1360 "/alternative")) {
1361 inform("did not find multipart/related in %s",
1362 parent->c_ctline);
1363 }
1364
1365 /* Update Content-Type header field. */
1366 for (hf = parent->c_first_hf; hf; hf = hf->next) {
1367 if (! strcasecmp (TYPE_FIELD, hf->name)) {
1368 if (replace_substring (&hf->value, "/related",
1369 "/alternative")) {
1370 ++*message_mods;
1371 if (verbosw) {
1372 report (NULL, parent->c_partno,
1373 parent->c_file,
1374 "insert text/plain part");
1375 }
1376
1377 /* Remove, e.g., type="text/html" from
1378 multipart/alternative. */
1379 remove_parameter (hf->value, "type");
1380 break;
1381 }
1382 inform("did not find multipart/"
1383 "related in header %s", hf->value);
1384 }
1385 }
1386 } else {
1387 /* Not an error if text/plain couldn't be inserted. */
1388 }
1389 }
1390 }
1391 } else {
1392 if (insert_into_new_mp_alt (ct, message_mods)) {
1393 status = NOTOK;
1394 }
1395 }
1396 break;
1397 }
1398
1399 case CT_MULTIPART: {
1400 struct multipart *mp = (struct multipart *) (*ct)->c_ctparams;
1401 struct part *part;
1402
1403 for (part = mp->mp_parts; status == OK && part; part = part->mp_next) {
1404 if ((*ct)->c_type == CT_MULTIPART) {
1405 status = ensure_text_plain (&part->mp_part, *ct, message_mods,
1406 replacetextplain);
1407 }
1408 }
1409 break;
1410 }
1411
1412 case CT_MESSAGE:
1413 if ((*ct)->c_subtype == MESSAGE_EXTERNAL) {
1414 struct exbody *e = (struct exbody *) (*ct)->c_ctparams;
1415
1416 status = ensure_text_plain (&e->eb_content, *ct, message_mods,
1417 replacetextplain);
1418 }
1419 break;
1420 }
1421
1422 return status;
1423 }
1424
1425
1426 /*
1427 * See if there is a sibling text/plain, and return its subpart number.
1428 */
1429 static int
1430 find_textplain_sibling (CT parent, int replacetextplain,
1431 int *new_subpart_number)
1432 {
1433 struct multipart *mp = (struct multipart *) parent->c_ctparams;
1434 struct part *part, *prev;
1435 bool has_text_plain = false;
1436
1437 for (prev = part = mp->mp_parts; part; part = part->mp_next) {
1438 ++*new_subpart_number;
1439 if (part->mp_part->c_type == CT_TEXT &&
1440 part->mp_part->c_subtype == TEXT_PLAIN) {
1441 if (replacetextplain) {
1442 struct part *old_part;
1443 if (part == mp->mp_parts) {
1444 old_part = mp->mp_parts;
1445 mp->mp_parts = part->mp_next;
1446 } else {
1447 old_part = prev->mp_next;
1448 prev->mp_next = part->mp_next;
1449 }
1450 if (verbosw) {
1451 report (NULL, parent->c_partno, parent->c_file,
1452 "remove text/plain part %s",
1453 old_part->mp_part->c_partno);
1454 }
1455 free_content (old_part->mp_part);
1456 free (old_part);
1457 } else {
1458 has_text_plain = true;
1459 }
1460 break;
1461 }
1462 prev = part;
1463 }
1464
1465 return has_text_plain;
1466 }
1467
1468
1469 /*
1470 * Insert a new text/plain part.
1471 */
1472 static int
1473 insert_new_text_plain_part (CT ct, int new_subpart_number, CT parent)
1474 {
1475 struct multipart *mp = (struct multipart *) parent->c_ctparams;
1476 struct part *new_part;
1477
1478 NEW(new_part);
1479 if ((new_part->mp_part = build_text_plain_part (ct))) {
1480 char buffer[16];
1481 snprintf (buffer, sizeof buffer, "%d", new_subpart_number);
1482
1483 new_part->mp_next = mp->mp_parts;
1484 mp->mp_parts = new_part;
1485 new_part->mp_part->c_partno =
1486 concat (parent->c_partno ? parent->c_partno : "1", ".",
1487 buffer, NULL);
1488
1489 return 1;
1490 }
1491
1492 free_content (new_part->mp_part);
1493 free (new_part);
1494
1495 return 0;
1496 }
1497
1498
1499 /*
1500 * Create a text/plain part to go along with non-plain sibling part.
1501 */
1502 static CT
1503 build_text_plain_part (CT encoded_part)
1504 {
1505 CT tp_part = divide_part (encoded_part);
1506 char *tmp_plain_file = NULL;
1507
1508 if (decode_part (tp_part) == OK) {
1509 /* Now, tp_part->c_cefile.ce_file is the name of the tmp file that
1510 contains the decoded contents. And the decoding function, such
1511 as openQuoted, will have set ...->ce_unlink to 1 so that it will
1512 be unlinked by free_content (). */
1513 char *tempfile;
1514
1515 /* This m_mktemp2() call closes the temp file. */
1516 if ((tempfile = m_mktemp2 (NULL, invo_name, NULL, NULL)) == NULL) {
1517 inform("unable to create temporary file in %s",
1518 get_temp_dir());
1519 } else {
1520 tmp_plain_file = mh_xstrdup (tempfile);
1521 if (reformat_part (tp_part, tmp_plain_file,
1522 tp_part->c_ctinfo.ci_type,
1523 tp_part->c_ctinfo.ci_subtype,
1524 tp_part->c_type) == OK) {
1525 return tp_part;
1526 }
1527 }
1528 }
1529
1530 free_content (tp_part);
1531 if (tmp_plain_file) { (void) m_unlink (tmp_plain_file); }
1532 free (tmp_plain_file);
1533
1534 return NULL;
1535 }
1536
1537
1538 /*
1539 * Slip new text/plain part into a new multipart/alternative.
1540 */
1541 static int
1542 insert_into_new_mp_alt (CT *ct, int *message_mods)
1543 {
1544 CT tp_part = build_text_plain_part (*ct);
1545 int status = OK;
1546
1547 if (tp_part) {
1548 CT mp_alt = build_multipart_alt (*ct, tp_part, CT_MULTIPART,
1549 MULTI_ALTERNATE);
1550 if (mp_alt) {
1551 struct multipart *mp = (struct multipart *) mp_alt->c_ctparams;
1552
1553 if (mp && mp->mp_parts) {
1554 mp->mp_parts->mp_part = tp_part;
1555 /* Make the new multipart/alternative the parent. */
1556 *ct = mp_alt;
1557
1558 ++*message_mods;
1559 if (verbosw) {
1560 report (NULL, (*ct)->c_partno, (*ct)->c_file,
1561 "insert text/plain part");
1562 }
1563 } else {
1564 free_content (tp_part);
1565 free_content (mp_alt);
1566 status = NOTOK;
1567 }
1568 } else {
1569 status = NOTOK;
1570 }
1571 } else {
1572 /* Not an error if text/plain couldn't be built. */
1573 }
1574
1575 return status;
1576 }
1577
1578
1579 /*
1580 * Clone a MIME part.
1581 */
1582 static CT
1583 divide_part (CT ct)
1584 {
1585 CT new_part;
1586
1587 NEW0(new_part);
1588 /* Just copy over what is needed for decoding. c_vrsn and
1589 c_celine aren't necessary. */
1590 new_part->c_file = mh_xstrdup (ct->c_file);
1591 new_part->c_begin = ct->c_begin;
1592 new_part->c_end = ct->c_end;
1593 copy_ctinfo (&new_part->c_ctinfo, &ct->c_ctinfo);
1594 new_part->c_type = ct->c_type;
1595 new_part->c_cefile = ct->c_cefile;
1596 new_part->c_encoding = ct->c_encoding;
1597 new_part->c_ctinitfnx = ct->c_ctinitfnx;
1598 new_part->c_ceopenfnx = ct->c_ceopenfnx;
1599 new_part->c_ceclosefnx = ct->c_ceclosefnx;
1600 new_part->c_cesizefnx = ct->c_cesizefnx;
1601
1602 /* c_ctline is used by reformat__part(), so it can preserve
1603 anything after the type/subtype. */
1604 new_part->c_ctline = mh_xstrdup (ct->c_ctline);
1605
1606 return new_part;
1607 }
1608
1609
1610 /*
1611 * Copy the content info from one part to another.
1612 */
1613 static void
1614 copy_ctinfo (CI dest, CI src)
1615 {
1616 PM s_pm, d_pm;
1617
1618 dest->ci_type = src->ci_type ? mh_xstrdup (src->ci_type) : NULL;
1619 dest->ci_subtype = src->ci_subtype ? mh_xstrdup (src->ci_subtype) : NULL;
1620
1621 for (s_pm = src->ci_first_pm; s_pm; s_pm = s_pm->pm_next) {
1622 d_pm = add_param(&dest->ci_first_pm, &dest->ci_last_pm, s_pm->pm_name,
1623 s_pm->pm_value, 0);
1624 if (s_pm->pm_charset) {
1625 d_pm->pm_charset = mh_xstrdup(s_pm->pm_charset);
1626 }
1627 if (s_pm->pm_lang) {
1628 d_pm->pm_lang = mh_xstrdup(s_pm->pm_lang);
1629 }
1630 }
1631
1632 dest->ci_comment = src->ci_comment ? mh_xstrdup (src->ci_comment) : NULL;
1633 dest->ci_magic = src->ci_magic ? mh_xstrdup (src->ci_magic) : NULL;
1634 }
1635
1636
1637 /*
1638 * Decode content.
1639 */
1640 static int
1641 decode_part (CT ct)
1642 {
1643 char *tmp_decoded;
1644 int status;
1645 FILE *file;
1646 char *tempfile;
1647
1648 if ((tempfile = m_mktemp2 (NULL, invo_name, NULL, &file)) == NULL) {
1649 die("unable to create temporary file in %s", get_temp_dir());
1650 }
1651 tmp_decoded = mh_xstrdup (tempfile);
1652 /* The following call will load ct->c_cefile.ce_file with the tmp
1653 filename of the decoded content. tmp_decoded will contain the
1654 encoded output, get rid of that. */
1655 status = output_message_fp (ct, file, tmp_decoded);
1656 (void) m_unlink (tmp_decoded);
1657 free (tmp_decoded);
1658 if (fclose (file)) {
1659 inform("unable to close temporary file %s, continuing...", tempfile);
1660 }
1661
1662 return status;
1663 }
1664
1665
1666 /*
1667 * Reformat content as plain text.
1668 * Some of the arguments aren't really needed now, but maybe will
1669 * be in the future for other than text types.
1670 */
1671 static int
1672 reformat_part (CT ct, char *file, char *type, char *subtype, int c_type)
1673 {
1674 int output_subtype, output_encoding;
1675 const char *reason = NULL;
1676 char *cp, *cf;
1677 int status;
1678
1679 /* Hacky: this redirects the output from whatever command is used
1680 to show the part to a file. So, the user can't have any output
1681 redirection in that command.
1682 Could show_multi() in mhshowsbr.c avoid this? */
1683
1684 /* Check for invo_name-format-type/subtype. */
1685 if ((cf = context_find_by_type ("format", type, subtype)) == NULL) {
1686 if (verbosw) {
1687 inform("Don't know how to convert %s, there is no "
1688 "%s-format-%s/%s profile entry",
1689 ct->c_file, invo_name, type, subtype);
1690 }
1691 return NOTOK;
1692 }
1693 if (strchr (cf, '>')) {
1694 inform("'>' prohibited in \"%s\",\nplease fix your "
1695 "%s-format-%s/%s profile entry", cf, invo_name, type,
1696 FENDNULL(subtype));
1697
1698 return NOTOK;
1699 }
1700
1701 cp = concat (cf, " >", file, NULL);
1702 status = show_content_aux (ct, 0, cp, NULL, NULL);
1703 free (cp);
1704
1705 /* Unlink decoded content tmp file and free its filename to avoid
1706 leaks. The file stream should already have been closed. */
1707 if (ct->c_cefile.ce_unlink) {
1708 (void) m_unlink (ct->c_cefile.ce_file);
1709 free (ct->c_cefile.ce_file);
1710 ct->c_cefile.ce_file = NULL;
1711 ct->c_cefile.ce_unlink = 0;
1712 }
1713
1714 if (c_type == CT_TEXT) {
1715 output_subtype = TEXT_PLAIN;
1716 } else {
1717 /* Set subtype to 0, which is always an UNKNOWN subtype. */
1718 output_subtype = 0;
1719 }
1720
1721 output_encoding = content_encoding (ct, &reason);
1722 if (status == OK &&
1723 set_ct_type (ct, c_type, output_subtype, output_encoding) == OK) {
1724 ct->c_cefile.ce_file = file;
1725 ct->c_cefile.ce_unlink = 1;
1726 } else {
1727 ct->c_cefile.ce_unlink = 0;
1728 status = NOTOK;
1729 }
1730
1731 return status;
1732 }
1733
1734
1735 /*
1736 * Fill in a multipart/alternative part.
1737 */
1738 static CT
1739 build_multipart_alt (CT first_alt, CT new_part, int type, int subtype)
1740 {
1741 char *boundary_prefix = "----=_nmh-multipart";
1742 char *boundary = concat (boundary_prefix, first_alt->c_partno, NULL);
1743 char *boundary_indicator = "; boundary=";
1744 char *typename, *subtypename, *name;
1745 CT ct;
1746 struct part *p;
1747 struct multipart *m;
1748 const struct str2init *ctinit;
1749
1750 NEW0(ct);
1751
1752 /* Set up the multipart/alternative part. These fields of *ct were
1753 initialized to 0 by mh_xcalloc():
1754 c_fp, c_unlink, c_begin, c_end,
1755 c_vrsn, c_ctline, c_celine,
1756 c_id, c_descr, c_dispo, c_partno,
1757 c_ctinfo.ci_comment, c_ctinfo.ci_magic,
1758 c_cefile, c_encoding,
1759 c_digested, c_digest[16], c_ctexbody,
1760 c_ctinitfnx, c_ceopenfnx, c_ceclosefnx, c_cesizefnx,
1761 c_umask, c_rfc934,
1762 c_showproc, c_termproc, c_storeproc, c_storage, c_folder
1763 */
1764
1765 ct->c_file = mh_xstrdup (first_alt->c_file);
1766 ct->c_type = type;
1767 ct->c_subtype = subtype;
1768
1769 ctinit = get_ct_init (ct->c_type);
1770
1771 typename = ct_type_str (type);
1772 subtypename = ct_subtype_str (type, subtype);
1773
1774 {
1775 int serial = 0;
1776 int found_boundary = 1;
1777
1778 while (found_boundary && serial < 1000000) {
1779 found_boundary = 0;
1780
1781 /* Ensure that the boundary doesn't appear in the decoded
1782 content. */
1783 if (new_part->c_cefile.ce_file) {
1784 if ((found_boundary =
1785 boundary_in_content (&new_part->c_cefile.ce_fp,
1786 new_part->c_cefile.ce_file,
1787 boundary)) == NOTOK) {
1788 goto return_null;
1789 }
1790 }
1791
1792 /* Ensure that the boundary doesn't appear in the encoded
1793 content. */
1794 if (! found_boundary && new_part->c_file) {
1795 if ((found_boundary =
1796 boundary_in_content (&new_part->c_fp,
1797 new_part->c_file,
1798 boundary)) == NOTOK) {
1799 goto return_null;
1800 }
1801 }
1802
1803 if (found_boundary) {
1804 /* Try a slightly different boundary. */
1805 char buffer2[16];
1806
1807 free (boundary);
1808 ++serial;
1809 snprintf (buffer2, sizeof buffer2, "%d", serial);
1810 boundary =
1811 concat (boundary_prefix,
1812 FENDNULL(first_alt->c_partno),
1813 "-", buffer2, NULL);
1814 }
1815 }
1816
1817 if (found_boundary) {
1818 inform("giving up trying to find a unique boundary");
1819 goto return_null;
1820 }
1821 }
1822
1823 name = concat (" ", typename, "/", subtypename, boundary_indicator, "\"",
1824 boundary, "\"", NULL);
1825
1826 /* Load c_first_hf and c_last_hf. */
1827 transfer_noncontent_headers (first_alt, ct);
1828 add_header (ct, mh_xstrdup (TYPE_FIELD), concat (name, "\n", NULL));
1829 free (name);
1830
1831 /* Load c_partno. */
1832 if (first_alt->c_partno) {
1833 ct->c_partno = mh_xstrdup (first_alt->c_partno);
1834 free (first_alt->c_partno);
1835 first_alt->c_partno = concat (ct->c_partno, ".1", NULL);
1836 new_part->c_partno = concat (ct->c_partno, ".2", NULL);
1837 } else {
1838 first_alt->c_partno = mh_xstrdup ("1");
1839 new_part->c_partno = mh_xstrdup ("2");
1840 }
1841
1842 if (ctinit) {
1843 ct->c_ctinfo.ci_type = mh_xstrdup (typename);
1844 ct->c_ctinfo.ci_subtype = mh_xstrdup (subtypename);
1845 }
1846
1847 add_param(&ct->c_ctinfo.ci_first_pm, &ct->c_ctinfo.ci_last_pm,
1848 "boundary", boundary, 0);
1849
1850 NEW(p);
1851 NEW(p->mp_next);
1852 p->mp_next->mp_next = NULL;
1853 p->mp_next->mp_part = first_alt;
1854
1855 NEW0(m);
1856 m->mp_start = concat (boundary, "\n", NULL);
1857 m->mp_stop = concat (boundary, "--\n", NULL);
1858 m->mp_parts = p;
1859 ct->c_ctparams = m;
1860
1861 free (boundary);
1862
1863 return ct;
1864
1865 return_null:
1866 free_content(ct);
1867 free(boundary);
1868 return NULL;
1869 }
1870
1871
1872 /*
1873 * Check that the boundary does not appear in the content.
1874 */
1875 static int
1876 boundary_in_content (FILE **fp, char *file, const char *boundary)
1877 {
1878 char buffer[NMH_BUFSIZ];
1879 size_t bytes_read;
1880 bool found_boundary = false;
1881
1882 /* free_content() will close *fp if we fopen it here. */
1883 if (! *fp && (*fp = fopen (file, "r")) == NULL) {
1884 advise (file, "unable to open %s for reading", file);
1885 return NOTOK;
1886 }
1887
1888 fseeko (*fp, 0L, SEEK_SET);
1889 while ((bytes_read = fread (buffer, 1, sizeof buffer, *fp)) > 0) {
1890 if (find_str (buffer, bytes_read, boundary)) {
1891 found_boundary = true;
1892 break;
1893 }
1894 }
1895
1896 return found_boundary;
1897 }
1898
1899
1900 /*
1901 * Remove all non-Content headers.
1902 */
1903 static void
1904 transfer_noncontent_headers (CT old, CT new)
1905 {
1906 HF hp, hp_prev;
1907
1908 hp_prev = hp = old->c_first_hf;
1909 while (hp) {
1910 HF next = hp->next;
1911
1912 if (strncasecmp (XXX_FIELD_PRF, hp->name, LEN(XXX_FIELD_PRF))) {
1913 if (hp == old->c_last_hf) {
1914 if (hp == old->c_first_hf) {
1915 old->c_last_hf = old->c_first_hf = NULL;
1916 } else {
1917 hp_prev->next = NULL;
1918 old->c_last_hf = hp_prev;
1919 }
1920 } else {
1921 if (hp == old->c_first_hf) {
1922 old->c_first_hf = next;
1923 } else {
1924 hp_prev->next = next;
1925 }
1926 }
1927
1928 /* Put node hp in the new CT. */
1929 if (new->c_first_hf == NULL) {
1930 new->c_first_hf = hp;
1931 } else {
1932 new->c_last_hf->next = hp;
1933 }
1934 new->c_last_hf = hp;
1935 } else {
1936 /* A Content- header, leave in old. */
1937 hp_prev = hp;
1938 }
1939
1940 hp = next;
1941 }
1942 }
1943
1944
1945 /*
1946 * Set content type.
1947 */
1948 static int
1949 set_ct_type (CT ct, int type, int subtype, int encoding)
1950 {
1951 char *typename = ct_type_str (type);
1952 char *subtypename = ct_subtype_str (type, subtype);
1953 /* E.g, " text/plain" */
1954 char *type_subtypename = concat (" ", typename, "/", subtypename, NULL);
1955 /* E.g, " text/plain\n" */
1956 char *name_plus_nl = concat (type_subtypename, "\n", NULL);
1957 bool found_content_type = false;
1958 HF hf;
1959 const char *cp = NULL;
1960 char *ctline;
1961 int status;
1962
1963 /* Update/add Content-Type header field. */
1964 for (hf = ct->c_first_hf; hf; hf = hf->next) {
1965 if (! strcasecmp (TYPE_FIELD, hf->name)) {
1966 found_content_type = true;
1967 free (hf->value);
1968 hf->value = (cp = strchr (ct->c_ctline, ';'))
1969 ? concat (type_subtypename, cp, "\n", NULL)
1970 : mh_xstrdup (name_plus_nl);
1971 }
1972 }
1973 if (! found_content_type) {
1974 add_header (ct, mh_xstrdup (TYPE_FIELD),
1975 (cp = strchr (ct->c_ctline, ';'))
1976 ? concat (type_subtypename, cp, "\n", NULL)
1977 : mh_xstrdup (name_plus_nl));
1978 }
1979
1980 /* Some of these might not be used, but set them anyway. */
1981 ctline = cp
1982 ? concat (type_subtypename, cp, NULL)
1983 : concat (type_subtypename, NULL);
1984 free (ct->c_ctline);
1985 ct->c_ctline = ctline;
1986 /* Leave other ctinfo members as they were. */
1987 free (ct->c_ctinfo.ci_type);
1988 ct->c_ctinfo.ci_type = mh_xstrdup (typename);
1989 free (ct->c_ctinfo.ci_subtype);
1990 ct->c_ctinfo.ci_subtype = mh_xstrdup (subtypename);
1991 ct->c_type = type;
1992 ct->c_subtype = subtype;
1993
1994 free (name_plus_nl);
1995 free (type_subtypename);
1996
1997 status = set_ce (ct, encoding);
1998
1999 return status;
2000 }
2001
2002
2003 /*
2004 * It's not necessary to update the charset parameter of a Content-Type
2005 * header for a text part. According to RFC 2045 Sec. 6.4, the body
2006 * (content) was originally in the specified charset, "and will be in
2007 * that character set again after decoding."
2008 */
2009 static int
2010 decode_text_parts (CT ct, int encoding, const char *decodetypes,
2011 int *message_mods)
2012 {
2013 int status = OK;
2014 int lf_line_endings = 0;
2015
2016 switch (ct->c_type) {
2017 case CT_MULTIPART: {
2018 struct multipart *m = (struct multipart *) ct->c_ctparams;
2019 struct part *part;
2020
2021 /* Should check to see if the body for this part is encoded?
2022 For now, it gets passed along as-is by InitMultiPart(). */
2023 for (part = m->mp_parts; status == OK && part; part = part->mp_next) {
2024 status = decode_text_parts (part->mp_part, encoding, decodetypes,
2025 message_mods);
2026 }
2027 break;
2028 }
2029
2030 case CT_MESSAGE:
2031 if (ct->c_subtype == MESSAGE_EXTERNAL) {
2032 struct exbody *e = (struct exbody *) ct->c_ctparams;
2033
2034 status = decode_text_parts (e->eb_content, encoding, decodetypes,
2035 message_mods);
2036 }
2037 break;
2038
2039 default:
2040 if (! should_decode(decodetypes, ct->c_ctinfo.ci_type, ct->c_ctinfo.ci_subtype)) {
2041 break;
2042 }
2043
2044 lf_line_endings =
2045 ct->c_ctparams && ((struct text *) ct->c_ctparams)->lf_line_endings;
2046
2047 switch (ct->c_encoding) {
2048 case CE_BASE64:
2049 case CE_QUOTED: {
2050 int ct_encoding;
2051
2052 if (decode_part (ct) == OK && ct->c_cefile.ce_file) {
2053 const char *reason = NULL;
2054
2055 if ((ct_encoding = content_encoding (ct, &reason)) == CE_BINARY
2056 && encoding != CE_BINARY) {
2057 /* The decoding isn't acceptable so discard it.
2058 Leave status as OK to allow other transformations. */
2059 if (verbosw) {
2060 report (NULL, ct->c_partno, ct->c_file,
2061 "will not decode%s because it is binary (%s)",
2062 ct->c_partno ? ""
2063 : (FENDNULL(ct->c_ctline)),
2064 reason);
2065 }
2066 (void) m_unlink (ct->c_cefile.ce_file);
2067 free (ct->c_cefile.ce_file);
2068 ct->c_cefile.ce_file = NULL;
2069 } else if (ct->c_encoding == CE_QUOTED &&
2070 ct_encoding == CE_8BIT && encoding == CE_7BIT) {
2071 /* The decoding isn't acceptable so discard it.
2072 Leave status as OK to allow other transformations. */
2073 if (verbosw) {
2074 report (NULL, ct->c_partno, ct->c_file,
2075 "will not decode%s because it is 8bit",
2076 ct->c_partno ? ""
2077 : (FENDNULL(ct->c_ctline)));
2078 }
2079 (void) m_unlink (ct->c_cefile.ce_file);
2080 free (ct->c_cefile.ce_file);
2081 ct->c_cefile.ce_file = NULL;
2082 } else {
2083 int enc;
2084
2085 if (ct_encoding == CE_BINARY) {
2086 enc = CE_BINARY;
2087 } else if (ct_encoding == CE_8BIT && encoding == CE_7BIT) {
2088 enc = CE_QUOTED;
2089 } else {
2090 enc = ct_encoding;
2091 }
2092 if (set_ce (ct, enc) == OK) {
2093 ++*message_mods;
2094 if (verbosw) {
2095 report (NULL, ct->c_partno, ct->c_file, "decode%s",
2096 FENDNULL(ct->c_ctline));
2097 }
2098 if (lf_line_endings) {
2099 strip_crs (ct, message_mods);
2100 }
2101 } else {
2102 status = NOTOK;
2103 }
2104 }
2105 } else {
2106 status = NOTOK;
2107 }
2108 break;
2109 }
2110 case CE_8BIT:
2111 case CE_7BIT:
2112 if (lf_line_endings) {
2113 strip_crs (ct, message_mods);
2114 }
2115 break;
2116 default:
2117 break;
2118 }
2119
2120 break;
2121 }
2122
2123 return status;
2124 }
2125
2126
/*
 * Determine if the part with type[/subtype] should be decoded, according to
 * decodetypes (which came from the -decodetypes switch).
 *
 * The search is quick and case-insensitive:  decodetypes is bracketed
 * with commas, and then searched for ,type, and, if that isn't found
 * and there is a subtype, for ,type/subtype,.
 *
 * Returns non-zero if there is a match and 0 if not.
 */
static int
should_decode(const char *decodetypes, const char *type, const char *subtype)
{
    char *haystack = concat(",", decodetypes, ",", NULL);
    char *needle = concat(",", type, ",", NULL);
    bool matched = nmh_strcasestr(haystack, needle) != NULL;

    if (! matched  &&  subtype != NULL) {
        /* The type alone didn't match, so try with the subtype. */
        free(needle);
        needle = concat(",", type, "/", subtype, ",", NULL);
        matched = nmh_strcasestr(haystack, needle) != NULL;
    }

    free(needle);
    free(haystack);

    return matched;
}
2159
2160
2161 /*
2162 * See if the decoded content is 7bit, 8bit, or binary. It's binary
2163 * if it has any NUL characters, a CR not followed by a LF, or lines
2164 * greater than 998 characters in length. If binary, reason is set
2165 * to a string explaining why.
2166 */
2167 static int
2168 content_encoding (CT ct, const char **reason)
2169 {
2170 CE ce = &ct->c_cefile;
2171 int encoding = CE_7BIT;
2172
2173 if (ce->ce_file) {
2174 size_t line_len = 0;
2175 char buffer[NMH_BUFSIZ];
2176 size_t inbytes;
2177
2178 if (! ce->ce_fp && (ce->ce_fp = fopen (ce->ce_file, "r")) == NULL) {
2179 advise (ce->ce_file, "unable to open for reading");
2180 return CE_UNKNOWN;
2181 }
2182
2183 fseeko (ce->ce_fp, 0L, SEEK_SET);
2184 while (encoding != CE_BINARY &&
2185 (inbytes = fread (buffer, 1, sizeof buffer, ce->ce_fp)) > 0) {
2186 char *cp;
2187 size_t i;
2188 int last_char_was_cr = 0;
2189
2190 for (i = 0, cp = buffer; i < inbytes; ++i, ++cp) {
2191 if (*cp == '\0' || ++line_len > 998 ||
2192 (*cp != '\n' && last_char_was_cr)) {
2193 encoding = CE_BINARY;
2194 if (*cp == '\0') {
2195 *reason = "null character";
2196 } else if (line_len > 998) {
2197 *reason = "line length > 998";
2198 } else if (*cp != '\n' && last_char_was_cr) {
2199 *reason = "CR not followed by LF";
2200 } else {
2201 /* Should not reach this. */
2202 *reason = "";
2203 }
2204 break;
2205 }
2206 if (*cp == '\n') {
2207 line_len = 0;
2208 } else if (! isascii ((unsigned char) *cp)) {
2209 encoding = CE_8BIT;
2210 }
2211
2212 last_char_was_cr = *cp == '\r';
2213 }
2214 }
2215
2216 fclose (ce->ce_fp);
2217 ce->ce_fp = NULL;
2218 } /* else should never happen */
2219
2220 return encoding;
2221 }
2222
2223
2224 /*
2225 * Strip carriage returns from content.
2226 */
2227 static int
2228 strip_crs (CT ct, int *message_mods)
2229 {
2230 char *charset = content_charset (ct);
2231 int status = OK;
2232
2233 /* Only strip carriage returns if content is ASCII or another
2234 charset that has the same readily recognizable CR followed by a
2235 LF. We can include UTF-8 here because if the high-order bit of
2236 a UTF-8 byte is 0, then it must be a single-byte ASCII
2237 character. */
2238 if (! strcasecmp (charset, "US-ASCII") ||
2239 ! strcasecmp (charset, "UTF-8") ||
2240 ! strncasecmp (charset, "ISO-8859-", 9) ||
2241 ! strncasecmp (charset, "WINDOWS-12", 10)) {
2242 char **file = NULL;
2243 FILE **fp = NULL;
2244 size_t begin;
2245 size_t end;
2246 bool has_crs = false;
2247 bool opened_input_file = false;
2248
2249 if (ct->c_cefile.ce_file) {
2250 file = &ct->c_cefile.ce_file;
2251 fp = &ct->c_cefile.ce_fp;
2252 begin = end = 0;
2253 } else if (ct->c_file) {
2254 file = &ct->c_file;
2255 fp = &ct->c_fp;
2256 begin = (size_t) ct->c_begin;
2257 end = (size_t) ct->c_end;
2258 } /* else don't know where the content is */
2259
2260 if (file && *file && fp) {
2261 if (! *fp) {
2262 if ((*fp = fopen (*file, "r")) == NULL) {
2263 advise (*file, "unable to open for reading");
2264 status = NOTOK;
2265 } else {
2266 opened_input_file = true;
2267 }
2268 }
2269 }
2270
2271 if (fp && *fp) {
2272 char buffer[NMH_BUFSIZ];
2273 size_t bytes_read;
2274 size_t bytes_to_read =
2275 end > 0 && end > begin ? end - begin : sizeof buffer;
2276
2277 fseeko (*fp, begin, SEEK_SET);
2278 while ((bytes_read = fread (buffer, 1,
2279 min (bytes_to_read, sizeof buffer),
2280 *fp)) > 0) {
2281 /* Look for CR followed by a LF. This is supposed to
2282 be text so there should be LF's. If not, don't
2283 modify the content. */
2284 char *cp;
2285 size_t i;
2286 bool last_char_was_cr = false;
2287
2288 if (end > 0) { bytes_to_read -= bytes_read; }
2289
2290 for (i = 0, cp = buffer; i < bytes_read; ++i, ++cp) {
2291 if (*cp == '\n' && last_char_was_cr) {
2292 has_crs = true;
2293 break;
2294 }
2295
2296 last_char_was_cr = *cp == '\r';
2297 }
2298 }
2299
2300 if (has_crs) {
2301 int fd;
2302 char *stripped_content_file;
2303 char *tempfile = m_mktemp2 (NULL, invo_name, &fd, NULL);
2304
2305 if (tempfile == NULL) {
2306 die("unable to create temporary file in %s",
2307 get_temp_dir());
2308 }
2309 stripped_content_file = mh_xstrdup (tempfile);
2310
2311 /* Strip each CR before a LF from the content. */
2312 fseeko (*fp, begin, SEEK_SET);
2313 while ((bytes_read = fread (buffer, 1, sizeof buffer, *fp)) >
2314 0) {
2315 char *cp;
2316 size_t i;
2317 bool last_char_was_cr = false;
2318
2319 for (i = 0, cp = buffer; i < bytes_read; ++i, ++cp) {
2320 if (*cp == '\r') {
2321 last_char_was_cr = true;
2322 } else if (last_char_was_cr) {
2323 if (*cp != '\n') {
2324 if (write (fd, "\r", 1) < 0) {
2325 advise (tempfile, "CR write");
2326 }
2327 }
2328 if (write (fd, cp, 1) < 0) {
2329 advise (tempfile, "write");
2330 }
2331 last_char_was_cr = false;
2332 } else {
2333 if (write (fd, cp, 1) < 0) {
2334 advise (tempfile, "write");
2335 }
2336 last_char_was_cr = false;
2337 }
2338 }
2339 }
2340
2341 if (close (fd)) {
2342 inform("unable to write temporary file %s, continuing...",
2343 stripped_content_file);
2344 (void) m_unlink (stripped_content_file);
2345 free(stripped_content_file);
2346 status = NOTOK;
2347 } else {
2348 /* Replace the decoded file with the converted one. */
2349 if (ct->c_cefile.ce_file && ct->c_cefile.ce_unlink)
2350 (void) m_unlink (ct->c_cefile.ce_file);
2351
2352 free(ct->c_cefile.ce_file);
2353 ct->c_cefile.ce_file = stripped_content_file;
2354 ct->c_cefile.ce_unlink = 1;
2355
2356 ++*message_mods;
2357 if (verbosw) {
2358 report (NULL, ct->c_partno,
2359 begin == 0 && end == 0 ? "" : *file,
2360 "stripped CRs");
2361 }
2362 }
2363 }
2364
2365 if (opened_input_file) {
2366 fclose (*fp);
2367 *fp = NULL;
2368 }
2369 }
2370 }
2371
2372 free (charset);
2373
2374 return status;
2375 }
2376
2377
2378 /*
2379 * Add/update, if necessary, the message C-T-E, based on the least restrictive
2380 * of the part C-T-E's.
2381 */
2382 static void
2383 update_cte (CT ct)
2384 {
2385 const int least_restrictive_enc = least_restrictive_encoding (ct);
2386
2387 if (least_restrictive_enc != CE_UNKNOWN &&
2388 least_restrictive_enc != CE_7BIT) {
2389 char *cte = concat (" ", ce_str (least_restrictive_enc), "\n", NULL);
2390 HF hf;
2391 bool found_cte = false;
2392
2393 /* Update/add Content-Transfer-Encoding header field. */
2394 for (hf = ct->c_first_hf; hf; hf = hf->next) {
2395 if (! strcasecmp (ENCODING_FIELD, hf->name)) {
2396 found_cte = true;
2397 free (hf->value);
2398 hf->value = cte;
2399 }
2400 }
2401 if (! found_cte) {
2402 add_header (ct, mh_xstrdup (ENCODING_FIELD), cte);
2403 }
2404 }
2405 }
2406
2407
2408 /*
2409 * Find the least restrictive encoding (7bit, 8bit, binary) of the parts
2410 * within a message.
2411 */
2412 static int
2413 least_restrictive_encoding (CT ct)
2414 {
2415 int encoding = CE_UNKNOWN;
2416
2417 switch (ct->c_type) {
2418 case CT_MULTIPART: {
2419 struct multipart *m = (struct multipart *) ct->c_ctparams;
2420 struct part *part;
2421
2422 for (part = m->mp_parts; part; part = part->mp_next) {
2423 const int part_encoding =
2424 least_restrictive_encoding (part->mp_part);
2425
2426 if (less_restrictive (encoding, part_encoding)) {
2427 encoding = part_encoding;
2428 }
2429 }
2430 break;
2431 }
2432
2433 case CT_MESSAGE:
2434 if (ct->c_subtype == MESSAGE_EXTERNAL) {
2435 struct exbody *e = (struct exbody *) ct->c_ctparams;
2436 const int part_encoding =
2437 least_restrictive_encoding (e->eb_content);
2438
2439 if (less_restrictive (encoding, part_encoding)) {
2440 encoding = part_encoding;
2441 }
2442 }
2443 break;
2444
2445 default: {
2446 if (less_restrictive (encoding, ct->c_encoding)) {
2447 encoding = ct->c_encoding;
2448 }
2449 }}
2450
2451 return encoding;
2452 }
2453
2454
2455 /*
2456 * Return whether the second encoding is less restrictive than the first, where
2457 * "less restrictive" is in the sense used by RFC 2045 Secs. 6.1 and 6.4. So,
2458 * CE_BINARY is less restrictive than CE_8BIT and
2459 * CE_8BIT is less restrictive than CE_7BIT.
2460 */
2461 static int
2462 less_restrictive (int encoding, int second_encoding)
2463 {
2464 switch (second_encoding) {
2465 case CE_BINARY:
2466 return encoding != CE_BINARY;
2467 case CE_8BIT:
2468 return encoding != CE_BINARY && encoding != CE_8BIT;
2469 case CE_7BIT:
2470 return encoding != CE_BINARY && encoding != CE_8BIT &&
2471 encoding != CE_7BIT;
2472 default :
2473 return 0;
2474 }
2475 }
2476
2477
2478 /*
2479 * Convert character set of each part.
2480 */
2481 static int
2482 convert_charsets (CT ct, char *dest_charset, int *message_mods)
2483 {
2484 int status = OK;
2485
2486 switch (ct->c_type) {
2487 case CT_TEXT:
2488 if (ct->c_subtype == TEXT_PLAIN) {
2489 status = convert_charset (ct, dest_charset, message_mods);
2490 if (status == OK) {
2491 if (verbosw) {
2492 char *ct_charset = content_charset (ct);
2493
2494 report (NULL, ct->c_partno, ct->c_file,
2495 "convert %s to %s", ct_charset, dest_charset);
2496 free (ct_charset);
2497 }
2498 } else {
2499 char *ct_charset = content_charset (ct);
2500
2501 report ("iconv", ct->c_partno, ct->c_file,
2502 "failed to convert %s to %s", ct_charset, dest_charset);
2503 free (ct_charset);
2504 }
2505 }
2506 break;
2507
2508 case CT_MULTIPART: {
2509 struct multipart *m = (struct multipart *) ct->c_ctparams;
2510 struct part *part;
2511
2512 /* Should check to see if the body for this part is encoded?
2513 For now, it gets passed along as-is by InitMultiPart(). */
2514 for (part = m->mp_parts; status == OK && part; part = part->mp_next) {
2515 status =
2516 convert_charsets (part->mp_part, dest_charset, message_mods);
2517 }
2518 break;
2519 }
2520
2521 case CT_MESSAGE:
2522 if (ct->c_subtype == MESSAGE_EXTERNAL) {
2523 struct exbody *e = (struct exbody *) ct->c_ctparams;
2524
2525 status =
2526 convert_charsets (e->eb_content, dest_charset, message_mods);
2527 }
2528 break;
2529
2530 default:
2531 break;
2532 }
2533
2534 return status;
2535 }
2536
2537
2538 /*
2539 * Fix various problems that aren't handled elsewhere. These
2540 * are fixed unconditionally: there are no switches to disable
2541 * them. Currently, "problems" are these:
2542 * 1) remove extraneous semicolon at the end of a header parameter list
2543 * 2) replace RFC 2047 encoding with RFC 2231 encoding of name and
2544 * filename parameters in Content-Type and Content-Disposition
2545 * headers, respectively.
2546 */
2547 static int
2548 fix_always (CT ct, int *message_mods)
2549 {
2550 int status = OK;
2551
2552 switch (ct->c_type) {
2553 case CT_MULTIPART: {
2554 struct multipart *m = (struct multipart *) ct->c_ctparams;
2555 struct part *part;
2556
2557 for (part = m->mp_parts; status == OK && part; part = part->mp_next) {
2558 status = fix_always (part->mp_part, message_mods);
2559 }
2560 break;
2561 }
2562
2563 case CT_MESSAGE:
2564 if (ct->c_subtype == MESSAGE_EXTERNAL) {
2565 struct exbody *e = (struct exbody *) ct->c_ctparams;
2566
2567 status = fix_always (e->eb_content, message_mods);
2568 }
2569 break;
2570
2571 default: {
2572 HF hf;
2573
2574 if (ct->c_first_hf) {
2575 fix_filename_encoding (ct);
2576 }
2577
2578 for (hf = ct->c_first_hf; hf; hf = hf->next) {
2579 size_t len = strlen (hf->value);
2580
2581 if (strcasecmp (hf->name, TYPE_FIELD) != 0 &&
2582 strcasecmp (hf->name, DISPO_FIELD) != 0) {
2583 /* Only do this for Content-Type and
2584 Content-Disposition fields because those are the
2585 only headers that parse_mime() warns about. */
2586 continue;
2587 }
2588
2589 /* whitespace following a trailing ';' will be nuked as well */
2590 if (hf->value[len - 1] == '\n') {
2591 while (isspace((unsigned char)(hf->value[len - 2]))) {
2592 if (len-- == 0) { break; }
2593 }
2594 }
2595
2596 if (hf->value[len - 2] == ';') {
2597 /* Remove trailing ';' from parameter value. */
2598 hf->value[len - 2] = '\n';
2599 hf->value[len - 1] = '\0';
2600
2601 /* Also, if Content-Type parameter, remove trailing ';'
2602 from ct->c_ctline. This probably isn't necessary
2603 but can't hurt. */
2604 if (strcasecmp(hf->name, TYPE_FIELD) == 0 && ct->c_ctline) {
2605 size_t l = strlen(ct->c_ctline) - 1;
2606 while (isspace((unsigned char)(ct->c_ctline[l])) ||
2607 ct->c_ctline[l] == ';') {
2608 ct->c_ctline[l--] = '\0';
2609 if (l == 0) { break; }
2610 }
2611 }
2612
2613 ++*message_mods;
2614 if (verbosw) {
2615 report (NULL, ct->c_partno, ct->c_file,
2616 "remove trailing ; from %s parameter value",
2617 hf->name);
2618 }
2619 }
2620 }
2621 }}
2622
2623 return status;
2624 }
2625
2626
2627 /*
2628 * Factor out common code for loops in fix_filename_encoding().
2629 */
2630 static int
2631 fix_filename_param (char *name, char *value, PM *first_pm, PM *last_pm)
2632 {
2633 bool fixed = false;
2634
2635 if (has_prefix(value, "=?") && has_suffix(value, "?=")) {
2636 /* Looks like an RFC 2047 encoded parameter. */
2637 char decoded[PATH_MAX + 1];
2638
2639 if (decode_rfc2047 (value, decoded, sizeof decoded)) {
2640 /* Encode using RFC 2231. */
2641 replace_param (first_pm, last_pm, name, decoded, 0);
2642 fixed = true;
2643 } else {
2644 inform("failed to decode %s parameter %s", name, value);
2645 }
2646 }
2647
2648 return fixed;
2649 }
2650
2651
2652 /*
2653 * Replace RFC 2047 encoding with RFC 2231 encoding of name and
2654 * filename parameters in Content-Type and Content-Disposition
2655 * headers, respectively.
2656 */
2657 static int
2658 fix_filename_encoding (CT ct)
2659 {
2660 PM pm;
2661 HF hf;
2662 int fixed = 0;
2663
2664 for (pm = ct->c_ctinfo.ci_first_pm; pm; pm = pm->pm_next) {
2665 if (pm->pm_name && pm->pm_value &&
2666 strcasecmp (pm->pm_name, "name") == 0) {
2667 fixed = fix_filename_param (pm->pm_name, pm->pm_value,
2668 &ct->c_ctinfo.ci_first_pm,
2669 &ct->c_ctinfo.ci_last_pm);
2670 }
2671 }
2672
2673 for (pm = ct->c_dispo_first; pm; pm = pm->pm_next) {
2674 if (pm->pm_name && pm->pm_value &&
2675 strcasecmp (pm->pm_name, "filename") == 0) {
2676 fixed = fix_filename_param (pm->pm_name, pm->pm_value,
2677 &ct->c_dispo_first,
2678 &ct->c_dispo_last);
2679 }
2680 }
2681
2682 /* Fix hf values to correspond. */
2683 for (hf = ct->c_first_hf; fixed && hf; hf = hf->next) {
2684 enum { OTHER, TYPE_HEADER, DISPO_HEADER } field = OTHER;
2685
2686 if (strcasecmp (hf->name, TYPE_FIELD) == 0) {
2687 field = TYPE_HEADER;
2688 } else if (strcasecmp (hf->name, DISPO_FIELD) == 0) {
2689 field = DISPO_HEADER;
2690 }
2691
2692 if (field != OTHER) {
2693 const char *const semicolon_loc = strchr (hf->value, ';');
2694
2695 if (semicolon_loc) {
2696 const size_t len =
2697 strlen (hf->name) + 1 + semicolon_loc - hf->value;
2698 const char *const params =
2699 output_params (len,
2700 field == TYPE_HEADER
2701 ? ct->c_ctinfo.ci_first_pm
2702 : ct->c_dispo_first,
2703 NULL, 0);
2704 const char *const new_params = concat (params, "\n", NULL);
2705
2706 replace_substring (&hf->value, semicolon_loc, new_params);
2707 free((void *)new_params); /* Cast away const. Sigh. */
2708 free((void *)params);
2709 } else {
2710 inform("did not find semicolon in %s:%s\n",
2711 hf->name, hf->value);
2712 }
2713 }
2714 }
2715
2716 return OK;
2717 }
2718
2719
2720 /*
2721 * Output content in input file to output file.
2722 */
2723 static int
2724 write_content (CT ct, const char *input_filename, char *outfile, FILE *outfp,
2725 int modify_inplace, int message_mods)
2726 {
2727 int status = OK;
2728
2729 if (modify_inplace) {
2730 if (message_mods > 0) {
2731 if ((status = output_message_fp (ct, outfp, outfile)) == OK) {
2732 char *infile = input_filename
2733 ? mh_xstrdup (input_filename)
2734 : mh_xstrdup (ct->c_file ? ct->c_file : "-");
2735
2736 if (remove_file (infile) == OK) {
2737 if (rename (outfile, infile)) {
2738 /* Rename didn't work, possibly because of an
2739 attempt to rename across filesystems. Try
2740 brute force copy. */
2741 int old = open (outfile, O_RDONLY);
2742 int new =
2743 open (infile, O_WRONLY | O_CREAT, m_gmprot ());
2744 int i = -1;
2745
2746 if (old != -1 && new != -1) {
2747 char buffer[NMH_BUFSIZ];
2748
2749 while ((i = read (old, buffer, sizeof buffer)) >
2750 0) {
2751 if (write (new, buffer, i) != i) {
2752 i = -1;
2753 break;
2754 }
2755 }
2756 }
2757 if (new != -1) { close (new); }
2758 if (old != -1) { close (old); }
2759 (void) m_unlink (outfile);
2760
2761 if (i < 0) {
2762 /* The -file argument processing used path() to
2763 expand filename to absolute path. */
2764 int file = ct->c_file && ct->c_file[0] == '/';
2765
2766 inform("unable to rename %s %s to %s, continuing...",
2767 file ? "file" : "message", outfile,
2768 infile);
2769 status = NOTOK;
2770 }
2771 }
2772 } else {
2773 inform("unable to remove input file %s, "
2774 "not modifying it, continuing...", infile);
2775 (void) m_unlink (outfile);
2776 status = NOTOK;
2777 }
2778
2779 free (infile);
2780 } else {
2781 status = NOTOK;
2782 }
2783 } else {
2784 /* No modifications and didn't need the tmp outfile. */
2785 (void) m_unlink (outfile);
2786 }
2787 } else {
2788 /* Output is going to some file. Produce it whether or not
2789 there were modifications. */
2790 status = output_message_fp (ct, outfp, outfile);
2791 }
2792
2793 flush_errors ();
2794 return status;
2795 }
2796
2797
2798 /*
2799 * parse_mime() does not set lf_line_endings in struct text, so use this
2800 * function to do it. It touches the parts the decodetypes identifies.
2801 */
2802 static void
2803 set_text_ctparams(CT ct, char *decodetypes, int lf_line_endings)
2804 {
2805 switch (ct->c_type) {
2806 case CT_MULTIPART: {
2807 struct multipart *m = (struct multipart *) ct->c_ctparams;
2808 struct part *part;
2809
2810 for (part = m->mp_parts; part; part = part->mp_next) {
2811 set_text_ctparams(part->mp_part, decodetypes, lf_line_endings);
2812 }
2813 break;
2814 }
2815
2816 case CT_MESSAGE:
2817 if (ct->c_subtype == MESSAGE_EXTERNAL) {
2818 struct exbody *e = (struct exbody *) ct->c_ctparams;
2819
2820 set_text_ctparams(e->eb_content, decodetypes, lf_line_endings);
2821 }
2822 break;
2823
2824 default:
2825 if (should_decode(decodetypes, ct->c_ctinfo.ci_type, ct->c_ctinfo.ci_subtype)) {
2826 if (ct->c_ctparams == NULL) {
2827 ct->c_ctparams = mh_xcalloc(1, sizeof (struct text));
2828 }
2829 ((struct text *) ct->c_ctparams)->lf_line_endings = lf_line_endings;
2830 }
2831 }
2832 }
2833
2834
2835 /*
2836 * If "rmmproc" is defined, call that to remove the file. Otherwise,
2837 * use the standard MH backup file.
2838 */
2839 static int
2840 remove_file (const char *file)
2841 {
2842 if (rmmproc) {
2843 char *rmm_command = concat (rmmproc, " ", file, NULL);
2844 int status = system (rmm_command);
2845
2846 free (rmm_command);
2847 return WIFEXITED (status) ? WEXITSTATUS (status) : NOTOK;
2848 }
2849 /* This is OK for a non-message file, it still uses the
2850 BACKUP_PREFIX form. The backup file will be in the same
2851 directory as file. */
2852 return rename (file, m_backup (file));
2853 }
2854
2855
2856 /*
2857 * Output formatted message to user.
2858 */
2859 static void
2860 report (char *what, char *partno, char *filename, char *message, ...)
2861 {
2862 va_list args;
2863 char *fmt;
2864
2865 if (verbosw) {
2866 va_start (args, message);
2867 fmt = concat (filename, partno ? " part " : ", ",
2868 FENDNULL(partno), partno ? ", " : "", message, NULL);
2869
2870 advertise (what, NULL, fmt, args);
2871
2872 free (fmt);
2873 va_end (args);
2874 }
2875 }
2876
2877
/*
 * Terminate via done (1).  When i is SIGQUIT, first flush stdout and
 * write a newline to stderr.  Presumably installed as a signal handler,
 * with i the signal number -- confirm at the registration site.
 */
static void
pipeser (int i)
{
    const bool got_sigquit = i == SIGQUIT;

    if (got_sigquit) {
        fflush (stdout);
        fputc ('\n', stderr);
        fflush (stderr);
    }

    done (1);
    /* NOTREACHED */
}