]> diplodocus.org Git - nmh/blob - uip/mhfixmsg.c
copyip.c: Move interface to own file.
[nmh] / uip / mhfixmsg.c
1 /* mhfixmsg.c -- rewrite a message with various transformations
2 *
3 * This code is Copyright (c) 2002 and 2013, by the authors of nmh.
4 * See the COPYRIGHT file in the root directory of the nmh
5 * distribution for complete copyright information.
6 */
7
8 #include "h/mh.h"
9 #include "sbr/cpydata.h"
10 #include "sbr/trimcpy.h"
11 #include "sbr/m_convert.h"
12 #include "sbr/m_backup.h"
13 #include "sbr/getfolder.h"
14 #include "sbr/folder_read.h"
15 #include "sbr/context_save.h"
16 #include "sbr/context_replace.h"
17 #include "sbr/context_find.h"
18 #include "sbr/readconfig.h"
19 #include "sbr/ambigsw.h"
20 #include "sbr/path.h"
21 #include "sbr/print_version.h"
22 #include "sbr/print_help.h"
23 #include "sbr/error.h"
24 #include "h/fmt_scan.h"
25 #include "h/mime.h"
26 #include "h/mhparse.h"
27 #include "h/done.h"
28 #include "h/utils.h"
29 #include "h/signals.h"
30 #include "sbr/m_maildir.h"
31 #include "sbr/m_mktemp.h"
32 #include "sbr/mime_type.h"
33 #include "mhmisc.h"
34 #include "mhfree.h"
35 #include "mhoutsbr.h"
36 #include "mhshowsbr.h"
37 #include <fcntl.h>
38
/*
 * Command-line switches.  Each X() entry is (switch text, minchars, id).
 * The table is expanded twice below with different definitions of X():
 * once through DEFINE_SWITCH_ENUM to produce the switch ids used as case
 * labels in main(), and once through DEFINE_SWITCH_ARRAY to produce the
 * switches[] array that main() hands to smatch().
 */
#define MHFIXMSG_SWITCHES \
    X("decodetext 8bit|7bit|binary", 0, DECODETEXTSW) \
    X("nodecodetext", 0, NDECODETEXTSW) \
    X("decodetypes", 0, DECODETYPESW) \
    X("crlflinebreaks", 0, CRLFLINEBREAKSSW) \
    X("nocrlflinebreaks", 0, NCRLFLINEBREAKSSW) \
    X("textcharset", 0, TEXTCHARSETSW) \
    X("notextcharset", 0, NTEXTCHARSETSW) \
    X("reformat", 0, REFORMATSW) \
    X("noreformat", 0, NREFORMATSW) \
    X("replacetextplain", 0, REPLACETEXTPLAINSW) \
    X("noreplacetextplain", 0, NREPLACETEXTPLAINSW) \
    X("fixboundary", 0, FIXBOUNDARYSW) \
    X("nofixboundary", 0, NFIXBOUNDARYSW) \
    X("fixcte", 0, FIXCOMPOSITECTESW) \
    X("nofixcte", 0, NFIXCOMPOSITECTESW) \
    X("fixtype mimetype", 0, FIXTYPESW) \
    X("file file", 0, FILESW) \
    X("outfile file", 0, OUTFILESW) \
    X("rmmproc program", 0, RPROCSW) \
    X("normmproc", 0, NRPRCSW) \
    X("changecur", 0, CHGSW) \
    X("nochangecur", 0, NCHGSW) \
    X("verbose", 0, VERBSW) \
    X("noverbose", 0, NVERBSW) \
    X("version", 0, VERSIONSW) \
    X("help", 0, HELPSW) \

/* First expansion: keep only the id of each entry. */
#define X(sw, minchars, id) id,
DEFINE_SWITCH_ENUM(MHFIXMSG);
#undef X

/* Second expansion: one { text, minchars, id } initializer per entry. */
#define X(sw, minchars, id) { sw, minchars, id },
DEFINE_SWITCH_ARRAY(MHFIXMSG, switches);
#undef X
74
75
int verbosw; /* Set by -verbose, cleared by -noverbose; gates report(). */
int debugsw; /* Needed by mhparse.c. */

/* SIGQUIT is handled by the same routine as SIGPIPE. */
#define quitser pipeser
80
81 /*
82 * static prototypes
83 */
/*
 * The set of transformations to apply to each message, as selected by the
 * command-line switches parsed in main().
 */
typedef struct fix_transformations {
    int fixboundary;       /* -fixboundary / -nofixboundary */
    int fixcompositecte;   /* -fixcte / -nofixcte */
    svector_t fixtypes;    /* -fixtype arguments; NULL if none were given */
    int reformat;          /* -reformat / -noreformat */
    int replacetextplain;  /* -replacetextplain / -noreplacetextplain */
    int decodetext;        /* CE_8BIT, CE_7BIT, or CE_BINARY target encoding;
                              0 (-nodecodetext) disables decoding */
    char *decodetypes;     /* -decodetypes argument */
    /* Line-ending selection (see RFC 2046 Sec. 4.1.1, par.1):
       0 for -crlflinebreaks, 1 for -nocrlflinebreaks. */
    int lf_line_endings;
    char *textcharset;     /* -textcharset argument; NULL for no conversion */
} fix_transformations;
96
/*
 * Prototypes for the file-local helpers.  mhfixmsgsbr() drives the
 * transformations for one message; the rest implement the individual
 * transformations and their supporting utilities.
 */
static int mhfixmsgsbr (CT *, char *, const fix_transformations *,
                        FILE **, char *, FILE **);
static int fix_boundary (CT *, int *);
static int copy_input_to_output (const char *, FILE *, const char *, FILE *);
static int get_multipart_boundary (CT, char **);
static int replace_boundary (CT, char *, char *);
static int fix_types (CT, svector_t, int *);
static char *replace_substring (char **, const char *, const char *);
static char *remove_parameter (char *, const char *);
static int fix_composite_cte (CT, int *);
static int set_ce (CT, int);
static int ensure_text_plain (CT *, CT, int *, int);
static int find_textplain_sibling (CT, int, int *);
static int insert_new_text_plain_part (CT, int, CT);
static CT build_text_plain_part (CT);
static int insert_into_new_mp_alt (CT *, int *);
static CT divide_part (CT);
static void copy_ctinfo (CI, CI);
static int decode_part (CT);
static int reformat_part (CT, char *, char *, char *, int);
static CT build_multipart_alt (CT, CT, int, int);
static int boundary_in_content (FILE **, char *, const char *);
static void transfer_noncontent_headers (CT, CT);
static int set_ct_type (CT, int type, int subtype, int encoding);
static int decode_text_parts (CT, int, const char *, int *);
static int should_decode(const char *, const char *, const char *);
static int content_encoding (CT, const char **);
static int strip_crs (CT, int *);
static void update_cte (CT);
static int least_restrictive_encoding (CT) PURE;
static int less_restrictive (int, int);
static int convert_charsets (CT, char *, int *);
static int fix_always (CT, int *);
static int fix_filename_param (char *, char *, PM *, PM *);
static int fix_filename_encoding (CT);
static int write_content (CT, const char *, char *, FILE *, int, int);
static void set_text_ctparams(CT, char *, int);
static int remove_file (const char *);
/* report() takes a printf-style format as its 4th argument. */
static void report (char *, char *, char *, char *, ...)
    CHECK_PRINTF(4, 5);
static void pipeser (int);
138
139
140 int
141 main (int argc, char **argv)
142 {
143 int msgnum;
144 char *cp, *file = NULL, *folder = NULL;
145 char *maildir = NULL, buf[100], *outfile = NULL;
146 char **argp, **arguments;
147 struct msgs_array msgs = { 0, 0, NULL };
148 struct msgs *mp = NULL;
149 CT *ctp;
150 FILE *fp, *infp = NULL, *outfp = NULL;
151 bool using_stdin = false;
152 bool chgflag = true;
153 int status = OK;
154 fix_transformations fx;
155 fx.reformat = fx.fixcompositecte = fx.fixboundary = 1;
156 fx.fixtypes = NULL;
157 fx.replacetextplain = 0;
158 fx.decodetext = CE_8BIT;
159 fx.decodetypes = "text,application/ics"; /* Default, per man page. */
160 fx.lf_line_endings = 0;
161 fx.textcharset = NULL;
162
163 if (nmh_init(argv[0], true, false)) { return 1; }
164
165 arguments = getarguments (invo_name, argc, argv, 1);
166 argp = arguments;
167
168 /*
169 * Parse arguments
170 */
171 while ((cp = *argp++)) {
172 if (*cp == '-') {
173 switch (smatch (++cp, switches)) {
174 case AMBIGSW:
175 ambigsw (cp, switches);
176 done (1);
177 case UNKWNSW:
178 die("-%s unknown", cp);
179
180 case HELPSW:
181 snprintf (buf, sizeof buf, "%s [+folder] [msgs] [switches]",
182 invo_name);
183 print_help (buf, switches, 1);
184 done (0);
185 case VERSIONSW:
186 print_version(invo_name);
187 done (0);
188
189 case DECODETEXTSW:
190 if (! (cp = *argp++) || *cp == '-') {
191 die("missing argument to %s", argp[-2]);
192 }
193 if (! strcasecmp (cp, "8bit")) {
194 fx.decodetext = CE_8BIT;
195 } else if (! strcasecmp (cp, "7bit")) {
196 fx.decodetext = CE_7BIT;
197 } else if (! strcasecmp (cp, "binary")) {
198 fx.decodetext = CE_BINARY;
199 } else {
200 die("invalid argument to %s", argp[-2]);
201 }
202 continue;
203 case NDECODETEXTSW:
204 fx.decodetext = 0;
205 continue;
206 case DECODETYPESW:
207 if (! (cp = *argp++) || *cp == '-') {
208 die("missing argument to %s", argp[-2]);
209 }
210 fx.decodetypes = cp;
211 continue;
212 case CRLFLINEBREAKSSW:
213 fx.lf_line_endings = 0;
214 continue;
215 case NCRLFLINEBREAKSSW:
216 fx.lf_line_endings = 1;
217 continue;
218 case TEXTCHARSETSW:
219 if (! (cp = *argp++) || (*cp == '-' && cp[1])) {
220 die("missing argument to %s", argp[-2]);
221 }
222 fx.textcharset = cp;
223 continue;
224 case NTEXTCHARSETSW:
225 fx.textcharset = 0;
226 continue;
227 case FIXBOUNDARYSW:
228 fx.fixboundary = 1;
229 continue;
230 case NFIXBOUNDARYSW:
231 fx.fixboundary = 0;
232 continue;
233 case FIXCOMPOSITECTESW:
234 fx.fixcompositecte = 1;
235 continue;
236 case NFIXCOMPOSITECTESW:
237 fx.fixcompositecte = 0;
238 continue;
239 case FIXTYPESW:
240 if (! (cp = *argp++) || (*cp == '-' && cp[1])) {
241 die("missing argument to %s", argp[-2]);
242 }
243 if (! strncasecmp (cp, "multipart/", 10) ||
244 ! strncasecmp (cp, "message/", 8))
245 die("-fixtype %s not allowed", cp);
246 if (! strchr (cp, '/'))
247 die("-fixtype requires type/subtype");
248 if (fx.fixtypes == NULL) { fx.fixtypes = svector_create (10); }
249 svector_push_back (fx.fixtypes, cp);
250 continue;
251 case REFORMATSW:
252 fx.reformat = 1;
253 continue;
254 case NREFORMATSW:
255 fx.reformat = 0;
256 continue;
257 case REPLACETEXTPLAINSW:
258 fx.replacetextplain = 1;
259 continue;
260 case NREPLACETEXTPLAINSW:
261 fx.replacetextplain = 0;
262 continue;
263 case FILESW:
264 if (! (cp = *argp++) || (*cp == '-' && cp[1])) {
265 die("missing argument to %s", argp[-2]);
266 }
267 file = *cp == '-' ? mh_xstrdup (cp) : path (cp, TFILE);
268 continue;
269 case OUTFILESW:
270 if (! (cp = *argp++) || (*cp == '-' && cp[1])) {
271 die("missing argument to %s", argp[-2]);
272 }
273 outfile = *cp == '-' ? mh_xstrdup (cp) : path (cp, TFILE);
274 continue;
275 case RPROCSW:
276 if (!(rmmproc = *argp++) || *rmmproc == '-') {
277 die("missing argument to %s", argp[-2]);
278 }
279 continue;
280 case NRPRCSW:
281 rmmproc = NULL;
282 continue;
283 case CHGSW:
284 chgflag = true;
285 continue;
286 case NCHGSW:
287 chgflag = false;
288 continue;
289 case VERBSW:
290 verbosw = 1;
291 continue;
292 case NVERBSW:
293 verbosw = 0;
294 continue;
295 }
296 }
297 if (*cp == '+' || *cp == '@') {
298 if (folder)
299 die("only one folder at a time!");
300 folder = pluspath (cp);
301 } else {
302 if (*cp == '/') {
303 /* Interpret a full path as a filename, not a message. */
304 file = mh_xstrdup (cp);
305 } else {
306 app_msgarg (&msgs, cp);
307 }
308 }
309 }
310
311 SIGNAL (SIGQUIT, quitser);
312 SIGNAL (SIGPIPE, pipeser);
313
314 /*
315 * Read the standard profile setup
316 */
317 if ((fp = fopen (cp = etcpath ("mhn.defaults"), "r"))) {
318 readconfig(NULL, fp, cp, 0);
319 fclose (fp);
320 }
321
322 suppress_bogus_mp_content_warning = skip_mp_cte_check = true;
323 suppress_extraneous_trailing_semicolon_warning = true;
324
325 if (! context_find ("path")) {
326 free (path ("./", TFOLDER));
327 }
328
329 if (file && msgs.size) {
330 die("cannot specify msg and file at same time!");
331 }
332
333 if (outfile) {
334 /* Open the outfile now, so we don't have to risk opening it
335 after running out of fds. */
336 if (strcmp (outfile, "-") == 0) {
337 outfp = stdout;
338 } else if ((outfp = fopen (outfile, "w")) == NULL) {
339 adios (outfile, "unable to open for writing");
340 }
341 }
342
343 /*
344 * check if message is coming from file
345 */
346 if (file) {
347 /* If file is stdin, create a tmp file name before parse_mime()
348 has a chance, because it might put in on a different
349 filesystem than the output file. Instead, put it in the
350 user's preferred tmp directory. */
351 CT ct;
352
353 if (! strcmp ("-", file)) {
354 int fd;
355 char *cp;
356
357 using_stdin = true;
358
359 if ((cp = m_mktemp2 (NULL, invo_name, &fd, NULL)) == NULL) {
360 die("unable to create temporary file in %s",
361 get_temp_dir());
362 } else {
363 free (file);
364 file = mh_xstrdup (cp);
365 cpydata (STDIN_FILENO, fd, "-", file);
366 }
367
368 if (close (fd)) {
369 (void) m_unlink (file);
370 die("failed to write temporary file");
371 }
372 }
373
374 cts = mh_xcalloc(2, sizeof *cts);
375 ctp = cts;
376
377 if ((ct = parse_mime (file))) {
378 set_text_ctparams(ct, fx.decodetypes, fx.lf_line_endings);
379 *ctp++ = ct;
380 } else {
381 inform("unable to parse message from file %s", file);
382 status = NOTOK;
383
384 /* If there's an outfile, pass the input message unchanged, so the
385 message won't get dropped from a pipeline. */
386 if (outfile) {
387 /* Something went wrong. Output might be expected, such as if
388 this were run as a filter. Just copy the input to the
389 output. */
390 if ((infp = fopen (file, "r")) == NULL) {
391 adios (file, "unable to open for reading");
392 }
393
394 if (copy_input_to_output (file, infp, outfile, outfp) != OK) {
395 inform("unable to copy message to %s, "
396 "it might be lost\n", outfile);
397 }
398
399 fclose (infp);
400 infp = NULL;
401 }
402 }
403 } else {
404 /*
405 * message(s) are coming from a folder
406 */
407 CT ct;
408
409 if (! msgs.size) {
410 app_msgarg(&msgs, "cur");
411 }
412 if (! folder) {
413 folder = getfolder (1);
414 }
415 maildir = mh_xstrdup(m_maildir (folder));
416
417 /* chdir so that error messages, esp. from MIME parser, just
418 refer to the message and not its path. */
419 if (chdir (maildir) == NOTOK) {
420 adios (maildir, "unable to change directory to");
421 }
422
423 /* read folder and create message structure */
424 if (! (mp = folder_read (folder, 1))) {
425 die("unable to read folder %s", folder);
426 }
427
428 /* check for empty folder */
429 if (mp->nummsg == 0) {
430 die("no messages in %s", folder);
431 }
432
433 /* parse all the message ranges/sequences and set SELECTED */
434 for (msgnum = 0; msgnum < msgs.size; msgnum++)
435 if (! m_convert (mp, msgs.msgs[msgnum])) {
436 done (1);
437 }
438 seq_setprev (mp); /* set the previous-sequence */
439
440 cts = mh_xcalloc(mp->numsel + 1, sizeof *cts);
441 ctp = cts;
442
443 for (msgnum = mp->lowsel; msgnum <= mp->hghsel; msgnum++) {
444 if (is_selected(mp, msgnum)) {
445 char *msgnam = m_name (msgnum);
446
447 if ((ct = parse_mime (msgnam))) {
448 set_text_ctparams(ct, fx.decodetypes, fx.lf_line_endings);
449 *ctp++ = ct;
450 } else {
451 inform("unable to parse message %s", msgnam);
452 status = NOTOK;
453
454 /* If there's an outfile, pass the input message
455 unchanged, so the message won't get dropped from a
456 pipeline. */
457 if (outfile) {
458 /* Something went wrong. Output might be expected,
459 such as if this were run as a filter. Just copy
460 the input to the output. */
461 /* Can't use path() here because 1) it might have been
462 called before and it caches the pwd, and 2) we call
463 chdir() after that. */
464 char *input_filename =
465 concat (maildir, "/", msgnam, NULL);
466
467 if ((infp = fopen (input_filename, "r")) == NULL) {
468 adios (input_filename,
469 "unable to open for reading");
470 }
471
472 if (copy_input_to_output (input_filename, infp,
473 outfile, outfp) != OK) {
474 inform("unable to copy message to %s, "
475 "it might be lost\n", outfile);
476 }
477
478 fclose (infp);
479 infp = NULL;
480 free (input_filename);
481 }
482 }
483 }
484 }
485
486 if (chgflag) {
487 seq_setcur (mp, mp->hghsel); /* update current message */
488 }
489 seq_save (mp); /* synchronize sequences */
490 context_replace (pfolder, folder);/* update current folder */
491 context_save (); /* save the context file */
492 }
493
494 if (*cts) {
495 for (ctp = cts; *ctp; ++ctp) {
496 status =
497 mhfixmsgsbr (ctp, maildir, &fx, &infp, outfile, &outfp) == OK
498 ? 0
499 : 1;
500 free_content (*ctp);
501
502 if (using_stdin) {
503 (void) m_unlink (file);
504
505 if (! outfile) {
506 /* Just calling m_backup() unlinks the backup file. */
507 (void) m_backup (file);
508 }
509 }
510 }
511 } else {
512 status = 1;
513 }
514
515 free(maildir);
516 free (cts);
517
518 if (fx.fixtypes != NULL) { svector_free (fx.fixtypes); }
519 if (infp) { fclose (infp); } /* even if stdin */
520 if (outfp) { fclose (outfp); } /* even if stdout */
521 free (outfile);
522 free (file);
523 free (folder);
524 free (arguments);
525
526 done (status == OK ? 0 : 1);
527 return NOTOK;
528 }
529
530
531 /*
532 * Apply transformations to one message.
533 */
534 static int
535 mhfixmsgsbr (CT *ctp, char *maildir, const fix_transformations *fx,
536 FILE **infp, char *outfile, FILE **outfp)
537 {
538 /* Store input filename in case one of the transformations, i.e.,
539 fix_boundary(), rewrites to a tmp file. */
540 char *input_filename = maildir
541 ? concat (maildir, "/", (*ctp)->c_file, NULL)
542 : mh_xstrdup ((*ctp)->c_file);
543 bool modify_inplace = false;
544 int message_mods = 0;
545 int status = OK;
546
547 /* Though the input file won't need to be opened if everything goes
548 well, do it here just in case there's a failure, and that failure is
549 running out of file descriptors. */
550 if ((*infp = fopen (input_filename, "r")) == NULL) {
551 adios (input_filename, "unable to open for reading");
552 }
553
554 if (outfile == NULL) {
555 modify_inplace = true;
556
557 if ((*ctp)->c_file) {
558 char *tempfile;
559 /* outfp will be closed by the caller */
560 if ((tempfile = m_mktemp2 (NULL, invo_name, NULL, outfp)) ==
561 NULL) {
562 die("unable to create temporary file in %s",
563 get_temp_dir());
564 }
565 outfile = mh_xstrdup (tempfile);
566 } else {
567 die("missing both input and output filenames\n");
568 }
569 } /* else *outfp was defined by caller */
570
571 reverse_alternative_parts (*ctp);
572 status = fix_always (*ctp, &message_mods);
573 if (status == OK && fx->fixboundary) {
574 status = fix_boundary (ctp, &message_mods);
575 }
576 if (status == OK && fx->fixtypes != NULL) {
577 status = fix_types (*ctp, fx->fixtypes, &message_mods);
578 }
579 if (status == OK && fx->fixcompositecte) {
580 status = fix_composite_cte (*ctp, &message_mods);
581 }
582 if (status == OK && fx->reformat) {
583 status =
584 ensure_text_plain (ctp, NULL, &message_mods, fx->replacetextplain);
585 }
586 if (status == OK && fx->decodetext) {
587 status = decode_text_parts (*ctp, fx->decodetext, fx->decodetypes,
588 &message_mods);
589 update_cte (*ctp);
590 }
591 if (status == OK && fx->textcharset != NULL) {
592 status = convert_charsets (*ctp, fx->textcharset, &message_mods);
593 }
594
595 if (status == OK && ! (*ctp)->c_umask) {
596 /* Set the umask for the contents file. This currently
597 isn't used but just in case it is in the future. */
598 struct stat st;
599
600 if (stat ((*ctp)->c_file, &st) != NOTOK) {
601 (*ctp)->c_umask = ~(st.st_mode & 0777);
602 } else {
603 (*ctp)->c_umask = ~m_gmprot();
604 }
605 }
606
607 /*
608 * Write the content to a file
609 */
610 if (status == OK) {
611 status = write_content (*ctp, input_filename, outfile, *outfp,
612 modify_inplace, message_mods);
613 } else if (! modify_inplace) {
614 /* Something went wrong. Output might be expected, such
615 as if this were run as a filter. Just copy the input
616 to the output. */
617 if (copy_input_to_output (input_filename, *infp, outfile,
618 *outfp) != OK) {
619 inform("unable to copy message to %s, it might be lost\n",
620 outfile);
621 }
622 }
623
624 if (modify_inplace) {
625 if (status != OK) { (void) m_unlink (outfile); }
626 free (outfile);
627 outfile = NULL;
628 }
629
630 fclose (*infp);
631 *infp = NULL;
632 free (input_filename);
633
634 return status;
635 }
636
637
638 /*
639 * Copy input message to output. Assumes not modifying in place, so this
640 * might be running as part of a pipeline.
641 */
642 static int
643 copy_input_to_output (const char *input_filename, FILE *infp,
644 const char *output_filename, FILE *outfp)
645 {
646 int in = fileno (infp);
647 int out = fileno (outfp);
648 int status = OK;
649
650 if (in != -1 && out != -1) {
651 cpydata (in, out, input_filename, output_filename);
652 } else {
653 status = NOTOK;
654 }
655
656 return status;
657 }
658
659
660 /*
661 * Fix mismatched outer level boundary.
662 */
663 static int
664 fix_boundary (CT *ct, int *message_mods)
665 {
666 struct multipart *mp;
667 int status = OK;
668
669 if (ct && (*ct)->c_type == CT_MULTIPART && bogus_mp_content) {
670 mp = (struct multipart *) (*ct)->c_ctparams;
671
672 /*
673 * 1) Get boundary at end of part.
674 * 2) Get boundary at beginning of part and compare to the end-of-part
675 * boundary.
676 * 3) Write out contents of ct to tmp file, replacing boundary in
677 * header with boundary from part. Set c_unlink to 1.
678 * 4) Free ct.
679 * 5) Call parse_mime() on the tmp file, replacing ct.
680 */
681
682 if (mp && mp->mp_start) {
683 char *part_boundary;
684
685 if (get_multipart_boundary (*ct, &part_boundary) == OK) {
686 char *fixed;
687
688 if ((fixed = m_mktemp2 (NULL, invo_name, NULL, &(*ct)->c_fp))) {
689 if (replace_boundary (*ct, fixed, part_boundary) == OK) {
690 char *filename = mh_xstrdup ((*ct)->c_file);
691 CT fixed_ct;
692
693 free_content (*ct);
694 if ((fixed_ct = parse_mime (fixed))) {
695 *ct = fixed_ct;
696 (*ct)->c_unlink = 1;
697
698 ++*message_mods;
699 if (verbosw) {
700 report (NULL, NULL, filename,
701 "fix multipart boundary");
702 }
703 } else {
704 *ct = NULL;
705 inform("unable to parse fixed part");
706 status = NOTOK;
707 }
708 free (filename);
709 } else {
710 inform("unable to replace broken boundary");
711 status = NOTOK;
712 }
713 } else {
714 inform("unable to create temporary file in %s",
715 get_temp_dir());
716 status = NOTOK;
717 }
718
719 free (part_boundary);
720 } else {
721 /* Couldn't fix the boundary. Report failure so that mhfixmsg
722 doesn't modify the message. */
723 status = NOTOK;
724 }
725 } else {
726 /* No multipart struct, even though the content type is
727 CT_MULTIPART. Report failure so that mhfixmsg doesn't modify
728 the message. */
729 status = NOTOK;
730 }
731 }
732
733 return status;
734 }
735
736
737 /*
738 * Find boundary at end of multipart.
739 */
740 static int
741 get_multipart_boundary (CT ct, char **part_boundary)
742 {
743 char buffer[NMH_BUFSIZ];
744 char *end_boundary = NULL;
745 off_t begin = (off_t) ct->c_end > (off_t) (ct->c_begin + sizeof buffer)
746 ? (off_t) (ct->c_end - sizeof buffer)
747 : (off_t) ct->c_begin;
748 size_t bytes_read;
749 int status = OK;
750
751 /* This will fail if the boundary spans fread() calls. NMH_BUFSIZ should
752 be big enough, even if it's just 1024, to make that unlikely. */
753
754 /* free_content() will close ct->c_fp if bogus MP boundary is fixed. */
755 if (! ct->c_fp && (ct->c_fp = fopen (ct->c_file, "r")) == NULL) {
756 advise (ct->c_file, "unable to open for reading");
757 return NOTOK;
758 }
759
760 /* Get boundary at end of multipart. */
761 while (begin >= (off_t) ct->c_begin) {
762 fseeko (ct->c_fp, begin, SEEK_SET);
763 while ((bytes_read = fread (buffer, 1, sizeof buffer, ct->c_fp)) > 0) {
764 char *cp = rfind_str (buffer, bytes_read, "--");
765
766 if (cp) {
767 char *end;
768
769 /* Trim off trailing "--" and anything beyond. */
770 *cp-- = '\0';
771 if ((end = rfind_str (buffer, cp - buffer, "\n"))) {
772 if (strlen (end) > 3 && *end++ == '\n' &&
773 *end++ == '-' && *end++ == '-') {
774 end_boundary = mh_xstrdup (end);
775 break;
776 }
777 }
778 }
779 }
780
781 if (end_boundary || begin <= (off_t) (ct->c_begin + sizeof buffer))
782 break;
783 begin -= sizeof buffer;
784 }
785
786 /* Get boundary at beginning of multipart. */
787 if (end_boundary) {
788 fseeko (ct->c_fp, ct->c_begin, SEEK_SET);
789 while ((bytes_read = fread (buffer, 1, sizeof buffer, ct->c_fp)) > 0) {
790 if (bytes_read >= strlen (end_boundary)) {
791 char *cp = find_str (buffer, bytes_read, end_boundary);
792
793 if (cp && cp - buffer >= 2 && *--cp == '-' &&
794 *--cp == '-' && (cp > buffer && *--cp == '\n')) {
795 status = OK;
796 break;
797 }
798 } else {
799 /* The start and end boundaries didn't match, or the
800 start boundary doesn't begin with "\n--" (or "--"
801 if at the beginning of buffer). Keep trying. */
802 status = NOTOK;
803 }
804 }
805 } else {
806 status = NOTOK;
807 }
808
809 if (ct->c_fp) {
810 fclose (ct->c_fp);
811 ct->c_fp = NULL;
812 }
813
814 if (status == OK) {
815 *part_boundary = end_boundary;
816 } else {
817 *part_boundary = NULL;
818 free (end_boundary);
819 }
820
821 return status;
822 }
823
824
825 /*
826 * Open and copy ct->c_file to file, replacing the multipart boundary.
827 */
828 static int
829 replace_boundary (CT ct, char *file, char *boundary)
830 {
831 FILE *fpin, *fpout;
832 int compnum, state;
833 char buf[NMH_BUFSIZ], name[NAMESZ];
834 char *np, *vp;
835 m_getfld_state_t gstate;
836 int status = OK;
837
838 if (ct->c_file == NULL) {
839 inform("missing input filename");
840 return NOTOK;
841 }
842
843 if ((fpin = fopen (ct->c_file, "r")) == NULL) {
844 advise (ct->c_file, "unable to open for reading");
845 return NOTOK;
846 }
847
848 if ((fpout = fopen (file, "w")) == NULL) {
849 fclose (fpin);
850 advise (file, "unable to open for writing");
851 return NOTOK;
852 }
853
854 gstate = m_getfld_state_init(fpin);
855 for (compnum = 1;;) {
856 int bufsz = (int) sizeof buf;
857
858 switch (state = m_getfld2(&gstate, name, buf, &bufsz)) {
859 case FLD:
860 case FLDPLUS:
861 compnum++;
862
863 /* get copies of the buffers */
864 np = mh_xstrdup (name);
865 vp = mh_xstrdup (buf);
866
867 /* if necessary, get rest of field */
868 while (state == FLDPLUS) {
869 bufsz = sizeof buf;
870 state = m_getfld2(&gstate, name, buf, &bufsz);
871 vp = add (buf, vp); /* add to previous value */
872 }
873
874 if (strcasecmp (TYPE_FIELD, np)) {
875 fprintf (fpout, "%s:%s", np, vp);
876 } else {
877 char *new_ctline, *new_params;
878
879 replace_param(&ct->c_ctinfo.ci_first_pm,
880 &ct->c_ctinfo.ci_last_pm, "boundary",
881 boundary, 0);
882
883 new_ctline = concat(" ", ct->c_ctinfo.ci_type, "/",
884 ct->c_ctinfo.ci_subtype, NULL);
885 new_params = output_params(LEN(TYPE_FIELD) +
886 strlen(new_ctline) + 1,
887 ct->c_ctinfo.ci_first_pm, NULL, 0);
888 fprintf (fpout, "%s:%s%s\n", np, new_ctline,
889 FENDNULL(new_params));
890 free(new_ctline);
891 free(new_params);
892 }
893
894 free (vp);
895 free (np);
896
897 continue;
898
899 case BODY:
900 putc('\n', fpout);
901 /* buf will have a terminating NULL, skip it. */
902 if ((int) fwrite (buf, 1, bufsz-1, fpout) < bufsz-1) {
903 advise (file, "fwrite");
904 }
905 continue;
906
907 case FILEEOF:
908 break;
909
910 case LENERR:
911 case FMTERR:
912 inform("message format error in component #%d", compnum);
913 status = NOTOK;
914 break;
915
916 default:
917 inform("getfld() returned %d", state);
918 status = NOTOK;
919 break;
920 }
921
922 break;
923 }
924
925 m_getfld_state_destroy (&gstate);
926 fclose (fpout);
927 fclose (fpin);
928
929 return status;
930 }
931
932
933 /*
934 * Fix Content-Type header to reflect the content of its part.
935 */
936 static int
937 fix_types (CT ct, svector_t fixtypes, int *message_mods)
938 {
939 int status = OK;
940
941 switch (ct->c_type) {
942 case CT_MULTIPART: {
943 struct multipart *m = (struct multipart *) ct->c_ctparams;
944 struct part *part;
945
946 for (part = m->mp_parts; status == OK && part; part = part->mp_next) {
947 status = fix_types (part->mp_part, fixtypes, message_mods);
948 }
949 break;
950 }
951
952 case CT_MESSAGE:
953 if (ct->c_subtype == MESSAGE_EXTERNAL) {
954 struct exbody *e = (struct exbody *) ct->c_ctparams;
955
956 status = fix_types (e->eb_content, fixtypes, message_mods);
957 }
958 break;
959
960 default: {
961 char **typep, *type;
962
963 if (ct->c_ctinfo.ci_type && ct->c_ctinfo.ci_subtype) {
964 for (typep = svector_strs (fixtypes);
965 typep && (type = *typep);
966 ++typep) {
967 char *type_subtype =
968 concat (ct->c_ctinfo.ci_type, "/", ct->c_ctinfo.ci_subtype,
969 NULL);
970
971 if (! strcasecmp (type, type_subtype) &&
972 decode_part (ct) == OK &&
973 ct->c_cefile.ce_file != NULL) {
974 char *ct_type_subtype = mime_type (ct->c_cefile.ce_file);
975 char *cp;
976
977 if ((cp = strchr (ct_type_subtype, ';'))) {
978 /* Truncate to remove any parameter list from
979 mime_type () result. */
980 *cp = '\0';
981 }
982
983 if (strcasecmp (type, ct_type_subtype)) {
984 char *ct_type, *ct_subtype;
985 HF hf;
986
987 /* The Content-Type header does not match the
988 content, so update these struct Content
989 fields to match:
990 * c_type, c_subtype
991 * c_ctinfo.ci_type, c_ctinfo.ci_subtype
992 * c_ctline
993 */
994 /* Extract type and subtype from type/subtype. */
995 ct_type = mh_xstrdup(ct_type_subtype);
996 if ((cp = strchr (ct_type, '/'))) {
997 *cp = '\0';
998 ct_subtype = mh_xstrdup(++cp);
999 } else {
1000 inform("missing / in MIME type of %s %s",
1001 ct->c_file, ct->c_partno);
1002 free (ct_type);
1003 return NOTOK;
1004 }
1005
1006 ct->c_type = ct_str_type (ct_type);
1007 ct->c_subtype = ct_str_subtype (ct->c_type, ct_subtype);
1008
1009 free (ct->c_ctinfo.ci_type);
1010 ct->c_ctinfo.ci_type = ct_type;
1011 free (ct->c_ctinfo.ci_subtype);
1012 ct->c_ctinfo.ci_subtype = ct_subtype;
1013 if (! replace_substring (&ct->c_ctline, type,
1014 ct_type_subtype)) {
1015 inform("did not find %s in %s",
1016 type, ct->c_ctline);
1017 }
1018
1019 /* Update Content-Type header field. */
1020 for (hf = ct->c_first_hf; hf; hf = hf->next) {
1021 if (! strcasecmp (TYPE_FIELD, hf->name)) {
1022 if (replace_substring (&hf->value, type,
1023 ct_type_subtype)) {
1024 ++*message_mods;
1025 if (verbosw) {
1026 report (NULL, ct->c_partno, ct->c_file,
1027 "change Content-Type in header "
1028 "from %s to %s",
1029 type, ct_type_subtype);
1030 }
1031 break;
1032 }
1033 inform("did not find %s in %s", type, hf->value);
1034 }
1035 }
1036 }
1037 free (ct_type_subtype);
1038 }
1039 free (type_subtype);
1040 }
1041 }
1042 }}
1043
1044 return status;
1045 }
1046
1047
/*
 * Replace the first occurrence of old in *str with new.
 *
 * The result is built in newly allocated memory; the previous *str is
 * freed and *str is pointed at the result.  Returns the new *str, or NULL,
 * leaving *str untouched, if old does not occur in it.
 */
char *
replace_substring (char **str, const char *old, const char *new)
{
    char *match = strstr (*str, old);
    char *head, *result;

    if (match == NULL) {
        return NULL;
    }

    /* head is the text before the match; it's empty if the match is at
       the very beginning of the string. */
    head = mh_xstrdup(*str);
    head[match - *str] = '\0';

    result = concat (head, new, match + strlen (old), NULL);
    free (head);

    free (*str);
    *str = result;

    return result;
}
1076
1077
/*
 * Remove a name=value parameter, given just its name, from a header value.
 *
 * The first occurrence of name immediately followed by "=" is removed in
 * place, together with its value, any whitespace before the name, and the
 * semicolon before that.  The value is either a quoted string or runs up
 * to the next whitespace or semicolon, so the separator in front of a
 * following parameter is kept.  str is not changed if the parameter isn't
 * found.
 *
 * Always returns str.
 */
static char *
remove_parameter (char *str, const char *name)
{
    /* It looks to me, based on the BNF in RFC 2045, that there can't
       be whitespace between the parameter name and the "=", or
       between the "=" and the parameter value. */
    const size_t name_len = strlen (name);
    char *cp, *start, *end;

    for (cp = strstr (str, name); cp; cp = strstr (cp + 1, name)) {
        if (cp[name_len] == '=') { break; }
        /* Only an empty name can match at the end of the string;
           don't search beyond it. */
        if (*cp == '\0') { cp = NULL; break; }
    }
    if (cp == NULL) {
        return str;
    }

    /* Remove any leading spaces, before the parameter name. */
    for (start = cp;
         start > str && isspace ((unsigned char) *(start-1));
         --start) {
        continue;
    }
    /* Remove a leading semicolon. */
    if (start > str && *(start-1) == ';') { --start; }

    end = cp + name_len + 1;
    if (*end == '"') {
        /* Skip past the quoted value, and then the final quote.  If the
           quote is never closed, stop at the end of the string. */
        for (++end; *end && *end != '"'; ++end) { continue; }
        if (*end == '"') { ++end; }
    } else {
        /* Skip past the value.  It can be empty, and it ends at a
           semicolon as well as at whitespace. */
        while (*end && *end != ';' && ! isspace ((unsigned char) *end)) {
            ++end;
        }
    }

    /* Close the gap, moving the trailing null as well. */
    (void) memmove (start, end, strlen (end) + 1);

    return str;
}
1124
1125
1126 /*
1127 * Fix Content-Transfer-Encoding of composite,, e.g., message or multipart, part.
1128 * According to RFC 2045 Sec. 6.4, it must be 7bit, 8bit, or binary. Set it to
1129 * 8 bit.
1130 */
1131 static int
1132 fix_composite_cte (CT ct, int *message_mods)
1133 {
1134 int status = OK;
1135
1136 if (ct->c_type == CT_MESSAGE || ct->c_type == CT_MULTIPART) {
1137 if (ct->c_encoding != CE_7BIT && ct->c_encoding != CE_8BIT &&
1138 ct->c_encoding != CE_BINARY) {
1139 HF hf;
1140
1141 for (hf = ct->c_first_hf; hf; hf = hf->next) {
1142 char *name = hf->name;
1143 for (; isspace((unsigned char)*name); ++name) {
1144 continue;
1145 }
1146
1147 if (! strncasecmp (name, ENCODING_FIELD,
1148 LEN(ENCODING_FIELD))) {
1149 char *prefix = "Nmh-REPLACED-INVALID-";
1150 HF h;
1151
1152 NEW(h);
1153 h->name = mh_xstrdup (hf->name);
1154 h->hf_encoding = hf->hf_encoding;
1155 h->next = hf->next;
1156 hf->next = h;
1157
1158 /* Retain old header but prefix its name. */
1159 free (hf->name);
1160 hf->name = concat (prefix, h->name, NULL);
1161
1162 ++*message_mods;
1163 if (verbosw) {
1164 char *encoding = cpytrim (hf->value);
1165 report (NULL, ct->c_partno, ct->c_file,
1166 "replace Content-Transfer-Encoding of %s "
1167 "with 8 bit", encoding);
1168 free (encoding);
1169 }
1170
1171 h->value = mh_xstrdup (" 8bit\n");
1172
1173 /* Don't need to warn for multiple C-T-E header
1174 fields, parse_mime() already does that. But
1175 if there are any, fix them all as necessary. */
1176 hf = h;
1177 }
1178 }
1179
1180 set_ce (ct, CE_8BIT);
1181 }
1182
1183 if (ct->c_type == CT_MULTIPART) {
1184 struct multipart *m;
1185 struct part *part;
1186
1187 m = (struct multipart *) ct->c_ctparams;
1188 for (part = m->mp_parts; part; part = part->mp_next) {
1189 if (fix_composite_cte (part->mp_part, message_mods) != OK) {
1190 status = NOTOK;
1191 break;
1192 }
1193 }
1194 }
1195 }
1196
1197 return status;
1198 }
1199
1200
1201 /*
1202 * Set content encoding.
1203 */
1204 static int
1205 set_ce (CT ct, int encoding)
1206 {
1207 const char *ce = ce_str (encoding);
1208 const struct str2init *ctinit = get_ce_method (ce);
1209
1210 if (ctinit) {
1211 char *cte = concat (" ", ce, "\n", NULL);
1212 bool found_cte = false;
1213 HF hf;
1214 /* Decoded contents might be in ct->c_cefile.ce_file, if the
1215 caller is decode_text_parts (). Save because we'll
1216 overwrite below. */
1217 struct cefile decoded_content_info = ct->c_cefile;
1218
1219 ct->c_encoding = encoding;
1220
1221 ct->c_ctinitfnx = ctinit->si_init;
1222 /* This will assign ct->c_cefile with an all-0 struct, which
1223 is what we want. */
1224 (*ctinit->si_init) (ct);
1225 /* After returning, the caller should set
1226 ct->c_cefile.ce_file to the name of the file containing
1227 the contents. */
1228
1229 if (ct->c_ceclosefnx) {
1230 (*ct->c_ceclosefnx) (ct);
1231 }
1232
1233 /* Restore the cefile. */
1234 ct->c_cefile = decoded_content_info;
1235
1236 /* Update/add Content-Transfer-Encoding header field. */
1237 for (hf = ct->c_first_hf; hf; hf = hf->next) {
1238 if (! strcasecmp (ENCODING_FIELD, hf->name)) {
1239 found_cte = true;
1240 free (hf->value);
1241 hf->value = cte;
1242 }
1243 }
1244 if (! found_cte) {
1245 add_header (ct, mh_xstrdup (ENCODING_FIELD), cte);
1246 }
1247
1248 /* Update c_celine. It's used only by mhlist -debug. */
1249 free (ct->c_celine);
1250 ct->c_celine = mh_xstrdup (cte);
1251
1252 return OK;
1253 }
1254
1255 return NOTOK;
1256 }
1257
1258
1259 /*
1260 * Make sure each text part has a corresponding text/plain part.
1261 */
1262 static int
1263 ensure_text_plain (CT *ct, CT parent, int *message_mods, int replacetextplain)
1264 {
1265 int status = OK;
1266
1267 switch ((*ct)->c_type) {
1268 case CT_TEXT: {
1269 /* Nothing to do for text/plain. */
1270 if ((*ct)->c_subtype == TEXT_PLAIN) { return OK; }
1271
1272 if (parent && parent->c_type == CT_MULTIPART &&
1273 parent->c_subtype == MULTI_ALTERNATE) {
1274 int new_subpart_number = 1;
1275 int has_text_plain =
1276 find_textplain_sibling (parent, replacetextplain,
1277 &new_subpart_number);
1278
1279 if (! has_text_plain) {
1280 /* Parent is a multipart/alternative. Insert a new
1281 text/plain subpart. */
1282 const int inserted =
1283 insert_new_text_plain_part (*ct, new_subpart_number,
1284 parent);
1285 if (inserted) {
1286 ++*message_mods;
1287 if (verbosw) {
1288 report (NULL, parent->c_partno, parent->c_file,
1289 "insert text/plain part");
1290 }
1291 } else {
1292 status = NOTOK;
1293 }
1294 }
1295 } else if (parent && parent->c_type == CT_MULTIPART &&
1296 parent->c_subtype == MULTI_RELATED) {
1297 char *type_subtype =
1298 concat ((*ct)->c_ctinfo.ci_type, "/",
1299 (*ct)->c_ctinfo.ci_subtype, NULL);
1300 const char *parent_type =
1301 get_param (parent->c_ctinfo.ci_first_pm, "type", '?', 1);
1302 int new_subpart_number = 1;
1303 int has_text_plain = 0;
1304
1305 /* Have to do string comparison on the subtype because we
1306 don't enumerate all of them in c_subtype values.
1307 parent_type will be NULL if the multipart/related part
1308 doesn't have a type parameter. The type parameter must
1309 be specified according to RFC 2387 Sec. 3.1 but not all
1310 messages comply. */
1311 if (parent_type && strcasecmp (type_subtype, parent_type) == 0) {
1312 /* The type of this part matches the root type of the
1313 parent multipart/related. Look to see if there's
1314 text/plain sibling. */
1315 has_text_plain =
1316 find_textplain_sibling (parent, replacetextplain,
1317 &new_subpart_number);
1318 }
1319
1320 free (type_subtype);
1321
1322 if (! has_text_plain) {
1323 struct multipart *mp = (struct multipart *) parent->c_ctparams;
1324 struct part *part;
1325 int siblings = 0;
1326
1327 for (part = mp->mp_parts; part; part = part->mp_next) {
1328 if (*ct != part->mp_part) {
1329 ++siblings;
1330 }
1331 }
1332
1333 if (siblings) {
1334 /* Parent is a multipart/related. Insert a new
1335 text/plain subpart in a new multipart/alternative. */
1336 if (insert_into_new_mp_alt (ct, message_mods)) {
1337 /* Not an error if text/plain couldn't be added. */
1338 }
1339 } else {
1340 /* There are no siblings, so insert a new text/plain
1341 subpart, and change the parent type from
1342 multipart/related to multipart/alternative. */
1343 const int inserted =
1344 insert_new_text_plain_part (*ct, new_subpart_number,
1345 parent);
1346
1347 if (inserted) {
1348 HF hf;
1349
1350 parent->c_subtype = MULTI_ALTERNATE;
1351 free (parent->c_ctinfo.ci_subtype);
1352 parent->c_ctinfo.ci_subtype = mh_xstrdup("alternative");
1353 if (! replace_substring (&parent->c_ctline, "/related",
1354 "/alternative")) {
1355 inform("did not find multipart/related in %s",
1356 parent->c_ctline);
1357 }
1358
1359 /* Update Content-Type header field. */
1360 for (hf = parent->c_first_hf; hf; hf = hf->next) {
1361 if (! strcasecmp (TYPE_FIELD, hf->name)) {
1362 if (replace_substring (&hf->value, "/related",
1363 "/alternative")) {
1364 ++*message_mods;
1365 if (verbosw) {
1366 report (NULL, parent->c_partno,
1367 parent->c_file,
1368 "insert text/plain part");
1369 }
1370
1371 /* Remove, e.g., type="text/html" from
1372 multipart/alternative. */
1373 remove_parameter (hf->value, "type");
1374 break;
1375 }
1376 inform("did not find multipart/"
1377 "related in header %s", hf->value);
1378 }
1379 }
1380 } else {
1381 /* Not an error if text/plain couldn't be inserted. */
1382 }
1383 }
1384 }
1385 } else {
1386 if (insert_into_new_mp_alt (ct, message_mods)) {
1387 status = NOTOK;
1388 }
1389 }
1390 break;
1391 }
1392
1393 case CT_MULTIPART: {
1394 struct multipart *mp = (struct multipart *) (*ct)->c_ctparams;
1395 struct part *part;
1396
1397 for (part = mp->mp_parts; status == OK && part; part = part->mp_next) {
1398 if ((*ct)->c_type == CT_MULTIPART) {
1399 status = ensure_text_plain (&part->mp_part, *ct, message_mods,
1400 replacetextplain);
1401 }
1402 }
1403 break;
1404 }
1405
1406 case CT_MESSAGE:
1407 if ((*ct)->c_subtype == MESSAGE_EXTERNAL) {
1408 struct exbody *e = (struct exbody *) (*ct)->c_ctparams;
1409
1410 status = ensure_text_plain (&e->eb_content, *ct, message_mods,
1411 replacetextplain);
1412 }
1413 break;
1414 }
1415
1416 return status;
1417 }
1418
1419
1420 /*
1421 * See if there is a sibling text/plain, and return its subpart number.
1422 */
1423 static int
1424 find_textplain_sibling (CT parent, int replacetextplain,
1425 int *new_subpart_number)
1426 {
1427 struct multipart *mp = (struct multipart *) parent->c_ctparams;
1428 struct part *part, *prev;
1429 bool has_text_plain = false;
1430
1431 for (prev = part = mp->mp_parts; part; part = part->mp_next) {
1432 ++*new_subpart_number;
1433 if (part->mp_part->c_type == CT_TEXT &&
1434 part->mp_part->c_subtype == TEXT_PLAIN) {
1435 if (replacetextplain) {
1436 struct part *old_part;
1437 if (part == mp->mp_parts) {
1438 old_part = mp->mp_parts;
1439 mp->mp_parts = part->mp_next;
1440 } else {
1441 old_part = prev->mp_next;
1442 prev->mp_next = part->mp_next;
1443 }
1444 if (verbosw) {
1445 report (NULL, parent->c_partno, parent->c_file,
1446 "remove text/plain part %s",
1447 old_part->mp_part->c_partno);
1448 }
1449 free_content (old_part->mp_part);
1450 free (old_part);
1451 } else {
1452 has_text_plain = true;
1453 }
1454 break;
1455 }
1456 prev = part;
1457 }
1458
1459 return has_text_plain;
1460 }
1461
1462
1463 /*
1464 * Insert a new text/plain part.
1465 */
1466 static int
1467 insert_new_text_plain_part (CT ct, int new_subpart_number, CT parent)
1468 {
1469 struct multipart *mp = (struct multipart *) parent->c_ctparams;
1470 struct part *new_part;
1471
1472 NEW(new_part);
1473 if ((new_part->mp_part = build_text_plain_part (ct))) {
1474 char buffer[16];
1475 snprintf (buffer, sizeof buffer, "%d", new_subpart_number);
1476
1477 new_part->mp_next = mp->mp_parts;
1478 mp->mp_parts = new_part;
1479 new_part->mp_part->c_partno =
1480 concat (parent->c_partno ? parent->c_partno : "1", ".",
1481 buffer, NULL);
1482
1483 return 1;
1484 }
1485
1486 free_content (new_part->mp_part);
1487 free (new_part);
1488
1489 return 0;
1490 }
1491
1492
1493 /*
1494 * Create a text/plain part to go along with non-plain sibling part.
1495 */
1496 static CT
1497 build_text_plain_part (CT encoded_part)
1498 {
1499 CT tp_part = divide_part (encoded_part);
1500 char *tmp_plain_file = NULL;
1501
1502 if (decode_part (tp_part) == OK) {
1503 /* Now, tp_part->c_cefile.ce_file is the name of the tmp file that
1504 contains the decoded contents. And the decoding function, such
1505 as openQuoted, will have set ...->ce_unlink to 1 so that it will
1506 be unlinked by free_content (). */
1507 char *tempfile;
1508
1509 /* This m_mktemp2() call closes the temp file. */
1510 if ((tempfile = m_mktemp2 (NULL, invo_name, NULL, NULL)) == NULL) {
1511 inform("unable to create temporary file in %s",
1512 get_temp_dir());
1513 } else {
1514 tmp_plain_file = mh_xstrdup (tempfile);
1515 if (reformat_part (tp_part, tmp_plain_file,
1516 tp_part->c_ctinfo.ci_type,
1517 tp_part->c_ctinfo.ci_subtype,
1518 tp_part->c_type) == OK) {
1519 return tp_part;
1520 }
1521 }
1522 }
1523
1524 free_content (tp_part);
1525 if (tmp_plain_file) { (void) m_unlink (tmp_plain_file); }
1526 free (tmp_plain_file);
1527
1528 return NULL;
1529 }
1530
1531
1532 /*
1533 * Slip new text/plain part into a new multipart/alternative.
1534 */
1535 static int
1536 insert_into_new_mp_alt (CT *ct, int *message_mods)
1537 {
1538 CT tp_part = build_text_plain_part (*ct);
1539 int status = OK;
1540
1541 if (tp_part) {
1542 CT mp_alt = build_multipart_alt (*ct, tp_part, CT_MULTIPART,
1543 MULTI_ALTERNATE);
1544 if (mp_alt) {
1545 struct multipart *mp = (struct multipart *) mp_alt->c_ctparams;
1546
1547 if (mp && mp->mp_parts) {
1548 mp->mp_parts->mp_part = tp_part;
1549 /* Make the new multipart/alternative the parent. */
1550 *ct = mp_alt;
1551
1552 ++*message_mods;
1553 if (verbosw) {
1554 report (NULL, (*ct)->c_partno, (*ct)->c_file,
1555 "insert text/plain part");
1556 }
1557 } else {
1558 free_content (tp_part);
1559 free_content (mp_alt);
1560 status = NOTOK;
1561 }
1562 } else {
1563 status = NOTOK;
1564 }
1565 } else {
1566 /* Not an error if text/plain couldn't be built. */
1567 }
1568
1569 return status;
1570 }
1571
1572
1573 /*
1574 * Clone a MIME part.
1575 */
1576 static CT
1577 divide_part (CT ct)
1578 {
1579 CT new_part;
1580
1581 NEW0(new_part);
1582 /* Just copy over what is needed for decoding. c_vrsn and
1583 c_celine aren't necessary. */
1584 new_part->c_file = mh_xstrdup (ct->c_file);
1585 new_part->c_begin = ct->c_begin;
1586 new_part->c_end = ct->c_end;
1587 copy_ctinfo (&new_part->c_ctinfo, &ct->c_ctinfo);
1588 new_part->c_type = ct->c_type;
1589 new_part->c_cefile = ct->c_cefile;
1590 new_part->c_encoding = ct->c_encoding;
1591 new_part->c_ctinitfnx = ct->c_ctinitfnx;
1592 new_part->c_ceopenfnx = ct->c_ceopenfnx;
1593 new_part->c_ceclosefnx = ct->c_ceclosefnx;
1594 new_part->c_cesizefnx = ct->c_cesizefnx;
1595
1596 /* c_ctline is used by reformat__part(), so it can preserve
1597 anything after the type/subtype. */
1598 new_part->c_ctline = mh_xstrdup (ct->c_ctline);
1599
1600 return new_part;
1601 }
1602
1603
1604 /*
1605 * Copy the content info from one part to another.
1606 */
1607 static void
1608 copy_ctinfo (CI dest, CI src)
1609 {
1610 PM s_pm, d_pm;
1611
1612 dest->ci_type = src->ci_type ? mh_xstrdup (src->ci_type) : NULL;
1613 dest->ci_subtype = src->ci_subtype ? mh_xstrdup (src->ci_subtype) : NULL;
1614
1615 for (s_pm = src->ci_first_pm; s_pm; s_pm = s_pm->pm_next) {
1616 d_pm = add_param(&dest->ci_first_pm, &dest->ci_last_pm, s_pm->pm_name,
1617 s_pm->pm_value, 0);
1618 if (s_pm->pm_charset) {
1619 d_pm->pm_charset = mh_xstrdup(s_pm->pm_charset);
1620 }
1621 if (s_pm->pm_lang) {
1622 d_pm->pm_lang = mh_xstrdup(s_pm->pm_lang);
1623 }
1624 }
1625
1626 dest->ci_comment = src->ci_comment ? mh_xstrdup (src->ci_comment) : NULL;
1627 dest->ci_magic = src->ci_magic ? mh_xstrdup (src->ci_magic) : NULL;
1628 }
1629
1630
1631 /*
1632 * Decode content.
1633 */
1634 static int
1635 decode_part (CT ct)
1636 {
1637 char *tmp_decoded;
1638 int status;
1639 FILE *file;
1640 char *tempfile;
1641
1642 if ((tempfile = m_mktemp2 (NULL, invo_name, NULL, &file)) == NULL) {
1643 die("unable to create temporary file in %s", get_temp_dir());
1644 }
1645 tmp_decoded = mh_xstrdup (tempfile);
1646 /* The following call will load ct->c_cefile.ce_file with the tmp
1647 filename of the decoded content. tmp_decoded will contain the
1648 encoded output, get rid of that. */
1649 status = output_message_fp (ct, file, tmp_decoded);
1650 (void) m_unlink (tmp_decoded);
1651 free (tmp_decoded);
1652 if (fclose (file)) {
1653 inform("unable to close temporary file %s, continuing...", tempfile);
1654 }
1655
1656 return status;
1657 }
1658
1659
1660 /*
1661 * Reformat content as plain text.
1662 * Some of the arguments aren't really needed now, but maybe will
1663 * be in the future for other than text types.
1664 */
1665 static int
1666 reformat_part (CT ct, char *file, char *type, char *subtype, int c_type)
1667 {
1668 int output_subtype, output_encoding;
1669 const char *reason = NULL;
1670 char *cp, *cf;
1671 int status;
1672
1673 /* Hacky: this redirects the output from whatever command is used
1674 to show the part to a file. So, the user can't have any output
1675 redirection in that command.
1676 Could show_multi() in mhshowsbr.c avoid this? */
1677
1678 /* Check for invo_name-format-type/subtype. */
1679 if ((cf = context_find_by_type ("format", type, subtype)) == NULL) {
1680 if (verbosw) {
1681 inform("Don't know how to convert %s, there is no "
1682 "%s-format-%s/%s profile entry",
1683 ct->c_file, invo_name, type, subtype);
1684 }
1685 return NOTOK;
1686 }
1687 if (strchr (cf, '>')) {
1688 inform("'>' prohibited in \"%s\",\nplease fix your "
1689 "%s-format-%s/%s profile entry", cf, invo_name, type,
1690 FENDNULL(subtype));
1691
1692 return NOTOK;
1693 }
1694
1695 cp = concat (cf, " >", file, NULL);
1696 status = show_content_aux (ct, 0, cp, NULL, NULL);
1697 free (cp);
1698
1699 /* Unlink decoded content tmp file and free its filename to avoid
1700 leaks. The file stream should already have been closed. */
1701 if (ct->c_cefile.ce_unlink) {
1702 (void) m_unlink (ct->c_cefile.ce_file);
1703 free (ct->c_cefile.ce_file);
1704 ct->c_cefile.ce_file = NULL;
1705 ct->c_cefile.ce_unlink = 0;
1706 }
1707
1708 if (c_type == CT_TEXT) {
1709 output_subtype = TEXT_PLAIN;
1710 } else {
1711 /* Set subtype to 0, which is always an UNKNOWN subtype. */
1712 output_subtype = 0;
1713 }
1714
1715 output_encoding = content_encoding (ct, &reason);
1716 if (status == OK &&
1717 set_ct_type (ct, c_type, output_subtype, output_encoding) == OK) {
1718 ct->c_cefile.ce_file = file;
1719 ct->c_cefile.ce_unlink = 1;
1720 } else {
1721 ct->c_cefile.ce_unlink = 0;
1722 status = NOTOK;
1723 }
1724
1725 return status;
1726 }
1727
1728
1729 /*
1730 * Fill in a multipart/alternative part.
1731 */
1732 static CT
1733 build_multipart_alt (CT first_alt, CT new_part, int type, int subtype)
1734 {
1735 char *boundary_prefix = "----=_nmh-multipart";
1736 char *boundary = concat (boundary_prefix, first_alt->c_partno, NULL);
1737 char *boundary_indicator = "; boundary=";
1738 char *typename, *subtypename, *name;
1739 CT ct;
1740 struct part *p;
1741 struct multipart *m;
1742 const struct str2init *ctinit;
1743
1744 NEW0(ct);
1745
1746 /* Set up the multipart/alternative part. These fields of *ct were
1747 initialized to 0 by mh_xcalloc():
1748 c_fp, c_unlink, c_begin, c_end,
1749 c_vrsn, c_ctline, c_celine,
1750 c_id, c_descr, c_dispo, c_partno,
1751 c_ctinfo.ci_comment, c_ctinfo.ci_magic,
1752 c_cefile, c_encoding,
1753 c_digested, c_digest[16], c_ctexbody,
1754 c_ctinitfnx, c_ceopenfnx, c_ceclosefnx, c_cesizefnx,
1755 c_umask, c_rfc934,
1756 c_showproc, c_termproc, c_storeproc, c_storage, c_folder
1757 */
1758
1759 ct->c_file = mh_xstrdup (first_alt->c_file);
1760 ct->c_type = type;
1761 ct->c_subtype = subtype;
1762
1763 ctinit = get_ct_init (ct->c_type);
1764
1765 typename = ct_type_str (type);
1766 subtypename = ct_subtype_str (type, subtype);
1767
1768 {
1769 int serial = 0;
1770 int found_boundary = 1;
1771
1772 while (found_boundary && serial < 1000000) {
1773 found_boundary = 0;
1774
1775 /* Ensure that the boundary doesn't appear in the decoded
1776 content. */
1777 if (new_part->c_cefile.ce_file) {
1778 if ((found_boundary =
1779 boundary_in_content (&new_part->c_cefile.ce_fp,
1780 new_part->c_cefile.ce_file,
1781 boundary)) == NOTOK) {
1782 goto return_null;
1783 }
1784 }
1785
1786 /* Ensure that the boundary doesn't appear in the encoded
1787 content. */
1788 if (! found_boundary && new_part->c_file) {
1789 if ((found_boundary =
1790 boundary_in_content (&new_part->c_fp,
1791 new_part->c_file,
1792 boundary)) == NOTOK) {
1793 goto return_null;
1794 }
1795 }
1796
1797 if (found_boundary) {
1798 /* Try a slightly different boundary. */
1799 char buffer2[16];
1800
1801 free (boundary);
1802 ++serial;
1803 snprintf (buffer2, sizeof buffer2, "%d", serial);
1804 boundary =
1805 concat (boundary_prefix,
1806 FENDNULL(first_alt->c_partno),
1807 "-", buffer2, NULL);
1808 }
1809 }
1810
1811 if (found_boundary) {
1812 inform("giving up trying to find a unique boundary");
1813 goto return_null;
1814 }
1815 }
1816
1817 name = concat (" ", typename, "/", subtypename, boundary_indicator, "\"",
1818 boundary, "\"", NULL);
1819
1820 /* Load c_first_hf and c_last_hf. */
1821 transfer_noncontent_headers (first_alt, ct);
1822 add_header (ct, mh_xstrdup (TYPE_FIELD), concat (name, "\n", NULL));
1823 free (name);
1824
1825 /* Load c_partno. */
1826 if (first_alt->c_partno) {
1827 ct->c_partno = mh_xstrdup (first_alt->c_partno);
1828 free (first_alt->c_partno);
1829 first_alt->c_partno = concat (ct->c_partno, ".1", NULL);
1830 new_part->c_partno = concat (ct->c_partno, ".2", NULL);
1831 } else {
1832 first_alt->c_partno = mh_xstrdup ("1");
1833 new_part->c_partno = mh_xstrdup ("2");
1834 }
1835
1836 if (ctinit) {
1837 ct->c_ctinfo.ci_type = mh_xstrdup (typename);
1838 ct->c_ctinfo.ci_subtype = mh_xstrdup (subtypename);
1839 }
1840
1841 add_param(&ct->c_ctinfo.ci_first_pm, &ct->c_ctinfo.ci_last_pm,
1842 "boundary", boundary, 0);
1843
1844 NEW(p);
1845 NEW(p->mp_next);
1846 p->mp_next->mp_next = NULL;
1847 p->mp_next->mp_part = first_alt;
1848
1849 NEW0(m);
1850 m->mp_start = concat (boundary, "\n", NULL);
1851 m->mp_stop = concat (boundary, "--\n", NULL);
1852 m->mp_parts = p;
1853 ct->c_ctparams = m;
1854
1855 free (boundary);
1856
1857 return ct;
1858
1859 return_null:
1860 free_content(ct);
1861 free(boundary);
1862 return NULL;
1863 }
1864
1865
1866 /*
1867 * Check that the boundary does not appear in the content.
1868 */
1869 static int
1870 boundary_in_content (FILE **fp, char *file, const char *boundary)
1871 {
1872 char buffer[NMH_BUFSIZ];
1873 size_t bytes_read;
1874 bool found_boundary = false;
1875
1876 /* free_content() will close *fp if we fopen it here. */
1877 if (! *fp && (*fp = fopen (file, "r")) == NULL) {
1878 advise (file, "unable to open %s for reading", file);
1879 return NOTOK;
1880 }
1881
1882 fseeko (*fp, 0L, SEEK_SET);
1883 while ((bytes_read = fread (buffer, 1, sizeof buffer, *fp)) > 0) {
1884 if (find_str (buffer, bytes_read, boundary)) {
1885 found_boundary = true;
1886 break;
1887 }
1888 }
1889
1890 return found_boundary;
1891 }
1892
1893
1894 /*
1895 * Remove all non-Content headers.
1896 */
1897 static void
1898 transfer_noncontent_headers (CT old, CT new)
1899 {
1900 HF hp, hp_prev;
1901
1902 hp_prev = hp = old->c_first_hf;
1903 while (hp) {
1904 HF next = hp->next;
1905
1906 if (strncasecmp (XXX_FIELD_PRF, hp->name, LEN(XXX_FIELD_PRF))) {
1907 if (hp == old->c_last_hf) {
1908 if (hp == old->c_first_hf) {
1909 old->c_last_hf = old->c_first_hf = NULL;
1910 } else {
1911 hp_prev->next = NULL;
1912 old->c_last_hf = hp_prev;
1913 }
1914 } else {
1915 if (hp == old->c_first_hf) {
1916 old->c_first_hf = next;
1917 } else {
1918 hp_prev->next = next;
1919 }
1920 }
1921
1922 /* Put node hp in the new CT. */
1923 if (new->c_first_hf == NULL) {
1924 new->c_first_hf = hp;
1925 } else {
1926 new->c_last_hf->next = hp;
1927 }
1928 new->c_last_hf = hp;
1929 } else {
1930 /* A Content- header, leave in old. */
1931 hp_prev = hp;
1932 }
1933
1934 hp = next;
1935 }
1936 }
1937
1938
1939 /*
1940 * Set content type.
1941 */
1942 static int
1943 set_ct_type (CT ct, int type, int subtype, int encoding)
1944 {
1945 char *typename = ct_type_str (type);
1946 char *subtypename = ct_subtype_str (type, subtype);
1947 /* E.g, " text/plain" */
1948 char *type_subtypename = concat (" ", typename, "/", subtypename, NULL);
1949 /* E.g, " text/plain\n" */
1950 char *name_plus_nl = concat (type_subtypename, "\n", NULL);
1951 bool found_content_type = false;
1952 HF hf;
1953 const char *cp = NULL;
1954 char *ctline;
1955 int status;
1956
1957 /* Update/add Content-Type header field. */
1958 for (hf = ct->c_first_hf; hf; hf = hf->next) {
1959 if (! strcasecmp (TYPE_FIELD, hf->name)) {
1960 found_content_type = true;
1961 free (hf->value);
1962 hf->value = (cp = strchr (ct->c_ctline, ';'))
1963 ? concat (type_subtypename, cp, "\n", NULL)
1964 : mh_xstrdup (name_plus_nl);
1965 }
1966 }
1967 if (! found_content_type) {
1968 add_header (ct, mh_xstrdup (TYPE_FIELD),
1969 (cp = strchr (ct->c_ctline, ';'))
1970 ? concat (type_subtypename, cp, "\n", NULL)
1971 : mh_xstrdup (name_plus_nl));
1972 }
1973
1974 /* Some of these might not be used, but set them anyway. */
1975 ctline = cp
1976 ? concat (type_subtypename, cp, NULL)
1977 : concat (type_subtypename, NULL);
1978 free (ct->c_ctline);
1979 ct->c_ctline = ctline;
1980 /* Leave other ctinfo members as they were. */
1981 free (ct->c_ctinfo.ci_type);
1982 ct->c_ctinfo.ci_type = mh_xstrdup (typename);
1983 free (ct->c_ctinfo.ci_subtype);
1984 ct->c_ctinfo.ci_subtype = mh_xstrdup (subtypename);
1985 ct->c_type = type;
1986 ct->c_subtype = subtype;
1987
1988 free (name_plus_nl);
1989 free (type_subtypename);
1990
1991 status = set_ce (ct, encoding);
1992
1993 return status;
1994 }
1995
1996
1997 /*
1998 * It's not necessary to update the charset parameter of a Content-Type
1999 * header for a text part. According to RFC 2045 Sec. 6.4, the body
2000 * (content) was originally in the specified charset, "and will be in
2001 * that character set again after decoding."
2002 */
2003 static int
2004 decode_text_parts (CT ct, int encoding, const char *decodetypes,
2005 int *message_mods)
2006 {
2007 int status = OK;
2008 int lf_line_endings = 0;
2009
2010 switch (ct->c_type) {
2011 case CT_MULTIPART: {
2012 struct multipart *m = (struct multipart *) ct->c_ctparams;
2013 struct part *part;
2014
2015 /* Should check to see if the body for this part is encoded?
2016 For now, it gets passed along as-is by InitMultiPart(). */
2017 for (part = m->mp_parts; status == OK && part; part = part->mp_next) {
2018 status = decode_text_parts (part->mp_part, encoding, decodetypes,
2019 message_mods);
2020 }
2021 break;
2022 }
2023
2024 case CT_MESSAGE:
2025 if (ct->c_subtype == MESSAGE_EXTERNAL) {
2026 struct exbody *e = (struct exbody *) ct->c_ctparams;
2027
2028 status = decode_text_parts (e->eb_content, encoding, decodetypes,
2029 message_mods);
2030 }
2031 break;
2032
2033 default:
2034 if (! should_decode(decodetypes, ct->c_ctinfo.ci_type, ct->c_ctinfo.ci_subtype)) {
2035 break;
2036 }
2037
2038 lf_line_endings =
2039 ct->c_ctparams && ((struct text *) ct->c_ctparams)->lf_line_endings;
2040
2041 switch (ct->c_encoding) {
2042 case CE_BASE64:
2043 case CE_QUOTED: {
2044 int ct_encoding;
2045
2046 if (decode_part (ct) == OK && ct->c_cefile.ce_file) {
2047 const char *reason = NULL;
2048
2049 if ((ct_encoding = content_encoding (ct, &reason)) == CE_BINARY
2050 && encoding != CE_BINARY) {
2051 /* The decoding isn't acceptable so discard it.
2052 Leave status as OK to allow other transformations. */
2053 if (verbosw) {
2054 report (NULL, ct->c_partno, ct->c_file,
2055 "will not decode%s because it is binary (%s)",
2056 ct->c_partno ? ""
2057 : (FENDNULL(ct->c_ctline)),
2058 reason);
2059 }
2060 (void) m_unlink (ct->c_cefile.ce_file);
2061 free (ct->c_cefile.ce_file);
2062 ct->c_cefile.ce_file = NULL;
2063 } else if (ct->c_encoding == CE_QUOTED &&
2064 ct_encoding == CE_8BIT && encoding == CE_7BIT) {
2065 /* The decoding isn't acceptable so discard it.
2066 Leave status as OK to allow other transformations. */
2067 if (verbosw) {
2068 report (NULL, ct->c_partno, ct->c_file,
2069 "will not decode%s because it is 8bit",
2070 ct->c_partno ? ""
2071 : (FENDNULL(ct->c_ctline)));
2072 }
2073 (void) m_unlink (ct->c_cefile.ce_file);
2074 free (ct->c_cefile.ce_file);
2075 ct->c_cefile.ce_file = NULL;
2076 } else {
2077 int enc;
2078
2079 if (ct_encoding == CE_BINARY) {
2080 enc = CE_BINARY;
2081 } else if (ct_encoding == CE_8BIT && encoding == CE_7BIT) {
2082 enc = CE_QUOTED;
2083 } else {
2084 enc = ct_encoding;
2085 }
2086 if (set_ce (ct, enc) == OK) {
2087 ++*message_mods;
2088 if (verbosw) {
2089 report (NULL, ct->c_partno, ct->c_file, "decode%s",
2090 FENDNULL(ct->c_ctline));
2091 }
2092 if (lf_line_endings) {
2093 strip_crs (ct, message_mods);
2094 }
2095 } else {
2096 status = NOTOK;
2097 }
2098 }
2099 } else {
2100 status = NOTOK;
2101 }
2102 break;
2103 }
2104 case CE_8BIT:
2105 case CE_7BIT:
2106 if (lf_line_endings) {
2107 strip_crs (ct, message_mods);
2108 }
2109 break;
2110 default:
2111 break;
2112 }
2113
2114 break;
2115 }
2116
2117 return status;
2118 }
2119
2120
/*
 * Determine if the part with type[/subtype] should be decoded, according to
 * decodetypes (which came from the -decodetypes switch).
 *
 * Returns nonzero if decodetypes, taken as a comma-separated list, has
 * an entry equal to type or to type/subtype, ignoring case.
 */
static int
should_decode(const char *decodetypes, const char *type, const char *subtype)
{
    /* Quick search for matching type[/subtype] in decodetypes:  bracket
       decodetypes with commas, then search for ,type, and ,type/subtype, in
       it. */

    char *haystack = concat(",", decodetypes, ",", NULL);
    char *needle = concat(",", type, ",", NULL);
    bool matched = nmh_strcasestr(haystack, needle) != NULL;

    free(needle);

    if (! matched  &&  subtype != NULL) {
        /* The bare type wasn't listed, try type/subtype. */
        needle = concat(",", type, "/", subtype, ",", NULL);
        if (nmh_strcasestr(haystack, needle)) {
            matched = true;
        }
        free(needle);
    }

    free(haystack);

    return matched;
}
2153
2154
2155 /*
2156 * See if the decoded content is 7bit, 8bit, or binary. It's binary
2157 * if it has any NUL characters, a CR not followed by a LF, or lines
2158 * greater than 998 characters in length. If binary, reason is set
2159 * to a string explaining why.
2160 */
2161 static int
2162 content_encoding (CT ct, const char **reason)
2163 {
2164 CE ce = &ct->c_cefile;
2165 int encoding = CE_7BIT;
2166
2167 if (ce->ce_file) {
2168 size_t line_len = 0;
2169 char buffer[NMH_BUFSIZ];
2170 size_t inbytes;
2171
2172 if (! ce->ce_fp && (ce->ce_fp = fopen (ce->ce_file, "r")) == NULL) {
2173 advise (ce->ce_file, "unable to open for reading");
2174 return CE_UNKNOWN;
2175 }
2176
2177 fseeko (ce->ce_fp, 0L, SEEK_SET);
2178 while (encoding != CE_BINARY &&
2179 (inbytes = fread (buffer, 1, sizeof buffer, ce->ce_fp)) > 0) {
2180 char *cp;
2181 size_t i;
2182 int last_char_was_cr = 0;
2183
2184 for (i = 0, cp = buffer; i < inbytes; ++i, ++cp) {
2185 if (*cp == '\0' || ++line_len > 998 ||
2186 (*cp != '\n' && last_char_was_cr)) {
2187 encoding = CE_BINARY;
2188 if (*cp == '\0') {
2189 *reason = "null character";
2190 } else if (line_len > 998) {
2191 *reason = "line length > 998";
2192 } else if (*cp != '\n' && last_char_was_cr) {
2193 *reason = "CR not followed by LF";
2194 } else {
2195 /* Should not reach this. */
2196 *reason = "";
2197 }
2198 break;
2199 }
2200 if (*cp == '\n') {
2201 line_len = 0;
2202 } else if (! isascii ((unsigned char) *cp)) {
2203 encoding = CE_8BIT;
2204 }
2205
2206 last_char_was_cr = *cp == '\r';
2207 }
2208 }
2209
2210 fclose (ce->ce_fp);
2211 ce->ce_fp = NULL;
2212 } /* else should never happen */
2213
2214 return encoding;
2215 }
2216
2217
2218 /*
2219 * Strip carriage returns from content.
2220 */
2221 static int
2222 strip_crs (CT ct, int *message_mods)
2223 {
2224 char *charset = content_charset (ct);
2225 int status = OK;
2226
2227 /* Only strip carriage returns if content is ASCII or another
2228 charset that has the same readily recognizable CR followed by a
2229 LF. We can include UTF-8 here because if the high-order bit of
2230 a UTF-8 byte is 0, then it must be a single-byte ASCII
2231 character. */
2232 if (! strcasecmp (charset, "US-ASCII") ||
2233 ! strcasecmp (charset, "UTF-8") ||
2234 ! strncasecmp (charset, "ISO-8859-", 9) ||
2235 ! strncasecmp (charset, "WINDOWS-12", 10)) {
2236 char **file = NULL;
2237 FILE **fp = NULL;
2238 size_t begin;
2239 size_t end;
2240 bool has_crs = false;
2241 bool opened_input_file = false;
2242
2243 if (ct->c_cefile.ce_file) {
2244 file = &ct->c_cefile.ce_file;
2245 fp = &ct->c_cefile.ce_fp;
2246 begin = end = 0;
2247 } else if (ct->c_file) {
2248 file = &ct->c_file;
2249 fp = &ct->c_fp;
2250 begin = (size_t) ct->c_begin;
2251 end = (size_t) ct->c_end;
2252 } /* else don't know where the content is */
2253
2254 if (file && *file && fp) {
2255 if (! *fp) {
2256 if ((*fp = fopen (*file, "r")) == NULL) {
2257 advise (*file, "unable to open for reading");
2258 status = NOTOK;
2259 } else {
2260 opened_input_file = true;
2261 }
2262 }
2263 }
2264
2265 if (fp && *fp) {
2266 char buffer[NMH_BUFSIZ];
2267 size_t bytes_read;
2268 size_t bytes_to_read =
2269 end > 0 && end > begin ? end - begin : sizeof buffer;
2270
2271 fseeko (*fp, begin, SEEK_SET);
2272 while ((bytes_read = fread (buffer, 1,
2273 min (bytes_to_read, sizeof buffer),
2274 *fp)) > 0) {
2275 /* Look for CR followed by a LF. This is supposed to
2276 be text so there should be LF's. If not, don't
2277 modify the content. */
2278 char *cp;
2279 size_t i;
2280 bool last_char_was_cr = false;
2281
2282 if (end > 0) { bytes_to_read -= bytes_read; }
2283
2284 for (i = 0, cp = buffer; i < bytes_read; ++i, ++cp) {
2285 if (*cp == '\n' && last_char_was_cr) {
2286 has_crs = true;
2287 break;
2288 }
2289
2290 last_char_was_cr = *cp == '\r';
2291 }
2292 }
2293
2294 if (has_crs) {
2295 int fd;
2296 char *stripped_content_file;
2297 char *tempfile = m_mktemp2 (NULL, invo_name, &fd, NULL);
2298
2299 if (tempfile == NULL) {
2300 die("unable to create temporary file in %s",
2301 get_temp_dir());
2302 }
2303 stripped_content_file = mh_xstrdup (tempfile);
2304
2305 /* Strip each CR before a LF from the content. */
2306 fseeko (*fp, begin, SEEK_SET);
2307 while ((bytes_read = fread (buffer, 1, sizeof buffer, *fp)) >
2308 0) {
2309 char *cp;
2310 size_t i;
2311 bool last_char_was_cr = false;
2312
2313 for (i = 0, cp = buffer; i < bytes_read; ++i, ++cp) {
2314 if (*cp == '\r') {
2315 last_char_was_cr = true;
2316 } else if (last_char_was_cr) {
2317 if (*cp != '\n') {
2318 if (write (fd, "\r", 1) < 0) {
2319 advise (tempfile, "CR write");
2320 }
2321 }
2322 if (write (fd, cp, 1) < 0) {
2323 advise (tempfile, "write");
2324 }
2325 last_char_was_cr = false;
2326 } else {
2327 if (write (fd, cp, 1) < 0) {
2328 advise (tempfile, "write");
2329 }
2330 last_char_was_cr = false;
2331 }
2332 }
2333 }
2334
2335 if (close (fd)) {
2336 inform("unable to write temporary file %s, continuing...",
2337 stripped_content_file);
2338 (void) m_unlink (stripped_content_file);
2339 free(stripped_content_file);
2340 status = NOTOK;
2341 } else {
2342 /* Replace the decoded file with the converted one. */
2343 if (ct->c_cefile.ce_file && ct->c_cefile.ce_unlink)
2344 (void) m_unlink (ct->c_cefile.ce_file);
2345
2346 free(ct->c_cefile.ce_file);
2347 ct->c_cefile.ce_file = stripped_content_file;
2348 ct->c_cefile.ce_unlink = 1;
2349
2350 ++*message_mods;
2351 if (verbosw) {
2352 report (NULL, ct->c_partno,
2353 begin == 0 && end == 0 ? "" : *file,
2354 "stripped CRs");
2355 }
2356 }
2357 }
2358
2359 if (opened_input_file) {
2360 fclose (*fp);
2361 *fp = NULL;
2362 }
2363 }
2364 }
2365
2366 free (charset);
2367
2368 return status;
2369 }
2370
2371
2372 /*
2373 * Add/update, if necessary, the message C-T-E, based on the least restrictive
2374 * of the part C-T-E's.
2375 */
2376 static void
2377 update_cte (CT ct)
2378 {
2379 const int least_restrictive_enc = least_restrictive_encoding (ct);
2380
2381 if (least_restrictive_enc != CE_UNKNOWN &&
2382 least_restrictive_enc != CE_7BIT) {
2383 char *cte = concat (" ", ce_str (least_restrictive_enc), "\n", NULL);
2384 HF hf;
2385 bool found_cte = false;
2386
2387 /* Update/add Content-Transfer-Encoding header field. */
2388 for (hf = ct->c_first_hf; hf; hf = hf->next) {
2389 if (! strcasecmp (ENCODING_FIELD, hf->name)) {
2390 found_cte = true;
2391 free (hf->value);
2392 hf->value = cte;
2393 }
2394 }
2395 if (! found_cte) {
2396 add_header (ct, mh_xstrdup (ENCODING_FIELD), cte);
2397 }
2398 }
2399 }
2400
2401
2402 /*
2403 * Find the least restrictive encoding (7bit, 8bit, binary) of the parts
2404 * within a message.
2405 */
2406 static int
2407 least_restrictive_encoding (CT ct)
2408 {
2409 int encoding = CE_UNKNOWN;
2410
2411 switch (ct->c_type) {
2412 case CT_MULTIPART: {
2413 struct multipart *m = (struct multipart *) ct->c_ctparams;
2414 struct part *part;
2415
2416 for (part = m->mp_parts; part; part = part->mp_next) {
2417 const int part_encoding =
2418 least_restrictive_encoding (part->mp_part);
2419
2420 if (less_restrictive (encoding, part_encoding)) {
2421 encoding = part_encoding;
2422 }
2423 }
2424 break;
2425 }
2426
2427 case CT_MESSAGE:
2428 if (ct->c_subtype == MESSAGE_EXTERNAL) {
2429 struct exbody *e = (struct exbody *) ct->c_ctparams;
2430 const int part_encoding =
2431 least_restrictive_encoding (e->eb_content);
2432
2433 if (less_restrictive (encoding, part_encoding)) {
2434 encoding = part_encoding;
2435 }
2436 }
2437 break;
2438
2439 default: {
2440 if (less_restrictive (encoding, ct->c_encoding)) {
2441 encoding = ct->c_encoding;
2442 }
2443 }}
2444
2445 return encoding;
2446 }
2447
2448
2449 /*
2450 * Return whether the second encoding is less restrictive than the first, where
2451 * "less restrictive" is in the sense used by RFC 2045 Secs. 6.1 and 6.4. So,
2452 * CE_BINARY is less restrictive than CE_8BIT and
2453 * CE_8BIT is less restrictive than CE_7BIT.
2454 */
2455 static int
2456 less_restrictive (int encoding, int second_encoding)
2457 {
2458 switch (second_encoding) {
2459 case CE_BINARY:
2460 return encoding != CE_BINARY;
2461 case CE_8BIT:
2462 return encoding != CE_BINARY && encoding != CE_8BIT;
2463 case CE_7BIT:
2464 return encoding != CE_BINARY && encoding != CE_8BIT &&
2465 encoding != CE_7BIT;
2466 default :
2467 return 0;
2468 }
2469 }
2470
2471
2472 /*
2473 * Convert character set of each part.
2474 */
2475 static int
2476 convert_charsets (CT ct, char *dest_charset, int *message_mods)
2477 {
2478 int status = OK;
2479
2480 switch (ct->c_type) {
2481 case CT_TEXT:
2482 if (ct->c_subtype == TEXT_PLAIN) {
2483 status = convert_charset (ct, dest_charset, message_mods);
2484 if (status == OK) {
2485 if (verbosw) {
2486 char *ct_charset = content_charset (ct);
2487
2488 report (NULL, ct->c_partno, ct->c_file,
2489 "convert %s to %s", ct_charset, dest_charset);
2490 free (ct_charset);
2491 }
2492 } else {
2493 char *ct_charset = content_charset (ct);
2494
2495 report ("iconv", ct->c_partno, ct->c_file,
2496 "failed to convert %s to %s", ct_charset, dest_charset);
2497 free (ct_charset);
2498 }
2499 }
2500 break;
2501
2502 case CT_MULTIPART: {
2503 struct multipart *m = (struct multipart *) ct->c_ctparams;
2504 struct part *part;
2505
2506 /* Should check to see if the body for this part is encoded?
2507 For now, it gets passed along as-is by InitMultiPart(). */
2508 for (part = m->mp_parts; status == OK && part; part = part->mp_next) {
2509 status =
2510 convert_charsets (part->mp_part, dest_charset, message_mods);
2511 }
2512 break;
2513 }
2514
2515 case CT_MESSAGE:
2516 if (ct->c_subtype == MESSAGE_EXTERNAL) {
2517 struct exbody *e = (struct exbody *) ct->c_ctparams;
2518
2519 status =
2520 convert_charsets (e->eb_content, dest_charset, message_mods);
2521 }
2522 break;
2523
2524 default:
2525 break;
2526 }
2527
2528 return status;
2529 }
2530
2531
2532 /*
2533 * Fix various problems that aren't handled elsewhere. These
2534 * are fixed unconditionally: there are no switches to disable
2535 * them. Currently, "problems" are these:
2536 * 1) remove extraneous semicolon at the end of a header parameter list
2537 * 2) replace RFC 2047 encoding with RFC 2231 encoding of name and
2538 * filename parameters in Content-Type and Content-Disposition
2539 * headers, respectively.
2540 */
2541 static int
2542 fix_always (CT ct, int *message_mods)
2543 {
2544 int status = OK;
2545
2546 switch (ct->c_type) {
2547 case CT_MULTIPART: {
2548 struct multipart *m = (struct multipart *) ct->c_ctparams;
2549 struct part *part;
2550
2551 for (part = m->mp_parts; status == OK && part; part = part->mp_next) {
2552 status = fix_always (part->mp_part, message_mods);
2553 }
2554 break;
2555 }
2556
2557 case CT_MESSAGE:
2558 if (ct->c_subtype == MESSAGE_EXTERNAL) {
2559 struct exbody *e = (struct exbody *) ct->c_ctparams;
2560
2561 status = fix_always (e->eb_content, message_mods);
2562 }
2563 break;
2564
2565 default: {
2566 HF hf;
2567
2568 if (ct->c_first_hf) {
2569 fix_filename_encoding (ct);
2570 }
2571
2572 for (hf = ct->c_first_hf; hf; hf = hf->next) {
2573 size_t len = strlen (hf->value);
2574
2575 if (strcasecmp (hf->name, TYPE_FIELD) != 0 &&
2576 strcasecmp (hf->name, DISPO_FIELD) != 0) {
2577 /* Only do this for Content-Type and
2578 Content-Disposition fields because those are the
2579 only headers that parse_mime() warns about. */
2580 continue;
2581 }
2582
2583 /* whitespace following a trailing ';' will be nuked as well */
2584 if (hf->value[len - 1] == '\n') {
2585 while (isspace((unsigned char)(hf->value[len - 2]))) {
2586 if (len-- == 0) { break; }
2587 }
2588 }
2589
2590 if (hf->value[len - 2] == ';') {
2591 /* Remove trailing ';' from parameter value. */
2592 hf->value[len - 2] = '\n';
2593 hf->value[len - 1] = '\0';
2594
2595 /* Also, if Content-Type parameter, remove trailing ';'
2596 from ct->c_ctline. This probably isn't necessary
2597 but can't hurt. */
2598 if (strcasecmp(hf->name, TYPE_FIELD) == 0 && ct->c_ctline) {
2599 size_t l = strlen(ct->c_ctline) - 1;
2600 while (isspace((unsigned char)(ct->c_ctline[l])) ||
2601 ct->c_ctline[l] == ';') {
2602 ct->c_ctline[l--] = '\0';
2603 if (l == 0) { break; }
2604 }
2605 }
2606
2607 ++*message_mods;
2608 if (verbosw) {
2609 report (NULL, ct->c_partno, ct->c_file,
2610 "remove trailing ; from %s parameter value",
2611 hf->name);
2612 }
2613 }
2614 }
2615 }}
2616
2617 return status;
2618 }
2619
2620
2621 /*
2622 * Factor out common code for loops in fix_filename_encoding().
2623 */
2624 static int
2625 fix_filename_param (char *name, char *value, PM *first_pm, PM *last_pm)
2626 {
2627 bool fixed = false;
2628
2629 if (has_prefix(value, "=?") && has_suffix(value, "?=")) {
2630 /* Looks like an RFC 2047 encoded parameter. */
2631 char decoded[PATH_MAX + 1];
2632
2633 if (decode_rfc2047 (value, decoded, sizeof decoded)) {
2634 /* Encode using RFC 2231. */
2635 replace_param (first_pm, last_pm, name, decoded, 0);
2636 fixed = true;
2637 } else {
2638 inform("failed to decode %s parameter %s", name, value);
2639 }
2640 }
2641
2642 return fixed;
2643 }
2644
2645
2646 /*
2647 * Replace RFC 2047 encoding with RFC 2231 encoding of name and
2648 * filename parameters in Content-Type and Content-Disposition
2649 * headers, respectively.
2650 */
2651 static int
2652 fix_filename_encoding (CT ct)
2653 {
2654 PM pm;
2655 HF hf;
2656 int fixed = 0;
2657
2658 for (pm = ct->c_ctinfo.ci_first_pm; pm; pm = pm->pm_next) {
2659 if (pm->pm_name && pm->pm_value &&
2660 strcasecmp (pm->pm_name, "name") == 0) {
2661 fixed = fix_filename_param (pm->pm_name, pm->pm_value,
2662 &ct->c_ctinfo.ci_first_pm,
2663 &ct->c_ctinfo.ci_last_pm);
2664 }
2665 }
2666
2667 for (pm = ct->c_dispo_first; pm; pm = pm->pm_next) {
2668 if (pm->pm_name && pm->pm_value &&
2669 strcasecmp (pm->pm_name, "filename") == 0) {
2670 fixed = fix_filename_param (pm->pm_name, pm->pm_value,
2671 &ct->c_dispo_first,
2672 &ct->c_dispo_last);
2673 }
2674 }
2675
2676 /* Fix hf values to correspond. */
2677 for (hf = ct->c_first_hf; fixed && hf; hf = hf->next) {
2678 enum { OTHER, TYPE_HEADER, DISPO_HEADER } field = OTHER;
2679
2680 if (strcasecmp (hf->name, TYPE_FIELD) == 0) {
2681 field = TYPE_HEADER;
2682 } else if (strcasecmp (hf->name, DISPO_FIELD) == 0) {
2683 field = DISPO_HEADER;
2684 }
2685
2686 if (field != OTHER) {
2687 const char *const semicolon_loc = strchr (hf->value, ';');
2688
2689 if (semicolon_loc) {
2690 const size_t len =
2691 strlen (hf->name) + 1 + semicolon_loc - hf->value;
2692 const char *const params =
2693 output_params (len,
2694 field == TYPE_HEADER
2695 ? ct->c_ctinfo.ci_first_pm
2696 : ct->c_dispo_first,
2697 NULL, 0);
2698 const char *const new_params = concat (params, "\n", NULL);
2699
2700 replace_substring (&hf->value, semicolon_loc, new_params);
2701 free((void *)new_params); /* Cast away const. Sigh. */
2702 free((void *)params);
2703 } else {
2704 inform("did not find semicolon in %s:%s\n",
2705 hf->name, hf->value);
2706 }
2707 }
2708 }
2709
2710 return OK;
2711 }
2712
2713
2714 /*
2715 * Output content in input file to output file.
2716 */
2717 static int
2718 write_content (CT ct, const char *input_filename, char *outfile, FILE *outfp,
2719 int modify_inplace, int message_mods)
2720 {
2721 int status = OK;
2722
2723 if (modify_inplace) {
2724 if (message_mods > 0) {
2725 if ((status = output_message_fp (ct, outfp, outfile)) == OK) {
2726 char *infile = input_filename
2727 ? mh_xstrdup (input_filename)
2728 : mh_xstrdup (ct->c_file ? ct->c_file : "-");
2729
2730 if (remove_file (infile) == OK) {
2731 if (rename (outfile, infile)) {
2732 /* Rename didn't work, possibly because of an
2733 attempt to rename across filesystems. Try
2734 brute force copy. */
2735 int old = open (outfile, O_RDONLY);
2736 int new =
2737 open (infile, O_WRONLY | O_CREAT, m_gmprot ());
2738 int i = -1;
2739
2740 if (old != -1 && new != -1) {
2741 char buffer[NMH_BUFSIZ];
2742
2743 while ((i = read (old, buffer, sizeof buffer)) >
2744 0) {
2745 if (write (new, buffer, i) != i) {
2746 i = -1;
2747 break;
2748 }
2749 }
2750 }
2751 if (new != -1) { close (new); }
2752 if (old != -1) { close (old); }
2753 (void) m_unlink (outfile);
2754
2755 if (i < 0) {
2756 /* The -file argument processing used path() to
2757 expand filename to absolute path. */
2758 int file = ct->c_file && ct->c_file[0] == '/';
2759
2760 inform("unable to rename %s %s to %s, continuing...",
2761 file ? "file" : "message", outfile,
2762 infile);
2763 status = NOTOK;
2764 }
2765 }
2766 } else {
2767 inform("unable to remove input file %s, "
2768 "not modifying it, continuing...", infile);
2769 (void) m_unlink (outfile);
2770 status = NOTOK;
2771 }
2772
2773 free (infile);
2774 } else {
2775 status = NOTOK;
2776 }
2777 } else {
2778 /* No modifications and didn't need the tmp outfile. */
2779 (void) m_unlink (outfile);
2780 }
2781 } else {
2782 /* Output is going to some file. Produce it whether or not
2783 there were modifications. */
2784 status = output_message_fp (ct, outfp, outfile);
2785 }
2786
2787 flush_errors ();
2788 return status;
2789 }
2790
2791
2792 /*
2793 * parse_mime() does not set lf_line_endings in struct text, so use this
2794 * function to do it. It touches the parts the decodetypes identifies.
2795 */
2796 static void
2797 set_text_ctparams(CT ct, char *decodetypes, int lf_line_endings)
2798 {
2799 switch (ct->c_type) {
2800 case CT_MULTIPART: {
2801 struct multipart *m = (struct multipart *) ct->c_ctparams;
2802 struct part *part;
2803
2804 for (part = m->mp_parts; part; part = part->mp_next) {
2805 set_text_ctparams(part->mp_part, decodetypes, lf_line_endings);
2806 }
2807 break;
2808 }
2809
2810 case CT_MESSAGE:
2811 if (ct->c_subtype == MESSAGE_EXTERNAL) {
2812 struct exbody *e = (struct exbody *) ct->c_ctparams;
2813
2814 set_text_ctparams(e->eb_content, decodetypes, lf_line_endings);
2815 }
2816 break;
2817
2818 default:
2819 if (should_decode(decodetypes, ct->c_ctinfo.ci_type, ct->c_ctinfo.ci_subtype)) {
2820 if (ct->c_ctparams == NULL) {
2821 ct->c_ctparams = mh_xcalloc(1, sizeof (struct text));
2822 }
2823 ((struct text *) ct->c_ctparams)->lf_line_endings = lf_line_endings;
2824 }
2825 }
2826 }
2827
2828
2829 /*
2830 * If "rmmproc" is defined, call that to remove the file. Otherwise,
2831 * use the standard MH backup file.
2832 */
2833 static int
2834 remove_file (const char *file)
2835 {
2836 if (rmmproc) {
2837 char *rmm_command = concat (rmmproc, " ", file, NULL);
2838 int status = system (rmm_command);
2839
2840 free (rmm_command);
2841 return WIFEXITED (status) ? WEXITSTATUS (status) : NOTOK;
2842 }
2843 /* This is OK for a non-message file, it still uses the
2844 BACKUP_PREFIX form. The backup file will be in the same
2845 directory as file. */
2846 return rename (file, m_backup (file));
2847 }
2848
2849
2850 /*
2851 * Output formatted message to user.
2852 */
2853 static void
2854 report (char *what, char *partno, char *filename, char *message, ...)
2855 {
2856 va_list args;
2857 char *fmt;
2858
2859 if (verbosw) {
2860 va_start (args, message);
2861 fmt = concat (filename, partno ? " part " : ", ",
2862 FENDNULL(partno), partno ? ", " : "", message, NULL);
2863
2864 advertise (what, NULL, fmt, args);
2865
2866 free (fmt);
2867 va_end (args);
2868 }
2869 }
2870
2871
/*
 * Signal handler:  for SIGQUIT, flush stdout and write a newline to
 * stderr; then, for any signal, exit via done() with status 1.
 */
static void
pipeser (int i)
{
    const int got_quit = i == SIGQUIT;

    if (got_quit) {
        fflush (stdout);
        fputs ("\n", stderr);
        fflush (stderr);
    }

    done (1);
    /* NOTREACHED */
}