]> diplodocus.org Git - nmh/blob - uip/mhfixmsg.c
sendsbr.c: Move interface to own file.
[nmh] / uip / mhfixmsg.c
1 /* mhfixmsg.c -- rewrite a message with various transformations
2 *
3 * This code is Copyright (c) 2002 and 2013, by the authors of nmh.
4 * See the COPYRIGHT file in the root directory of the nmh
5 * distribution for complete copyright information.
6 */
7
8 #include "h/mh.h"
9 #include "sbr/m_name.h"
10 #include "sbr/m_gmprot.h"
11 #include "sbr/m_getfld.h"
12 #include "sbr/getarguments.h"
13 #include "sbr/concat.h"
14 #include "sbr/seq_setprev.h"
15 #include "sbr/seq_setcur.h"
16 #include "sbr/seq_save.h"
17 #include "sbr/smatch.h"
18 #include "sbr/fmt_rfc2047.h"
19 #include "sbr/cpydata.h"
20 #include "sbr/trimcpy.h"
21 #include "sbr/m_convert.h"
22 #include "sbr/m_backup.h"
23 #include "sbr/getfolder.h"
24 #include "sbr/folder_read.h"
25 #include "sbr/context_save.h"
26 #include "sbr/context_replace.h"
27 #include "sbr/context_find.h"
28 #include "sbr/readconfig.h"
29 #include "sbr/ambigsw.h"
30 #include "sbr/path.h"
31 #include "sbr/print_version.h"
32 #include "sbr/print_help.h"
33 #include "sbr/error.h"
34 #include "h/fmt_scan.h"
35 #include "h/mime.h"
36 #include "h/mhparse.h"
37 #include "h/done.h"
38 #include "h/utils.h"
39 #include "h/signals.h"
40 #include "sbr/m_maildir.h"
41 #include "sbr/m_mktemp.h"
42 #include "sbr/mime_type.h"
43 #include "mhmisc.h"
44 #include "mhfree.h"
45 #include "mhoutsbr.h"
46 #include "mhshowsbr.h"
47 #include <fcntl.h>
48
/*
 * Command-line switch table, written as an X-macro list.  Each X() entry
 * carries the switch text, a minchars value, and the identifier that
 * main() uses as a case label on the result of smatch().  The list is
 * consumed twice below, once per definition of X.
 */
#define MHFIXMSG_SWITCHES \
    X("decodetext 8bit|7bit|binary", 0, DECODETEXTSW) \
    X("nodecodetext", 0, NDECODETEXTSW) \
    X("decodetypes", 0, DECODETYPESW) \
    X("crlflinebreaks", 0, CRLFLINEBREAKSSW) \
    X("nocrlflinebreaks", 0, NCRLFLINEBREAKSSW) \
    X("textcharset", 0, TEXTCHARSETSW) \
    X("notextcharset", 0, NTEXTCHARSETSW) \
    X("reformat", 0, REFORMATSW) \
    X("noreformat", 0, NREFORMATSW) \
    X("replacetextplain", 0, REPLACETEXTPLAINSW) \
    X("noreplacetextplain", 0, NREPLACETEXTPLAINSW) \
    X("fixboundary", 0, FIXBOUNDARYSW) \
    X("nofixboundary", 0, NFIXBOUNDARYSW) \
    X("fixcte", 0, FIXCOMPOSITECTESW) \
    X("nofixcte", 0, NFIXCOMPOSITECTESW) \
    X("fixtype mimetype", 0, FIXTYPESW) \
    X("file file", 0, FILESW) \
    X("outfile file", 0, OUTFILESW) \
    X("rmmproc program", 0, RPROCSW) \
    X("normmproc", 0, NRPRCSW) \
    X("changecur", 0, CHGSW) \
    X("nochangecur", 0, NCHGSW) \
    X("verbose", 0, VERBSW) \
    X("noverbose", 0, NVERBSW) \
    X("version", 0, VERSIONSW) \
    X("help", 0, HELPSW) \

/* First pass: X keeps only the identifier of each entry (presumably
   expanded into an enumeration by DEFINE_SWITCH_ENUM -- confirm in its
   header). */
#define X(sw, minchars, id) id,
DEFINE_SWITCH_ENUM(MHFIXMSG);
#undef X

/* Second pass: X emits one { text, minchars, id } initializer per entry
   for the array named "switches", which main() hands to smatch(),
   ambigsw() and print_help(). */
#define X(sw, minchars, id) { sw, minchars, id },
DEFINE_SWITCH_ARRAY(MHFIXMSG, switches);
#undef X
84
85
int verbosw;    /* Set by -verbose, cleared by -noverbose; gates the
                   report() calls made by the transformations. */
int debugsw;    /* Needed by mhparse.c. */

/* SIGQUIT is routed to the same handler as SIGPIPE (see the SIGNAL()
   calls in main()). */
#define quitser pipeser
90
91 /*
92 * static prototypes
93 */
94 typedef struct fix_transformations {
95 int fixboundary;
96 int fixcompositecte;
97 svector_t fixtypes;
98 int reformat;
99 int replacetextplain;
100 int decodetext;
101 char *decodetypes;
102 /* Whether to use CRLF linebreaks, per RFC 2046 Sec. 4.1.1, par.1. */
103 int lf_line_endings;
104 char *textcharset;
105 } fix_transformations;
106
107 static int mhfixmsgsbr (CT *, char *, const fix_transformations *,
108 FILE **, char *, FILE **);
109 static int fix_boundary (CT *, int *);
110 static int copy_input_to_output (const char *, FILE *, const char *, FILE *);
111 static int get_multipart_boundary (CT, char **);
112 static int replace_boundary (CT, char *, char *);
113 static int fix_types (CT, svector_t, int *);
114 static char *replace_substring (char **, const char *, const char *);
115 static char *remove_parameter (char *, const char *);
116 static int fix_composite_cte (CT, int *);
117 static int set_ce (CT, int);
118 static int ensure_text_plain (CT *, CT, int *, int);
119 static int find_textplain_sibling (CT, int, int *);
120 static int insert_new_text_plain_part (CT, int, CT);
121 static CT build_text_plain_part (CT);
122 static int insert_into_new_mp_alt (CT *, int *);
123 static CT divide_part (CT);
124 static void copy_ctinfo (CI, CI);
125 static int decode_part (CT);
126 static int reformat_part (CT, char *, char *, char *, int);
127 static CT build_multipart_alt (CT, CT, int, int);
128 static int boundary_in_content (FILE **, char *, const char *);
129 static void transfer_noncontent_headers (CT, CT);
130 static int set_ct_type (CT, int type, int subtype, int encoding);
131 static int decode_text_parts (CT, int, const char *, int *);
132 static int should_decode(const char *, const char *, const char *);
133 static int content_encoding (CT, const char **);
134 static int strip_crs (CT, int *);
135 static void update_cte (CT);
136 static int least_restrictive_encoding (CT) PURE;
137 static int less_restrictive (int, int);
138 static int convert_charsets (CT, char *, int *);
139 static int fix_always (CT, int *);
140 static int fix_filename_param (char *, char *, PM *, PM *);
141 static int fix_filename_encoding (CT);
142 static int write_content (CT, const char *, char *, FILE *, int, int);
143 static void set_text_ctparams(CT, char *, int);
144 static int remove_file (const char *);
145 static void report (char *, char *, char *, char *, ...)
146 CHECK_PRINTF(4, 5);
147 static void pipeser (int);
148
149
150 int
151 main (int argc, char **argv)
152 {
153 int msgnum;
154 char *cp, *file = NULL, *folder = NULL;
155 char *maildir = NULL, buf[100], *outfile = NULL;
156 char **argp, **arguments;
157 struct msgs_array msgs = { 0, 0, NULL };
158 struct msgs *mp = NULL;
159 CT *ctp;
160 FILE *fp, *infp = NULL, *outfp = NULL;
161 bool using_stdin = false;
162 bool chgflag = true;
163 int status = OK;
164 fix_transformations fx;
165 fx.reformat = fx.fixcompositecte = fx.fixboundary = 1;
166 fx.fixtypes = NULL;
167 fx.replacetextplain = 0;
168 fx.decodetext = CE_8BIT;
169 fx.decodetypes = "text,application/ics"; /* Default, per man page. */
170 fx.lf_line_endings = 0;
171 fx.textcharset = NULL;
172
173 if (nmh_init(argv[0], true, false)) { return 1; }
174
175 arguments = getarguments (invo_name, argc, argv, 1);
176 argp = arguments;
177
178 /*
179 * Parse arguments
180 */
181 while ((cp = *argp++)) {
182 if (*cp == '-') {
183 switch (smatch (++cp, switches)) {
184 case AMBIGSW:
185 ambigsw (cp, switches);
186 done (1);
187 case UNKWNSW:
188 die("-%s unknown", cp);
189
190 case HELPSW:
191 snprintf (buf, sizeof buf, "%s [+folder] [msgs] [switches]",
192 invo_name);
193 print_help (buf, switches, 1);
194 done (0);
195 case VERSIONSW:
196 print_version(invo_name);
197 done (0);
198
199 case DECODETEXTSW:
200 if (! (cp = *argp++) || *cp == '-') {
201 die("missing argument to %s", argp[-2]);
202 }
203 if (! strcasecmp (cp, "8bit")) {
204 fx.decodetext = CE_8BIT;
205 } else if (! strcasecmp (cp, "7bit")) {
206 fx.decodetext = CE_7BIT;
207 } else if (! strcasecmp (cp, "binary")) {
208 fx.decodetext = CE_BINARY;
209 } else {
210 die("invalid argument to %s", argp[-2]);
211 }
212 continue;
213 case NDECODETEXTSW:
214 fx.decodetext = 0;
215 continue;
216 case DECODETYPESW:
217 if (! (cp = *argp++) || *cp == '-') {
218 die("missing argument to %s", argp[-2]);
219 }
220 fx.decodetypes = cp;
221 continue;
222 case CRLFLINEBREAKSSW:
223 fx.lf_line_endings = 0;
224 continue;
225 case NCRLFLINEBREAKSSW:
226 fx.lf_line_endings = 1;
227 continue;
228 case TEXTCHARSETSW:
229 if (! (cp = *argp++) || (*cp == '-' && cp[1])) {
230 die("missing argument to %s", argp[-2]);
231 }
232 fx.textcharset = cp;
233 continue;
234 case NTEXTCHARSETSW:
235 fx.textcharset = 0;
236 continue;
237 case FIXBOUNDARYSW:
238 fx.fixboundary = 1;
239 continue;
240 case NFIXBOUNDARYSW:
241 fx.fixboundary = 0;
242 continue;
243 case FIXCOMPOSITECTESW:
244 fx.fixcompositecte = 1;
245 continue;
246 case NFIXCOMPOSITECTESW:
247 fx.fixcompositecte = 0;
248 continue;
249 case FIXTYPESW:
250 if (! (cp = *argp++) || (*cp == '-' && cp[1])) {
251 die("missing argument to %s", argp[-2]);
252 }
253 if (! strncasecmp (cp, "multipart/", 10) ||
254 ! strncasecmp (cp, "message/", 8))
255 die("-fixtype %s not allowed", cp);
256 if (! strchr (cp, '/'))
257 die("-fixtype requires type/subtype");
258 if (fx.fixtypes == NULL) { fx.fixtypes = svector_create (10); }
259 svector_push_back (fx.fixtypes, cp);
260 continue;
261 case REFORMATSW:
262 fx.reformat = 1;
263 continue;
264 case NREFORMATSW:
265 fx.reformat = 0;
266 continue;
267 case REPLACETEXTPLAINSW:
268 fx.replacetextplain = 1;
269 continue;
270 case NREPLACETEXTPLAINSW:
271 fx.replacetextplain = 0;
272 continue;
273 case FILESW:
274 if (! (cp = *argp++) || (*cp == '-' && cp[1])) {
275 die("missing argument to %s", argp[-2]);
276 }
277 file = *cp == '-' ? mh_xstrdup (cp) : path (cp, TFILE);
278 continue;
279 case OUTFILESW:
280 if (! (cp = *argp++) || (*cp == '-' && cp[1])) {
281 die("missing argument to %s", argp[-2]);
282 }
283 outfile = *cp == '-' ? mh_xstrdup (cp) : path (cp, TFILE);
284 continue;
285 case RPROCSW:
286 if (!(rmmproc = *argp++) || *rmmproc == '-') {
287 die("missing argument to %s", argp[-2]);
288 }
289 continue;
290 case NRPRCSW:
291 rmmproc = NULL;
292 continue;
293 case CHGSW:
294 chgflag = true;
295 continue;
296 case NCHGSW:
297 chgflag = false;
298 continue;
299 case VERBSW:
300 verbosw = 1;
301 continue;
302 case NVERBSW:
303 verbosw = 0;
304 continue;
305 }
306 }
307 if (*cp == '+' || *cp == '@') {
308 if (folder)
309 die("only one folder at a time!");
310 folder = pluspath (cp);
311 } else {
312 if (*cp == '/') {
313 /* Interpret a full path as a filename, not a message. */
314 file = mh_xstrdup (cp);
315 } else {
316 app_msgarg (&msgs, cp);
317 }
318 }
319 }
320
321 SIGNAL (SIGQUIT, quitser);
322 SIGNAL (SIGPIPE, pipeser);
323
324 /*
325 * Read the standard profile setup
326 */
327 if ((fp = fopen (cp = etcpath ("mhn.defaults"), "r"))) {
328 readconfig(NULL, fp, cp, 0);
329 fclose (fp);
330 }
331
332 suppress_bogus_mp_content_warning = skip_mp_cte_check = true;
333 suppress_extraneous_trailing_semicolon_warning = true;
334
335 if (! context_find ("path")) {
336 free (path ("./", TFOLDER));
337 }
338
339 if (file && msgs.size) {
340 die("cannot specify msg and file at same time!");
341 }
342
343 if (outfile) {
344 /* Open the outfile now, so we don't have to risk opening it
345 after running out of fds. */
346 if (strcmp (outfile, "-") == 0) {
347 outfp = stdout;
348 } else if ((outfp = fopen (outfile, "w")) == NULL) {
349 adios (outfile, "unable to open for writing");
350 }
351 }
352
353 /*
354 * check if message is coming from file
355 */
356 if (file) {
357 /* If file is stdin, create a tmp file name before parse_mime()
358 has a chance, because it might put in on a different
359 filesystem than the output file. Instead, put it in the
360 user's preferred tmp directory. */
361 CT ct;
362
363 if (! strcmp ("-", file)) {
364 int fd;
365 char *cp;
366
367 using_stdin = true;
368
369 if ((cp = m_mktemp2 (NULL, invo_name, &fd, NULL)) == NULL) {
370 die("unable to create temporary file in %s",
371 get_temp_dir());
372 } else {
373 free (file);
374 file = mh_xstrdup (cp);
375 cpydata (STDIN_FILENO, fd, "-", file);
376 }
377
378 if (close (fd)) {
379 (void) m_unlink (file);
380 die("failed to write temporary file");
381 }
382 }
383
384 cts = mh_xcalloc(2, sizeof *cts);
385 ctp = cts;
386
387 if ((ct = parse_mime (file))) {
388 set_text_ctparams(ct, fx.decodetypes, fx.lf_line_endings);
389 *ctp++ = ct;
390 } else {
391 inform("unable to parse message from file %s", file);
392 status = NOTOK;
393
394 /* If there's an outfile, pass the input message unchanged, so the
395 message won't get dropped from a pipeline. */
396 if (outfile) {
397 /* Something went wrong. Output might be expected, such as if
398 this were run as a filter. Just copy the input to the
399 output. */
400 if ((infp = fopen (file, "r")) == NULL) {
401 adios (file, "unable to open for reading");
402 }
403
404 if (copy_input_to_output (file, infp, outfile, outfp) != OK) {
405 inform("unable to copy message to %s, "
406 "it might be lost\n", outfile);
407 }
408
409 fclose (infp);
410 infp = NULL;
411 }
412 }
413 } else {
414 /*
415 * message(s) are coming from a folder
416 */
417 CT ct;
418
419 if (! msgs.size) {
420 app_msgarg(&msgs, "cur");
421 }
422 if (! folder) {
423 folder = getfolder (1);
424 }
425 maildir = mh_xstrdup(m_maildir (folder));
426
427 /* chdir so that error messages, esp. from MIME parser, just
428 refer to the message and not its path. */
429 if (chdir (maildir) == NOTOK) {
430 adios (maildir, "unable to change directory to");
431 }
432
433 /* read folder and create message structure */
434 if (! (mp = folder_read (folder, 1))) {
435 die("unable to read folder %s", folder);
436 }
437
438 /* check for empty folder */
439 if (mp->nummsg == 0) {
440 die("no messages in %s", folder);
441 }
442
443 /* parse all the message ranges/sequences and set SELECTED */
444 for (msgnum = 0; msgnum < msgs.size; msgnum++)
445 if (! m_convert (mp, msgs.msgs[msgnum])) {
446 done (1);
447 }
448 seq_setprev (mp); /* set the previous-sequence */
449
450 cts = mh_xcalloc(mp->numsel + 1, sizeof *cts);
451 ctp = cts;
452
453 for (msgnum = mp->lowsel; msgnum <= mp->hghsel; msgnum++) {
454 if (is_selected(mp, msgnum)) {
455 char *msgnam = m_name (msgnum);
456
457 if ((ct = parse_mime (msgnam))) {
458 set_text_ctparams(ct, fx.decodetypes, fx.lf_line_endings);
459 *ctp++ = ct;
460 } else {
461 inform("unable to parse message %s", msgnam);
462 status = NOTOK;
463
464 /* If there's an outfile, pass the input message
465 unchanged, so the message won't get dropped from a
466 pipeline. */
467 if (outfile) {
468 /* Something went wrong. Output might be expected,
469 such as if this were run as a filter. Just copy
470 the input to the output. */
471 /* Can't use path() here because 1) it might have been
472 called before and it caches the pwd, and 2) we call
473 chdir() after that. */
474 char *input_filename =
475 concat (maildir, "/", msgnam, NULL);
476
477 if ((infp = fopen (input_filename, "r")) == NULL) {
478 adios (input_filename,
479 "unable to open for reading");
480 }
481
482 if (copy_input_to_output (input_filename, infp,
483 outfile, outfp) != OK) {
484 inform("unable to copy message to %s, "
485 "it might be lost\n", outfile);
486 }
487
488 fclose (infp);
489 infp = NULL;
490 free (input_filename);
491 }
492 }
493 }
494 }
495
496 if (chgflag) {
497 seq_setcur (mp, mp->hghsel); /* update current message */
498 }
499 seq_save (mp); /* synchronize sequences */
500 context_replace (pfolder, folder);/* update current folder */
501 context_save (); /* save the context file */
502 }
503
504 if (*cts) {
505 for (ctp = cts; *ctp; ++ctp) {
506 status =
507 mhfixmsgsbr (ctp, maildir, &fx, &infp, outfile, &outfp) == OK
508 ? 0
509 : 1;
510 free_content (*ctp);
511
512 if (using_stdin) {
513 (void) m_unlink (file);
514
515 if (! outfile) {
516 /* Just calling m_backup() unlinks the backup file. */
517 (void) m_backup (file);
518 }
519 }
520 }
521 } else {
522 status = 1;
523 }
524
525 free(maildir);
526 free (cts);
527
528 if (fx.fixtypes != NULL) { svector_free (fx.fixtypes); }
529 if (infp) { fclose (infp); } /* even if stdin */
530 if (outfp) { fclose (outfp); } /* even if stdout */
531 free (outfile);
532 free (file);
533 free (folder);
534 free (arguments);
535
536 done (status == OK ? 0 : 1);
537 return NOTOK;
538 }
539
540
541 /*
542 * Apply transformations to one message.
543 */
544 static int
545 mhfixmsgsbr (CT *ctp, char *maildir, const fix_transformations *fx,
546 FILE **infp, char *outfile, FILE **outfp)
547 {
548 /* Store input filename in case one of the transformations, i.e.,
549 fix_boundary(), rewrites to a tmp file. */
550 char *input_filename = maildir
551 ? concat (maildir, "/", (*ctp)->c_file, NULL)
552 : mh_xstrdup ((*ctp)->c_file);
553 bool modify_inplace = false;
554 int message_mods = 0;
555 int status = OK;
556
557 /* Though the input file won't need to be opened if everything goes
558 well, do it here just in case there's a failure, and that failure is
559 running out of file descriptors. */
560 if ((*infp = fopen (input_filename, "r")) == NULL) {
561 adios (input_filename, "unable to open for reading");
562 }
563
564 if (outfile == NULL) {
565 modify_inplace = true;
566
567 if ((*ctp)->c_file) {
568 char *tempfile;
569 /* outfp will be closed by the caller */
570 if ((tempfile = m_mktemp2 (NULL, invo_name, NULL, outfp)) ==
571 NULL) {
572 die("unable to create temporary file in %s",
573 get_temp_dir());
574 }
575 outfile = mh_xstrdup (tempfile);
576 } else {
577 die("missing both input and output filenames\n");
578 }
579 } /* else *outfp was defined by caller */
580
581 reverse_alternative_parts (*ctp);
582 status = fix_always (*ctp, &message_mods);
583 if (status == OK && fx->fixboundary) {
584 status = fix_boundary (ctp, &message_mods);
585 }
586 if (status == OK && fx->fixtypes != NULL) {
587 status = fix_types (*ctp, fx->fixtypes, &message_mods);
588 }
589 if (status == OK && fx->fixcompositecte) {
590 status = fix_composite_cte (*ctp, &message_mods);
591 }
592 if (status == OK && fx->reformat) {
593 status =
594 ensure_text_plain (ctp, NULL, &message_mods, fx->replacetextplain);
595 }
596 if (status == OK && fx->decodetext) {
597 status = decode_text_parts (*ctp, fx->decodetext, fx->decodetypes,
598 &message_mods);
599 update_cte (*ctp);
600 }
601 if (status == OK && fx->textcharset != NULL) {
602 status = convert_charsets (*ctp, fx->textcharset, &message_mods);
603 }
604
605 if (status == OK && ! (*ctp)->c_umask) {
606 /* Set the umask for the contents file. This currently
607 isn't used but just in case it is in the future. */
608 struct stat st;
609
610 if (stat ((*ctp)->c_file, &st) != NOTOK) {
611 (*ctp)->c_umask = ~(st.st_mode & 0777);
612 } else {
613 (*ctp)->c_umask = ~m_gmprot();
614 }
615 }
616
617 /*
618 * Write the content to a file
619 */
620 if (status == OK) {
621 status = write_content (*ctp, input_filename, outfile, *outfp,
622 modify_inplace, message_mods);
623 } else if (! modify_inplace) {
624 /* Something went wrong. Output might be expected, such
625 as if this were run as a filter. Just copy the input
626 to the output. */
627 if (copy_input_to_output (input_filename, *infp, outfile,
628 *outfp) != OK) {
629 inform("unable to copy message to %s, it might be lost\n",
630 outfile);
631 }
632 }
633
634 if (modify_inplace) {
635 if (status != OK) { (void) m_unlink (outfile); }
636 free (outfile);
637 outfile = NULL;
638 }
639
640 fclose (*infp);
641 *infp = NULL;
642 free (input_filename);
643
644 return status;
645 }
646
647
648 /*
649 * Copy input message to output. Assumes not modifying in place, so this
650 * might be running as part of a pipeline.
651 */
652 static int
653 copy_input_to_output (const char *input_filename, FILE *infp,
654 const char *output_filename, FILE *outfp)
655 {
656 int in = fileno (infp);
657 int out = fileno (outfp);
658 int status = OK;
659
660 if (in != -1 && out != -1) {
661 cpydata (in, out, input_filename, output_filename);
662 } else {
663 status = NOTOK;
664 }
665
666 return status;
667 }
668
669
670 /*
671 * Fix mismatched outer level boundary.
672 */
673 static int
674 fix_boundary (CT *ct, int *message_mods)
675 {
676 struct multipart *mp;
677 int status = OK;
678
679 if (ct && (*ct)->c_type == CT_MULTIPART && bogus_mp_content) {
680 mp = (struct multipart *) (*ct)->c_ctparams;
681
682 /*
683 * 1) Get boundary at end of part.
684 * 2) Get boundary at beginning of part and compare to the end-of-part
685 * boundary.
686 * 3) Write out contents of ct to tmp file, replacing boundary in
687 * header with boundary from part. Set c_unlink to 1.
688 * 4) Free ct.
689 * 5) Call parse_mime() on the tmp file, replacing ct.
690 */
691
692 if (mp && mp->mp_start) {
693 char *part_boundary;
694
695 if (get_multipart_boundary (*ct, &part_boundary) == OK) {
696 char *fixed;
697
698 if ((fixed = m_mktemp2 (NULL, invo_name, NULL, &(*ct)->c_fp))) {
699 if (replace_boundary (*ct, fixed, part_boundary) == OK) {
700 char *filename = mh_xstrdup ((*ct)->c_file);
701 CT fixed_ct;
702
703 free_content (*ct);
704 if ((fixed_ct = parse_mime (fixed))) {
705 *ct = fixed_ct;
706 (*ct)->c_unlink = 1;
707
708 ++*message_mods;
709 if (verbosw) {
710 report (NULL, NULL, filename,
711 "fix multipart boundary");
712 }
713 } else {
714 *ct = NULL;
715 inform("unable to parse fixed part");
716 status = NOTOK;
717 }
718 free (filename);
719 } else {
720 inform("unable to replace broken boundary");
721 status = NOTOK;
722 }
723 } else {
724 inform("unable to create temporary file in %s",
725 get_temp_dir());
726 status = NOTOK;
727 }
728
729 free (part_boundary);
730 } else {
731 /* Couldn't fix the boundary. Report failure so that mhfixmsg
732 doesn't modify the message. */
733 status = NOTOK;
734 }
735 } else {
736 /* No multipart struct, even though the content type is
737 CT_MULTIPART. Report failure so that mhfixmsg doesn't modify
738 the message. */
739 status = NOTOK;
740 }
741 }
742
743 return status;
744 }
745
746
747 /*
748 * Find boundary at end of multipart.
749 */
750 static int
751 get_multipart_boundary (CT ct, char **part_boundary)
752 {
753 char buffer[NMH_BUFSIZ];
754 char *end_boundary = NULL;
755 off_t begin = (off_t) ct->c_end > (off_t) (ct->c_begin + sizeof buffer)
756 ? (off_t) (ct->c_end - sizeof buffer)
757 : (off_t) ct->c_begin;
758 size_t bytes_read;
759 int status = OK;
760
761 /* This will fail if the boundary spans fread() calls. NMH_BUFSIZ should
762 be big enough, even if it's just 1024, to make that unlikely. */
763
764 /* free_content() will close ct->c_fp if bogus MP boundary is fixed. */
765 if (! ct->c_fp && (ct->c_fp = fopen (ct->c_file, "r")) == NULL) {
766 advise (ct->c_file, "unable to open for reading");
767 return NOTOK;
768 }
769
770 /* Get boundary at end of multipart. */
771 while (begin >= (off_t) ct->c_begin) {
772 fseeko (ct->c_fp, begin, SEEK_SET);
773 while ((bytes_read = fread (buffer, 1, sizeof buffer, ct->c_fp)) > 0) {
774 char *cp = rfind_str (buffer, bytes_read, "--");
775
776 if (cp) {
777 char *end;
778
779 /* Trim off trailing "--" and anything beyond. */
780 *cp-- = '\0';
781 if ((end = rfind_str (buffer, cp - buffer, "\n"))) {
782 if (strlen (end) > 3 && *end++ == '\n' &&
783 *end++ == '-' && *end++ == '-') {
784 end_boundary = mh_xstrdup (end);
785 break;
786 }
787 }
788 }
789 }
790
791 if (end_boundary || begin <= (off_t) (ct->c_begin + sizeof buffer))
792 break;
793 begin -= sizeof buffer;
794 }
795
796 /* Get boundary at beginning of multipart. */
797 if (end_boundary) {
798 fseeko (ct->c_fp, ct->c_begin, SEEK_SET);
799 while ((bytes_read = fread (buffer, 1, sizeof buffer, ct->c_fp)) > 0) {
800 if (bytes_read >= strlen (end_boundary)) {
801 char *cp = find_str (buffer, bytes_read, end_boundary);
802
803 if (cp && cp - buffer >= 2 && *--cp == '-' &&
804 *--cp == '-' && (cp > buffer && *--cp == '\n')) {
805 status = OK;
806 break;
807 }
808 } else {
809 /* The start and end boundaries didn't match, or the
810 start boundary doesn't begin with "\n--" (or "--"
811 if at the beginning of buffer). Keep trying. */
812 status = NOTOK;
813 }
814 }
815 } else {
816 status = NOTOK;
817 }
818
819 if (ct->c_fp) {
820 fclose (ct->c_fp);
821 ct->c_fp = NULL;
822 }
823
824 if (status == OK) {
825 *part_boundary = end_boundary;
826 } else {
827 *part_boundary = NULL;
828 free (end_boundary);
829 }
830
831 return status;
832 }
833
834
835 /*
836 * Open and copy ct->c_file to file, replacing the multipart boundary.
837 */
838 static int
839 replace_boundary (CT ct, char *file, char *boundary)
840 {
841 FILE *fpin, *fpout;
842 int compnum, state;
843 char buf[NMH_BUFSIZ], name[NAMESZ];
844 char *np, *vp;
845 m_getfld_state_t gstate;
846 int status = OK;
847
848 if (ct->c_file == NULL) {
849 inform("missing input filename");
850 return NOTOK;
851 }
852
853 if ((fpin = fopen (ct->c_file, "r")) == NULL) {
854 advise (ct->c_file, "unable to open for reading");
855 return NOTOK;
856 }
857
858 if ((fpout = fopen (file, "w")) == NULL) {
859 fclose (fpin);
860 advise (file, "unable to open for writing");
861 return NOTOK;
862 }
863
864 gstate = m_getfld_state_init(fpin);
865 for (compnum = 1;;) {
866 int bufsz = (int) sizeof buf;
867
868 switch (state = m_getfld2(&gstate, name, buf, &bufsz)) {
869 case FLD:
870 case FLDPLUS:
871 compnum++;
872
873 /* get copies of the buffers */
874 np = mh_xstrdup (name);
875 vp = mh_xstrdup (buf);
876
877 /* if necessary, get rest of field */
878 while (state == FLDPLUS) {
879 bufsz = sizeof buf;
880 state = m_getfld2(&gstate, name, buf, &bufsz);
881 vp = add (buf, vp); /* add to previous value */
882 }
883
884 if (strcasecmp (TYPE_FIELD, np)) {
885 fprintf (fpout, "%s:%s", np, vp);
886 } else {
887 char *new_ctline, *new_params;
888
889 replace_param(&ct->c_ctinfo.ci_first_pm,
890 &ct->c_ctinfo.ci_last_pm, "boundary",
891 boundary, 0);
892
893 new_ctline = concat(" ", ct->c_ctinfo.ci_type, "/",
894 ct->c_ctinfo.ci_subtype, NULL);
895 new_params = output_params(LEN(TYPE_FIELD) +
896 strlen(new_ctline) + 1,
897 ct->c_ctinfo.ci_first_pm, NULL, 0);
898 fprintf (fpout, "%s:%s%s\n", np, new_ctline,
899 FENDNULL(new_params));
900 free(new_ctline);
901 free(new_params);
902 }
903
904 free (vp);
905 free (np);
906
907 continue;
908
909 case BODY:
910 putc('\n', fpout);
911 /* buf will have a terminating NULL, skip it. */
912 if ((int) fwrite (buf, 1, bufsz-1, fpout) < bufsz-1) {
913 advise (file, "fwrite");
914 }
915 continue;
916
917 case FILEEOF:
918 break;
919
920 case LENERR:
921 case FMTERR:
922 inform("message format error in component #%d", compnum);
923 status = NOTOK;
924 break;
925
926 default:
927 inform("getfld() returned %d", state);
928 status = NOTOK;
929 break;
930 }
931
932 break;
933 }
934
935 m_getfld_state_destroy (&gstate);
936 fclose (fpout);
937 fclose (fpin);
938
939 return status;
940 }
941
942
943 /*
944 * Fix Content-Type header to reflect the content of its part.
945 */
946 static int
947 fix_types (CT ct, svector_t fixtypes, int *message_mods)
948 {
949 int status = OK;
950
951 switch (ct->c_type) {
952 case CT_MULTIPART: {
953 struct multipart *m = (struct multipart *) ct->c_ctparams;
954 struct part *part;
955
956 for (part = m->mp_parts; status == OK && part; part = part->mp_next) {
957 status = fix_types (part->mp_part, fixtypes, message_mods);
958 }
959 break;
960 }
961
962 case CT_MESSAGE:
963 if (ct->c_subtype == MESSAGE_EXTERNAL) {
964 struct exbody *e = (struct exbody *) ct->c_ctparams;
965
966 status = fix_types (e->eb_content, fixtypes, message_mods);
967 }
968 break;
969
970 default: {
971 char **typep, *type;
972
973 if (ct->c_ctinfo.ci_type && ct->c_ctinfo.ci_subtype) {
974 for (typep = svector_strs (fixtypes);
975 typep && (type = *typep);
976 ++typep) {
977 char *type_subtype =
978 concat (ct->c_ctinfo.ci_type, "/", ct->c_ctinfo.ci_subtype,
979 NULL);
980
981 if (! strcasecmp (type, type_subtype) &&
982 decode_part (ct) == OK &&
983 ct->c_cefile.ce_file != NULL) {
984 char *ct_type_subtype = mime_type (ct->c_cefile.ce_file);
985 char *cp;
986
987 if ((cp = strchr (ct_type_subtype, ';'))) {
988 /* Truncate to remove any parameter list from
989 mime_type () result. */
990 *cp = '\0';
991 }
992
993 if (strcasecmp (type, ct_type_subtype)) {
994 char *ct_type, *ct_subtype;
995 HF hf;
996
997 /* The Content-Type header does not match the
998 content, so update these struct Content
999 fields to match:
1000 * c_type, c_subtype
1001 * c_ctinfo.ci_type, c_ctinfo.ci_subtype
1002 * c_ctline
1003 */
1004 /* Extract type and subtype from type/subtype. */
1005 ct_type = mh_xstrdup(ct_type_subtype);
1006 if ((cp = strchr (ct_type, '/'))) {
1007 *cp = '\0';
1008 ct_subtype = mh_xstrdup(++cp);
1009 } else {
1010 inform("missing / in MIME type of %s %s",
1011 ct->c_file, ct->c_partno);
1012 free (ct_type);
1013 return NOTOK;
1014 }
1015
1016 ct->c_type = ct_str_type (ct_type);
1017 ct->c_subtype = ct_str_subtype (ct->c_type, ct_subtype);
1018
1019 free (ct->c_ctinfo.ci_type);
1020 ct->c_ctinfo.ci_type = ct_type;
1021 free (ct->c_ctinfo.ci_subtype);
1022 ct->c_ctinfo.ci_subtype = ct_subtype;
1023 if (! replace_substring (&ct->c_ctline, type,
1024 ct_type_subtype)) {
1025 inform("did not find %s in %s",
1026 type, ct->c_ctline);
1027 }
1028
1029 /* Update Content-Type header field. */
1030 for (hf = ct->c_first_hf; hf; hf = hf->next) {
1031 if (! strcasecmp (TYPE_FIELD, hf->name)) {
1032 if (replace_substring (&hf->value, type,
1033 ct_type_subtype)) {
1034 ++*message_mods;
1035 if (verbosw) {
1036 report (NULL, ct->c_partno, ct->c_file,
1037 "change Content-Type in header "
1038 "from %s to %s",
1039 type, ct_type_subtype);
1040 }
1041 break;
1042 }
1043 inform("did not find %s in %s", type, hf->value);
1044 }
1045 }
1046 }
1047 free (ct_type_subtype);
1048 }
1049 free (type_subtype);
1050 }
1051 }
1052 }}
1053
1054 return status;
1055 }
1056
1057
/*
 * Replace the first occurrence of old in *str with new.
 *
 * The result is built in newly allocated storage; the previous *str is
 * freed and *str is pointed at the result, which is also returned.
 * Returns NULL, leaving *str alone, if old does not occur in *str.
 */
char *
replace_substring (char **str, const char *old, const char *new)
{
    char *match = strstr (*str, old);
    char *tail, *result;

    if (match == NULL) {
        return NULL;
    }

    /* Everything following the matched text. */
    tail = match + strlen (old);

    if (match == *str) {
        /* Match is at the very start, so there is no leading text. */
        result = concat (new, tail, NULL);
    } else {
        /* Copy the string and cut it off where the match begins. */
        char *head = mh_xstrdup(*str);

        head[match - *str] = '\0';
        result = concat (head, new, tail, NULL);
        free (head);
    }

    free (*str);
    *str = result;

    return result;
}
1086
1087
/*
 * Remove a name=value parameter, given just its name, from a header value.
 *
 * str is edited in place and returned.  The first occurrence of name
 * immediately followed by "=" is removed together with its value, any
 * whitespace before it, and one ';' before that.  str is returned
 * unchanged if there is no such occurrence.
 *
 * A quoted value extends to its closing quote, or to the end of the
 * string if the quote is never closed.  An unquoted value extends to the
 * next whitespace or ';', so a parameter that follows without
 * intervening whitespace keeps its separator.
 */
char *
remove_parameter (char *str, const char *name)
{
    /* It looks to me, based on the BNF in RFC 2045, that there can't
       be whitespace between the parameter name and the "=", or
       between the "=" and the parameter value. */
    const size_t name_len = strlen (name);
    char *cp;

    /* Find the first occurrence of name that is directly followed by
       '='. */
    for (cp = str; (cp = strstr (cp, name)) != NULL; ++cp) {
        if (cp[name_len] == '=') { break; }
        /* Only reachable with an empty name: don't step past the end. */
        if (*cp == '\0') { cp = NULL; break; }
    }

    if (cp) {
        char *start, *end;

        /* Remove any leading spaces, before the parameter name. */
        for (start = cp;
             start > str && isspace ((unsigned char) *(start-1));
             --start) {
            continue;
        }
        /* Remove a leading semicolon. */
        if (start > str && *(start-1) == ';') { --start; }

        end = cp + name_len + 1;
        if (*end == '"') {
            /* Skip past the quoted value, and then the final quote,
               if there is one. */
            for (++end; *end && *end != '"'; ++end) { continue; }
            if (*end == '"') { ++end; }
        } else {
            /* Skip past the value; it may be empty. */
            for (; *end && *end != ';' && ! isspace ((unsigned char) *end);
                 ++end) {
                continue;
            }
        }

        /* Close the gap, including the terminating null. */
        (void) memmove (start, end, strlen (end) + 1);
    }

    return str;
}
1134
1135
1136 /*
1137 * Fix Content-Transfer-Encoding of composite,, e.g., message or multipart, part.
1138 * According to RFC 2045 Sec. 6.4, it must be 7bit, 8bit, or binary. Set it to
1139 * 8 bit.
1140 */
1141 static int
1142 fix_composite_cte (CT ct, int *message_mods)
1143 {
1144 int status = OK;
1145
1146 if (ct->c_type == CT_MESSAGE || ct->c_type == CT_MULTIPART) {
1147 if (ct->c_encoding != CE_7BIT && ct->c_encoding != CE_8BIT &&
1148 ct->c_encoding != CE_BINARY) {
1149 HF hf;
1150
1151 for (hf = ct->c_first_hf; hf; hf = hf->next) {
1152 char *name = hf->name;
1153 for (; isspace((unsigned char)*name); ++name) {
1154 continue;
1155 }
1156
1157 if (! strncasecmp (name, ENCODING_FIELD,
1158 LEN(ENCODING_FIELD))) {
1159 char *prefix = "Nmh-REPLACED-INVALID-";
1160 HF h;
1161
1162 NEW(h);
1163 h->name = mh_xstrdup (hf->name);
1164 h->hf_encoding = hf->hf_encoding;
1165 h->next = hf->next;
1166 hf->next = h;
1167
1168 /* Retain old header but prefix its name. */
1169 free (hf->name);
1170 hf->name = concat (prefix, h->name, NULL);
1171
1172 ++*message_mods;
1173 if (verbosw) {
1174 char *encoding = cpytrim (hf->value);
1175 report (NULL, ct->c_partno, ct->c_file,
1176 "replace Content-Transfer-Encoding of %s "
1177 "with 8 bit", encoding);
1178 free (encoding);
1179 }
1180
1181 h->value = mh_xstrdup (" 8bit\n");
1182
1183 /* Don't need to warn for multiple C-T-E header
1184 fields, parse_mime() already does that. But
1185 if there are any, fix them all as necessary. */
1186 hf = h;
1187 }
1188 }
1189
1190 set_ce (ct, CE_8BIT);
1191 }
1192
1193 if (ct->c_type == CT_MULTIPART) {
1194 struct multipart *m;
1195 struct part *part;
1196
1197 m = (struct multipart *) ct->c_ctparams;
1198 for (part = m->mp_parts; part; part = part->mp_next) {
1199 if (fix_composite_cte (part->mp_part, message_mods) != OK) {
1200 status = NOTOK;
1201 break;
1202 }
1203 }
1204 }
1205 }
1206
1207 return status;
1208 }
1209
1210
1211 /*
1212 * Set content encoding.
1213 */
1214 static int
1215 set_ce (CT ct, int encoding)
1216 {
1217 const char *ce = ce_str (encoding);
1218 const struct str2init *ctinit = get_ce_method (ce);
1219
1220 if (ctinit) {
1221 char *cte = concat (" ", ce, "\n", NULL);
1222 bool found_cte = false;
1223 HF hf;
1224 /* Decoded contents might be in ct->c_cefile.ce_file, if the
1225 caller is decode_text_parts (). Save because we'll
1226 overwrite below. */
1227 struct cefile decoded_content_info = ct->c_cefile;
1228
1229 ct->c_encoding = encoding;
1230
1231 ct->c_ctinitfnx = ctinit->si_init;
1232 /* This will assign ct->c_cefile with an all-0 struct, which
1233 is what we want. */
1234 (*ctinit->si_init) (ct);
1235 /* After returning, the caller should set
1236 ct->c_cefile.ce_file to the name of the file containing
1237 the contents. */
1238
1239 if (ct->c_ceclosefnx) {
1240 (*ct->c_ceclosefnx) (ct);
1241 }
1242
1243 /* Restore the cefile. */
1244 ct->c_cefile = decoded_content_info;
1245
1246 /* Update/add Content-Transfer-Encoding header field. */
1247 for (hf = ct->c_first_hf; hf; hf = hf->next) {
1248 if (! strcasecmp (ENCODING_FIELD, hf->name)) {
1249 found_cte = true;
1250 free (hf->value);
1251 hf->value = cte;
1252 }
1253 }
1254 if (! found_cte) {
1255 add_header (ct, mh_xstrdup (ENCODING_FIELD), cte);
1256 }
1257
1258 /* Update c_celine. It's used only by mhlist -debug. */
1259 free (ct->c_celine);
1260 ct->c_celine = mh_xstrdup (cte);
1261
1262 return OK;
1263 }
1264
1265 return NOTOK;
1266 }
1267
1268
1269 /*
1270 * Make sure each text part has a corresponding text/plain part.
1271 */
1272 static int
1273 ensure_text_plain (CT *ct, CT parent, int *message_mods, int replacetextplain)
1274 {
1275 int status = OK;
1276
1277 switch ((*ct)->c_type) {
1278 case CT_TEXT: {
1279 /* Nothing to do for text/plain. */
1280 if ((*ct)->c_subtype == TEXT_PLAIN) { return OK; }
1281
1282 if (parent && parent->c_type == CT_MULTIPART &&
1283 parent->c_subtype == MULTI_ALTERNATE) {
1284 int new_subpart_number = 1;
1285 int has_text_plain =
1286 find_textplain_sibling (parent, replacetextplain,
1287 &new_subpart_number);
1288
1289 if (! has_text_plain) {
1290 /* Parent is a multipart/alternative. Insert a new
1291 text/plain subpart. */
1292 const int inserted =
1293 insert_new_text_plain_part (*ct, new_subpart_number,
1294 parent);
1295 if (inserted) {
1296 ++*message_mods;
1297 if (verbosw) {
1298 report (NULL, parent->c_partno, parent->c_file,
1299 "insert text/plain part");
1300 }
1301 } else {
1302 status = NOTOK;
1303 }
1304 }
1305 } else if (parent && parent->c_type == CT_MULTIPART &&
1306 parent->c_subtype == MULTI_RELATED) {
1307 char *type_subtype =
1308 concat ((*ct)->c_ctinfo.ci_type, "/",
1309 (*ct)->c_ctinfo.ci_subtype, NULL);
1310 const char *parent_type =
1311 get_param (parent->c_ctinfo.ci_first_pm, "type", '?', 1);
1312 int new_subpart_number = 1;
1313 int has_text_plain = 0;
1314
1315 /* Have to do string comparison on the subtype because we
1316 don't enumerate all of them in c_subtype values.
1317 parent_type will be NULL if the multipart/related part
1318 doesn't have a type parameter. The type parameter must
1319 be specified according to RFC 2387 Sec. 3.1 but not all
1320 messages comply. */
1321 if (parent_type && strcasecmp (type_subtype, parent_type) == 0) {
1322 /* The type of this part matches the root type of the
1323 parent multipart/related. Look to see if there's
1324 text/plain sibling. */
1325 has_text_plain =
1326 find_textplain_sibling (parent, replacetextplain,
1327 &new_subpart_number);
1328 }
1329
1330 free (type_subtype);
1331
1332 if (! has_text_plain) {
1333 struct multipart *mp = (struct multipart *) parent->c_ctparams;
1334 struct part *part;
1335 int siblings = 0;
1336
1337 for (part = mp->mp_parts; part; part = part->mp_next) {
1338 if (*ct != part->mp_part) {
1339 ++siblings;
1340 }
1341 }
1342
1343 if (siblings) {
1344 /* Parent is a multipart/related. Insert a new
1345 text/plain subpart in a new multipart/alternative. */
1346 if (insert_into_new_mp_alt (ct, message_mods)) {
1347 /* Not an error if text/plain couldn't be added. */
1348 }
1349 } else {
1350 /* There are no siblings, so insert a new text/plain
1351 subpart, and change the parent type from
1352 multipart/related to multipart/alternative. */
1353 const int inserted =
1354 insert_new_text_plain_part (*ct, new_subpart_number,
1355 parent);
1356
1357 if (inserted) {
1358 HF hf;
1359
1360 parent->c_subtype = MULTI_ALTERNATE;
1361 free (parent->c_ctinfo.ci_subtype);
1362 parent->c_ctinfo.ci_subtype = mh_xstrdup("alternative");
1363 if (! replace_substring (&parent->c_ctline, "/related",
1364 "/alternative")) {
1365 inform("did not find multipart/related in %s",
1366 parent->c_ctline);
1367 }
1368
1369 /* Update Content-Type header field. */
1370 for (hf = parent->c_first_hf; hf; hf = hf->next) {
1371 if (! strcasecmp (TYPE_FIELD, hf->name)) {
1372 if (replace_substring (&hf->value, "/related",
1373 "/alternative")) {
1374 ++*message_mods;
1375 if (verbosw) {
1376 report (NULL, parent->c_partno,
1377 parent->c_file,
1378 "insert text/plain part");
1379 }
1380
1381 /* Remove, e.g., type="text/html" from
1382 multipart/alternative. */
1383 remove_parameter (hf->value, "type");
1384 break;
1385 }
1386 inform("did not find multipart/"
1387 "related in header %s", hf->value);
1388 }
1389 }
1390 } else {
1391 /* Not an error if text/plain couldn't be inserted. */
1392 }
1393 }
1394 }
1395 } else {
1396 if (insert_into_new_mp_alt (ct, message_mods)) {
1397 status = NOTOK;
1398 }
1399 }
1400 break;
1401 }
1402
1403 case CT_MULTIPART: {
1404 struct multipart *mp = (struct multipart *) (*ct)->c_ctparams;
1405 struct part *part;
1406
1407 for (part = mp->mp_parts; status == OK && part; part = part->mp_next) {
1408 if ((*ct)->c_type == CT_MULTIPART) {
1409 status = ensure_text_plain (&part->mp_part, *ct, message_mods,
1410 replacetextplain);
1411 }
1412 }
1413 break;
1414 }
1415
1416 case CT_MESSAGE:
1417 if ((*ct)->c_subtype == MESSAGE_EXTERNAL) {
1418 struct exbody *e = (struct exbody *) (*ct)->c_ctparams;
1419
1420 status = ensure_text_plain (&e->eb_content, *ct, message_mods,
1421 replacetextplain);
1422 }
1423 break;
1424 }
1425
1426 return status;
1427 }
1428
1429
1430 /*
1431 * See if there is a sibling text/plain, and return its subpart number.
1432 */
1433 static int
1434 find_textplain_sibling (CT parent, int replacetextplain,
1435 int *new_subpart_number)
1436 {
1437 struct multipart *mp = (struct multipart *) parent->c_ctparams;
1438 struct part *part, *prev;
1439 bool has_text_plain = false;
1440
1441 for (prev = part = mp->mp_parts; part; part = part->mp_next) {
1442 ++*new_subpart_number;
1443 if (part->mp_part->c_type == CT_TEXT &&
1444 part->mp_part->c_subtype == TEXT_PLAIN) {
1445 if (replacetextplain) {
1446 struct part *old_part;
1447 if (part == mp->mp_parts) {
1448 old_part = mp->mp_parts;
1449 mp->mp_parts = part->mp_next;
1450 } else {
1451 old_part = prev->mp_next;
1452 prev->mp_next = part->mp_next;
1453 }
1454 if (verbosw) {
1455 report (NULL, parent->c_partno, parent->c_file,
1456 "remove text/plain part %s",
1457 old_part->mp_part->c_partno);
1458 }
1459 free_content (old_part->mp_part);
1460 free (old_part);
1461 } else {
1462 has_text_plain = true;
1463 }
1464 break;
1465 }
1466 prev = part;
1467 }
1468
1469 return has_text_plain;
1470 }
1471
1472
1473 /*
1474 * Insert a new text/plain part.
1475 */
1476 static int
1477 insert_new_text_plain_part (CT ct, int new_subpart_number, CT parent)
1478 {
1479 struct multipart *mp = (struct multipart *) parent->c_ctparams;
1480 struct part *new_part;
1481
1482 NEW(new_part);
1483 if ((new_part->mp_part = build_text_plain_part (ct))) {
1484 char buffer[16];
1485 snprintf (buffer, sizeof buffer, "%d", new_subpart_number);
1486
1487 new_part->mp_next = mp->mp_parts;
1488 mp->mp_parts = new_part;
1489 new_part->mp_part->c_partno =
1490 concat (parent->c_partno ? parent->c_partno : "1", ".",
1491 buffer, NULL);
1492
1493 return 1;
1494 }
1495
1496 free_content (new_part->mp_part);
1497 free (new_part);
1498
1499 return 0;
1500 }
1501
1502
1503 /*
1504 * Create a text/plain part to go along with non-plain sibling part.
1505 */
1506 static CT
1507 build_text_plain_part (CT encoded_part)
1508 {
1509 CT tp_part = divide_part (encoded_part);
1510 char *tmp_plain_file = NULL;
1511
1512 if (decode_part (tp_part) == OK) {
1513 /* Now, tp_part->c_cefile.ce_file is the name of the tmp file that
1514 contains the decoded contents. And the decoding function, such
1515 as openQuoted, will have set ...->ce_unlink to 1 so that it will
1516 be unlinked by free_content (). */
1517 char *tempfile;
1518
1519 /* This m_mktemp2() call closes the temp file. */
1520 if ((tempfile = m_mktemp2 (NULL, invo_name, NULL, NULL)) == NULL) {
1521 inform("unable to create temporary file in %s",
1522 get_temp_dir());
1523 } else {
1524 tmp_plain_file = mh_xstrdup (tempfile);
1525 if (reformat_part (tp_part, tmp_plain_file,
1526 tp_part->c_ctinfo.ci_type,
1527 tp_part->c_ctinfo.ci_subtype,
1528 tp_part->c_type) == OK) {
1529 return tp_part;
1530 }
1531 }
1532 }
1533
1534 free_content (tp_part);
1535 if (tmp_plain_file) { (void) m_unlink (tmp_plain_file); }
1536 free (tmp_plain_file);
1537
1538 return NULL;
1539 }
1540
1541
1542 /*
1543 * Slip new text/plain part into a new multipart/alternative.
1544 */
1545 static int
1546 insert_into_new_mp_alt (CT *ct, int *message_mods)
1547 {
1548 CT tp_part = build_text_plain_part (*ct);
1549 int status = OK;
1550
1551 if (tp_part) {
1552 CT mp_alt = build_multipart_alt (*ct, tp_part, CT_MULTIPART,
1553 MULTI_ALTERNATE);
1554 if (mp_alt) {
1555 struct multipart *mp = (struct multipart *) mp_alt->c_ctparams;
1556
1557 if (mp && mp->mp_parts) {
1558 mp->mp_parts->mp_part = tp_part;
1559 /* Make the new multipart/alternative the parent. */
1560 *ct = mp_alt;
1561
1562 ++*message_mods;
1563 if (verbosw) {
1564 report (NULL, (*ct)->c_partno, (*ct)->c_file,
1565 "insert text/plain part");
1566 }
1567 } else {
1568 free_content (tp_part);
1569 free_content (mp_alt);
1570 status = NOTOK;
1571 }
1572 } else {
1573 status = NOTOK;
1574 }
1575 } else {
1576 /* Not an error if text/plain couldn't be built. */
1577 }
1578
1579 return status;
1580 }
1581
1582
1583 /*
1584 * Clone a MIME part.
1585 */
1586 static CT
1587 divide_part (CT ct)
1588 {
1589 CT new_part;
1590
1591 NEW0(new_part);
1592 /* Just copy over what is needed for decoding. c_vrsn and
1593 c_celine aren't necessary. */
1594 new_part->c_file = mh_xstrdup (ct->c_file);
1595 new_part->c_begin = ct->c_begin;
1596 new_part->c_end = ct->c_end;
1597 copy_ctinfo (&new_part->c_ctinfo, &ct->c_ctinfo);
1598 new_part->c_type = ct->c_type;
1599 new_part->c_cefile = ct->c_cefile;
1600 new_part->c_encoding = ct->c_encoding;
1601 new_part->c_ctinitfnx = ct->c_ctinitfnx;
1602 new_part->c_ceopenfnx = ct->c_ceopenfnx;
1603 new_part->c_ceclosefnx = ct->c_ceclosefnx;
1604 new_part->c_cesizefnx = ct->c_cesizefnx;
1605
1606 /* c_ctline is used by reformat__part(), so it can preserve
1607 anything after the type/subtype. */
1608 new_part->c_ctline = mh_xstrdup (ct->c_ctline);
1609
1610 return new_part;
1611 }
1612
1613
1614 /*
1615 * Copy the content info from one part to another.
1616 */
1617 static void
1618 copy_ctinfo (CI dest, CI src)
1619 {
1620 PM s_pm, d_pm;
1621
1622 dest->ci_type = src->ci_type ? mh_xstrdup (src->ci_type) : NULL;
1623 dest->ci_subtype = src->ci_subtype ? mh_xstrdup (src->ci_subtype) : NULL;
1624
1625 for (s_pm = src->ci_first_pm; s_pm; s_pm = s_pm->pm_next) {
1626 d_pm = add_param(&dest->ci_first_pm, &dest->ci_last_pm, s_pm->pm_name,
1627 s_pm->pm_value, 0);
1628 if (s_pm->pm_charset) {
1629 d_pm->pm_charset = mh_xstrdup(s_pm->pm_charset);
1630 }
1631 if (s_pm->pm_lang) {
1632 d_pm->pm_lang = mh_xstrdup(s_pm->pm_lang);
1633 }
1634 }
1635
1636 dest->ci_comment = src->ci_comment ? mh_xstrdup (src->ci_comment) : NULL;
1637 dest->ci_magic = src->ci_magic ? mh_xstrdup (src->ci_magic) : NULL;
1638 }
1639
1640
1641 /*
1642 * Decode content.
1643 */
1644 static int
1645 decode_part (CT ct)
1646 {
1647 char *tmp_decoded;
1648 int status;
1649 FILE *file;
1650 char *tempfile;
1651
1652 if ((tempfile = m_mktemp2 (NULL, invo_name, NULL, &file)) == NULL) {
1653 die("unable to create temporary file in %s", get_temp_dir());
1654 }
1655 tmp_decoded = mh_xstrdup (tempfile);
1656 /* The following call will load ct->c_cefile.ce_file with the tmp
1657 filename of the decoded content. tmp_decoded will contain the
1658 encoded output, get rid of that. */
1659 status = output_message_fp (ct, file, tmp_decoded);
1660 (void) m_unlink (tmp_decoded);
1661 free (tmp_decoded);
1662 if (fclose (file)) {
1663 inform("unable to close temporary file %s, continuing...", tempfile);
1664 }
1665
1666 return status;
1667 }
1668
1669
1670 /*
1671 * Reformat content as plain text.
1672 * Some of the arguments aren't really needed now, but maybe will
1673 * be in the future for other than text types.
1674 */
1675 static int
1676 reformat_part (CT ct, char *file, char *type, char *subtype, int c_type)
1677 {
1678 int output_subtype, output_encoding;
1679 const char *reason = NULL;
1680 char *cp, *cf;
1681 int status;
1682
1683 /* Hacky: this redirects the output from whatever command is used
1684 to show the part to a file. So, the user can't have any output
1685 redirection in that command.
1686 Could show_multi() in mhshowsbr.c avoid this? */
1687
1688 /* Check for invo_name-format-type/subtype. */
1689 if ((cf = context_find_by_type ("format", type, subtype)) == NULL) {
1690 if (verbosw) {
1691 inform("Don't know how to convert %s, there is no "
1692 "%s-format-%s/%s profile entry",
1693 ct->c_file, invo_name, type, subtype);
1694 }
1695 return NOTOK;
1696 }
1697 if (strchr (cf, '>')) {
1698 inform("'>' prohibited in \"%s\",\nplease fix your "
1699 "%s-format-%s/%s profile entry", cf, invo_name, type,
1700 FENDNULL(subtype));
1701
1702 return NOTOK;
1703 }
1704
1705 cp = concat (cf, " >", file, NULL);
1706 status = show_content_aux (ct, 0, cp, NULL, NULL);
1707 free (cp);
1708
1709 /* Unlink decoded content tmp file and free its filename to avoid
1710 leaks. The file stream should already have been closed. */
1711 if (ct->c_cefile.ce_unlink) {
1712 (void) m_unlink (ct->c_cefile.ce_file);
1713 free (ct->c_cefile.ce_file);
1714 ct->c_cefile.ce_file = NULL;
1715 ct->c_cefile.ce_unlink = 0;
1716 }
1717
1718 if (c_type == CT_TEXT) {
1719 output_subtype = TEXT_PLAIN;
1720 } else {
1721 /* Set subtype to 0, which is always an UNKNOWN subtype. */
1722 output_subtype = 0;
1723 }
1724
1725 output_encoding = content_encoding (ct, &reason);
1726 if (status == OK &&
1727 set_ct_type (ct, c_type, output_subtype, output_encoding) == OK) {
1728 ct->c_cefile.ce_file = file;
1729 ct->c_cefile.ce_unlink = 1;
1730 } else {
1731 ct->c_cefile.ce_unlink = 0;
1732 status = NOTOK;
1733 }
1734
1735 return status;
1736 }
1737
1738
1739 /*
1740 * Fill in a multipart/alternative part.
1741 */
1742 static CT
1743 build_multipart_alt (CT first_alt, CT new_part, int type, int subtype)
1744 {
1745 char *boundary_prefix = "----=_nmh-multipart";
1746 char *boundary = concat (boundary_prefix, first_alt->c_partno, NULL);
1747 char *boundary_indicator = "; boundary=";
1748 char *typename, *subtypename, *name;
1749 CT ct;
1750 struct part *p;
1751 struct multipart *m;
1752 const struct str2init *ctinit;
1753
1754 NEW0(ct);
1755
1756 /* Set up the multipart/alternative part. These fields of *ct were
1757 initialized to 0 by mh_xcalloc():
1758 c_fp, c_unlink, c_begin, c_end,
1759 c_vrsn, c_ctline, c_celine,
1760 c_id, c_descr, c_dispo, c_partno,
1761 c_ctinfo.ci_comment, c_ctinfo.ci_magic,
1762 c_cefile, c_encoding,
1763 c_digested, c_digest[16], c_ctexbody,
1764 c_ctinitfnx, c_ceopenfnx, c_ceclosefnx, c_cesizefnx,
1765 c_umask, c_rfc934,
1766 c_showproc, c_termproc, c_storeproc, c_storage, c_folder
1767 */
1768
1769 ct->c_file = mh_xstrdup (first_alt->c_file);
1770 ct->c_type = type;
1771 ct->c_subtype = subtype;
1772
1773 ctinit = get_ct_init (ct->c_type);
1774
1775 typename = ct_type_str (type);
1776 subtypename = ct_subtype_str (type, subtype);
1777
1778 {
1779 int serial = 0;
1780 int found_boundary = 1;
1781
1782 while (found_boundary && serial < 1000000) {
1783 found_boundary = 0;
1784
1785 /* Ensure that the boundary doesn't appear in the decoded
1786 content. */
1787 if (new_part->c_cefile.ce_file) {
1788 if ((found_boundary =
1789 boundary_in_content (&new_part->c_cefile.ce_fp,
1790 new_part->c_cefile.ce_file,
1791 boundary)) == NOTOK) {
1792 goto return_null;
1793 }
1794 }
1795
1796 /* Ensure that the boundary doesn't appear in the encoded
1797 content. */
1798 if (! found_boundary && new_part->c_file) {
1799 if ((found_boundary =
1800 boundary_in_content (&new_part->c_fp,
1801 new_part->c_file,
1802 boundary)) == NOTOK) {
1803 goto return_null;
1804 }
1805 }
1806
1807 if (found_boundary) {
1808 /* Try a slightly different boundary. */
1809 char buffer2[16];
1810
1811 free (boundary);
1812 ++serial;
1813 snprintf (buffer2, sizeof buffer2, "%d", serial);
1814 boundary =
1815 concat (boundary_prefix,
1816 FENDNULL(first_alt->c_partno),
1817 "-", buffer2, NULL);
1818 }
1819 }
1820
1821 if (found_boundary) {
1822 inform("giving up trying to find a unique boundary");
1823 goto return_null;
1824 }
1825 }
1826
1827 name = concat (" ", typename, "/", subtypename, boundary_indicator, "\"",
1828 boundary, "\"", NULL);
1829
1830 /* Load c_first_hf and c_last_hf. */
1831 transfer_noncontent_headers (first_alt, ct);
1832 add_header (ct, mh_xstrdup (TYPE_FIELD), concat (name, "\n", NULL));
1833 free (name);
1834
1835 /* Load c_partno. */
1836 if (first_alt->c_partno) {
1837 ct->c_partno = mh_xstrdup (first_alt->c_partno);
1838 free (first_alt->c_partno);
1839 first_alt->c_partno = concat (ct->c_partno, ".1", NULL);
1840 new_part->c_partno = concat (ct->c_partno, ".2", NULL);
1841 } else {
1842 first_alt->c_partno = mh_xstrdup ("1");
1843 new_part->c_partno = mh_xstrdup ("2");
1844 }
1845
1846 if (ctinit) {
1847 ct->c_ctinfo.ci_type = mh_xstrdup (typename);
1848 ct->c_ctinfo.ci_subtype = mh_xstrdup (subtypename);
1849 }
1850
1851 add_param(&ct->c_ctinfo.ci_first_pm, &ct->c_ctinfo.ci_last_pm,
1852 "boundary", boundary, 0);
1853
1854 NEW(p);
1855 NEW(p->mp_next);
1856 p->mp_next->mp_next = NULL;
1857 p->mp_next->mp_part = first_alt;
1858
1859 NEW0(m);
1860 m->mp_start = concat (boundary, "\n", NULL);
1861 m->mp_stop = concat (boundary, "--\n", NULL);
1862 m->mp_parts = p;
1863 ct->c_ctparams = m;
1864
1865 free (boundary);
1866
1867 return ct;
1868
1869 return_null:
1870 free_content(ct);
1871 free(boundary);
1872 return NULL;
1873 }
1874
1875
1876 /*
1877 * Check that the boundary does not appear in the content.
1878 */
1879 static int
1880 boundary_in_content (FILE **fp, char *file, const char *boundary)
1881 {
1882 char buffer[NMH_BUFSIZ];
1883 size_t bytes_read;
1884 bool found_boundary = false;
1885
1886 /* free_content() will close *fp if we fopen it here. */
1887 if (! *fp && (*fp = fopen (file, "r")) == NULL) {
1888 advise (file, "unable to open %s for reading", file);
1889 return NOTOK;
1890 }
1891
1892 fseeko (*fp, 0L, SEEK_SET);
1893 while ((bytes_read = fread (buffer, 1, sizeof buffer, *fp)) > 0) {
1894 if (find_str (buffer, bytes_read, boundary)) {
1895 found_boundary = true;
1896 break;
1897 }
1898 }
1899
1900 return found_boundary;
1901 }
1902
1903
1904 /*
1905 * Remove all non-Content headers.
1906 */
1907 static void
1908 transfer_noncontent_headers (CT old, CT new)
1909 {
1910 HF hp, hp_prev;
1911
1912 hp_prev = hp = old->c_first_hf;
1913 while (hp) {
1914 HF next = hp->next;
1915
1916 if (strncasecmp (XXX_FIELD_PRF, hp->name, LEN(XXX_FIELD_PRF))) {
1917 if (hp == old->c_last_hf) {
1918 if (hp == old->c_first_hf) {
1919 old->c_last_hf = old->c_first_hf = NULL;
1920 } else {
1921 hp_prev->next = NULL;
1922 old->c_last_hf = hp_prev;
1923 }
1924 } else {
1925 if (hp == old->c_first_hf) {
1926 old->c_first_hf = next;
1927 } else {
1928 hp_prev->next = next;
1929 }
1930 }
1931
1932 /* Put node hp in the new CT. */
1933 if (new->c_first_hf == NULL) {
1934 new->c_first_hf = hp;
1935 } else {
1936 new->c_last_hf->next = hp;
1937 }
1938 new->c_last_hf = hp;
1939 } else {
1940 /* A Content- header, leave in old. */
1941 hp_prev = hp;
1942 }
1943
1944 hp = next;
1945 }
1946 }
1947
1948
1949 /*
1950 * Set content type.
1951 */
1952 static int
1953 set_ct_type (CT ct, int type, int subtype, int encoding)
1954 {
1955 char *typename = ct_type_str (type);
1956 char *subtypename = ct_subtype_str (type, subtype);
1957 /* E.g, " text/plain" */
1958 char *type_subtypename = concat (" ", typename, "/", subtypename, NULL);
1959 /* E.g, " text/plain\n" */
1960 char *name_plus_nl = concat (type_subtypename, "\n", NULL);
1961 bool found_content_type = false;
1962 HF hf;
1963 const char *cp = NULL;
1964 char *ctline;
1965 int status;
1966
1967 /* Update/add Content-Type header field. */
1968 for (hf = ct->c_first_hf; hf; hf = hf->next) {
1969 if (! strcasecmp (TYPE_FIELD, hf->name)) {
1970 found_content_type = true;
1971 free (hf->value);
1972 hf->value = (cp = strchr (ct->c_ctline, ';'))
1973 ? concat (type_subtypename, cp, "\n", NULL)
1974 : mh_xstrdup (name_plus_nl);
1975 }
1976 }
1977 if (! found_content_type) {
1978 add_header (ct, mh_xstrdup (TYPE_FIELD),
1979 (cp = strchr (ct->c_ctline, ';'))
1980 ? concat (type_subtypename, cp, "\n", NULL)
1981 : mh_xstrdup (name_plus_nl));
1982 }
1983
1984 /* Some of these might not be used, but set them anyway. */
1985 ctline = cp
1986 ? concat (type_subtypename, cp, NULL)
1987 : concat (type_subtypename, NULL);
1988 free (ct->c_ctline);
1989 ct->c_ctline = ctline;
1990 /* Leave other ctinfo members as they were. */
1991 free (ct->c_ctinfo.ci_type);
1992 ct->c_ctinfo.ci_type = mh_xstrdup (typename);
1993 free (ct->c_ctinfo.ci_subtype);
1994 ct->c_ctinfo.ci_subtype = mh_xstrdup (subtypename);
1995 ct->c_type = type;
1996 ct->c_subtype = subtype;
1997
1998 free (name_plus_nl);
1999 free (type_subtypename);
2000
2001 status = set_ce (ct, encoding);
2002
2003 return status;
2004 }
2005
2006
2007 /*
2008 * It's not necessary to update the charset parameter of a Content-Type
2009 * header for a text part. According to RFC 2045 Sec. 6.4, the body
2010 * (content) was originally in the specified charset, "and will be in
2011 * that character set again after decoding."
2012 */
2013 static int
2014 decode_text_parts (CT ct, int encoding, const char *decodetypes,
2015 int *message_mods)
2016 {
2017 int status = OK;
2018 int lf_line_endings = 0;
2019
2020 switch (ct->c_type) {
2021 case CT_MULTIPART: {
2022 struct multipart *m = (struct multipart *) ct->c_ctparams;
2023 struct part *part;
2024
2025 /* Should check to see if the body for this part is encoded?
2026 For now, it gets passed along as-is by InitMultiPart(). */
2027 for (part = m->mp_parts; status == OK && part; part = part->mp_next) {
2028 status = decode_text_parts (part->mp_part, encoding, decodetypes,
2029 message_mods);
2030 }
2031 break;
2032 }
2033
2034 case CT_MESSAGE:
2035 if (ct->c_subtype == MESSAGE_EXTERNAL) {
2036 struct exbody *e = (struct exbody *) ct->c_ctparams;
2037
2038 status = decode_text_parts (e->eb_content, encoding, decodetypes,
2039 message_mods);
2040 }
2041 break;
2042
2043 default:
2044 if (! should_decode(decodetypes, ct->c_ctinfo.ci_type, ct->c_ctinfo.ci_subtype)) {
2045 break;
2046 }
2047
2048 lf_line_endings =
2049 ct->c_ctparams && ((struct text *) ct->c_ctparams)->lf_line_endings;
2050
2051 switch (ct->c_encoding) {
2052 case CE_BASE64:
2053 case CE_QUOTED: {
2054 int ct_encoding;
2055
2056 if (decode_part (ct) == OK && ct->c_cefile.ce_file) {
2057 const char *reason = NULL;
2058
2059 if ((ct_encoding = content_encoding (ct, &reason)) == CE_BINARY
2060 && encoding != CE_BINARY) {
2061 /* The decoding isn't acceptable so discard it.
2062 Leave status as OK to allow other transformations. */
2063 if (verbosw) {
2064 report (NULL, ct->c_partno, ct->c_file,
2065 "will not decode%s because it is binary (%s)",
2066 ct->c_partno ? ""
2067 : (FENDNULL(ct->c_ctline)),
2068 reason);
2069 }
2070 (void) m_unlink (ct->c_cefile.ce_file);
2071 free (ct->c_cefile.ce_file);
2072 ct->c_cefile.ce_file = NULL;
2073 } else if (ct->c_encoding == CE_QUOTED &&
2074 ct_encoding == CE_8BIT && encoding == CE_7BIT) {
2075 /* The decoding isn't acceptable so discard it.
2076 Leave status as OK to allow other transformations. */
2077 if (verbosw) {
2078 report (NULL, ct->c_partno, ct->c_file,
2079 "will not decode%s because it is 8bit",
2080 ct->c_partno ? ""
2081 : (FENDNULL(ct->c_ctline)));
2082 }
2083 (void) m_unlink (ct->c_cefile.ce_file);
2084 free (ct->c_cefile.ce_file);
2085 ct->c_cefile.ce_file = NULL;
2086 } else {
2087 int enc;
2088
2089 if (ct_encoding == CE_BINARY) {
2090 enc = CE_BINARY;
2091 } else if (ct_encoding == CE_8BIT && encoding == CE_7BIT) {
2092 enc = CE_QUOTED;
2093 } else {
2094 enc = ct_encoding;
2095 }
2096 if (set_ce (ct, enc) == OK) {
2097 ++*message_mods;
2098 if (verbosw) {
2099 report (NULL, ct->c_partno, ct->c_file, "decode%s",
2100 FENDNULL(ct->c_ctline));
2101 }
2102 if (lf_line_endings) {
2103 strip_crs (ct, message_mods);
2104 }
2105 } else {
2106 status = NOTOK;
2107 }
2108 }
2109 } else {
2110 status = NOTOK;
2111 }
2112 break;
2113 }
2114 case CE_8BIT:
2115 case CE_7BIT:
2116 if (lf_line_endings) {
2117 strip_crs (ct, message_mods);
2118 }
2119 break;
2120 default:
2121 break;
2122 }
2123
2124 break;
2125 }
2126
2127 return status;
2128 }
2129
2130
/*
 * Determine if the part with type[/subtype] should be decoded, according to
 * decodetypes (which came from the -decodetypes switch).
 *
 * Returns nonzero if either type or type/subtype is an element of the
 * comma-separated decodetypes list.  The comparison ignores case.
 */
static int
should_decode(const char *decodetypes, const char *type, const char *subtype)
{
    /* Quick search for matching type[/subtype] in decodetypes:  bracket
       decodetypes with commas, then search for ,type, and ,type/subtype, in
       it. */
    char *haystack = concat(",", decodetypes, ",", NULL);
    char *needle = concat(",", type, ",", NULL);
    bool matched = nmh_strcasestr(haystack, needle) != NULL;

    free(needle);

    if (! matched  &&  subtype != NULL) {
        needle = concat(",", type, "/", subtype, ",", NULL);
        matched = nmh_strcasestr(haystack, needle) != NULL;
        free(needle);
    }

    free(haystack);

    return matched;
}
2163
2164
2165 /*
2166 * See if the decoded content is 7bit, 8bit, or binary. It's binary
2167 * if it has any NUL characters, a CR not followed by a LF, or lines
2168 * greater than 998 characters in length. If binary, reason is set
2169 * to a string explaining why.
2170 */
2171 static int
2172 content_encoding (CT ct, const char **reason)
2173 {
2174 CE ce = &ct->c_cefile;
2175 int encoding = CE_7BIT;
2176
2177 if (ce->ce_file) {
2178 size_t line_len = 0;
2179 char buffer[NMH_BUFSIZ];
2180 size_t inbytes;
2181
2182 if (! ce->ce_fp && (ce->ce_fp = fopen (ce->ce_file, "r")) == NULL) {
2183 advise (ce->ce_file, "unable to open for reading");
2184 return CE_UNKNOWN;
2185 }
2186
2187 fseeko (ce->ce_fp, 0L, SEEK_SET);
2188 while (encoding != CE_BINARY &&
2189 (inbytes = fread (buffer, 1, sizeof buffer, ce->ce_fp)) > 0) {
2190 char *cp;
2191 size_t i;
2192 int last_char_was_cr = 0;
2193
2194 for (i = 0, cp = buffer; i < inbytes; ++i, ++cp) {
2195 if (*cp == '\0' || ++line_len > 998 ||
2196 (*cp != '\n' && last_char_was_cr)) {
2197 encoding = CE_BINARY;
2198 if (*cp == '\0') {
2199 *reason = "null character";
2200 } else if (line_len > 998) {
2201 *reason = "line length > 998";
2202 } else if (*cp != '\n' && last_char_was_cr) {
2203 *reason = "CR not followed by LF";
2204 } else {
2205 /* Should not reach this. */
2206 *reason = "";
2207 }
2208 break;
2209 }
2210 if (*cp == '\n') {
2211 line_len = 0;
2212 } else if (! isascii ((unsigned char) *cp)) {
2213 encoding = CE_8BIT;
2214 }
2215
2216 last_char_was_cr = *cp == '\r';
2217 }
2218 }
2219
2220 fclose (ce->ce_fp);
2221 ce->ce_fp = NULL;
2222 } /* else should never happen */
2223
2224 return encoding;
2225 }
2226
2227
2228 /*
2229 * Strip carriage returns from content.
2230 */
2231 static int
2232 strip_crs (CT ct, int *message_mods)
2233 {
2234 char *charset = content_charset (ct);
2235 int status = OK;
2236
2237 /* Only strip carriage returns if content is ASCII or another
2238 charset that has the same readily recognizable CR followed by a
2239 LF. We can include UTF-8 here because if the high-order bit of
2240 a UTF-8 byte is 0, then it must be a single-byte ASCII
2241 character. */
2242 if (! strcasecmp (charset, "US-ASCII") ||
2243 ! strcasecmp (charset, "UTF-8") ||
2244 ! strncasecmp (charset, "ISO-8859-", 9) ||
2245 ! strncasecmp (charset, "WINDOWS-12", 10)) {
2246 char **file = NULL;
2247 FILE **fp = NULL;
2248 size_t begin;
2249 size_t end;
2250 bool has_crs = false;
2251 bool opened_input_file = false;
2252
2253 if (ct->c_cefile.ce_file) {
2254 file = &ct->c_cefile.ce_file;
2255 fp = &ct->c_cefile.ce_fp;
2256 begin = end = 0;
2257 } else if (ct->c_file) {
2258 file = &ct->c_file;
2259 fp = &ct->c_fp;
2260 begin = (size_t) ct->c_begin;
2261 end = (size_t) ct->c_end;
2262 } /* else don't know where the content is */
2263
2264 if (file && *file && fp) {
2265 if (! *fp) {
2266 if ((*fp = fopen (*file, "r")) == NULL) {
2267 advise (*file, "unable to open for reading");
2268 status = NOTOK;
2269 } else {
2270 opened_input_file = true;
2271 }
2272 }
2273 }
2274
2275 if (fp && *fp) {
2276 char buffer[NMH_BUFSIZ];
2277 size_t bytes_read;
2278 size_t bytes_to_read =
2279 end > 0 && end > begin ? end - begin : sizeof buffer;
2280
2281 fseeko (*fp, begin, SEEK_SET);
2282 while ((bytes_read = fread (buffer, 1,
2283 min (bytes_to_read, sizeof buffer),
2284 *fp)) > 0) {
2285 /* Look for CR followed by a LF. This is supposed to
2286 be text so there should be LF's. If not, don't
2287 modify the content. */
2288 char *cp;
2289 size_t i;
2290 bool last_char_was_cr = false;
2291
2292 if (end > 0) { bytes_to_read -= bytes_read; }
2293
2294 for (i = 0, cp = buffer; i < bytes_read; ++i, ++cp) {
2295 if (*cp == '\n' && last_char_was_cr) {
2296 has_crs = true;
2297 break;
2298 }
2299
2300 last_char_was_cr = *cp == '\r';
2301 }
2302 }
2303
2304 if (has_crs) {
2305 int fd;
2306 char *stripped_content_file;
2307 char *tempfile = m_mktemp2 (NULL, invo_name, &fd, NULL);
2308
2309 if (tempfile == NULL) {
2310 die("unable to create temporary file in %s",
2311 get_temp_dir());
2312 }
2313 stripped_content_file = mh_xstrdup (tempfile);
2314
2315 /* Strip each CR before a LF from the content. */
2316 fseeko (*fp, begin, SEEK_SET);
2317 while ((bytes_read = fread (buffer, 1, sizeof buffer, *fp)) >
2318 0) {
2319 char *cp;
2320 size_t i;
2321 bool last_char_was_cr = false;
2322
2323 for (i = 0, cp = buffer; i < bytes_read; ++i, ++cp) {
2324 if (*cp == '\r') {
2325 last_char_was_cr = true;
2326 } else if (last_char_was_cr) {
2327 if (*cp != '\n') {
2328 if (write (fd, "\r", 1) < 0) {
2329 advise (tempfile, "CR write");
2330 }
2331 }
2332 if (write (fd, cp, 1) < 0) {
2333 advise (tempfile, "write");
2334 }
2335 last_char_was_cr = false;
2336 } else {
2337 if (write (fd, cp, 1) < 0) {
2338 advise (tempfile, "write");
2339 }
2340 last_char_was_cr = false;
2341 }
2342 }
2343 }
2344
2345 if (close (fd)) {
2346 inform("unable to write temporary file %s, continuing...",
2347 stripped_content_file);
2348 (void) m_unlink (stripped_content_file);
2349 free(stripped_content_file);
2350 status = NOTOK;
2351 } else {
2352 /* Replace the decoded file with the converted one. */
2353 if (ct->c_cefile.ce_file && ct->c_cefile.ce_unlink)
2354 (void) m_unlink (ct->c_cefile.ce_file);
2355
2356 free(ct->c_cefile.ce_file);
2357 ct->c_cefile.ce_file = stripped_content_file;
2358 ct->c_cefile.ce_unlink = 1;
2359
2360 ++*message_mods;
2361 if (verbosw) {
2362 report (NULL, ct->c_partno,
2363 begin == 0 && end == 0 ? "" : *file,
2364 "stripped CRs");
2365 }
2366 }
2367 }
2368
2369 if (opened_input_file) {
2370 fclose (*fp);
2371 *fp = NULL;
2372 }
2373 }
2374 }
2375
2376 free (charset);
2377
2378 return status;
2379 }
2380
2381
2382 /*
2383 * Add/update, if necessary, the message C-T-E, based on the least restrictive
2384 * of the part C-T-E's.
2385 */
2386 static void
2387 update_cte (CT ct)
2388 {
2389 const int least_restrictive_enc = least_restrictive_encoding (ct);
2390
2391 if (least_restrictive_enc != CE_UNKNOWN &&
2392 least_restrictive_enc != CE_7BIT) {
2393 char *cte = concat (" ", ce_str (least_restrictive_enc), "\n", NULL);
2394 HF hf;
2395 bool found_cte = false;
2396
2397 /* Update/add Content-Transfer-Encoding header field. */
2398 for (hf = ct->c_first_hf; hf; hf = hf->next) {
2399 if (! strcasecmp (ENCODING_FIELD, hf->name)) {
2400 found_cte = true;
2401 free (hf->value);
2402 hf->value = cte;
2403 }
2404 }
2405 if (! found_cte) {
2406 add_header (ct, mh_xstrdup (ENCODING_FIELD), cte);
2407 }
2408 }
2409 }
2410
2411
2412 /*
2413 * Find the least restrictive encoding (7bit, 8bit, binary) of the parts
2414 * within a message.
2415 */
2416 static int
2417 least_restrictive_encoding (CT ct)
2418 {
2419 int encoding = CE_UNKNOWN;
2420
2421 switch (ct->c_type) {
2422 case CT_MULTIPART: {
2423 struct multipart *m = (struct multipart *) ct->c_ctparams;
2424 struct part *part;
2425
2426 for (part = m->mp_parts; part; part = part->mp_next) {
2427 const int part_encoding =
2428 least_restrictive_encoding (part->mp_part);
2429
2430 if (less_restrictive (encoding, part_encoding)) {
2431 encoding = part_encoding;
2432 }
2433 }
2434 break;
2435 }
2436
2437 case CT_MESSAGE:
2438 if (ct->c_subtype == MESSAGE_EXTERNAL) {
2439 struct exbody *e = (struct exbody *) ct->c_ctparams;
2440 const int part_encoding =
2441 least_restrictive_encoding (e->eb_content);
2442
2443 if (less_restrictive (encoding, part_encoding)) {
2444 encoding = part_encoding;
2445 }
2446 }
2447 break;
2448
2449 default: {
2450 if (less_restrictive (encoding, ct->c_encoding)) {
2451 encoding = ct->c_encoding;
2452 }
2453 }}
2454
2455 return encoding;
2456 }
2457
2458
2459 /*
2460 * Return whether the second encoding is less restrictive than the first, where
2461 * "less restrictive" is in the sense used by RFC 2045 Secs. 6.1 and 6.4. So,
2462 * CE_BINARY is less restrictive than CE_8BIT and
2463 * CE_8BIT is less restrictive than CE_7BIT.
2464 */
2465 static int
2466 less_restrictive (int encoding, int second_encoding)
2467 {
2468 switch (second_encoding) {
2469 case CE_BINARY:
2470 return encoding != CE_BINARY;
2471 case CE_8BIT:
2472 return encoding != CE_BINARY && encoding != CE_8BIT;
2473 case CE_7BIT:
2474 return encoding != CE_BINARY && encoding != CE_8BIT &&
2475 encoding != CE_7BIT;
2476 default :
2477 return 0;
2478 }
2479 }
2480
2481
2482 /*
2483 * Convert character set of each part.
2484 */
2485 static int
2486 convert_charsets (CT ct, char *dest_charset, int *message_mods)
2487 {
2488 int status = OK;
2489
2490 switch (ct->c_type) {
2491 case CT_TEXT:
2492 if (ct->c_subtype == TEXT_PLAIN) {
2493 status = convert_charset (ct, dest_charset, message_mods);
2494 if (status == OK) {
2495 if (verbosw) {
2496 char *ct_charset = content_charset (ct);
2497
2498 report (NULL, ct->c_partno, ct->c_file,
2499 "convert %s to %s", ct_charset, dest_charset);
2500 free (ct_charset);
2501 }
2502 } else {
2503 char *ct_charset = content_charset (ct);
2504
2505 report ("iconv", ct->c_partno, ct->c_file,
2506 "failed to convert %s to %s", ct_charset, dest_charset);
2507 free (ct_charset);
2508 }
2509 }
2510 break;
2511
2512 case CT_MULTIPART: {
2513 struct multipart *m = (struct multipart *) ct->c_ctparams;
2514 struct part *part;
2515
2516 /* Should check to see if the body for this part is encoded?
2517 For now, it gets passed along as-is by InitMultiPart(). */
2518 for (part = m->mp_parts; status == OK && part; part = part->mp_next) {
2519 status =
2520 convert_charsets (part->mp_part, dest_charset, message_mods);
2521 }
2522 break;
2523 }
2524
2525 case CT_MESSAGE:
2526 if (ct->c_subtype == MESSAGE_EXTERNAL) {
2527 struct exbody *e = (struct exbody *) ct->c_ctparams;
2528
2529 status =
2530 convert_charsets (e->eb_content, dest_charset, message_mods);
2531 }
2532 break;
2533
2534 default:
2535 break;
2536 }
2537
2538 return status;
2539 }
2540
2541
2542 /*
2543 * Fix various problems that aren't handled elsewhere. These
2544 * are fixed unconditionally: there are no switches to disable
2545 * them. Currently, "problems" are these:
2546 * 1) remove extraneous semicolon at the end of a header parameter list
2547 * 2) replace RFC 2047 encoding with RFC 2231 encoding of name and
2548 * filename parameters in Content-Type and Content-Disposition
2549 * headers, respectively.
2550 */
2551 static int
2552 fix_always (CT ct, int *message_mods)
2553 {
2554 int status = OK;
2555
2556 switch (ct->c_type) {
2557 case CT_MULTIPART: {
2558 struct multipart *m = (struct multipart *) ct->c_ctparams;
2559 struct part *part;
2560
2561 for (part = m->mp_parts; status == OK && part; part = part->mp_next) {
2562 status = fix_always (part->mp_part, message_mods);
2563 }
2564 break;
2565 }
2566
2567 case CT_MESSAGE:
2568 if (ct->c_subtype == MESSAGE_EXTERNAL) {
2569 struct exbody *e = (struct exbody *) ct->c_ctparams;
2570
2571 status = fix_always (e->eb_content, message_mods);
2572 }
2573 break;
2574
2575 default: {
2576 HF hf;
2577
2578 if (ct->c_first_hf) {
2579 fix_filename_encoding (ct);
2580 }
2581
2582 for (hf = ct->c_first_hf; hf; hf = hf->next) {
2583 size_t len = strlen (hf->value);
2584
2585 if (strcasecmp (hf->name, TYPE_FIELD) != 0 &&
2586 strcasecmp (hf->name, DISPO_FIELD) != 0) {
2587 /* Only do this for Content-Type and
2588 Content-Disposition fields because those are the
2589 only headers that parse_mime() warns about. */
2590 continue;
2591 }
2592
2593 /* whitespace following a trailing ';' will be nuked as well */
2594 if (hf->value[len - 1] == '\n') {
2595 while (isspace((unsigned char)(hf->value[len - 2]))) {
2596 if (len-- == 0) { break; }
2597 }
2598 }
2599
2600 if (hf->value[len - 2] == ';') {
2601 /* Remove trailing ';' from parameter value. */
2602 hf->value[len - 2] = '\n';
2603 hf->value[len - 1] = '\0';
2604
2605 /* Also, if Content-Type parameter, remove trailing ';'
2606 from ct->c_ctline. This probably isn't necessary
2607 but can't hurt. */
2608 if (strcasecmp(hf->name, TYPE_FIELD) == 0 && ct->c_ctline) {
2609 size_t l = strlen(ct->c_ctline) - 1;
2610 while (isspace((unsigned char)(ct->c_ctline[l])) ||
2611 ct->c_ctline[l] == ';') {
2612 ct->c_ctline[l--] = '\0';
2613 if (l == 0) { break; }
2614 }
2615 }
2616
2617 ++*message_mods;
2618 if (verbosw) {
2619 report (NULL, ct->c_partno, ct->c_file,
2620 "remove trailing ; from %s parameter value",
2621 hf->name);
2622 }
2623 }
2624 }
2625 }}
2626
2627 return status;
2628 }
2629
2630
2631 /*
2632 * Factor out common code for loops in fix_filename_encoding().
2633 */
2634 static int
2635 fix_filename_param (char *name, char *value, PM *first_pm, PM *last_pm)
2636 {
2637 bool fixed = false;
2638
2639 if (has_prefix(value, "=?") && has_suffix(value, "?=")) {
2640 /* Looks like an RFC 2047 encoded parameter. */
2641 char decoded[PATH_MAX + 1];
2642
2643 if (decode_rfc2047 (value, decoded, sizeof decoded)) {
2644 /* Encode using RFC 2231. */
2645 replace_param (first_pm, last_pm, name, decoded, 0);
2646 fixed = true;
2647 } else {
2648 inform("failed to decode %s parameter %s", name, value);
2649 }
2650 }
2651
2652 return fixed;
2653 }
2654
2655
2656 /*
2657 * Replace RFC 2047 encoding with RFC 2231 encoding of name and
2658 * filename parameters in Content-Type and Content-Disposition
2659 * headers, respectively.
2660 */
2661 static int
2662 fix_filename_encoding (CT ct)
2663 {
2664 PM pm;
2665 HF hf;
2666 int fixed = 0;
2667
2668 for (pm = ct->c_ctinfo.ci_first_pm; pm; pm = pm->pm_next) {
2669 if (pm->pm_name && pm->pm_value &&
2670 strcasecmp (pm->pm_name, "name") == 0) {
2671 fixed = fix_filename_param (pm->pm_name, pm->pm_value,
2672 &ct->c_ctinfo.ci_first_pm,
2673 &ct->c_ctinfo.ci_last_pm);
2674 }
2675 }
2676
2677 for (pm = ct->c_dispo_first; pm; pm = pm->pm_next) {
2678 if (pm->pm_name && pm->pm_value &&
2679 strcasecmp (pm->pm_name, "filename") == 0) {
2680 fixed = fix_filename_param (pm->pm_name, pm->pm_value,
2681 &ct->c_dispo_first,
2682 &ct->c_dispo_last);
2683 }
2684 }
2685
2686 /* Fix hf values to correspond. */
2687 for (hf = ct->c_first_hf; fixed && hf; hf = hf->next) {
2688 enum { OTHER, TYPE_HEADER, DISPO_HEADER } field = OTHER;
2689
2690 if (strcasecmp (hf->name, TYPE_FIELD) == 0) {
2691 field = TYPE_HEADER;
2692 } else if (strcasecmp (hf->name, DISPO_FIELD) == 0) {
2693 field = DISPO_HEADER;
2694 }
2695
2696 if (field != OTHER) {
2697 const char *const semicolon_loc = strchr (hf->value, ';');
2698
2699 if (semicolon_loc) {
2700 const size_t len =
2701 strlen (hf->name) + 1 + semicolon_loc - hf->value;
2702 const char *const params =
2703 output_params (len,
2704 field == TYPE_HEADER
2705 ? ct->c_ctinfo.ci_first_pm
2706 : ct->c_dispo_first,
2707 NULL, 0);
2708 const char *const new_params = concat (params, "\n", NULL);
2709
2710 replace_substring (&hf->value, semicolon_loc, new_params);
2711 free((void *)new_params); /* Cast away const. Sigh. */
2712 free((void *)params);
2713 } else {
2714 inform("did not find semicolon in %s:%s\n",
2715 hf->name, hf->value);
2716 }
2717 }
2718 }
2719
2720 return OK;
2721 }
2722
2723
2724 /*
2725 * Output content in input file to output file.
2726 */
2727 static int
2728 write_content (CT ct, const char *input_filename, char *outfile, FILE *outfp,
2729 int modify_inplace, int message_mods)
2730 {
2731 int status = OK;
2732
2733 if (modify_inplace) {
2734 if (message_mods > 0) {
2735 if ((status = output_message_fp (ct, outfp, outfile)) == OK) {
2736 char *infile = input_filename
2737 ? mh_xstrdup (input_filename)
2738 : mh_xstrdup (ct->c_file ? ct->c_file : "-");
2739
2740 if (remove_file (infile) == OK) {
2741 if (rename (outfile, infile)) {
2742 /* Rename didn't work, possibly because of an
2743 attempt to rename across filesystems. Try
2744 brute force copy. */
2745 int old = open (outfile, O_RDONLY);
2746 int new =
2747 open (infile, O_WRONLY | O_CREAT, m_gmprot ());
2748 int i = -1;
2749
2750 if (old != -1 && new != -1) {
2751 char buffer[NMH_BUFSIZ];
2752
2753 while ((i = read (old, buffer, sizeof buffer)) >
2754 0) {
2755 if (write (new, buffer, i) != i) {
2756 i = -1;
2757 break;
2758 }
2759 }
2760 }
2761 if (new != -1) { close (new); }
2762 if (old != -1) { close (old); }
2763 (void) m_unlink (outfile);
2764
2765 if (i < 0) {
2766 /* The -file argument processing used path() to
2767 expand filename to absolute path. */
2768 int file = ct->c_file && ct->c_file[0] == '/';
2769
2770 inform("unable to rename %s %s to %s, continuing...",
2771 file ? "file" : "message", outfile,
2772 infile);
2773 status = NOTOK;
2774 }
2775 }
2776 } else {
2777 inform("unable to remove input file %s, "
2778 "not modifying it, continuing...", infile);
2779 (void) m_unlink (outfile);
2780 status = NOTOK;
2781 }
2782
2783 free (infile);
2784 } else {
2785 status = NOTOK;
2786 }
2787 } else {
2788 /* No modifications and didn't need the tmp outfile. */
2789 (void) m_unlink (outfile);
2790 }
2791 } else {
2792 /* Output is going to some file. Produce it whether or not
2793 there were modifications. */
2794 status = output_message_fp (ct, outfp, outfile);
2795 }
2796
2797 flush_errors ();
2798 return status;
2799 }
2800
2801
2802 /*
2803 * parse_mime() does not set lf_line_endings in struct text, so use this
2804 * function to do it. It touches the parts the decodetypes identifies.
2805 */
2806 static void
2807 set_text_ctparams(CT ct, char *decodetypes, int lf_line_endings)
2808 {
2809 switch (ct->c_type) {
2810 case CT_MULTIPART: {
2811 struct multipart *m = (struct multipart *) ct->c_ctparams;
2812 struct part *part;
2813
2814 for (part = m->mp_parts; part; part = part->mp_next) {
2815 set_text_ctparams(part->mp_part, decodetypes, lf_line_endings);
2816 }
2817 break;
2818 }
2819
2820 case CT_MESSAGE:
2821 if (ct->c_subtype == MESSAGE_EXTERNAL) {
2822 struct exbody *e = (struct exbody *) ct->c_ctparams;
2823
2824 set_text_ctparams(e->eb_content, decodetypes, lf_line_endings);
2825 }
2826 break;
2827
2828 default:
2829 if (should_decode(decodetypes, ct->c_ctinfo.ci_type, ct->c_ctinfo.ci_subtype)) {
2830 if (ct->c_ctparams == NULL) {
2831 ct->c_ctparams = mh_xcalloc(1, sizeof (struct text));
2832 }
2833 ((struct text *) ct->c_ctparams)->lf_line_endings = lf_line_endings;
2834 }
2835 }
2836 }
2837
2838
2839 /*
2840 * If "rmmproc" is defined, call that to remove the file. Otherwise,
2841 * use the standard MH backup file.
2842 */
2843 static int
2844 remove_file (const char *file)
2845 {
2846 if (rmmproc) {
2847 char *rmm_command = concat (rmmproc, " ", file, NULL);
2848 int status = system (rmm_command);
2849
2850 free (rmm_command);
2851 return WIFEXITED (status) ? WEXITSTATUS (status) : NOTOK;
2852 }
2853 /* This is OK for a non-message file, it still uses the
2854 BACKUP_PREFIX form. The backup file will be in the same
2855 directory as file. */
2856 return rename (file, m_backup (file));
2857 }
2858
2859
2860 /*
2861 * Output formatted message to user.
2862 */
2863 static void
2864 report (char *what, char *partno, char *filename, char *message, ...)
2865 {
2866 va_list args;
2867 char *fmt;
2868
2869 if (verbosw) {
2870 va_start (args, message);
2871 fmt = concat (filename, partno ? " part " : ", ",
2872 FENDNULL(partno), partno ? ", " : "", message, NULL);
2873
2874 advertise (what, NULL, fmt, args);
2875
2876 free (fmt);
2877 va_end (args);
2878 }
2879 }
2880
2881
/*
 * Signal handler:  terminate via done (1).  For SIGQUIT, first flush
 * stdout and write a newline to stderr.
 */
static void
pipeser (int i)
{
    const int is_quit = i == SIGQUIT;

    if (is_quit) {
        fflush (stdout);
        fputs ("\n", stderr);
        fflush (stderr);
    }

    done (1);
    /* NOTREACHED */
}