]> diplodocus.org Git - nmh/blob - uip/mhfixmsg.c
Use va_copy() to get a copy of va_list, instead of using original.
[nmh] / uip / mhfixmsg.c
1 /* mhfixmsg.c -- rewrite a message with various transformations
2 *
3 * This code is Copyright (c) 2002 and 2013, by the authors of nmh.
4 * See the COPYRIGHT file in the root directory of the nmh
5 * distribution for complete copyright information.
6 */
7
8 #include <h/mh.h>
9 #include <h/fmt_scan.h>
10 #include <h/mime.h>
11 #include <h/mhparse.h>
12 #include "h/done.h"
13 #include <h/utils.h>
14 #include <h/signals.h>
15 #include "sbr/m_maildir.h"
16 #include "sbr/m_mktemp.h"
17 #include "sbr/mime_type.h"
18 #include "mhmisc.h"
19 #include "mhfree.h"
20 #include "mhoutsbr.h"
21 #include "mhshowsbr.h"
22 #include <fcntl.h>
23
/*
 * Command-line switches.  Each X(name, minchars, id) row is expanded twice
 * below: once with X yielding just the id (for the switch enum) and once
 * with X yielding a { name, minchars, id } initializer (for the switches[]
 * table that main() passes to smatch()).  Keep the rows in the same order
 * in both expansions by editing only this list.
 */
#define MHFIXMSG_SWITCHES \
    X("decodetext 8bit|7bit|binary", 0, DECODETEXTSW) \
    X("nodecodetext",                0, NDECODETEXTSW) \
    X("decodetypes",                 0, DECODETYPESW) \
    X("crlflinebreaks",              0, CRLFLINEBREAKSSW) \
    X("nocrlflinebreaks",            0, NCRLFLINEBREAKSSW) \
    X("textcharset",                 0, TEXTCHARSETSW) \
    X("notextcharset",               0, NTEXTCHARSETSW) \
    X("reformat",                    0, REFORMATSW) \
    X("noreformat",                  0, NREFORMATSW) \
    X("replacetextplain",            0, REPLACETEXTPLAINSW) \
    X("noreplacetextplain",          0, NREPLACETEXTPLAINSW) \
    X("fixboundary",                 0, FIXBOUNDARYSW) \
    X("nofixboundary",               0, NFIXBOUNDARYSW) \
    X("fixcte",                      0, FIXCOMPOSITECTESW) \
    X("nofixcte",                    0, NFIXCOMPOSITECTESW) \
    X("fixtype mimetype",            0, FIXTYPESW) \
    X("file file",                   0, FILESW) \
    X("outfile file",                0, OUTFILESW) \
    X("rmmproc program",             0, RPROCSW) \
    X("normmproc",                   0, NRPRCSW) \
    X("changecur",                   0, CHGSW) \
    X("nochangecur",                 0, NCHGSW) \
    X("verbose",                     0, VERBSW) \
    X("noverbose",                   0, NVERBSW) \
    X("version",                     0, VERSIONSW) \
    X("help",                        0, HELPSW) \

/* Switch identifiers. */
#define X(sw, minchars, id) id,
DEFINE_SWITCH_ENUM(MHFIXMSG);
#undef X

/* Switch table searched by smatch(). */
#define X(sw, minchars, id) { sw, minchars, id },
DEFINE_SWITCH_ARRAY(MHFIXMSG, switches);
#undef X
59
60
61 int verbosw;
62 int debugsw; /* Needed by mhparse.c. */
63
64 #define quitser pipeser
65
66 /*
67 * static prototypes
68 */
69 typedef struct fix_transformations {
70 int fixboundary;
71 int fixcompositecte;
72 svector_t fixtypes;
73 int reformat;
74 int replacetextplain;
75 int decodetext;
76 char *decodetypes;
77 /* Whether to use CRLF linebreaks, per RFC 2046 Sec. 4.1.1, par.1. */
78 int lf_line_endings;
79 char *textcharset;
80 } fix_transformations;
81
82 static int mhfixmsgsbr (CT *, char *, const fix_transformations *,
83 FILE **, char *, FILE **);
84 static int fix_boundary (CT *, int *);
85 static int copy_input_to_output (const char *, FILE *, const char *, FILE *);
86 static int get_multipart_boundary (CT, char **);
87 static int replace_boundary (CT, char *, char *);
88 static int fix_types (CT, svector_t, int *);
89 static char *replace_substring (char **, const char *, const char *);
90 static char *remove_parameter (char *, const char *);
91 static int fix_composite_cte (CT, int *);
92 static int set_ce (CT, int);
93 static int ensure_text_plain (CT *, CT, int *, int);
94 static int find_textplain_sibling (CT, int, int *);
95 static int insert_new_text_plain_part (CT, int, CT);
96 static CT build_text_plain_part (CT);
97 static int insert_into_new_mp_alt (CT *, int *);
98 static CT divide_part (CT);
99 static void copy_ctinfo (CI, CI);
100 static int decode_part (CT);
101 static int reformat_part (CT, char *, char *, char *, int);
102 static CT build_multipart_alt (CT, CT, int, int);
103 static int boundary_in_content (FILE **, char *, const char *);
104 static void transfer_noncontent_headers (CT, CT);
105 static int set_ct_type (CT, int type, int subtype, int encoding);
106 static int decode_text_parts (CT, int, const char *, int *);
107 static int should_decode(const char *, const char *, const char *);
108 static int content_encoding (CT, const char **);
109 static int strip_crs (CT, int *);
110 static void update_cte (CT);
111 static int least_restrictive_encoding (CT) PURE;
112 static int less_restrictive (int, int);
113 static int convert_charsets (CT, char *, int *);
114 static int fix_always (CT, int *);
115 static int fix_filename_param (char *, char *, PM *, PM *);
116 static int fix_filename_encoding (CT);
117 static int write_content (CT, const char *, char *, FILE *, int, int);
118 static void set_text_ctparams(CT, char *, int);
119 static int remove_file (const char *);
120 static void report (char *, char *, char *, char *, ...)
121 CHECK_PRINTF(4, 5);
122 static void pipeser (int);
123
124
125 int
126 main (int argc, char **argv)
127 {
128 int msgnum;
129 char *cp, *file = NULL, *folder = NULL;
130 char *maildir = NULL, buf[100], *outfile = NULL;
131 char **argp, **arguments;
132 struct msgs_array msgs = { 0, 0, NULL };
133 struct msgs *mp = NULL;
134 CT *ctp;
135 FILE *fp, *infp = NULL, *outfp = NULL;
136 bool using_stdin = false;
137 bool chgflag = true;
138 int status = OK;
139 fix_transformations fx;
140 fx.reformat = fx.fixcompositecte = fx.fixboundary = 1;
141 fx.fixtypes = NULL;
142 fx.replacetextplain = 0;
143 fx.decodetext = CE_8BIT;
144 fx.decodetypes = "text,application/ics"; /* Default, per man page. */
145 fx.lf_line_endings = 0;
146 fx.textcharset = NULL;
147
148 if (nmh_init(argv[0], true, false)) { return 1; }
149
150 arguments = getarguments (invo_name, argc, argv, 1);
151 argp = arguments;
152
153 /*
154 * Parse arguments
155 */
156 while ((cp = *argp++)) {
157 if (*cp == '-') {
158 switch (smatch (++cp, switches)) {
159 case AMBIGSW:
160 ambigsw (cp, switches);
161 done (1);
162 case UNKWNSW:
163 die("-%s unknown", cp);
164
165 case HELPSW:
166 snprintf (buf, sizeof buf, "%s [+folder] [msgs] [switches]",
167 invo_name);
168 print_help (buf, switches, 1);
169 done (0);
170 case VERSIONSW:
171 print_version(invo_name);
172 done (0);
173
174 case DECODETEXTSW:
175 if (! (cp = *argp++) || *cp == '-') {
176 die("missing argument to %s", argp[-2]);
177 }
178 if (! strcasecmp (cp, "8bit")) {
179 fx.decodetext = CE_8BIT;
180 } else if (! strcasecmp (cp, "7bit")) {
181 fx.decodetext = CE_7BIT;
182 } else if (! strcasecmp (cp, "binary")) {
183 fx.decodetext = CE_BINARY;
184 } else {
185 die("invalid argument to %s", argp[-2]);
186 }
187 continue;
188 case NDECODETEXTSW:
189 fx.decodetext = 0;
190 continue;
191 case DECODETYPESW:
192 if (! (cp = *argp++) || *cp == '-') {
193 die("missing argument to %s", argp[-2]);
194 }
195 fx.decodetypes = cp;
196 continue;
197 case CRLFLINEBREAKSSW:
198 fx.lf_line_endings = 0;
199 continue;
200 case NCRLFLINEBREAKSSW:
201 fx.lf_line_endings = 1;
202 continue;
203 case TEXTCHARSETSW:
204 if (! (cp = *argp++) || (*cp == '-' && cp[1])) {
205 die("missing argument to %s", argp[-2]);
206 }
207 fx.textcharset = cp;
208 continue;
209 case NTEXTCHARSETSW:
210 fx.textcharset = 0;
211 continue;
212 case FIXBOUNDARYSW:
213 fx.fixboundary = 1;
214 continue;
215 case NFIXBOUNDARYSW:
216 fx.fixboundary = 0;
217 continue;
218 case FIXCOMPOSITECTESW:
219 fx.fixcompositecte = 1;
220 continue;
221 case NFIXCOMPOSITECTESW:
222 fx.fixcompositecte = 0;
223 continue;
224 case FIXTYPESW:
225 if (! (cp = *argp++) || (*cp == '-' && cp[1])) {
226 die("missing argument to %s", argp[-2]);
227 }
228 if (! strncasecmp (cp, "multipart/", 10) ||
229 ! strncasecmp (cp, "message/", 8))
230 die("-fixtype %s not allowed", cp);
231 if (! strchr (cp, '/'))
232 die("-fixtype requires type/subtype");
233 if (fx.fixtypes == NULL) { fx.fixtypes = svector_create (10); }
234 svector_push_back (fx.fixtypes, cp);
235 continue;
236 case REFORMATSW:
237 fx.reformat = 1;
238 continue;
239 case NREFORMATSW:
240 fx.reformat = 0;
241 continue;
242 case REPLACETEXTPLAINSW:
243 fx.replacetextplain = 1;
244 continue;
245 case NREPLACETEXTPLAINSW:
246 fx.replacetextplain = 0;
247 continue;
248 case FILESW:
249 if (! (cp = *argp++) || (*cp == '-' && cp[1])) {
250 die("missing argument to %s", argp[-2]);
251 }
252 file = *cp == '-' ? mh_xstrdup (cp) : path (cp, TFILE);
253 continue;
254 case OUTFILESW:
255 if (! (cp = *argp++) || (*cp == '-' && cp[1])) {
256 die("missing argument to %s", argp[-2]);
257 }
258 outfile = *cp == '-' ? mh_xstrdup (cp) : path (cp, TFILE);
259 continue;
260 case RPROCSW:
261 if (!(rmmproc = *argp++) || *rmmproc == '-') {
262 die("missing argument to %s", argp[-2]);
263 }
264 continue;
265 case NRPRCSW:
266 rmmproc = NULL;
267 continue;
268 case CHGSW:
269 chgflag = true;
270 continue;
271 case NCHGSW:
272 chgflag = false;
273 continue;
274 case VERBSW:
275 verbosw = 1;
276 continue;
277 case NVERBSW:
278 verbosw = 0;
279 continue;
280 }
281 }
282 if (*cp == '+' || *cp == '@') {
283 if (folder)
284 die("only one folder at a time!");
285 folder = pluspath (cp);
286 } else {
287 if (*cp == '/') {
288 /* Interpret a full path as a filename, not a message. */
289 file = mh_xstrdup (cp);
290 } else {
291 app_msgarg (&msgs, cp);
292 }
293 }
294 }
295
296 SIGNAL (SIGQUIT, quitser);
297 SIGNAL (SIGPIPE, pipeser);
298
299 /*
300 * Read the standard profile setup
301 */
302 if ((fp = fopen (cp = etcpath ("mhn.defaults"), "r"))) {
303 readconfig(NULL, fp, cp, 0);
304 fclose (fp);
305 }
306
307 suppress_bogus_mp_content_warning = skip_mp_cte_check = true;
308 suppress_extraneous_trailing_semicolon_warning = true;
309
310 if (! context_find ("path")) {
311 free (path ("./", TFOLDER));
312 }
313
314 if (file && msgs.size) {
315 die("cannot specify msg and file at same time!");
316 }
317
318 if (outfile) {
319 /* Open the outfile now, so we don't have to risk opening it
320 after running out of fds. */
321 if (strcmp (outfile, "-") == 0) {
322 outfp = stdout;
323 } else if ((outfp = fopen (outfile, "w")) == NULL) {
324 adios (outfile, "unable to open for writing");
325 }
326 }
327
328 /*
329 * check if message is coming from file
330 */
331 if (file) {
332 /* If file is stdin, create a tmp file name before parse_mime()
333 has a chance, because it might put in on a different
334 filesystem than the output file. Instead, put it in the
335 user's preferred tmp directory. */
336 CT ct;
337
338 if (! strcmp ("-", file)) {
339 int fd;
340 char *cp;
341
342 using_stdin = true;
343
344 if ((cp = m_mktemp2 (NULL, invo_name, &fd, NULL)) == NULL) {
345 die("unable to create temporary file in %s",
346 get_temp_dir());
347 } else {
348 free (file);
349 file = mh_xstrdup (cp);
350 cpydata (STDIN_FILENO, fd, "-", file);
351 }
352
353 if (close (fd)) {
354 (void) m_unlink (file);
355 die("failed to write temporary file");
356 }
357 }
358
359 cts = mh_xcalloc(2, sizeof *cts);
360 ctp = cts;
361
362 if ((ct = parse_mime (file))) {
363 set_text_ctparams(ct, fx.decodetypes, fx.lf_line_endings);
364 *ctp++ = ct;
365 } else {
366 inform("unable to parse message from file %s", file);
367 status = NOTOK;
368
369 /* If there's an outfile, pass the input message unchanged, so the
370 message won't get dropped from a pipeline. */
371 if (outfile) {
372 /* Something went wrong. Output might be expected, such as if
373 this were run as a filter. Just copy the input to the
374 output. */
375 if ((infp = fopen (file, "r")) == NULL) {
376 adios (file, "unable to open for reading");
377 }
378
379 if (copy_input_to_output (file, infp, outfile, outfp) != OK) {
380 inform("unable to copy message to %s, "
381 "it might be lost\n", outfile);
382 }
383
384 fclose (infp);
385 infp = NULL;
386 }
387 }
388 } else {
389 /*
390 * message(s) are coming from a folder
391 */
392 CT ct;
393
394 if (! msgs.size) {
395 app_msgarg(&msgs, "cur");
396 }
397 if (! folder) {
398 folder = getfolder (1);
399 }
400 maildir = mh_xstrdup(m_maildir (folder));
401
402 /* chdir so that error messages, esp. from MIME parser, just
403 refer to the message and not its path. */
404 if (chdir (maildir) == NOTOK) {
405 adios (maildir, "unable to change directory to");
406 }
407
408 /* read folder and create message structure */
409 if (! (mp = folder_read (folder, 1))) {
410 die("unable to read folder %s", folder);
411 }
412
413 /* check for empty folder */
414 if (mp->nummsg == 0) {
415 die("no messages in %s", folder);
416 }
417
418 /* parse all the message ranges/sequences and set SELECTED */
419 for (msgnum = 0; msgnum < msgs.size; msgnum++)
420 if (! m_convert (mp, msgs.msgs[msgnum])) {
421 done (1);
422 }
423 seq_setprev (mp); /* set the previous-sequence */
424
425 cts = mh_xcalloc(mp->numsel + 1, sizeof *cts);
426 ctp = cts;
427
428 for (msgnum = mp->lowsel; msgnum <= mp->hghsel; msgnum++) {
429 if (is_selected(mp, msgnum)) {
430 char *msgnam = m_name (msgnum);
431
432 if ((ct = parse_mime (msgnam))) {
433 set_text_ctparams(ct, fx.decodetypes, fx.lf_line_endings);
434 *ctp++ = ct;
435 } else {
436 inform("unable to parse message %s", msgnam);
437 status = NOTOK;
438
439 /* If there's an outfile, pass the input message
440 unchanged, so the message won't get dropped from a
441 pipeline. */
442 if (outfile) {
443 /* Something went wrong. Output might be expected,
444 such as if this were run as a filter. Just copy
445 the input to the output. */
446 /* Can't use path() here because 1) it might have been
447 called before and it caches the pwd, and 2) we call
448 chdir() after that. */
449 char *input_filename =
450 concat (maildir, "/", msgnam, NULL);
451
452 if ((infp = fopen (input_filename, "r")) == NULL) {
453 adios (input_filename,
454 "unable to open for reading");
455 }
456
457 if (copy_input_to_output (input_filename, infp,
458 outfile, outfp) != OK) {
459 inform("unable to copy message to %s, "
460 "it might be lost\n", outfile);
461 }
462
463 fclose (infp);
464 infp = NULL;
465 free (input_filename);
466 }
467 }
468 }
469 }
470
471 if (chgflag) {
472 seq_setcur (mp, mp->hghsel); /* update current message */
473 }
474 seq_save (mp); /* synchronize sequences */
475 context_replace (pfolder, folder);/* update current folder */
476 context_save (); /* save the context file */
477 }
478
479 if (*cts) {
480 for (ctp = cts; *ctp; ++ctp) {
481 status =
482 mhfixmsgsbr (ctp, maildir, &fx, &infp, outfile, &outfp) == OK
483 ? 0
484 : 1;
485 free_content (*ctp);
486
487 if (using_stdin) {
488 (void) m_unlink (file);
489
490 if (! outfile) {
491 /* Just calling m_backup() unlinks the backup file. */
492 (void) m_backup (file);
493 }
494 }
495 }
496 } else {
497 status = 1;
498 }
499
500 free(maildir);
501 free (cts);
502
503 if (fx.fixtypes != NULL) { svector_free (fx.fixtypes); }
504 if (infp) { fclose (infp); } /* even if stdin */
505 if (outfp) { fclose (outfp); } /* even if stdout */
506 free (outfile);
507 free (file);
508 free (folder);
509 free (arguments);
510
511 done (status == OK ? 0 : 1);
512 return NOTOK;
513 }
514
515
516 /*
517 * Apply transformations to one message.
518 */
519 static int
520 mhfixmsgsbr (CT *ctp, char *maildir, const fix_transformations *fx,
521 FILE **infp, char *outfile, FILE **outfp)
522 {
523 /* Store input filename in case one of the transformations, i.e.,
524 fix_boundary(), rewrites to a tmp file. */
525 char *input_filename = maildir
526 ? concat (maildir, "/", (*ctp)->c_file, NULL)
527 : mh_xstrdup ((*ctp)->c_file);
528 bool modify_inplace = false;
529 int message_mods = 0;
530 int status = OK;
531
532 /* Though the input file won't need to be opened if everything goes
533 well, do it here just in case there's a failure, and that failure is
534 running out of file descriptors. */
535 if ((*infp = fopen (input_filename, "r")) == NULL) {
536 adios (input_filename, "unable to open for reading");
537 }
538
539 if (outfile == NULL) {
540 modify_inplace = true;
541
542 if ((*ctp)->c_file) {
543 char *tempfile;
544 /* outfp will be closed by the caller */
545 if ((tempfile = m_mktemp2 (NULL, invo_name, NULL, outfp)) ==
546 NULL) {
547 die("unable to create temporary file in %s",
548 get_temp_dir());
549 }
550 outfile = mh_xstrdup (tempfile);
551 } else {
552 die("missing both input and output filenames\n");
553 }
554 } /* else *outfp was defined by caller */
555
556 reverse_alternative_parts (*ctp);
557 status = fix_always (*ctp, &message_mods);
558 if (status == OK && fx->fixboundary) {
559 status = fix_boundary (ctp, &message_mods);
560 }
561 if (status == OK && fx->fixtypes != NULL) {
562 status = fix_types (*ctp, fx->fixtypes, &message_mods);
563 }
564 if (status == OK && fx->fixcompositecte) {
565 status = fix_composite_cte (*ctp, &message_mods);
566 }
567 if (status == OK && fx->reformat) {
568 status =
569 ensure_text_plain (ctp, NULL, &message_mods, fx->replacetextplain);
570 }
571 if (status == OK && fx->decodetext) {
572 status = decode_text_parts (*ctp, fx->decodetext, fx->decodetypes,
573 &message_mods);
574 update_cte (*ctp);
575 }
576 if (status == OK && fx->textcharset != NULL) {
577 status = convert_charsets (*ctp, fx->textcharset, &message_mods);
578 }
579
580 if (status == OK && ! (*ctp)->c_umask) {
581 /* Set the umask for the contents file. This currently
582 isn't used but just in case it is in the future. */
583 struct stat st;
584
585 if (stat ((*ctp)->c_file, &st) != NOTOK) {
586 (*ctp)->c_umask = ~(st.st_mode & 0777);
587 } else {
588 (*ctp)->c_umask = ~m_gmprot();
589 }
590 }
591
592 /*
593 * Write the content to a file
594 */
595 if (status == OK) {
596 status = write_content (*ctp, input_filename, outfile, *outfp,
597 modify_inplace, message_mods);
598 } else if (! modify_inplace) {
599 /* Something went wrong. Output might be expected, such
600 as if this were run as a filter. Just copy the input
601 to the output. */
602 if (copy_input_to_output (input_filename, *infp, outfile,
603 *outfp) != OK) {
604 inform("unable to copy message to %s, it might be lost\n",
605 outfile);
606 }
607 }
608
609 if (modify_inplace) {
610 if (status != OK) { (void) m_unlink (outfile); }
611 free (outfile);
612 outfile = NULL;
613 }
614
615 fclose (*infp);
616 *infp = NULL;
617 free (input_filename);
618
619 return status;
620 }
621
622
623 /*
624 * Copy input message to output. Assumes not modifying in place, so this
625 * might be running as part of a pipeline.
626 */
627 static int
628 copy_input_to_output (const char *input_filename, FILE *infp,
629 const char *output_filename, FILE *outfp)
630 {
631 int in = fileno (infp);
632 int out = fileno (outfp);
633 int status = OK;
634
635 if (in != -1 && out != -1) {
636 cpydata (in, out, input_filename, output_filename);
637 } else {
638 status = NOTOK;
639 }
640
641 return status;
642 }
643
644
645 /*
646 * Fix mismatched outer level boundary.
647 */
648 static int
649 fix_boundary (CT *ct, int *message_mods)
650 {
651 struct multipart *mp;
652 int status = OK;
653
654 if (ct && (*ct)->c_type == CT_MULTIPART && bogus_mp_content) {
655 mp = (struct multipart *) (*ct)->c_ctparams;
656
657 /*
658 * 1) Get boundary at end of part.
659 * 2) Get boundary at beginning of part and compare to the end-of-part
660 * boundary.
661 * 3) Write out contents of ct to tmp file, replacing boundary in
662 * header with boundary from part. Set c_unlink to 1.
663 * 4) Free ct.
664 * 5) Call parse_mime() on the tmp file, replacing ct.
665 */
666
667 if (mp && mp->mp_start) {
668 char *part_boundary;
669
670 if (get_multipart_boundary (*ct, &part_boundary) == OK) {
671 char *fixed;
672
673 if ((fixed = m_mktemp2 (NULL, invo_name, NULL, &(*ct)->c_fp))) {
674 if (replace_boundary (*ct, fixed, part_boundary) == OK) {
675 char *filename = mh_xstrdup ((*ct)->c_file);
676 CT fixed_ct;
677
678 free_content (*ct);
679 if ((fixed_ct = parse_mime (fixed))) {
680 *ct = fixed_ct;
681 (*ct)->c_unlink = 1;
682
683 ++*message_mods;
684 if (verbosw) {
685 report (NULL, NULL, filename,
686 "fix multipart boundary");
687 }
688 } else {
689 *ct = NULL;
690 inform("unable to parse fixed part");
691 status = NOTOK;
692 }
693 free (filename);
694 } else {
695 inform("unable to replace broken boundary");
696 status = NOTOK;
697 }
698 } else {
699 inform("unable to create temporary file in %s",
700 get_temp_dir());
701 status = NOTOK;
702 }
703
704 free (part_boundary);
705 } else {
706 /* Couldn't fix the boundary. Report failure so that mhfixmsg
707 doesn't modify the message. */
708 status = NOTOK;
709 }
710 } else {
711 /* No multipart struct, even though the content type is
712 CT_MULTIPART. Report failure so that mhfixmsg doesn't modify
713 the message. */
714 status = NOTOK;
715 }
716 }
717
718 return status;
719 }
720
721
722 /*
723 * Find boundary at end of multipart.
724 */
725 static int
726 get_multipart_boundary (CT ct, char **part_boundary)
727 {
728 char buffer[NMH_BUFSIZ];
729 char *end_boundary = NULL;
730 off_t begin = (off_t) ct->c_end > (off_t) (ct->c_begin + sizeof buffer)
731 ? (off_t) (ct->c_end - sizeof buffer)
732 : (off_t) ct->c_begin;
733 size_t bytes_read;
734 int status = OK;
735
736 /* This will fail if the boundary spans fread() calls. NMH_BUFSIZ should
737 be big enough, even if it's just 1024, to make that unlikely. */
738
739 /* free_content() will close ct->c_fp if bogus MP boundary is fixed. */
740 if (! ct->c_fp && (ct->c_fp = fopen (ct->c_file, "r")) == NULL) {
741 advise (ct->c_file, "unable to open for reading");
742 return NOTOK;
743 }
744
745 /* Get boundary at end of multipart. */
746 while (begin >= (off_t) ct->c_begin) {
747 fseeko (ct->c_fp, begin, SEEK_SET);
748 while ((bytes_read = fread (buffer, 1, sizeof buffer, ct->c_fp)) > 0) {
749 char *cp = rfind_str (buffer, bytes_read, "--");
750
751 if (cp) {
752 char *end;
753
754 /* Trim off trailing "--" and anything beyond. */
755 *cp-- = '\0';
756 if ((end = rfind_str (buffer, cp - buffer, "\n"))) {
757 if (strlen (end) > 3 && *end++ == '\n' &&
758 *end++ == '-' && *end++ == '-') {
759 end_boundary = mh_xstrdup (end);
760 break;
761 }
762 }
763 }
764 }
765
766 if (end_boundary || begin <= (off_t) (ct->c_begin + sizeof buffer))
767 break;
768 begin -= sizeof buffer;
769 }
770
771 /* Get boundary at beginning of multipart. */
772 if (end_boundary) {
773 fseeko (ct->c_fp, ct->c_begin, SEEK_SET);
774 while ((bytes_read = fread (buffer, 1, sizeof buffer, ct->c_fp)) > 0) {
775 if (bytes_read >= strlen (end_boundary)) {
776 char *cp = find_str (buffer, bytes_read, end_boundary);
777
778 if (cp && cp - buffer >= 2 && *--cp == '-' &&
779 *--cp == '-' && (cp > buffer && *--cp == '\n')) {
780 status = OK;
781 break;
782 }
783 } else {
784 /* The start and end boundaries didn't match, or the
785 start boundary doesn't begin with "\n--" (or "--"
786 if at the beginning of buffer). Keep trying. */
787 status = NOTOK;
788 }
789 }
790 } else {
791 status = NOTOK;
792 }
793
794 if (ct->c_fp) {
795 fclose (ct->c_fp);
796 ct->c_fp = NULL;
797 }
798
799 if (status == OK) {
800 *part_boundary = end_boundary;
801 } else {
802 *part_boundary = NULL;
803 free (end_boundary);
804 }
805
806 return status;
807 }
808
809
810 /*
811 * Open and copy ct->c_file to file, replacing the multipart boundary.
812 */
813 static int
814 replace_boundary (CT ct, char *file, char *boundary)
815 {
816 FILE *fpin, *fpout;
817 int compnum, state;
818 char buf[NMH_BUFSIZ], name[NAMESZ];
819 char *np, *vp;
820 m_getfld_state_t gstate;
821 int status = OK;
822
823 if (ct->c_file == NULL) {
824 inform("missing input filename");
825 return NOTOK;
826 }
827
828 if ((fpin = fopen (ct->c_file, "r")) == NULL) {
829 advise (ct->c_file, "unable to open for reading");
830 return NOTOK;
831 }
832
833 if ((fpout = fopen (file, "w")) == NULL) {
834 fclose (fpin);
835 advise (file, "unable to open for writing");
836 return NOTOK;
837 }
838
839 gstate = m_getfld_state_init(fpin);
840 for (compnum = 1;;) {
841 int bufsz = (int) sizeof buf;
842
843 switch (state = m_getfld2(&gstate, name, buf, &bufsz)) {
844 case FLD:
845 case FLDPLUS:
846 compnum++;
847
848 /* get copies of the buffers */
849 np = mh_xstrdup (name);
850 vp = mh_xstrdup (buf);
851
852 /* if necessary, get rest of field */
853 while (state == FLDPLUS) {
854 bufsz = sizeof buf;
855 state = m_getfld2(&gstate, name, buf, &bufsz);
856 vp = add (buf, vp); /* add to previous value */
857 }
858
859 if (strcasecmp (TYPE_FIELD, np)) {
860 fprintf (fpout, "%s:%s", np, vp);
861 } else {
862 char *new_ctline, *new_params;
863
864 replace_param(&ct->c_ctinfo.ci_first_pm,
865 &ct->c_ctinfo.ci_last_pm, "boundary",
866 boundary, 0);
867
868 new_ctline = concat(" ", ct->c_ctinfo.ci_type, "/",
869 ct->c_ctinfo.ci_subtype, NULL);
870 new_params = output_params(LEN(TYPE_FIELD) +
871 strlen(new_ctline) + 1,
872 ct->c_ctinfo.ci_first_pm, NULL, 0);
873 fprintf (fpout, "%s:%s%s\n", np, new_ctline,
874 FENDNULL(new_params));
875 free(new_ctline);
876 free(new_params);
877 }
878
879 free (vp);
880 free (np);
881
882 continue;
883
884 case BODY:
885 putc('\n', fpout);
886 /* buf will have a terminating NULL, skip it. */
887 if ((int) fwrite (buf, 1, bufsz-1, fpout) < bufsz-1) {
888 advise (file, "fwrite");
889 }
890 continue;
891
892 case FILEEOF:
893 break;
894
895 case LENERR:
896 case FMTERR:
897 inform("message format error in component #%d", compnum);
898 status = NOTOK;
899 break;
900
901 default:
902 inform("getfld() returned %d", state);
903 status = NOTOK;
904 break;
905 }
906
907 break;
908 }
909
910 m_getfld_state_destroy (&gstate);
911 fclose (fpout);
912 fclose (fpin);
913
914 return status;
915 }
916
917
918 /*
919 * Fix Content-Type header to reflect the content of its part.
920 */
921 static int
922 fix_types (CT ct, svector_t fixtypes, int *message_mods)
923 {
924 int status = OK;
925
926 switch (ct->c_type) {
927 case CT_MULTIPART: {
928 struct multipart *m = (struct multipart *) ct->c_ctparams;
929 struct part *part;
930
931 for (part = m->mp_parts; status == OK && part; part = part->mp_next) {
932 status = fix_types (part->mp_part, fixtypes, message_mods);
933 }
934 break;
935 }
936
937 case CT_MESSAGE:
938 if (ct->c_subtype == MESSAGE_EXTERNAL) {
939 struct exbody *e = (struct exbody *) ct->c_ctparams;
940
941 status = fix_types (e->eb_content, fixtypes, message_mods);
942 }
943 break;
944
945 default: {
946 char **typep, *type;
947
948 if (ct->c_ctinfo.ci_type && ct->c_ctinfo.ci_subtype) {
949 for (typep = svector_strs (fixtypes);
950 typep && (type = *typep);
951 ++typep) {
952 char *type_subtype =
953 concat (ct->c_ctinfo.ci_type, "/", ct->c_ctinfo.ci_subtype,
954 NULL);
955
956 if (! strcasecmp (type, type_subtype) &&
957 decode_part (ct) == OK &&
958 ct->c_cefile.ce_file != NULL) {
959 char *ct_type_subtype = mime_type (ct->c_cefile.ce_file);
960 char *cp;
961
962 if ((cp = strchr (ct_type_subtype, ';'))) {
963 /* Truncate to remove any parameter list from
964 mime_type () result. */
965 *cp = '\0';
966 }
967
968 if (strcasecmp (type, ct_type_subtype)) {
969 char *ct_type, *ct_subtype;
970 HF hf;
971
972 /* The Content-Type header does not match the
973 content, so update these struct Content
974 fields to match:
975 * c_type, c_subtype
976 * c_ctinfo.ci_type, c_ctinfo.ci_subtype
977 * c_ctline
978 */
979 /* Extract type and subtype from type/subtype. */
980 ct_type = mh_xstrdup(ct_type_subtype);
981 if ((cp = strchr (ct_type, '/'))) {
982 *cp = '\0';
983 ct_subtype = mh_xstrdup(++cp);
984 } else {
985 inform("missing / in MIME type of %s %s",
986 ct->c_file, ct->c_partno);
987 free (ct_type);
988 return NOTOK;
989 }
990
991 ct->c_type = ct_str_type (ct_type);
992 ct->c_subtype = ct_str_subtype (ct->c_type, ct_subtype);
993
994 free (ct->c_ctinfo.ci_type);
995 ct->c_ctinfo.ci_type = ct_type;
996 free (ct->c_ctinfo.ci_subtype);
997 ct->c_ctinfo.ci_subtype = ct_subtype;
998 if (! replace_substring (&ct->c_ctline, type,
999 ct_type_subtype)) {
1000 inform("did not find %s in %s",
1001 type, ct->c_ctline);
1002 }
1003
1004 /* Update Content-Type header field. */
1005 for (hf = ct->c_first_hf; hf; hf = hf->next) {
1006 if (! strcasecmp (TYPE_FIELD, hf->name)) {
1007 if (replace_substring (&hf->value, type,
1008 ct_type_subtype)) {
1009 ++*message_mods;
1010 if (verbosw) {
1011 report (NULL, ct->c_partno, ct->c_file,
1012 "change Content-Type in header "
1013 "from %s to %s",
1014 type, ct_type_subtype);
1015 }
1016 break;
1017 }
1018 inform("did not find %s in %s", type, hf->value);
1019 }
1020 }
1021 }
1022 free (ct_type_subtype);
1023 }
1024 free (type_subtype);
1025 }
1026 }
1027 }}
1028
1029 return status;
1030 }
1031
1032
/*
 * Replace the first occurrence of old in *str with new.
 *
 * The result is built in freshly allocated storage; the original *str is
 * freed and *str is pointed at the result, which is also returned.  If
 * old does not occur in *str, nothing is changed and NULL is returned.
 */
char *
replace_substring (char **str, const char *old, const char *new)
{
    char *match = strstr (*str, old);
    char *tail;
    char *result;

    if (match == NULL) {
        return NULL;
    }

    tail = match + strlen (old);

    if (match == *str) {
        /* Nothing in front of the match to preserve. */
        result = concat (new, tail, NULL);
    } else {
        /* Copy the string and cut the copy off where the match begins. */
        char *head = mh_xstrdup(*str);

        head[match - *str] = '\0';
        result = concat (head, new, tail, NULL);
        free (head);
    }

    free (*str);
    *str = result;

    return result;
}
1061
1062
/*
 * Remove a name=value parameter, given just its name, from a header value.
 *
 * str is edited in place and returned.  The first parameter whose name is
 * exactly name (case-sensitive, starting the string or following
 * whitespace or ';') is removed together with the whitespace and the ';'
 * that precede it.  The value may be a quoted string, which ends at its
 * closing quote (backslash-escaped characters are skipped), or a bare
 * token, which ends at whitespace, ';', or the end of the string.  If the
 * parameter isn't found, or name is empty, str is left unchanged.
 */
char *
remove_parameter (char *str, const char *name)
{
    /* It looks to me, based on the BNF in RFC 2045, than there can't
       be whitespace between the parameter name and the "=", or
       between the "=" and the parameter value. */
    const size_t name_len = strlen (name);
    char *cp;

    if (name_len == 0) {
        return str;
    }

    /* Find "name=" as a whole parameter name, so that, e.g., "name"
       doesn't match the tail of "filename". */
    for (cp = strstr (str, name); cp; cp = strstr (cp + 1, name)) {
        if (cp[name_len] == '='  &&
            (cp == str  ||  cp[-1] == ';'  ||
             isspace ((unsigned char) cp[-1]))) {
            break;
        }
    }

    if (cp) {
        char *start = cp;
        char *end = cp + name_len + 1;  /* first character of the value */
        int removed_separator = 0;

        /* Remove any leading spaces, before the parameter name. */
        while (start > str  &&  isspace ((unsigned char) start[-1])) {
            --start;
        }
        /* Remove a leading semicolon. */
        if (start > str  &&  start[-1] == ';') {
            --start;
            removed_separator = 1;
        }

        if (*end == '"') {
            /* Skip past the quoted value, and then the final quote if
               there is one.  Never step over the terminating null. */
            for (++end; *end  &&  *end != '"'; ++end) {
                if (*end == '\\'  &&  end[1]) { ++end; }
            }
            if (*end == '"') { ++end; }
        } else {
            /* Skip past the value, which might be empty. */
            while (*end  &&  *end != ';'  &&
                   ! isspace ((unsigned char) *end)) {
                ++end;
            }
        }

        /* If no separator was removed in front of the parameter, remove
           the one behind it so that a stray ';' isn't left over. */
        if (! removed_separator  &&  *end == ';') { ++end; }

        /* Move the remainder, including its trailing null, down. */
        (void) memmove (start, end, strlen (end) + 1);
    }

    return str;
}
1109
1110
1111 /*
1112 * Fix Content-Transfer-Encoding of composite,, e.g., message or multipart, part.
1113 * According to RFC 2045 Sec. 6.4, it must be 7bit, 8bit, or binary. Set it to
1114 * 8 bit.
1115 */
1116 static int
1117 fix_composite_cte (CT ct, int *message_mods)
1118 {
1119 int status = OK;
1120
1121 if (ct->c_type == CT_MESSAGE || ct->c_type == CT_MULTIPART) {
1122 if (ct->c_encoding != CE_7BIT && ct->c_encoding != CE_8BIT &&
1123 ct->c_encoding != CE_BINARY) {
1124 HF hf;
1125
1126 for (hf = ct->c_first_hf; hf; hf = hf->next) {
1127 char *name = hf->name;
1128 for (; isspace((unsigned char)*name); ++name) {
1129 continue;
1130 }
1131
1132 if (! strncasecmp (name, ENCODING_FIELD,
1133 LEN(ENCODING_FIELD))) {
1134 char *prefix = "Nmh-REPLACED-INVALID-";
1135 HF h;
1136
1137 NEW(h);
1138 h->name = mh_xstrdup (hf->name);
1139 h->hf_encoding = hf->hf_encoding;
1140 h->next = hf->next;
1141 hf->next = h;
1142
1143 /* Retain old header but prefix its name. */
1144 free (hf->name);
1145 hf->name = concat (prefix, h->name, NULL);
1146
1147 ++*message_mods;
1148 if (verbosw) {
1149 char *encoding = cpytrim (hf->value);
1150 report (NULL, ct->c_partno, ct->c_file,
1151 "replace Content-Transfer-Encoding of %s "
1152 "with 8 bit", encoding);
1153 free (encoding);
1154 }
1155
1156 h->value = mh_xstrdup (" 8bit\n");
1157
1158 /* Don't need to warn for multiple C-T-E header
1159 fields, parse_mime() already does that. But
1160 if there are any, fix them all as necessary. */
1161 hf = h;
1162 }
1163 }
1164
1165 set_ce (ct, CE_8BIT);
1166 }
1167
1168 if (ct->c_type == CT_MULTIPART) {
1169 struct multipart *m;
1170 struct part *part;
1171
1172 m = (struct multipart *) ct->c_ctparams;
1173 for (part = m->mp_parts; part; part = part->mp_next) {
1174 if (fix_composite_cte (part->mp_part, message_mods) != OK) {
1175 status = NOTOK;
1176 break;
1177 }
1178 }
1179 }
1180 }
1181
1182 return status;
1183 }
1184
1185
1186 /*
1187 * Set content encoding.
1188 */
1189 static int
1190 set_ce (CT ct, int encoding)
1191 {
1192 const char *ce = ce_str (encoding);
1193 const struct str2init *ctinit = get_ce_method (ce);
1194
1195 if (ctinit) {
1196 char *cte = concat (" ", ce, "\n", NULL);
1197 bool found_cte = false;
1198 HF hf;
1199 /* Decoded contents might be in ct->c_cefile.ce_file, if the
1200 caller is decode_text_parts (). Save because we'll
1201 overwrite below. */
1202 struct cefile decoded_content_info = ct->c_cefile;
1203
1204 ct->c_encoding = encoding;
1205
1206 ct->c_ctinitfnx = ctinit->si_init;
1207 /* This will assign ct->c_cefile with an all-0 struct, which
1208 is what we want. */
1209 (*ctinit->si_init) (ct);
1210 /* After returning, the caller should set
1211 ct->c_cefile.ce_file to the name of the file containing
1212 the contents. */
1213
1214 if (ct->c_ceclosefnx) {
1215 (*ct->c_ceclosefnx) (ct);
1216 }
1217
1218 /* Restore the cefile. */
1219 ct->c_cefile = decoded_content_info;
1220
1221 /* Update/add Content-Transfer-Encoding header field. */
1222 for (hf = ct->c_first_hf; hf; hf = hf->next) {
1223 if (! strcasecmp (ENCODING_FIELD, hf->name)) {
1224 found_cte = true;
1225 free (hf->value);
1226 hf->value = cte;
1227 }
1228 }
1229 if (! found_cte) {
1230 add_header (ct, mh_xstrdup (ENCODING_FIELD), cte);
1231 }
1232
1233 /* Update c_celine. It's used only by mhlist -debug. */
1234 free (ct->c_celine);
1235 ct->c_celine = mh_xstrdup (cte);
1236
1237 return OK;
1238 }
1239
1240 return NOTOK;
1241 }
1242
1243
1244 /*
1245 * Make sure each text part has a corresponding text/plain part.
1246 */
1247 static int
1248 ensure_text_plain (CT *ct, CT parent, int *message_mods, int replacetextplain)
1249 {
1250 int status = OK;
1251
1252 switch ((*ct)->c_type) {
1253 case CT_TEXT: {
1254 /* Nothing to do for text/plain. */
1255 if ((*ct)->c_subtype == TEXT_PLAIN) { return OK; }
1256
1257 if (parent && parent->c_type == CT_MULTIPART &&
1258 parent->c_subtype == MULTI_ALTERNATE) {
1259 int new_subpart_number = 1;
1260 int has_text_plain =
1261 find_textplain_sibling (parent, replacetextplain,
1262 &new_subpart_number);
1263
1264 if (! has_text_plain) {
1265 /* Parent is a multipart/alternative. Insert a new
1266 text/plain subpart. */
1267 const int inserted =
1268 insert_new_text_plain_part (*ct, new_subpart_number,
1269 parent);
1270 if (inserted) {
1271 ++*message_mods;
1272 if (verbosw) {
1273 report (NULL, parent->c_partno, parent->c_file,
1274 "insert text/plain part");
1275 }
1276 } else {
1277 status = NOTOK;
1278 }
1279 }
1280 } else if (parent && parent->c_type == CT_MULTIPART &&
1281 parent->c_subtype == MULTI_RELATED) {
1282 char *type_subtype =
1283 concat ((*ct)->c_ctinfo.ci_type, "/",
1284 (*ct)->c_ctinfo.ci_subtype, NULL);
1285 const char *parent_type =
1286 get_param (parent->c_ctinfo.ci_first_pm, "type", '?', 1);
1287 int new_subpart_number = 1;
1288 int has_text_plain = 0;
1289
1290 /* Have to do string comparison on the subtype because we
1291 don't enumerate all of them in c_subtype values.
1292 parent_type will be NULL if the multipart/related part
1293 doesn't have a type parameter. The type parameter must
1294 be specified according to RFC 2387 Sec. 3.1 but not all
1295 messages comply. */
1296 if (parent_type && strcasecmp (type_subtype, parent_type) == 0) {
1297 /* The type of this part matches the root type of the
1298 parent multipart/related. Look to see if there's
1299 text/plain sibling. */
1300 has_text_plain =
1301 find_textplain_sibling (parent, replacetextplain,
1302 &new_subpart_number);
1303 }
1304
1305 free (type_subtype);
1306
1307 if (! has_text_plain) {
1308 struct multipart *mp = (struct multipart *) parent->c_ctparams;
1309 struct part *part;
1310 int siblings = 0;
1311
1312 for (part = mp->mp_parts; part; part = part->mp_next) {
1313 if (*ct != part->mp_part) {
1314 ++siblings;
1315 }
1316 }
1317
1318 if (siblings) {
1319 /* Parent is a multipart/related. Insert a new
1320 text/plain subpart in a new multipart/alternative. */
1321 if (insert_into_new_mp_alt (ct, message_mods)) {
1322 /* Not an error if text/plain couldn't be added. */
1323 }
1324 } else {
1325 /* There are no siblings, so insert a new text/plain
1326 subpart, and change the parent type from
1327 multipart/related to multipart/alternative. */
1328 const int inserted =
1329 insert_new_text_plain_part (*ct, new_subpart_number,
1330 parent);
1331
1332 if (inserted) {
1333 HF hf;
1334
1335 parent->c_subtype = MULTI_ALTERNATE;
1336 free (parent->c_ctinfo.ci_subtype);
1337 parent->c_ctinfo.ci_subtype = mh_xstrdup("alternative");
1338 if (! replace_substring (&parent->c_ctline, "/related",
1339 "/alternative")) {
1340 inform("did not find multipart/related in %s",
1341 parent->c_ctline);
1342 }
1343
1344 /* Update Content-Type header field. */
1345 for (hf = parent->c_first_hf; hf; hf = hf->next) {
1346 if (! strcasecmp (TYPE_FIELD, hf->name)) {
1347 if (replace_substring (&hf->value, "/related",
1348 "/alternative")) {
1349 ++*message_mods;
1350 if (verbosw) {
1351 report (NULL, parent->c_partno,
1352 parent->c_file,
1353 "insert text/plain part");
1354 }
1355
1356 /* Remove, e.g., type="text/html" from
1357 multipart/alternative. */
1358 remove_parameter (hf->value, "type");
1359 break;
1360 }
1361 inform("did not find multipart/"
1362 "related in header %s", hf->value);
1363 }
1364 }
1365 } else {
1366 /* Not an error if text/plain couldn't be inserted. */
1367 }
1368 }
1369 }
1370 } else {
1371 if (insert_into_new_mp_alt (ct, message_mods)) {
1372 status = NOTOK;
1373 }
1374 }
1375 break;
1376 }
1377
1378 case CT_MULTIPART: {
1379 struct multipart *mp = (struct multipart *) (*ct)->c_ctparams;
1380 struct part *part;
1381
1382 for (part = mp->mp_parts; status == OK && part; part = part->mp_next) {
1383 if ((*ct)->c_type == CT_MULTIPART) {
1384 status = ensure_text_plain (&part->mp_part, *ct, message_mods,
1385 replacetextplain);
1386 }
1387 }
1388 break;
1389 }
1390
1391 case CT_MESSAGE:
1392 if ((*ct)->c_subtype == MESSAGE_EXTERNAL) {
1393 struct exbody *e = (struct exbody *) (*ct)->c_ctparams;
1394
1395 status = ensure_text_plain (&e->eb_content, *ct, message_mods,
1396 replacetextplain);
1397 }
1398 break;
1399 }
1400
1401 return status;
1402 }
1403
1404
1405 /*
1406 * See if there is a sibling text/plain, and return its subpart number.
1407 */
1408 static int
1409 find_textplain_sibling (CT parent, int replacetextplain,
1410 int *new_subpart_number)
1411 {
1412 struct multipart *mp = (struct multipart *) parent->c_ctparams;
1413 struct part *part, *prev;
1414 bool has_text_plain = false;
1415
1416 for (prev = part = mp->mp_parts; part; part = part->mp_next) {
1417 ++*new_subpart_number;
1418 if (part->mp_part->c_type == CT_TEXT &&
1419 part->mp_part->c_subtype == TEXT_PLAIN) {
1420 if (replacetextplain) {
1421 struct part *old_part;
1422 if (part == mp->mp_parts) {
1423 old_part = mp->mp_parts;
1424 mp->mp_parts = part->mp_next;
1425 } else {
1426 old_part = prev->mp_next;
1427 prev->mp_next = part->mp_next;
1428 }
1429 if (verbosw) {
1430 report (NULL, parent->c_partno, parent->c_file,
1431 "remove text/plain part %s",
1432 old_part->mp_part->c_partno);
1433 }
1434 free_content (old_part->mp_part);
1435 free (old_part);
1436 } else {
1437 has_text_plain = true;
1438 }
1439 break;
1440 }
1441 prev = part;
1442 }
1443
1444 return has_text_plain;
1445 }
1446
1447
1448 /*
1449 * Insert a new text/plain part.
1450 */
1451 static int
1452 insert_new_text_plain_part (CT ct, int new_subpart_number, CT parent)
1453 {
1454 struct multipart *mp = (struct multipart *) parent->c_ctparams;
1455 struct part *new_part;
1456
1457 NEW(new_part);
1458 if ((new_part->mp_part = build_text_plain_part (ct))) {
1459 char buffer[16];
1460 snprintf (buffer, sizeof buffer, "%d", new_subpart_number);
1461
1462 new_part->mp_next = mp->mp_parts;
1463 mp->mp_parts = new_part;
1464 new_part->mp_part->c_partno =
1465 concat (parent->c_partno ? parent->c_partno : "1", ".",
1466 buffer, NULL);
1467
1468 return 1;
1469 }
1470
1471 free_content (new_part->mp_part);
1472 free (new_part);
1473
1474 return 0;
1475 }
1476
1477
1478 /*
1479 * Create a text/plain part to go along with non-plain sibling part.
1480 */
1481 static CT
1482 build_text_plain_part (CT encoded_part)
1483 {
1484 CT tp_part = divide_part (encoded_part);
1485 char *tmp_plain_file = NULL;
1486
1487 if (decode_part (tp_part) == OK) {
1488 /* Now, tp_part->c_cefile.ce_file is the name of the tmp file that
1489 contains the decoded contents. And the decoding function, such
1490 as openQuoted, will have set ...->ce_unlink to 1 so that it will
1491 be unlinked by free_content (). */
1492 char *tempfile;
1493
1494 /* This m_mktemp2() call closes the temp file. */
1495 if ((tempfile = m_mktemp2 (NULL, invo_name, NULL, NULL)) == NULL) {
1496 inform("unable to create temporary file in %s",
1497 get_temp_dir());
1498 } else {
1499 tmp_plain_file = mh_xstrdup (tempfile);
1500 if (reformat_part (tp_part, tmp_plain_file,
1501 tp_part->c_ctinfo.ci_type,
1502 tp_part->c_ctinfo.ci_subtype,
1503 tp_part->c_type) == OK) {
1504 return tp_part;
1505 }
1506 }
1507 }
1508
1509 free_content (tp_part);
1510 if (tmp_plain_file) { (void) m_unlink (tmp_plain_file); }
1511 free (tmp_plain_file);
1512
1513 return NULL;
1514 }
1515
1516
1517 /*
1518 * Slip new text/plain part into a new multipart/alternative.
1519 */
1520 static int
1521 insert_into_new_mp_alt (CT *ct, int *message_mods)
1522 {
1523 CT tp_part = build_text_plain_part (*ct);
1524 int status = OK;
1525
1526 if (tp_part) {
1527 CT mp_alt = build_multipart_alt (*ct, tp_part, CT_MULTIPART,
1528 MULTI_ALTERNATE);
1529 if (mp_alt) {
1530 struct multipart *mp = (struct multipart *) mp_alt->c_ctparams;
1531
1532 if (mp && mp->mp_parts) {
1533 mp->mp_parts->mp_part = tp_part;
1534 /* Make the new multipart/alternative the parent. */
1535 *ct = mp_alt;
1536
1537 ++*message_mods;
1538 if (verbosw) {
1539 report (NULL, (*ct)->c_partno, (*ct)->c_file,
1540 "insert text/plain part");
1541 }
1542 } else {
1543 free_content (tp_part);
1544 free_content (mp_alt);
1545 status = NOTOK;
1546 }
1547 } else {
1548 status = NOTOK;
1549 }
1550 } else {
1551 /* Not an error if text/plain couldn't be built. */
1552 }
1553
1554 return status;
1555 }
1556
1557
1558 /*
1559 * Clone a MIME part.
1560 */
1561 static CT
1562 divide_part (CT ct)
1563 {
1564 CT new_part;
1565
1566 NEW0(new_part);
1567 /* Just copy over what is needed for decoding. c_vrsn and
1568 c_celine aren't necessary. */
1569 new_part->c_file = mh_xstrdup (ct->c_file);
1570 new_part->c_begin = ct->c_begin;
1571 new_part->c_end = ct->c_end;
1572 copy_ctinfo (&new_part->c_ctinfo, &ct->c_ctinfo);
1573 new_part->c_type = ct->c_type;
1574 new_part->c_cefile = ct->c_cefile;
1575 new_part->c_encoding = ct->c_encoding;
1576 new_part->c_ctinitfnx = ct->c_ctinitfnx;
1577 new_part->c_ceopenfnx = ct->c_ceopenfnx;
1578 new_part->c_ceclosefnx = ct->c_ceclosefnx;
1579 new_part->c_cesizefnx = ct->c_cesizefnx;
1580
1581 /* c_ctline is used by reformat__part(), so it can preserve
1582 anything after the type/subtype. */
1583 new_part->c_ctline = mh_xstrdup (ct->c_ctline);
1584
1585 return new_part;
1586 }
1587
1588
1589 /*
1590 * Copy the content info from one part to another.
1591 */
1592 static void
1593 copy_ctinfo (CI dest, CI src)
1594 {
1595 PM s_pm, d_pm;
1596
1597 dest->ci_type = src->ci_type ? mh_xstrdup (src->ci_type) : NULL;
1598 dest->ci_subtype = src->ci_subtype ? mh_xstrdup (src->ci_subtype) : NULL;
1599
1600 for (s_pm = src->ci_first_pm; s_pm; s_pm = s_pm->pm_next) {
1601 d_pm = add_param(&dest->ci_first_pm, &dest->ci_last_pm, s_pm->pm_name,
1602 s_pm->pm_value, 0);
1603 if (s_pm->pm_charset) {
1604 d_pm->pm_charset = mh_xstrdup(s_pm->pm_charset);
1605 }
1606 if (s_pm->pm_lang) {
1607 d_pm->pm_lang = mh_xstrdup(s_pm->pm_lang);
1608 }
1609 }
1610
1611 dest->ci_comment = src->ci_comment ? mh_xstrdup (src->ci_comment) : NULL;
1612 dest->ci_magic = src->ci_magic ? mh_xstrdup (src->ci_magic) : NULL;
1613 }
1614
1615
1616 /*
1617 * Decode content.
1618 */
1619 static int
1620 decode_part (CT ct)
1621 {
1622 char *tmp_decoded;
1623 int status;
1624 FILE *file;
1625 char *tempfile;
1626
1627 if ((tempfile = m_mktemp2 (NULL, invo_name, NULL, &file)) == NULL) {
1628 die("unable to create temporary file in %s", get_temp_dir());
1629 }
1630 tmp_decoded = mh_xstrdup (tempfile);
1631 /* The following call will load ct->c_cefile.ce_file with the tmp
1632 filename of the decoded content. tmp_decoded will contain the
1633 encoded output, get rid of that. */
1634 status = output_message_fp (ct, file, tmp_decoded);
1635 (void) m_unlink (tmp_decoded);
1636 free (tmp_decoded);
1637 if (fclose (file)) {
1638 inform("unable to close temporary file %s, continuing...", tempfile);
1639 }
1640
1641 return status;
1642 }
1643
1644
1645 /*
1646 * Reformat content as plain text.
1647 * Some of the arguments aren't really needed now, but maybe will
1648 * be in the future for other than text types.
1649 */
1650 static int
1651 reformat_part (CT ct, char *file, char *type, char *subtype, int c_type)
1652 {
1653 int output_subtype, output_encoding;
1654 const char *reason = NULL;
1655 char *cp, *cf;
1656 int status;
1657
1658 /* Hacky: this redirects the output from whatever command is used
1659 to show the part to a file. So, the user can't have any output
1660 redirection in that command.
1661 Could show_multi() in mhshowsbr.c avoid this? */
1662
1663 /* Check for invo_name-format-type/subtype. */
1664 if ((cf = context_find_by_type ("format", type, subtype)) == NULL) {
1665 if (verbosw) {
1666 inform("Don't know how to convert %s, there is no "
1667 "%s-format-%s/%s profile entry",
1668 ct->c_file, invo_name, type, subtype);
1669 }
1670 return NOTOK;
1671 }
1672 if (strchr (cf, '>')) {
1673 inform("'>' prohibited in \"%s\",\nplease fix your "
1674 "%s-format-%s/%s profile entry", cf, invo_name, type,
1675 FENDNULL(subtype));
1676
1677 return NOTOK;
1678 }
1679
1680 cp = concat (cf, " >", file, NULL);
1681 status = show_content_aux (ct, 0, cp, NULL, NULL);
1682 free (cp);
1683
1684 /* Unlink decoded content tmp file and free its filename to avoid
1685 leaks. The file stream should already have been closed. */
1686 if (ct->c_cefile.ce_unlink) {
1687 (void) m_unlink (ct->c_cefile.ce_file);
1688 free (ct->c_cefile.ce_file);
1689 ct->c_cefile.ce_file = NULL;
1690 ct->c_cefile.ce_unlink = 0;
1691 }
1692
1693 if (c_type == CT_TEXT) {
1694 output_subtype = TEXT_PLAIN;
1695 } else {
1696 /* Set subtype to 0, which is always an UNKNOWN subtype. */
1697 output_subtype = 0;
1698 }
1699
1700 output_encoding = content_encoding (ct, &reason);
1701 if (status == OK &&
1702 set_ct_type (ct, c_type, output_subtype, output_encoding) == OK) {
1703 ct->c_cefile.ce_file = file;
1704 ct->c_cefile.ce_unlink = 1;
1705 } else {
1706 ct->c_cefile.ce_unlink = 0;
1707 status = NOTOK;
1708 }
1709
1710 return status;
1711 }
1712
1713
1714 /*
1715 * Fill in a multipart/alternative part.
1716 */
1717 static CT
1718 build_multipart_alt (CT first_alt, CT new_part, int type, int subtype)
1719 {
1720 char *boundary_prefix = "----=_nmh-multipart";
1721 char *boundary = concat (boundary_prefix, first_alt->c_partno, NULL);
1722 char *boundary_indicator = "; boundary=";
1723 char *typename, *subtypename, *name;
1724 CT ct;
1725 struct part *p;
1726 struct multipart *m;
1727 const struct str2init *ctinit;
1728
1729 NEW0(ct);
1730
1731 /* Set up the multipart/alternative part. These fields of *ct were
1732 initialized to 0 by mh_xcalloc():
1733 c_fp, c_unlink, c_begin, c_end,
1734 c_vrsn, c_ctline, c_celine,
1735 c_id, c_descr, c_dispo, c_partno,
1736 c_ctinfo.ci_comment, c_ctinfo.ci_magic,
1737 c_cefile, c_encoding,
1738 c_digested, c_digest[16], c_ctexbody,
1739 c_ctinitfnx, c_ceopenfnx, c_ceclosefnx, c_cesizefnx,
1740 c_umask, c_rfc934,
1741 c_showproc, c_termproc, c_storeproc, c_storage, c_folder
1742 */
1743
1744 ct->c_file = mh_xstrdup (first_alt->c_file);
1745 ct->c_type = type;
1746 ct->c_subtype = subtype;
1747
1748 ctinit = get_ct_init (ct->c_type);
1749
1750 typename = ct_type_str (type);
1751 subtypename = ct_subtype_str (type, subtype);
1752
1753 {
1754 int serial = 0;
1755 int found_boundary = 1;
1756
1757 while (found_boundary && serial < 1000000) {
1758 found_boundary = 0;
1759
1760 /* Ensure that the boundary doesn't appear in the decoded
1761 content. */
1762 if (new_part->c_cefile.ce_file) {
1763 if ((found_boundary =
1764 boundary_in_content (&new_part->c_cefile.ce_fp,
1765 new_part->c_cefile.ce_file,
1766 boundary)) == NOTOK) {
1767 goto return_null;
1768 }
1769 }
1770
1771 /* Ensure that the boundary doesn't appear in the encoded
1772 content. */
1773 if (! found_boundary && new_part->c_file) {
1774 if ((found_boundary =
1775 boundary_in_content (&new_part->c_fp,
1776 new_part->c_file,
1777 boundary)) == NOTOK) {
1778 goto return_null;
1779 }
1780 }
1781
1782 if (found_boundary) {
1783 /* Try a slightly different boundary. */
1784 char buffer2[16];
1785
1786 free (boundary);
1787 ++serial;
1788 snprintf (buffer2, sizeof buffer2, "%d", serial);
1789 boundary =
1790 concat (boundary_prefix,
1791 FENDNULL(first_alt->c_partno),
1792 "-", buffer2, NULL);
1793 }
1794 }
1795
1796 if (found_boundary) {
1797 inform("giving up trying to find a unique boundary");
1798 goto return_null;
1799 }
1800 }
1801
1802 name = concat (" ", typename, "/", subtypename, boundary_indicator, "\"",
1803 boundary, "\"", NULL);
1804
1805 /* Load c_first_hf and c_last_hf. */
1806 transfer_noncontent_headers (first_alt, ct);
1807 add_header (ct, mh_xstrdup (TYPE_FIELD), concat (name, "\n", NULL));
1808 free (name);
1809
1810 /* Load c_partno. */
1811 if (first_alt->c_partno) {
1812 ct->c_partno = mh_xstrdup (first_alt->c_partno);
1813 free (first_alt->c_partno);
1814 first_alt->c_partno = concat (ct->c_partno, ".1", NULL);
1815 new_part->c_partno = concat (ct->c_partno, ".2", NULL);
1816 } else {
1817 first_alt->c_partno = mh_xstrdup ("1");
1818 new_part->c_partno = mh_xstrdup ("2");
1819 }
1820
1821 if (ctinit) {
1822 ct->c_ctinfo.ci_type = mh_xstrdup (typename);
1823 ct->c_ctinfo.ci_subtype = mh_xstrdup (subtypename);
1824 }
1825
1826 add_param(&ct->c_ctinfo.ci_first_pm, &ct->c_ctinfo.ci_last_pm,
1827 "boundary", boundary, 0);
1828
1829 NEW(p);
1830 NEW(p->mp_next);
1831 p->mp_next->mp_next = NULL;
1832 p->mp_next->mp_part = first_alt;
1833
1834 NEW0(m);
1835 m->mp_start = concat (boundary, "\n", NULL);
1836 m->mp_stop = concat (boundary, "--\n", NULL);
1837 m->mp_parts = p;
1838 ct->c_ctparams = m;
1839
1840 free (boundary);
1841
1842 return ct;
1843
1844 return_null:
1845 free_content(ct);
1846 free(boundary);
1847 return NULL;
1848 }
1849
1850
1851 /*
1852 * Check that the boundary does not appear in the content.
1853 */
1854 static int
1855 boundary_in_content (FILE **fp, char *file, const char *boundary)
1856 {
1857 char buffer[NMH_BUFSIZ];
1858 size_t bytes_read;
1859 bool found_boundary = false;
1860
1861 /* free_content() will close *fp if we fopen it here. */
1862 if (! *fp && (*fp = fopen (file, "r")) == NULL) {
1863 advise (file, "unable to open %s for reading", file);
1864 return NOTOK;
1865 }
1866
1867 fseeko (*fp, 0L, SEEK_SET);
1868 while ((bytes_read = fread (buffer, 1, sizeof buffer, *fp)) > 0) {
1869 if (find_str (buffer, bytes_read, boundary)) {
1870 found_boundary = true;
1871 break;
1872 }
1873 }
1874
1875 return found_boundary;
1876 }
1877
1878
1879 /*
1880 * Remove all non-Content headers.
1881 */
1882 static void
1883 transfer_noncontent_headers (CT old, CT new)
1884 {
1885 HF hp, hp_prev;
1886
1887 hp_prev = hp = old->c_first_hf;
1888 while (hp) {
1889 HF next = hp->next;
1890
1891 if (strncasecmp (XXX_FIELD_PRF, hp->name, LEN(XXX_FIELD_PRF))) {
1892 if (hp == old->c_last_hf) {
1893 if (hp == old->c_first_hf) {
1894 old->c_last_hf = old->c_first_hf = NULL;
1895 } else {
1896 hp_prev->next = NULL;
1897 old->c_last_hf = hp_prev;
1898 }
1899 } else {
1900 if (hp == old->c_first_hf) {
1901 old->c_first_hf = next;
1902 } else {
1903 hp_prev->next = next;
1904 }
1905 }
1906
1907 /* Put node hp in the new CT. */
1908 if (new->c_first_hf == NULL) {
1909 new->c_first_hf = hp;
1910 } else {
1911 new->c_last_hf->next = hp;
1912 }
1913 new->c_last_hf = hp;
1914 } else {
1915 /* A Content- header, leave in old. */
1916 hp_prev = hp;
1917 }
1918
1919 hp = next;
1920 }
1921 }
1922
1923
1924 /*
1925 * Set content type.
1926 */
1927 static int
1928 set_ct_type (CT ct, int type, int subtype, int encoding)
1929 {
1930 char *typename = ct_type_str (type);
1931 char *subtypename = ct_subtype_str (type, subtype);
1932 /* E.g, " text/plain" */
1933 char *type_subtypename = concat (" ", typename, "/", subtypename, NULL);
1934 /* E.g, " text/plain\n" */
1935 char *name_plus_nl = concat (type_subtypename, "\n", NULL);
1936 bool found_content_type = false;
1937 HF hf;
1938 const char *cp = NULL;
1939 char *ctline;
1940 int status;
1941
1942 /* Update/add Content-Type header field. */
1943 for (hf = ct->c_first_hf; hf; hf = hf->next) {
1944 if (! strcasecmp (TYPE_FIELD, hf->name)) {
1945 found_content_type = true;
1946 free (hf->value);
1947 hf->value = (cp = strchr (ct->c_ctline, ';'))
1948 ? concat (type_subtypename, cp, "\n", NULL)
1949 : mh_xstrdup (name_plus_nl);
1950 }
1951 }
1952 if (! found_content_type) {
1953 add_header (ct, mh_xstrdup (TYPE_FIELD),
1954 (cp = strchr (ct->c_ctline, ';'))
1955 ? concat (type_subtypename, cp, "\n", NULL)
1956 : mh_xstrdup (name_plus_nl));
1957 }
1958
1959 /* Some of these might not be used, but set them anyway. */
1960 ctline = cp
1961 ? concat (type_subtypename, cp, NULL)
1962 : concat (type_subtypename, NULL);
1963 free (ct->c_ctline);
1964 ct->c_ctline = ctline;
1965 /* Leave other ctinfo members as they were. */
1966 free (ct->c_ctinfo.ci_type);
1967 ct->c_ctinfo.ci_type = mh_xstrdup (typename);
1968 free (ct->c_ctinfo.ci_subtype);
1969 ct->c_ctinfo.ci_subtype = mh_xstrdup (subtypename);
1970 ct->c_type = type;
1971 ct->c_subtype = subtype;
1972
1973 free (name_plus_nl);
1974 free (type_subtypename);
1975
1976 status = set_ce (ct, encoding);
1977
1978 return status;
1979 }
1980
1981
1982 /*
1983 * It's not necessary to update the charset parameter of a Content-Type
1984 * header for a text part. According to RFC 2045 Sec. 6.4, the body
1985 * (content) was originally in the specified charset, "and will be in
1986 * that character set again after decoding."
1987 */
1988 static int
1989 decode_text_parts (CT ct, int encoding, const char *decodetypes,
1990 int *message_mods)
1991 {
1992 int status = OK;
1993 int lf_line_endings = 0;
1994
1995 switch (ct->c_type) {
1996 case CT_MULTIPART: {
1997 struct multipart *m = (struct multipart *) ct->c_ctparams;
1998 struct part *part;
1999
2000 /* Should check to see if the body for this part is encoded?
2001 For now, it gets passed along as-is by InitMultiPart(). */
2002 for (part = m->mp_parts; status == OK && part; part = part->mp_next) {
2003 status = decode_text_parts (part->mp_part, encoding, decodetypes,
2004 message_mods);
2005 }
2006 break;
2007 }
2008
2009 case CT_MESSAGE:
2010 if (ct->c_subtype == MESSAGE_EXTERNAL) {
2011 struct exbody *e = (struct exbody *) ct->c_ctparams;
2012
2013 status = decode_text_parts (e->eb_content, encoding, decodetypes,
2014 message_mods);
2015 }
2016 break;
2017
2018 default:
2019 if (! should_decode(decodetypes, ct->c_ctinfo.ci_type, ct->c_ctinfo.ci_subtype)) {
2020 break;
2021 }
2022
2023 lf_line_endings =
2024 ct->c_ctparams && ((struct text *) ct->c_ctparams)->lf_line_endings;
2025
2026 switch (ct->c_encoding) {
2027 case CE_BASE64:
2028 case CE_QUOTED: {
2029 int ct_encoding;
2030
2031 if (decode_part (ct) == OK && ct->c_cefile.ce_file) {
2032 const char *reason = NULL;
2033
2034 if ((ct_encoding = content_encoding (ct, &reason)) == CE_BINARY
2035 && encoding != CE_BINARY) {
2036 /* The decoding isn't acceptable so discard it.
2037 Leave status as OK to allow other transformations. */
2038 if (verbosw) {
2039 report (NULL, ct->c_partno, ct->c_file,
2040 "will not decode%s because it is binary (%s)",
2041 ct->c_partno ? ""
2042 : (FENDNULL(ct->c_ctline)),
2043 reason);
2044 }
2045 (void) m_unlink (ct->c_cefile.ce_file);
2046 free (ct->c_cefile.ce_file);
2047 ct->c_cefile.ce_file = NULL;
2048 } else if (ct->c_encoding == CE_QUOTED &&
2049 ct_encoding == CE_8BIT && encoding == CE_7BIT) {
2050 /* The decoding isn't acceptable so discard it.
2051 Leave status as OK to allow other transformations. */
2052 if (verbosw) {
2053 report (NULL, ct->c_partno, ct->c_file,
2054 "will not decode%s because it is 8bit",
2055 ct->c_partno ? ""
2056 : (FENDNULL(ct->c_ctline)));
2057 }
2058 (void) m_unlink (ct->c_cefile.ce_file);
2059 free (ct->c_cefile.ce_file);
2060 ct->c_cefile.ce_file = NULL;
2061 } else {
2062 int enc;
2063
2064 if (ct_encoding == CE_BINARY) {
2065 enc = CE_BINARY;
2066 } else if (ct_encoding == CE_8BIT && encoding == CE_7BIT) {
2067 enc = CE_QUOTED;
2068 } else {
2069 enc = ct_encoding;
2070 }
2071 if (set_ce (ct, enc) == OK) {
2072 ++*message_mods;
2073 if (verbosw) {
2074 report (NULL, ct->c_partno, ct->c_file, "decode%s",
2075 FENDNULL(ct->c_ctline));
2076 }
2077 if (lf_line_endings) {
2078 strip_crs (ct, message_mods);
2079 }
2080 } else {
2081 status = NOTOK;
2082 }
2083 }
2084 } else {
2085 status = NOTOK;
2086 }
2087 break;
2088 }
2089 case CE_8BIT:
2090 case CE_7BIT:
2091 if (lf_line_endings) {
2092 strip_crs (ct, message_mods);
2093 }
2094 break;
2095 default:
2096 break;
2097 }
2098
2099 break;
2100 }
2101
2102 return status;
2103 }
2104
2105
/*
 * Determine if the part with type[/subtype] should be decoded, according to
 * decodetypes (which came from the -decodetypes switch).
 *
 * Returns 1 if decodetypes, a comma-separated list, has an element that
 * matches either type or type/subtype, ignoring case.  Returns 0 if not.
 */
static int
should_decode(const char *decodetypes, const char *type, const char *subtype)
{
    /* Quick search for matching type[/subtype] in decodetypes: bracket
       decodetypes with commas, then search for ,type, and ,type/subtype, in
       it. */
    char *haystack = concat(",", decodetypes, ",", NULL);
    char *needle = concat(",", type, ",", NULL);
    bool matched = false;

    if (nmh_strcasestr(haystack, needle)) {
        matched = true;
    }

    /* The type alone didn't match, so try with the subtype. */
    if (! matched  &&  subtype != NULL) {
        char *full_needle = concat(",", type, "/", subtype, ",", NULL);

        if (nmh_strcasestr(haystack, full_needle)) {
            matched = true;
        }
        free(full_needle);
    }

    free(needle);
    free(haystack);

    return matched;
}
2138
2139
2140 /*
2141 * See if the decoded content is 7bit, 8bit, or binary. It's binary
2142 * if it has any NUL characters, a CR not followed by a LF, or lines
2143 * greater than 998 characters in length. If binary, reason is set
2144 * to a string explaining why.
2145 */
2146 static int
2147 content_encoding (CT ct, const char **reason)
2148 {
2149 CE ce = &ct->c_cefile;
2150 int encoding = CE_7BIT;
2151
2152 if (ce->ce_file) {
2153 size_t line_len = 0;
2154 char buffer[NMH_BUFSIZ];
2155 size_t inbytes;
2156
2157 if (! ce->ce_fp && (ce->ce_fp = fopen (ce->ce_file, "r")) == NULL) {
2158 advise (ce->ce_file, "unable to open for reading");
2159 return CE_UNKNOWN;
2160 }
2161
2162 fseeko (ce->ce_fp, 0L, SEEK_SET);
2163 while (encoding != CE_BINARY &&
2164 (inbytes = fread (buffer, 1, sizeof buffer, ce->ce_fp)) > 0) {
2165 char *cp;
2166 size_t i;
2167 int last_char_was_cr = 0;
2168
2169 for (i = 0, cp = buffer; i < inbytes; ++i, ++cp) {
2170 if (*cp == '\0' || ++line_len > 998 ||
2171 (*cp != '\n' && last_char_was_cr)) {
2172 encoding = CE_BINARY;
2173 if (*cp == '\0') {
2174 *reason = "null character";
2175 } else if (line_len > 998) {
2176 *reason = "line length > 998";
2177 } else if (*cp != '\n' && last_char_was_cr) {
2178 *reason = "CR not followed by LF";
2179 } else {
2180 /* Should not reach this. */
2181 *reason = "";
2182 }
2183 break;
2184 }
2185 if (*cp == '\n') {
2186 line_len = 0;
2187 } else if (! isascii ((unsigned char) *cp)) {
2188 encoding = CE_8BIT;
2189 }
2190
2191 last_char_was_cr = *cp == '\r';
2192 }
2193 }
2194
2195 fclose (ce->ce_fp);
2196 ce->ce_fp = NULL;
2197 } /* else should never happen */
2198
2199 return encoding;
2200 }
2201
2202
2203 /*
2204 * Strip carriage returns from content.
2205 */
2206 static int
2207 strip_crs (CT ct, int *message_mods)
2208 {
2209 char *charset = content_charset (ct);
2210 int status = OK;
2211
2212 /* Only strip carriage returns if content is ASCII or another
2213 charset that has the same readily recognizable CR followed by a
2214 LF. We can include UTF-8 here because if the high-order bit of
2215 a UTF-8 byte is 0, then it must be a single-byte ASCII
2216 character. */
2217 if (! strcasecmp (charset, "US-ASCII") ||
2218 ! strcasecmp (charset, "UTF-8") ||
2219 ! strncasecmp (charset, "ISO-8859-", 9) ||
2220 ! strncasecmp (charset, "WINDOWS-12", 10)) {
2221 char **file = NULL;
2222 FILE **fp = NULL;
2223 size_t begin;
2224 size_t end;
2225 bool has_crs = false;
2226 bool opened_input_file = false;
2227
2228 if (ct->c_cefile.ce_file) {
2229 file = &ct->c_cefile.ce_file;
2230 fp = &ct->c_cefile.ce_fp;
2231 begin = end = 0;
2232 } else if (ct->c_file) {
2233 file = &ct->c_file;
2234 fp = &ct->c_fp;
2235 begin = (size_t) ct->c_begin;
2236 end = (size_t) ct->c_end;
2237 } /* else don't know where the content is */
2238
2239 if (file && *file && fp) {
2240 if (! *fp) {
2241 if ((*fp = fopen (*file, "r")) == NULL) {
2242 advise (*file, "unable to open for reading");
2243 status = NOTOK;
2244 } else {
2245 opened_input_file = true;
2246 }
2247 }
2248 }
2249
2250 if (fp && *fp) {
2251 char buffer[NMH_BUFSIZ];
2252 size_t bytes_read;
2253 size_t bytes_to_read =
2254 end > 0 && end > begin ? end - begin : sizeof buffer;
2255
2256 fseeko (*fp, begin, SEEK_SET);
2257 while ((bytes_read = fread (buffer, 1,
2258 min (bytes_to_read, sizeof buffer),
2259 *fp)) > 0) {
2260 /* Look for CR followed by a LF. This is supposed to
2261 be text so there should be LF's. If not, don't
2262 modify the content. */
2263 char *cp;
2264 size_t i;
2265 bool last_char_was_cr = false;
2266
2267 if (end > 0) { bytes_to_read -= bytes_read; }
2268
2269 for (i = 0, cp = buffer; i < bytes_read; ++i, ++cp) {
2270 if (*cp == '\n' && last_char_was_cr) {
2271 has_crs = true;
2272 break;
2273 }
2274
2275 last_char_was_cr = *cp == '\r';
2276 }
2277 }
2278
2279 if (has_crs) {
2280 int fd;
2281 char *stripped_content_file;
2282 char *tempfile = m_mktemp2 (NULL, invo_name, &fd, NULL);
2283
2284 if (tempfile == NULL) {
2285 die("unable to create temporary file in %s",
2286 get_temp_dir());
2287 }
2288 stripped_content_file = mh_xstrdup (tempfile);
2289
2290 /* Strip each CR before a LF from the content. */
2291 fseeko (*fp, begin, SEEK_SET);
2292 while ((bytes_read = fread (buffer, 1, sizeof buffer, *fp)) >
2293 0) {
2294 char *cp;
2295 size_t i;
2296 bool last_char_was_cr = false;
2297
2298 for (i = 0, cp = buffer; i < bytes_read; ++i, ++cp) {
2299 if (*cp == '\r') {
2300 last_char_was_cr = true;
2301 } else if (last_char_was_cr) {
2302 if (*cp != '\n') {
2303 if (write (fd, "\r", 1) < 0) {
2304 advise (tempfile, "CR write");
2305 }
2306 }
2307 if (write (fd, cp, 1) < 0) {
2308 advise (tempfile, "write");
2309 }
2310 last_char_was_cr = false;
2311 } else {
2312 if (write (fd, cp, 1) < 0) {
2313 advise (tempfile, "write");
2314 }
2315 last_char_was_cr = false;
2316 }
2317 }
2318 }
2319
2320 if (close (fd)) {
2321 inform("unable to write temporary file %s, continuing...",
2322 stripped_content_file);
2323 (void) m_unlink (stripped_content_file);
2324 free(stripped_content_file);
2325 status = NOTOK;
2326 } else {
2327 /* Replace the decoded file with the converted one. */
2328 if (ct->c_cefile.ce_file && ct->c_cefile.ce_unlink)
2329 (void) m_unlink (ct->c_cefile.ce_file);
2330
2331 free(ct->c_cefile.ce_file);
2332 ct->c_cefile.ce_file = stripped_content_file;
2333 ct->c_cefile.ce_unlink = 1;
2334
2335 ++*message_mods;
2336 if (verbosw) {
2337 report (NULL, ct->c_partno,
2338 begin == 0 && end == 0 ? "" : *file,
2339 "stripped CRs");
2340 }
2341 }
2342 }
2343
2344 if (opened_input_file) {
2345 fclose (*fp);
2346 *fp = NULL;
2347 }
2348 }
2349 }
2350
2351 free (charset);
2352
2353 return status;
2354 }
2355
2356
2357 /*
2358 * Add/update, if necessary, the message C-T-E, based on the least restrictive
2359 * of the part C-T-E's.
2360 */
2361 static void
2362 update_cte (CT ct)
2363 {
2364 const int least_restrictive_enc = least_restrictive_encoding (ct);
2365
2366 if (least_restrictive_enc != CE_UNKNOWN &&
2367 least_restrictive_enc != CE_7BIT) {
2368 char *cte = concat (" ", ce_str (least_restrictive_enc), "\n", NULL);
2369 HF hf;
2370 bool found_cte = false;
2371
2372 /* Update/add Content-Transfer-Encoding header field. */
2373 for (hf = ct->c_first_hf; hf; hf = hf->next) {
2374 if (! strcasecmp (ENCODING_FIELD, hf->name)) {
2375 found_cte = true;
2376 free (hf->value);
2377 hf->value = cte;
2378 }
2379 }
2380 if (! found_cte) {
2381 add_header (ct, mh_xstrdup (ENCODING_FIELD), cte);
2382 }
2383 }
2384 }
2385
2386
2387 /*
2388 * Find the least restrictive encoding (7bit, 8bit, binary) of the parts
2389 * within a message.
2390 */
2391 static int
2392 least_restrictive_encoding (CT ct)
2393 {
2394 int encoding = CE_UNKNOWN;
2395
2396 switch (ct->c_type) {
2397 case CT_MULTIPART: {
2398 struct multipart *m = (struct multipart *) ct->c_ctparams;
2399 struct part *part;
2400
2401 for (part = m->mp_parts; part; part = part->mp_next) {
2402 const int part_encoding =
2403 least_restrictive_encoding (part->mp_part);
2404
2405 if (less_restrictive (encoding, part_encoding)) {
2406 encoding = part_encoding;
2407 }
2408 }
2409 break;
2410 }
2411
2412 case CT_MESSAGE:
2413 if (ct->c_subtype == MESSAGE_EXTERNAL) {
2414 struct exbody *e = (struct exbody *) ct->c_ctparams;
2415 const int part_encoding =
2416 least_restrictive_encoding (e->eb_content);
2417
2418 if (less_restrictive (encoding, part_encoding)) {
2419 encoding = part_encoding;
2420 }
2421 }
2422 break;
2423
2424 default: {
2425 if (less_restrictive (encoding, ct->c_encoding)) {
2426 encoding = ct->c_encoding;
2427 }
2428 }}
2429
2430 return encoding;
2431 }
2432
2433
2434 /*
2435 * Return whether the second encoding is less restrictive than the first, where
2436 * "less restrictive" is in the sense used by RFC 2045 Secs. 6.1 and 6.4. So,
2437 * CE_BINARY is less restrictive than CE_8BIT and
2438 * CE_8BIT is less restrictive than CE_7BIT.
2439 */
2440 static int
2441 less_restrictive (int encoding, int second_encoding)
2442 {
2443 switch (second_encoding) {
2444 case CE_BINARY:
2445 return encoding != CE_BINARY;
2446 case CE_8BIT:
2447 return encoding != CE_BINARY && encoding != CE_8BIT;
2448 case CE_7BIT:
2449 return encoding != CE_BINARY && encoding != CE_8BIT &&
2450 encoding != CE_7BIT;
2451 default :
2452 return 0;
2453 }
2454 }
2455
2456
2457 /*
2458 * Convert character set of each part.
2459 */
2460 static int
2461 convert_charsets (CT ct, char *dest_charset, int *message_mods)
2462 {
2463 int status = OK;
2464
2465 switch (ct->c_type) {
2466 case CT_TEXT:
2467 if (ct->c_subtype == TEXT_PLAIN) {
2468 status = convert_charset (ct, dest_charset, message_mods);
2469 if (status == OK) {
2470 if (verbosw) {
2471 char *ct_charset = content_charset (ct);
2472
2473 report (NULL, ct->c_partno, ct->c_file,
2474 "convert %s to %s", ct_charset, dest_charset);
2475 free (ct_charset);
2476 }
2477 } else {
2478 char *ct_charset = content_charset (ct);
2479
2480 report ("iconv", ct->c_partno, ct->c_file,
2481 "failed to convert %s to %s", ct_charset, dest_charset);
2482 free (ct_charset);
2483 }
2484 }
2485 break;
2486
2487 case CT_MULTIPART: {
2488 struct multipart *m = (struct multipart *) ct->c_ctparams;
2489 struct part *part;
2490
2491 /* Should check to see if the body for this part is encoded?
2492 For now, it gets passed along as-is by InitMultiPart(). */
2493 for (part = m->mp_parts; status == OK && part; part = part->mp_next) {
2494 status =
2495 convert_charsets (part->mp_part, dest_charset, message_mods);
2496 }
2497 break;
2498 }
2499
2500 case CT_MESSAGE:
2501 if (ct->c_subtype == MESSAGE_EXTERNAL) {
2502 struct exbody *e = (struct exbody *) ct->c_ctparams;
2503
2504 status =
2505 convert_charsets (e->eb_content, dest_charset, message_mods);
2506 }
2507 break;
2508
2509 default:
2510 break;
2511 }
2512
2513 return status;
2514 }
2515
2516
2517 /*
2518 * Fix various problems that aren't handled elsewhere. These
2519 * are fixed unconditionally: there are no switches to disable
2520 * them. Currently, "problems" are these:
2521 * 1) remove extraneous semicolon at the end of a header parameter list
2522 * 2) replace RFC 2047 encoding with RFC 2231 encoding of name and
2523 * filename parameters in Content-Type and Content-Disposition
2524 * headers, respectively.
2525 */
2526 static int
2527 fix_always (CT ct, int *message_mods)
2528 {
2529 int status = OK;
2530
2531 switch (ct->c_type) {
2532 case CT_MULTIPART: {
2533 struct multipart *m = (struct multipart *) ct->c_ctparams;
2534 struct part *part;
2535
2536 for (part = m->mp_parts; status == OK && part; part = part->mp_next) {
2537 status = fix_always (part->mp_part, message_mods);
2538 }
2539 break;
2540 }
2541
2542 case CT_MESSAGE:
2543 if (ct->c_subtype == MESSAGE_EXTERNAL) {
2544 struct exbody *e = (struct exbody *) ct->c_ctparams;
2545
2546 status = fix_always (e->eb_content, message_mods);
2547 }
2548 break;
2549
2550 default: {
2551 HF hf;
2552
2553 if (ct->c_first_hf) {
2554 fix_filename_encoding (ct);
2555 }
2556
2557 for (hf = ct->c_first_hf; hf; hf = hf->next) {
2558 size_t len = strlen (hf->value);
2559
2560 if (strcasecmp (hf->name, TYPE_FIELD) != 0 &&
2561 strcasecmp (hf->name, DISPO_FIELD) != 0) {
2562 /* Only do this for Content-Type and
2563 Content-Disposition fields because those are the
2564 only headers that parse_mime() warns about. */
2565 continue;
2566 }
2567
2568 /* whitespace following a trailing ';' will be nuked as well */
2569 if (hf->value[len - 1] == '\n') {
2570 while (isspace((unsigned char)(hf->value[len - 2]))) {
2571 if (len-- == 0) { break; }
2572 }
2573 }
2574
2575 if (hf->value[len - 2] == ';') {
2576 /* Remove trailing ';' from parameter value. */
2577 hf->value[len - 2] = '\n';
2578 hf->value[len - 1] = '\0';
2579
2580 /* Also, if Content-Type parameter, remove trailing ';'
2581 from ct->c_ctline. This probably isn't necessary
2582 but can't hurt. */
2583 if (strcasecmp(hf->name, TYPE_FIELD) == 0 && ct->c_ctline) {
2584 size_t l = strlen(ct->c_ctline) - 1;
2585 while (isspace((unsigned char)(ct->c_ctline[l])) ||
2586 ct->c_ctline[l] == ';') {
2587 ct->c_ctline[l--] = '\0';
2588 if (l == 0) { break; }
2589 }
2590 }
2591
2592 ++*message_mods;
2593 if (verbosw) {
2594 report (NULL, ct->c_partno, ct->c_file,
2595 "remove trailing ; from %s parameter value",
2596 hf->name);
2597 }
2598 }
2599 }
2600 }}
2601
2602 return status;
2603 }
2604
2605
2606 /*
2607 * Factor out common code for loops in fix_filename_encoding().
2608 */
2609 static int
2610 fix_filename_param (char *name, char *value, PM *first_pm, PM *last_pm)
2611 {
2612 bool fixed = false;
2613
2614 if (has_prefix(value, "=?") && has_suffix(value, "?=")) {
2615 /* Looks like an RFC 2047 encoded parameter. */
2616 char decoded[PATH_MAX + 1];
2617
2618 if (decode_rfc2047 (value, decoded, sizeof decoded)) {
2619 /* Encode using RFC 2231. */
2620 replace_param (first_pm, last_pm, name, decoded, 0);
2621 fixed = true;
2622 } else {
2623 inform("failed to decode %s parameter %s", name, value);
2624 }
2625 }
2626
2627 return fixed;
2628 }
2629
2630
2631 /*
2632 * Replace RFC 2047 encoding with RFC 2231 encoding of name and
2633 * filename parameters in Content-Type and Content-Disposition
2634 * headers, respectively.
2635 */
2636 static int
2637 fix_filename_encoding (CT ct)
2638 {
2639 PM pm;
2640 HF hf;
2641 int fixed = 0;
2642
2643 for (pm = ct->c_ctinfo.ci_first_pm; pm; pm = pm->pm_next) {
2644 if (pm->pm_name && pm->pm_value &&
2645 strcasecmp (pm->pm_name, "name") == 0) {
2646 fixed = fix_filename_param (pm->pm_name, pm->pm_value,
2647 &ct->c_ctinfo.ci_first_pm,
2648 &ct->c_ctinfo.ci_last_pm);
2649 }
2650 }
2651
2652 for (pm = ct->c_dispo_first; pm; pm = pm->pm_next) {
2653 if (pm->pm_name && pm->pm_value &&
2654 strcasecmp (pm->pm_name, "filename") == 0) {
2655 fixed = fix_filename_param (pm->pm_name, pm->pm_value,
2656 &ct->c_dispo_first,
2657 &ct->c_dispo_last);
2658 }
2659 }
2660
2661 /* Fix hf values to correspond. */
2662 for (hf = ct->c_first_hf; fixed && hf; hf = hf->next) {
2663 enum { OTHER, TYPE_HEADER, DISPO_HEADER } field = OTHER;
2664
2665 if (strcasecmp (hf->name, TYPE_FIELD) == 0) {
2666 field = TYPE_HEADER;
2667 } else if (strcasecmp (hf->name, DISPO_FIELD) == 0) {
2668 field = DISPO_HEADER;
2669 }
2670
2671 if (field != OTHER) {
2672 const char *const semicolon_loc = strchr (hf->value, ';');
2673
2674 if (semicolon_loc) {
2675 const size_t len =
2676 strlen (hf->name) + 1 + semicolon_loc - hf->value;
2677 const char *const params =
2678 output_params (len,
2679 field == TYPE_HEADER
2680 ? ct->c_ctinfo.ci_first_pm
2681 : ct->c_dispo_first,
2682 NULL, 0);
2683 const char *const new_params = concat (params, "\n", NULL);
2684
2685 replace_substring (&hf->value, semicolon_loc, new_params);
2686 free((void *)new_params); /* Cast away const. Sigh. */
2687 free((void *)params);
2688 } else {
2689 inform("did not find semicolon in %s:%s\n",
2690 hf->name, hf->value);
2691 }
2692 }
2693 }
2694
2695 return OK;
2696 }
2697
2698
2699 /*
2700 * Output content in input file to output file.
2701 */
2702 static int
2703 write_content (CT ct, const char *input_filename, char *outfile, FILE *outfp,
2704 int modify_inplace, int message_mods)
2705 {
2706 int status = OK;
2707
2708 if (modify_inplace) {
2709 if (message_mods > 0) {
2710 if ((status = output_message_fp (ct, outfp, outfile)) == OK) {
2711 char *infile = input_filename
2712 ? mh_xstrdup (input_filename)
2713 : mh_xstrdup (ct->c_file ? ct->c_file : "-");
2714
2715 if (remove_file (infile) == OK) {
2716 if (rename (outfile, infile)) {
2717 /* Rename didn't work, possibly because of an
2718 attempt to rename across filesystems. Try
2719 brute force copy. */
2720 int old = open (outfile, O_RDONLY);
2721 int new =
2722 open (infile, O_WRONLY | O_CREAT, m_gmprot ());
2723 int i = -1;
2724
2725 if (old != -1 && new != -1) {
2726 char buffer[NMH_BUFSIZ];
2727
2728 while ((i = read (old, buffer, sizeof buffer)) >
2729 0) {
2730 if (write (new, buffer, i) != i) {
2731 i = -1;
2732 break;
2733 }
2734 }
2735 }
2736 if (new != -1) { close (new); }
2737 if (old != -1) { close (old); }
2738 (void) m_unlink (outfile);
2739
2740 if (i < 0) {
2741 /* The -file argument processing used path() to
2742 expand filename to absolute path. */
2743 int file = ct->c_file && ct->c_file[0] == '/';
2744
2745 inform("unable to rename %s %s to %s, continuing...",
2746 file ? "file" : "message", outfile,
2747 infile);
2748 status = NOTOK;
2749 }
2750 }
2751 } else {
2752 inform("unable to remove input file %s, "
2753 "not modifying it, continuing...", infile);
2754 (void) m_unlink (outfile);
2755 status = NOTOK;
2756 }
2757
2758 free (infile);
2759 } else {
2760 status = NOTOK;
2761 }
2762 } else {
2763 /* No modifications and didn't need the tmp outfile. */
2764 (void) m_unlink (outfile);
2765 }
2766 } else {
2767 /* Output is going to some file. Produce it whether or not
2768 there were modifications. */
2769 status = output_message_fp (ct, outfp, outfile);
2770 }
2771
2772 flush_errors ();
2773 return status;
2774 }
2775
2776
2777 /*
2778 * parse_mime() does not set lf_line_endings in struct text, so use this
2779 * function to do it. It touches the parts the decodetypes identifies.
2780 */
2781 static void
2782 set_text_ctparams(CT ct, char *decodetypes, int lf_line_endings)
2783 {
2784 switch (ct->c_type) {
2785 case CT_MULTIPART: {
2786 struct multipart *m = (struct multipart *) ct->c_ctparams;
2787 struct part *part;
2788
2789 for (part = m->mp_parts; part; part = part->mp_next) {
2790 set_text_ctparams(part->mp_part, decodetypes, lf_line_endings);
2791 }
2792 break;
2793 }
2794
2795 case CT_MESSAGE:
2796 if (ct->c_subtype == MESSAGE_EXTERNAL) {
2797 struct exbody *e = (struct exbody *) ct->c_ctparams;
2798
2799 set_text_ctparams(e->eb_content, decodetypes, lf_line_endings);
2800 }
2801 break;
2802
2803 default:
2804 if (should_decode(decodetypes, ct->c_ctinfo.ci_type, ct->c_ctinfo.ci_subtype)) {
2805 if (ct->c_ctparams == NULL) {
2806 ct->c_ctparams = mh_xcalloc(1, sizeof (struct text));
2807 }
2808 ((struct text *) ct->c_ctparams)->lf_line_endings = lf_line_endings;
2809 }
2810 }
2811 }
2812
2813
2814 /*
2815 * If "rmmproc" is defined, call that to remove the file. Otherwise,
2816 * use the standard MH backup file.
2817 */
2818 static int
2819 remove_file (const char *file)
2820 {
2821 if (rmmproc) {
2822 char *rmm_command = concat (rmmproc, " ", file, NULL);
2823 int status = system (rmm_command);
2824
2825 free (rmm_command);
2826 return WIFEXITED (status) ? WEXITSTATUS (status) : NOTOK;
2827 }
2828 /* This is OK for a non-message file, it still uses the
2829 BACKUP_PREFIX form. The backup file will be in the same
2830 directory as file. */
2831 return rename (file, m_backup (file));
2832 }
2833
2834
2835 /*
2836 * Output formatted message to user.
2837 */
2838 static void
2839 report (char *what, char *partno, char *filename, char *message, ...)
2840 {
2841 va_list args;
2842 char *fmt;
2843
2844 if (verbosw) {
2845 va_start (args, message);
2846 fmt = concat (filename, partno ? " part " : ", ",
2847 FENDNULL(partno), partno ? ", " : "", message, NULL);
2848
2849 advertise (what, NULL, fmt, args);
2850
2851 free (fmt);
2852 va_end (args);
2853 }
2854 }
2855
2856
/*
 * Terminate via done (1).  When i is SIGQUIT, stdout is flushed and a
 * newline is written to stderr first.
 * NOTE(review): presumably installed as a signal handler, with i the
 * signal number -- confirm against the caller.
 */
static void
pipeser (int i)
{
    switch (i) {
    case SIGQUIT:
        fflush (stdout);
        fprintf (stderr, "\n");
        fflush (stderr);
        break;

    default:
        break;
    }

    done (1);
    /* NOTREACHED */
}