]> diplodocus.org Git - nmh/blob - uip/mhfixmsg.c
vector.c: Move interface to own file.
[nmh] / uip / mhfixmsg.c
1 /* mhfixmsg.c -- rewrite a message with various transformations
2 *
3 * This code is Copyright (c) 2002 and 2013, by the authors of nmh.
4 * See the COPYRIGHT file in the root directory of the nmh
5 * distribution for complete copyright information.
6 */
7
8 #include "h/mh.h"
9 #include "sbr/path.h"
10 #include "sbr/print_version.h"
11 #include "sbr/print_help.h"
12 #include "sbr/error.h"
13 #include "h/fmt_scan.h"
14 #include "h/mime.h"
15 #include "h/mhparse.h"
16 #include "h/done.h"
17 #include "h/utils.h"
18 #include "h/signals.h"
19 #include "sbr/m_maildir.h"
20 #include "sbr/m_mktemp.h"
21 #include "sbr/mime_type.h"
22 #include "mhmisc.h"
23 #include "mhfree.h"
24 #include "mhoutsbr.h"
25 #include "mhshowsbr.h"
26 #include <fcntl.h>
27
/*
 * Command-line switch table.  Each X() entry gives the switch text
 * (optionally followed by a description of its argument), a minchars
 * value, and the identifier that main() switches on.  The list is
 * expanded twice below with different definitions of X(): once through
 * DEFINE_SWITCH_ENUM and once through DEFINE_SWITCH_ARRAY, which names
 * the resulting table "switches".
 */
#define MHFIXMSG_SWITCHES \
    X("decodetext 8bit|7bit|binary", 0, DECODETEXTSW) \
    X("nodecodetext", 0, NDECODETEXTSW) \
    X("decodetypes", 0, DECODETYPESW) \
    X("crlflinebreaks", 0, CRLFLINEBREAKSSW) \
    X("nocrlflinebreaks", 0, NCRLFLINEBREAKSSW) \
    X("textcharset", 0, TEXTCHARSETSW) \
    X("notextcharset", 0, NTEXTCHARSETSW) \
    X("reformat", 0, REFORMATSW) \
    X("noreformat", 0, NREFORMATSW) \
    X("replacetextplain", 0, REPLACETEXTPLAINSW) \
    X("noreplacetextplain", 0, NREPLACETEXTPLAINSW) \
    X("fixboundary", 0, FIXBOUNDARYSW) \
    X("nofixboundary", 0, NFIXBOUNDARYSW) \
    X("fixcte", 0, FIXCOMPOSITECTESW) \
    X("nofixcte", 0, NFIXCOMPOSITECTESW) \
    X("fixtype mimetype", 0, FIXTYPESW) \
    X("file file", 0, FILESW) \
    X("outfile file", 0, OUTFILESW) \
    X("rmmproc program", 0, RPROCSW) \
    X("normmproc", 0, NRPRCSW) \
    X("changecur", 0, CHGSW) \
    X("nochangecur", 0, NCHGSW) \
    X("verbose", 0, VERBSW) \
    X("noverbose", 0, NVERBSW) \
    X("version", 0, VERSIONSW) \
    X("help", 0, HELPSW) \

/* First expansion: only the identifiers. */
#define X(sw, minchars, id) id,
DEFINE_SWITCH_ENUM(MHFIXMSG);
#undef X

/* Second expansion: one { text, minchars, id } initializer per switch. */
#define X(sw, minchars, id) { sw, minchars, id },
DEFINE_SWITCH_ARRAY(MHFIXMSG, switches);
#undef X
63
64
int verbosw; /* Set to 1 by -verbose and to 0 by -noverbose in main(). */
int debugsw; /* Needed by mhparse.c. */

/* main() installs quitser for SIGQUIT; it is the same handler as SIGPIPE's. */
#define quitser pipeser
69
70 /*
71 * static prototypes
72 */
/*
 * The transformations selected on the command line.  main() fills this
 * in and hands it, read-only, to mhfixmsgsbr() for every message.
 */
typedef struct fix_transformations {
    int fixboundary;        /* -fixboundary / -nofixboundary */
    int fixcompositecte;    /* -fixcte / -nofixcte */
    svector_t fixtypes;     /* -fixtype arguments; NULL if none were given */
    int reformat;           /* -reformat / -noreformat */
    int replacetextplain;   /* -replacetextplain / -noreplacetextplain */
    int decodetext;         /* CE_8BIT, CE_7BIT or CE_BINARY from
                               -decodetext; 0 for -nodecodetext */
    char *decodetypes;      /* -decodetypes argument, or the built-in
                               default list */
    /* Line-break selection (see RFC 2046 Sec. 4.1.1, par. 1).  Note the
       sense: this is 0 for -crlflinebreaks (the default) and 1 for
       -nocrlflinebreaks. */
    int lf_line_endings;
    char *textcharset;      /* -textcharset argument; NULL for
                               -notextcharset */
} fix_transformations;
85
86 static int mhfixmsgsbr (CT *, char *, const fix_transformations *,
87 FILE **, char *, FILE **);
88 static int fix_boundary (CT *, int *);
89 static int copy_input_to_output (const char *, FILE *, const char *, FILE *);
90 static int get_multipart_boundary (CT, char **);
91 static int replace_boundary (CT, char *, char *);
92 static int fix_types (CT, svector_t, int *);
93 static char *replace_substring (char **, const char *, const char *);
94 static char *remove_parameter (char *, const char *);
95 static int fix_composite_cte (CT, int *);
96 static int set_ce (CT, int);
97 static int ensure_text_plain (CT *, CT, int *, int);
98 static int find_textplain_sibling (CT, int, int *);
99 static int insert_new_text_plain_part (CT, int, CT);
100 static CT build_text_plain_part (CT);
101 static int insert_into_new_mp_alt (CT *, int *);
102 static CT divide_part (CT);
103 static void copy_ctinfo (CI, CI);
104 static int decode_part (CT);
105 static int reformat_part (CT, char *, char *, char *, int);
106 static CT build_multipart_alt (CT, CT, int, int);
107 static int boundary_in_content (FILE **, char *, const char *);
108 static void transfer_noncontent_headers (CT, CT);
109 static int set_ct_type (CT, int type, int subtype, int encoding);
110 static int decode_text_parts (CT, int, const char *, int *);
111 static int should_decode(const char *, const char *, const char *);
112 static int content_encoding (CT, const char **);
113 static int strip_crs (CT, int *);
114 static void update_cte (CT);
115 static int least_restrictive_encoding (CT) PURE;
116 static int less_restrictive (int, int);
117 static int convert_charsets (CT, char *, int *);
118 static int fix_always (CT, int *);
119 static int fix_filename_param (char *, char *, PM *, PM *);
120 static int fix_filename_encoding (CT);
121 static int write_content (CT, const char *, char *, FILE *, int, int);
122 static void set_text_ctparams(CT, char *, int);
123 static int remove_file (const char *);
124 static void report (char *, char *, char *, char *, ...)
125 CHECK_PRINTF(4, 5);
126 static void pipeser (int);
127
128
129 int
130 main (int argc, char **argv)
131 {
132 int msgnum;
133 char *cp, *file = NULL, *folder = NULL;
134 char *maildir = NULL, buf[100], *outfile = NULL;
135 char **argp, **arguments;
136 struct msgs_array msgs = { 0, 0, NULL };
137 struct msgs *mp = NULL;
138 CT *ctp;
139 FILE *fp, *infp = NULL, *outfp = NULL;
140 bool using_stdin = false;
141 bool chgflag = true;
142 int status = OK;
143 fix_transformations fx;
144 fx.reformat = fx.fixcompositecte = fx.fixboundary = 1;
145 fx.fixtypes = NULL;
146 fx.replacetextplain = 0;
147 fx.decodetext = CE_8BIT;
148 fx.decodetypes = "text,application/ics"; /* Default, per man page. */
149 fx.lf_line_endings = 0;
150 fx.textcharset = NULL;
151
152 if (nmh_init(argv[0], true, false)) { return 1; }
153
154 arguments = getarguments (invo_name, argc, argv, 1);
155 argp = arguments;
156
157 /*
158 * Parse arguments
159 */
160 while ((cp = *argp++)) {
161 if (*cp == '-') {
162 switch (smatch (++cp, switches)) {
163 case AMBIGSW:
164 ambigsw (cp, switches);
165 done (1);
166 case UNKWNSW:
167 die("-%s unknown", cp);
168
169 case HELPSW:
170 snprintf (buf, sizeof buf, "%s [+folder] [msgs] [switches]",
171 invo_name);
172 print_help (buf, switches, 1);
173 done (0);
174 case VERSIONSW:
175 print_version(invo_name);
176 done (0);
177
178 case DECODETEXTSW:
179 if (! (cp = *argp++) || *cp == '-') {
180 die("missing argument to %s", argp[-2]);
181 }
182 if (! strcasecmp (cp, "8bit")) {
183 fx.decodetext = CE_8BIT;
184 } else if (! strcasecmp (cp, "7bit")) {
185 fx.decodetext = CE_7BIT;
186 } else if (! strcasecmp (cp, "binary")) {
187 fx.decodetext = CE_BINARY;
188 } else {
189 die("invalid argument to %s", argp[-2]);
190 }
191 continue;
192 case NDECODETEXTSW:
193 fx.decodetext = 0;
194 continue;
195 case DECODETYPESW:
196 if (! (cp = *argp++) || *cp == '-') {
197 die("missing argument to %s", argp[-2]);
198 }
199 fx.decodetypes = cp;
200 continue;
201 case CRLFLINEBREAKSSW:
202 fx.lf_line_endings = 0;
203 continue;
204 case NCRLFLINEBREAKSSW:
205 fx.lf_line_endings = 1;
206 continue;
207 case TEXTCHARSETSW:
208 if (! (cp = *argp++) || (*cp == '-' && cp[1])) {
209 die("missing argument to %s", argp[-2]);
210 }
211 fx.textcharset = cp;
212 continue;
213 case NTEXTCHARSETSW:
214 fx.textcharset = 0;
215 continue;
216 case FIXBOUNDARYSW:
217 fx.fixboundary = 1;
218 continue;
219 case NFIXBOUNDARYSW:
220 fx.fixboundary = 0;
221 continue;
222 case FIXCOMPOSITECTESW:
223 fx.fixcompositecte = 1;
224 continue;
225 case NFIXCOMPOSITECTESW:
226 fx.fixcompositecte = 0;
227 continue;
228 case FIXTYPESW:
229 if (! (cp = *argp++) || (*cp == '-' && cp[1])) {
230 die("missing argument to %s", argp[-2]);
231 }
232 if (! strncasecmp (cp, "multipart/", 10) ||
233 ! strncasecmp (cp, "message/", 8))
234 die("-fixtype %s not allowed", cp);
235 if (! strchr (cp, '/'))
236 die("-fixtype requires type/subtype");
237 if (fx.fixtypes == NULL) { fx.fixtypes = svector_create (10); }
238 svector_push_back (fx.fixtypes, cp);
239 continue;
240 case REFORMATSW:
241 fx.reformat = 1;
242 continue;
243 case NREFORMATSW:
244 fx.reformat = 0;
245 continue;
246 case REPLACETEXTPLAINSW:
247 fx.replacetextplain = 1;
248 continue;
249 case NREPLACETEXTPLAINSW:
250 fx.replacetextplain = 0;
251 continue;
252 case FILESW:
253 if (! (cp = *argp++) || (*cp == '-' && cp[1])) {
254 die("missing argument to %s", argp[-2]);
255 }
256 file = *cp == '-' ? mh_xstrdup (cp) : path (cp, TFILE);
257 continue;
258 case OUTFILESW:
259 if (! (cp = *argp++) || (*cp == '-' && cp[1])) {
260 die("missing argument to %s", argp[-2]);
261 }
262 outfile = *cp == '-' ? mh_xstrdup (cp) : path (cp, TFILE);
263 continue;
264 case RPROCSW:
265 if (!(rmmproc = *argp++) || *rmmproc == '-') {
266 die("missing argument to %s", argp[-2]);
267 }
268 continue;
269 case NRPRCSW:
270 rmmproc = NULL;
271 continue;
272 case CHGSW:
273 chgflag = true;
274 continue;
275 case NCHGSW:
276 chgflag = false;
277 continue;
278 case VERBSW:
279 verbosw = 1;
280 continue;
281 case NVERBSW:
282 verbosw = 0;
283 continue;
284 }
285 }
286 if (*cp == '+' || *cp == '@') {
287 if (folder)
288 die("only one folder at a time!");
289 folder = pluspath (cp);
290 } else {
291 if (*cp == '/') {
292 /* Interpret a full path as a filename, not a message. */
293 file = mh_xstrdup (cp);
294 } else {
295 app_msgarg (&msgs, cp);
296 }
297 }
298 }
299
300 SIGNAL (SIGQUIT, quitser);
301 SIGNAL (SIGPIPE, pipeser);
302
303 /*
304 * Read the standard profile setup
305 */
306 if ((fp = fopen (cp = etcpath ("mhn.defaults"), "r"))) {
307 readconfig(NULL, fp, cp, 0);
308 fclose (fp);
309 }
310
311 suppress_bogus_mp_content_warning = skip_mp_cte_check = true;
312 suppress_extraneous_trailing_semicolon_warning = true;
313
314 if (! context_find ("path")) {
315 free (path ("./", TFOLDER));
316 }
317
318 if (file && msgs.size) {
319 die("cannot specify msg and file at same time!");
320 }
321
322 if (outfile) {
323 /* Open the outfile now, so we don't have to risk opening it
324 after running out of fds. */
325 if (strcmp (outfile, "-") == 0) {
326 outfp = stdout;
327 } else if ((outfp = fopen (outfile, "w")) == NULL) {
328 adios (outfile, "unable to open for writing");
329 }
330 }
331
332 /*
333 * check if message is coming from file
334 */
335 if (file) {
336 /* If file is stdin, create a tmp file name before parse_mime()
337 has a chance, because it might put in on a different
338 filesystem than the output file. Instead, put it in the
339 user's preferred tmp directory. */
340 CT ct;
341
342 if (! strcmp ("-", file)) {
343 int fd;
344 char *cp;
345
346 using_stdin = true;
347
348 if ((cp = m_mktemp2 (NULL, invo_name, &fd, NULL)) == NULL) {
349 die("unable to create temporary file in %s",
350 get_temp_dir());
351 } else {
352 free (file);
353 file = mh_xstrdup (cp);
354 cpydata (STDIN_FILENO, fd, "-", file);
355 }
356
357 if (close (fd)) {
358 (void) m_unlink (file);
359 die("failed to write temporary file");
360 }
361 }
362
363 cts = mh_xcalloc(2, sizeof *cts);
364 ctp = cts;
365
366 if ((ct = parse_mime (file))) {
367 set_text_ctparams(ct, fx.decodetypes, fx.lf_line_endings);
368 *ctp++ = ct;
369 } else {
370 inform("unable to parse message from file %s", file);
371 status = NOTOK;
372
373 /* If there's an outfile, pass the input message unchanged, so the
374 message won't get dropped from a pipeline. */
375 if (outfile) {
376 /* Something went wrong. Output might be expected, such as if
377 this were run as a filter. Just copy the input to the
378 output. */
379 if ((infp = fopen (file, "r")) == NULL) {
380 adios (file, "unable to open for reading");
381 }
382
383 if (copy_input_to_output (file, infp, outfile, outfp) != OK) {
384 inform("unable to copy message to %s, "
385 "it might be lost\n", outfile);
386 }
387
388 fclose (infp);
389 infp = NULL;
390 }
391 }
392 } else {
393 /*
394 * message(s) are coming from a folder
395 */
396 CT ct;
397
398 if (! msgs.size) {
399 app_msgarg(&msgs, "cur");
400 }
401 if (! folder) {
402 folder = getfolder (1);
403 }
404 maildir = mh_xstrdup(m_maildir (folder));
405
406 /* chdir so that error messages, esp. from MIME parser, just
407 refer to the message and not its path. */
408 if (chdir (maildir) == NOTOK) {
409 adios (maildir, "unable to change directory to");
410 }
411
412 /* read folder and create message structure */
413 if (! (mp = folder_read (folder, 1))) {
414 die("unable to read folder %s", folder);
415 }
416
417 /* check for empty folder */
418 if (mp->nummsg == 0) {
419 die("no messages in %s", folder);
420 }
421
422 /* parse all the message ranges/sequences and set SELECTED */
423 for (msgnum = 0; msgnum < msgs.size; msgnum++)
424 if (! m_convert (mp, msgs.msgs[msgnum])) {
425 done (1);
426 }
427 seq_setprev (mp); /* set the previous-sequence */
428
429 cts = mh_xcalloc(mp->numsel + 1, sizeof *cts);
430 ctp = cts;
431
432 for (msgnum = mp->lowsel; msgnum <= mp->hghsel; msgnum++) {
433 if (is_selected(mp, msgnum)) {
434 char *msgnam = m_name (msgnum);
435
436 if ((ct = parse_mime (msgnam))) {
437 set_text_ctparams(ct, fx.decodetypes, fx.lf_line_endings);
438 *ctp++ = ct;
439 } else {
440 inform("unable to parse message %s", msgnam);
441 status = NOTOK;
442
443 /* If there's an outfile, pass the input message
444 unchanged, so the message won't get dropped from a
445 pipeline. */
446 if (outfile) {
447 /* Something went wrong. Output might be expected,
448 such as if this were run as a filter. Just copy
449 the input to the output. */
450 /* Can't use path() here because 1) it might have been
451 called before and it caches the pwd, and 2) we call
452 chdir() after that. */
453 char *input_filename =
454 concat (maildir, "/", msgnam, NULL);
455
456 if ((infp = fopen (input_filename, "r")) == NULL) {
457 adios (input_filename,
458 "unable to open for reading");
459 }
460
461 if (copy_input_to_output (input_filename, infp,
462 outfile, outfp) != OK) {
463 inform("unable to copy message to %s, "
464 "it might be lost\n", outfile);
465 }
466
467 fclose (infp);
468 infp = NULL;
469 free (input_filename);
470 }
471 }
472 }
473 }
474
475 if (chgflag) {
476 seq_setcur (mp, mp->hghsel); /* update current message */
477 }
478 seq_save (mp); /* synchronize sequences */
479 context_replace (pfolder, folder);/* update current folder */
480 context_save (); /* save the context file */
481 }
482
483 if (*cts) {
484 for (ctp = cts; *ctp; ++ctp) {
485 status =
486 mhfixmsgsbr (ctp, maildir, &fx, &infp, outfile, &outfp) == OK
487 ? 0
488 : 1;
489 free_content (*ctp);
490
491 if (using_stdin) {
492 (void) m_unlink (file);
493
494 if (! outfile) {
495 /* Just calling m_backup() unlinks the backup file. */
496 (void) m_backup (file);
497 }
498 }
499 }
500 } else {
501 status = 1;
502 }
503
504 free(maildir);
505 free (cts);
506
507 if (fx.fixtypes != NULL) { svector_free (fx.fixtypes); }
508 if (infp) { fclose (infp); } /* even if stdin */
509 if (outfp) { fclose (outfp); } /* even if stdout */
510 free (outfile);
511 free (file);
512 free (folder);
513 free (arguments);
514
515 done (status == OK ? 0 : 1);
516 return NOTOK;
517 }
518
519
520 /*
521 * Apply transformations to one message.
522 */
523 static int
524 mhfixmsgsbr (CT *ctp, char *maildir, const fix_transformations *fx,
525 FILE **infp, char *outfile, FILE **outfp)
526 {
527 /* Store input filename in case one of the transformations, i.e.,
528 fix_boundary(), rewrites to a tmp file. */
529 char *input_filename = maildir
530 ? concat (maildir, "/", (*ctp)->c_file, NULL)
531 : mh_xstrdup ((*ctp)->c_file);
532 bool modify_inplace = false;
533 int message_mods = 0;
534 int status = OK;
535
536 /* Though the input file won't need to be opened if everything goes
537 well, do it here just in case there's a failure, and that failure is
538 running out of file descriptors. */
539 if ((*infp = fopen (input_filename, "r")) == NULL) {
540 adios (input_filename, "unable to open for reading");
541 }
542
543 if (outfile == NULL) {
544 modify_inplace = true;
545
546 if ((*ctp)->c_file) {
547 char *tempfile;
548 /* outfp will be closed by the caller */
549 if ((tempfile = m_mktemp2 (NULL, invo_name, NULL, outfp)) ==
550 NULL) {
551 die("unable to create temporary file in %s",
552 get_temp_dir());
553 }
554 outfile = mh_xstrdup (tempfile);
555 } else {
556 die("missing both input and output filenames\n");
557 }
558 } /* else *outfp was defined by caller */
559
560 reverse_alternative_parts (*ctp);
561 status = fix_always (*ctp, &message_mods);
562 if (status == OK && fx->fixboundary) {
563 status = fix_boundary (ctp, &message_mods);
564 }
565 if (status == OK && fx->fixtypes != NULL) {
566 status = fix_types (*ctp, fx->fixtypes, &message_mods);
567 }
568 if (status == OK && fx->fixcompositecte) {
569 status = fix_composite_cte (*ctp, &message_mods);
570 }
571 if (status == OK && fx->reformat) {
572 status =
573 ensure_text_plain (ctp, NULL, &message_mods, fx->replacetextplain);
574 }
575 if (status == OK && fx->decodetext) {
576 status = decode_text_parts (*ctp, fx->decodetext, fx->decodetypes,
577 &message_mods);
578 update_cte (*ctp);
579 }
580 if (status == OK && fx->textcharset != NULL) {
581 status = convert_charsets (*ctp, fx->textcharset, &message_mods);
582 }
583
584 if (status == OK && ! (*ctp)->c_umask) {
585 /* Set the umask for the contents file. This currently
586 isn't used but just in case it is in the future. */
587 struct stat st;
588
589 if (stat ((*ctp)->c_file, &st) != NOTOK) {
590 (*ctp)->c_umask = ~(st.st_mode & 0777);
591 } else {
592 (*ctp)->c_umask = ~m_gmprot();
593 }
594 }
595
596 /*
597 * Write the content to a file
598 */
599 if (status == OK) {
600 status = write_content (*ctp, input_filename, outfile, *outfp,
601 modify_inplace, message_mods);
602 } else if (! modify_inplace) {
603 /* Something went wrong. Output might be expected, such
604 as if this were run as a filter. Just copy the input
605 to the output. */
606 if (copy_input_to_output (input_filename, *infp, outfile,
607 *outfp) != OK) {
608 inform("unable to copy message to %s, it might be lost\n",
609 outfile);
610 }
611 }
612
613 if (modify_inplace) {
614 if (status != OK) { (void) m_unlink (outfile); }
615 free (outfile);
616 outfile = NULL;
617 }
618
619 fclose (*infp);
620 *infp = NULL;
621 free (input_filename);
622
623 return status;
624 }
625
626
627 /*
628 * Copy input message to output. Assumes not modifying in place, so this
629 * might be running as part of a pipeline.
630 */
631 static int
632 copy_input_to_output (const char *input_filename, FILE *infp,
633 const char *output_filename, FILE *outfp)
634 {
635 int in = fileno (infp);
636 int out = fileno (outfp);
637 int status = OK;
638
639 if (in != -1 && out != -1) {
640 cpydata (in, out, input_filename, output_filename);
641 } else {
642 status = NOTOK;
643 }
644
645 return status;
646 }
647
648
649 /*
650 * Fix mismatched outer level boundary.
651 */
652 static int
653 fix_boundary (CT *ct, int *message_mods)
654 {
655 struct multipart *mp;
656 int status = OK;
657
658 if (ct && (*ct)->c_type == CT_MULTIPART && bogus_mp_content) {
659 mp = (struct multipart *) (*ct)->c_ctparams;
660
661 /*
662 * 1) Get boundary at end of part.
663 * 2) Get boundary at beginning of part and compare to the end-of-part
664 * boundary.
665 * 3) Write out contents of ct to tmp file, replacing boundary in
666 * header with boundary from part. Set c_unlink to 1.
667 * 4) Free ct.
668 * 5) Call parse_mime() on the tmp file, replacing ct.
669 */
670
671 if (mp && mp->mp_start) {
672 char *part_boundary;
673
674 if (get_multipart_boundary (*ct, &part_boundary) == OK) {
675 char *fixed;
676
677 if ((fixed = m_mktemp2 (NULL, invo_name, NULL, &(*ct)->c_fp))) {
678 if (replace_boundary (*ct, fixed, part_boundary) == OK) {
679 char *filename = mh_xstrdup ((*ct)->c_file);
680 CT fixed_ct;
681
682 free_content (*ct);
683 if ((fixed_ct = parse_mime (fixed))) {
684 *ct = fixed_ct;
685 (*ct)->c_unlink = 1;
686
687 ++*message_mods;
688 if (verbosw) {
689 report (NULL, NULL, filename,
690 "fix multipart boundary");
691 }
692 } else {
693 *ct = NULL;
694 inform("unable to parse fixed part");
695 status = NOTOK;
696 }
697 free (filename);
698 } else {
699 inform("unable to replace broken boundary");
700 status = NOTOK;
701 }
702 } else {
703 inform("unable to create temporary file in %s",
704 get_temp_dir());
705 status = NOTOK;
706 }
707
708 free (part_boundary);
709 } else {
710 /* Couldn't fix the boundary. Report failure so that mhfixmsg
711 doesn't modify the message. */
712 status = NOTOK;
713 }
714 } else {
715 /* No multipart struct, even though the content type is
716 CT_MULTIPART. Report failure so that mhfixmsg doesn't modify
717 the message. */
718 status = NOTOK;
719 }
720 }
721
722 return status;
723 }
724
725
726 /*
727 * Find boundary at end of multipart.
728 */
729 static int
730 get_multipart_boundary (CT ct, char **part_boundary)
731 {
732 char buffer[NMH_BUFSIZ];
733 char *end_boundary = NULL;
734 off_t begin = (off_t) ct->c_end > (off_t) (ct->c_begin + sizeof buffer)
735 ? (off_t) (ct->c_end - sizeof buffer)
736 : (off_t) ct->c_begin;
737 size_t bytes_read;
738 int status = OK;
739
740 /* This will fail if the boundary spans fread() calls. NMH_BUFSIZ should
741 be big enough, even if it's just 1024, to make that unlikely. */
742
743 /* free_content() will close ct->c_fp if bogus MP boundary is fixed. */
744 if (! ct->c_fp && (ct->c_fp = fopen (ct->c_file, "r")) == NULL) {
745 advise (ct->c_file, "unable to open for reading");
746 return NOTOK;
747 }
748
749 /* Get boundary at end of multipart. */
750 while (begin >= (off_t) ct->c_begin) {
751 fseeko (ct->c_fp, begin, SEEK_SET);
752 while ((bytes_read = fread (buffer, 1, sizeof buffer, ct->c_fp)) > 0) {
753 char *cp = rfind_str (buffer, bytes_read, "--");
754
755 if (cp) {
756 char *end;
757
758 /* Trim off trailing "--" and anything beyond. */
759 *cp-- = '\0';
760 if ((end = rfind_str (buffer, cp - buffer, "\n"))) {
761 if (strlen (end) > 3 && *end++ == '\n' &&
762 *end++ == '-' && *end++ == '-') {
763 end_boundary = mh_xstrdup (end);
764 break;
765 }
766 }
767 }
768 }
769
770 if (end_boundary || begin <= (off_t) (ct->c_begin + sizeof buffer))
771 break;
772 begin -= sizeof buffer;
773 }
774
775 /* Get boundary at beginning of multipart. */
776 if (end_boundary) {
777 fseeko (ct->c_fp, ct->c_begin, SEEK_SET);
778 while ((bytes_read = fread (buffer, 1, sizeof buffer, ct->c_fp)) > 0) {
779 if (bytes_read >= strlen (end_boundary)) {
780 char *cp = find_str (buffer, bytes_read, end_boundary);
781
782 if (cp && cp - buffer >= 2 && *--cp == '-' &&
783 *--cp == '-' && (cp > buffer && *--cp == '\n')) {
784 status = OK;
785 break;
786 }
787 } else {
788 /* The start and end boundaries didn't match, or the
789 start boundary doesn't begin with "\n--" (or "--"
790 if at the beginning of buffer). Keep trying. */
791 status = NOTOK;
792 }
793 }
794 } else {
795 status = NOTOK;
796 }
797
798 if (ct->c_fp) {
799 fclose (ct->c_fp);
800 ct->c_fp = NULL;
801 }
802
803 if (status == OK) {
804 *part_boundary = end_boundary;
805 } else {
806 *part_boundary = NULL;
807 free (end_boundary);
808 }
809
810 return status;
811 }
812
813
814 /*
815 * Open and copy ct->c_file to file, replacing the multipart boundary.
816 */
817 static int
818 replace_boundary (CT ct, char *file, char *boundary)
819 {
820 FILE *fpin, *fpout;
821 int compnum, state;
822 char buf[NMH_BUFSIZ], name[NAMESZ];
823 char *np, *vp;
824 m_getfld_state_t gstate;
825 int status = OK;
826
827 if (ct->c_file == NULL) {
828 inform("missing input filename");
829 return NOTOK;
830 }
831
832 if ((fpin = fopen (ct->c_file, "r")) == NULL) {
833 advise (ct->c_file, "unable to open for reading");
834 return NOTOK;
835 }
836
837 if ((fpout = fopen (file, "w")) == NULL) {
838 fclose (fpin);
839 advise (file, "unable to open for writing");
840 return NOTOK;
841 }
842
843 gstate = m_getfld_state_init(fpin);
844 for (compnum = 1;;) {
845 int bufsz = (int) sizeof buf;
846
847 switch (state = m_getfld2(&gstate, name, buf, &bufsz)) {
848 case FLD:
849 case FLDPLUS:
850 compnum++;
851
852 /* get copies of the buffers */
853 np = mh_xstrdup (name);
854 vp = mh_xstrdup (buf);
855
856 /* if necessary, get rest of field */
857 while (state == FLDPLUS) {
858 bufsz = sizeof buf;
859 state = m_getfld2(&gstate, name, buf, &bufsz);
860 vp = add (buf, vp); /* add to previous value */
861 }
862
863 if (strcasecmp (TYPE_FIELD, np)) {
864 fprintf (fpout, "%s:%s", np, vp);
865 } else {
866 char *new_ctline, *new_params;
867
868 replace_param(&ct->c_ctinfo.ci_first_pm,
869 &ct->c_ctinfo.ci_last_pm, "boundary",
870 boundary, 0);
871
872 new_ctline = concat(" ", ct->c_ctinfo.ci_type, "/",
873 ct->c_ctinfo.ci_subtype, NULL);
874 new_params = output_params(LEN(TYPE_FIELD) +
875 strlen(new_ctline) + 1,
876 ct->c_ctinfo.ci_first_pm, NULL, 0);
877 fprintf (fpout, "%s:%s%s\n", np, new_ctline,
878 FENDNULL(new_params));
879 free(new_ctline);
880 free(new_params);
881 }
882
883 free (vp);
884 free (np);
885
886 continue;
887
888 case BODY:
889 putc('\n', fpout);
890 /* buf will have a terminating NULL, skip it. */
891 if ((int) fwrite (buf, 1, bufsz-1, fpout) < bufsz-1) {
892 advise (file, "fwrite");
893 }
894 continue;
895
896 case FILEEOF:
897 break;
898
899 case LENERR:
900 case FMTERR:
901 inform("message format error in component #%d", compnum);
902 status = NOTOK;
903 break;
904
905 default:
906 inform("getfld() returned %d", state);
907 status = NOTOK;
908 break;
909 }
910
911 break;
912 }
913
914 m_getfld_state_destroy (&gstate);
915 fclose (fpout);
916 fclose (fpin);
917
918 return status;
919 }
920
921
922 /*
923 * Fix Content-Type header to reflect the content of its part.
924 */
925 static int
926 fix_types (CT ct, svector_t fixtypes, int *message_mods)
927 {
928 int status = OK;
929
930 switch (ct->c_type) {
931 case CT_MULTIPART: {
932 struct multipart *m = (struct multipart *) ct->c_ctparams;
933 struct part *part;
934
935 for (part = m->mp_parts; status == OK && part; part = part->mp_next) {
936 status = fix_types (part->mp_part, fixtypes, message_mods);
937 }
938 break;
939 }
940
941 case CT_MESSAGE:
942 if (ct->c_subtype == MESSAGE_EXTERNAL) {
943 struct exbody *e = (struct exbody *) ct->c_ctparams;
944
945 status = fix_types (e->eb_content, fixtypes, message_mods);
946 }
947 break;
948
949 default: {
950 char **typep, *type;
951
952 if (ct->c_ctinfo.ci_type && ct->c_ctinfo.ci_subtype) {
953 for (typep = svector_strs (fixtypes);
954 typep && (type = *typep);
955 ++typep) {
956 char *type_subtype =
957 concat (ct->c_ctinfo.ci_type, "/", ct->c_ctinfo.ci_subtype,
958 NULL);
959
960 if (! strcasecmp (type, type_subtype) &&
961 decode_part (ct) == OK &&
962 ct->c_cefile.ce_file != NULL) {
963 char *ct_type_subtype = mime_type (ct->c_cefile.ce_file);
964 char *cp;
965
966 if ((cp = strchr (ct_type_subtype, ';'))) {
967 /* Truncate to remove any parameter list from
968 mime_type () result. */
969 *cp = '\0';
970 }
971
972 if (strcasecmp (type, ct_type_subtype)) {
973 char *ct_type, *ct_subtype;
974 HF hf;
975
976 /* The Content-Type header does not match the
977 content, so update these struct Content
978 fields to match:
979 * c_type, c_subtype
980 * c_ctinfo.ci_type, c_ctinfo.ci_subtype
981 * c_ctline
982 */
983 /* Extract type and subtype from type/subtype. */
984 ct_type = mh_xstrdup(ct_type_subtype);
985 if ((cp = strchr (ct_type, '/'))) {
986 *cp = '\0';
987 ct_subtype = mh_xstrdup(++cp);
988 } else {
989 inform("missing / in MIME type of %s %s",
990 ct->c_file, ct->c_partno);
991 free (ct_type);
992 return NOTOK;
993 }
994
995 ct->c_type = ct_str_type (ct_type);
996 ct->c_subtype = ct_str_subtype (ct->c_type, ct_subtype);
997
998 free (ct->c_ctinfo.ci_type);
999 ct->c_ctinfo.ci_type = ct_type;
1000 free (ct->c_ctinfo.ci_subtype);
1001 ct->c_ctinfo.ci_subtype = ct_subtype;
1002 if (! replace_substring (&ct->c_ctline, type,
1003 ct_type_subtype)) {
1004 inform("did not find %s in %s",
1005 type, ct->c_ctline);
1006 }
1007
1008 /* Update Content-Type header field. */
1009 for (hf = ct->c_first_hf; hf; hf = hf->next) {
1010 if (! strcasecmp (TYPE_FIELD, hf->name)) {
1011 if (replace_substring (&hf->value, type,
1012 ct_type_subtype)) {
1013 ++*message_mods;
1014 if (verbosw) {
1015 report (NULL, ct->c_partno, ct->c_file,
1016 "change Content-Type in header "
1017 "from %s to %s",
1018 type, ct_type_subtype);
1019 }
1020 break;
1021 }
1022 inform("did not find %s in %s", type, hf->value);
1023 }
1024 }
1025 }
1026 free (ct_type_subtype);
1027 }
1028 free (type_subtype);
1029 }
1030 }
1031 }}
1032
1033 return status;
1034 }
1035
1036
/*
 * Replace the first occurrence of old in *str with new.
 *
 * On success, *str is freed and replaced by a newly allocated string,
 * which is also returned.  If old does not occur in *str, or any of the
 * arguments is missing, *str is left untouched and NULL is returned.
 */
static char *
replace_substring (char **str, const char *old, const char *new)
{
    char *cp, *remainder, *new_str;

    /* strstr() must not be handed a null pointer. */
    if (str == NULL || *str == NULL || old == NULL || new == NULL) {
        return NULL;
    }

    if ((cp = strstr (*str, old)) == NULL) {
        return NULL;
    }

    remainder = cp + strlen (old);

    if (cp != *str) {
        /* Copy the string and cut the copy off where old begins. */
        char *prefix = mh_xstrdup (*str);

        prefix[cp - *str] = '\0';
        new_str = concat (prefix, new, remainder, NULL);
        free (prefix);
    } else {
        new_str = concat (new, remainder, NULL);
    }

    free (*str);

    return *str = new_str;
}
1065
1066
/*
 * Remove a name=value parameter, given just its name, from a header
 * value.  The edit is done in place, and str is returned.
 *
 * The first occurrence of name immediately followed by "=" is removed,
 * together with any white space and one semicolon in front of it.  A
 * quoted value extends through its closing quote, or to the end of the
 * string if the quote is never closed.  An unquoted value extends to
 * the next white space, semicolon or end of string, so the semicolon
 * that introduces a following parameter is kept.
 *
 * Judging by the BNF in RFC 2045, white space can't appear between the
 * parameter name and the "=", or between the "=" and the value, so
 * that is not catered for.
 */
static char *
remove_parameter (char *str, const char *name)
{
    const size_t name_len = strlen (name);
    char *cp, *start, *end;

    /* Find the first "name=".  The *cp test keeps an empty name from
       walking off the end of the string. */
    for (cp = strstr (str, name);
         cp && cp[name_len] != '=';
         cp = *cp ? strstr (cp + 1, name) : NULL) {
        continue;
    }
    if (cp == NULL) {
        return str;
    }

    /* Take any white space before the parameter name with it ... */
    for (start = cp;
         start > str && isspace ((unsigned char) start[-1]);
         --start) {
        continue;
    }
    /* ... and a leading semicolon. */
    if (start > str && start[-1] == ';') { --start; }

    end = cp + name_len + 1;
    if (*end == '"') {
        /* Skip past the quoted value, and then the final quote, if
           there is one. */
        for (++end; *end && *end != '"'; ++end) { continue; }
        if (*end == '"') { ++end; }
    } else {
        /* Skip past the value, which can be empty. */
        while (*end && *end != ';' && ! isspace ((unsigned char) *end)) {
            ++end;
        }
    }

    /* Close the gap.  The regions overlap, and the terminating null
       moves too. */
    (void) memmove (start, end, strlen (end) + 1);

    return str;
}
1113
1114
1115 /*
1116 * Fix Content-Transfer-Encoding of composite,, e.g., message or multipart, part.
1117 * According to RFC 2045 Sec. 6.4, it must be 7bit, 8bit, or binary. Set it to
1118 * 8 bit.
1119 */
1120 static int
1121 fix_composite_cte (CT ct, int *message_mods)
1122 {
1123 int status = OK;
1124
1125 if (ct->c_type == CT_MESSAGE || ct->c_type == CT_MULTIPART) {
1126 if (ct->c_encoding != CE_7BIT && ct->c_encoding != CE_8BIT &&
1127 ct->c_encoding != CE_BINARY) {
1128 HF hf;
1129
1130 for (hf = ct->c_first_hf; hf; hf = hf->next) {
1131 char *name = hf->name;
1132 for (; isspace((unsigned char)*name); ++name) {
1133 continue;
1134 }
1135
1136 if (! strncasecmp (name, ENCODING_FIELD,
1137 LEN(ENCODING_FIELD))) {
1138 char *prefix = "Nmh-REPLACED-INVALID-";
1139 HF h;
1140
1141 NEW(h);
1142 h->name = mh_xstrdup (hf->name);
1143 h->hf_encoding = hf->hf_encoding;
1144 h->next = hf->next;
1145 hf->next = h;
1146
1147 /* Retain old header but prefix its name. */
1148 free (hf->name);
1149 hf->name = concat (prefix, h->name, NULL);
1150
1151 ++*message_mods;
1152 if (verbosw) {
1153 char *encoding = cpytrim (hf->value);
1154 report (NULL, ct->c_partno, ct->c_file,
1155 "replace Content-Transfer-Encoding of %s "
1156 "with 8 bit", encoding);
1157 free (encoding);
1158 }
1159
1160 h->value = mh_xstrdup (" 8bit\n");
1161
1162 /* Don't need to warn for multiple C-T-E header
1163 fields, parse_mime() already does that. But
1164 if there are any, fix them all as necessary. */
1165 hf = h;
1166 }
1167 }
1168
1169 set_ce (ct, CE_8BIT);
1170 }
1171
1172 if (ct->c_type == CT_MULTIPART) {
1173 struct multipart *m;
1174 struct part *part;
1175
1176 m = (struct multipart *) ct->c_ctparams;
1177 for (part = m->mp_parts; part; part = part->mp_next) {
1178 if (fix_composite_cte (part->mp_part, message_mods) != OK) {
1179 status = NOTOK;
1180 break;
1181 }
1182 }
1183 }
1184 }
1185
1186 return status;
1187 }
1188
1189
1190 /*
1191 * Set content encoding.
1192 */
1193 static int
1194 set_ce (CT ct, int encoding)
1195 {
1196 const char *ce = ce_str (encoding);
1197 const struct str2init *ctinit = get_ce_method (ce);
1198
1199 if (ctinit) {
1200 char *cte = concat (" ", ce, "\n", NULL);
1201 bool found_cte = false;
1202 HF hf;
1203 /* Decoded contents might be in ct->c_cefile.ce_file, if the
1204 caller is decode_text_parts (). Save because we'll
1205 overwrite below. */
1206 struct cefile decoded_content_info = ct->c_cefile;
1207
1208 ct->c_encoding = encoding;
1209
1210 ct->c_ctinitfnx = ctinit->si_init;
1211 /* This will assign ct->c_cefile with an all-0 struct, which
1212 is what we want. */
1213 (*ctinit->si_init) (ct);
1214 /* After returning, the caller should set
1215 ct->c_cefile.ce_file to the name of the file containing
1216 the contents. */
1217
1218 if (ct->c_ceclosefnx) {
1219 (*ct->c_ceclosefnx) (ct);
1220 }
1221
1222 /* Restore the cefile. */
1223 ct->c_cefile = decoded_content_info;
1224
1225 /* Update/add Content-Transfer-Encoding header field. */
1226 for (hf = ct->c_first_hf; hf; hf = hf->next) {
1227 if (! strcasecmp (ENCODING_FIELD, hf->name)) {
1228 found_cte = true;
1229 free (hf->value);
1230 hf->value = cte;
1231 }
1232 }
1233 if (! found_cte) {
1234 add_header (ct, mh_xstrdup (ENCODING_FIELD), cte);
1235 }
1236
1237 /* Update c_celine. It's used only by mhlist -debug. */
1238 free (ct->c_celine);
1239 ct->c_celine = mh_xstrdup (cte);
1240
1241 return OK;
1242 }
1243
1244 return NOTOK;
1245 }
1246
1247
1248 /*
1249 * Make sure each text part has a corresponding text/plain part.
1250 */
1251 static int
1252 ensure_text_plain (CT *ct, CT parent, int *message_mods, int replacetextplain)
1253 {
1254 int status = OK;
1255
1256 switch ((*ct)->c_type) {
1257 case CT_TEXT: {
1258 /* Nothing to do for text/plain. */
1259 if ((*ct)->c_subtype == TEXT_PLAIN) { return OK; }
1260
1261 if (parent && parent->c_type == CT_MULTIPART &&
1262 parent->c_subtype == MULTI_ALTERNATE) {
1263 int new_subpart_number = 1;
1264 int has_text_plain =
1265 find_textplain_sibling (parent, replacetextplain,
1266 &new_subpart_number);
1267
1268 if (! has_text_plain) {
1269 /* Parent is a multipart/alternative. Insert a new
1270 text/plain subpart. */
1271 const int inserted =
1272 insert_new_text_plain_part (*ct, new_subpart_number,
1273 parent);
1274 if (inserted) {
1275 ++*message_mods;
1276 if (verbosw) {
1277 report (NULL, parent->c_partno, parent->c_file,
1278 "insert text/plain part");
1279 }
1280 } else {
1281 status = NOTOK;
1282 }
1283 }
1284 } else if (parent && parent->c_type == CT_MULTIPART &&
1285 parent->c_subtype == MULTI_RELATED) {
1286 char *type_subtype =
1287 concat ((*ct)->c_ctinfo.ci_type, "/",
1288 (*ct)->c_ctinfo.ci_subtype, NULL);
1289 const char *parent_type =
1290 get_param (parent->c_ctinfo.ci_first_pm, "type", '?', 1);
1291 int new_subpart_number = 1;
1292 int has_text_plain = 0;
1293
1294 /* Have to do string comparison on the subtype because we
1295 don't enumerate all of them in c_subtype values.
1296 parent_type will be NULL if the multipart/related part
1297 doesn't have a type parameter. The type parameter must
1298 be specified according to RFC 2387 Sec. 3.1 but not all
1299 messages comply. */
1300 if (parent_type && strcasecmp (type_subtype, parent_type) == 0) {
1301 /* The type of this part matches the root type of the
1302 parent multipart/related. Look to see if there's
1303 text/plain sibling. */
1304 has_text_plain =
1305 find_textplain_sibling (parent, replacetextplain,
1306 &new_subpart_number);
1307 }
1308
1309 free (type_subtype);
1310
1311 if (! has_text_plain) {
1312 struct multipart *mp = (struct multipart *) parent->c_ctparams;
1313 struct part *part;
1314 int siblings = 0;
1315
1316 for (part = mp->mp_parts; part; part = part->mp_next) {
1317 if (*ct != part->mp_part) {
1318 ++siblings;
1319 }
1320 }
1321
1322 if (siblings) {
1323 /* Parent is a multipart/related. Insert a new
1324 text/plain subpart in a new multipart/alternative. */
1325 if (insert_into_new_mp_alt (ct, message_mods)) {
1326 /* Not an error if text/plain couldn't be added. */
1327 }
1328 } else {
1329 /* There are no siblings, so insert a new text/plain
1330 subpart, and change the parent type from
1331 multipart/related to multipart/alternative. */
1332 const int inserted =
1333 insert_new_text_plain_part (*ct, new_subpart_number,
1334 parent);
1335
1336 if (inserted) {
1337 HF hf;
1338
1339 parent->c_subtype = MULTI_ALTERNATE;
1340 free (parent->c_ctinfo.ci_subtype);
1341 parent->c_ctinfo.ci_subtype = mh_xstrdup("alternative");
1342 if (! replace_substring (&parent->c_ctline, "/related",
1343 "/alternative")) {
1344 inform("did not find multipart/related in %s",
1345 parent->c_ctline);
1346 }
1347
1348 /* Update Content-Type header field. */
1349 for (hf = parent->c_first_hf; hf; hf = hf->next) {
1350 if (! strcasecmp (TYPE_FIELD, hf->name)) {
1351 if (replace_substring (&hf->value, "/related",
1352 "/alternative")) {
1353 ++*message_mods;
1354 if (verbosw) {
1355 report (NULL, parent->c_partno,
1356 parent->c_file,
1357 "insert text/plain part");
1358 }
1359
1360 /* Remove, e.g., type="text/html" from
1361 multipart/alternative. */
1362 remove_parameter (hf->value, "type");
1363 break;
1364 }
1365 inform("did not find multipart/"
1366 "related in header %s", hf->value);
1367 }
1368 }
1369 } else {
1370 /* Not an error if text/plain couldn't be inserted. */
1371 }
1372 }
1373 }
1374 } else {
1375 if (insert_into_new_mp_alt (ct, message_mods)) {
1376 status = NOTOK;
1377 }
1378 }
1379 break;
1380 }
1381
1382 case CT_MULTIPART: {
1383 struct multipart *mp = (struct multipart *) (*ct)->c_ctparams;
1384 struct part *part;
1385
1386 for (part = mp->mp_parts; status == OK && part; part = part->mp_next) {
1387 if ((*ct)->c_type == CT_MULTIPART) {
1388 status = ensure_text_plain (&part->mp_part, *ct, message_mods,
1389 replacetextplain);
1390 }
1391 }
1392 break;
1393 }
1394
1395 case CT_MESSAGE:
1396 if ((*ct)->c_subtype == MESSAGE_EXTERNAL) {
1397 struct exbody *e = (struct exbody *) (*ct)->c_ctparams;
1398
1399 status = ensure_text_plain (&e->eb_content, *ct, message_mods,
1400 replacetextplain);
1401 }
1402 break;
1403 }
1404
1405 return status;
1406 }
1407
1408
1409 /*
1410 * See if there is a sibling text/plain, and return its subpart number.
1411 */
1412 static int
1413 find_textplain_sibling (CT parent, int replacetextplain,
1414 int *new_subpart_number)
1415 {
1416 struct multipart *mp = (struct multipart *) parent->c_ctparams;
1417 struct part *part, *prev;
1418 bool has_text_plain = false;
1419
1420 for (prev = part = mp->mp_parts; part; part = part->mp_next) {
1421 ++*new_subpart_number;
1422 if (part->mp_part->c_type == CT_TEXT &&
1423 part->mp_part->c_subtype == TEXT_PLAIN) {
1424 if (replacetextplain) {
1425 struct part *old_part;
1426 if (part == mp->mp_parts) {
1427 old_part = mp->mp_parts;
1428 mp->mp_parts = part->mp_next;
1429 } else {
1430 old_part = prev->mp_next;
1431 prev->mp_next = part->mp_next;
1432 }
1433 if (verbosw) {
1434 report (NULL, parent->c_partno, parent->c_file,
1435 "remove text/plain part %s",
1436 old_part->mp_part->c_partno);
1437 }
1438 free_content (old_part->mp_part);
1439 free (old_part);
1440 } else {
1441 has_text_plain = true;
1442 }
1443 break;
1444 }
1445 prev = part;
1446 }
1447
1448 return has_text_plain;
1449 }
1450
1451
1452 /*
1453 * Insert a new text/plain part.
1454 */
1455 static int
1456 insert_new_text_plain_part (CT ct, int new_subpart_number, CT parent)
1457 {
1458 struct multipart *mp = (struct multipart *) parent->c_ctparams;
1459 struct part *new_part;
1460
1461 NEW(new_part);
1462 if ((new_part->mp_part = build_text_plain_part (ct))) {
1463 char buffer[16];
1464 snprintf (buffer, sizeof buffer, "%d", new_subpart_number);
1465
1466 new_part->mp_next = mp->mp_parts;
1467 mp->mp_parts = new_part;
1468 new_part->mp_part->c_partno =
1469 concat (parent->c_partno ? parent->c_partno : "1", ".",
1470 buffer, NULL);
1471
1472 return 1;
1473 }
1474
1475 free_content (new_part->mp_part);
1476 free (new_part);
1477
1478 return 0;
1479 }
1480
1481
1482 /*
1483 * Create a text/plain part to go along with non-plain sibling part.
1484 */
1485 static CT
1486 build_text_plain_part (CT encoded_part)
1487 {
1488 CT tp_part = divide_part (encoded_part);
1489 char *tmp_plain_file = NULL;
1490
1491 if (decode_part (tp_part) == OK) {
1492 /* Now, tp_part->c_cefile.ce_file is the name of the tmp file that
1493 contains the decoded contents. And the decoding function, such
1494 as openQuoted, will have set ...->ce_unlink to 1 so that it will
1495 be unlinked by free_content (). */
1496 char *tempfile;
1497
1498 /* This m_mktemp2() call closes the temp file. */
1499 if ((tempfile = m_mktemp2 (NULL, invo_name, NULL, NULL)) == NULL) {
1500 inform("unable to create temporary file in %s",
1501 get_temp_dir());
1502 } else {
1503 tmp_plain_file = mh_xstrdup (tempfile);
1504 if (reformat_part (tp_part, tmp_plain_file,
1505 tp_part->c_ctinfo.ci_type,
1506 tp_part->c_ctinfo.ci_subtype,
1507 tp_part->c_type) == OK) {
1508 return tp_part;
1509 }
1510 }
1511 }
1512
1513 free_content (tp_part);
1514 if (tmp_plain_file) { (void) m_unlink (tmp_plain_file); }
1515 free (tmp_plain_file);
1516
1517 return NULL;
1518 }
1519
1520
1521 /*
1522 * Slip new text/plain part into a new multipart/alternative.
1523 */
1524 static int
1525 insert_into_new_mp_alt (CT *ct, int *message_mods)
1526 {
1527 CT tp_part = build_text_plain_part (*ct);
1528 int status = OK;
1529
1530 if (tp_part) {
1531 CT mp_alt = build_multipart_alt (*ct, tp_part, CT_MULTIPART,
1532 MULTI_ALTERNATE);
1533 if (mp_alt) {
1534 struct multipart *mp = (struct multipart *) mp_alt->c_ctparams;
1535
1536 if (mp && mp->mp_parts) {
1537 mp->mp_parts->mp_part = tp_part;
1538 /* Make the new multipart/alternative the parent. */
1539 *ct = mp_alt;
1540
1541 ++*message_mods;
1542 if (verbosw) {
1543 report (NULL, (*ct)->c_partno, (*ct)->c_file,
1544 "insert text/plain part");
1545 }
1546 } else {
1547 free_content (tp_part);
1548 free_content (mp_alt);
1549 status = NOTOK;
1550 }
1551 } else {
1552 status = NOTOK;
1553 }
1554 } else {
1555 /* Not an error if text/plain couldn't be built. */
1556 }
1557
1558 return status;
1559 }
1560
1561
1562 /*
1563 * Clone a MIME part.
1564 */
1565 static CT
1566 divide_part (CT ct)
1567 {
1568 CT new_part;
1569
1570 NEW0(new_part);
1571 /* Just copy over what is needed for decoding. c_vrsn and
1572 c_celine aren't necessary. */
1573 new_part->c_file = mh_xstrdup (ct->c_file);
1574 new_part->c_begin = ct->c_begin;
1575 new_part->c_end = ct->c_end;
1576 copy_ctinfo (&new_part->c_ctinfo, &ct->c_ctinfo);
1577 new_part->c_type = ct->c_type;
1578 new_part->c_cefile = ct->c_cefile;
1579 new_part->c_encoding = ct->c_encoding;
1580 new_part->c_ctinitfnx = ct->c_ctinitfnx;
1581 new_part->c_ceopenfnx = ct->c_ceopenfnx;
1582 new_part->c_ceclosefnx = ct->c_ceclosefnx;
1583 new_part->c_cesizefnx = ct->c_cesizefnx;
1584
1585 /* c_ctline is used by reformat__part(), so it can preserve
1586 anything after the type/subtype. */
1587 new_part->c_ctline = mh_xstrdup (ct->c_ctline);
1588
1589 return new_part;
1590 }
1591
1592
1593 /*
1594 * Copy the content info from one part to another.
1595 */
1596 static void
1597 copy_ctinfo (CI dest, CI src)
1598 {
1599 PM s_pm, d_pm;
1600
1601 dest->ci_type = src->ci_type ? mh_xstrdup (src->ci_type) : NULL;
1602 dest->ci_subtype = src->ci_subtype ? mh_xstrdup (src->ci_subtype) : NULL;
1603
1604 for (s_pm = src->ci_first_pm; s_pm; s_pm = s_pm->pm_next) {
1605 d_pm = add_param(&dest->ci_first_pm, &dest->ci_last_pm, s_pm->pm_name,
1606 s_pm->pm_value, 0);
1607 if (s_pm->pm_charset) {
1608 d_pm->pm_charset = mh_xstrdup(s_pm->pm_charset);
1609 }
1610 if (s_pm->pm_lang) {
1611 d_pm->pm_lang = mh_xstrdup(s_pm->pm_lang);
1612 }
1613 }
1614
1615 dest->ci_comment = src->ci_comment ? mh_xstrdup (src->ci_comment) : NULL;
1616 dest->ci_magic = src->ci_magic ? mh_xstrdup (src->ci_magic) : NULL;
1617 }
1618
1619
1620 /*
1621 * Decode content.
1622 */
1623 static int
1624 decode_part (CT ct)
1625 {
1626 char *tmp_decoded;
1627 int status;
1628 FILE *file;
1629 char *tempfile;
1630
1631 if ((tempfile = m_mktemp2 (NULL, invo_name, NULL, &file)) == NULL) {
1632 die("unable to create temporary file in %s", get_temp_dir());
1633 }
1634 tmp_decoded = mh_xstrdup (tempfile);
1635 /* The following call will load ct->c_cefile.ce_file with the tmp
1636 filename of the decoded content. tmp_decoded will contain the
1637 encoded output, get rid of that. */
1638 status = output_message_fp (ct, file, tmp_decoded);
1639 (void) m_unlink (tmp_decoded);
1640 free (tmp_decoded);
1641 if (fclose (file)) {
1642 inform("unable to close temporary file %s, continuing...", tempfile);
1643 }
1644
1645 return status;
1646 }
1647
1648
1649 /*
1650 * Reformat content as plain text.
1651 * Some of the arguments aren't really needed now, but maybe will
1652 * be in the future for other than text types.
1653 */
1654 static int
1655 reformat_part (CT ct, char *file, char *type, char *subtype, int c_type)
1656 {
1657 int output_subtype, output_encoding;
1658 const char *reason = NULL;
1659 char *cp, *cf;
1660 int status;
1661
1662 /* Hacky: this redirects the output from whatever command is used
1663 to show the part to a file. So, the user can't have any output
1664 redirection in that command.
1665 Could show_multi() in mhshowsbr.c avoid this? */
1666
1667 /* Check for invo_name-format-type/subtype. */
1668 if ((cf = context_find_by_type ("format", type, subtype)) == NULL) {
1669 if (verbosw) {
1670 inform("Don't know how to convert %s, there is no "
1671 "%s-format-%s/%s profile entry",
1672 ct->c_file, invo_name, type, subtype);
1673 }
1674 return NOTOK;
1675 }
1676 if (strchr (cf, '>')) {
1677 inform("'>' prohibited in \"%s\",\nplease fix your "
1678 "%s-format-%s/%s profile entry", cf, invo_name, type,
1679 FENDNULL(subtype));
1680
1681 return NOTOK;
1682 }
1683
1684 cp = concat (cf, " >", file, NULL);
1685 status = show_content_aux (ct, 0, cp, NULL, NULL);
1686 free (cp);
1687
1688 /* Unlink decoded content tmp file and free its filename to avoid
1689 leaks. The file stream should already have been closed. */
1690 if (ct->c_cefile.ce_unlink) {
1691 (void) m_unlink (ct->c_cefile.ce_file);
1692 free (ct->c_cefile.ce_file);
1693 ct->c_cefile.ce_file = NULL;
1694 ct->c_cefile.ce_unlink = 0;
1695 }
1696
1697 if (c_type == CT_TEXT) {
1698 output_subtype = TEXT_PLAIN;
1699 } else {
1700 /* Set subtype to 0, which is always an UNKNOWN subtype. */
1701 output_subtype = 0;
1702 }
1703
1704 output_encoding = content_encoding (ct, &reason);
1705 if (status == OK &&
1706 set_ct_type (ct, c_type, output_subtype, output_encoding) == OK) {
1707 ct->c_cefile.ce_file = file;
1708 ct->c_cefile.ce_unlink = 1;
1709 } else {
1710 ct->c_cefile.ce_unlink = 0;
1711 status = NOTOK;
1712 }
1713
1714 return status;
1715 }
1716
1717
1718 /*
1719 * Fill in a multipart/alternative part.
1720 */
1721 static CT
1722 build_multipart_alt (CT first_alt, CT new_part, int type, int subtype)
1723 {
1724 char *boundary_prefix = "----=_nmh-multipart";
1725 char *boundary = concat (boundary_prefix, first_alt->c_partno, NULL);
1726 char *boundary_indicator = "; boundary=";
1727 char *typename, *subtypename, *name;
1728 CT ct;
1729 struct part *p;
1730 struct multipart *m;
1731 const struct str2init *ctinit;
1732
1733 NEW0(ct);
1734
1735 /* Set up the multipart/alternative part. These fields of *ct were
1736 initialized to 0 by mh_xcalloc():
1737 c_fp, c_unlink, c_begin, c_end,
1738 c_vrsn, c_ctline, c_celine,
1739 c_id, c_descr, c_dispo, c_partno,
1740 c_ctinfo.ci_comment, c_ctinfo.ci_magic,
1741 c_cefile, c_encoding,
1742 c_digested, c_digest[16], c_ctexbody,
1743 c_ctinitfnx, c_ceopenfnx, c_ceclosefnx, c_cesizefnx,
1744 c_umask, c_rfc934,
1745 c_showproc, c_termproc, c_storeproc, c_storage, c_folder
1746 */
1747
1748 ct->c_file = mh_xstrdup (first_alt->c_file);
1749 ct->c_type = type;
1750 ct->c_subtype = subtype;
1751
1752 ctinit = get_ct_init (ct->c_type);
1753
1754 typename = ct_type_str (type);
1755 subtypename = ct_subtype_str (type, subtype);
1756
1757 {
1758 int serial = 0;
1759 int found_boundary = 1;
1760
1761 while (found_boundary && serial < 1000000) {
1762 found_boundary = 0;
1763
1764 /* Ensure that the boundary doesn't appear in the decoded
1765 content. */
1766 if (new_part->c_cefile.ce_file) {
1767 if ((found_boundary =
1768 boundary_in_content (&new_part->c_cefile.ce_fp,
1769 new_part->c_cefile.ce_file,
1770 boundary)) == NOTOK) {
1771 goto return_null;
1772 }
1773 }
1774
1775 /* Ensure that the boundary doesn't appear in the encoded
1776 content. */
1777 if (! found_boundary && new_part->c_file) {
1778 if ((found_boundary =
1779 boundary_in_content (&new_part->c_fp,
1780 new_part->c_file,
1781 boundary)) == NOTOK) {
1782 goto return_null;
1783 }
1784 }
1785
1786 if (found_boundary) {
1787 /* Try a slightly different boundary. */
1788 char buffer2[16];
1789
1790 free (boundary);
1791 ++serial;
1792 snprintf (buffer2, sizeof buffer2, "%d", serial);
1793 boundary =
1794 concat (boundary_prefix,
1795 FENDNULL(first_alt->c_partno),
1796 "-", buffer2, NULL);
1797 }
1798 }
1799
1800 if (found_boundary) {
1801 inform("giving up trying to find a unique boundary");
1802 goto return_null;
1803 }
1804 }
1805
1806 name = concat (" ", typename, "/", subtypename, boundary_indicator, "\"",
1807 boundary, "\"", NULL);
1808
1809 /* Load c_first_hf and c_last_hf. */
1810 transfer_noncontent_headers (first_alt, ct);
1811 add_header (ct, mh_xstrdup (TYPE_FIELD), concat (name, "\n", NULL));
1812 free (name);
1813
1814 /* Load c_partno. */
1815 if (first_alt->c_partno) {
1816 ct->c_partno = mh_xstrdup (first_alt->c_partno);
1817 free (first_alt->c_partno);
1818 first_alt->c_partno = concat (ct->c_partno, ".1", NULL);
1819 new_part->c_partno = concat (ct->c_partno, ".2", NULL);
1820 } else {
1821 first_alt->c_partno = mh_xstrdup ("1");
1822 new_part->c_partno = mh_xstrdup ("2");
1823 }
1824
1825 if (ctinit) {
1826 ct->c_ctinfo.ci_type = mh_xstrdup (typename);
1827 ct->c_ctinfo.ci_subtype = mh_xstrdup (subtypename);
1828 }
1829
1830 add_param(&ct->c_ctinfo.ci_first_pm, &ct->c_ctinfo.ci_last_pm,
1831 "boundary", boundary, 0);
1832
1833 NEW(p);
1834 NEW(p->mp_next);
1835 p->mp_next->mp_next = NULL;
1836 p->mp_next->mp_part = first_alt;
1837
1838 NEW0(m);
1839 m->mp_start = concat (boundary, "\n", NULL);
1840 m->mp_stop = concat (boundary, "--\n", NULL);
1841 m->mp_parts = p;
1842 ct->c_ctparams = m;
1843
1844 free (boundary);
1845
1846 return ct;
1847
1848 return_null:
1849 free_content(ct);
1850 free(boundary);
1851 return NULL;
1852 }
1853
1854
1855 /*
1856 * Check that the boundary does not appear in the content.
1857 */
1858 static int
1859 boundary_in_content (FILE **fp, char *file, const char *boundary)
1860 {
1861 char buffer[NMH_BUFSIZ];
1862 size_t bytes_read;
1863 bool found_boundary = false;
1864
1865 /* free_content() will close *fp if we fopen it here. */
1866 if (! *fp && (*fp = fopen (file, "r")) == NULL) {
1867 advise (file, "unable to open %s for reading", file);
1868 return NOTOK;
1869 }
1870
1871 fseeko (*fp, 0L, SEEK_SET);
1872 while ((bytes_read = fread (buffer, 1, sizeof buffer, *fp)) > 0) {
1873 if (find_str (buffer, bytes_read, boundary)) {
1874 found_boundary = true;
1875 break;
1876 }
1877 }
1878
1879 return found_boundary;
1880 }
1881
1882
1883 /*
1884 * Remove all non-Content headers.
1885 */
1886 static void
1887 transfer_noncontent_headers (CT old, CT new)
1888 {
1889 HF hp, hp_prev;
1890
1891 hp_prev = hp = old->c_first_hf;
1892 while (hp) {
1893 HF next = hp->next;
1894
1895 if (strncasecmp (XXX_FIELD_PRF, hp->name, LEN(XXX_FIELD_PRF))) {
1896 if (hp == old->c_last_hf) {
1897 if (hp == old->c_first_hf) {
1898 old->c_last_hf = old->c_first_hf = NULL;
1899 } else {
1900 hp_prev->next = NULL;
1901 old->c_last_hf = hp_prev;
1902 }
1903 } else {
1904 if (hp == old->c_first_hf) {
1905 old->c_first_hf = next;
1906 } else {
1907 hp_prev->next = next;
1908 }
1909 }
1910
1911 /* Put node hp in the new CT. */
1912 if (new->c_first_hf == NULL) {
1913 new->c_first_hf = hp;
1914 } else {
1915 new->c_last_hf->next = hp;
1916 }
1917 new->c_last_hf = hp;
1918 } else {
1919 /* A Content- header, leave in old. */
1920 hp_prev = hp;
1921 }
1922
1923 hp = next;
1924 }
1925 }
1926
1927
1928 /*
1929 * Set content type.
1930 */
1931 static int
1932 set_ct_type (CT ct, int type, int subtype, int encoding)
1933 {
1934 char *typename = ct_type_str (type);
1935 char *subtypename = ct_subtype_str (type, subtype);
1936 /* E.g, " text/plain" */
1937 char *type_subtypename = concat (" ", typename, "/", subtypename, NULL);
1938 /* E.g, " text/plain\n" */
1939 char *name_plus_nl = concat (type_subtypename, "\n", NULL);
1940 bool found_content_type = false;
1941 HF hf;
1942 const char *cp = NULL;
1943 char *ctline;
1944 int status;
1945
1946 /* Update/add Content-Type header field. */
1947 for (hf = ct->c_first_hf; hf; hf = hf->next) {
1948 if (! strcasecmp (TYPE_FIELD, hf->name)) {
1949 found_content_type = true;
1950 free (hf->value);
1951 hf->value = (cp = strchr (ct->c_ctline, ';'))
1952 ? concat (type_subtypename, cp, "\n", NULL)
1953 : mh_xstrdup (name_plus_nl);
1954 }
1955 }
1956 if (! found_content_type) {
1957 add_header (ct, mh_xstrdup (TYPE_FIELD),
1958 (cp = strchr (ct->c_ctline, ';'))
1959 ? concat (type_subtypename, cp, "\n", NULL)
1960 : mh_xstrdup (name_plus_nl));
1961 }
1962
1963 /* Some of these might not be used, but set them anyway. */
1964 ctline = cp
1965 ? concat (type_subtypename, cp, NULL)
1966 : concat (type_subtypename, NULL);
1967 free (ct->c_ctline);
1968 ct->c_ctline = ctline;
1969 /* Leave other ctinfo members as they were. */
1970 free (ct->c_ctinfo.ci_type);
1971 ct->c_ctinfo.ci_type = mh_xstrdup (typename);
1972 free (ct->c_ctinfo.ci_subtype);
1973 ct->c_ctinfo.ci_subtype = mh_xstrdup (subtypename);
1974 ct->c_type = type;
1975 ct->c_subtype = subtype;
1976
1977 free (name_plus_nl);
1978 free (type_subtypename);
1979
1980 status = set_ce (ct, encoding);
1981
1982 return status;
1983 }
1984
1985
1986 /*
1987 * It's not necessary to update the charset parameter of a Content-Type
1988 * header for a text part. According to RFC 2045 Sec. 6.4, the body
1989 * (content) was originally in the specified charset, "and will be in
1990 * that character set again after decoding."
1991 */
1992 static int
1993 decode_text_parts (CT ct, int encoding, const char *decodetypes,
1994 int *message_mods)
1995 {
1996 int status = OK;
1997 int lf_line_endings = 0;
1998
1999 switch (ct->c_type) {
2000 case CT_MULTIPART: {
2001 struct multipart *m = (struct multipart *) ct->c_ctparams;
2002 struct part *part;
2003
2004 /* Should check to see if the body for this part is encoded?
2005 For now, it gets passed along as-is by InitMultiPart(). */
2006 for (part = m->mp_parts; status == OK && part; part = part->mp_next) {
2007 status = decode_text_parts (part->mp_part, encoding, decodetypes,
2008 message_mods);
2009 }
2010 break;
2011 }
2012
2013 case CT_MESSAGE:
2014 if (ct->c_subtype == MESSAGE_EXTERNAL) {
2015 struct exbody *e = (struct exbody *) ct->c_ctparams;
2016
2017 status = decode_text_parts (e->eb_content, encoding, decodetypes,
2018 message_mods);
2019 }
2020 break;
2021
2022 default:
2023 if (! should_decode(decodetypes, ct->c_ctinfo.ci_type, ct->c_ctinfo.ci_subtype)) {
2024 break;
2025 }
2026
2027 lf_line_endings =
2028 ct->c_ctparams && ((struct text *) ct->c_ctparams)->lf_line_endings;
2029
2030 switch (ct->c_encoding) {
2031 case CE_BASE64:
2032 case CE_QUOTED: {
2033 int ct_encoding;
2034
2035 if (decode_part (ct) == OK && ct->c_cefile.ce_file) {
2036 const char *reason = NULL;
2037
2038 if ((ct_encoding = content_encoding (ct, &reason)) == CE_BINARY
2039 && encoding != CE_BINARY) {
2040 /* The decoding isn't acceptable so discard it.
2041 Leave status as OK to allow other transformations. */
2042 if (verbosw) {
2043 report (NULL, ct->c_partno, ct->c_file,
2044 "will not decode%s because it is binary (%s)",
2045 ct->c_partno ? ""
2046 : (FENDNULL(ct->c_ctline)),
2047 reason);
2048 }
2049 (void) m_unlink (ct->c_cefile.ce_file);
2050 free (ct->c_cefile.ce_file);
2051 ct->c_cefile.ce_file = NULL;
2052 } else if (ct->c_encoding == CE_QUOTED &&
2053 ct_encoding == CE_8BIT && encoding == CE_7BIT) {
2054 /* The decoding isn't acceptable so discard it.
2055 Leave status as OK to allow other transformations. */
2056 if (verbosw) {
2057 report (NULL, ct->c_partno, ct->c_file,
2058 "will not decode%s because it is 8bit",
2059 ct->c_partno ? ""
2060 : (FENDNULL(ct->c_ctline)));
2061 }
2062 (void) m_unlink (ct->c_cefile.ce_file);
2063 free (ct->c_cefile.ce_file);
2064 ct->c_cefile.ce_file = NULL;
2065 } else {
2066 int enc;
2067
2068 if (ct_encoding == CE_BINARY) {
2069 enc = CE_BINARY;
2070 } else if (ct_encoding == CE_8BIT && encoding == CE_7BIT) {
2071 enc = CE_QUOTED;
2072 } else {
2073 enc = ct_encoding;
2074 }
2075 if (set_ce (ct, enc) == OK) {
2076 ++*message_mods;
2077 if (verbosw) {
2078 report (NULL, ct->c_partno, ct->c_file, "decode%s",
2079 FENDNULL(ct->c_ctline));
2080 }
2081 if (lf_line_endings) {
2082 strip_crs (ct, message_mods);
2083 }
2084 } else {
2085 status = NOTOK;
2086 }
2087 }
2088 } else {
2089 status = NOTOK;
2090 }
2091 break;
2092 }
2093 case CE_8BIT:
2094 case CE_7BIT:
2095 if (lf_line_endings) {
2096 strip_crs (ct, message_mods);
2097 }
2098 break;
2099 default:
2100 break;
2101 }
2102
2103 break;
2104 }
2105
2106 return status;
2107 }
2108
2109
/*
 * Determine if the part with type[/subtype] should be decoded, according to
 * decodetypes (which came from the -decodetypes switch).
 *
 * Returns nonzero if either type or type/subtype is an element of
 * decodetypes, ignoring case.  subtype can be NULL.
 */
static int
should_decode(const char *decodetypes, const char *type, const char *subtype)
{
    /* With commas added at both ends of decodetypes, every element of
       it has a comma on each side.  So, an element matches if
       ",type," or ",type/subtype," can be found anywhere in it. */
    char *haystack = concat(",", decodetypes, ",", NULL);
    char *needle = concat(",", type, ",", NULL);
    bool matched = nmh_strcasestr(haystack, needle) != NULL;

    free(needle);

    if (! matched && subtype != NULL) {
        needle = concat(",", type, "/", subtype, ",", NULL);
        matched = nmh_strcasestr(haystack, needle) != NULL;
        free(needle);
    }

    free(haystack);

    return matched;
}
2142
2143
2144 /*
2145 * See if the decoded content is 7bit, 8bit, or binary. It's binary
2146 * if it has any NUL characters, a CR not followed by a LF, or lines
2147 * greater than 998 characters in length. If binary, reason is set
2148 * to a string explaining why.
2149 */
2150 static int
2151 content_encoding (CT ct, const char **reason)
2152 {
2153 CE ce = &ct->c_cefile;
2154 int encoding = CE_7BIT;
2155
2156 if (ce->ce_file) {
2157 size_t line_len = 0;
2158 char buffer[NMH_BUFSIZ];
2159 size_t inbytes;
2160
2161 if (! ce->ce_fp && (ce->ce_fp = fopen (ce->ce_file, "r")) == NULL) {
2162 advise (ce->ce_file, "unable to open for reading");
2163 return CE_UNKNOWN;
2164 }
2165
2166 fseeko (ce->ce_fp, 0L, SEEK_SET);
2167 while (encoding != CE_BINARY &&
2168 (inbytes = fread (buffer, 1, sizeof buffer, ce->ce_fp)) > 0) {
2169 char *cp;
2170 size_t i;
2171 int last_char_was_cr = 0;
2172
2173 for (i = 0, cp = buffer; i < inbytes; ++i, ++cp) {
2174 if (*cp == '\0' || ++line_len > 998 ||
2175 (*cp != '\n' && last_char_was_cr)) {
2176 encoding = CE_BINARY;
2177 if (*cp == '\0') {
2178 *reason = "null character";
2179 } else if (line_len > 998) {
2180 *reason = "line length > 998";
2181 } else if (*cp != '\n' && last_char_was_cr) {
2182 *reason = "CR not followed by LF";
2183 } else {
2184 /* Should not reach this. */
2185 *reason = "";
2186 }
2187 break;
2188 }
2189 if (*cp == '\n') {
2190 line_len = 0;
2191 } else if (! isascii ((unsigned char) *cp)) {
2192 encoding = CE_8BIT;
2193 }
2194
2195 last_char_was_cr = *cp == '\r';
2196 }
2197 }
2198
2199 fclose (ce->ce_fp);
2200 ce->ce_fp = NULL;
2201 } /* else should never happen */
2202
2203 return encoding;
2204 }
2205
2206
2207 /*
2208 * Strip carriage returns from content.
2209 */
2210 static int
2211 strip_crs (CT ct, int *message_mods)
2212 {
2213 char *charset = content_charset (ct);
2214 int status = OK;
2215
2216 /* Only strip carriage returns if content is ASCII or another
2217 charset that has the same readily recognizable CR followed by a
2218 LF. We can include UTF-8 here because if the high-order bit of
2219 a UTF-8 byte is 0, then it must be a single-byte ASCII
2220 character. */
2221 if (! strcasecmp (charset, "US-ASCII") ||
2222 ! strcasecmp (charset, "UTF-8") ||
2223 ! strncasecmp (charset, "ISO-8859-", 9) ||
2224 ! strncasecmp (charset, "WINDOWS-12", 10)) {
2225 char **file = NULL;
2226 FILE **fp = NULL;
2227 size_t begin;
2228 size_t end;
2229 bool has_crs = false;
2230 bool opened_input_file = false;
2231
2232 if (ct->c_cefile.ce_file) {
2233 file = &ct->c_cefile.ce_file;
2234 fp = &ct->c_cefile.ce_fp;
2235 begin = end = 0;
2236 } else if (ct->c_file) {
2237 file = &ct->c_file;
2238 fp = &ct->c_fp;
2239 begin = (size_t) ct->c_begin;
2240 end = (size_t) ct->c_end;
2241 } /* else don't know where the content is */
2242
2243 if (file && *file && fp) {
2244 if (! *fp) {
2245 if ((*fp = fopen (*file, "r")) == NULL) {
2246 advise (*file, "unable to open for reading");
2247 status = NOTOK;
2248 } else {
2249 opened_input_file = true;
2250 }
2251 }
2252 }
2253
2254 if (fp && *fp) {
2255 char buffer[NMH_BUFSIZ];
2256 size_t bytes_read;
2257 size_t bytes_to_read =
2258 end > 0 && end > begin ? end - begin : sizeof buffer;
2259
2260 fseeko (*fp, begin, SEEK_SET);
2261 while ((bytes_read = fread (buffer, 1,
2262 min (bytes_to_read, sizeof buffer),
2263 *fp)) > 0) {
2264 /* Look for CR followed by a LF. This is supposed to
2265 be text so there should be LF's. If not, don't
2266 modify the content. */
2267 char *cp;
2268 size_t i;
2269 bool last_char_was_cr = false;
2270
2271 if (end > 0) { bytes_to_read -= bytes_read; }
2272
2273 for (i = 0, cp = buffer; i < bytes_read; ++i, ++cp) {
2274 if (*cp == '\n' && last_char_was_cr) {
2275 has_crs = true;
2276 break;
2277 }
2278
2279 last_char_was_cr = *cp == '\r';
2280 }
2281 }
2282
2283 if (has_crs) {
2284 int fd;
2285 char *stripped_content_file;
2286 char *tempfile = m_mktemp2 (NULL, invo_name, &fd, NULL);
2287
2288 if (tempfile == NULL) {
2289 die("unable to create temporary file in %s",
2290 get_temp_dir());
2291 }
2292 stripped_content_file = mh_xstrdup (tempfile);
2293
2294 /* Strip each CR before a LF from the content. */
2295 fseeko (*fp, begin, SEEK_SET);
2296 while ((bytes_read = fread (buffer, 1, sizeof buffer, *fp)) >
2297 0) {
2298 char *cp;
2299 size_t i;
2300 bool last_char_was_cr = false;
2301
2302 for (i = 0, cp = buffer; i < bytes_read; ++i, ++cp) {
2303 if (*cp == '\r') {
2304 last_char_was_cr = true;
2305 } else if (last_char_was_cr) {
2306 if (*cp != '\n') {
2307 if (write (fd, "\r", 1) < 0) {
2308 advise (tempfile, "CR write");
2309 }
2310 }
2311 if (write (fd, cp, 1) < 0) {
2312 advise (tempfile, "write");
2313 }
2314 last_char_was_cr = false;
2315 } else {
2316 if (write (fd, cp, 1) < 0) {
2317 advise (tempfile, "write");
2318 }
2319 last_char_was_cr = false;
2320 }
2321 }
2322 }
2323
2324 if (close (fd)) {
2325 inform("unable to write temporary file %s, continuing...",
2326 stripped_content_file);
2327 (void) m_unlink (stripped_content_file);
2328 free(stripped_content_file);
2329 status = NOTOK;
2330 } else {
2331 /* Replace the decoded file with the converted one. */
2332 if (ct->c_cefile.ce_file && ct->c_cefile.ce_unlink)
2333 (void) m_unlink (ct->c_cefile.ce_file);
2334
2335 free(ct->c_cefile.ce_file);
2336 ct->c_cefile.ce_file = stripped_content_file;
2337 ct->c_cefile.ce_unlink = 1;
2338
2339 ++*message_mods;
2340 if (verbosw) {
2341 report (NULL, ct->c_partno,
2342 begin == 0 && end == 0 ? "" : *file,
2343 "stripped CRs");
2344 }
2345 }
2346 }
2347
2348 if (opened_input_file) {
2349 fclose (*fp);
2350 *fp = NULL;
2351 }
2352 }
2353 }
2354
2355 free (charset);
2356
2357 return status;
2358 }
2359
2360
2361 /*
2362 * Add/update, if necessary, the message C-T-E, based on the least restrictive
2363 * of the part C-T-E's.
2364 */
2365 static void
2366 update_cte (CT ct)
2367 {
2368 const int least_restrictive_enc = least_restrictive_encoding (ct);
2369
2370 if (least_restrictive_enc != CE_UNKNOWN &&
2371 least_restrictive_enc != CE_7BIT) {
2372 char *cte = concat (" ", ce_str (least_restrictive_enc), "\n", NULL);
2373 HF hf;
2374 bool found_cte = false;
2375
2376 /* Update/add Content-Transfer-Encoding header field. */
2377 for (hf = ct->c_first_hf; hf; hf = hf->next) {
2378 if (! strcasecmp (ENCODING_FIELD, hf->name)) {
2379 found_cte = true;
2380 free (hf->value);
2381 hf->value = cte;
2382 }
2383 }
2384 if (! found_cte) {
2385 add_header (ct, mh_xstrdup (ENCODING_FIELD), cte);
2386 }
2387 }
2388 }
2389
2390
2391 /*
2392 * Find the least restrictive encoding (7bit, 8bit, binary) of the parts
2393 * within a message.
2394 */
2395 static int
2396 least_restrictive_encoding (CT ct)
2397 {
2398 int encoding = CE_UNKNOWN;
2399
2400 switch (ct->c_type) {
2401 case CT_MULTIPART: {
2402 struct multipart *m = (struct multipart *) ct->c_ctparams;
2403 struct part *part;
2404
2405 for (part = m->mp_parts; part; part = part->mp_next) {
2406 const int part_encoding =
2407 least_restrictive_encoding (part->mp_part);
2408
2409 if (less_restrictive (encoding, part_encoding)) {
2410 encoding = part_encoding;
2411 }
2412 }
2413 break;
2414 }
2415
2416 case CT_MESSAGE:
2417 if (ct->c_subtype == MESSAGE_EXTERNAL) {
2418 struct exbody *e = (struct exbody *) ct->c_ctparams;
2419 const int part_encoding =
2420 least_restrictive_encoding (e->eb_content);
2421
2422 if (less_restrictive (encoding, part_encoding)) {
2423 encoding = part_encoding;
2424 }
2425 }
2426 break;
2427
2428 default: {
2429 if (less_restrictive (encoding, ct->c_encoding)) {
2430 encoding = ct->c_encoding;
2431 }
2432 }}
2433
2434 return encoding;
2435 }
2436
2437
2438 /*
2439 * Return whether the second encoding is less restrictive than the first, where
2440 * "less restrictive" is in the sense used by RFC 2045 Secs. 6.1 and 6.4. So,
2441 * CE_BINARY is less restrictive than CE_8BIT and
2442 * CE_8BIT is less restrictive than CE_7BIT.
2443 */
2444 static int
2445 less_restrictive (int encoding, int second_encoding)
2446 {
2447 switch (second_encoding) {
2448 case CE_BINARY:
2449 return encoding != CE_BINARY;
2450 case CE_8BIT:
2451 return encoding != CE_BINARY && encoding != CE_8BIT;
2452 case CE_7BIT:
2453 return encoding != CE_BINARY && encoding != CE_8BIT &&
2454 encoding != CE_7BIT;
2455 default :
2456 return 0;
2457 }
2458 }
2459
2460
2461 /*
2462 * Convert character set of each part.
2463 */
2464 static int
2465 convert_charsets (CT ct, char *dest_charset, int *message_mods)
2466 {
2467 int status = OK;
2468
2469 switch (ct->c_type) {
2470 case CT_TEXT:
2471 if (ct->c_subtype == TEXT_PLAIN) {
2472 status = convert_charset (ct, dest_charset, message_mods);
2473 if (status == OK) {
2474 if (verbosw) {
2475 char *ct_charset = content_charset (ct);
2476
2477 report (NULL, ct->c_partno, ct->c_file,
2478 "convert %s to %s", ct_charset, dest_charset);
2479 free (ct_charset);
2480 }
2481 } else {
2482 char *ct_charset = content_charset (ct);
2483
2484 report ("iconv", ct->c_partno, ct->c_file,
2485 "failed to convert %s to %s", ct_charset, dest_charset);
2486 free (ct_charset);
2487 }
2488 }
2489 break;
2490
2491 case CT_MULTIPART: {
2492 struct multipart *m = (struct multipart *) ct->c_ctparams;
2493 struct part *part;
2494
2495 /* Should check to see if the body for this part is encoded?
2496 For now, it gets passed along as-is by InitMultiPart(). */
2497 for (part = m->mp_parts; status == OK && part; part = part->mp_next) {
2498 status =
2499 convert_charsets (part->mp_part, dest_charset, message_mods);
2500 }
2501 break;
2502 }
2503
2504 case CT_MESSAGE:
2505 if (ct->c_subtype == MESSAGE_EXTERNAL) {
2506 struct exbody *e = (struct exbody *) ct->c_ctparams;
2507
2508 status =
2509 convert_charsets (e->eb_content, dest_charset, message_mods);
2510 }
2511 break;
2512
2513 default:
2514 break;
2515 }
2516
2517 return status;
2518 }
2519
2520
2521 /*
2522 * Fix various problems that aren't handled elsewhere. These
2523 * are fixed unconditionally: there are no switches to disable
2524 * them. Currently, "problems" are these:
2525 * 1) remove extraneous semicolon at the end of a header parameter list
2526 * 2) replace RFC 2047 encoding with RFC 2231 encoding of name and
2527 * filename parameters in Content-Type and Content-Disposition
2528 * headers, respectively.
2529 */
2530 static int
2531 fix_always (CT ct, int *message_mods)
2532 {
2533 int status = OK;
2534
2535 switch (ct->c_type) {
2536 case CT_MULTIPART: {
2537 struct multipart *m = (struct multipart *) ct->c_ctparams;
2538 struct part *part;
2539
2540 for (part = m->mp_parts; status == OK && part; part = part->mp_next) {
2541 status = fix_always (part->mp_part, message_mods);
2542 }
2543 break;
2544 }
2545
2546 case CT_MESSAGE:
2547 if (ct->c_subtype == MESSAGE_EXTERNAL) {
2548 struct exbody *e = (struct exbody *) ct->c_ctparams;
2549
2550 status = fix_always (e->eb_content, message_mods);
2551 }
2552 break;
2553
2554 default: {
2555 HF hf;
2556
2557 if (ct->c_first_hf) {
2558 fix_filename_encoding (ct);
2559 }
2560
2561 for (hf = ct->c_first_hf; hf; hf = hf->next) {
2562 size_t len = strlen (hf->value);
2563
2564 if (strcasecmp (hf->name, TYPE_FIELD) != 0 &&
2565 strcasecmp (hf->name, DISPO_FIELD) != 0) {
2566 /* Only do this for Content-Type and
2567 Content-Disposition fields because those are the
2568 only headers that parse_mime() warns about. */
2569 continue;
2570 }
2571
2572 /* whitespace following a trailing ';' will be nuked as well */
2573 if (hf->value[len - 1] == '\n') {
2574 while (isspace((unsigned char)(hf->value[len - 2]))) {
2575 if (len-- == 0) { break; }
2576 }
2577 }
2578
2579 if (hf->value[len - 2] == ';') {
2580 /* Remove trailing ';' from parameter value. */
2581 hf->value[len - 2] = '\n';
2582 hf->value[len - 1] = '\0';
2583
2584 /* Also, if Content-Type parameter, remove trailing ';'
2585 from ct->c_ctline. This probably isn't necessary
2586 but can't hurt. */
2587 if (strcasecmp(hf->name, TYPE_FIELD) == 0 && ct->c_ctline) {
2588 size_t l = strlen(ct->c_ctline) - 1;
2589 while (isspace((unsigned char)(ct->c_ctline[l])) ||
2590 ct->c_ctline[l] == ';') {
2591 ct->c_ctline[l--] = '\0';
2592 if (l == 0) { break; }
2593 }
2594 }
2595
2596 ++*message_mods;
2597 if (verbosw) {
2598 report (NULL, ct->c_partno, ct->c_file,
2599 "remove trailing ; from %s parameter value",
2600 hf->name);
2601 }
2602 }
2603 }
2604 }}
2605
2606 return status;
2607 }
2608
2609
2610 /*
2611 * Factor out common code for loops in fix_filename_encoding().
2612 */
2613 static int
2614 fix_filename_param (char *name, char *value, PM *first_pm, PM *last_pm)
2615 {
2616 bool fixed = false;
2617
2618 if (has_prefix(value, "=?") && has_suffix(value, "?=")) {
2619 /* Looks like an RFC 2047 encoded parameter. */
2620 char decoded[PATH_MAX + 1];
2621
2622 if (decode_rfc2047 (value, decoded, sizeof decoded)) {
2623 /* Encode using RFC 2231. */
2624 replace_param (first_pm, last_pm, name, decoded, 0);
2625 fixed = true;
2626 } else {
2627 inform("failed to decode %s parameter %s", name, value);
2628 }
2629 }
2630
2631 return fixed;
2632 }
2633
2634
2635 /*
2636 * Replace RFC 2047 encoding with RFC 2231 encoding of name and
2637 * filename parameters in Content-Type and Content-Disposition
2638 * headers, respectively.
2639 */
2640 static int
2641 fix_filename_encoding (CT ct)
2642 {
2643 PM pm;
2644 HF hf;
2645 int fixed = 0;
2646
2647 for (pm = ct->c_ctinfo.ci_first_pm; pm; pm = pm->pm_next) {
2648 if (pm->pm_name && pm->pm_value &&
2649 strcasecmp (pm->pm_name, "name") == 0) {
2650 fixed = fix_filename_param (pm->pm_name, pm->pm_value,
2651 &ct->c_ctinfo.ci_first_pm,
2652 &ct->c_ctinfo.ci_last_pm);
2653 }
2654 }
2655
2656 for (pm = ct->c_dispo_first; pm; pm = pm->pm_next) {
2657 if (pm->pm_name && pm->pm_value &&
2658 strcasecmp (pm->pm_name, "filename") == 0) {
2659 fixed = fix_filename_param (pm->pm_name, pm->pm_value,
2660 &ct->c_dispo_first,
2661 &ct->c_dispo_last);
2662 }
2663 }
2664
2665 /* Fix hf values to correspond. */
2666 for (hf = ct->c_first_hf; fixed && hf; hf = hf->next) {
2667 enum { OTHER, TYPE_HEADER, DISPO_HEADER } field = OTHER;
2668
2669 if (strcasecmp (hf->name, TYPE_FIELD) == 0) {
2670 field = TYPE_HEADER;
2671 } else if (strcasecmp (hf->name, DISPO_FIELD) == 0) {
2672 field = DISPO_HEADER;
2673 }
2674
2675 if (field != OTHER) {
2676 const char *const semicolon_loc = strchr (hf->value, ';');
2677
2678 if (semicolon_loc) {
2679 const size_t len =
2680 strlen (hf->name) + 1 + semicolon_loc - hf->value;
2681 const char *const params =
2682 output_params (len,
2683 field == TYPE_HEADER
2684 ? ct->c_ctinfo.ci_first_pm
2685 : ct->c_dispo_first,
2686 NULL, 0);
2687 const char *const new_params = concat (params, "\n", NULL);
2688
2689 replace_substring (&hf->value, semicolon_loc, new_params);
2690 free((void *)new_params); /* Cast away const. Sigh. */
2691 free((void *)params);
2692 } else {
2693 inform("did not find semicolon in %s:%s\n",
2694 hf->name, hf->value);
2695 }
2696 }
2697 }
2698
2699 return OK;
2700 }
2701
2702
2703 /*
2704 * Output content in input file to output file.
2705 */
2706 static int
2707 write_content (CT ct, const char *input_filename, char *outfile, FILE *outfp,
2708 int modify_inplace, int message_mods)
2709 {
2710 int status = OK;
2711
2712 if (modify_inplace) {
2713 if (message_mods > 0) {
2714 if ((status = output_message_fp (ct, outfp, outfile)) == OK) {
2715 char *infile = input_filename
2716 ? mh_xstrdup (input_filename)
2717 : mh_xstrdup (ct->c_file ? ct->c_file : "-");
2718
2719 if (remove_file (infile) == OK) {
2720 if (rename (outfile, infile)) {
2721 /* Rename didn't work, possibly because of an
2722 attempt to rename across filesystems. Try
2723 brute force copy. */
2724 int old = open (outfile, O_RDONLY);
2725 int new =
2726 open (infile, O_WRONLY | O_CREAT, m_gmprot ());
2727 int i = -1;
2728
2729 if (old != -1 && new != -1) {
2730 char buffer[NMH_BUFSIZ];
2731
2732 while ((i = read (old, buffer, sizeof buffer)) >
2733 0) {
2734 if (write (new, buffer, i) != i) {
2735 i = -1;
2736 break;
2737 }
2738 }
2739 }
2740 if (new != -1) { close (new); }
2741 if (old != -1) { close (old); }
2742 (void) m_unlink (outfile);
2743
2744 if (i < 0) {
2745 /* The -file argument processing used path() to
2746 expand filename to absolute path. */
2747 int file = ct->c_file && ct->c_file[0] == '/';
2748
2749 inform("unable to rename %s %s to %s, continuing...",
2750 file ? "file" : "message", outfile,
2751 infile);
2752 status = NOTOK;
2753 }
2754 }
2755 } else {
2756 inform("unable to remove input file %s, "
2757 "not modifying it, continuing...", infile);
2758 (void) m_unlink (outfile);
2759 status = NOTOK;
2760 }
2761
2762 free (infile);
2763 } else {
2764 status = NOTOK;
2765 }
2766 } else {
2767 /* No modifications and didn't need the tmp outfile. */
2768 (void) m_unlink (outfile);
2769 }
2770 } else {
2771 /* Output is going to some file. Produce it whether or not
2772 there were modifications. */
2773 status = output_message_fp (ct, outfp, outfile);
2774 }
2775
2776 flush_errors ();
2777 return status;
2778 }
2779
2780
2781 /*
2782 * parse_mime() does not set lf_line_endings in struct text, so use this
2783 * function to do it. It touches the parts the decodetypes identifies.
2784 */
2785 static void
2786 set_text_ctparams(CT ct, char *decodetypes, int lf_line_endings)
2787 {
2788 switch (ct->c_type) {
2789 case CT_MULTIPART: {
2790 struct multipart *m = (struct multipart *) ct->c_ctparams;
2791 struct part *part;
2792
2793 for (part = m->mp_parts; part; part = part->mp_next) {
2794 set_text_ctparams(part->mp_part, decodetypes, lf_line_endings);
2795 }
2796 break;
2797 }
2798
2799 case CT_MESSAGE:
2800 if (ct->c_subtype == MESSAGE_EXTERNAL) {
2801 struct exbody *e = (struct exbody *) ct->c_ctparams;
2802
2803 set_text_ctparams(e->eb_content, decodetypes, lf_line_endings);
2804 }
2805 break;
2806
2807 default:
2808 if (should_decode(decodetypes, ct->c_ctinfo.ci_type, ct->c_ctinfo.ci_subtype)) {
2809 if (ct->c_ctparams == NULL) {
2810 ct->c_ctparams = mh_xcalloc(1, sizeof (struct text));
2811 }
2812 ((struct text *) ct->c_ctparams)->lf_line_endings = lf_line_endings;
2813 }
2814 }
2815 }
2816
2817
2818 /*
2819 * If "rmmproc" is defined, call that to remove the file. Otherwise,
2820 * use the standard MH backup file.
2821 */
2822 static int
2823 remove_file (const char *file)
2824 {
2825 if (rmmproc) {
2826 char *rmm_command = concat (rmmproc, " ", file, NULL);
2827 int status = system (rmm_command);
2828
2829 free (rmm_command);
2830 return WIFEXITED (status) ? WEXITSTATUS (status) : NOTOK;
2831 }
2832 /* This is OK for a non-message file, it still uses the
2833 BACKUP_PREFIX form. The backup file will be in the same
2834 directory as file. */
2835 return rename (file, m_backup (file));
2836 }
2837
2838
2839 /*
2840 * Output formatted message to user.
2841 */
2842 static void
2843 report (char *what, char *partno, char *filename, char *message, ...)
2844 {
2845 va_list args;
2846 char *fmt;
2847
2848 if (verbosw) {
2849 va_start (args, message);
2850 fmt = concat (filename, partno ? " part " : ", ",
2851 FENDNULL(partno), partno ? ", " : "", message, NULL);
2852
2853 advertise (what, NULL, fmt, args);
2854
2855 free (fmt);
2856 va_end (args);
2857 }
2858 }
2859
2860
/*
 * Signal handler:  on SIGQUIT, flush stdout and write a newline to
 * stderr; then, for any signal, call done() with status 1.
 */
static void
pipeser (int i)
{
    const bool is_quit = i == SIGQUIT;

    if (is_quit) {
        fflush (stdout);
        fputs ("\n", stderr);
        fflush (stderr);
    }

    done (1);
    /* NOTREACHED */
}