]> diplodocus.org Git - nmh/blob - uip/mhfixmsg.c
fdcompare.c: Move interface to own file.
[nmh] / uip / mhfixmsg.c
/* mhfixmsg.c -- rewrite a message with various transformations
 *
 * This code is Copyright (c) 2002 and 2013, by the authors of nmh.
 * See the COPYRIGHT file in the root directory of the nmh
 * distribution for complete copyright information.
 */
7
8 #include "h/mh.h"
9 #include "sbr/folder_read.h"
10 #include "sbr/context_save.h"
11 #include "sbr/context_replace.h"
12 #include "sbr/context_find.h"
13 #include "sbr/readconfig.h"
14 #include "sbr/ambigsw.h"
15 #include "sbr/path.h"
16 #include "sbr/print_version.h"
17 #include "sbr/print_help.h"
18 #include "sbr/error.h"
19 #include "h/fmt_scan.h"
20 #include "h/mime.h"
21 #include "h/mhparse.h"
22 #include "h/done.h"
23 #include "h/utils.h"
24 #include "h/signals.h"
25 #include "sbr/m_maildir.h"
26 #include "sbr/m_mktemp.h"
27 #include "sbr/mime_type.h"
28 #include "mhmisc.h"
29 #include "mhfree.h"
30 #include "mhoutsbr.h"
31 #include "mhshowsbr.h"
32 #include <fcntl.h>
33
/*
 * Command-line switches, kept in a single X-macro table.  Each entry is
 * X(switch text, minchars, enum id).  The table is expanded twice below:
 * once to produce the switch ids and once to produce the `switches'
 * array that main() hands to smatch().
 */
#define MHFIXMSG_SWITCHES \
    X("decodetext 8bit|7bit|binary", 0, DECODETEXTSW) \
    X("nodecodetext", 0, NDECODETEXTSW) \
    X("decodetypes", 0, DECODETYPESW) \
    X("crlflinebreaks", 0, CRLFLINEBREAKSSW) \
    X("nocrlflinebreaks", 0, NCRLFLINEBREAKSSW) \
    X("textcharset", 0, TEXTCHARSETSW) \
    X("notextcharset", 0, NTEXTCHARSETSW) \
    X("reformat", 0, REFORMATSW) \
    X("noreformat", 0, NREFORMATSW) \
    X("replacetextplain", 0, REPLACETEXTPLAINSW) \
    X("noreplacetextplain", 0, NREPLACETEXTPLAINSW) \
    X("fixboundary", 0, FIXBOUNDARYSW) \
    X("nofixboundary", 0, NFIXBOUNDARYSW) \
    X("fixcte", 0, FIXCOMPOSITECTESW) \
    X("nofixcte", 0, NFIXCOMPOSITECTESW) \
    X("fixtype mimetype", 0, FIXTYPESW) \
    X("file file", 0, FILESW) \
    X("outfile file", 0, OUTFILESW) \
    X("rmmproc program", 0, RPROCSW) \
    X("normmproc", 0, NRPRCSW) \
    X("changecur", 0, CHGSW) \
    X("nochangecur", 0, NCHGSW) \
    X("verbose", 0, VERBSW) \
    X("noverbose", 0, NVERBSW) \
    X("version", 0, VERSIONSW) \
    X("help", 0, HELPSW) \

/* First expansion: the switch ids. */
#define X(sw, minchars, id) id,
DEFINE_SWITCH_ENUM(MHFIXMSG);
#undef X

/* Second expansion: the table of switch descriptions. */
#define X(sw, minchars, id) { sw, minchars, id },
DEFINE_SWITCH_ARRAY(MHFIXMSG, switches);
#undef X

69
70
/* Non-zero when -verbose is in effect; main() sets it from the switches. */
int verbosw;

/* Needed by mhparse.c. */
int debugsw;

/* SIGQUIT is handled by the same routine as SIGPIPE. */
#define quitser pipeser
75
76 /*
77 * static prototypes
78 */
79 typedef struct fix_transformations {
80 int fixboundary;
81 int fixcompositecte;
82 svector_t fixtypes;
83 int reformat;
84 int replacetextplain;
85 int decodetext;
86 char *decodetypes;
87 /* Whether to use CRLF linebreaks, per RFC 2046 Sec. 4.1.1, par.1. */
88 int lf_line_endings;
89 char *textcharset;
90 } fix_transformations;
91
92 static int mhfixmsgsbr (CT *, char *, const fix_transformations *,
93 FILE **, char *, FILE **);
94 static int fix_boundary (CT *, int *);
95 static int copy_input_to_output (const char *, FILE *, const char *, FILE *);
96 static int get_multipart_boundary (CT, char **);
97 static int replace_boundary (CT, char *, char *);
98 static int fix_types (CT, svector_t, int *);
99 static char *replace_substring (char **, const char *, const char *);
100 static char *remove_parameter (char *, const char *);
101 static int fix_composite_cte (CT, int *);
102 static int set_ce (CT, int);
103 static int ensure_text_plain (CT *, CT, int *, int);
104 static int find_textplain_sibling (CT, int, int *);
105 static int insert_new_text_plain_part (CT, int, CT);
106 static CT build_text_plain_part (CT);
107 static int insert_into_new_mp_alt (CT *, int *);
108 static CT divide_part (CT);
109 static void copy_ctinfo (CI, CI);
110 static int decode_part (CT);
111 static int reformat_part (CT, char *, char *, char *, int);
112 static CT build_multipart_alt (CT, CT, int, int);
113 static int boundary_in_content (FILE **, char *, const char *);
114 static void transfer_noncontent_headers (CT, CT);
115 static int set_ct_type (CT, int type, int subtype, int encoding);
116 static int decode_text_parts (CT, int, const char *, int *);
117 static int should_decode(const char *, const char *, const char *);
118 static int content_encoding (CT, const char **);
119 static int strip_crs (CT, int *);
120 static void update_cte (CT);
121 static int least_restrictive_encoding (CT) PURE;
122 static int less_restrictive (int, int);
123 static int convert_charsets (CT, char *, int *);
124 static int fix_always (CT, int *);
125 static int fix_filename_param (char *, char *, PM *, PM *);
126 static int fix_filename_encoding (CT);
127 static int write_content (CT, const char *, char *, FILE *, int, int);
128 static void set_text_ctparams(CT, char *, int);
129 static int remove_file (const char *);
130 static void report (char *, char *, char *, char *, ...)
131 CHECK_PRINTF(4, 5);
132 static void pipeser (int);
133
134
135 int
136 main (int argc, char **argv)
137 {
138 int msgnum;
139 char *cp, *file = NULL, *folder = NULL;
140 char *maildir = NULL, buf[100], *outfile = NULL;
141 char **argp, **arguments;
142 struct msgs_array msgs = { 0, 0, NULL };
143 struct msgs *mp = NULL;
144 CT *ctp;
145 FILE *fp, *infp = NULL, *outfp = NULL;
146 bool using_stdin = false;
147 bool chgflag = true;
148 int status = OK;
149 fix_transformations fx;
150 fx.reformat = fx.fixcompositecte = fx.fixboundary = 1;
151 fx.fixtypes = NULL;
152 fx.replacetextplain = 0;
153 fx.decodetext = CE_8BIT;
154 fx.decodetypes = "text,application/ics"; /* Default, per man page. */
155 fx.lf_line_endings = 0;
156 fx.textcharset = NULL;
157
158 if (nmh_init(argv[0], true, false)) { return 1; }
159
160 arguments = getarguments (invo_name, argc, argv, 1);
161 argp = arguments;
162
163 /*
164 * Parse arguments
165 */
166 while ((cp = *argp++)) {
167 if (*cp == '-') {
168 switch (smatch (++cp, switches)) {
169 case AMBIGSW:
170 ambigsw (cp, switches);
171 done (1);
172 case UNKWNSW:
173 die("-%s unknown", cp);
174
175 case HELPSW:
176 snprintf (buf, sizeof buf, "%s [+folder] [msgs] [switches]",
177 invo_name);
178 print_help (buf, switches, 1);
179 done (0);
180 case VERSIONSW:
181 print_version(invo_name);
182 done (0);
183
184 case DECODETEXTSW:
185 if (! (cp = *argp++) || *cp == '-') {
186 die("missing argument to %s", argp[-2]);
187 }
188 if (! strcasecmp (cp, "8bit")) {
189 fx.decodetext = CE_8BIT;
190 } else if (! strcasecmp (cp, "7bit")) {
191 fx.decodetext = CE_7BIT;
192 } else if (! strcasecmp (cp, "binary")) {
193 fx.decodetext = CE_BINARY;
194 } else {
195 die("invalid argument to %s", argp[-2]);
196 }
197 continue;
198 case NDECODETEXTSW:
199 fx.decodetext = 0;
200 continue;
201 case DECODETYPESW:
202 if (! (cp = *argp++) || *cp == '-') {
203 die("missing argument to %s", argp[-2]);
204 }
205 fx.decodetypes = cp;
206 continue;
207 case CRLFLINEBREAKSSW:
208 fx.lf_line_endings = 0;
209 continue;
210 case NCRLFLINEBREAKSSW:
211 fx.lf_line_endings = 1;
212 continue;
213 case TEXTCHARSETSW:
214 if (! (cp = *argp++) || (*cp == '-' && cp[1])) {
215 die("missing argument to %s", argp[-2]);
216 }
217 fx.textcharset = cp;
218 continue;
219 case NTEXTCHARSETSW:
220 fx.textcharset = 0;
221 continue;
222 case FIXBOUNDARYSW:
223 fx.fixboundary = 1;
224 continue;
225 case NFIXBOUNDARYSW:
226 fx.fixboundary = 0;
227 continue;
228 case FIXCOMPOSITECTESW:
229 fx.fixcompositecte = 1;
230 continue;
231 case NFIXCOMPOSITECTESW:
232 fx.fixcompositecte = 0;
233 continue;
234 case FIXTYPESW:
235 if (! (cp = *argp++) || (*cp == '-' && cp[1])) {
236 die("missing argument to %s", argp[-2]);
237 }
238 if (! strncasecmp (cp, "multipart/", 10) ||
239 ! strncasecmp (cp, "message/", 8))
240 die("-fixtype %s not allowed", cp);
241 if (! strchr (cp, '/'))
242 die("-fixtype requires type/subtype");
243 if (fx.fixtypes == NULL) { fx.fixtypes = svector_create (10); }
244 svector_push_back (fx.fixtypes, cp);
245 continue;
246 case REFORMATSW:
247 fx.reformat = 1;
248 continue;
249 case NREFORMATSW:
250 fx.reformat = 0;
251 continue;
252 case REPLACETEXTPLAINSW:
253 fx.replacetextplain = 1;
254 continue;
255 case NREPLACETEXTPLAINSW:
256 fx.replacetextplain = 0;
257 continue;
258 case FILESW:
259 if (! (cp = *argp++) || (*cp == '-' && cp[1])) {
260 die("missing argument to %s", argp[-2]);
261 }
262 file = *cp == '-' ? mh_xstrdup (cp) : path (cp, TFILE);
263 continue;
264 case OUTFILESW:
265 if (! (cp = *argp++) || (*cp == '-' && cp[1])) {
266 die("missing argument to %s", argp[-2]);
267 }
268 outfile = *cp == '-' ? mh_xstrdup (cp) : path (cp, TFILE);
269 continue;
270 case RPROCSW:
271 if (!(rmmproc = *argp++) || *rmmproc == '-') {
272 die("missing argument to %s", argp[-2]);
273 }
274 continue;
275 case NRPRCSW:
276 rmmproc = NULL;
277 continue;
278 case CHGSW:
279 chgflag = true;
280 continue;
281 case NCHGSW:
282 chgflag = false;
283 continue;
284 case VERBSW:
285 verbosw = 1;
286 continue;
287 case NVERBSW:
288 verbosw = 0;
289 continue;
290 }
291 }
292 if (*cp == '+' || *cp == '@') {
293 if (folder)
294 die("only one folder at a time!");
295 folder = pluspath (cp);
296 } else {
297 if (*cp == '/') {
298 /* Interpret a full path as a filename, not a message. */
299 file = mh_xstrdup (cp);
300 } else {
301 app_msgarg (&msgs, cp);
302 }
303 }
304 }
305
306 SIGNAL (SIGQUIT, quitser);
307 SIGNAL (SIGPIPE, pipeser);
308
309 /*
310 * Read the standard profile setup
311 */
312 if ((fp = fopen (cp = etcpath ("mhn.defaults"), "r"))) {
313 readconfig(NULL, fp, cp, 0);
314 fclose (fp);
315 }
316
317 suppress_bogus_mp_content_warning = skip_mp_cte_check = true;
318 suppress_extraneous_trailing_semicolon_warning = true;
319
320 if (! context_find ("path")) {
321 free (path ("./", TFOLDER));
322 }
323
324 if (file && msgs.size) {
325 die("cannot specify msg and file at same time!");
326 }
327
328 if (outfile) {
329 /* Open the outfile now, so we don't have to risk opening it
330 after running out of fds. */
331 if (strcmp (outfile, "-") == 0) {
332 outfp = stdout;
333 } else if ((outfp = fopen (outfile, "w")) == NULL) {
334 adios (outfile, "unable to open for writing");
335 }
336 }
337
338 /*
339 * check if message is coming from file
340 */
341 if (file) {
342 /* If file is stdin, create a tmp file name before parse_mime()
343 has a chance, because it might put in on a different
344 filesystem than the output file. Instead, put it in the
345 user's preferred tmp directory. */
346 CT ct;
347
348 if (! strcmp ("-", file)) {
349 int fd;
350 char *cp;
351
352 using_stdin = true;
353
354 if ((cp = m_mktemp2 (NULL, invo_name, &fd, NULL)) == NULL) {
355 die("unable to create temporary file in %s",
356 get_temp_dir());
357 } else {
358 free (file);
359 file = mh_xstrdup (cp);
360 cpydata (STDIN_FILENO, fd, "-", file);
361 }
362
363 if (close (fd)) {
364 (void) m_unlink (file);
365 die("failed to write temporary file");
366 }
367 }
368
369 cts = mh_xcalloc(2, sizeof *cts);
370 ctp = cts;
371
372 if ((ct = parse_mime (file))) {
373 set_text_ctparams(ct, fx.decodetypes, fx.lf_line_endings);
374 *ctp++ = ct;
375 } else {
376 inform("unable to parse message from file %s", file);
377 status = NOTOK;
378
379 /* If there's an outfile, pass the input message unchanged, so the
380 message won't get dropped from a pipeline. */
381 if (outfile) {
382 /* Something went wrong. Output might be expected, such as if
383 this were run as a filter. Just copy the input to the
384 output. */
385 if ((infp = fopen (file, "r")) == NULL) {
386 adios (file, "unable to open for reading");
387 }
388
389 if (copy_input_to_output (file, infp, outfile, outfp) != OK) {
390 inform("unable to copy message to %s, "
391 "it might be lost\n", outfile);
392 }
393
394 fclose (infp);
395 infp = NULL;
396 }
397 }
398 } else {
399 /*
400 * message(s) are coming from a folder
401 */
402 CT ct;
403
404 if (! msgs.size) {
405 app_msgarg(&msgs, "cur");
406 }
407 if (! folder) {
408 folder = getfolder (1);
409 }
410 maildir = mh_xstrdup(m_maildir (folder));
411
412 /* chdir so that error messages, esp. from MIME parser, just
413 refer to the message and not its path. */
414 if (chdir (maildir) == NOTOK) {
415 adios (maildir, "unable to change directory to");
416 }
417
418 /* read folder and create message structure */
419 if (! (mp = folder_read (folder, 1))) {
420 die("unable to read folder %s", folder);
421 }
422
423 /* check for empty folder */
424 if (mp->nummsg == 0) {
425 die("no messages in %s", folder);
426 }
427
428 /* parse all the message ranges/sequences and set SELECTED */
429 for (msgnum = 0; msgnum < msgs.size; msgnum++)
430 if (! m_convert (mp, msgs.msgs[msgnum])) {
431 done (1);
432 }
433 seq_setprev (mp); /* set the previous-sequence */
434
435 cts = mh_xcalloc(mp->numsel + 1, sizeof *cts);
436 ctp = cts;
437
438 for (msgnum = mp->lowsel; msgnum <= mp->hghsel; msgnum++) {
439 if (is_selected(mp, msgnum)) {
440 char *msgnam = m_name (msgnum);
441
442 if ((ct = parse_mime (msgnam))) {
443 set_text_ctparams(ct, fx.decodetypes, fx.lf_line_endings);
444 *ctp++ = ct;
445 } else {
446 inform("unable to parse message %s", msgnam);
447 status = NOTOK;
448
449 /* If there's an outfile, pass the input message
450 unchanged, so the message won't get dropped from a
451 pipeline. */
452 if (outfile) {
453 /* Something went wrong. Output might be expected,
454 such as if this were run as a filter. Just copy
455 the input to the output. */
456 /* Can't use path() here because 1) it might have been
457 called before and it caches the pwd, and 2) we call
458 chdir() after that. */
459 char *input_filename =
460 concat (maildir, "/", msgnam, NULL);
461
462 if ((infp = fopen (input_filename, "r")) == NULL) {
463 adios (input_filename,
464 "unable to open for reading");
465 }
466
467 if (copy_input_to_output (input_filename, infp,
468 outfile, outfp) != OK) {
469 inform("unable to copy message to %s, "
470 "it might be lost\n", outfile);
471 }
472
473 fclose (infp);
474 infp = NULL;
475 free (input_filename);
476 }
477 }
478 }
479 }
480
481 if (chgflag) {
482 seq_setcur (mp, mp->hghsel); /* update current message */
483 }
484 seq_save (mp); /* synchronize sequences */
485 context_replace (pfolder, folder);/* update current folder */
486 context_save (); /* save the context file */
487 }
488
489 if (*cts) {
490 for (ctp = cts; *ctp; ++ctp) {
491 status =
492 mhfixmsgsbr (ctp, maildir, &fx, &infp, outfile, &outfp) == OK
493 ? 0
494 : 1;
495 free_content (*ctp);
496
497 if (using_stdin) {
498 (void) m_unlink (file);
499
500 if (! outfile) {
501 /* Just calling m_backup() unlinks the backup file. */
502 (void) m_backup (file);
503 }
504 }
505 }
506 } else {
507 status = 1;
508 }
509
510 free(maildir);
511 free (cts);
512
513 if (fx.fixtypes != NULL) { svector_free (fx.fixtypes); }
514 if (infp) { fclose (infp); } /* even if stdin */
515 if (outfp) { fclose (outfp); } /* even if stdout */
516 free (outfile);
517 free (file);
518 free (folder);
519 free (arguments);
520
521 done (status == OK ? 0 : 1);
522 return NOTOK;
523 }
524
525
526 /*
527 * Apply transformations to one message.
528 */
529 static int
530 mhfixmsgsbr (CT *ctp, char *maildir, const fix_transformations *fx,
531 FILE **infp, char *outfile, FILE **outfp)
532 {
533 /* Store input filename in case one of the transformations, i.e.,
534 fix_boundary(), rewrites to a tmp file. */
535 char *input_filename = maildir
536 ? concat (maildir, "/", (*ctp)->c_file, NULL)
537 : mh_xstrdup ((*ctp)->c_file);
538 bool modify_inplace = false;
539 int message_mods = 0;
540 int status = OK;
541
542 /* Though the input file won't need to be opened if everything goes
543 well, do it here just in case there's a failure, and that failure is
544 running out of file descriptors. */
545 if ((*infp = fopen (input_filename, "r")) == NULL) {
546 adios (input_filename, "unable to open for reading");
547 }
548
549 if (outfile == NULL) {
550 modify_inplace = true;
551
552 if ((*ctp)->c_file) {
553 char *tempfile;
554 /* outfp will be closed by the caller */
555 if ((tempfile = m_mktemp2 (NULL, invo_name, NULL, outfp)) ==
556 NULL) {
557 die("unable to create temporary file in %s",
558 get_temp_dir());
559 }
560 outfile = mh_xstrdup (tempfile);
561 } else {
562 die("missing both input and output filenames\n");
563 }
564 } /* else *outfp was defined by caller */
565
566 reverse_alternative_parts (*ctp);
567 status = fix_always (*ctp, &message_mods);
568 if (status == OK && fx->fixboundary) {
569 status = fix_boundary (ctp, &message_mods);
570 }
571 if (status == OK && fx->fixtypes != NULL) {
572 status = fix_types (*ctp, fx->fixtypes, &message_mods);
573 }
574 if (status == OK && fx->fixcompositecte) {
575 status = fix_composite_cte (*ctp, &message_mods);
576 }
577 if (status == OK && fx->reformat) {
578 status =
579 ensure_text_plain (ctp, NULL, &message_mods, fx->replacetextplain);
580 }
581 if (status == OK && fx->decodetext) {
582 status = decode_text_parts (*ctp, fx->decodetext, fx->decodetypes,
583 &message_mods);
584 update_cte (*ctp);
585 }
586 if (status == OK && fx->textcharset != NULL) {
587 status = convert_charsets (*ctp, fx->textcharset, &message_mods);
588 }
589
590 if (status == OK && ! (*ctp)->c_umask) {
591 /* Set the umask for the contents file. This currently
592 isn't used but just in case it is in the future. */
593 struct stat st;
594
595 if (stat ((*ctp)->c_file, &st) != NOTOK) {
596 (*ctp)->c_umask = ~(st.st_mode & 0777);
597 } else {
598 (*ctp)->c_umask = ~m_gmprot();
599 }
600 }
601
602 /*
603 * Write the content to a file
604 */
605 if (status == OK) {
606 status = write_content (*ctp, input_filename, outfile, *outfp,
607 modify_inplace, message_mods);
608 } else if (! modify_inplace) {
609 /* Something went wrong. Output might be expected, such
610 as if this were run as a filter. Just copy the input
611 to the output. */
612 if (copy_input_to_output (input_filename, *infp, outfile,
613 *outfp) != OK) {
614 inform("unable to copy message to %s, it might be lost\n",
615 outfile);
616 }
617 }
618
619 if (modify_inplace) {
620 if (status != OK) { (void) m_unlink (outfile); }
621 free (outfile);
622 outfile = NULL;
623 }
624
625 fclose (*infp);
626 *infp = NULL;
627 free (input_filename);
628
629 return status;
630 }
631
632
633 /*
634 * Copy input message to output. Assumes not modifying in place, so this
635 * might be running as part of a pipeline.
636 */
637 static int
638 copy_input_to_output (const char *input_filename, FILE *infp,
639 const char *output_filename, FILE *outfp)
640 {
641 int in = fileno (infp);
642 int out = fileno (outfp);
643 int status = OK;
644
645 if (in != -1 && out != -1) {
646 cpydata (in, out, input_filename, output_filename);
647 } else {
648 status = NOTOK;
649 }
650
651 return status;
652 }
653
654
655 /*
656 * Fix mismatched outer level boundary.
657 */
658 static int
659 fix_boundary (CT *ct, int *message_mods)
660 {
661 struct multipart *mp;
662 int status = OK;
663
664 if (ct && (*ct)->c_type == CT_MULTIPART && bogus_mp_content) {
665 mp = (struct multipart *) (*ct)->c_ctparams;
666
667 /*
668 * 1) Get boundary at end of part.
669 * 2) Get boundary at beginning of part and compare to the end-of-part
670 * boundary.
671 * 3) Write out contents of ct to tmp file, replacing boundary in
672 * header with boundary from part. Set c_unlink to 1.
673 * 4) Free ct.
674 * 5) Call parse_mime() on the tmp file, replacing ct.
675 */
676
677 if (mp && mp->mp_start) {
678 char *part_boundary;
679
680 if (get_multipart_boundary (*ct, &part_boundary) == OK) {
681 char *fixed;
682
683 if ((fixed = m_mktemp2 (NULL, invo_name, NULL, &(*ct)->c_fp))) {
684 if (replace_boundary (*ct, fixed, part_boundary) == OK) {
685 char *filename = mh_xstrdup ((*ct)->c_file);
686 CT fixed_ct;
687
688 free_content (*ct);
689 if ((fixed_ct = parse_mime (fixed))) {
690 *ct = fixed_ct;
691 (*ct)->c_unlink = 1;
692
693 ++*message_mods;
694 if (verbosw) {
695 report (NULL, NULL, filename,
696 "fix multipart boundary");
697 }
698 } else {
699 *ct = NULL;
700 inform("unable to parse fixed part");
701 status = NOTOK;
702 }
703 free (filename);
704 } else {
705 inform("unable to replace broken boundary");
706 status = NOTOK;
707 }
708 } else {
709 inform("unable to create temporary file in %s",
710 get_temp_dir());
711 status = NOTOK;
712 }
713
714 free (part_boundary);
715 } else {
716 /* Couldn't fix the boundary. Report failure so that mhfixmsg
717 doesn't modify the message. */
718 status = NOTOK;
719 }
720 } else {
721 /* No multipart struct, even though the content type is
722 CT_MULTIPART. Report failure so that mhfixmsg doesn't modify
723 the message. */
724 status = NOTOK;
725 }
726 }
727
728 return status;
729 }
730
731
732 /*
733 * Find boundary at end of multipart.
734 */
735 static int
736 get_multipart_boundary (CT ct, char **part_boundary)
737 {
738 char buffer[NMH_BUFSIZ];
739 char *end_boundary = NULL;
740 off_t begin = (off_t) ct->c_end > (off_t) (ct->c_begin + sizeof buffer)
741 ? (off_t) (ct->c_end - sizeof buffer)
742 : (off_t) ct->c_begin;
743 size_t bytes_read;
744 int status = OK;
745
746 /* This will fail if the boundary spans fread() calls. NMH_BUFSIZ should
747 be big enough, even if it's just 1024, to make that unlikely. */
748
749 /* free_content() will close ct->c_fp if bogus MP boundary is fixed. */
750 if (! ct->c_fp && (ct->c_fp = fopen (ct->c_file, "r")) == NULL) {
751 advise (ct->c_file, "unable to open for reading");
752 return NOTOK;
753 }
754
755 /* Get boundary at end of multipart. */
756 while (begin >= (off_t) ct->c_begin) {
757 fseeko (ct->c_fp, begin, SEEK_SET);
758 while ((bytes_read = fread (buffer, 1, sizeof buffer, ct->c_fp)) > 0) {
759 char *cp = rfind_str (buffer, bytes_read, "--");
760
761 if (cp) {
762 char *end;
763
764 /* Trim off trailing "--" and anything beyond. */
765 *cp-- = '\0';
766 if ((end = rfind_str (buffer, cp - buffer, "\n"))) {
767 if (strlen (end) > 3 && *end++ == '\n' &&
768 *end++ == '-' && *end++ == '-') {
769 end_boundary = mh_xstrdup (end);
770 break;
771 }
772 }
773 }
774 }
775
776 if (end_boundary || begin <= (off_t) (ct->c_begin + sizeof buffer))
777 break;
778 begin -= sizeof buffer;
779 }
780
781 /* Get boundary at beginning of multipart. */
782 if (end_boundary) {
783 fseeko (ct->c_fp, ct->c_begin, SEEK_SET);
784 while ((bytes_read = fread (buffer, 1, sizeof buffer, ct->c_fp)) > 0) {
785 if (bytes_read >= strlen (end_boundary)) {
786 char *cp = find_str (buffer, bytes_read, end_boundary);
787
788 if (cp && cp - buffer >= 2 && *--cp == '-' &&
789 *--cp == '-' && (cp > buffer && *--cp == '\n')) {
790 status = OK;
791 break;
792 }
793 } else {
794 /* The start and end boundaries didn't match, or the
795 start boundary doesn't begin with "\n--" (or "--"
796 if at the beginning of buffer). Keep trying. */
797 status = NOTOK;
798 }
799 }
800 } else {
801 status = NOTOK;
802 }
803
804 if (ct->c_fp) {
805 fclose (ct->c_fp);
806 ct->c_fp = NULL;
807 }
808
809 if (status == OK) {
810 *part_boundary = end_boundary;
811 } else {
812 *part_boundary = NULL;
813 free (end_boundary);
814 }
815
816 return status;
817 }
818
819
820 /*
821 * Open and copy ct->c_file to file, replacing the multipart boundary.
822 */
823 static int
824 replace_boundary (CT ct, char *file, char *boundary)
825 {
826 FILE *fpin, *fpout;
827 int compnum, state;
828 char buf[NMH_BUFSIZ], name[NAMESZ];
829 char *np, *vp;
830 m_getfld_state_t gstate;
831 int status = OK;
832
833 if (ct->c_file == NULL) {
834 inform("missing input filename");
835 return NOTOK;
836 }
837
838 if ((fpin = fopen (ct->c_file, "r")) == NULL) {
839 advise (ct->c_file, "unable to open for reading");
840 return NOTOK;
841 }
842
843 if ((fpout = fopen (file, "w")) == NULL) {
844 fclose (fpin);
845 advise (file, "unable to open for writing");
846 return NOTOK;
847 }
848
849 gstate = m_getfld_state_init(fpin);
850 for (compnum = 1;;) {
851 int bufsz = (int) sizeof buf;
852
853 switch (state = m_getfld2(&gstate, name, buf, &bufsz)) {
854 case FLD:
855 case FLDPLUS:
856 compnum++;
857
858 /* get copies of the buffers */
859 np = mh_xstrdup (name);
860 vp = mh_xstrdup (buf);
861
862 /* if necessary, get rest of field */
863 while (state == FLDPLUS) {
864 bufsz = sizeof buf;
865 state = m_getfld2(&gstate, name, buf, &bufsz);
866 vp = add (buf, vp); /* add to previous value */
867 }
868
869 if (strcasecmp (TYPE_FIELD, np)) {
870 fprintf (fpout, "%s:%s", np, vp);
871 } else {
872 char *new_ctline, *new_params;
873
874 replace_param(&ct->c_ctinfo.ci_first_pm,
875 &ct->c_ctinfo.ci_last_pm, "boundary",
876 boundary, 0);
877
878 new_ctline = concat(" ", ct->c_ctinfo.ci_type, "/",
879 ct->c_ctinfo.ci_subtype, NULL);
880 new_params = output_params(LEN(TYPE_FIELD) +
881 strlen(new_ctline) + 1,
882 ct->c_ctinfo.ci_first_pm, NULL, 0);
883 fprintf (fpout, "%s:%s%s\n", np, new_ctline,
884 FENDNULL(new_params));
885 free(new_ctline);
886 free(new_params);
887 }
888
889 free (vp);
890 free (np);
891
892 continue;
893
894 case BODY:
895 putc('\n', fpout);
896 /* buf will have a terminating NULL, skip it. */
897 if ((int) fwrite (buf, 1, bufsz-1, fpout) < bufsz-1) {
898 advise (file, "fwrite");
899 }
900 continue;
901
902 case FILEEOF:
903 break;
904
905 case LENERR:
906 case FMTERR:
907 inform("message format error in component #%d", compnum);
908 status = NOTOK;
909 break;
910
911 default:
912 inform("getfld() returned %d", state);
913 status = NOTOK;
914 break;
915 }
916
917 break;
918 }
919
920 m_getfld_state_destroy (&gstate);
921 fclose (fpout);
922 fclose (fpin);
923
924 return status;
925 }
926
927
928 /*
929 * Fix Content-Type header to reflect the content of its part.
930 */
931 static int
932 fix_types (CT ct, svector_t fixtypes, int *message_mods)
933 {
934 int status = OK;
935
936 switch (ct->c_type) {
937 case CT_MULTIPART: {
938 struct multipart *m = (struct multipart *) ct->c_ctparams;
939 struct part *part;
940
941 for (part = m->mp_parts; status == OK && part; part = part->mp_next) {
942 status = fix_types (part->mp_part, fixtypes, message_mods);
943 }
944 break;
945 }
946
947 case CT_MESSAGE:
948 if (ct->c_subtype == MESSAGE_EXTERNAL) {
949 struct exbody *e = (struct exbody *) ct->c_ctparams;
950
951 status = fix_types (e->eb_content, fixtypes, message_mods);
952 }
953 break;
954
955 default: {
956 char **typep, *type;
957
958 if (ct->c_ctinfo.ci_type && ct->c_ctinfo.ci_subtype) {
959 for (typep = svector_strs (fixtypes);
960 typep && (type = *typep);
961 ++typep) {
962 char *type_subtype =
963 concat (ct->c_ctinfo.ci_type, "/", ct->c_ctinfo.ci_subtype,
964 NULL);
965
966 if (! strcasecmp (type, type_subtype) &&
967 decode_part (ct) == OK &&
968 ct->c_cefile.ce_file != NULL) {
969 char *ct_type_subtype = mime_type (ct->c_cefile.ce_file);
970 char *cp;
971
972 if ((cp = strchr (ct_type_subtype, ';'))) {
973 /* Truncate to remove any parameter list from
974 mime_type () result. */
975 *cp = '\0';
976 }
977
978 if (strcasecmp (type, ct_type_subtype)) {
979 char *ct_type, *ct_subtype;
980 HF hf;
981
982 /* The Content-Type header does not match the
983 content, so update these struct Content
984 fields to match:
985 * c_type, c_subtype
986 * c_ctinfo.ci_type, c_ctinfo.ci_subtype
987 * c_ctline
988 */
989 /* Extract type and subtype from type/subtype. */
990 ct_type = mh_xstrdup(ct_type_subtype);
991 if ((cp = strchr (ct_type, '/'))) {
992 *cp = '\0';
993 ct_subtype = mh_xstrdup(++cp);
994 } else {
995 inform("missing / in MIME type of %s %s",
996 ct->c_file, ct->c_partno);
997 free (ct_type);
998 return NOTOK;
999 }
1000
1001 ct->c_type = ct_str_type (ct_type);
1002 ct->c_subtype = ct_str_subtype (ct->c_type, ct_subtype);
1003
1004 free (ct->c_ctinfo.ci_type);
1005 ct->c_ctinfo.ci_type = ct_type;
1006 free (ct->c_ctinfo.ci_subtype);
1007 ct->c_ctinfo.ci_subtype = ct_subtype;
1008 if (! replace_substring (&ct->c_ctline, type,
1009 ct_type_subtype)) {
1010 inform("did not find %s in %s",
1011 type, ct->c_ctline);
1012 }
1013
1014 /* Update Content-Type header field. */
1015 for (hf = ct->c_first_hf; hf; hf = hf->next) {
1016 if (! strcasecmp (TYPE_FIELD, hf->name)) {
1017 if (replace_substring (&hf->value, type,
1018 ct_type_subtype)) {
1019 ++*message_mods;
1020 if (verbosw) {
1021 report (NULL, ct->c_partno, ct->c_file,
1022 "change Content-Type in header "
1023 "from %s to %s",
1024 type, ct_type_subtype);
1025 }
1026 break;
1027 }
1028 inform("did not find %s in %s", type, hf->value);
1029 }
1030 }
1031 }
1032 free (ct_type_subtype);
1033 }
1034 free (type_subtype);
1035 }
1036 }
1037 }}
1038
1039 return status;
1040 }
1041
1042
/*
 * Replace the first occurrence of old in *str with new.
 *
 * The result is built in newly allocated memory; the old *str is freed
 * and *str is pointed at the result, which is also returned.  If old
 * does not occur in *str, nothing is changed and NULL is returned.
 */
char *
replace_substring (char **str, const char *old, const char *new)
{
    char *match = strstr (*str, old);
    const char *tail;
    char *result;

    if (match == NULL) {
        return NULL;
    }

    /* Whatever follows the matched text is carried over as is. */
    tail = match + strlen (old);

    if (match == *str) {
        /* Match at the very beginning, so there is nothing in front. */
        result = concat (new, tail, NULL);
    } else {
        /* Cut a copy of the original off right where the match starts. */
        char *head = mh_xstrdup(*str);

        head[match - *str] = '\0';
        result = concat (head, new, tail, NULL);
        free (head);
    }

    free (*str);
    *str = result;

    return result;
}
1071
1072
/*
 * Remove a name=value parameter, given just its name, from a header value.
 *
 * str is edited in place and returned.  The parameter name must start
 * at the beginning of str or follow whitespace or a semicolon, so that
 * asking for "name" does not match inside "filename=".  The value is
 * either a quoted string, or runs up to the next semicolon, whitespace,
 * or end of string.  Whitespace in front of the name is removed too,
 * along with one semicolon:  the one in front of the parameter if
 * there is one, otherwise the one right after the value.
 *
 * If the parameter isn't present, str is returned unchanged.
 */
static char *
remove_parameter (char *str, const char *name)
{
    /* It looks to me, based on the BNF in RFC 2045, than there can't
       be whitespace between the parameter name and the "=", or
       between the "=" and the parameter value. */
    const size_t name_len = strlen (name);
    char *cp, *start, *end;
    int leading_semicolon = 0;

    if (name_len == 0) {
        return str;
    }

    /* Find "name=" where the name starts on a parameter boundary. */
    for (cp = strstr (str, name); cp; cp = strstr (cp + 1, name)) {
        if (cp[name_len] == '='  &&
            (cp == str  ||  cp[-1] == ';'  ||
             isspace ((unsigned char) cp[-1]))) {
            break;
        }
    }
    if (cp == NULL) {
        return str;
    }

    /* Remove any leading spaces, before the parameter name. */
    for (start = cp;
         start > str && isspace ((unsigned char) *(start-1));
         --start) {
        continue;
    }
    /* Remove a leading semicolon. */
    if (start > str  &&  *(start-1) == ';') {
        --start;
        leading_semicolon = 1;
    }

    end = cp + name_len + 1;
    if (*end == '"') {
        /* Skip past the quoted value, and then the final quote if
           there is one; never step over the terminating null. */
        for (++end ; *end  &&  *end != '"'; ++end) { continue; }
        if (*end == '"') { ++end; }
    } else {
        /* Skip past the value, which may be empty.  Stop at a
           semicolon so that the next parameter keeps its separator. */
        while (*end  &&  *end != ';'  &&  ! isspace ((unsigned char) *end)) {
            ++end;
        }
    }

    /* With no semicolon removed in front, drop the one that follows
       instead, so that none is left dangling. */
    if (! leading_semicolon  &&  *end == ';') {
        ++end;
    }

    /* Close the gap; the + 1 carries the trailing null along. */
    (void) memmove (start, end, strlen (end) + 1);

    return str;
}
1119
1120
1121 /*
1122 * Fix Content-Transfer-Encoding of composite,, e.g., message or multipart, part.
1123 * According to RFC 2045 Sec. 6.4, it must be 7bit, 8bit, or binary. Set it to
1124 * 8 bit.
1125 */
1126 static int
1127 fix_composite_cte (CT ct, int *message_mods)
1128 {
1129 int status = OK;
1130
1131 if (ct->c_type == CT_MESSAGE || ct->c_type == CT_MULTIPART) {
1132 if (ct->c_encoding != CE_7BIT && ct->c_encoding != CE_8BIT &&
1133 ct->c_encoding != CE_BINARY) {
1134 HF hf;
1135
1136 for (hf = ct->c_first_hf; hf; hf = hf->next) {
1137 char *name = hf->name;
1138 for (; isspace((unsigned char)*name); ++name) {
1139 continue;
1140 }
1141
1142 if (! strncasecmp (name, ENCODING_FIELD,
1143 LEN(ENCODING_FIELD))) {
1144 char *prefix = "Nmh-REPLACED-INVALID-";
1145 HF h;
1146
1147 NEW(h);
1148 h->name = mh_xstrdup (hf->name);
1149 h->hf_encoding = hf->hf_encoding;
1150 h->next = hf->next;
1151 hf->next = h;
1152
1153 /* Retain old header but prefix its name. */
1154 free (hf->name);
1155 hf->name = concat (prefix, h->name, NULL);
1156
1157 ++*message_mods;
1158 if (verbosw) {
1159 char *encoding = cpytrim (hf->value);
1160 report (NULL, ct->c_partno, ct->c_file,
1161 "replace Content-Transfer-Encoding of %s "
1162 "with 8 bit", encoding);
1163 free (encoding);
1164 }
1165
1166 h->value = mh_xstrdup (" 8bit\n");
1167
1168 /* Don't need to warn for multiple C-T-E header
1169 fields, parse_mime() already does that. But
1170 if there are any, fix them all as necessary. */
1171 hf = h;
1172 }
1173 }
1174
1175 set_ce (ct, CE_8BIT);
1176 }
1177
1178 if (ct->c_type == CT_MULTIPART) {
1179 struct multipart *m;
1180 struct part *part;
1181
1182 m = (struct multipart *) ct->c_ctparams;
1183 for (part = m->mp_parts; part; part = part->mp_next) {
1184 if (fix_composite_cte (part->mp_part, message_mods) != OK) {
1185 status = NOTOK;
1186 break;
1187 }
1188 }
1189 }
1190 }
1191
1192 return status;
1193 }
1194
1195
1196 /*
1197 * Set content encoding.
1198 */
1199 static int
1200 set_ce (CT ct, int encoding)
1201 {
1202 const char *ce = ce_str (encoding);
1203 const struct str2init *ctinit = get_ce_method (ce);
1204
1205 if (ctinit) {
1206 char *cte = concat (" ", ce, "\n", NULL);
1207 bool found_cte = false;
1208 HF hf;
1209 /* Decoded contents might be in ct->c_cefile.ce_file, if the
1210 caller is decode_text_parts (). Save because we'll
1211 overwrite below. */
1212 struct cefile decoded_content_info = ct->c_cefile;
1213
1214 ct->c_encoding = encoding;
1215
1216 ct->c_ctinitfnx = ctinit->si_init;
1217 /* This will assign ct->c_cefile with an all-0 struct, which
1218 is what we want. */
1219 (*ctinit->si_init) (ct);
1220 /* After returning, the caller should set
1221 ct->c_cefile.ce_file to the name of the file containing
1222 the contents. */
1223
1224 if (ct->c_ceclosefnx) {
1225 (*ct->c_ceclosefnx) (ct);
1226 }
1227
1228 /* Restore the cefile. */
1229 ct->c_cefile = decoded_content_info;
1230
1231 /* Update/add Content-Transfer-Encoding header field. */
1232 for (hf = ct->c_first_hf; hf; hf = hf->next) {
1233 if (! strcasecmp (ENCODING_FIELD, hf->name)) {
1234 found_cte = true;
1235 free (hf->value);
1236 hf->value = cte;
1237 }
1238 }
1239 if (! found_cte) {
1240 add_header (ct, mh_xstrdup (ENCODING_FIELD), cte);
1241 }
1242
1243 /* Update c_celine. It's used only by mhlist -debug. */
1244 free (ct->c_celine);
1245 ct->c_celine = mh_xstrdup (cte);
1246
1247 return OK;
1248 }
1249
1250 return NOTOK;
1251 }
1252
1253
1254 /*
1255 * Make sure each text part has a corresponding text/plain part.
1256 */
1257 static int
1258 ensure_text_plain (CT *ct, CT parent, int *message_mods, int replacetextplain)
1259 {
1260 int status = OK;
1261
1262 switch ((*ct)->c_type) {
1263 case CT_TEXT: {
1264 /* Nothing to do for text/plain. */
1265 if ((*ct)->c_subtype == TEXT_PLAIN) { return OK; }
1266
1267 if (parent && parent->c_type == CT_MULTIPART &&
1268 parent->c_subtype == MULTI_ALTERNATE) {
1269 int new_subpart_number = 1;
1270 int has_text_plain =
1271 find_textplain_sibling (parent, replacetextplain,
1272 &new_subpart_number);
1273
1274 if (! has_text_plain) {
1275 /* Parent is a multipart/alternative. Insert a new
1276 text/plain subpart. */
1277 const int inserted =
1278 insert_new_text_plain_part (*ct, new_subpart_number,
1279 parent);
1280 if (inserted) {
1281 ++*message_mods;
1282 if (verbosw) {
1283 report (NULL, parent->c_partno, parent->c_file,
1284 "insert text/plain part");
1285 }
1286 } else {
1287 status = NOTOK;
1288 }
1289 }
1290 } else if (parent && parent->c_type == CT_MULTIPART &&
1291 parent->c_subtype == MULTI_RELATED) {
1292 char *type_subtype =
1293 concat ((*ct)->c_ctinfo.ci_type, "/",
1294 (*ct)->c_ctinfo.ci_subtype, NULL);
1295 const char *parent_type =
1296 get_param (parent->c_ctinfo.ci_first_pm, "type", '?', 1);
1297 int new_subpart_number = 1;
1298 int has_text_plain = 0;
1299
1300 /* Have to do string comparison on the subtype because we
1301 don't enumerate all of them in c_subtype values.
1302 parent_type will be NULL if the multipart/related part
1303 doesn't have a type parameter. The type parameter must
1304 be specified according to RFC 2387 Sec. 3.1 but not all
1305 messages comply. */
1306 if (parent_type && strcasecmp (type_subtype, parent_type) == 0) {
1307 /* The type of this part matches the root type of the
1308 parent multipart/related. Look to see if there's
1309 text/plain sibling. */
1310 has_text_plain =
1311 find_textplain_sibling (parent, replacetextplain,
1312 &new_subpart_number);
1313 }
1314
1315 free (type_subtype);
1316
1317 if (! has_text_plain) {
1318 struct multipart *mp = (struct multipart *) parent->c_ctparams;
1319 struct part *part;
1320 int siblings = 0;
1321
1322 for (part = mp->mp_parts; part; part = part->mp_next) {
1323 if (*ct != part->mp_part) {
1324 ++siblings;
1325 }
1326 }
1327
1328 if (siblings) {
1329 /* Parent is a multipart/related. Insert a new
1330 text/plain subpart in a new multipart/alternative. */
1331 if (insert_into_new_mp_alt (ct, message_mods)) {
1332 /* Not an error if text/plain couldn't be added. */
1333 }
1334 } else {
1335 /* There are no siblings, so insert a new text/plain
1336 subpart, and change the parent type from
1337 multipart/related to multipart/alternative. */
1338 const int inserted =
1339 insert_new_text_plain_part (*ct, new_subpart_number,
1340 parent);
1341
1342 if (inserted) {
1343 HF hf;
1344
1345 parent->c_subtype = MULTI_ALTERNATE;
1346 free (parent->c_ctinfo.ci_subtype);
1347 parent->c_ctinfo.ci_subtype = mh_xstrdup("alternative");
1348 if (! replace_substring (&parent->c_ctline, "/related",
1349 "/alternative")) {
1350 inform("did not find multipart/related in %s",
1351 parent->c_ctline);
1352 }
1353
1354 /* Update Content-Type header field. */
1355 for (hf = parent->c_first_hf; hf; hf = hf->next) {
1356 if (! strcasecmp (TYPE_FIELD, hf->name)) {
1357 if (replace_substring (&hf->value, "/related",
1358 "/alternative")) {
1359 ++*message_mods;
1360 if (verbosw) {
1361 report (NULL, parent->c_partno,
1362 parent->c_file,
1363 "insert text/plain part");
1364 }
1365
1366 /* Remove, e.g., type="text/html" from
1367 multipart/alternative. */
1368 remove_parameter (hf->value, "type");
1369 break;
1370 }
1371 inform("did not find multipart/"
1372 "related in header %s", hf->value);
1373 }
1374 }
1375 } else {
1376 /* Not an error if text/plain couldn't be inserted. */
1377 }
1378 }
1379 }
1380 } else {
1381 if (insert_into_new_mp_alt (ct, message_mods)) {
1382 status = NOTOK;
1383 }
1384 }
1385 break;
1386 }
1387
1388 case CT_MULTIPART: {
1389 struct multipart *mp = (struct multipart *) (*ct)->c_ctparams;
1390 struct part *part;
1391
1392 for (part = mp->mp_parts; status == OK && part; part = part->mp_next) {
1393 if ((*ct)->c_type == CT_MULTIPART) {
1394 status = ensure_text_plain (&part->mp_part, *ct, message_mods,
1395 replacetextplain);
1396 }
1397 }
1398 break;
1399 }
1400
1401 case CT_MESSAGE:
1402 if ((*ct)->c_subtype == MESSAGE_EXTERNAL) {
1403 struct exbody *e = (struct exbody *) (*ct)->c_ctparams;
1404
1405 status = ensure_text_plain (&e->eb_content, *ct, message_mods,
1406 replacetextplain);
1407 }
1408 break;
1409 }
1410
1411 return status;
1412 }
1413
1414
1415 /*
1416 * See if there is a sibling text/plain, and return its subpart number.
1417 */
1418 static int
1419 find_textplain_sibling (CT parent, int replacetextplain,
1420 int *new_subpart_number)
1421 {
1422 struct multipart *mp = (struct multipart *) parent->c_ctparams;
1423 struct part *part, *prev;
1424 bool has_text_plain = false;
1425
1426 for (prev = part = mp->mp_parts; part; part = part->mp_next) {
1427 ++*new_subpart_number;
1428 if (part->mp_part->c_type == CT_TEXT &&
1429 part->mp_part->c_subtype == TEXT_PLAIN) {
1430 if (replacetextplain) {
1431 struct part *old_part;
1432 if (part == mp->mp_parts) {
1433 old_part = mp->mp_parts;
1434 mp->mp_parts = part->mp_next;
1435 } else {
1436 old_part = prev->mp_next;
1437 prev->mp_next = part->mp_next;
1438 }
1439 if (verbosw) {
1440 report (NULL, parent->c_partno, parent->c_file,
1441 "remove text/plain part %s",
1442 old_part->mp_part->c_partno);
1443 }
1444 free_content (old_part->mp_part);
1445 free (old_part);
1446 } else {
1447 has_text_plain = true;
1448 }
1449 break;
1450 }
1451 prev = part;
1452 }
1453
1454 return has_text_plain;
1455 }
1456
1457
1458 /*
1459 * Insert a new text/plain part.
1460 */
1461 static int
1462 insert_new_text_plain_part (CT ct, int new_subpart_number, CT parent)
1463 {
1464 struct multipart *mp = (struct multipart *) parent->c_ctparams;
1465 struct part *new_part;
1466
1467 NEW(new_part);
1468 if ((new_part->mp_part = build_text_plain_part (ct))) {
1469 char buffer[16];
1470 snprintf (buffer, sizeof buffer, "%d", new_subpart_number);
1471
1472 new_part->mp_next = mp->mp_parts;
1473 mp->mp_parts = new_part;
1474 new_part->mp_part->c_partno =
1475 concat (parent->c_partno ? parent->c_partno : "1", ".",
1476 buffer, NULL);
1477
1478 return 1;
1479 }
1480
1481 free_content (new_part->mp_part);
1482 free (new_part);
1483
1484 return 0;
1485 }
1486
1487
1488 /*
1489 * Create a text/plain part to go along with non-plain sibling part.
1490 */
1491 static CT
1492 build_text_plain_part (CT encoded_part)
1493 {
1494 CT tp_part = divide_part (encoded_part);
1495 char *tmp_plain_file = NULL;
1496
1497 if (decode_part (tp_part) == OK) {
1498 /* Now, tp_part->c_cefile.ce_file is the name of the tmp file that
1499 contains the decoded contents. And the decoding function, such
1500 as openQuoted, will have set ...->ce_unlink to 1 so that it will
1501 be unlinked by free_content (). */
1502 char *tempfile;
1503
1504 /* This m_mktemp2() call closes the temp file. */
1505 if ((tempfile = m_mktemp2 (NULL, invo_name, NULL, NULL)) == NULL) {
1506 inform("unable to create temporary file in %s",
1507 get_temp_dir());
1508 } else {
1509 tmp_plain_file = mh_xstrdup (tempfile);
1510 if (reformat_part (tp_part, tmp_plain_file,
1511 tp_part->c_ctinfo.ci_type,
1512 tp_part->c_ctinfo.ci_subtype,
1513 tp_part->c_type) == OK) {
1514 return tp_part;
1515 }
1516 }
1517 }
1518
1519 free_content (tp_part);
1520 if (tmp_plain_file) { (void) m_unlink (tmp_plain_file); }
1521 free (tmp_plain_file);
1522
1523 return NULL;
1524 }
1525
1526
1527 /*
1528 * Slip new text/plain part into a new multipart/alternative.
1529 */
1530 static int
1531 insert_into_new_mp_alt (CT *ct, int *message_mods)
1532 {
1533 CT tp_part = build_text_plain_part (*ct);
1534 int status = OK;
1535
1536 if (tp_part) {
1537 CT mp_alt = build_multipart_alt (*ct, tp_part, CT_MULTIPART,
1538 MULTI_ALTERNATE);
1539 if (mp_alt) {
1540 struct multipart *mp = (struct multipart *) mp_alt->c_ctparams;
1541
1542 if (mp && mp->mp_parts) {
1543 mp->mp_parts->mp_part = tp_part;
1544 /* Make the new multipart/alternative the parent. */
1545 *ct = mp_alt;
1546
1547 ++*message_mods;
1548 if (verbosw) {
1549 report (NULL, (*ct)->c_partno, (*ct)->c_file,
1550 "insert text/plain part");
1551 }
1552 } else {
1553 free_content (tp_part);
1554 free_content (mp_alt);
1555 status = NOTOK;
1556 }
1557 } else {
1558 status = NOTOK;
1559 }
1560 } else {
1561 /* Not an error if text/plain couldn't be built. */
1562 }
1563
1564 return status;
1565 }
1566
1567
1568 /*
1569 * Clone a MIME part.
1570 */
1571 static CT
1572 divide_part (CT ct)
1573 {
1574 CT new_part;
1575
1576 NEW0(new_part);
1577 /* Just copy over what is needed for decoding. c_vrsn and
1578 c_celine aren't necessary. */
1579 new_part->c_file = mh_xstrdup (ct->c_file);
1580 new_part->c_begin = ct->c_begin;
1581 new_part->c_end = ct->c_end;
1582 copy_ctinfo (&new_part->c_ctinfo, &ct->c_ctinfo);
1583 new_part->c_type = ct->c_type;
1584 new_part->c_cefile = ct->c_cefile;
1585 new_part->c_encoding = ct->c_encoding;
1586 new_part->c_ctinitfnx = ct->c_ctinitfnx;
1587 new_part->c_ceopenfnx = ct->c_ceopenfnx;
1588 new_part->c_ceclosefnx = ct->c_ceclosefnx;
1589 new_part->c_cesizefnx = ct->c_cesizefnx;
1590
1591 /* c_ctline is used by reformat__part(), so it can preserve
1592 anything after the type/subtype. */
1593 new_part->c_ctline = mh_xstrdup (ct->c_ctline);
1594
1595 return new_part;
1596 }
1597
1598
1599 /*
1600 * Copy the content info from one part to another.
1601 */
1602 static void
1603 copy_ctinfo (CI dest, CI src)
1604 {
1605 PM s_pm, d_pm;
1606
1607 dest->ci_type = src->ci_type ? mh_xstrdup (src->ci_type) : NULL;
1608 dest->ci_subtype = src->ci_subtype ? mh_xstrdup (src->ci_subtype) : NULL;
1609
1610 for (s_pm = src->ci_first_pm; s_pm; s_pm = s_pm->pm_next) {
1611 d_pm = add_param(&dest->ci_first_pm, &dest->ci_last_pm, s_pm->pm_name,
1612 s_pm->pm_value, 0);
1613 if (s_pm->pm_charset) {
1614 d_pm->pm_charset = mh_xstrdup(s_pm->pm_charset);
1615 }
1616 if (s_pm->pm_lang) {
1617 d_pm->pm_lang = mh_xstrdup(s_pm->pm_lang);
1618 }
1619 }
1620
1621 dest->ci_comment = src->ci_comment ? mh_xstrdup (src->ci_comment) : NULL;
1622 dest->ci_magic = src->ci_magic ? mh_xstrdup (src->ci_magic) : NULL;
1623 }
1624
1625
1626 /*
1627 * Decode content.
1628 */
1629 static int
1630 decode_part (CT ct)
1631 {
1632 char *tmp_decoded;
1633 int status;
1634 FILE *file;
1635 char *tempfile;
1636
1637 if ((tempfile = m_mktemp2 (NULL, invo_name, NULL, &file)) == NULL) {
1638 die("unable to create temporary file in %s", get_temp_dir());
1639 }
1640 tmp_decoded = mh_xstrdup (tempfile);
1641 /* The following call will load ct->c_cefile.ce_file with the tmp
1642 filename of the decoded content. tmp_decoded will contain the
1643 encoded output, get rid of that. */
1644 status = output_message_fp (ct, file, tmp_decoded);
1645 (void) m_unlink (tmp_decoded);
1646 free (tmp_decoded);
1647 if (fclose (file)) {
1648 inform("unable to close temporary file %s, continuing...", tempfile);
1649 }
1650
1651 return status;
1652 }
1653
1654
1655 /*
1656 * Reformat content as plain text.
1657 * Some of the arguments aren't really needed now, but maybe will
1658 * be in the future for other than text types.
1659 */
1660 static int
1661 reformat_part (CT ct, char *file, char *type, char *subtype, int c_type)
1662 {
1663 int output_subtype, output_encoding;
1664 const char *reason = NULL;
1665 char *cp, *cf;
1666 int status;
1667
1668 /* Hacky: this redirects the output from whatever command is used
1669 to show the part to a file. So, the user can't have any output
1670 redirection in that command.
1671 Could show_multi() in mhshowsbr.c avoid this? */
1672
1673 /* Check for invo_name-format-type/subtype. */
1674 if ((cf = context_find_by_type ("format", type, subtype)) == NULL) {
1675 if (verbosw) {
1676 inform("Don't know how to convert %s, there is no "
1677 "%s-format-%s/%s profile entry",
1678 ct->c_file, invo_name, type, subtype);
1679 }
1680 return NOTOK;
1681 }
1682 if (strchr (cf, '>')) {
1683 inform("'>' prohibited in \"%s\",\nplease fix your "
1684 "%s-format-%s/%s profile entry", cf, invo_name, type,
1685 FENDNULL(subtype));
1686
1687 return NOTOK;
1688 }
1689
1690 cp = concat (cf, " >", file, NULL);
1691 status = show_content_aux (ct, 0, cp, NULL, NULL);
1692 free (cp);
1693
1694 /* Unlink decoded content tmp file and free its filename to avoid
1695 leaks. The file stream should already have been closed. */
1696 if (ct->c_cefile.ce_unlink) {
1697 (void) m_unlink (ct->c_cefile.ce_file);
1698 free (ct->c_cefile.ce_file);
1699 ct->c_cefile.ce_file = NULL;
1700 ct->c_cefile.ce_unlink = 0;
1701 }
1702
1703 if (c_type == CT_TEXT) {
1704 output_subtype = TEXT_PLAIN;
1705 } else {
1706 /* Set subtype to 0, which is always an UNKNOWN subtype. */
1707 output_subtype = 0;
1708 }
1709
1710 output_encoding = content_encoding (ct, &reason);
1711 if (status == OK &&
1712 set_ct_type (ct, c_type, output_subtype, output_encoding) == OK) {
1713 ct->c_cefile.ce_file = file;
1714 ct->c_cefile.ce_unlink = 1;
1715 } else {
1716 ct->c_cefile.ce_unlink = 0;
1717 status = NOTOK;
1718 }
1719
1720 return status;
1721 }
1722
1723
1724 /*
1725 * Fill in a multipart/alternative part.
1726 */
1727 static CT
1728 build_multipart_alt (CT first_alt, CT new_part, int type, int subtype)
1729 {
1730 char *boundary_prefix = "----=_nmh-multipart";
1731 char *boundary = concat (boundary_prefix, first_alt->c_partno, NULL);
1732 char *boundary_indicator = "; boundary=";
1733 char *typename, *subtypename, *name;
1734 CT ct;
1735 struct part *p;
1736 struct multipart *m;
1737 const struct str2init *ctinit;
1738
1739 NEW0(ct);
1740
1741 /* Set up the multipart/alternative part. These fields of *ct were
1742 initialized to 0 by mh_xcalloc():
1743 c_fp, c_unlink, c_begin, c_end,
1744 c_vrsn, c_ctline, c_celine,
1745 c_id, c_descr, c_dispo, c_partno,
1746 c_ctinfo.ci_comment, c_ctinfo.ci_magic,
1747 c_cefile, c_encoding,
1748 c_digested, c_digest[16], c_ctexbody,
1749 c_ctinitfnx, c_ceopenfnx, c_ceclosefnx, c_cesizefnx,
1750 c_umask, c_rfc934,
1751 c_showproc, c_termproc, c_storeproc, c_storage, c_folder
1752 */
1753
1754 ct->c_file = mh_xstrdup (first_alt->c_file);
1755 ct->c_type = type;
1756 ct->c_subtype = subtype;
1757
1758 ctinit = get_ct_init (ct->c_type);
1759
1760 typename = ct_type_str (type);
1761 subtypename = ct_subtype_str (type, subtype);
1762
1763 {
1764 int serial = 0;
1765 int found_boundary = 1;
1766
1767 while (found_boundary && serial < 1000000) {
1768 found_boundary = 0;
1769
1770 /* Ensure that the boundary doesn't appear in the decoded
1771 content. */
1772 if (new_part->c_cefile.ce_file) {
1773 if ((found_boundary =
1774 boundary_in_content (&new_part->c_cefile.ce_fp,
1775 new_part->c_cefile.ce_file,
1776 boundary)) == NOTOK) {
1777 goto return_null;
1778 }
1779 }
1780
1781 /* Ensure that the boundary doesn't appear in the encoded
1782 content. */
1783 if (! found_boundary && new_part->c_file) {
1784 if ((found_boundary =
1785 boundary_in_content (&new_part->c_fp,
1786 new_part->c_file,
1787 boundary)) == NOTOK) {
1788 goto return_null;
1789 }
1790 }
1791
1792 if (found_boundary) {
1793 /* Try a slightly different boundary. */
1794 char buffer2[16];
1795
1796 free (boundary);
1797 ++serial;
1798 snprintf (buffer2, sizeof buffer2, "%d", serial);
1799 boundary =
1800 concat (boundary_prefix,
1801 FENDNULL(first_alt->c_partno),
1802 "-", buffer2, NULL);
1803 }
1804 }
1805
1806 if (found_boundary) {
1807 inform("giving up trying to find a unique boundary");
1808 goto return_null;
1809 }
1810 }
1811
1812 name = concat (" ", typename, "/", subtypename, boundary_indicator, "\"",
1813 boundary, "\"", NULL);
1814
1815 /* Load c_first_hf and c_last_hf. */
1816 transfer_noncontent_headers (first_alt, ct);
1817 add_header (ct, mh_xstrdup (TYPE_FIELD), concat (name, "\n", NULL));
1818 free (name);
1819
1820 /* Load c_partno. */
1821 if (first_alt->c_partno) {
1822 ct->c_partno = mh_xstrdup (first_alt->c_partno);
1823 free (first_alt->c_partno);
1824 first_alt->c_partno = concat (ct->c_partno, ".1", NULL);
1825 new_part->c_partno = concat (ct->c_partno, ".2", NULL);
1826 } else {
1827 first_alt->c_partno = mh_xstrdup ("1");
1828 new_part->c_partno = mh_xstrdup ("2");
1829 }
1830
1831 if (ctinit) {
1832 ct->c_ctinfo.ci_type = mh_xstrdup (typename);
1833 ct->c_ctinfo.ci_subtype = mh_xstrdup (subtypename);
1834 }
1835
1836 add_param(&ct->c_ctinfo.ci_first_pm, &ct->c_ctinfo.ci_last_pm,
1837 "boundary", boundary, 0);
1838
1839 NEW(p);
1840 NEW(p->mp_next);
1841 p->mp_next->mp_next = NULL;
1842 p->mp_next->mp_part = first_alt;
1843
1844 NEW0(m);
1845 m->mp_start = concat (boundary, "\n", NULL);
1846 m->mp_stop = concat (boundary, "--\n", NULL);
1847 m->mp_parts = p;
1848 ct->c_ctparams = m;
1849
1850 free (boundary);
1851
1852 return ct;
1853
1854 return_null:
1855 free_content(ct);
1856 free(boundary);
1857 return NULL;
1858 }
1859
1860
1861 /*
1862 * Check that the boundary does not appear in the content.
1863 */
1864 static int
1865 boundary_in_content (FILE **fp, char *file, const char *boundary)
1866 {
1867 char buffer[NMH_BUFSIZ];
1868 size_t bytes_read;
1869 bool found_boundary = false;
1870
1871 /* free_content() will close *fp if we fopen it here. */
1872 if (! *fp && (*fp = fopen (file, "r")) == NULL) {
1873 advise (file, "unable to open %s for reading", file);
1874 return NOTOK;
1875 }
1876
1877 fseeko (*fp, 0L, SEEK_SET);
1878 while ((bytes_read = fread (buffer, 1, sizeof buffer, *fp)) > 0) {
1879 if (find_str (buffer, bytes_read, boundary)) {
1880 found_boundary = true;
1881 break;
1882 }
1883 }
1884
1885 return found_boundary;
1886 }
1887
1888
1889 /*
1890 * Remove all non-Content headers.
1891 */
1892 static void
1893 transfer_noncontent_headers (CT old, CT new)
1894 {
1895 HF hp, hp_prev;
1896
1897 hp_prev = hp = old->c_first_hf;
1898 while (hp) {
1899 HF next = hp->next;
1900
1901 if (strncasecmp (XXX_FIELD_PRF, hp->name, LEN(XXX_FIELD_PRF))) {
1902 if (hp == old->c_last_hf) {
1903 if (hp == old->c_first_hf) {
1904 old->c_last_hf = old->c_first_hf = NULL;
1905 } else {
1906 hp_prev->next = NULL;
1907 old->c_last_hf = hp_prev;
1908 }
1909 } else {
1910 if (hp == old->c_first_hf) {
1911 old->c_first_hf = next;
1912 } else {
1913 hp_prev->next = next;
1914 }
1915 }
1916
1917 /* Put node hp in the new CT. */
1918 if (new->c_first_hf == NULL) {
1919 new->c_first_hf = hp;
1920 } else {
1921 new->c_last_hf->next = hp;
1922 }
1923 new->c_last_hf = hp;
1924 } else {
1925 /* A Content- header, leave in old. */
1926 hp_prev = hp;
1927 }
1928
1929 hp = next;
1930 }
1931 }
1932
1933
1934 /*
1935 * Set content type.
1936 */
1937 static int
1938 set_ct_type (CT ct, int type, int subtype, int encoding)
1939 {
1940 char *typename = ct_type_str (type);
1941 char *subtypename = ct_subtype_str (type, subtype);
1942 /* E.g, " text/plain" */
1943 char *type_subtypename = concat (" ", typename, "/", subtypename, NULL);
1944 /* E.g, " text/plain\n" */
1945 char *name_plus_nl = concat (type_subtypename, "\n", NULL);
1946 bool found_content_type = false;
1947 HF hf;
1948 const char *cp = NULL;
1949 char *ctline;
1950 int status;
1951
1952 /* Update/add Content-Type header field. */
1953 for (hf = ct->c_first_hf; hf; hf = hf->next) {
1954 if (! strcasecmp (TYPE_FIELD, hf->name)) {
1955 found_content_type = true;
1956 free (hf->value);
1957 hf->value = (cp = strchr (ct->c_ctline, ';'))
1958 ? concat (type_subtypename, cp, "\n", NULL)
1959 : mh_xstrdup (name_plus_nl);
1960 }
1961 }
1962 if (! found_content_type) {
1963 add_header (ct, mh_xstrdup (TYPE_FIELD),
1964 (cp = strchr (ct->c_ctline, ';'))
1965 ? concat (type_subtypename, cp, "\n", NULL)
1966 : mh_xstrdup (name_plus_nl));
1967 }
1968
1969 /* Some of these might not be used, but set them anyway. */
1970 ctline = cp
1971 ? concat (type_subtypename, cp, NULL)
1972 : concat (type_subtypename, NULL);
1973 free (ct->c_ctline);
1974 ct->c_ctline = ctline;
1975 /* Leave other ctinfo members as they were. */
1976 free (ct->c_ctinfo.ci_type);
1977 ct->c_ctinfo.ci_type = mh_xstrdup (typename);
1978 free (ct->c_ctinfo.ci_subtype);
1979 ct->c_ctinfo.ci_subtype = mh_xstrdup (subtypename);
1980 ct->c_type = type;
1981 ct->c_subtype = subtype;
1982
1983 free (name_plus_nl);
1984 free (type_subtypename);
1985
1986 status = set_ce (ct, encoding);
1987
1988 return status;
1989 }
1990
1991
1992 /*
1993 * It's not necessary to update the charset parameter of a Content-Type
1994 * header for a text part. According to RFC 2045 Sec. 6.4, the body
1995 * (content) was originally in the specified charset, "and will be in
1996 * that character set again after decoding."
1997 */
1998 static int
1999 decode_text_parts (CT ct, int encoding, const char *decodetypes,
2000 int *message_mods)
2001 {
2002 int status = OK;
2003 int lf_line_endings = 0;
2004
2005 switch (ct->c_type) {
2006 case CT_MULTIPART: {
2007 struct multipart *m = (struct multipart *) ct->c_ctparams;
2008 struct part *part;
2009
2010 /* Should check to see if the body for this part is encoded?
2011 For now, it gets passed along as-is by InitMultiPart(). */
2012 for (part = m->mp_parts; status == OK && part; part = part->mp_next) {
2013 status = decode_text_parts (part->mp_part, encoding, decodetypes,
2014 message_mods);
2015 }
2016 break;
2017 }
2018
2019 case CT_MESSAGE:
2020 if (ct->c_subtype == MESSAGE_EXTERNAL) {
2021 struct exbody *e = (struct exbody *) ct->c_ctparams;
2022
2023 status = decode_text_parts (e->eb_content, encoding, decodetypes,
2024 message_mods);
2025 }
2026 break;
2027
2028 default:
2029 if (! should_decode(decodetypes, ct->c_ctinfo.ci_type, ct->c_ctinfo.ci_subtype)) {
2030 break;
2031 }
2032
2033 lf_line_endings =
2034 ct->c_ctparams && ((struct text *) ct->c_ctparams)->lf_line_endings;
2035
2036 switch (ct->c_encoding) {
2037 case CE_BASE64:
2038 case CE_QUOTED: {
2039 int ct_encoding;
2040
2041 if (decode_part (ct) == OK && ct->c_cefile.ce_file) {
2042 const char *reason = NULL;
2043
2044 if ((ct_encoding = content_encoding (ct, &reason)) == CE_BINARY
2045 && encoding != CE_BINARY) {
2046 /* The decoding isn't acceptable so discard it.
2047 Leave status as OK to allow other transformations. */
2048 if (verbosw) {
2049 report (NULL, ct->c_partno, ct->c_file,
2050 "will not decode%s because it is binary (%s)",
2051 ct->c_partno ? ""
2052 : (FENDNULL(ct->c_ctline)),
2053 reason);
2054 }
2055 (void) m_unlink (ct->c_cefile.ce_file);
2056 free (ct->c_cefile.ce_file);
2057 ct->c_cefile.ce_file = NULL;
2058 } else if (ct->c_encoding == CE_QUOTED &&
2059 ct_encoding == CE_8BIT && encoding == CE_7BIT) {
2060 /* The decoding isn't acceptable so discard it.
2061 Leave status as OK to allow other transformations. */
2062 if (verbosw) {
2063 report (NULL, ct->c_partno, ct->c_file,
2064 "will not decode%s because it is 8bit",
2065 ct->c_partno ? ""
2066 : (FENDNULL(ct->c_ctline)));
2067 }
2068 (void) m_unlink (ct->c_cefile.ce_file);
2069 free (ct->c_cefile.ce_file);
2070 ct->c_cefile.ce_file = NULL;
2071 } else {
2072 int enc;
2073
2074 if (ct_encoding == CE_BINARY) {
2075 enc = CE_BINARY;
2076 } else if (ct_encoding == CE_8BIT && encoding == CE_7BIT) {
2077 enc = CE_QUOTED;
2078 } else {
2079 enc = ct_encoding;
2080 }
2081 if (set_ce (ct, enc) == OK) {
2082 ++*message_mods;
2083 if (verbosw) {
2084 report (NULL, ct->c_partno, ct->c_file, "decode%s",
2085 FENDNULL(ct->c_ctline));
2086 }
2087 if (lf_line_endings) {
2088 strip_crs (ct, message_mods);
2089 }
2090 } else {
2091 status = NOTOK;
2092 }
2093 }
2094 } else {
2095 status = NOTOK;
2096 }
2097 break;
2098 }
2099 case CE_8BIT:
2100 case CE_7BIT:
2101 if (lf_line_endings) {
2102 strip_crs (ct, message_mods);
2103 }
2104 break;
2105 default:
2106 break;
2107 }
2108
2109 break;
2110 }
2111
2112 return status;
2113 }
2114
2115
/*
 * Determine if the part with type[/subtype] should be decoded, according to
 * decodetypes (which came from the -decodetypes switch).  The comparison
 * ignores case.  Returns nonzero if the part should be decoded.
 */
static int
should_decode(const char *decodetypes, const char *type, const char *subtype)
{
    /* Quick search for matching type[/subtype] in decodetypes:  bracket
       decodetypes with commas, then search for ,type, and ,type/subtype, in
       it. */
    char *haystack = concat(",", decodetypes, ",", NULL);
    char *needle = concat(",", type, ",", NULL);
    bool found_match = nmh_strcasestr(haystack, needle) != NULL;

    free(needle);

    if (! found_match && subtype != NULL) {
        needle = concat(",", type, "/", subtype, ",", NULL);
        found_match = nmh_strcasestr(haystack, needle) != NULL;
        free(needle);
    }

    free(haystack);

    return found_match;
}
2148
2149
2150 /*
2151 * See if the decoded content is 7bit, 8bit, or binary. It's binary
2152 * if it has any NUL characters, a CR not followed by a LF, or lines
2153 * greater than 998 characters in length. If binary, reason is set
2154 * to a string explaining why.
2155 */
2156 static int
2157 content_encoding (CT ct, const char **reason)
2158 {
2159 CE ce = &ct->c_cefile;
2160 int encoding = CE_7BIT;
2161
2162 if (ce->ce_file) {
2163 size_t line_len = 0;
2164 char buffer[NMH_BUFSIZ];
2165 size_t inbytes;
2166
2167 if (! ce->ce_fp && (ce->ce_fp = fopen (ce->ce_file, "r")) == NULL) {
2168 advise (ce->ce_file, "unable to open for reading");
2169 return CE_UNKNOWN;
2170 }
2171
2172 fseeko (ce->ce_fp, 0L, SEEK_SET);
2173 while (encoding != CE_BINARY &&
2174 (inbytes = fread (buffer, 1, sizeof buffer, ce->ce_fp)) > 0) {
2175 char *cp;
2176 size_t i;
2177 int last_char_was_cr = 0;
2178
2179 for (i = 0, cp = buffer; i < inbytes; ++i, ++cp) {
2180 if (*cp == '\0' || ++line_len > 998 ||
2181 (*cp != '\n' && last_char_was_cr)) {
2182 encoding = CE_BINARY;
2183 if (*cp == '\0') {
2184 *reason = "null character";
2185 } else if (line_len > 998) {
2186 *reason = "line length > 998";
2187 } else if (*cp != '\n' && last_char_was_cr) {
2188 *reason = "CR not followed by LF";
2189 } else {
2190 /* Should not reach this. */
2191 *reason = "";
2192 }
2193 break;
2194 }
2195 if (*cp == '\n') {
2196 line_len = 0;
2197 } else if (! isascii ((unsigned char) *cp)) {
2198 encoding = CE_8BIT;
2199 }
2200
2201 last_char_was_cr = *cp == '\r';
2202 }
2203 }
2204
2205 fclose (ce->ce_fp);
2206 ce->ce_fp = NULL;
2207 } /* else should never happen */
2208
2209 return encoding;
2210 }
2211
2212
2213 /*
2214 * Strip carriage returns from content.
2215 */
2216 static int
2217 strip_crs (CT ct, int *message_mods)
2218 {
2219 char *charset = content_charset (ct);
2220 int status = OK;
2221
2222 /* Only strip carriage returns if content is ASCII or another
2223 charset that has the same readily recognizable CR followed by a
2224 LF. We can include UTF-8 here because if the high-order bit of
2225 a UTF-8 byte is 0, then it must be a single-byte ASCII
2226 character. */
2227 if (! strcasecmp (charset, "US-ASCII") ||
2228 ! strcasecmp (charset, "UTF-8") ||
2229 ! strncasecmp (charset, "ISO-8859-", 9) ||
2230 ! strncasecmp (charset, "WINDOWS-12", 10)) {
2231 char **file = NULL;
2232 FILE **fp = NULL;
2233 size_t begin;
2234 size_t end;
2235 bool has_crs = false;
2236 bool opened_input_file = false;
2237
2238 if (ct->c_cefile.ce_file) {
2239 file = &ct->c_cefile.ce_file;
2240 fp = &ct->c_cefile.ce_fp;
2241 begin = end = 0;
2242 } else if (ct->c_file) {
2243 file = &ct->c_file;
2244 fp = &ct->c_fp;
2245 begin = (size_t) ct->c_begin;
2246 end = (size_t) ct->c_end;
2247 } /* else don't know where the content is */
2248
2249 if (file && *file && fp) {
2250 if (! *fp) {
2251 if ((*fp = fopen (*file, "r")) == NULL) {
2252 advise (*file, "unable to open for reading");
2253 status = NOTOK;
2254 } else {
2255 opened_input_file = true;
2256 }
2257 }
2258 }
2259
2260 if (fp && *fp) {
2261 char buffer[NMH_BUFSIZ];
2262 size_t bytes_read;
2263 size_t bytes_to_read =
2264 end > 0 && end > begin ? end - begin : sizeof buffer;
2265
2266 fseeko (*fp, begin, SEEK_SET);
2267 while ((bytes_read = fread (buffer, 1,
2268 min (bytes_to_read, sizeof buffer),
2269 *fp)) > 0) {
2270 /* Look for CR followed by a LF. This is supposed to
2271 be text so there should be LF's. If not, don't
2272 modify the content. */
2273 char *cp;
2274 size_t i;
2275 bool last_char_was_cr = false;
2276
2277 if (end > 0) { bytes_to_read -= bytes_read; }
2278
2279 for (i = 0, cp = buffer; i < bytes_read; ++i, ++cp) {
2280 if (*cp == '\n' && last_char_was_cr) {
2281 has_crs = true;
2282 break;
2283 }
2284
2285 last_char_was_cr = *cp == '\r';
2286 }
2287 }
2288
2289 if (has_crs) {
2290 int fd;
2291 char *stripped_content_file;
2292 char *tempfile = m_mktemp2 (NULL, invo_name, &fd, NULL);
2293
2294 if (tempfile == NULL) {
2295 die("unable to create temporary file in %s",
2296 get_temp_dir());
2297 }
2298 stripped_content_file = mh_xstrdup (tempfile);
2299
2300 /* Strip each CR before a LF from the content. */
2301 fseeko (*fp, begin, SEEK_SET);
2302 while ((bytes_read = fread (buffer, 1, sizeof buffer, *fp)) >
2303 0) {
2304 char *cp;
2305 size_t i;
2306 bool last_char_was_cr = false;
2307
2308 for (i = 0, cp = buffer; i < bytes_read; ++i, ++cp) {
2309 if (*cp == '\r') {
2310 last_char_was_cr = true;
2311 } else if (last_char_was_cr) {
2312 if (*cp != '\n') {
2313 if (write (fd, "\r", 1) < 0) {
2314 advise (tempfile, "CR write");
2315 }
2316 }
2317 if (write (fd, cp, 1) < 0) {
2318 advise (tempfile, "write");
2319 }
2320 last_char_was_cr = false;
2321 } else {
2322 if (write (fd, cp, 1) < 0) {
2323 advise (tempfile, "write");
2324 }
2325 last_char_was_cr = false;
2326 }
2327 }
2328 }
2329
2330 if (close (fd)) {
2331 inform("unable to write temporary file %s, continuing...",
2332 stripped_content_file);
2333 (void) m_unlink (stripped_content_file);
2334 free(stripped_content_file);
2335 status = NOTOK;
2336 } else {
2337 /* Replace the decoded file with the converted one. */
2338 if (ct->c_cefile.ce_file && ct->c_cefile.ce_unlink)
2339 (void) m_unlink (ct->c_cefile.ce_file);
2340
2341 free(ct->c_cefile.ce_file);
2342 ct->c_cefile.ce_file = stripped_content_file;
2343 ct->c_cefile.ce_unlink = 1;
2344
2345 ++*message_mods;
2346 if (verbosw) {
2347 report (NULL, ct->c_partno,
2348 begin == 0 && end == 0 ? "" : *file,
2349 "stripped CRs");
2350 }
2351 }
2352 }
2353
2354 if (opened_input_file) {
2355 fclose (*fp);
2356 *fp = NULL;
2357 }
2358 }
2359 }
2360
2361 free (charset);
2362
2363 return status;
2364 }
2365
2366
2367 /*
2368 * Add/update, if necessary, the message C-T-E, based on the least restrictive
2369 * of the part C-T-E's.
2370 */
2371 static void
2372 update_cte (CT ct)
2373 {
2374 const int least_restrictive_enc = least_restrictive_encoding (ct);
2375
2376 if (least_restrictive_enc != CE_UNKNOWN &&
2377 least_restrictive_enc != CE_7BIT) {
2378 char *cte = concat (" ", ce_str (least_restrictive_enc), "\n", NULL);
2379 HF hf;
2380 bool found_cte = false;
2381
2382 /* Update/add Content-Transfer-Encoding header field. */
2383 for (hf = ct->c_first_hf; hf; hf = hf->next) {
2384 if (! strcasecmp (ENCODING_FIELD, hf->name)) {
2385 found_cte = true;
2386 free (hf->value);
2387 hf->value = cte;
2388 }
2389 }
2390 if (! found_cte) {
2391 add_header (ct, mh_xstrdup (ENCODING_FIELD), cte);
2392 }
2393 }
2394 }
2395
2396
2397 /*
2398 * Find the least restrictive encoding (7bit, 8bit, binary) of the parts
2399 * within a message.
2400 */
2401 static int
2402 least_restrictive_encoding (CT ct)
2403 {
2404 int encoding = CE_UNKNOWN;
2405
2406 switch (ct->c_type) {
2407 case CT_MULTIPART: {
2408 struct multipart *m = (struct multipart *) ct->c_ctparams;
2409 struct part *part;
2410
2411 for (part = m->mp_parts; part; part = part->mp_next) {
2412 const int part_encoding =
2413 least_restrictive_encoding (part->mp_part);
2414
2415 if (less_restrictive (encoding, part_encoding)) {
2416 encoding = part_encoding;
2417 }
2418 }
2419 break;
2420 }
2421
2422 case CT_MESSAGE:
2423 if (ct->c_subtype == MESSAGE_EXTERNAL) {
2424 struct exbody *e = (struct exbody *) ct->c_ctparams;
2425 const int part_encoding =
2426 least_restrictive_encoding (e->eb_content);
2427
2428 if (less_restrictive (encoding, part_encoding)) {
2429 encoding = part_encoding;
2430 }
2431 }
2432 break;
2433
2434 default: {
2435 if (less_restrictive (encoding, ct->c_encoding)) {
2436 encoding = ct->c_encoding;
2437 }
2438 }}
2439
2440 return encoding;
2441 }
2442
2443
2444 /*
2445 * Return whether the second encoding is less restrictive than the first, where
2446 * "less restrictive" is in the sense used by RFC 2045 Secs. 6.1 and 6.4. So,
2447 * CE_BINARY is less restrictive than CE_8BIT and
2448 * CE_8BIT is less restrictive than CE_7BIT.
2449 */
2450 static int
2451 less_restrictive (int encoding, int second_encoding)
2452 {
2453 switch (second_encoding) {
2454 case CE_BINARY:
2455 return encoding != CE_BINARY;
2456 case CE_8BIT:
2457 return encoding != CE_BINARY && encoding != CE_8BIT;
2458 case CE_7BIT:
2459 return encoding != CE_BINARY && encoding != CE_8BIT &&
2460 encoding != CE_7BIT;
2461 default :
2462 return 0;
2463 }
2464 }
2465
2466
2467 /*
2468 * Convert character set of each part.
2469 */
2470 static int
2471 convert_charsets (CT ct, char *dest_charset, int *message_mods)
2472 {
2473 int status = OK;
2474
2475 switch (ct->c_type) {
2476 case CT_TEXT:
2477 if (ct->c_subtype == TEXT_PLAIN) {
2478 status = convert_charset (ct, dest_charset, message_mods);
2479 if (status == OK) {
2480 if (verbosw) {
2481 char *ct_charset = content_charset (ct);
2482
2483 report (NULL, ct->c_partno, ct->c_file,
2484 "convert %s to %s", ct_charset, dest_charset);
2485 free (ct_charset);
2486 }
2487 } else {
2488 char *ct_charset = content_charset (ct);
2489
2490 report ("iconv", ct->c_partno, ct->c_file,
2491 "failed to convert %s to %s", ct_charset, dest_charset);
2492 free (ct_charset);
2493 }
2494 }
2495 break;
2496
2497 case CT_MULTIPART: {
2498 struct multipart *m = (struct multipart *) ct->c_ctparams;
2499 struct part *part;
2500
2501 /* Should check to see if the body for this part is encoded?
2502 For now, it gets passed along as-is by InitMultiPart(). */
2503 for (part = m->mp_parts; status == OK && part; part = part->mp_next) {
2504 status =
2505 convert_charsets (part->mp_part, dest_charset, message_mods);
2506 }
2507 break;
2508 }
2509
2510 case CT_MESSAGE:
2511 if (ct->c_subtype == MESSAGE_EXTERNAL) {
2512 struct exbody *e = (struct exbody *) ct->c_ctparams;
2513
2514 status =
2515 convert_charsets (e->eb_content, dest_charset, message_mods);
2516 }
2517 break;
2518
2519 default:
2520 break;
2521 }
2522
2523 return status;
2524 }
2525
2526
2527 /*
2528 * Fix various problems that aren't handled elsewhere. These
2529 * are fixed unconditionally: there are no switches to disable
2530 * them. Currently, "problems" are these:
2531 * 1) remove extraneous semicolon at the end of a header parameter list
2532 * 2) replace RFC 2047 encoding with RFC 2231 encoding of name and
2533 * filename parameters in Content-Type and Content-Disposition
2534 * headers, respectively.
2535 */
2536 static int
2537 fix_always (CT ct, int *message_mods)
2538 {
2539 int status = OK;
2540
2541 switch (ct->c_type) {
2542 case CT_MULTIPART: {
2543 struct multipart *m = (struct multipart *) ct->c_ctparams;
2544 struct part *part;
2545
2546 for (part = m->mp_parts; status == OK && part; part = part->mp_next) {
2547 status = fix_always (part->mp_part, message_mods);
2548 }
2549 break;
2550 }
2551
2552 case CT_MESSAGE:
2553 if (ct->c_subtype == MESSAGE_EXTERNAL) {
2554 struct exbody *e = (struct exbody *) ct->c_ctparams;
2555
2556 status = fix_always (e->eb_content, message_mods);
2557 }
2558 break;
2559
2560 default: {
2561 HF hf;
2562
2563 if (ct->c_first_hf) {
2564 fix_filename_encoding (ct);
2565 }
2566
2567 for (hf = ct->c_first_hf; hf; hf = hf->next) {
2568 size_t len = strlen (hf->value);
2569
2570 if (strcasecmp (hf->name, TYPE_FIELD) != 0 &&
2571 strcasecmp (hf->name, DISPO_FIELD) != 0) {
2572 /* Only do this for Content-Type and
2573 Content-Disposition fields because those are the
2574 only headers that parse_mime() warns about. */
2575 continue;
2576 }
2577
2578 /* whitespace following a trailing ';' will be nuked as well */
2579 if (hf->value[len - 1] == '\n') {
2580 while (isspace((unsigned char)(hf->value[len - 2]))) {
2581 if (len-- == 0) { break; }
2582 }
2583 }
2584
2585 if (hf->value[len - 2] == ';') {
2586 /* Remove trailing ';' from parameter value. */
2587 hf->value[len - 2] = '\n';
2588 hf->value[len - 1] = '\0';
2589
2590 /* Also, if Content-Type parameter, remove trailing ';'
2591 from ct->c_ctline. This probably isn't necessary
2592 but can't hurt. */
2593 if (strcasecmp(hf->name, TYPE_FIELD) == 0 && ct->c_ctline) {
2594 size_t l = strlen(ct->c_ctline) - 1;
2595 while (isspace((unsigned char)(ct->c_ctline[l])) ||
2596 ct->c_ctline[l] == ';') {
2597 ct->c_ctline[l--] = '\0';
2598 if (l == 0) { break; }
2599 }
2600 }
2601
2602 ++*message_mods;
2603 if (verbosw) {
2604 report (NULL, ct->c_partno, ct->c_file,
2605 "remove trailing ; from %s parameter value",
2606 hf->name);
2607 }
2608 }
2609 }
2610 }}
2611
2612 return status;
2613 }
2614
2615
2616 /*
2617 * Factor out common code for loops in fix_filename_encoding().
2618 */
2619 static int
2620 fix_filename_param (char *name, char *value, PM *first_pm, PM *last_pm)
2621 {
2622 bool fixed = false;
2623
2624 if (has_prefix(value, "=?") && has_suffix(value, "?=")) {
2625 /* Looks like an RFC 2047 encoded parameter. */
2626 char decoded[PATH_MAX + 1];
2627
2628 if (decode_rfc2047 (value, decoded, sizeof decoded)) {
2629 /* Encode using RFC 2231. */
2630 replace_param (first_pm, last_pm, name, decoded, 0);
2631 fixed = true;
2632 } else {
2633 inform("failed to decode %s parameter %s", name, value);
2634 }
2635 }
2636
2637 return fixed;
2638 }
2639
2640
2641 /*
2642 * Replace RFC 2047 encoding with RFC 2231 encoding of name and
2643 * filename parameters in Content-Type and Content-Disposition
2644 * headers, respectively.
2645 */
2646 static int
2647 fix_filename_encoding (CT ct)
2648 {
2649 PM pm;
2650 HF hf;
2651 int fixed = 0;
2652
2653 for (pm = ct->c_ctinfo.ci_first_pm; pm; pm = pm->pm_next) {
2654 if (pm->pm_name && pm->pm_value &&
2655 strcasecmp (pm->pm_name, "name") == 0) {
2656 fixed = fix_filename_param (pm->pm_name, pm->pm_value,
2657 &ct->c_ctinfo.ci_first_pm,
2658 &ct->c_ctinfo.ci_last_pm);
2659 }
2660 }
2661
2662 for (pm = ct->c_dispo_first; pm; pm = pm->pm_next) {
2663 if (pm->pm_name && pm->pm_value &&
2664 strcasecmp (pm->pm_name, "filename") == 0) {
2665 fixed = fix_filename_param (pm->pm_name, pm->pm_value,
2666 &ct->c_dispo_first,
2667 &ct->c_dispo_last);
2668 }
2669 }
2670
2671 /* Fix hf values to correspond. */
2672 for (hf = ct->c_first_hf; fixed && hf; hf = hf->next) {
2673 enum { OTHER, TYPE_HEADER, DISPO_HEADER } field = OTHER;
2674
2675 if (strcasecmp (hf->name, TYPE_FIELD) == 0) {
2676 field = TYPE_HEADER;
2677 } else if (strcasecmp (hf->name, DISPO_FIELD) == 0) {
2678 field = DISPO_HEADER;
2679 }
2680
2681 if (field != OTHER) {
2682 const char *const semicolon_loc = strchr (hf->value, ';');
2683
2684 if (semicolon_loc) {
2685 const size_t len =
2686 strlen (hf->name) + 1 + semicolon_loc - hf->value;
2687 const char *const params =
2688 output_params (len,
2689 field == TYPE_HEADER
2690 ? ct->c_ctinfo.ci_first_pm
2691 : ct->c_dispo_first,
2692 NULL, 0);
2693 const char *const new_params = concat (params, "\n", NULL);
2694
2695 replace_substring (&hf->value, semicolon_loc, new_params);
2696 free((void *)new_params); /* Cast away const. Sigh. */
2697 free((void *)params);
2698 } else {
2699 inform("did not find semicolon in %s:%s\n",
2700 hf->name, hf->value);
2701 }
2702 }
2703 }
2704
2705 return OK;
2706 }
2707
2708
2709 /*
2710 * Output content in input file to output file.
2711 */
2712 static int
2713 write_content (CT ct, const char *input_filename, char *outfile, FILE *outfp,
2714 int modify_inplace, int message_mods)
2715 {
2716 int status = OK;
2717
2718 if (modify_inplace) {
2719 if (message_mods > 0) {
2720 if ((status = output_message_fp (ct, outfp, outfile)) == OK) {
2721 char *infile = input_filename
2722 ? mh_xstrdup (input_filename)
2723 : mh_xstrdup (ct->c_file ? ct->c_file : "-");
2724
2725 if (remove_file (infile) == OK) {
2726 if (rename (outfile, infile)) {
2727 /* Rename didn't work, possibly because of an
2728 attempt to rename across filesystems. Try
2729 brute force copy. */
2730 int old = open (outfile, O_RDONLY);
2731 int new =
2732 open (infile, O_WRONLY | O_CREAT, m_gmprot ());
2733 int i = -1;
2734
2735 if (old != -1 && new != -1) {
2736 char buffer[NMH_BUFSIZ];
2737
2738 while ((i = read (old, buffer, sizeof buffer)) >
2739 0) {
2740 if (write (new, buffer, i) != i) {
2741 i = -1;
2742 break;
2743 }
2744 }
2745 }
2746 if (new != -1) { close (new); }
2747 if (old != -1) { close (old); }
2748 (void) m_unlink (outfile);
2749
2750 if (i < 0) {
2751 /* The -file argument processing used path() to
2752 expand filename to absolute path. */
2753 int file = ct->c_file && ct->c_file[0] == '/';
2754
2755 inform("unable to rename %s %s to %s, continuing...",
2756 file ? "file" : "message", outfile,
2757 infile);
2758 status = NOTOK;
2759 }
2760 }
2761 } else {
2762 inform("unable to remove input file %s, "
2763 "not modifying it, continuing...", infile);
2764 (void) m_unlink (outfile);
2765 status = NOTOK;
2766 }
2767
2768 free (infile);
2769 } else {
2770 status = NOTOK;
2771 }
2772 } else {
2773 /* No modifications and didn't need the tmp outfile. */
2774 (void) m_unlink (outfile);
2775 }
2776 } else {
2777 /* Output is going to some file. Produce it whether or not
2778 there were modifications. */
2779 status = output_message_fp (ct, outfp, outfile);
2780 }
2781
2782 flush_errors ();
2783 return status;
2784 }
2785
2786
2787 /*
2788 * parse_mime() does not set lf_line_endings in struct text, so use this
2789 * function to do it. It touches the parts the decodetypes identifies.
2790 */
2791 static void
2792 set_text_ctparams(CT ct, char *decodetypes, int lf_line_endings)
2793 {
2794 switch (ct->c_type) {
2795 case CT_MULTIPART: {
2796 struct multipart *m = (struct multipart *) ct->c_ctparams;
2797 struct part *part;
2798
2799 for (part = m->mp_parts; part; part = part->mp_next) {
2800 set_text_ctparams(part->mp_part, decodetypes, lf_line_endings);
2801 }
2802 break;
2803 }
2804
2805 case CT_MESSAGE:
2806 if (ct->c_subtype == MESSAGE_EXTERNAL) {
2807 struct exbody *e = (struct exbody *) ct->c_ctparams;
2808
2809 set_text_ctparams(e->eb_content, decodetypes, lf_line_endings);
2810 }
2811 break;
2812
2813 default:
2814 if (should_decode(decodetypes, ct->c_ctinfo.ci_type, ct->c_ctinfo.ci_subtype)) {
2815 if (ct->c_ctparams == NULL) {
2816 ct->c_ctparams = mh_xcalloc(1, sizeof (struct text));
2817 }
2818 ((struct text *) ct->c_ctparams)->lf_line_endings = lf_line_endings;
2819 }
2820 }
2821 }
2822
2823
2824 /*
2825 * If "rmmproc" is defined, call that to remove the file. Otherwise,
2826 * use the standard MH backup file.
2827 */
2828 static int
2829 remove_file (const char *file)
2830 {
2831 if (rmmproc) {
2832 char *rmm_command = concat (rmmproc, " ", file, NULL);
2833 int status = system (rmm_command);
2834
2835 free (rmm_command);
2836 return WIFEXITED (status) ? WEXITSTATUS (status) : NOTOK;
2837 }
2838 /* This is OK for a non-message file, it still uses the
2839 BACKUP_PREFIX form. The backup file will be in the same
2840 directory as file. */
2841 return rename (file, m_backup (file));
2842 }
2843
2844
2845 /*
2846 * Output formatted message to user.
2847 */
2848 static void
2849 report (char *what, char *partno, char *filename, char *message, ...)
2850 {
2851 va_list args;
2852 char *fmt;
2853
2854 if (verbosw) {
2855 va_start (args, message);
2856 fmt = concat (filename, partno ? " part " : ", ",
2857 FENDNULL(partno), partno ? ", " : "", message, NULL);
2858
2859 advertise (what, NULL, fmt, args);
2860
2861 free (fmt);
2862 va_end (args);
2863 }
2864 }
2865
2866
/*
 * Signal handler:  terminate via done (1).  For SIGQUIT, stdout is flushed
 * and a newline is written to stderr first.
 */
static void
pipeser (int i)
{
    if (i == SIGQUIT) {
        fflush (stdout);
        fputs ("\n", stderr);
        fflush (stderr);
    }

    done (1);
    /* NOTREACHED */
}