]> diplodocus.org Git - nmh/blob - uip/mhfixmsg.c
h/prototypes.h: Remove scan_reset_m_getfld_state() prototype.
[nmh] / uip / mhfixmsg.c
1 /* mhfixmsg.c -- rewrite a message with various transformations
2 *
3 * This code is Copyright (c) 2002 and 2013, by the authors of nmh.
4 * See the COPYRIGHT file in the root directory of the nmh
5 * distribution for complete copyright information.
6 */
7
8 #include <h/mh.h>
9 #include <h/mime.h>
10 #include <h/mhparse.h>
11 #include <h/utils.h>
12 #include <h/signals.h>
13 #include "mhfree.h"
14 #include <fcntl.h>
15
/*
 * Command-line switch table, written as an X-macro list.  Each
 * X(sw, minchars, id) entry supplies the switch string, its minchars
 * value, and the enumerator that smatch() results are compared against
 * in main().  The list is expanded twice below with different
 * definitions of X: once for the enumerators and once for the
 * `switches' array.
 */
16 #define MHFIXMSG_SWITCHES \
17 X("decodetext 8bit|7bit|binary", 0, DECODETEXTSW) \
18 X("nodecodetext", 0, NDECODETEXTSW) \
19 X("decodetypes", 0, DECODETYPESW) \
20 X("crlflinebreaks", 0, CRLFLINEBREAKSSW) \
21 X("nocrlflinebreaks", 0, NCRLFLINEBREAKSSW) \
22 X("textcharset", 0, TEXTCHARSETSW) \
23 X("notextcharset", 0, NTEXTCHARSETSW) \
24 X("reformat", 0, REFORMATSW) \
25 X("noreformat", 0, NREFORMATSW) \
26 X("replacetextplain", 0, REPLACETEXTPLAINSW) \
27 X("noreplacetextplain", 0, NREPLACETEXTPLAINSW) \
28 X("fixboundary", 0, FIXBOUNDARYSW) \
29 X("nofixboundary", 0, NFIXBOUNDARYSW) \
30 X("fixcte", 0, FIXCOMPOSITECTESW) \
31 X("nofixcte", 0, NFIXCOMPOSITECTESW) \
32 X("fixtype mimetype", 0, FIXTYPESW) \
33 X("file file", 0, FILESW) \
34 X("outfile file", 0, OUTFILESW) \
35 X("rmmproc program", 0, RPROCSW) \
36 X("normmproc", 0, NRPRCSW) \
37 X("changecur", 0, CHGSW) \
38 X("nochangecur", 0, NCHGSW) \
39 X("verbose", 0, VERBSW) \
40 X("noverbose", 0, NVERBSW) \
41 X("version", 0, VERSIONSW) \
42 X("help", 0, HELPSW) \
43
/* First expansion: each entry contributes just its enumerator name. */
44 #define X(sw, minchars, id) id,
45 DEFINE_SWITCH_ENUM(MHFIXMSG);
46 #undef X
47
/* Second expansion: each entry becomes a { sw, minchars, id } initializer
   of the `switches' array that main() passes to smatch(). */
48 #define X(sw, minchars, id) { sw, minchars, id },
49 DEFINE_SWITCH_ARRAY(MHFIXMSG, switches);
50 #undef X
51
52
/* Set by -verbose / cleared by -noverbose in main(); when nonzero the
   transformations call report() to describe each change. */
53 int verbosw;
54 int debugsw; /* Needed by mhparse.c. */
55
/* SIGQUIT is given the same handler as SIGPIPE (see the SIGNAL() calls
   in main()). */
56 #define quitser pipeser
57
/* Flags shared with the MIME parser.  main() sets the two "to" flags
   below to 1 before parsing; fix_boundary() reads bogus_mp_content. */
58 /* mhparse.c */
59 extern int skip_mp_cte_check; /* flag to InitMultiPart */
60 extern int suppress_bogus_mp_content_warning; /* flag to InitMultiPart */
61 extern int bogus_mp_content; /* flag from InitMultiPart */
62 /* flags to/from parse_header_attrs */
63 extern int suppress_extraneous_trailing_semicolon_warning;
64
65 /* mhoutsbr.c */
66 int output_message_fp (CT, FILE *, char *);
67
68 /* mhmisc.c */
69 void flush_errors (void);
70
71 /*
72 * static prototypes
73 */
/*
 * The set of transformations selected on the command line.  main() fills
 * one of these in from the switches and passes it to mhfixmsgsbr().
 */
74 typedef struct fix_transformations {
/* Nonzero to repair a mismatched multipart boundary (-fixboundary). */
75 int fixboundary;
/* Nonzero to repair an invalid Content-Transfer-Encoding on composite
   parts (-fixcte). */
76 int fixcompositecte;
/* type/subtype strings given with -fixtype, or NULL if none were. */
77 svector_t fixtypes;
/* Nonzero to ensure text parts have a text/plain counterpart
   (-reformat). */
78 int reformat;
/* Passed to ensure_text_plain() (-replacetextplain). */
79 int replacetextplain;
/* Target encoding for -decodetext: CE_8BIT, CE_7BIT or CE_BINARY, or 0
   for -nodecodetext. */
80 int decodetext;
/* Argument of -decodetypes; defaults to "text,application/ics". */
81 char *decodetypes;
82 /* Nonzero to use LF instead of CRLF line breaks: -nocrlflinebreaks sets it to 1, -crlflinebreaks to 0.  RFC 2046 Sec. 4.1.1, par. 1 covers CRLF line breaks. */
83 int lf_line_endings;
/* Argument of -textcharset, or NULL when no charset conversion was
   requested. */
84 char *textcharset;
85 } fix_transformations;
86
/* Per-message driver.  Unlike the helpers that follow it is not declared
   static. */
87 int mhfixmsgsbr (CT *, char *, const fix_transformations *, FILE **, char *,
88 FILE **);
/* File-local helpers.  Each is declared static here, so a definition
   further down has internal linkage even where it omits the keyword. */
89 static int fix_boundary (CT *, int *);
90 static int copy_input_to_output (const char *, FILE *, const char *, FILE *);
91 static int get_multipart_boundary (CT, char **);
92 static int replace_boundary (CT, char *, char *);
93 static int fix_types (CT, svector_t, int *);
94 static char *replace_substring (char **, const char *, const char *);
95 static char *remove_parameter (char *, const char *);
96 static int fix_composite_cte (CT, int *);
97 static int set_ce (CT, int);
98 static int ensure_text_plain (CT *, CT, int *, int);
99 static int find_textplain_sibling (CT, int, int *);
100 static int insert_new_text_plain_part (CT, int, CT);
101 static CT build_text_plain_part (CT);
102 static int insert_into_new_mp_alt (CT *, int *);
103 static CT divide_part (CT);
104 static void copy_ctinfo (CI, CI);
105 static int decode_part (CT);
106 static int reformat_part (CT, char *, char *, char *, int);
107 static CT build_multipart_alt (CT, CT, int, int);
108 static int boundary_in_content (FILE **, char *, const char *);
109 static void transfer_noncontent_headers (CT, CT);
110 static int set_ct_type (CT, int type, int subtype, int encoding);
111 static int decode_text_parts (CT, int, const char *, int *);
112 static int should_decode(const char *, const char *, const char *);
113 static int content_encoding (CT, const char **);
114 static int strip_crs (CT, int *);
115 static void update_cte (CT);
116 static int least_restrictive_encoding (CT);
117 static int less_restrictive (int, int);
118 static int convert_charsets (CT, char *, int *);
119 static int fix_always (CT, int *);
120 static int fix_filename_param (char *, char *, PM *, PM *);
121 static int fix_filename_encoding (CT);
122 static int write_content (CT, const char *, char *, FILE *, int, int);
123 static void set_text_ctparams(CT, char *, int);
124 static int remove_file (const char *);
125 static void report (char *, char *, char *, char *, ...);
126 static void pipeser (int);
127
128
/*
 * Program entry point.
 *
 * Parses the command-line switches into a fix_transformations struct,
 * parses either one file (-file, or an argument beginning with '/') or the
 * selected messages of a folder with parse_mime(), runs mhfixmsgsbr() on
 * every message that parsed, releases what it allocated and exits through
 * done() with the accumulated status.
 *
 * A message that cannot be parsed is reported with inform(); if -outfile
 * was given it is copied unchanged to the output so that it is not dropped
 * from a pipeline.
 */
129 int
130 main (int argc, char **argv) {
131 int msgnum;
132 char *cp, *file = NULL, *folder = NULL;
133 char *maildir = NULL, buf[100], *outfile = NULL;
134 char **argp, **arguments;
135 struct msgs_array msgs = { 0, 0, NULL };
136 struct msgs *mp = NULL;
137 CT *ctp;
138 FILE *fp, *infp = NULL, *outfp = NULL;
139 int using_stdin = 0;
140 int chgflag = 1;
141 int status = OK;
142 fix_transformations fx;
/* Defaults: reformat, fix composite C-T-E and fix boundaries are on;
   text is decoded to 8bit; CRLF line breaks; no charset conversion. */
143 fx.reformat = fx.fixcompositecte = fx.fixboundary = 1;
144 fx.fixtypes = NULL;
145 fx.replacetextplain = 0;
146 fx.decodetext = CE_8BIT;
147 fx.decodetypes = "text,application/ics"; /* Default, per man page. */
148 fx.lf_line_endings = 0;
149 fx.textcharset = NULL;
150
151 if (nmh_init(argv[0], 2)) { return 1; }
152
153 arguments = getarguments (invo_name, argc, argv, 1);
154 argp = arguments;
155
156 /*
157 * Parse arguments
158 */
159 while ((cp = *argp++)) {
160 if (*cp == '-') {
/* NOTE(review): several cases below end in done() or adios() with no
   break/continue, so they presumably do not return -- confirm. */
161 switch (smatch (++cp, switches)) {
162 case AMBIGSW:
163 ambigsw (cp, switches);
164 done (1);
165 case UNKWNSW:
166 adios (NULL, "-%s unknown", cp);
167
168 case HELPSW:
169 snprintf (buf, sizeof buf, "%s [+folder] [msgs] [switches]",
170 invo_name);
171 print_help (buf, switches, 1);
172 done (0);
173 case VERSIONSW:
174 print_version(invo_name);
175 done (0);
176
177 case DECODETEXTSW:
/* argp[-2] is the switch itself, used in the error messages. */
178 if (! (cp = *argp++) || *cp == '-') {
179 adios (NULL, "missing argument to %s", argp[-2]);
180 }
181 if (! strcasecmp (cp, "8bit")) {
182 fx.decodetext = CE_8BIT;
183 } else if (! strcasecmp (cp, "7bit")) {
184 fx.decodetext = CE_7BIT;
185 } else if (! strcasecmp (cp, "binary")) {
186 fx.decodetext = CE_BINARY;
187 } else {
188 adios (NULL, "invalid argument to %s", argp[-2]);
189 }
190 continue;
191 case NDECODETEXTSW:
192 fx.decodetext = 0;
193 continue;
194 case DECODETYPESW:
195 if (! (cp = *argp++) || *cp == '-') {
196 adios (NULL, "missing argument to %s", argp[-2]);
197 }
198 fx.decodetypes = cp;
199 continue;
200 case CRLFLINEBREAKSSW:
201 fx.lf_line_endings = 0;
202 continue;
203 case NCRLFLINEBREAKSSW:
204 fx.lf_line_endings = 1;
205 continue;
206 case TEXTCHARSETSW:
/* A lone "-" is accepted as an argument here; only "-x..." is rejected. */
207 if (! (cp = *argp++) || (*cp == '-' && cp[1])) {
208 adios (NULL, "missing argument to %s", argp[-2]);
209 }
210 fx.textcharset = cp;
211 continue;
212 case NTEXTCHARSETSW:
213 fx.textcharset = 0;
214 continue;
215 case FIXBOUNDARYSW:
216 fx.fixboundary = 1;
217 continue;
218 case NFIXBOUNDARYSW:
219 fx.fixboundary = 0;
220 continue;
221 case FIXCOMPOSITECTESW:
222 fx.fixcompositecte = 1;
223 continue;
224 case NFIXCOMPOSITECTESW:
225 fx.fixcompositecte = 0;
226 continue;
227 case FIXTYPESW:
228 if (! (cp = *argp++) || (*cp == '-' && cp[1])) {
229 adios (NULL, "missing argument to %s", argp[-2]);
230 }
/* Composite types are refused, and a subtype is mandatory. */
231 if (! strncasecmp (cp, "multipart/", 10) ||
232 ! strncasecmp (cp, "message/", 8)) {
233 adios (NULL, "-fixtype %s not allowed", cp);
234 } else if (! strchr (cp, '/')) {
235 adios (NULL, "-fixtype requires type/subtype");
236 }
237 if (fx.fixtypes == NULL) { fx.fixtypes = svector_create (10); }
238 svector_push_back (fx.fixtypes, cp);
239 continue;
240 case REFORMATSW:
241 fx.reformat = 1;
242 continue;
243 case NREFORMATSW:
244 fx.reformat = 0;
245 continue;
246 case REPLACETEXTPLAINSW:
247 fx.replacetextplain = 1;
248 continue;
249 case NREPLACETEXTPLAINSW:
250 fx.replacetextplain = 0;
251 continue;
252 case FILESW:
253 if (! (cp = *argp++) || (*cp == '-' && cp[1])) {
254 adios (NULL, "missing argument to %s", argp[-2]);
255 }
/* "-" (stdin) is kept literally; anything else goes through path(). */
256 file = *cp == '-' ? mh_xstrdup (cp) : path (cp, TFILE);
257 continue;
258 case OUTFILESW:
259 if (! (cp = *argp++) || (*cp == '-' && cp[1])) {
260 adios (NULL, "missing argument to %s", argp[-2]);
261 }
/* "-" (stdout) is kept literally; anything else goes through path(). */
262 outfile = *cp == '-' ? mh_xstrdup (cp) : path (cp, TFILE);
263 continue;
264 case RPROCSW:
265 if (!(rmmproc = *argp++) || *rmmproc == '-') {
266 adios (NULL, "missing argument to %s", argp[-2]);
267 }
268 continue;
269 case NRPRCSW:
270 rmmproc = NULL;
271 continue;
272 case CHGSW:
273 chgflag = 1;
274 continue;
275 case NCHGSW:
276 chgflag = 0;
277 continue;
278 case VERBSW:
279 verbosw = 1;
280 continue;
281 case NVERBSW:
282 verbosw = 0;
283 continue;
284 }
285 }
/* Not a switch: a +folder/@folder, an absolute filename, or a message
   specification. */
286 if (*cp == '+' || *cp == '@') {
287 if (folder) {
288 adios (NULL, "only one folder at a time!");
289 } else {
290 folder = pluspath (cp);
291 }
292 } else {
293 if (*cp == '/') {
294 /* Interpret a full path as a filename, not a message. */
295 file = mh_xstrdup (cp);
296 } else {
297 app_msgarg (&msgs, cp);
298 }
299 }
300 }
301
302 SIGNAL (SIGQUIT, quitser);
303 SIGNAL (SIGPIPE, pipeser);
304
305 /*
306 * Read the standard profile setup
307 */
308 if ((fp = fopen (cp = etcpath ("mhn.defaults"), "r"))) {
309 readconfig ((struct node **) 0, fp, cp, 0);
310 fclose (fp);
311 }
312
/* Silence the parser warnings for the conditions this program exists to
   repair. */
313 suppress_bogus_mp_content_warning = skip_mp_cte_check = 1;
314 suppress_extraneous_trailing_semicolon_warning = 1;
315
316 if (! context_find ("path")) {
317 free (path ("./", TFOLDER));
318 }
319
320 if (file && msgs.size) {
321 adios (NULL, "cannot specify msg and file at same time!");
322 }
323
324 if (outfile) {
325 /* Open the outfile now, so we don't have to risk opening it
326 after running out of fds. */
327 if (strcmp (outfile, "-") == 0) {
328 outfp = stdout;
329 } else if ((outfp = fopen (outfile, "w")) == NULL) {
330 adios (outfile, "unable to open for writing");
331 }
332 }
333
334 /*
335 * check if message is coming from file
336 */
337 if (file) {
338 /* If file is stdin, create a tmp file name before parse_mime()
339 has a chance, because it might put in on a different
340 filesystem than the output file. Instead, put it in the
341 user's preferred tmp directory. */
342 CT ct;
343
344 if (! strcmp ("-", file)) {
345 int fd;
/* This cp shadows the function-level cp for the rest of this block. */
346 char *cp;
347
348 using_stdin = 1;
349
350 if ((cp = m_mktemp2 (NULL, invo_name, &fd, NULL)) == NULL) {
351 adios (NULL, "unable to create temporary file in %s",
352 get_temp_dir());
353 } else {
/* From here on `file' names the temporary copy of stdin. */
354 free (file);
355 file = mh_xstrdup (cp);
356 cpydata (STDIN_FILENO, fd, "-", file);
357 }
358
359 if (close (fd)) {
360 (void) m_unlink (file);
361 adios (NULL, "failed to write temporary file");
362 }
363 }
364
/* NOTE(review): cts is not declared in this function or above it in this
   file; presumably it comes from mhfree.h -- confirm.  One slot for the
   message plus a zeroed terminator slot. */
365 cts = mh_xcalloc(2, sizeof *cts);
366 ctp = cts;
367
368 if ((ct = parse_mime (file))) {
369 set_text_ctparams(ct, fx.decodetypes, fx.lf_line_endings);
370 *ctp++ = ct;
371 } else {
372 inform("unable to parse message from file %s", file);
373 status = NOTOK;
374
375 /* If there's an outfile, pass the input message unchanged, so the
376 message won't get dropped from a pipeline. */
377 if (outfile) {
378 /* Something went wrong. Output might be expected, such as if
379 this were run as a filter. Just copy the input to the
380 output. */
381 if ((infp = fopen (file, "r")) == NULL) {
382 adios (file, "unable to open for reading");
383 }
384
385 if (copy_input_to_output (file, infp, outfile, outfp) != OK) {
386 inform("unable to copy message to %s, "
387 "it might be lost\n", outfile);
388 }
389
390 fclose (infp);
391 infp = NULL;
392 }
393 }
394 } else {
395 /*
396 * message(s) are coming from a folder
397 */
398 CT ct;
399
400 if (! msgs.size) {
401 app_msgarg(&msgs, "cur");
402 }
403 if (! folder) {
404 folder = getfolder (1);
405 }
406 maildir = mh_xstrdup(m_maildir (folder));
407
408 /* chdir so that error messages, esp. from MIME parser, just
409 refer to the message and not its path. */
410 if (chdir (maildir) == NOTOK) {
411 adios (maildir, "unable to change directory to");
412 }
413
414 /* read folder and create message structure */
415 if (! (mp = folder_read (folder, 1))) {
416 adios (NULL, "unable to read folder %s", folder);
417 }
418
419 /* check for empty folder */
420 if (mp->nummsg == 0) {
421 adios (NULL, "no messages in %s", folder);
422 }
423
424 /* parse all the message ranges/sequences and set SELECTED */
425 for (msgnum = 0; msgnum < msgs.size; msgnum++)
426 if (! m_convert (mp, msgs.msgs[msgnum])) {
427 done (1);
428 }
429 seq_setprev (mp); /* set the previous-sequence */
430
/* One slot per selected message plus a zeroed terminator slot. */
431 cts = mh_xcalloc(mp->numsel + 1, sizeof *cts);
432 ctp = cts;
433
434 for (msgnum = mp->lowsel; msgnum <= mp->hghsel; msgnum++) {
435 if (is_selected(mp, msgnum)) {
436 char *msgnam = m_name (msgnum);
437
438 if ((ct = parse_mime (msgnam))) {
439 set_text_ctparams(ct, fx.decodetypes, fx.lf_line_endings);
440 *ctp++ = ct;
441 } else {
442 inform("unable to parse message %s", msgnam);
443 status = NOTOK;
444
445 /* If there's an outfile, pass the input message
446 unchanged, so the message won't get dropped from a
447 pipeline. */
448 if (outfile) {
449 /* Something went wrong. Output might be expected,
450 such as if this were run as a filter. Just copy
451 the input to the output. */
452 /* Can't use path() here because 1) it might have been
453 called before and it caches the pwd, and 2) we call
454 chdir() after that. */
455 char *input_filename =
456 concat (maildir, "/", msgnam, NULL);
457
458 if ((infp = fopen (input_filename, "r")) == NULL) {
459 adios (input_filename,
460 "unable to open for reading");
461 }
462
463 if (copy_input_to_output (input_filename, infp,
464 outfile, outfp) != OK) {
465 inform("unable to copy message to %s, "
466 "it might be lost\n", outfile);
467 }
468
469 fclose (infp);
470 infp = NULL;
471 free (input_filename);
472 }
473 }
474 }
475 }
476
477 if (chgflag) {
478 seq_setcur (mp, mp->hghsel); /* update current message */
479 }
480 seq_save (mp); /* synchronize sequences */
481 context_replace (pfolder, folder);/* update current folder */
482 context_save (); /* save the context file */
483 }
484
/* Transform every message that parsed; cts ends at its first NULL slot. */
485 if (*cts) {
486 for (ctp = cts; *ctp; ++ctp) {
487 status += mhfixmsgsbr (ctp, maildir, &fx, &infp, outfile, &outfp);
488 free_content (*ctp);
489
/* NOTE(review): the temporary copy of stdin is removed only inside this
   loop, so it is left behind when parse_mime() failed and cts is empty. */
490 if (using_stdin) {
491 (void) m_unlink (file);
492
493 if (! outfile) {
494 /* Just calling m_backup() unlinks the backup file. */
495 (void) m_backup (file);
496 }
497 }
498 }
499 } else {
/* Nothing parsed at all. */
500 status = 1;
501 }
502
503 mh_xfree(maildir);
504 free (cts);
505
506 if (fx.fixtypes != NULL) { svector_free (fx.fixtypes); }
507 if (infp) { fclose (infp); } /* even if stdin */
508 if (outfp) { fclose (outfp); } /* even if stdout */
509 free (outfile);
510 free (file);
511 free (folder);
512 free (arguments);
513
514 done (status);
515 return NOTOK;
516 }
517
518
519 /*
520 * Apply transformations to one message.
521 */
522 int
523 mhfixmsgsbr (CT *ctp, char *maildir, const fix_transformations *fx,
524 FILE **infp, char *outfile, FILE **outfp) {
525 /* Store input filename in case one of the transformations, i.e.,
526 fix_boundary(), rewrites to a tmp file. */
527 char *input_filename = maildir
528 ? concat (maildir, "/", (*ctp)->c_file, NULL)
529 : mh_xstrdup ((*ctp)->c_file);
530 int modify_inplace = 0;
531 int message_mods = 0;
532 int status = OK;
533
534 /* Though the input file won't need to be opened if everything goes
535 well, do it here just in case there's a failure, and that failure is
536 running out of file descriptors. */
537 if ((*infp = fopen (input_filename, "r")) == NULL) {
538 adios (input_filename, "unable to open for reading");
539 }
540
541 if (outfile == NULL) {
542 modify_inplace = 1;
543
544 if ((*ctp)->c_file) {
545 char *tempfile;
546 /* outfp will be closed by the caller */
547 if ((tempfile = m_mktemp2 (NULL, invo_name, NULL, outfp)) ==
548 NULL) {
549 adios (NULL, "unable to create temporary file in %s",
550 get_temp_dir());
551 }
552 outfile = mh_xstrdup (tempfile);
553 } else {
554 adios (NULL, "missing both input and output filenames\n");
555 }
556 } /* else *outfp was defined by caller */
557
558 reverse_alternative_parts (*ctp);
559 status = fix_always (*ctp, &message_mods);
560 if (status == OK && fx->fixboundary) {
561 status = fix_boundary (ctp, &message_mods);
562 }
563 if (status == OK && fx->fixtypes != NULL) {
564 status = fix_types (*ctp, fx->fixtypes, &message_mods);
565 }
566 if (status == OK && fx->fixcompositecte) {
567 status = fix_composite_cte (*ctp, &message_mods);
568 }
569 if (status == OK && fx->reformat) {
570 status =
571 ensure_text_plain (ctp, NULL, &message_mods, fx->replacetextplain);
572 }
573 if (status == OK && fx->decodetext) {
574 status = decode_text_parts (*ctp, fx->decodetext, fx->decodetypes,
575 &message_mods);
576 update_cte (*ctp);
577 }
578 if (status == OK && fx->textcharset != NULL) {
579 status = convert_charsets (*ctp, fx->textcharset, &message_mods);
580 }
581
582 if (status == OK && ! (*ctp)->c_umask) {
583 /* Set the umask for the contents file. This currently
584 isn't used but just in case it is in the future. */
585 struct stat st;
586
587 if (stat ((*ctp)->c_file, &st) != NOTOK) {
588 (*ctp)->c_umask = ~(st.st_mode & 0777);
589 } else {
590 (*ctp)->c_umask = ~m_gmprot();
591 }
592 }
593
594 /*
595 * Write the content to a file
596 */
597 if (status == OK) {
598 status = write_content (*ctp, input_filename, outfile, *outfp,
599 modify_inplace, message_mods);
600 } else if (! modify_inplace) {
601 /* Something went wrong. Output might be expected, such
602 as if this were run as a filter. Just copy the input
603 to the output. */
604 if (copy_input_to_output (input_filename, *infp, outfile,
605 *outfp) != OK) {
606 inform("unable to copy message to %s, it might be lost\n",
607 outfile);
608 }
609 }
610
611 if (modify_inplace) {
612 if (status != OK) { (void) m_unlink (outfile); }
613 free (outfile);
614 outfile = NULL;
615 }
616
617 fclose (*infp);
618 *infp = NULL;
619 free (input_filename);
620
621 return status;
622 }
623
624
625 /*
626 * Copy input message to output. Assumes not modifying in place, so this
627 * might be running as part of a pipeline.
628 */
629 static int
630 copy_input_to_output (const char *input_filename, FILE *infp,
631 const char *output_filename, FILE *outfp) {
632 int in = fileno (infp);
633 int out = fileno (outfp);
634 int status = OK;
635
636 if (in != -1 && out != -1) {
637 cpydata (in, out, input_filename, output_filename);
638 } else {
639 status = NOTOK;
640 }
641
642 return status;
643 }
644
645
646 /*
647 * Fix mismatched outer level boundary.
648 */
649 static int
650 fix_boundary (CT *ct, int *message_mods) {
651 struct multipart *mp;
652 int status = OK;
653
654 if (ct && (*ct)->c_type == CT_MULTIPART && bogus_mp_content) {
655 mp = (struct multipart *) (*ct)->c_ctparams;
656
657 /*
658 * 1) Get boundary at end of part.
659 * 2) Get boundary at beginning of part and compare to the end-of-part
660 * boundary.
661 * 3) Write out contents of ct to tmp file, replacing boundary in
662 * header with boundary from part. Set c_unlink to 1.
663 * 4) Free ct.
664 * 5) Call parse_mime() on the tmp file, replacing ct.
665 */
666
667 if (mp && mp->mp_start) {
668 char *part_boundary;
669
670 if (get_multipart_boundary (*ct, &part_boundary) == OK) {
671 char *fixed;
672
673 if ((fixed = m_mktemp2 (NULL, invo_name, NULL, &(*ct)->c_fp))) {
674 if (replace_boundary (*ct, fixed, part_boundary) == OK) {
675 char *filename = mh_xstrdup ((*ct)->c_file);
676 CT fixed_ct;
677
678 free_content (*ct);
679 if ((fixed_ct = parse_mime (fixed))) {
680 *ct = fixed_ct;
681 (*ct)->c_unlink = 1;
682
683 ++*message_mods;
684 if (verbosw) {
685 report (NULL, NULL, filename,
686 "fix multipart boundary");
687 }
688 } else {
689 *ct = NULL;
690 inform("unable to parse fixed part");
691 status = NOTOK;
692 }
693 free (filename);
694 } else {
695 inform("unable to replace broken boundary");
696 status = NOTOK;
697 }
698 } else {
699 inform("unable to create temporary file in %s",
700 get_temp_dir());
701 status = NOTOK;
702 }
703
704 free (part_boundary);
705 } else {
706 /* Couldn't fix the boundary. Report failure so that mhfixmsg
707 doesn't modify the message. */
708 status = NOTOK;
709 }
710 } else {
711 /* No multipart struct, even though the content type is
712 CT_MULTIPART. Report failure so that mhfixmsg doesn't modify
713 the message. */
714 status = NOTOK;
715 }
716 }
717
718 return status;
719 }
720
721
722 /*
723 * Find boundary at end of multipart.
724 */
725 static int
726 get_multipart_boundary (CT ct, char **part_boundary) {
727 char buffer[NMH_BUFSIZ];
728 char *end_boundary = NULL;
729 off_t begin = (off_t) ct->c_end > (off_t) (ct->c_begin + sizeof buffer)
730 ? (off_t) (ct->c_end - sizeof buffer)
731 : (off_t) ct->c_begin;
732 size_t bytes_read;
733 int status = OK;
734
735 /* This will fail if the boundary spans fread() calls. NMH_BUFSIZ should
736 be big enough, even if it's just 1024, to make that unlikely. */
737
738 /* free_content() will close ct->c_fp if bogus MP boundary is fixed. */
739 if (! ct->c_fp && (ct->c_fp = fopen (ct->c_file, "r")) == NULL) {
740 advise (ct->c_file, "unable to open for reading");
741 return NOTOK;
742 }
743
744 /* Get boundary at end of multipart. */
745 while (begin >= (off_t) ct->c_begin) {
746 fseeko (ct->c_fp, begin, SEEK_SET);
747 while ((bytes_read = fread (buffer, 1, sizeof buffer, ct->c_fp)) > 0) {
748 char *cp = rfind_str (buffer, bytes_read, "--");
749
750 if (cp) {
751 char *end;
752
753 /* Trim off trailing "--" and anything beyond. */
754 *cp-- = '\0';
755 if ((end = rfind_str (buffer, cp - buffer, "\n"))) {
756 if (strlen (end) > 3 && *end++ == '\n' &&
757 *end++ == '-' && *end++ == '-') {
758 end_boundary = mh_xstrdup (end);
759 break;
760 }
761 }
762 }
763 }
764
765 if (end_boundary || begin <= (off_t) (ct->c_begin + sizeof buffer))
766 break;
767 begin -= sizeof buffer;
768 }
769
770 /* Get boundary at beginning of multipart. */
771 if (end_boundary) {
772 fseeko (ct->c_fp, ct->c_begin, SEEK_SET);
773 while ((bytes_read = fread (buffer, 1, sizeof buffer, ct->c_fp)) > 0) {
774 if (bytes_read >= strlen (end_boundary)) {
775 char *cp = find_str (buffer, bytes_read, end_boundary);
776
777 if (cp && cp - buffer >= 2 && *--cp == '-' &&
778 *--cp == '-' && (cp > buffer && *--cp == '\n')) {
779 status = OK;
780 break;
781 }
782 } else {
783 /* The start and end boundaries didn't match, or the
784 start boundary doesn't begin with "\n--" (or "--"
785 if at the beginning of buffer). Keep trying. */
786 status = NOTOK;
787 }
788 }
789 } else {
790 status = NOTOK;
791 }
792
793 if (ct->c_fp) {
794 fclose (ct->c_fp);
795 ct->c_fp = NULL;
796 }
797
798 if (status == OK) {
799 *part_boundary = end_boundary;
800 } else {
801 *part_boundary = NULL;
802 free (end_boundary);
803 }
804
805 return status;
806 }
807
808
809 /*
810 * Open and copy ct->c_file to file, replacing the multipart boundary.
811 */
812 static int
813 replace_boundary (CT ct, char *file, char *boundary) {
814 FILE *fpin, *fpout;
815 int compnum, state;
816 char buf[NMH_BUFSIZ], name[NAMESZ];
817 char *np, *vp;
818 m_getfld_state_t gstate = 0;
819 int status = OK;
820
821 if (ct->c_file == NULL) {
822 inform("missing input filename");
823 return NOTOK;
824 }
825
826 if ((fpin = fopen (ct->c_file, "r")) == NULL) {
827 advise (ct->c_file, "unable to open for reading");
828 return NOTOK;
829 }
830
831 if ((fpout = fopen (file, "w")) == NULL) {
832 fclose (fpin);
833 advise (file, "unable to open for writing");
834 return NOTOK;
835 }
836
837 for (compnum = 1;;) {
838 int bufsz = (int) sizeof buf;
839
840 switch (state = m_getfld (&gstate, name, buf, &bufsz, fpin)) {
841 case FLD:
842 case FLDPLUS:
843 compnum++;
844
845 /* get copies of the buffers */
846 np = mh_xstrdup (name);
847 vp = mh_xstrdup (buf);
848
849 /* if necessary, get rest of field */
850 while (state == FLDPLUS) {
851 bufsz = sizeof buf;
852 state = m_getfld (&gstate, name, buf, &bufsz, fpin);
853 vp = add (buf, vp); /* add to previous value */
854 }
855
856 if (strcasecmp (TYPE_FIELD, np)) {
857 fprintf (fpout, "%s:%s", np, vp);
858 } else {
859 char *new_ctline, *new_params;
860
861 replace_param(&ct->c_ctinfo.ci_first_pm,
862 &ct->c_ctinfo.ci_last_pm, "boundary",
863 boundary, 0);
864
865 new_ctline = concat(" ", ct->c_ctinfo.ci_type, "/",
866 ct->c_ctinfo.ci_subtype, NULL);
867 new_params = output_params(strlen(TYPE_FIELD) +
868 strlen(new_ctline) + 1,
869 ct->c_ctinfo.ci_first_pm, NULL, 0);
870 fprintf (fpout, "%s:%s%s\n", np, new_ctline,
871 FENDNULL(new_params));
872 free(new_ctline);
873 mh_xfree(new_params);
874 }
875
876 free (vp);
877 free (np);
878
879 continue;
880
881 case BODY:
882 putc('\n', fpout);
883 /* buf will have a terminating NULL, skip it. */
884 if ((int) fwrite (buf, 1, bufsz-1, fpout) < bufsz-1) {
885 advise (file, "fwrite");
886 }
887 continue;
888
889 case FILEEOF:
890 break;
891
892 case LENERR:
893 case FMTERR:
894 inform("message format error in component #%d", compnum);
895 status = NOTOK;
896 break;
897
898 default:
899 inform("getfld() returned %d", state);
900 status = NOTOK;
901 break;
902 }
903
904 break;
905 }
906
907 m_getfld_state_destroy (&gstate);
908 fclose (fpout);
909 fclose (fpin);
910
911 return status;
912 }
913
914
915 /*
916 * Fix Content-Type header to reflect the content of its part.
917 */
918 static int
919 fix_types (CT ct, svector_t fixtypes, int *message_mods) {
920 int status = OK;
921
922 switch (ct->c_type) {
923 case CT_MULTIPART: {
924 struct multipart *m = (struct multipart *) ct->c_ctparams;
925 struct part *part;
926
927 for (part = m->mp_parts; status == OK && part; part = part->mp_next) {
928 status = fix_types (part->mp_part, fixtypes, message_mods);
929 }
930 break;
931 }
932
933 case CT_MESSAGE:
934 if (ct->c_subtype == MESSAGE_EXTERNAL) {
935 struct exbody *e = (struct exbody *) ct->c_ctparams;
936
937 status = fix_types (e->eb_content, fixtypes, message_mods);
938 }
939 break;
940
941 default: {
942 char **typep, *type;
943
944 if (ct->c_ctinfo.ci_type && ct->c_ctinfo.ci_subtype) {
945 for (typep = svector_strs (fixtypes);
946 typep && (type = *typep);
947 ++typep) {
948 char *type_subtype =
949 concat (ct->c_ctinfo.ci_type, "/", ct->c_ctinfo.ci_subtype,
950 NULL);
951
952 if (! strcasecmp (type, type_subtype) &&
953 decode_part (ct) == OK &&
954 ct->c_cefile.ce_file != NULL) {
955 char *ct_type_subtype = mime_type (ct->c_cefile.ce_file);
956 char *cp;
957
958 if ((cp = strchr (ct_type_subtype, ';'))) {
959 /* Truncate to remove any parameter list from
960 mime_type () result. */
961 *cp = '\0';
962 }
963
964 if (strcasecmp (type, ct_type_subtype)) {
965 char *ct_type, *ct_subtype;
966 HF hf;
967
968 /* The Content-Type header does not match the
969 content, so update these struct Content
970 fields to match:
971 * c_type, c_subtype
972 * c_ctinfo.ci_type, c_ctinfo.ci_subtype
973 * c_ctline
974 */
975 /* Extract type and subtype from type/subtype. */
976 ct_type = mh_xstrdup(ct_type_subtype);
977 if ((cp = strchr (ct_type, '/'))) {
978 *cp = '\0';
979 ct_subtype = mh_xstrdup(++cp);
980 } else {
981 inform("missing / in MIME type of %s %s",
982 ct->c_file, ct->c_partno);
983 free (ct_type);
984 return NOTOK;
985 }
986
987 ct->c_type = ct_str_type (ct_type);
988 ct->c_subtype = ct_str_subtype (ct->c_type, ct_subtype);
989
990 free (ct->c_ctinfo.ci_type);
991 ct->c_ctinfo.ci_type = ct_type;
992 free (ct->c_ctinfo.ci_subtype);
993 ct->c_ctinfo.ci_subtype = ct_subtype;
994 if (! replace_substring (&ct->c_ctline, type,
995 ct_type_subtype)) {
996 inform("did not find %s in %s",
997 type, ct->c_ctline);
998 }
999
1000 /* Update Content-Type header field. */
1001 for (hf = ct->c_first_hf; hf; hf = hf->next) {
1002 if (! strcasecmp (TYPE_FIELD, hf->name)) {
1003 if (replace_substring (&hf->value, type,
1004 ct_type_subtype)) {
1005 ++*message_mods;
1006 if (verbosw) {
1007 report (NULL, ct->c_partno, ct->c_file,
1008 "change Content-Type in header "
1009 "from %s to %s",
1010 type, ct_type_subtype);
1011 }
1012 break;
1013 }
1014 inform("did not find %s in %s", type, hf->value);
1015 }
1016 }
1017 }
1018 free (ct_type_subtype);
1019 }
1020 free (type_subtype);
1021 }
1022 }
1023 }}
1024
1025 return status;
1026 }
1027
1028
/*
 * Replace a substring, allocating space to hold the new one.
 *
 * Replaces the first occurrence of `old' in *str with `new'.  On success
 * the original *str is freed, *str is set to the newly allocated string,
 * and that string is returned.  If `old' does not occur, *str is left
 * untouched and NULL is returned.
 *
 * Defined static to match its prototype.
 */
static char *
replace_substring (char **str, const char *old, const char *new) {
    char *cp;

    if ((cp = strstr (*str, old))) {
        char *remainder = cp + strlen (old);
        char *prefix, *new_str;

        if (cp - *str) {
            /* Copy *str and cut the copy off where the match starts. */
            prefix = mh_xstrdup(*str);
            *(prefix + (cp - *str)) = '\0';
            new_str = concat (prefix, new, remainder, NULL);
            free (prefix);
        } else {
            /* The match is at the very start, so there is no prefix. */
            new_str = concat (new, remainder, NULL);
        }

        free (*str);

        return *str = new_str;
    }

    return NULL;
}
1056
1057
/*
 * Remove a name=value parameter, given just its name, from a header value.
 *
 * The removal is done in place and str is returned.  Together with the
 * parameter, the whitespace in front of it and one ';' in front of that
 * are removed.  The value is either a quoted string or runs up to the
 * next ';', whitespace or end of string.  str is returned unchanged if
 * the parameter is not present or name is empty.
 *
 * The name is matched case-sensitively and only where it starts a
 * parameter (at the start of str, or after ';' or whitespace), so asking
 * for "name" does not cut into "filename".
 */
static char *
remove_parameter (char *str, const char *name) {
    /* It looks to me, based on the BNF in RFC 2045, that there can't
       be whitespace between the parameter name and the "=", or
       between the "=" and the parameter value. */
    const size_t name_len = strlen (name);
    char *cp = str;
    char *start, *end;

    if (name_len == 0) { return str; }

    /* Find name followed directly by '=' and starting a parameter. */
    while ((cp = strstr (cp, name))) {
        int at_boundary = cp == str  ||  cp[-1] == ';'  ||
            isspace ((unsigned char) cp[-1]);

        if (at_boundary  &&  cp[name_len] == '=') { break; }
        ++cp;
    }
    if (cp == NULL) { return str; }

    /* Remove any leading spaces, before the parameter name. */
    for (start = cp;
         start > str  &&  isspace ((unsigned char) *(start-1));
         --start) {
        continue;
    }
    /* Remove a leading semicolon. */
    if (start > str  &&  *(start-1) == ';') { --start; }

    end = cp + name_len + 1;
    if (*end == '"') {
        /* Skip past the quoted value, and then the final quote if
           there is one; never step past the terminating null. */
        for (++end ; *end  &&  *end != '"'; ++end) { continue; }
        if (*end == '"') { ++end; }
    } else {
        /* Skip past the value, which ends at the next parameter
           separator, whitespace or end of string. */
        for ( ; *end  &&  *end != ';'  &&  ! isspace ((unsigned char) *end);
              ++end) {
            continue;
        }
    }

    /* Close the gap, including the trailing null. */
    (void) memmove (start, end, strlen (end) + 1);

    return str;
}
1103
1104
1105 /*
1106 * Fix Content-Transfer-Encoding of composite,, e.g., message or multipart, part.
1107 * According to RFC 2045 Sec. 6.4, it must be 7bit, 8bit, or binary. Set it to
1108 * 8 bit.
1109 */
1110 static int
1111 fix_composite_cte (CT ct, int *message_mods) {
1112 int status = OK;
1113
1114 if (ct->c_type == CT_MESSAGE || ct->c_type == CT_MULTIPART) {
1115 if (ct->c_encoding != CE_7BIT && ct->c_encoding != CE_8BIT &&
1116 ct->c_encoding != CE_BINARY) {
1117 HF hf;
1118
1119 for (hf = ct->c_first_hf; hf; hf = hf->next) {
1120 char *name = hf->name;
1121 for (; *name && isspace ((unsigned char) *name); ++name) {
1122 continue;
1123 }
1124
1125 if (! strncasecmp (name, ENCODING_FIELD,
1126 strlen (ENCODING_FIELD))) {
1127 char *prefix = "Nmh-REPLACED-INVALID-";
1128 HF h;
1129
1130 NEW(h);
1131 h->name = mh_xstrdup (hf->name);
1132 h->hf_encoding = hf->hf_encoding;
1133 h->next = hf->next;
1134 hf->next = h;
1135
1136 /* Retain old header but prefix its name. */
1137 free (hf->name);
1138 hf->name = concat (prefix, h->name, NULL);
1139
1140 ++*message_mods;
1141 if (verbosw) {
1142 char *encoding = cpytrim (hf->value);
1143 report (NULL, ct->c_partno, ct->c_file,
1144 "replace Content-Transfer-Encoding of %s "
1145 "with 8 bit", encoding);
1146 free (encoding);
1147 }
1148
1149 h->value = mh_xstrdup (" 8bit\n");
1150
1151 /* Don't need to warn for multiple C-T-E header
1152 fields, parse_mime() already does that. But
1153 if there are any, fix them all as necessary. */
1154 hf = h;
1155 }
1156 }
1157
1158 set_ce (ct, CE_8BIT);
1159 }
1160
1161 if (ct->c_type == CT_MULTIPART) {
1162 struct multipart *m;
1163 struct part *part;
1164
1165 m = (struct multipart *) ct->c_ctparams;
1166 for (part = m->mp_parts; part; part = part->mp_next) {
1167 if (fix_composite_cte (part->mp_part, message_mods) != OK) {
1168 status = NOTOK;
1169 break;
1170 }
1171 }
1172 }
1173 }
1174
1175 return status;
1176 }
1177
1178
1179 /*
1180 * Set content encoding.
1181 */
1182 static int
1183 set_ce (CT ct, int encoding) {
1184 const char *ce = ce_str (encoding);
1185 const struct str2init *ctinit = get_ce_method (ce);
1186
1187 if (ctinit) {
1188 char *cte = concat (" ", ce, "\n", NULL);
1189 int found_cte = 0;
1190 HF hf;
1191 /* Decoded contents might be in ct->c_cefile.ce_file, if the
1192 caller is decode_text_parts (). Save because we'll
1193 overwrite below. */
1194 struct cefile decoded_content_info = ct->c_cefile;
1195
1196 ct->c_encoding = encoding;
1197
1198 ct->c_ctinitfnx = ctinit->si_init;
1199 /* This will assign ct->c_cefile with an all-0 struct, which
1200 is what we want. */
1201 (*ctinit->si_init) (ct);
1202 /* After returning, the caller should set
1203 ct->c_cefile.ce_file to the name of the file containing
1204 the contents. */
1205
1206 if (ct->c_ceclosefnx) {
1207 (*ct->c_ceclosefnx) (ct);
1208 }
1209
1210 /* Restore the cefile. */
1211 ct->c_cefile = decoded_content_info;
1212
1213 /* Update/add Content-Transfer-Encoding header field. */
1214 for (hf = ct->c_first_hf; hf; hf = hf->next) {
1215 if (! strcasecmp (ENCODING_FIELD, hf->name)) {
1216 found_cte = 1;
1217 free (hf->value);
1218 hf->value = cte;
1219 }
1220 }
1221 if (! found_cte) {
1222 add_header (ct, mh_xstrdup (ENCODING_FIELD), cte);
1223 }
1224
1225 /* Update c_celine. It's used only by mhlist -debug. */
1226 free (ct->c_celine);
1227 ct->c_celine = mh_xstrdup (cte);
1228
1229 return OK;
1230 }
1231
1232 return NOTOK;
1233 }
1234
1235
1236 /*
1237 * Make sure each text part has a corresponding text/plain part.
1238 */
1239 static int
1240 ensure_text_plain (CT *ct, CT parent, int *message_mods, int replacetextplain) {
1241 int status = OK;
1242
1243 switch ((*ct)->c_type) {
1244 case CT_TEXT: {
1245 /* Nothing to do for text/plain. */
1246 if ((*ct)->c_subtype == TEXT_PLAIN) { return OK; }
1247
1248 if (parent && parent->c_type == CT_MULTIPART &&
1249 parent->c_subtype == MULTI_ALTERNATE) {
1250 int new_subpart_number = 1;
1251 int has_text_plain =
1252 find_textplain_sibling (parent, replacetextplain,
1253 &new_subpart_number);
1254
1255 if (! has_text_plain) {
1256 /* Parent is a multipart/alternative. Insert a new
1257 text/plain subpart. */
1258 const int inserted =
1259 insert_new_text_plain_part (*ct, new_subpart_number,
1260 parent);
1261 if (inserted) {
1262 ++*message_mods;
1263 if (verbosw) {
1264 report (NULL, parent->c_partno, parent->c_file,
1265 "insert text/plain part");
1266 }
1267 } else {
1268 status = NOTOK;
1269 }
1270 }
1271 } else if (parent && parent->c_type == CT_MULTIPART &&
1272 parent->c_subtype == MULTI_RELATED) {
1273 char *type_subtype =
1274 concat ((*ct)->c_ctinfo.ci_type, "/",
1275 (*ct)->c_ctinfo.ci_subtype, NULL);
1276 const char *parent_type =
1277 get_param (parent->c_ctinfo.ci_first_pm, "type", '?', 1);
1278 int new_subpart_number = 1;
1279 int has_text_plain = 0;
1280
1281 /* Have to do string comparison on the subtype because we
1282 don't enumerate all of them in c_subtype values.
1283 parent_type will be NULL if the multipart/related part
1284 doesn't have a type parameter. The type parameter must
1285 be specified according to RFC 2387 Sec. 3.1 but not all
1286 messages comply. */
1287 if (parent_type && strcasecmp (type_subtype, parent_type) == 0) {
1288 /* The type of this part matches the root type of the
1289 parent multipart/related. Look to see if there's
1290 text/plain sibling. */
1291 has_text_plain =
1292 find_textplain_sibling (parent, replacetextplain,
1293 &new_subpart_number);
1294 }
1295
1296 free (type_subtype);
1297
1298 if (! has_text_plain) {
1299 struct multipart *mp = (struct multipart *) parent->c_ctparams;
1300 struct part *part;
1301 int siblings = 0;
1302
1303 for (part = mp->mp_parts; part; part = part->mp_next) {
1304 if (*ct != part->mp_part) {
1305 ++siblings;
1306 }
1307 }
1308
1309 if (siblings) {
1310 /* Parent is a multipart/related. Insert a new
1311 text/plain subpart in a new multipart/alternative. */
1312 if (insert_into_new_mp_alt (ct, message_mods)) {
1313 /* Not an error if text/plain couldn't be added. */
1314 }
1315 } else {
1316 /* There are no siblings, so insert a new text/plain
1317 subpart, and change the parent type from
1318 multipart/related to multipart/alternative. */
1319 const int inserted =
1320 insert_new_text_plain_part (*ct, new_subpart_number,
1321 parent);
1322
1323 if (inserted) {
1324 HF hf;
1325
1326 parent->c_subtype = MULTI_ALTERNATE;
1327 free (parent->c_ctinfo.ci_subtype);
1328 parent->c_ctinfo.ci_subtype = mh_xstrdup("alternative");
1329 if (! replace_substring (&parent->c_ctline, "/related",
1330 "/alternative")) {
1331 inform("did not find multipart/related in %s",
1332 parent->c_ctline);
1333 }
1334
1335 /* Update Content-Type header field. */
1336 for (hf = parent->c_first_hf; hf; hf = hf->next) {
1337 if (! strcasecmp (TYPE_FIELD, hf->name)) {
1338 if (replace_substring (&hf->value, "/related",
1339 "/alternative")) {
1340 ++*message_mods;
1341 if (verbosw) {
1342 report (NULL, parent->c_partno,
1343 parent->c_file,
1344 "insert text/plain part");
1345 }
1346
1347 /* Remove, e.g., type="text/html" from
1348 multipart/alternative. */
1349 remove_parameter (hf->value, "type");
1350 break;
1351 }
1352 inform("did not find multipart/"
1353 "related in header %s", hf->value);
1354 }
1355 }
1356 } else {
1357 /* Not an error if text/plain couldn't be inserted. */
1358 }
1359 }
1360 }
1361 } else {
1362 if (insert_into_new_mp_alt (ct, message_mods)) {
1363 status = NOTOK;
1364 }
1365 }
1366 break;
1367 }
1368
1369 case CT_MULTIPART: {
1370 struct multipart *mp = (struct multipart *) (*ct)->c_ctparams;
1371 struct part *part;
1372
1373 for (part = mp->mp_parts; status == OK && part; part = part->mp_next) {
1374 if ((*ct)->c_type == CT_MULTIPART) {
1375 status = ensure_text_plain (&part->mp_part, *ct, message_mods,
1376 replacetextplain);
1377 }
1378 }
1379 break;
1380 }
1381
1382 case CT_MESSAGE:
1383 if ((*ct)->c_subtype == MESSAGE_EXTERNAL) {
1384 struct exbody *e = (struct exbody *) (*ct)->c_ctparams;
1385
1386 status = ensure_text_plain (&e->eb_content, *ct, message_mods,
1387 replacetextplain);
1388 }
1389 break;
1390 }
1391
1392 return status;
1393 }
1394
1395
1396 /*
1397 * See if there is a sibling text/plain, and return its subpart number.
1398 */
1399 static int
1400 find_textplain_sibling (CT parent, int replacetextplain,
1401 int *new_subpart_number) {
1402 struct multipart *mp = (struct multipart *) parent->c_ctparams;
1403 struct part *part, *prev;
1404 int has_text_plain = 0;
1405
1406 for (prev = part = mp->mp_parts; part; part = part->mp_next) {
1407 ++*new_subpart_number;
1408 if (part->mp_part->c_type == CT_TEXT &&
1409 part->mp_part->c_subtype == TEXT_PLAIN) {
1410 if (replacetextplain) {
1411 struct part *old_part;
1412 if (part == mp->mp_parts) {
1413 old_part = mp->mp_parts;
1414 mp->mp_parts = part->mp_next;
1415 } else {
1416 old_part = prev->mp_next;
1417 prev->mp_next = part->mp_next;
1418 }
1419 if (verbosw) {
1420 report (NULL, parent->c_partno, parent->c_file,
1421 "remove text/plain part %s",
1422 old_part->mp_part->c_partno);
1423 }
1424 free_content (old_part->mp_part);
1425 free (old_part);
1426 } else {
1427 has_text_plain = 1;
1428 }
1429 break;
1430 }
1431 prev = part;
1432 }
1433
1434 return has_text_plain;
1435 }
1436
1437
1438 /*
1439 * Insert a new text/plain part.
1440 */
1441 static int
1442 insert_new_text_plain_part (CT ct, int new_subpart_number, CT parent) {
1443 struct multipart *mp = (struct multipart *) parent->c_ctparams;
1444 struct part *new_part;
1445
1446 NEW(new_part);
1447 if ((new_part->mp_part = build_text_plain_part (ct))) {
1448 char buffer[16];
1449 snprintf (buffer, sizeof buffer, "%d", new_subpart_number);
1450
1451 new_part->mp_next = mp->mp_parts;
1452 mp->mp_parts = new_part;
1453 new_part->mp_part->c_partno =
1454 concat (parent->c_partno ? parent->c_partno : "1", ".",
1455 buffer, NULL);
1456
1457 return 1;
1458 }
1459
1460 free_content (new_part->mp_part);
1461 free (new_part);
1462
1463 return 0;
1464 }
1465
1466
1467 /*
1468 * Create a text/plain part to go along with non-plain sibling part.
1469 */
1470 static CT
1471 build_text_plain_part (CT encoded_part) {
1472 CT tp_part = divide_part (encoded_part);
1473 char *tmp_plain_file = NULL;
1474
1475 if (decode_part (tp_part) == OK) {
1476 /* Now, tp_part->c_cefile.ce_file is the name of the tmp file that
1477 contains the decoded contents. And the decoding function, such
1478 as openQuoted, will have set ...->ce_unlink to 1 so that it will
1479 be unlinked by free_content (). */
1480 char *tempfile;
1481
1482 /* This m_mktemp2() call closes the temp file. */
1483 if ((tempfile = m_mktemp2 (NULL, invo_name, NULL, NULL)) == NULL) {
1484 inform("unable to create temporary file in %s",
1485 get_temp_dir());
1486 } else {
1487 tmp_plain_file = mh_xstrdup (tempfile);
1488 if (reformat_part (tp_part, tmp_plain_file,
1489 tp_part->c_ctinfo.ci_type,
1490 tp_part->c_ctinfo.ci_subtype,
1491 tp_part->c_type) == OK) {
1492 return tp_part;
1493 }
1494 }
1495 }
1496
1497 free_content (tp_part);
1498 if (tmp_plain_file) { (void) m_unlink (tmp_plain_file); }
1499 free (tmp_plain_file);
1500
1501 return NULL;
1502 }
1503
1504
1505 /*
1506 * Slip new text/plain part into a new multipart/alternative.
1507 */
1508 static int
1509 insert_into_new_mp_alt (CT *ct, int *message_mods) {
1510 CT tp_part = build_text_plain_part (*ct);
1511 int status = OK;
1512
1513 if (tp_part) {
1514 CT mp_alt = build_multipart_alt (*ct, tp_part, CT_MULTIPART,
1515 MULTI_ALTERNATE);
1516 if (mp_alt) {
1517 struct multipart *mp = (struct multipart *) mp_alt->c_ctparams;
1518
1519 if (mp && mp->mp_parts) {
1520 mp->mp_parts->mp_part = tp_part;
1521 /* Make the new multipart/alternative the parent. */
1522 *ct = mp_alt;
1523
1524 ++*message_mods;
1525 if (verbosw) {
1526 report (NULL, (*ct)->c_partno, (*ct)->c_file,
1527 "insert text/plain part");
1528 }
1529 } else {
1530 free_content (tp_part);
1531 free_content (mp_alt);
1532 status = NOTOK;
1533 }
1534 } else {
1535 status = NOTOK;
1536 }
1537 } else {
1538 /* Not an error if text/plain couldn't be built. */
1539 }
1540
1541 return status;
1542 }
1543
1544
1545 /*
1546 * Clone a MIME part.
1547 */
1548 static CT
1549 divide_part (CT ct) {
1550 CT new_part;
1551
1552 NEW0(new_part);
1553 /* Just copy over what is needed for decoding. c_vrsn and
1554 c_celine aren't necessary. */
1555 new_part->c_file = mh_xstrdup (ct->c_file);
1556 new_part->c_begin = ct->c_begin;
1557 new_part->c_end = ct->c_end;
1558 copy_ctinfo (&new_part->c_ctinfo, &ct->c_ctinfo);
1559 new_part->c_type = ct->c_type;
1560 new_part->c_cefile = ct->c_cefile;
1561 new_part->c_encoding = ct->c_encoding;
1562 new_part->c_ctinitfnx = ct->c_ctinitfnx;
1563 new_part->c_ceopenfnx = ct->c_ceopenfnx;
1564 new_part->c_ceclosefnx = ct->c_ceclosefnx;
1565 new_part->c_cesizefnx = ct->c_cesizefnx;
1566
1567 /* c_ctline is used by reformat__part(), so it can preserve
1568 anything after the type/subtype. */
1569 new_part->c_ctline = mh_xstrdup (ct->c_ctline);
1570
1571 return new_part;
1572 }
1573
1574
1575 /*
1576 * Copy the content info from one part to another.
1577 */
1578 static void
1579 copy_ctinfo (CI dest, CI src) {
1580 PM s_pm, d_pm;
1581
1582 dest->ci_type = src->ci_type ? mh_xstrdup (src->ci_type) : NULL;
1583 dest->ci_subtype = src->ci_subtype ? mh_xstrdup (src->ci_subtype) : NULL;
1584
1585 for (s_pm = src->ci_first_pm; s_pm; s_pm = s_pm->pm_next) {
1586 d_pm = add_param(&dest->ci_first_pm, &dest->ci_last_pm, s_pm->pm_name,
1587 s_pm->pm_value, 0);
1588 if (s_pm->pm_charset) {
1589 d_pm->pm_charset = mh_xstrdup(s_pm->pm_charset);
1590 }
1591 if (s_pm->pm_lang) {
1592 d_pm->pm_lang = mh_xstrdup(s_pm->pm_lang);
1593 }
1594 }
1595
1596 dest->ci_comment = src->ci_comment ? mh_xstrdup (src->ci_comment) : NULL;
1597 dest->ci_magic = src->ci_magic ? mh_xstrdup (src->ci_magic) : NULL;
1598 }
1599
1600
1601 /*
1602 * Decode content.
1603 */
1604 static int
1605 decode_part (CT ct) {
1606 char *tmp_decoded;
1607 int status;
1608 FILE *file;
1609 char *tempfile;
1610
1611 if ((tempfile = m_mktemp2 (NULL, invo_name, NULL, &file)) == NULL) {
1612 adios (NULL, "unable to create temporary file in %s", get_temp_dir());
1613 }
1614 tmp_decoded = mh_xstrdup (tempfile);
1615 /* The following call will load ct->c_cefile.ce_file with the tmp
1616 filename of the decoded content. tmp_decoded will contain the
1617 encoded output, get rid of that. */
1618 status = output_message_fp (ct, file, tmp_decoded);
1619 (void) m_unlink (tmp_decoded);
1620 free (tmp_decoded);
1621 if (fclose (file)) {
1622 inform("unable to close temporary file %s, continuing...", tempfile);
1623 }
1624
1625 return status;
1626 }
1627
1628
1629 /*
1630 * Reformat content as plain text.
1631 * Some of the arguments aren't really needed now, but maybe will
1632 * be in the future for other than text types.
1633 */
1634 static int
1635 reformat_part (CT ct, char *file, char *type, char *subtype, int c_type) {
1636 int output_subtype, output_encoding;
1637 const char *reason = NULL;
1638 char *cp, *cf;
1639 int status;
1640
1641 /* Hacky: this redirects the output from whatever command is used
1642 to show the part to a file. So, the user can't have any output
1643 redirection in that command.
1644 Could show_multi() in mhshowsbr.c avoid this? */
1645
1646 /* Check for invo_name-format-type/subtype. */
1647 if ((cf = context_find_by_type ("format", type, subtype)) == NULL) {
1648 if (verbosw) {
1649 inform("Don't know how to convert %s, there is no "
1650 "%s-format-%s/%s profile entry",
1651 ct->c_file, invo_name, type, subtype);
1652 }
1653 return NOTOK;
1654 }
1655 if (strchr (cf, '>')) {
1656 inform("'>' prohibited in \"%s\",\nplease fix your "
1657 "%s-format-%s/%s profile entry", cf, invo_name, type,
1658 FENDNULL(subtype));
1659
1660 return NOTOK;
1661 }
1662
1663 cp = concat (cf, " >", file, NULL);
1664 status = show_content_aux (ct, 0, cp, NULL, NULL);
1665 free (cp);
1666
1667 /* Unlink decoded content tmp file and free its filename to avoid
1668 leaks. The file stream should already have been closed. */
1669 if (ct->c_cefile.ce_unlink) {
1670 (void) m_unlink (ct->c_cefile.ce_file);
1671 free (ct->c_cefile.ce_file);
1672 ct->c_cefile.ce_file = NULL;
1673 ct->c_cefile.ce_unlink = 0;
1674 }
1675
1676 if (c_type == CT_TEXT) {
1677 output_subtype = TEXT_PLAIN;
1678 } else {
1679 /* Set subtype to 0, which is always an UNKNOWN subtype. */
1680 output_subtype = 0;
1681 }
1682
1683 output_encoding = content_encoding (ct, &reason);
1684 if (status == OK &&
1685 set_ct_type (ct, c_type, output_subtype, output_encoding) == OK) {
1686 ct->c_cefile.ce_file = file;
1687 ct->c_cefile.ce_unlink = 1;
1688 } else {
1689 ct->c_cefile.ce_unlink = 0;
1690 status = NOTOK;
1691 }
1692
1693 return status;
1694 }
1695
1696
1697 /*
1698 * Fill in a multipart/alternative part.
1699 */
1700 static CT
1701 build_multipart_alt (CT first_alt, CT new_part, int type, int subtype) {
1702 char *boundary_prefix = "----=_nmh-multipart";
1703 char *boundary = concat (boundary_prefix, first_alt->c_partno, NULL);
1704 char *boundary_indicator = "; boundary=";
1705 char *typename, *subtypename, *name;
1706 CT ct;
1707 struct part *p;
1708 struct multipart *m;
1709 const struct str2init *ctinit;
1710
1711 NEW0(ct);
1712
1713 /* Set up the multipart/alternative part. These fields of *ct were
1714 initialized to 0 by mh_xcalloc():
1715 c_fp, c_unlink, c_begin, c_end,
1716 c_vrsn, c_ctline, c_celine,
1717 c_id, c_descr, c_dispo, c_partno,
1718 c_ctinfo.ci_comment, c_ctinfo.ci_magic,
1719 c_cefile, c_encoding,
1720 c_digested, c_digest[16], c_ctexbody,
1721 c_ctinitfnx, c_ceopenfnx, c_ceclosefnx, c_cesizefnx,
1722 c_umask, c_rfc934,
1723 c_showproc, c_termproc, c_storeproc, c_storage, c_folder
1724 */
1725
1726 ct->c_file = mh_xstrdup (first_alt->c_file);
1727 ct->c_type = type;
1728 ct->c_subtype = subtype;
1729
1730 ctinit = get_ct_init (ct->c_type);
1731
1732 typename = ct_type_str (type);
1733 subtypename = ct_subtype_str (type, subtype);
1734
1735 {
1736 int serial = 0;
1737 int found_boundary = 1;
1738
1739 while (found_boundary && serial < 1000000) {
1740 found_boundary = 0;
1741
1742 /* Ensure that the boundary doesn't appear in the decoded
1743 content. */
1744 if (new_part->c_cefile.ce_file) {
1745 if ((found_boundary =
1746 boundary_in_content (&new_part->c_cefile.ce_fp,
1747 new_part->c_cefile.ce_file,
1748 boundary)) == NOTOK) {
1749 free_content (ct);
1750 return NULL;
1751 }
1752 }
1753
1754 /* Ensure that the boundary doesn't appear in the encoded
1755 content. */
1756 if (! found_boundary && new_part->c_file) {
1757 if ((found_boundary =
1758 boundary_in_content (&new_part->c_fp,
1759 new_part->c_file,
1760 boundary)) == NOTOK) {
1761 free_content (ct);
1762 return NULL;
1763 }
1764 }
1765
1766 if (found_boundary) {
1767 /* Try a slightly different boundary. */
1768 char buffer2[16];
1769
1770 free (boundary);
1771 ++serial;
1772 snprintf (buffer2, sizeof buffer2, "%d", serial);
1773 boundary =
1774 concat (boundary_prefix,
1775 FENDNULL(first_alt->c_partno),
1776 "-", buffer2, NULL);
1777 }
1778 }
1779
1780 if (found_boundary) {
1781 inform("giving up trying to find a unique boundary");
1782 free_content (ct);
1783 return NULL;
1784 }
1785 }
1786
1787 name = concat (" ", typename, "/", subtypename, boundary_indicator, "\"",
1788 boundary, "\"", NULL);
1789
1790 /* Load c_first_hf and c_last_hf. */
1791 transfer_noncontent_headers (first_alt, ct);
1792 add_header (ct, mh_xstrdup (TYPE_FIELD), concat (name, "\n", NULL));
1793 free (name);
1794
1795 /* Load c_partno. */
1796 if (first_alt->c_partno) {
1797 ct->c_partno = mh_xstrdup (first_alt->c_partno);
1798 free (first_alt->c_partno);
1799 first_alt->c_partno = concat (ct->c_partno, ".1", NULL);
1800 new_part->c_partno = concat (ct->c_partno, ".2", NULL);
1801 } else {
1802 first_alt->c_partno = mh_xstrdup ("1");
1803 new_part->c_partno = mh_xstrdup ("2");
1804 }
1805
1806 if (ctinit) {
1807 ct->c_ctinfo.ci_type = mh_xstrdup (typename);
1808 ct->c_ctinfo.ci_subtype = mh_xstrdup (subtypename);
1809 }
1810
1811 add_param(&ct->c_ctinfo.ci_first_pm, &ct->c_ctinfo.ci_last_pm,
1812 "boundary", boundary, 0);
1813
1814 NEW(p);
1815 NEW(p->mp_next);
1816 p->mp_next->mp_next = NULL;
1817 p->mp_next->mp_part = first_alt;
1818
1819 NEW0(m);
1820 m->mp_start = concat (boundary, "\n", NULL);
1821 m->mp_stop = concat (boundary, "--\n", NULL);
1822 m->mp_parts = p;
1823 ct->c_ctparams = m;
1824
1825 free (boundary);
1826
1827 return ct;
1828 }
1829
1830
1831 /*
1832 * Check that the boundary does not appear in the content.
1833 */
1834 static int
1835 boundary_in_content (FILE **fp, char *file, const char *boundary) {
1836 char buffer[NMH_BUFSIZ];
1837 size_t bytes_read;
1838 int found_boundary = 0;
1839
1840 /* free_content() will close *fp if we fopen it here. */
1841 if (! *fp && (*fp = fopen (file, "r")) == NULL) {
1842 advise (file, "unable to open %s for reading", file);
1843 return NOTOK;
1844 }
1845
1846 fseeko (*fp, 0L, SEEK_SET);
1847 while ((bytes_read = fread (buffer, 1, sizeof buffer, *fp)) > 0) {
1848 if (find_str (buffer, bytes_read, boundary)) {
1849 found_boundary = 1;
1850 break;
1851 }
1852 }
1853
1854 return found_boundary;
1855 }
1856
1857
1858 /*
1859 * Remove all non-Content headers.
1860 */
1861 static void
1862 transfer_noncontent_headers (CT old, CT new) {
1863 HF hp, hp_prev;
1864
1865 hp_prev = hp = old->c_first_hf;
1866 while (hp) {
1867 HF next = hp->next;
1868
1869 if (strncasecmp (XXX_FIELD_PRF, hp->name, strlen (XXX_FIELD_PRF))) {
1870 if (hp == old->c_last_hf) {
1871 if (hp == old->c_first_hf) {
1872 old->c_last_hf = old->c_first_hf = NULL;
1873 } else {
1874 hp_prev->next = NULL;
1875 old->c_last_hf = hp_prev;
1876 }
1877 } else {
1878 if (hp == old->c_first_hf) {
1879 old->c_first_hf = next;
1880 } else {
1881 hp_prev->next = next;
1882 }
1883 }
1884
1885 /* Put node hp in the new CT. */
1886 if (new->c_first_hf == NULL) {
1887 new->c_first_hf = hp;
1888 } else {
1889 new->c_last_hf->next = hp;
1890 }
1891 new->c_last_hf = hp;
1892 } else {
1893 /* A Content- header, leave in old. */
1894 hp_prev = hp;
1895 }
1896
1897 hp = next;
1898 }
1899 }
1900
1901
1902 /*
1903 * Set content type.
1904 */
1905 static int
1906 set_ct_type (CT ct, int type, int subtype, int encoding) {
1907 char *typename = ct_type_str (type);
1908 char *subtypename = ct_subtype_str (type, subtype);
1909 /* E.g, " text/plain" */
1910 char *type_subtypename = concat (" ", typename, "/", subtypename, NULL);
1911 /* E.g, " text/plain\n" */
1912 char *name_plus_nl = concat (type_subtypename, "\n", NULL);
1913 int found_content_type = 0;
1914 HF hf;
1915 const char *cp = NULL;
1916 char *ctline;
1917 int status;
1918
1919 /* Update/add Content-Type header field. */
1920 for (hf = ct->c_first_hf; hf; hf = hf->next) {
1921 if (! strcasecmp (TYPE_FIELD, hf->name)) {
1922 found_content_type = 1;
1923 free (hf->value);
1924 hf->value = (cp = strchr (ct->c_ctline, ';'))
1925 ? concat (type_subtypename, cp, "\n", NULL)
1926 : mh_xstrdup (name_plus_nl);
1927 }
1928 }
1929 if (! found_content_type) {
1930 add_header (ct, mh_xstrdup (TYPE_FIELD),
1931 (cp = strchr (ct->c_ctline, ';'))
1932 ? concat (type_subtypename, cp, "\n", NULL)
1933 : mh_xstrdup (name_plus_nl));
1934 }
1935
1936 /* Some of these might not be used, but set them anyway. */
1937 ctline = cp
1938 ? concat (type_subtypename, cp, NULL)
1939 : concat (type_subtypename, NULL);
1940 free (ct->c_ctline);
1941 ct->c_ctline = ctline;
1942 /* Leave other ctinfo members as they were. */
1943 free (ct->c_ctinfo.ci_type);
1944 ct->c_ctinfo.ci_type = mh_xstrdup (typename);
1945 free (ct->c_ctinfo.ci_subtype);
1946 ct->c_ctinfo.ci_subtype = mh_xstrdup (subtypename);
1947 ct->c_type = type;
1948 ct->c_subtype = subtype;
1949
1950 free (name_plus_nl);
1951 free (type_subtypename);
1952
1953 status = set_ce (ct, encoding);
1954
1955 return status;
1956 }
1957
1958
1959 /*
1960 * It's not necessary to update the charset parameter of a Content-Type
1961 * header for a text part. According to RFC 2045 Sec. 6.4, the body
1962 * (content) was originally in the specified charset, "and will be in
1963 * that character set again after decoding."
1964 */
1965 static int
1966 decode_text_parts (CT ct, int encoding, const char *decodetypes,
1967 int *message_mods) {
1968 int status = OK;
1969 int lf_line_endings = 0;
1970
1971 switch (ct->c_type) {
1972 case CT_MULTIPART: {
1973 struct multipart *m = (struct multipart *) ct->c_ctparams;
1974 struct part *part;
1975
1976 /* Should check to see if the body for this part is encoded?
1977 For now, it gets passed along as-is by InitMultiPart(). */
1978 for (part = m->mp_parts; status == OK && part; part = part->mp_next) {
1979 status = decode_text_parts (part->mp_part, encoding, decodetypes,
1980 message_mods);
1981 }
1982 break;
1983 }
1984
1985 case CT_MESSAGE:
1986 if (ct->c_subtype == MESSAGE_EXTERNAL) {
1987 struct exbody *e = (struct exbody *) ct->c_ctparams;
1988
1989 status = decode_text_parts (e->eb_content, encoding, decodetypes,
1990 message_mods);
1991 }
1992 break;
1993
1994 default:
1995 if (! should_decode(decodetypes, ct->c_ctinfo.ci_type, ct->c_ctinfo.ci_subtype)) {
1996 break;
1997 }
1998
1999 lf_line_endings =
2000 ct->c_ctparams && ((struct text *) ct->c_ctparams)->lf_line_endings;
2001
2002 switch (ct->c_encoding) {
2003 case CE_BASE64:
2004 case CE_QUOTED: {
2005 int ct_encoding;
2006
2007 if (decode_part (ct) == OK && ct->c_cefile.ce_file) {
2008 const char *reason = NULL;
2009
2010 if ((ct_encoding = content_encoding (ct, &reason)) == CE_BINARY
2011 && encoding != CE_BINARY) {
2012 /* The decoding isn't acceptable so discard it.
2013 Leave status as OK to allow other transformations. */
2014 if (verbosw) {
2015 report (NULL, ct->c_partno, ct->c_file,
2016 "will not decode%s because it is binary (%s)",
2017 ct->c_partno ? ""
2018 : (FENDNULL(ct->c_ctline)),
2019 reason);
2020 }
2021 (void) m_unlink (ct->c_cefile.ce_file);
2022 free (ct->c_cefile.ce_file);
2023 ct->c_cefile.ce_file = NULL;
2024 } else if (ct->c_encoding == CE_QUOTED &&
2025 ct_encoding == CE_8BIT && encoding == CE_7BIT) {
2026 /* The decoding isn't acceptable so discard it.
2027 Leave status as OK to allow other transformations. */
2028 if (verbosw) {
2029 report (NULL, ct->c_partno, ct->c_file,
2030 "will not decode%s because it is 8bit",
2031 ct->c_partno ? ""
2032 : (FENDNULL(ct->c_ctline)));
2033 }
2034 (void) m_unlink (ct->c_cefile.ce_file);
2035 free (ct->c_cefile.ce_file);
2036 ct->c_cefile.ce_file = NULL;
2037 } else {
2038 int enc;
2039
2040 if (ct_encoding == CE_BINARY) {
2041 enc = CE_BINARY;
2042 } else if (ct_encoding == CE_8BIT && encoding == CE_7BIT) {
2043 enc = CE_QUOTED;
2044 } else {
2045 enc = ct_encoding;
2046 }
2047 if (set_ce (ct, enc) == OK) {
2048 ++*message_mods;
2049 if (verbosw) {
2050 report (NULL, ct->c_partno, ct->c_file, "decode%s",
2051 FENDNULL(ct->c_ctline));
2052 }
2053 if (lf_line_endings) {
2054 strip_crs (ct, message_mods);
2055 }
2056 } else {
2057 status = NOTOK;
2058 }
2059 }
2060 } else {
2061 status = NOTOK;
2062 }
2063 break;
2064 }
2065 case CE_8BIT:
2066 case CE_7BIT:
2067 if (lf_line_endings) {
2068 strip_crs (ct, message_mods);
2069 }
2070 break;
2071 default:
2072 break;
2073 }
2074
2075 break;
2076 }
2077
2078 return status;
2079 }
2080
2081
/*
 * Determine if the part with type[/subtype] should be decoded, according to
 * decodetypes (which came from the -decodetypes switch).
 *
 * Returns 1 if either "type" or "type/subtype" is an element of the
 * comma-separated decodetypes list (compared without regard to case),
 * 0 otherwise.
 */
static int
should_decode(const char *decodetypes, const char *type, const char *subtype) {
    /* Bracket both the list and the candidate with commas, so that a plain
       substring search only ever matches a whole list element. */
    char *haystack = concat(",", decodetypes, ",", NULL);
    char *needle = concat(",", type, ",", NULL);
    int matched = nmh_strcasestr(haystack, needle) != NULL;

    if (! matched  &&  subtype != NULL) {
        char *full_needle = concat(",", type, "/", subtype, ",", NULL);

        matched = nmh_strcasestr(haystack, full_needle) != NULL;
        free(full_needle);
    }

    free(needle);
    free(haystack);

    return matched;
}
2113
2114
2115 /*
2116 * See if the decoded content is 7bit, 8bit, or binary. It's binary
2117 * if it has any NUL characters, a CR not followed by a LF, or lines
2118 * greater than 998 characters in length. If binary, reason is set
2119 * to a string explaining why.
2120 */
2121 static int
2122 content_encoding (CT ct, const char **reason) {
2123 CE ce = &ct->c_cefile;
2124 int encoding = CE_7BIT;
2125
2126 if (ce->ce_file) {
2127 size_t line_len = 0;
2128 char buffer[NMH_BUFSIZ];
2129 size_t inbytes;
2130
2131 if (! ce->ce_fp && (ce->ce_fp = fopen (ce->ce_file, "r")) == NULL) {
2132 advise (ce->ce_file, "unable to open for reading");
2133 return CE_UNKNOWN;
2134 }
2135
2136 fseeko (ce->ce_fp, 0L, SEEK_SET);
2137 while (encoding != CE_BINARY &&
2138 (inbytes = fread (buffer, 1, sizeof buffer, ce->ce_fp)) > 0) {
2139 char *cp;
2140 size_t i;
2141 int last_char_was_cr = 0;
2142
2143 for (i = 0, cp = buffer; i < inbytes; ++i, ++cp) {
2144 if (*cp == '\0' || ++line_len > 998 ||
2145 (*cp != '\n' && last_char_was_cr)) {
2146 encoding = CE_BINARY;
2147 if (*cp == '\0') {
2148 *reason = "null character";
2149 } else if (line_len > 998) {
2150 *reason = "line length > 998";
2151 } else if (*cp != '\n' && last_char_was_cr) {
2152 *reason = "CR not followed by LF";
2153 } else {
2154 /* Should not reach this. */
2155 *reason = "";
2156 }
2157 break;
2158 }
2159 if (*cp == '\n') {
2160 line_len = 0;
2161 } else if (! isascii ((unsigned char) *cp)) {
2162 encoding = CE_8BIT;
2163 }
2164
2165 last_char_was_cr = *cp == '\r';
2166 }
2167 }
2168
2169 fclose (ce->ce_fp);
2170 ce->ce_fp = NULL;
2171 } /* else should never happen */
2172
2173 return encoding;
2174 }
2175
2176
2177 /*
2178 * Strip carriage returns from content.
2179 */
2180 static int
2181 strip_crs (CT ct, int *message_mods) {
2182 char *charset = content_charset (ct);
2183 int status = OK;
2184
2185 /* Only strip carriage returns if content is ASCII or another
2186 charset that has the same readily recognizable CR followed by a
2187 LF. We can include UTF-8 here because if the high-order bit of
2188 a UTF-8 byte is 0, then it must be a single-byte ASCII
2189 character. */
2190 if (! strcasecmp (charset, "US-ASCII") ||
2191 ! strcasecmp (charset, "UTF-8") ||
2192 ! strncasecmp (charset, "ISO-8859-", 9) ||
2193 ! strncasecmp (charset, "WINDOWS-12", 10)) {
2194 char **file = NULL;
2195 FILE **fp = NULL;
2196 size_t begin;
2197 size_t end;
2198 int has_crs = 0;
2199 int opened_input_file = 0;
2200
2201 if (ct->c_cefile.ce_file) {
2202 file = &ct->c_cefile.ce_file;
2203 fp = &ct->c_cefile.ce_fp;
2204 begin = end = 0;
2205 } else if (ct->c_file) {
2206 file = &ct->c_file;
2207 fp = &ct->c_fp;
2208 begin = (size_t) ct->c_begin;
2209 end = (size_t) ct->c_end;
2210 } /* else don't know where the content is */
2211
2212 if (file && *file && fp) {
2213 if (! *fp) {
2214 if ((*fp = fopen (*file, "r")) == NULL) {
2215 advise (*file, "unable to open for reading");
2216 status = NOTOK;
2217 } else {
2218 opened_input_file = 1;
2219 }
2220 }
2221 }
2222
2223 if (fp && *fp) {
2224 char buffer[NMH_BUFSIZ];
2225 size_t bytes_read;
2226 size_t bytes_to_read =
2227 end > 0 && end > begin ? end - begin : sizeof buffer;
2228
2229 fseeko (*fp, begin, SEEK_SET);
2230 while ((bytes_read = fread (buffer, 1,
2231 min (bytes_to_read, sizeof buffer),
2232 *fp)) > 0) {
2233 /* Look for CR followed by a LF. This is supposed to
2234 be text so there should be LF's. If not, don't
2235 modify the content. */
2236 char *cp;
2237 size_t i;
2238 int last_char_was_cr = 0;
2239
2240 if (end > 0) { bytes_to_read -= bytes_read; }
2241
2242 for (i = 0, cp = buffer; i < bytes_read; ++i, ++cp) {
2243 if (*cp == '\n' && last_char_was_cr) {
2244 has_crs = 1;
2245 break;
2246 }
2247
2248 last_char_was_cr = *cp == '\r';
2249 }
2250 }
2251
2252 if (has_crs) {
2253 int fd;
2254 char *stripped_content_file;
2255 char *tempfile = m_mktemp2 (NULL, invo_name, &fd, NULL);
2256
2257 if (tempfile == NULL) {
2258 adios (NULL, "unable to create temporary file in %s",
2259 get_temp_dir());
2260 }
2261 stripped_content_file = mh_xstrdup (tempfile);
2262
2263 /* Strip each CR before a LF from the content. */
2264 fseeko (*fp, begin, SEEK_SET);
2265 while ((bytes_read = fread (buffer, 1, sizeof buffer, *fp)) >
2266 0) {
2267 char *cp;
2268 size_t i;
2269 int last_char_was_cr = 0;
2270
2271 for (i = 0, cp = buffer; i < bytes_read; ++i, ++cp) {
2272 if (*cp == '\r') {
2273 last_char_was_cr = 1;
2274 } else if (last_char_was_cr) {
2275 if (*cp != '\n') {
2276 if (write (fd, "\r", 1) < 0) {
2277 advise (tempfile, "CR write");
2278 }
2279 }
2280 if (write (fd, cp, 1) < 0) {
2281 advise (tempfile, "write");
2282 }
2283 last_char_was_cr = 0;
2284 } else {
2285 if (write (fd, cp, 1) < 0) {
2286 advise (tempfile, "write");
2287 }
2288 last_char_was_cr = 0;
2289 }
2290 }
2291 }
2292
2293 if (close (fd)) {
2294 inform("unable to write temporary file %s, continuing...",
2295 stripped_content_file);
2296 (void) m_unlink (stripped_content_file);
2297 status = NOTOK;
2298 } else {
2299 /* Replace the decoded file with the converted one. */
2300 if (ct->c_cefile.ce_file && ct->c_cefile.ce_unlink)
2301 (void) m_unlink (ct->c_cefile.ce_file);
2302
2303 mh_xfree(ct->c_cefile.ce_file);
2304 ct->c_cefile.ce_file = stripped_content_file;
2305 ct->c_cefile.ce_unlink = 1;
2306
2307 ++*message_mods;
2308 if (verbosw) {
2309 report (NULL, ct->c_partno,
2310 begin == 0 && end == 0 ? "" : *file,
2311 "stripped CRs");
2312 }
2313 }
2314 }
2315
2316 if (opened_input_file) {
2317 fclose (*fp);
2318 *fp = NULL;
2319 }
2320 }
2321 }
2322
2323 free (charset);
2324
2325 return status;
2326 }
2327
2328
2329 /*
2330 * Add/update, if necessary, the message C-T-E, based on the least restrictive
2331 * of the part C-T-E's.
2332 */
2333 static void
2334 update_cte (CT ct) {
2335 const int least_restrictive_enc = least_restrictive_encoding (ct);
2336
2337 if (least_restrictive_enc != CE_UNKNOWN &&
2338 least_restrictive_enc != CE_7BIT) {
2339 char *cte = concat (" ", ce_str (least_restrictive_enc), "\n", NULL);
2340 HF hf;
2341 int found_cte = 0;
2342
2343 /* Update/add Content-Transfer-Encoding header field. */
2344 for (hf = ct->c_first_hf; hf; hf = hf->next) {
2345 if (! strcasecmp (ENCODING_FIELD, hf->name)) {
2346 found_cte = 1;
2347 free (hf->value);
2348 hf->value = cte;
2349 }
2350 }
2351 if (! found_cte) {
2352 add_header (ct, mh_xstrdup (ENCODING_FIELD), cte);
2353 }
2354 }
2355 }
2356
2357
2358 /*
2359 * Find the least restrictive encoding (7bit, 8bit, binary) of the parts
2360 * within a message.
2361 */
2362 static int
2363 least_restrictive_encoding (CT ct) {
2364 int encoding = CE_UNKNOWN;
2365
2366 switch (ct->c_type) {
2367 case CT_MULTIPART: {
2368 struct multipart *m = (struct multipart *) ct->c_ctparams;
2369 struct part *part;
2370
2371 for (part = m->mp_parts; part; part = part->mp_next) {
2372 const int part_encoding =
2373 least_restrictive_encoding (part->mp_part);
2374
2375 if (less_restrictive (encoding, part_encoding)) {
2376 encoding = part_encoding;
2377 }
2378 }
2379 break;
2380 }
2381
2382 case CT_MESSAGE:
2383 if (ct->c_subtype == MESSAGE_EXTERNAL) {
2384 struct exbody *e = (struct exbody *) ct->c_ctparams;
2385 const int part_encoding =
2386 least_restrictive_encoding (e->eb_content);
2387
2388 if (less_restrictive (encoding, part_encoding)) {
2389 encoding = part_encoding;
2390 }
2391 }
2392 break;
2393
2394 default: {
2395 if (less_restrictive (encoding, ct->c_encoding)) {
2396 encoding = ct->c_encoding;
2397 }
2398 }}
2399
2400 return encoding;
2401 }
2402
2403
2404 /*
2405 * Return whether the second encoding is less restrictive than the first, where
2406 * "less restrictive" is in the sense used by RFC 2045 Secs. 6.1 and 6.4. So,
2407 * CE_BINARY is less restrictive than CE_8BIT and
2408 * CE_8BIT is less restrictive than CE_7BIT.
2409 */
2410 static int
2411 less_restrictive (int encoding, int second_encoding) {
2412 switch (second_encoding) {
2413 case CE_BINARY:
2414 return encoding != CE_BINARY;
2415 case CE_8BIT:
2416 return encoding != CE_BINARY && encoding != CE_8BIT;
2417 case CE_7BIT:
2418 return encoding != CE_BINARY && encoding != CE_8BIT &&
2419 encoding != CE_7BIT;
2420 default :
2421 return 0;
2422 }
2423 }
2424
2425
2426 /*
2427 * Convert character set of each part.
2428 */
2429 static int
2430 convert_charsets (CT ct, char *dest_charset, int *message_mods) {
2431 int status = OK;
2432
2433 switch (ct->c_type) {
2434 case CT_TEXT:
2435 if (ct->c_subtype == TEXT_PLAIN) {
2436 status = convert_charset (ct, dest_charset, message_mods);
2437 if (status == OK) {
2438 if (verbosw) {
2439 char *ct_charset = content_charset (ct);
2440
2441 report (NULL, ct->c_partno, ct->c_file,
2442 "convert %s to %s", ct_charset, dest_charset);
2443 free (ct_charset);
2444 }
2445 } else {
2446 char *ct_charset = content_charset (ct);
2447
2448 report ("iconv", ct->c_partno, ct->c_file,
2449 "failed to convert %s to %s", ct_charset, dest_charset);
2450 free (ct_charset);
2451 }
2452 }
2453 break;
2454
2455 case CT_MULTIPART: {
2456 struct multipart *m = (struct multipart *) ct->c_ctparams;
2457 struct part *part;
2458
2459 /* Should check to see if the body for this part is encoded?
2460 For now, it gets passed along as-is by InitMultiPart(). */
2461 for (part = m->mp_parts; status == OK && part; part = part->mp_next) {
2462 status =
2463 convert_charsets (part->mp_part, dest_charset, message_mods);
2464 }
2465 break;
2466 }
2467
2468 case CT_MESSAGE:
2469 if (ct->c_subtype == MESSAGE_EXTERNAL) {
2470 struct exbody *e = (struct exbody *) ct->c_ctparams;
2471
2472 status =
2473 convert_charsets (e->eb_content, dest_charset, message_mods);
2474 }
2475 break;
2476
2477 default:
2478 break;
2479 }
2480
2481 return status;
2482 }
2483
2484
2485 /*
2486 * Fix various problems that aren't handled elsewhere. These
2487 * are fixed unconditionally: there are no switches to disable
2488 * them. Currently, "problems" are these:
2489 * 1) remove extraneous semicolon at the end of a header parameter list
2490 * 2) replace RFC 2047 encoding with RFC 2231 encoding of name and
2491 * filename parameters in Content-Type and Content-Disposition
2492 * headers, respectively.
2493 */
2494 static int
2495 fix_always (CT ct, int *message_mods) {
2496 int status = OK;
2497
2498 switch (ct->c_type) {
2499 case CT_MULTIPART: {
2500 struct multipart *m = (struct multipart *) ct->c_ctparams;
2501 struct part *part;
2502
2503 for (part = m->mp_parts; status == OK && part; part = part->mp_next) {
2504 status = fix_always (part->mp_part, message_mods);
2505 }
2506 break;
2507 }
2508
2509 case CT_MESSAGE:
2510 if (ct->c_subtype == MESSAGE_EXTERNAL) {
2511 struct exbody *e = (struct exbody *) ct->c_ctparams;
2512
2513 status = fix_always (e->eb_content, message_mods);
2514 }
2515 break;
2516
2517 default: {
2518 HF hf;
2519
2520 if (ct->c_first_hf) {
2521 fix_filename_encoding (ct);
2522 }
2523
2524 for (hf = ct->c_first_hf; hf; hf = hf->next) {
2525 size_t len = strlen (hf->value);
2526
2527 if (strcasecmp (hf->name, TYPE_FIELD) != 0 &&
2528 strcasecmp (hf->name, DISPO_FIELD) != 0) {
2529 /* Only do this for Content-Type and
2530 Content-Disposition fields because those are the
2531 only headers that parse_mime() warns about. */
2532 continue;
2533 }
2534
2535 /* whitespace following a trailing ';' will be nuked as well */
2536 if (hf->value[len - 1] == '\n') {
2537 while (isspace((unsigned char)(hf->value[len - 2]))) {
2538 if (len-- == 0) { break; }
2539 }
2540 }
2541
2542 if (hf->value[len - 2] == ';') {
2543 /* Remove trailing ';' from parameter value. */
2544 hf->value[len - 2] = '\n';
2545 hf->value[len - 1] = '\0';
2546
2547 /* Also, if Content-Type parameter, remove trailing ';'
2548 from ct->c_ctline. This probably isn't necessary
2549 but can't hurt. */
2550 if (strcasecmp(hf->name, TYPE_FIELD) == 0 && ct->c_ctline) {
2551 size_t l = strlen(ct->c_ctline) - 1;
2552 while (isspace((unsigned char)(ct->c_ctline[l])) ||
2553 ct->c_ctline[l] == ';') {
2554 ct->c_ctline[l--] = '\0';
2555 if (l == 0) { break; }
2556 }
2557 }
2558
2559 ++*message_mods;
2560 if (verbosw) {
2561 report (NULL, ct->c_partno, ct->c_file,
2562 "remove trailing ; from %s parameter value",
2563 hf->name);
2564 }
2565 }
2566 }
2567 }}
2568
2569 return status;
2570 }
2571
2572
2573 /*
2574 * Factor out common code for loops in fix_filename_encoding().
2575 */
2576 static int
2577 fix_filename_param (char *name, char *value, PM *first_pm, PM *last_pm) {
2578 int fixed = 0;
2579
2580 if (has_prefix(value, "=?") && has_suffix(value, "?=")) {
2581 /* Looks like an RFC 2047 encoded parameter. */
2582 char decoded[PATH_MAX + 1];
2583
2584 if (decode_rfc2047 (value, decoded, sizeof decoded)) {
2585 /* Encode using RFC 2231. */
2586 replace_param (first_pm, last_pm, name, decoded, 0);
2587 fixed = 1;
2588 } else {
2589 inform("failed to decode %s parameter %s", name, value);
2590 }
2591 }
2592
2593 return fixed;
2594 }
2595
2596
2597 /*
2598 * Replace RFC 2047 encoding with RFC 2231 encoding of name and
2599 * filename parameters in Content-Type and Content-Disposition
2600 * headers, respectively.
2601 */
2602 static int
2603 fix_filename_encoding (CT ct) {
2604 PM pm;
2605 HF hf;
2606 int fixed = 0;
2607
2608 for (pm = ct->c_ctinfo.ci_first_pm; pm; pm = pm->pm_next) {
2609 if (pm->pm_name && pm->pm_value &&
2610 strcasecmp (pm->pm_name, "name") == 0) {
2611 fixed = fix_filename_param (pm->pm_name, pm->pm_value,
2612 &ct->c_ctinfo.ci_first_pm,
2613 &ct->c_ctinfo.ci_last_pm);
2614 }
2615 }
2616
2617 for (pm = ct->c_dispo_first; pm; pm = pm->pm_next) {
2618 if (pm->pm_name && pm->pm_value &&
2619 strcasecmp (pm->pm_name, "filename") == 0) {
2620 fixed = fix_filename_param (pm->pm_name, pm->pm_value,
2621 &ct->c_dispo_first,
2622 &ct->c_dispo_last);
2623 }
2624 }
2625
2626 /* Fix hf values to correspond. */
2627 for (hf = ct->c_first_hf; fixed && hf; hf = hf->next) {
2628 enum { OTHER, TYPE_HEADER, DISPO_HEADER } field = OTHER;
2629
2630 if (strcasecmp (hf->name, TYPE_FIELD) == 0) {
2631 field = TYPE_HEADER;
2632 } else if (strcasecmp (hf->name, DISPO_FIELD) == 0) {
2633 field = DISPO_HEADER;
2634 }
2635
2636 if (field != OTHER) {
2637 const char *const semicolon_loc = strchr (hf->value, ';');
2638
2639 if (semicolon_loc) {
2640 const size_t len =
2641 strlen (hf->name) + 1 + semicolon_loc - hf->value;
2642 const char *const params =
2643 output_params (len,
2644 field == TYPE_HEADER
2645 ? ct->c_ctinfo.ci_first_pm
2646 : ct->c_dispo_first,
2647 NULL, 0);
2648 const char *const new_params = concat (params, "\n", NULL);
2649
2650 replace_substring (&hf->value, semicolon_loc, new_params);
2651 free((void *)new_params); /* Cast away const. Sigh. */
2652 free((void *)params);
2653 } else {
2654 inform("did not find semicolon in %s:%s\n",
2655 hf->name, hf->value);
2656 }
2657 }
2658 }
2659
2660 return OK;
2661 }
2662
2663
2664 /*
2665 * Output content in input file to output file.
2666 */
2667 static int
2668 write_content (CT ct, const char *input_filename, char *outfile, FILE *outfp,
2669 int modify_inplace, int message_mods) {
2670 int status = OK;
2671
2672 if (modify_inplace) {
2673 if (message_mods > 0) {
2674 if ((status = output_message_fp (ct, outfp, outfile)) == OK) {
2675 char *infile = input_filename
2676 ? mh_xstrdup (input_filename)
2677 : mh_xstrdup (ct->c_file ? ct->c_file : "-");
2678
2679 if (remove_file (infile) == OK) {
2680 if (rename (outfile, infile)) {
2681 /* Rename didn't work, possibly because of an
2682 attempt to rename across filesystems. Try
2683 brute force copy. */
2684 int old = open (outfile, O_RDONLY);
2685 int new =
2686 open (infile, O_WRONLY | O_CREAT, m_gmprot ());
2687 int i = -1;
2688
2689 if (old != -1 && new != -1) {
2690 char buffer[NMH_BUFSIZ];
2691
2692 while ((i = read (old, buffer, sizeof buffer)) >
2693 0) {
2694 if (write (new, buffer, i) != i) {
2695 i = -1;
2696 break;
2697 }
2698 }
2699 }
2700 if (new != -1) { close (new); }
2701 if (old != -1) { close (old); }
2702 (void) m_unlink (outfile);
2703
2704 if (i < 0) {
2705 /* The -file argument processing used path() to
2706 expand filename to absolute path. */
2707 int file = ct->c_file && ct->c_file[0] == '/';
2708
2709 inform("unable to rename %s %s to %s, continuing...",
2710 file ? "file" : "message", outfile,
2711 infile);
2712 status = NOTOK;
2713 }
2714 }
2715 } else {
2716 inform("unable to remove input file %s, "
2717 "not modifying it, continuing...", infile);
2718 (void) m_unlink (outfile);
2719 status = NOTOK;
2720 }
2721
2722 free (infile);
2723 } else {
2724 status = NOTOK;
2725 }
2726 } else {
2727 /* No modifications and didn't need the tmp outfile. */
2728 (void) m_unlink (outfile);
2729 }
2730 } else {
2731 /* Output is going to some file. Produce it whether or not
2732 there were modifications. */
2733 status = output_message_fp (ct, outfp, outfile);
2734 }
2735
2736 flush_errors ();
2737 return status;
2738 }
2739
2740
2741 /*
2742 * parse_mime() does not set lf_line_endings in struct text, so use this
2743 * function to do it. It touches the parts the decodetypes identifies.
2744 */
2745 static void
2746 set_text_ctparams(CT ct, char *decodetypes, int lf_line_endings) {
2747 switch (ct->c_type) {
2748 case CT_MULTIPART: {
2749 struct multipart *m = (struct multipart *) ct->c_ctparams;
2750 struct part *part;
2751
2752 for (part = m->mp_parts; part; part = part->mp_next) {
2753 set_text_ctparams(part->mp_part, decodetypes, lf_line_endings);
2754 }
2755 break;
2756 }
2757
2758 case CT_MESSAGE:
2759 if (ct->c_subtype == MESSAGE_EXTERNAL) {
2760 struct exbody *e = (struct exbody *) ct->c_ctparams;
2761
2762 set_text_ctparams(e->eb_content, decodetypes, lf_line_endings);
2763 }
2764 break;
2765
2766 default:
2767 if (should_decode(decodetypes, ct->c_ctinfo.ci_type, ct->c_ctinfo.ci_subtype)) {
2768 if (ct->c_ctparams == NULL) {
2769 ct->c_ctparams = mh_xcalloc(1, sizeof (struct text));
2770 }
2771 ((struct text *) ct->c_ctparams)->lf_line_endings = lf_line_endings;
2772 }
2773 }
2774 }
2775
2776
2777 /*
2778 * If "rmmproc" is defined, call that to remove the file. Otherwise,
2779 * use the standard MH backup file.
2780 */
2781 static int
2782 remove_file (const char *file) {
2783 if (rmmproc) {
2784 char *rmm_command = concat (rmmproc, " ", file, NULL);
2785 int status = system (rmm_command);
2786
2787 free (rmm_command);
2788 return WIFEXITED (status) ? WEXITSTATUS (status) : NOTOK;
2789 }
2790 /* This is OK for a non-message file, it still uses the
2791 BACKUP_PREFIX form. The backup file will be in the same
2792 directory as file. */
2793 return rename (file, m_backup (file));
2794 }
2795
2796
2797 /*
2798 * Output formatted message to user.
2799 */
2800 static void
2801 report (char *what, char *partno, char *filename, char *message, ...) {
2802 va_list args;
2803 char *fmt;
2804
2805 if (verbosw) {
2806 va_start (args, message);
2807 fmt = concat (filename, partno ? " part " : ", ",
2808 FENDNULL(partno), partno ? ", " : "", message, NULL);
2809
2810 advertise (what, NULL, fmt, args);
2811
2812 free (fmt);
2813 va_end (args);
2814 }
2815 }
2816
2817
/*
 * Signal handler:  terminate the program with exit status 1.  On SIGQUIT,
 * first flush stdout and write a newline to stderr.
 */
static void
pipeser (int i)
{
    const int got_quit = (i == SIGQUIT);

    if (got_quit) {
        fflush (stdout);
        fputs ("\n", stderr);
        fflush (stderr);
    }

    done (1);
    /* NOTREACHED */
}