]> diplodocus.org Git - nmh/blob - uip/mhfixmsg.c
Reverted commit 9a4b4a3d3b27fe4a7ff6d0b8724ce1c06b5917eb.
[nmh] / uip / mhfixmsg.c
1 /*
2 * mhfixmsg.c -- rewrite a message with various transformations
3 *
4 * This code is Copyright (c) 2002 and 2013, by the authors of nmh.
5 * See the COPYRIGHT file in the root directory of the nmh
6 * distribution for complete copyright information.
7 */
8
9 #include <h/mh.h>
10 #include <h/mime.h>
11 #include <h/mhparse.h>
12 #include <h/utils.h>
13 #include <h/signals.h>
14 #include <fcntl.h>
15
/*
 * Command-line switch table, written as an X-macro list.  Each
 * X(sw, minchars, id) entry names one switch string, its minchars value
 * and the identifier used for it in the "case" labels of main().  The
 * list is expanded twice below: once with X() producing just the
 * identifier, and once with X() producing a { sw, minchars, id }
 * initializer for the "switches" array that main() passes to smatch().
 */
16 #define MHFIXMSG_SWITCHES \
17 X("decodetext 8bit|7bit|binary", 0, DECODETEXTSW) \
18 X("nodecodetext", 0, NDECODETEXTSW) \
19 X("decodetypes", 0, DECODETYPESW) \
20 X("crlflinebreaks", 0, CRLFLINEBREAKSSW) \
21 X("nocrlflinebreaks", 0, NCRLFLINEBREAKSSW) \
22 X("textcharset", 0, TEXTCHARSETSW) \
23 X("notextcharset", 0, NTEXTCHARSETSW) \
24 X("reformat", 0, REFORMATSW) \
25 X("noreformat", 0, NREFORMATSW) \
26 X("replacetextplain", 0, REPLACETEXTPLAINSW) \
27 X("noreplacetextplain", 0, NREPLACETEXTPLAINSW) \
28 X("fixboundary", 0, FIXBOUNDARYSW) \
29 X("nofixboundary", 0, NFIXBOUNDARYSW) \
30 X("fixcte", 0, FIXCOMPOSITECTESW) \
31 X("nofixcte", 0, NFIXCOMPOSITECTESW) \
32 X("fixtype mimetype", 0, FIXTYPESW) \
33 X("file file", 0, FILESW) \
34 X("outfile file", 0, OUTFILESW) \
35 X("rmmproc program", 0, RPROCSW) \
36 X("normmproc", 0, NRPRCSW) \
37 X("changecur", 0, CHGSW) \
38 X("nochangecur", 0, NCHGSW) \
39 X("verbose", 0, VERBSW) \
40 X("noverbose", 0, NVERBSW) \
41 X("version", 0, VERSIONSW) \
42 X("help", 0, HELPSW) \
43
/* First expansion: X() yields only the switch identifier. */
44 #define X(sw, minchars, id) id,
45 DEFINE_SWITCH_ENUM(MHFIXMSG);
46 #undef X
47
/* Second expansion: X() yields one initializer of the "switches" array. */
48 #define X(sw, minchars, id) { sw, minchars, id },
49 DEFINE_SWITCH_ARRAY(MHFIXMSG, switches);
50 #undef X
51
52
/* Set to 1 by -verbose and 0 by -noverbose; the fixers check it before
   calling report() to describe a change they made. */
53 int verbosw;
54 int debugsw; /* Needed by mhparse.c. */
55
/* SIGQUIT is handled by the same function as SIGPIPE (see the SIGNAL()
   calls in main()). */
56 #define quitser pipeser
57
58 /* mhparse.c */
59 extern int skip_mp_cte_check; /* flag to InitMultiPart */
60 extern int suppress_bogus_mp_content_warning; /* flag to InitMultiPart */
61 extern int bogus_mp_content; /* flag from InitMultiPart */
62 /* flags to/from parse_header_attrs */
63 extern int suppress_extraneous_trailing_semicolon_warning;
64
65 /* mhoutsbr.c */
66 int output_message_fp (CT, FILE *, char *);
67
68 /* mhmisc.c */
69 void flush_errors (void);
70
71 /* mhfree.c */
72 extern CT *cts;
73
74 /*
75 * static prototypes
76 */
/*
 * The set of transformations selected on the command line.  main() fills
 * one of these in and mhfixmsgsbr() consults it for every message.
 */
77 typedef struct fix_transformations {
/* Nonzero to run fix_boundary() (-fixboundary, on by default). */
78 int fixboundary;
/* Nonzero to run fix_composite_cte() (-fixcte, on by default). */
79 int fixcompositecte;
/* type/subtype strings collected from -fixtype, or NULL if none were
   given; when non-NULL, fix_types() is run with them. */
80 svector_t fixtypes;
/* Nonzero to run ensure_text_plain() (-reformat, on by default). */
81 int reformat;
/* Passed through to ensure_text_plain() (-replacetextplain). */
82 int replacetextplain;
/* 0 for -nodecodetext, otherwise CE_8BIT (the default), CE_7BIT or
   CE_BINARY; passed to decode_text_parts(). */
83 int decodetext;
/* Type list from -decodetypes; defaults to "text,application/ics". */
84 char *decodetypes;
85 /* Nonzero to use LF-only line endings (-nocrlflinebreaks); 0, the default, keeps the CRLF linebreaks of RFC 2046 Sec. 4.1.1, par.1 (-crlflinebreaks). */
86 int lf_line_endings;
/* Character set from -textcharset, or NULL; when non-NULL,
   convert_charsets() is run with it. */
87 char *textcharset;
88 } fix_transformations;
89
90 int mhfixmsgsbr (CT *, char *, const fix_transformations *, FILE **, char *,
91 FILE **);
92 static int fix_boundary (CT *, int *);
93 static int copy_input_to_output (const char *, FILE *, const char *, FILE *);
94 static int get_multipart_boundary (CT, char **);
95 static int replace_boundary (CT, char *, char *);
96 static int fix_types (CT, svector_t, int *);
97 static char *replace_substring (char **, const char *, const char *);
98 static char *remove_parameter (char *, const char *);
99 static int fix_composite_cte (CT, int *);
100 static int set_ce (CT, int);
101 static int ensure_text_plain (CT *, CT, int *, int);
102 static int find_textplain_sibling (CT, int, int *);
103 static int insert_new_text_plain_part (CT, int, CT);
104 static CT build_text_plain_part (CT);
105 static int insert_into_new_mp_alt (CT *, int *);
106 static CT divide_part (CT);
107 static void copy_ctinfo (CI, CI);
108 static int decode_part (CT);
109 static int reformat_part (CT, char *, char *, char *, int);
110 static CT build_multipart_alt (CT, CT, int, int);
111 static int boundary_in_content (FILE **, char *, const char *);
112 static void transfer_noncontent_headers (CT, CT);
113 static int set_ct_type (CT, int type, int subtype, int encoding);
114 static int decode_text_parts (CT, int, const char *, int *);
115 static int should_decode(const char *, const char *, const char *);
116 static int content_encoding (CT, const char **);
117 static int strip_crs (CT, int *);
118 static void update_cte (CT);
119 static int least_restrictive_encoding (CT);
120 static int less_restrictive (int, int);
121 static int convert_charsets (CT, char *, int *);
122 static int fix_always (CT, int *);
123 static int fix_filename_param (char *, char *, PM *, PM *);
124 static int fix_filename_encoding (CT);
125 static int write_content (CT, const char *, char *, FILE *, int, int);
126 static void set_text_ctparams(CT, char *, int);
127 static int remove_file (const char *);
128 static void report (char *, char *, char *, char *, ...);
129 static void pipeser (int);
130
131
/*
 * Entry point.  Parses the switches, then takes the message either from
 * a single file (-file, an absolute path argument, or "-" for stdin) or
 * from the selected messages of a folder.  Each message is parsed with
 * parse_mime() and handed to mhfixmsgsbr() together with the requested
 * transformations.  When a message cannot be parsed and -outfile was
 * given, the input is copied to the output unchanged.  Exits through
 * done() with the accumulated status.
 */
132 int
133 main (int argc, char **argv) {
134 int msgnum;
135 char *cp, *file = NULL, *folder = NULL;
136 char *maildir = NULL, buf[100], *outfile = NULL;
137 char **argp, **arguments;
138 struct msgs_array msgs = { 0, 0, NULL };
139 struct msgs *mp = NULL;
140 CT *ctp;
141 FILE *fp, *infp = NULL, *outfp = NULL;
142 int using_stdin = 0;
143 int chgflag = 1;
144 int status = OK;
145 fix_transformations fx;
/* Defaults: boundary, composite-CTE and reformat fixes are on, text is
   decoded to 8bit, and nothing else is requested. */
146 fx.reformat = fx.fixcompositecte = fx.fixboundary = 1;
147 fx.fixtypes = NULL;
148 fx.replacetextplain = 0;
149 fx.decodetext = CE_8BIT;
150 fx.decodetypes = "text,application/ics"; /* Default, per man page. */
151 fx.lf_line_endings = 0;
152 fx.textcharset = NULL;
153
154 if (nmh_init(argv[0], 2)) { return 1; }
155
156 arguments = getarguments (invo_name, argc, argv, 1);
157 argp = arguments;
158
159 /*
160 * Parse arguments
161 */
162 while ((cp = *argp++)) {
163 if (*cp == '-') {
164 switch (smatch (++cp, switches)) {
165 case AMBIGSW:
166 ambigsw (cp, switches);
167 done (1);
168 case UNKWNSW:
169 adios (NULL, "-%s unknown", cp);
170
171 case HELPSW:
172 snprintf (buf, sizeof buf, "%s [+folder] [msgs] [switches]",
173 invo_name);
174 print_help (buf, switches, 1);
175 done (0);
176 case VERSIONSW:
177 print_version(invo_name);
178 done (0);
179
/* For switches that take a value, argp has already been advanced past
   the value, so argp[-2] is the switch itself. */
180 case DECODETEXTSW:
181 if (! (cp = *argp++) || *cp == '-') {
182 adios (NULL, "missing argument to %s", argp[-2]);
183 }
184 if (! strcasecmp (cp, "8bit")) {
185 fx.decodetext = CE_8BIT;
186 } else if (! strcasecmp (cp, "7bit")) {
187 fx.decodetext = CE_7BIT;
188 } else if (! strcasecmp (cp, "binary")) {
189 fx.decodetext = CE_BINARY;
190 } else {
191 adios (NULL, "invalid argument to %s", argp[-2]);
192 }
193 continue;
194 case NDECODETEXTSW:
195 fx.decodetext = 0;
196 continue;
197 case DECODETYPESW:
198 if (! (cp = *argp++) || *cp == '-') {
199 adios (NULL, "missing argument to %s", argp[-2]);
200 }
201 fx.decodetypes = cp;
202 continue;
203 case CRLFLINEBREAKSSW:
204 fx.lf_line_endings = 0;
205 continue;
206 case NCRLFLINEBREAKSSW:
207 fx.lf_line_endings = 1;
208 continue;
/* The (*cp == '-' && cp[1]) form used below accepts a lone "-" as a
   value while still rejecting anything that looks like another switch. */
209 case TEXTCHARSETSW:
210 if (! (cp = *argp++) || (*cp == '-' && cp[1])) {
211 adios (NULL, "missing argument to %s", argp[-2]);
212 }
213 fx.textcharset = cp;
214 continue;
215 case NTEXTCHARSETSW:
216 fx.textcharset = 0;
217 continue;
218 case FIXBOUNDARYSW:
219 fx.fixboundary = 1;
220 continue;
221 case NFIXBOUNDARYSW:
222 fx.fixboundary = 0;
223 continue;
224 case FIXCOMPOSITECTESW:
225 fx.fixcompositecte = 1;
226 continue;
227 case NFIXCOMPOSITECTESW:
228 fx.fixcompositecte = 0;
229 continue;
230 case FIXTYPESW:
231 if (! (cp = *argp++) || (*cp == '-' && cp[1])) {
232 adios (NULL, "missing argument to %s", argp[-2]);
233 }
/* Only leaf types may be fixed: multipart/ and message/ are rejected,
   and the value must contain a '/'. */
234 if (! strncasecmp (cp, "multipart/", 10) ||
235 ! strncasecmp (cp, "message/", 8)) {
236 adios (NULL, "-fixtype %s not allowed", cp);
237 } else if (! strchr (cp, '/')) {
238 adios (NULL, "-fixtype requires type/subtype");
239 }
240 if (fx.fixtypes == NULL) { fx.fixtypes = svector_create (10); }
241 svector_push_back (fx.fixtypes, cp);
242 continue;
243 case REFORMATSW:
244 fx.reformat = 1;
245 continue;
246 case NREFORMATSW:
247 fx.reformat = 0;
248 continue;
249 case REPLACETEXTPLAINSW:
250 fx.replacetextplain = 1;
251 continue;
252 case NREPLACETEXTPLAINSW:
253 fx.replacetextplain = 0;
254 continue;
255 case FILESW:
256 if (! (cp = *argp++) || (*cp == '-' && cp[1])) {
257 adios (NULL, "missing argument to %s", argp[-2]);
258 }
/* "-" (stdin) is kept verbatim; any other name goes through path(). */
259 file = *cp == '-' ? mh_xstrdup (cp) : path (cp, TFILE);
260 continue;
261 case OUTFILESW:
262 if (! (cp = *argp++) || (*cp == '-' && cp[1])) {
263 adios (NULL, "missing argument to %s", argp[-2]);
264 }
/* "-" (stdout) is kept verbatim; any other name goes through path(). */
265 outfile = *cp == '-' ? mh_xstrdup (cp) : path (cp, TFILE);
266 continue;
267 case RPROCSW:
268 if (!(rmmproc = *argp++) || *rmmproc == '-') {
269 adios (NULL, "missing argument to %s", argp[-2]);
270 }
271 continue;
272 case NRPRCSW:
273 rmmproc = NULL;
274 continue;
275 case CHGSW:
276 chgflag = 1;
277 continue;
278 case NCHGSW:
279 chgflag = 0;
280 continue;
281 case VERBSW:
282 verbosw = 1;
283 continue;
284 case NVERBSW:
285 verbosw = 0;
286 continue;
287 }
288 }
/* Not a switch: a +folder/@folder, an absolute file name, or a message
   specification. */
289 if (*cp == '+' || *cp == '@') {
290 if (folder) {
291 adios (NULL, "only one folder at a time!");
292 } else {
293 folder = pluspath (cp);
294 }
295 } else {
296 if (*cp == '/') {
297 /* Interpret a full path as a filename, not a message. */
298 file = mh_xstrdup (cp);
299 } else {
300 app_msgarg (&msgs, cp);
301 }
302 }
303 }
304
305 SIGNAL (SIGQUIT, quitser);
306 SIGNAL (SIGPIPE, pipeser);
307
308 /*
309 * Read the standard profile setup
310 */
311 if ((fp = fopen (cp = etcpath ("mhn.defaults"), "r"))) {
312 readconfig ((struct node **) 0, fp, cp, 0);
313 fclose (fp);
314 }
315
/* Set the mhparse.c flags declared at the top of this file before any
   message is parsed. */
316 suppress_bogus_mp_content_warning = skip_mp_cte_check = 1;
317 suppress_extraneous_trailing_semicolon_warning = 1;
318
319 if (! context_find ("path")) {
320 free (path ("./", TFOLDER));
321 }
322
323 if (file && msgs.size) {
324 adios (NULL, "cannot specify msg and file at same time!");
325 }
326
327 if (outfile) {
328 /* Open the outfile now, so we don't have to risk opening it
329 after running out of fds. */
330 if (strcmp (outfile, "-") == 0) {
331 outfp = stdout;
332 } else if ((outfp = fopen (outfile, "w")) == NULL) {
333 adios (outfile, "unable to open for writing");
334 }
335 }
336
337 /*
338 * check if message is coming from file
339 */
340 if (file) {
341 /* If file is stdin, create a tmp file name before parse_mime()
342 has a chance, because it might put it on a different
343 filesystem than the output file. Instead, put it in the
344 user's preferred tmp directory. */
345 CT ct;
346
347 if (! strcmp ("-", file)) {
348 int fd;
349 char *cp;
350
351 using_stdin = 1;
352
353 if ((cp = m_mktemp2 (NULL, invo_name, &fd, NULL)) == NULL) {
354 adios (NULL, "unable to create temporary file in %s",
355 get_temp_dir());
356 } else {
/* From here on "file" names the temporary copy of stdin. */
357 free (file);
358 file = mh_xstrdup (cp);
359 cpydata (STDIN_FILENO, fd, "-", file);
360 }
361
362 if (close (fd)) {
363 (void) m_unlink (file);
364 adios (NULL, "failed to write temporary file");
365 }
366 }
367
/* Two slots: the single message plus the NULL terminator that the
   processing loop below stops on. */
368 cts = mh_xcalloc(2, sizeof *cts);
369 ctp = cts;
370
371 if ((ct = parse_mime (file))) {
372 set_text_ctparams(ct, fx.decodetypes, fx.lf_line_endings);
373 *ctp++ = ct;
374 } else {
375 advise (NULL, "unable to parse message from file %s", file);
376 status = NOTOK;
377
378 /* If there's an outfile, pass the input message unchanged, so the
379 message won't get dropped from a pipeline. */
380 if (outfile) {
381 /* Something went wrong. Output might be expected, such as if
382 this were run as a filter. Just copy the input to the
383 output. */
384 if ((infp = fopen (file, "r")) == NULL) {
385 adios (file, "unable to open for reading");
386 }
387
388 if (copy_input_to_output (file, infp, outfile, outfp) != OK) {
389 advise (NULL, "unable to copy message to %s, "
390 "it might be lost\n", outfile);
391 }
392
393 fclose (infp);
394 infp = NULL;
395 }
396 }
397 } else {
398 /*
399 * message(s) are coming from a folder
400 */
401 CT ct;
402
403 if (! msgs.size) {
404 app_msgarg(&msgs, "cur");
405 }
406 if (! folder) {
407 folder = getfolder (1);
408 }
409 maildir = m_maildir (folder);
410
411 /* chdir so that error messages, esp. from MIME parser, just
412 refer to the message and not its path. */
413 if (chdir (maildir) == NOTOK) {
414 adios (maildir, "unable to change directory to");
415 }
416
417 /* read folder and create message structure */
418 if (! (mp = folder_read (folder, 1))) {
419 adios (NULL, "unable to read folder %s", folder);
420 }
421
422 /* check for empty folder */
423 if (mp->nummsg == 0) {
424 adios (NULL, "no messages in %s", folder);
425 }
426
427 /* parse all the message ranges/sequences and set SELECTED */
428 for (msgnum = 0; msgnum < msgs.size; msgnum++)
429 if (! m_convert (mp, msgs.msgs[msgnum])) {
430 done (1);
431 }
432 seq_setprev (mp); /* set the previous-sequence */
433
/* One slot per selected message plus a NULL terminator. */
434 cts = mh_xcalloc(mp->numsel + 1, sizeof *cts);
435 ctp = cts;
436
437 for (msgnum = mp->lowsel; msgnum <= mp->hghsel; msgnum++) {
438 if (is_selected(mp, msgnum)) {
439 char *msgnam = m_name (msgnum);
440
441 if ((ct = parse_mime (msgnam))) {
442 set_text_ctparams(ct, fx.decodetypes, fx.lf_line_endings);
443 *ctp++ = ct;
444 } else {
445 advise (NULL, "unable to parse message %s", msgnam);
446 status = NOTOK;
447
448 /* If there's an outfile, pass the input message
449 unchanged, so the message won't get dropped from a
450 pipeline. */
451 if (outfile) {
452 /* Something went wrong. Output might be expected,
453 such as if this were run as a filter. Just copy
454 the input to the output. */
455 /* Can't use path() here because 1) it might have been
456 called before and it caches the pwd, and 2) we call
457 chdir() after that. */
458 char *input_filename =
459 concat (maildir, "/", msgnam, NULL);
460
461 if ((infp = fopen (input_filename, "r")) == NULL) {
462 adios (input_filename,
463 "unable to open for reading");
464 }
465
466 if (copy_input_to_output (input_filename, infp,
467 outfile, outfp) != OK) {
468 advise (NULL,
469 "unable to copy message to %s, "
470 "it might be lost\n",
471 outfile);
472 }
473
474 fclose (infp);
475 infp = NULL;
476 free (input_filename);
477 }
478 }
479 }
480 }
481
482 if (chgflag) {
483 seq_setcur (mp, mp->hghsel); /* update current message */
484 }
485 seq_save (mp); /* synchronize sequences */
486 context_replace (pfolder, folder);/* update current folder */
487 context_save (); /* save the context file */
488 }
489
/* Apply the transformations to every message that parsed.  If none
   did, the exit status is forced to 1. */
490 if (*cts) {
491 for (ctp = cts; *ctp; ++ctp) {
492 status += mhfixmsgsbr (ctp, maildir, &fx, &infp, outfile, &outfp);
493 free_content (*ctp);
494
495 if (using_stdin) {
496 (void) m_unlink (file);
497
498 if (! outfile) {
499 /* Just calling m_backup() unlinks the backup file. */
500 (void) m_backup (file);
501 }
502 }
503 }
504 } else {
505 status = 1;
506 }
507
508 free (cts);
509
510 if (fx.fixtypes != NULL) { svector_free (fx.fixtypes); }
511 if (infp) { fclose (infp); } /* even if stdin */
512 if (outfp) { fclose (outfp); } /* even if stdout */
513 free (outfile);
514 free (file);
515 free (folder);
516 free (arguments);
517
518 done (status);
519 return NOTOK;
520 }
521
522
523 /*
524 * Apply transformations to one message.
525 */
526 int
527 mhfixmsgsbr (CT *ctp, char *maildir, const fix_transformations *fx,
528 FILE **infp, char *outfile, FILE **outfp) {
529 /* Store input filename in case one of the transformations, i.e.,
530 fix_boundary(), rewrites to a tmp file. */
531 char *input_filename = maildir
532 ? concat (maildir, "/", (*ctp)->c_file, NULL)
533 : mh_xstrdup ((*ctp)->c_file);
534 int modify_inplace = 0;
535 int message_mods = 0;
536 int status = OK;
537
538 /* Though the input file won't need to be opened if everything goes
539 well, do it here just in case there's a failure, and that failure is
540 running out of file descriptors. */
541 if ((*infp = fopen (input_filename, "r")) == NULL) {
542 adios (input_filename, "unable to open for reading");
543 }
544
545 if (outfile == NULL) {
546 modify_inplace = 1;
547
548 if ((*ctp)->c_file) {
549 char *tempfile;
550 /* outfp will be closed by the caller */
551 if ((tempfile = m_mktemp2 (NULL, invo_name, NULL, outfp)) ==
552 NULL) {
553 adios (NULL, "unable to create temporary file in %s",
554 get_temp_dir());
555 }
556 outfile = mh_xstrdup (tempfile);
557 } else {
558 adios (NULL, "missing both input and output filenames\n");
559 }
560 } /* else *outfp was defined by caller */
561
562 reverse_alternative_parts (*ctp);
563 status = fix_always (*ctp, &message_mods);
564 if (status == OK && fx->fixboundary) {
565 status = fix_boundary (ctp, &message_mods);
566 }
567 if (status == OK && fx->fixtypes != NULL) {
568 status = fix_types (*ctp, fx->fixtypes, &message_mods);
569 }
570 if (status == OK && fx->fixcompositecte) {
571 status = fix_composite_cte (*ctp, &message_mods);
572 }
573 if (status == OK && fx->reformat) {
574 status =
575 ensure_text_plain (ctp, NULL, &message_mods, fx->replacetextplain);
576 }
577 if (status == OK && fx->decodetext) {
578 status = decode_text_parts (*ctp, fx->decodetext, fx->decodetypes,
579 &message_mods);
580 update_cte (*ctp);
581 }
582 if (status == OK && fx->textcharset != NULL) {
583 status = convert_charsets (*ctp, fx->textcharset, &message_mods);
584 }
585
586 if (status == OK && ! (*ctp)->c_umask) {
587 /* Set the umask for the contents file. This currently
588 isn't used but just in case it is in the future. */
589 struct stat st;
590
591 if (stat ((*ctp)->c_file, &st) != NOTOK) {
592 (*ctp)->c_umask = ~(st.st_mode & 0777);
593 } else {
594 (*ctp)->c_umask = ~m_gmprot();
595 }
596 }
597
598 /*
599 * Write the content to a file
600 */
601 if (status == OK) {
602 status = write_content (*ctp, input_filename, outfile, *outfp,
603 modify_inplace, message_mods);
604 } else if (! modify_inplace) {
605 /* Something went wrong. Output might be expected, such
606 as if this were run as a filter. Just copy the input
607 to the output. */
608 if (copy_input_to_output (input_filename, *infp, outfile,
609 *outfp) != OK) {
610 advise (NULL, "unable to copy message to %s, it might be lost\n",
611 outfile);
612 }
613 }
614
615 if (modify_inplace) {
616 if (status != OK) { (void) m_unlink (outfile); }
617 free (outfile);
618 outfile = NULL;
619 }
620
621 fclose (*infp);
622 *infp = NULL;
623 free (input_filename);
624
625 return status;
626 }
627
628
629 /*
630 * Copy input message to output. Assumes not modifying in place, so this
631 * might be running as part of a pipeline.
632 */
633 static int
634 copy_input_to_output (const char *input_filename, FILE *infp,
635 const char *output_filename, FILE *outfp) {
636 int in = fileno (infp);
637 int out = fileno (outfp);
638 int status = OK;
639
640 if (in != -1 && out != -1) {
641 cpydata (in, out, input_filename, output_filename);
642 } else {
643 status = NOTOK;
644 }
645
646 return status;
647 }
648
649
650 /*
651 * Fix mismatched outer level boundary.
652 */
653 static int
654 fix_boundary (CT *ct, int *message_mods) {
655 struct multipart *mp;
656 int status = OK;
657
658 if (ct && (*ct)->c_type == CT_MULTIPART && bogus_mp_content) {
659 mp = (struct multipart *) (*ct)->c_ctparams;
660
661 /*
662 * 1) Get boundary at end of part.
663 * 2) Get boundary at beginning of part and compare to the end-of-part
664 * boundary.
665 * 3) Write out contents of ct to tmp file, replacing boundary in
666 * header with boundary from part. Set c_unlink to 1.
667 * 4) Free ct.
668 * 5) Call parse_mime() on the tmp file, replacing ct.
669 */
670
671 if (mp && mp->mp_start) {
672 char *part_boundary;
673
674 if (get_multipart_boundary (*ct, &part_boundary) == OK) {
675 char *fixed;
676
677 if ((fixed = m_mktemp2 (NULL, invo_name, NULL, &(*ct)->c_fp))) {
678 if (replace_boundary (*ct, fixed, part_boundary) == OK) {
679 char *filename = mh_xstrdup ((*ct)->c_file);
680 CT fixed_ct;
681
682 free_content (*ct);
683 if ((fixed_ct = parse_mime (fixed))) {
684 *ct = fixed_ct;
685 (*ct)->c_unlink = 1;
686
687 ++*message_mods;
688 if (verbosw) {
689 report (NULL, NULL, filename,
690 "fix multipart boundary");
691 }
692 } else {
693 *ct = NULL;
694 advise (NULL, "unable to parse fixed part");
695 status = NOTOK;
696 }
697 free (filename);
698 } else {
699 advise (NULL, "unable to replace broken boundary");
700 status = NOTOK;
701 }
702 } else {
703 advise (NULL, "unable to create temporary file in %s",
704 get_temp_dir());
705 status = NOTOK;
706 }
707
708 free (part_boundary);
709 } else {
710 /* Couldn't fix the boundary. Report failure so that mhfixmsg
711 doesn't modify the message. */
712 status = NOTOK;
713 }
714 } else {
715 /* No multipart struct, even though the content type is
716 CT_MULTIPART. Report failure so that mhfixmsg doesn't modify
717 the message. */
718 status = NOTOK;
719 }
720 }
721
722 return status;
723 }
724
725
726 /*
727 * Find boundary at end of multipart.
728 */
729 static int
730 get_multipart_boundary (CT ct, char **part_boundary) {
731 char buffer[NMH_BUFSIZ];
732 char *end_boundary = NULL;
733 off_t begin = (off_t) ct->c_end > (off_t) (ct->c_begin + sizeof buffer)
734 ? (off_t) (ct->c_end - sizeof buffer)
735 : (off_t) ct->c_begin;
736 size_t bytes_read;
737 int status = OK;
738
739 /* This will fail if the boundary spans fread() calls. NMH_BUFSIZ should
740 be big enough, even if it's just 1024, to make that unlikely. */
741
742 /* free_content() will close ct->c_fp if bogus MP boundary is fixed. */
743 if (! ct->c_fp && (ct->c_fp = fopen (ct->c_file, "r")) == NULL) {
744 advise (ct->c_file, "unable to open for reading");
745 return NOTOK;
746 }
747
748 /* Get boundary at end of multipart. */
749 while (begin >= (off_t) ct->c_begin) {
750 fseeko (ct->c_fp, begin, SEEK_SET);
751 while ((bytes_read = fread (buffer, 1, sizeof buffer, ct->c_fp)) > 0) {
752 char *cp = rfind_str (buffer, bytes_read, "--");
753
754 if (cp) {
755 char *end;
756
757 /* Trim off trailing "--" and anything beyond. */
758 *cp-- = '\0';
759 if ((end = rfind_str (buffer, cp - buffer, "\n"))) {
760 if (strlen (end) > 3 && *end++ == '\n' &&
761 *end++ == '-' && *end++ == '-') {
762 end_boundary = mh_xstrdup (end);
763 break;
764 }
765 }
766 }
767 }
768
769 if (! end_boundary && begin > (off_t) (ct->c_begin + sizeof buffer)) {
770 begin -= sizeof buffer;
771 } else {
772 break;
773 }
774 }
775
776 /* Get boundary at beginning of multipart. */
777 if (end_boundary) {
778 fseeko (ct->c_fp, ct->c_begin, SEEK_SET);
779 while ((bytes_read = fread (buffer, 1, sizeof buffer, ct->c_fp)) > 0) {
780 if (bytes_read >= strlen (end_boundary)) {
781 char *cp = find_str (buffer, bytes_read, end_boundary);
782
783 if (cp && cp - buffer >= 2 && *--cp == '-' &&
784 *--cp == '-' && (cp > buffer && *--cp == '\n')) {
785 status = OK;
786 break;
787 }
788 } else {
789 /* The start and end boundaries didn't match, or the
790 start boundary doesn't begin with "\n--" (or "--"
791 if at the beginning of buffer). Keep trying. */
792 status = NOTOK;
793 }
794 }
795 } else {
796 status = NOTOK;
797 }
798
799 if (ct->c_fp) {
800 fclose (ct->c_fp);
801 ct->c_fp = NULL;
802 }
803
804 if (status == OK) {
805 *part_boundary = end_boundary;
806 } else {
807 *part_boundary = NULL;
808 free (end_boundary);
809 }
810
811 return status;
812 }
813
814
815 /*
816 * Open and copy ct->c_file to file, replacing the multipart boundary.
817 */
818 static int
819 replace_boundary (CT ct, char *file, char *boundary) {
820 FILE *fpin, *fpout;
821 int compnum, state;
822 char buf[NMH_BUFSIZ], name[NAMESZ];
823 char *np, *vp;
824 m_getfld_state_t gstate = 0;
825 int status = OK;
826
827 if (ct->c_file == NULL) {
828 advise (NULL, "missing input filename");
829 return NOTOK;
830 }
831
832 if ((fpin = fopen (ct->c_file, "r")) == NULL) {
833 advise (ct->c_file, "unable to open for reading");
834 return NOTOK;
835 }
836
837 if ((fpout = fopen (file, "w")) == NULL) {
838 fclose (fpin);
839 advise (file, "unable to open for writing");
840 return NOTOK;
841 }
842
843 for (compnum = 1;;) {
844 int bufsz = (int) sizeof buf;
845
846 switch (state = m_getfld (&gstate, name, buf, &bufsz, fpin)) {
847 case FLD:
848 case FLDPLUS:
849 compnum++;
850
851 /* get copies of the buffers */
852 np = mh_xstrdup (name);
853 vp = mh_xstrdup (buf);
854
855 /* if necessary, get rest of field */
856 while (state == FLDPLUS) {
857 bufsz = sizeof buf;
858 state = m_getfld (&gstate, name, buf, &bufsz, fpin);
859 vp = add (buf, vp); /* add to previous value */
860 }
861
862 if (strcasecmp (TYPE_FIELD, np)) {
863 fprintf (fpout, "%s:%s", np, vp);
864 } else {
865 char *new_ctline, *new_params;
866
867 replace_param(&ct->c_ctinfo.ci_first_pm,
868 &ct->c_ctinfo.ci_last_pm, "boundary",
869 boundary, 0);
870
871 new_ctline = concat(" ", ct->c_ctinfo.ci_type, "/",
872 ct->c_ctinfo.ci_subtype, NULL);
873 new_params = output_params(strlen(TYPE_FIELD) +
874 strlen(new_ctline) + 1,
875 ct->c_ctinfo.ci_first_pm, NULL, 0);
876 fprintf (fpout, "%s:%s%s\n", np, new_ctline,
877 new_params ? new_params : "");
878 free(new_ctline);
879 mh_xfree(new_params);
880 }
881
882 free (vp);
883 free (np);
884
885 continue;
886
887 case BODY:
888 putc('\n', fpout);
889 /* buf will have a terminating NULL, skip it. */
890 if ((int) fwrite (buf, 1, bufsz-1, fpout) < bufsz-1) {
891 advise (file, "fwrite");
892 }
893 continue;
894
895 case FILEEOF:
896 break;
897
898 case LENERR:
899 case FMTERR:
900 advise (NULL, "message format error in component #%d", compnum);
901 status = NOTOK;
902 break;
903
904 default:
905 advise (NULL, "getfld() returned %d", state);
906 status = NOTOK;
907 break;
908 }
909
910 break;
911 }
912
913 m_getfld_state_destroy (&gstate);
914 fclose (fpout);
915 fclose (fpin);
916
917 return status;
918 }
919
920
921 /*
922 * Fix Content-Type header to reflect the content of its part.
923 */
924 static int
925 fix_types (CT ct, svector_t fixtypes, int *message_mods) {
926 int status = OK;
927
928 switch (ct->c_type) {
929 case CT_MULTIPART: {
930 struct multipart *m = (struct multipart *) ct->c_ctparams;
931 struct part *part;
932
933 for (part = m->mp_parts; status == OK && part; part = part->mp_next) {
934 status = fix_types (part->mp_part, fixtypes, message_mods);
935 }
936 break;
937 }
938
939 case CT_MESSAGE:
940 if (ct->c_subtype == MESSAGE_EXTERNAL) {
941 struct exbody *e = (struct exbody *) ct->c_ctparams;
942
943 status = fix_types (e->eb_content, fixtypes, message_mods);
944 }
945 break;
946
947 default: {
948 char **typep, *type;
949
950 if (ct->c_ctinfo.ci_type && ct->c_ctinfo.ci_subtype) {
951 for (typep = svector_strs (fixtypes);
952 typep && (type = *typep);
953 ++typep) {
954 char *type_subtype =
955 concat (ct->c_ctinfo.ci_type, "/", ct->c_ctinfo.ci_subtype,
956 NULL);
957
958 if (! strcasecmp (type, type_subtype) &&
959 decode_part (ct) == OK &&
960 ct->c_cefile.ce_file != NULL) {
961 char *ct_type_subtype = mime_type (ct->c_cefile.ce_file);
962 char *cp;
963
964 if ((cp = strchr (ct_type_subtype, ';'))) {
965 /* Truncate to remove any parameter list from
966 mime_type () result. */
967 *cp = '\0';
968 }
969
970 if (strcasecmp (type, ct_type_subtype)) {
971 char *ct_type, *ct_subtype;
972 HF hf;
973
974 /* The Content-Type header does not match the
975 content, so update these struct Content
976 fields to match:
977 * c_type, c_subtype
978 * c_ctinfo.ci_type, c_ctinfo.ci_subtype
979 * c_ctline
980 */
981 /* Extract type and subtype from type/subtype. */
982 ct_type = mh_xstrdup(ct_type_subtype);
983 if ((cp = strchr (ct_type, '/'))) {
984 *cp = '\0';
985 ct_subtype = mh_xstrdup(++cp);
986 } else {
987 advise (NULL, "missing / in MIME type of %s %s",
988 ct->c_file, ct->c_partno);
989 free (ct_type);
990 return NOTOK;
991 }
992
993 ct->c_type = ct_str_type (ct_type);
994 ct->c_subtype = ct_str_subtype (ct->c_type, ct_subtype);
995
996 free (ct->c_ctinfo.ci_type);
997 ct->c_ctinfo.ci_type = ct_type;
998 free (ct->c_ctinfo.ci_subtype);
999 ct->c_ctinfo.ci_subtype = ct_subtype;
1000 if (! replace_substring (&ct->c_ctline, type,
1001 ct_type_subtype)) {
1002 advise (NULL, "did not find %s in %s",
1003 type, ct->c_ctline);
1004 }
1005
1006 /* Update Content-Type header field. */
1007 for (hf = ct->c_first_hf; hf; hf = hf->next) {
1008 if (! strcasecmp (TYPE_FIELD, hf->name)) {
1009 if (replace_substring (&hf->value, type,
1010 ct_type_subtype)) {
1011 ++*message_mods;
1012 if (verbosw) {
1013 report (NULL, ct->c_partno, ct->c_file,
1014 "change Content-Type in header "
1015 "from %s to %s",
1016 type, ct_type_subtype);
1017 }
1018 break;
1019 } else {
1020 advise (NULL, "did not find %s in %s",
1021 type, hf->value);
1022 }
1023 }
1024 }
1025 }
1026 free (ct_type_subtype);
1027 }
1028 free (type_subtype);
1029 }
1030 }
1031 }}
1032
1033 return status;
1034 }
1035
1036
1037 /*
1038 * Replace a substring, allocating space to hold the new one.
1039 */
/*
 * Replace the first occurrence of old in *str with new.  The result is
 * built in newly allocated space; the previous *str is freed and *str is
 * set to the new string.  Returns the new string, or NULL, leaving *str
 * untouched, if old does not occur in *str.
 */
char *
replace_substring (char **str, const char *old, const char *new) {
    char *hit = strstr (*str, old);
    const char *tail;
    char *head, *result;

    if (hit == NULL) {
        return NULL;
    }

    /* Everything after the matched text. */
    tail = hit + strlen (old);

    /* Everything before it, as its own string (possibly empty). */
    head = mh_xstrdup(*str);
    head[hit - *str] = '\0';

    result = concat (head, new, tail, NULL);
    free (head);

    free (*str);
    *str = result;

    return result;
}
1064
1065
1066 /*
1067 * Remove a name=value parameter, given just its name, from a header value.
1068 */
/*
 * Remove a name=value parameter, given just its name, from a header
 * value.  The edit is done in place.
 *
 * str   header value to edit
 * name  parameter name, matched case-sensitively
 *
 * The removed span covers the whitespace and one semicolon in front of
 * the parameter, the name, the "=" and the value.  A quoted value runs
 * to its closing quote; an unquoted value runs to the next semicolon or
 * whitespace, so the separator of a following parameter is kept.
 * Returns str, unchanged if the parameter is not present.
 */
char *
remove_parameter (char *str, const char *name) {
    /* It looks to me, based on the BNF in RFC 2045, that there can't
       be whitespace between the parameter name and the "=", or
       between the "=" and the parameter value. */
    const size_t name_len = strlen (name);
    char *match = NULL;
    char *cp;

    /* Find "name=" where name is a whole parameter name, i.e., at the
       start of the string or right after a semicolon or whitespace.
       Without that check, removing "name" would hit "filename=". */
    for (cp = str; (cp = strstr (cp, name)) != NULL; ++cp) {
        if (cp[name_len] == '=' &&
            (cp == str || cp[-1] == ';' ||
             isspace ((unsigned char) cp[-1]))) {
            match = cp;
            break;
        }
        if (*cp == '\0') {
            /* Only reachable with an empty name; nothing left to scan. */
            break;
        }
    }

    if (match) {
        char *start = match;
        char *end = match + name_len + 1;

        /* Remove any leading spaces, before the parameter name. */
        while (start > str && isspace ((unsigned char) start[-1])) {
            --start;
        }
        /* Remove a leading semicolon. */
        if (start > str && start[-1] == ';') { --start; }

        if (*end == '"') {
            /* Skip past the quoted value, and then the final quote, if
               there is one. */
            for (++end; *end && *end != '"'; ++end) { continue; }
            if (*end == '"') { ++end; }
        } else {
            /* Skip past the value, which may be empty.  Stop at a
               semicolon so that it stays with the next parameter. */
            while (*end && *end != ';' && ! isspace ((unsigned char) *end)) {
                ++end;
            }
        }

        /* Close the gap, moving the trailing null along with the tail. */
        (void) memmove (start, end, strlen (end) + 1);
    }

    return str;
}
1111
1112
1113 /*
1114 * Fix Content-Transfer-Encoding of a composite part, e.g., message or multipart.
1115 * According to RFC 2045 Sec. 6.4, it must be 7bit, 8bit, or binary. Set it to
1116 * 8 bit.
1117 */
1118 static int
1119 fix_composite_cte (CT ct, int *message_mods) {
1120 int status = OK;
1121
1122 if (ct->c_type == CT_MESSAGE || ct->c_type == CT_MULTIPART) {
1123 if (ct->c_encoding != CE_7BIT && ct->c_encoding != CE_8BIT &&
1124 ct->c_encoding != CE_BINARY) {
1125 HF hf;
1126
1127 for (hf = ct->c_first_hf; hf; hf = hf->next) {
1128 char *name = hf->name;
1129 for (; *name && isspace ((unsigned char) *name); ++name) {
1130 continue;
1131 }
1132
1133 if (! strncasecmp (name, ENCODING_FIELD,
1134 strlen (ENCODING_FIELD))) {
1135 char *prefix = "Nmh-REPLACED-INVALID-";
1136 HF h;
1137
1138 NEW(h);
1139 h->name = mh_xstrdup (hf->name);
1140 h->hf_encoding = hf->hf_encoding;
1141 h->next = hf->next;
1142 hf->next = h;
1143
1144 /* Retain old header but prefix its name. */
1145 free (hf->name);
1146 hf->name = concat (prefix, h->name, NULL);
1147
1148 ++*message_mods;
1149 if (verbosw) {
1150 char *encoding = cpytrim (hf->value);
1151 report (NULL, ct->c_partno, ct->c_file,
1152 "replace Content-Transfer-Encoding of %s "
1153 "with 8 bit", encoding);
1154 free (encoding);
1155 }
1156
1157 h->value = mh_xstrdup (" 8bit\n");
1158
1159 /* Don't need to warn for multiple C-T-E header
1160 fields, parse_mime() already does that. But
1161 if there are any, fix them all as necessary. */
1162 hf = h;
1163 }
1164 }
1165
1166 set_ce (ct, CE_8BIT);
1167 }
1168
1169 if (ct->c_type == CT_MULTIPART) {
1170 struct multipart *m;
1171 struct part *part;
1172
1173 m = (struct multipart *) ct->c_ctparams;
1174 for (part = m->mp_parts; part; part = part->mp_next) {
1175 if (fix_composite_cte (part->mp_part, message_mods) != OK) {
1176 status = NOTOK;
1177 break;
1178 }
1179 }
1180 }
1181 }
1182
1183 return status;
1184 }
1185
1186
1187 /*
1188 * Set content encoding.
1189 */
1190 static int
1191 set_ce (CT ct, int encoding) {
1192 const char *ce = ce_str (encoding);
1193 const struct str2init *ctinit = get_ce_method (ce);
1194
1195 if (ctinit) {
1196 char *cte = concat (" ", ce, "\n", NULL);
1197 int found_cte = 0;
1198 HF hf;
1199 /* Decoded contents might be in ct->c_cefile.ce_file, if the
1200 caller is decode_text_parts (). Save because we'll
1201 overwrite below. */
1202 struct cefile decoded_content_info = ct->c_cefile;
1203
1204 ct->c_encoding = encoding;
1205
1206 ct->c_ctinitfnx = ctinit->si_init;
1207 /* This will assign ct->c_cefile with an all-0 struct, which
1208 is what we want. */
1209 (*ctinit->si_init) (ct);
1210 /* After returning, the caller should set
1211 ct->c_cefile.ce_file to the name of the file containing
1212 the contents. */
1213
1214 if (ct->c_ceclosefnx) {
1215 (*ct->c_ceclosefnx) (ct);
1216 }
1217
1218 /* Restore the cefile. */
1219 ct->c_cefile = decoded_content_info;
1220
1221 /* Update/add Content-Transfer-Encoding header field. */
1222 for (hf = ct->c_first_hf; hf; hf = hf->next) {
1223 if (! strcasecmp (ENCODING_FIELD, hf->name)) {
1224 found_cte = 1;
1225 free (hf->value);
1226 hf->value = cte;
1227 }
1228 }
1229 if (! found_cte) {
1230 add_header (ct, mh_xstrdup (ENCODING_FIELD), cte);
1231 }
1232
1233 /* Update c_celine. It's used only by mhlist -debug. */
1234 free (ct->c_celine);
1235 ct->c_celine = mh_xstrdup (cte);
1236
1237 return OK;
1238 }
1239
1240 return NOTOK;
1241 }
1242
1243
1244 /*
1245 * Make sure each text part has a corresponding text/plain part.
1246 */
1247 static int
1248 ensure_text_plain (CT *ct, CT parent, int *message_mods, int replacetextplain) {
1249 int status = OK;
1250
1251 switch ((*ct)->c_type) {
1252 case CT_TEXT: {
1253 /* Nothing to do for text/plain. */
1254 if ((*ct)->c_subtype == TEXT_PLAIN) { return OK; }
1255
1256 if (parent && parent->c_type == CT_MULTIPART &&
1257 parent->c_subtype == MULTI_ALTERNATE) {
1258 int new_subpart_number = 1;
1259 int has_text_plain =
1260 find_textplain_sibling (parent, replacetextplain,
1261 &new_subpart_number);
1262
1263 if (! has_text_plain) {
1264 /* Parent is a multipart/alternative. Insert a new
1265 text/plain subpart. */
1266 const int inserted =
1267 insert_new_text_plain_part (*ct, new_subpart_number,
1268 parent);
1269 if (inserted) {
1270 ++*message_mods;
1271 if (verbosw) {
1272 report (NULL, parent->c_partno, parent->c_file,
1273 "insert text/plain part");
1274 }
1275 } else {
1276 status = NOTOK;
1277 }
1278 }
1279 } else if (parent && parent->c_type == CT_MULTIPART &&
1280 parent->c_subtype == MULTI_RELATED) {
1281 char *type_subtype =
1282 concat ((*ct)->c_ctinfo.ci_type, "/",
1283 (*ct)->c_ctinfo.ci_subtype, NULL);
1284 const char *parent_type =
1285 get_param (parent->c_ctinfo.ci_first_pm, "type", '?', 1);
1286 int new_subpart_number = 1;
1287 int has_text_plain = 0;
1288
1289 /* Have to do string comparison on the subtype because we
1290 don't enumerate all of them in c_subtype values.
1291 parent_type will be NULL if the multipart/related part
1292 doesn't have a type parameter. The type parameter must
1293 be specified according to RFC 2387 Sec. 3.1 but not all
1294 messages comply. */
1295 if (parent_type && strcasecmp (type_subtype, parent_type) == 0) {
1296 /* The type of this part matches the root type of the
1297 parent multipart/related. Look to see if there's
1298 text/plain sibling. */
1299 has_text_plain =
1300 find_textplain_sibling (parent, replacetextplain,
1301 &new_subpart_number);
1302 }
1303
1304 free (type_subtype);
1305
1306 if (! has_text_plain) {
1307 struct multipart *mp = (struct multipart *) parent->c_ctparams;
1308 struct part *part;
1309 int siblings = 0;
1310
1311 for (part = mp->mp_parts; part; part = part->mp_next) {
1312 if (*ct != part->mp_part) {
1313 ++siblings;
1314 }
1315 }
1316
1317 if (siblings) {
1318 /* Parent is a multipart/related. Insert a new
1319 text/plain subpart in a new multipart/alternative. */
1320 if (insert_into_new_mp_alt (ct, message_mods)) {
1321 /* Not an error if text/plain couldn't be added. */
1322 }
1323 } else {
1324 /* There are no siblings, so insert a new text/plain
1325 subpart, and change the parent type from
1326 multipart/related to multipart/alternative. */
1327 const int inserted =
1328 insert_new_text_plain_part (*ct, new_subpart_number,
1329 parent);
1330
1331 if (inserted) {
1332 HF hf;
1333
1334 parent->c_subtype = MULTI_ALTERNATE;
1335 free (parent->c_ctinfo.ci_subtype);
1336 parent->c_ctinfo.ci_subtype = mh_xstrdup("alternative");
1337 if (! replace_substring (&parent->c_ctline, "/related",
1338 "/alternative")) {
1339 advise (NULL,
1340 "did not find multipart/related in %s",
1341 parent->c_ctline);
1342 }
1343
1344 /* Update Content-Type header field. */
1345 for (hf = parent->c_first_hf; hf; hf = hf->next) {
1346 if (! strcasecmp (TYPE_FIELD, hf->name)) {
1347 if (replace_substring (&hf->value, "/related",
1348 "/alternative")) {
1349 ++*message_mods;
1350 if (verbosw) {
1351 report (NULL, parent->c_partno,
1352 parent->c_file,
1353 "insert text/plain part");
1354 }
1355
1356 /* Remove, e.g., type="text/html" from
1357 multipart/alternative. */
1358 remove_parameter (hf->value, "type");
1359 break;
1360 } else {
1361 advise (NULL, "did not find multipart/"
1362 "related in header %s",
1363 hf->value);
1364 }
1365 }
1366 }
1367 } else {
1368 /* Not an error if text/plain couldn't be inserted. */
1369 }
1370 }
1371 }
1372 } else {
1373 if (insert_into_new_mp_alt (ct, message_mods)) {
1374 status = NOTOK;
1375 }
1376 }
1377 break;
1378 }
1379
1380 case CT_MULTIPART: {
1381 struct multipart *mp = (struct multipart *) (*ct)->c_ctparams;
1382 struct part *part;
1383
1384 for (part = mp->mp_parts; status == OK && part; part = part->mp_next) {
1385 if ((*ct)->c_type == CT_MULTIPART) {
1386 status = ensure_text_plain (&part->mp_part, *ct, message_mods,
1387 replacetextplain);
1388 }
1389 }
1390 break;
1391 }
1392
1393 case CT_MESSAGE:
1394 if ((*ct)->c_subtype == MESSAGE_EXTERNAL) {
1395 struct exbody *e = (struct exbody *) (*ct)->c_ctparams;
1396
1397 status = ensure_text_plain (&e->eb_content, *ct, message_mods,
1398 replacetextplain);
1399 }
1400 break;
1401 }
1402
1403 return status;
1404 }
1405
1406
1407 /*
1408 * See if there is a sibling text/plain, and return its subpart number.
1409 */
1410 static int
1411 find_textplain_sibling (CT parent, int replacetextplain,
1412 int *new_subpart_number) {
1413 struct multipart *mp = (struct multipart *) parent->c_ctparams;
1414 struct part *part, *prev;
1415 int has_text_plain = 0;
1416
1417 for (prev = part = mp->mp_parts; part; part = part->mp_next) {
1418 ++*new_subpart_number;
1419 if (part->mp_part->c_type == CT_TEXT &&
1420 part->mp_part->c_subtype == TEXT_PLAIN) {
1421 if (replacetextplain) {
1422 struct part *old_part;
1423 if (part == mp->mp_parts) {
1424 old_part = mp->mp_parts;
1425 mp->mp_parts = part->mp_next;
1426 } else {
1427 old_part = prev->mp_next;
1428 prev->mp_next = part->mp_next;
1429 }
1430 if (verbosw) {
1431 report (NULL, parent->c_partno, parent->c_file,
1432 "remove text/plain part %s",
1433 old_part->mp_part->c_partno);
1434 }
1435 free_content (old_part->mp_part);
1436 free (old_part);
1437 } else {
1438 has_text_plain = 1;
1439 }
1440 break;
1441 }
1442 prev = part;
1443 }
1444
1445 return has_text_plain;
1446 }
1447
1448
1449 /*
1450 * Insert a new text/plain part.
1451 */
1452 static int
1453 insert_new_text_plain_part (CT ct, int new_subpart_number, CT parent) {
1454 struct multipart *mp = (struct multipart *) parent->c_ctparams;
1455 struct part *new_part;
1456
1457 NEW(new_part);
1458 if ((new_part->mp_part = build_text_plain_part (ct))) {
1459 char buffer[16];
1460 snprintf (buffer, sizeof buffer, "%d", new_subpart_number);
1461
1462 new_part->mp_next = mp->mp_parts;
1463 mp->mp_parts = new_part;
1464 new_part->mp_part->c_partno =
1465 concat (parent->c_partno ? parent->c_partno : "1", ".",
1466 buffer, NULL);
1467
1468 return 1;
1469 }
1470
1471 free_content (new_part->mp_part);
1472 free (new_part);
1473
1474 return 0;
1475 }
1476
1477
1478 /*
1479 * Create a text/plain part to go along with non-plain sibling part.
1480 */
1481 static CT
1482 build_text_plain_part (CT encoded_part) {
1483 CT tp_part = divide_part (encoded_part);
1484 char *tmp_plain_file = NULL;
1485
1486 if (decode_part (tp_part) == OK) {
1487 /* Now, tp_part->c_cefile.ce_file is the name of the tmp file that
1488 contains the decoded contents. And the decoding function, such
1489 as openQuoted, will have set ...->ce_unlink to 1 so that it will
1490 be unlinked by free_content (). */
1491 char *tempfile;
1492
1493 /* This m_mktemp2() call closes the temp file. */
1494 if ((tempfile = m_mktemp2 (NULL, invo_name, NULL, NULL)) == NULL) {
1495 advise (NULL, "unable to create temporary file in %s",
1496 get_temp_dir());
1497 } else {
1498 tmp_plain_file = mh_xstrdup (tempfile);
1499 if (reformat_part (tp_part, tmp_plain_file,
1500 tp_part->c_ctinfo.ci_type,
1501 tp_part->c_ctinfo.ci_subtype,
1502 tp_part->c_type) == OK) {
1503 return tp_part;
1504 }
1505 }
1506 }
1507
1508 free_content (tp_part);
1509 if (tmp_plain_file) { (void) m_unlink (tmp_plain_file); }
1510 free (tmp_plain_file);
1511
1512 return NULL;
1513 }
1514
1515
1516 /*
1517 * Slip new text/plain part into a new multipart/alternative.
1518 */
1519 static int
1520 insert_into_new_mp_alt (CT *ct, int *message_mods) {
1521 CT tp_part = build_text_plain_part (*ct);
1522 int status = OK;
1523
1524 if (tp_part) {
1525 CT mp_alt = build_multipart_alt (*ct, tp_part, CT_MULTIPART,
1526 MULTI_ALTERNATE);
1527 if (mp_alt) {
1528 struct multipart *mp = (struct multipart *) mp_alt->c_ctparams;
1529
1530 if (mp && mp->mp_parts) {
1531 mp->mp_parts->mp_part = tp_part;
1532 /* Make the new multipart/alternative the parent. */
1533 *ct = mp_alt;
1534
1535 ++*message_mods;
1536 if (verbosw) {
1537 report (NULL, (*ct)->c_partno, (*ct)->c_file,
1538 "insert text/plain part");
1539 }
1540 } else {
1541 free_content (tp_part);
1542 free_content (mp_alt);
1543 status = NOTOK;
1544 }
1545 } else {
1546 status = NOTOK;
1547 }
1548 } else {
1549 /* Not an error if text/plain couldn't be built. */
1550 }
1551
1552 return status;
1553 }
1554
1555
1556 /*
1557 * Clone a MIME part.
1558 */
1559 static CT
1560 divide_part (CT ct) {
1561 CT new_part;
1562
1563 NEW0(new_part);
1564 /* Just copy over what is needed for decoding. c_vrsn and
1565 c_celine aren't necessary. */
1566 new_part->c_file = mh_xstrdup (ct->c_file);
1567 new_part->c_begin = ct->c_begin;
1568 new_part->c_end = ct->c_end;
1569 copy_ctinfo (&new_part->c_ctinfo, &ct->c_ctinfo);
1570 new_part->c_type = ct->c_type;
1571 new_part->c_cefile = ct->c_cefile;
1572 new_part->c_encoding = ct->c_encoding;
1573 new_part->c_ctinitfnx = ct->c_ctinitfnx;
1574 new_part->c_ceopenfnx = ct->c_ceopenfnx;
1575 new_part->c_ceclosefnx = ct->c_ceclosefnx;
1576 new_part->c_cesizefnx = ct->c_cesizefnx;
1577
1578 /* c_ctline is used by reformat__part(), so it can preserve
1579 anything after the type/subtype. */
1580 new_part->c_ctline = mh_xstrdup (ct->c_ctline);
1581
1582 return new_part;
1583 }
1584
1585
1586 /*
1587 * Copy the content info from one part to another.
1588 */
1589 static void
1590 copy_ctinfo (CI dest, CI src) {
1591 PM s_pm, d_pm;
1592
1593 dest->ci_type = src->ci_type ? mh_xstrdup (src->ci_type) : NULL;
1594 dest->ci_subtype = src->ci_subtype ? mh_xstrdup (src->ci_subtype) : NULL;
1595
1596 for (s_pm = src->ci_first_pm; s_pm; s_pm = s_pm->pm_next) {
1597 d_pm = add_param(&dest->ci_first_pm, &dest->ci_last_pm, s_pm->pm_name,
1598 s_pm->pm_value, 0);
1599 if (s_pm->pm_charset) {
1600 d_pm->pm_charset = mh_xstrdup(s_pm->pm_charset);
1601 }
1602 if (s_pm->pm_lang) {
1603 d_pm->pm_lang = mh_xstrdup(s_pm->pm_lang);
1604 }
1605 }
1606
1607 dest->ci_comment = src->ci_comment ? mh_xstrdup (src->ci_comment) : NULL;
1608 dest->ci_magic = src->ci_magic ? mh_xstrdup (src->ci_magic) : NULL;
1609 }
1610
1611
1612 /*
1613 * Decode content.
1614 */
1615 static int
1616 decode_part (CT ct) {
1617 char *tmp_decoded;
1618 int status;
1619 FILE *file;
1620 char *tempfile;
1621
1622 if ((tempfile = m_mktemp2 (NULL, invo_name, NULL, &file)) == NULL) {
1623 adios (NULL, "unable to create temporary file in %s", get_temp_dir());
1624 }
1625 tmp_decoded = mh_xstrdup (tempfile);
1626 /* The following call will load ct->c_cefile.ce_file with the tmp
1627 filename of the decoded content. tmp_decoded will contain the
1628 encoded output, get rid of that. */
1629 status = output_message_fp (ct, file, tmp_decoded);
1630 (void) m_unlink (tmp_decoded);
1631 free (tmp_decoded);
1632 if (fclose (file)) {
1633 admonish (NULL, "unable to close temporary file %s", tempfile);
1634 }
1635
1636 return status;
1637 }
1638
1639
1640 /*
1641 * Reformat content as plain text.
1642 * Some of the arguments aren't really needed now, but maybe will
1643 * be in the future for other than text types.
1644 */
1645 static int
1646 reformat_part (CT ct, char *file, char *type, char *subtype, int c_type) {
1647 int output_subtype, output_encoding;
1648 const char *reason = NULL;
1649 char *cp, *cf;
1650 int status;
1651
1652 /* Hacky: this redirects the output from whatever command is used
1653 to show the part to a file. So, the user can't have any output
1654 redirection in that command.
1655 Could show_multi() in mhshowsbr.c avoid this? */
1656
1657 /* Check for invo_name-format-type/subtype. */
1658 if ((cf = context_find_by_type ("format", type, subtype)) == NULL) {
1659 if (verbosw) {
1660 advise (NULL, "Don't know how to convert %s, there is no "
1661 "%s-format-%s/%s profile entry",
1662 ct->c_file, invo_name, type, subtype);
1663 }
1664 return NOTOK;
1665 }
1666 if (strchr (cf, '>')) {
1667 advise (NULL, "'>' prohibited in \"%s\",\nplease fix your "
1668 "%s-format-%s/%s profile entry", cf, invo_name, type,
1669 subtype ? subtype : "");
1670
1671 return NOTOK;
1672 }
1673
1674 cp = concat (cf, " >", file, NULL);
1675 status = show_content_aux (ct, 0, cp, NULL, NULL);
1676 free (cp);
1677
1678 /* Unlink decoded content tmp file and free its filename to avoid
1679 leaks. The file stream should already have been closed. */
1680 if (ct->c_cefile.ce_unlink) {
1681 (void) m_unlink (ct->c_cefile.ce_file);
1682 free (ct->c_cefile.ce_file);
1683 ct->c_cefile.ce_file = NULL;
1684 ct->c_cefile.ce_unlink = 0;
1685 }
1686
1687 if (c_type == CT_TEXT) {
1688 output_subtype = TEXT_PLAIN;
1689 } else {
1690 /* Set subtype to 0, which is always an UNKNOWN subtype. */
1691 output_subtype = 0;
1692 }
1693
1694 output_encoding = content_encoding (ct, &reason);
1695 if (status == OK &&
1696 set_ct_type (ct, c_type, output_subtype, output_encoding) == OK) {
1697 ct->c_cefile.ce_file = file;
1698 ct->c_cefile.ce_unlink = 1;
1699 } else {
1700 ct->c_cefile.ce_unlink = 0;
1701 status = NOTOK;
1702 }
1703
1704 return status;
1705 }
1706
1707
1708 /*
1709 * Fill in a multipart/alternative part.
1710 */
1711 static CT
1712 build_multipart_alt (CT first_alt, CT new_part, int type, int subtype) {
1713 char *boundary_prefix = "----=_nmh-multipart";
1714 char *boundary = concat (boundary_prefix, first_alt->c_partno, NULL);
1715 char *boundary_indicator = "; boundary=";
1716 char *typename, *subtypename, *name;
1717 CT ct;
1718 struct part *p;
1719 struct multipart *m;
1720 const struct str2init *ctinit;
1721
1722 NEW0(ct);
1723
1724 /* Set up the multipart/alternative part. These fields of *ct were
1725 initialized to 0 by mh_xcalloc():
1726 c_fp, c_unlink, c_begin, c_end,
1727 c_vrsn, c_ctline, c_celine,
1728 c_id, c_descr, c_dispo, c_partno,
1729 c_ctinfo.ci_comment, c_ctinfo.ci_magic,
1730 c_cefile, c_encoding,
1731 c_digested, c_digest[16], c_ctexbody,
1732 c_ctinitfnx, c_ceopenfnx, c_ceclosefnx, c_cesizefnx,
1733 c_umask, c_rfc934,
1734 c_showproc, c_termproc, c_storeproc, c_storage, c_folder
1735 */
1736
1737 ct->c_file = mh_xstrdup (first_alt->c_file);
1738 ct->c_type = type;
1739 ct->c_subtype = subtype;
1740
1741 ctinit = get_ct_init (ct->c_type);
1742
1743 typename = ct_type_str (type);
1744 subtypename = ct_subtype_str (type, subtype);
1745
1746 {
1747 int serial = 0;
1748 int found_boundary = 1;
1749
1750 while (found_boundary && serial < 1000000) {
1751 found_boundary = 0;
1752
1753 /* Ensure that the boundary doesn't appear in the decoded
1754 content. */
1755 if (new_part->c_cefile.ce_file) {
1756 if ((found_boundary =
1757 boundary_in_content (&new_part->c_cefile.ce_fp,
1758 new_part->c_cefile.ce_file,
1759 boundary)) == NOTOK) {
1760 free_content (ct);
1761 return NULL;
1762 }
1763 }
1764
1765 /* Ensure that the boundary doesn't appear in the encoded
1766 content. */
1767 if (! found_boundary && new_part->c_file) {
1768 if ((found_boundary =
1769 boundary_in_content (&new_part->c_fp,
1770 new_part->c_file,
1771 boundary)) == NOTOK) {
1772 free_content (ct);
1773 return NULL;
1774 }
1775 }
1776
1777 if (found_boundary) {
1778 /* Try a slightly different boundary. */
1779 char buffer2[16];
1780
1781 free (boundary);
1782 ++serial;
1783 snprintf (buffer2, sizeof buffer2, "%d", serial);
1784 boundary =
1785 concat (boundary_prefix,
1786 first_alt->c_partno ? first_alt->c_partno : "",
1787 "-", buffer2, NULL);
1788 }
1789 }
1790
1791 if (found_boundary) {
1792 advise (NULL, "giving up trying to find a unique boundary");
1793 free_content (ct);
1794 return NULL;
1795 }
1796 }
1797
1798 name = concat (" ", typename, "/", subtypename, boundary_indicator, "\"",
1799 boundary, "\"", NULL);
1800
1801 /* Load c_first_hf and c_last_hf. */
1802 transfer_noncontent_headers (first_alt, ct);
1803 add_header (ct, mh_xstrdup (TYPE_FIELD), concat (name, "\n", NULL));
1804 free (name);
1805
1806 /* Load c_partno. */
1807 if (first_alt->c_partno) {
1808 ct->c_partno = mh_xstrdup (first_alt->c_partno);
1809 free (first_alt->c_partno);
1810 first_alt->c_partno = concat (ct->c_partno, ".1", NULL);
1811 new_part->c_partno = concat (ct->c_partno, ".2", NULL);
1812 } else {
1813 first_alt->c_partno = mh_xstrdup ("1");
1814 new_part->c_partno = mh_xstrdup ("2");
1815 }
1816
1817 if (ctinit) {
1818 ct->c_ctinfo.ci_type = mh_xstrdup (typename);
1819 ct->c_ctinfo.ci_subtype = mh_xstrdup (subtypename);
1820 }
1821
1822 add_param(&ct->c_ctinfo.ci_first_pm, &ct->c_ctinfo.ci_last_pm,
1823 "boundary", boundary, 0);
1824
1825 NEW(p);
1826 NEW(p->mp_next);
1827 p->mp_next->mp_next = NULL;
1828 p->mp_next->mp_part = first_alt;
1829
1830 NEW0(m);
1831 m->mp_start = concat (boundary, "\n", NULL);
1832 m->mp_stop = concat (boundary, "--\n", NULL);
1833 m->mp_parts = p;
1834 ct->c_ctparams = m;
1835
1836 free (boundary);
1837
1838 return ct;
1839 }
1840
1841
1842 /*
1843 * Check that the boundary does not appear in the content.
1844 */
1845 static int
1846 boundary_in_content (FILE **fp, char *file, const char *boundary) {
1847 char buffer[NMH_BUFSIZ];
1848 size_t bytes_read;
1849 int found_boundary = 0;
1850
1851 /* free_content() will close *fp if we fopen it here. */
1852 if (! *fp && (*fp = fopen (file, "r")) == NULL) {
1853 advise (file, "unable to open %s for reading", file);
1854 return NOTOK;
1855 }
1856
1857 fseeko (*fp, 0L, SEEK_SET);
1858 while ((bytes_read = fread (buffer, 1, sizeof buffer, *fp)) > 0) {
1859 if (find_str (buffer, bytes_read, boundary)) {
1860 found_boundary = 1;
1861 break;
1862 }
1863 }
1864
1865 return found_boundary;
1866 }
1867
1868
1869 /*
1870 * Remove all non-Content headers.
1871 */
1872 static void
1873 transfer_noncontent_headers (CT old, CT new) {
1874 HF hp, hp_prev;
1875
1876 hp_prev = hp = old->c_first_hf;
1877 while (hp) {
1878 HF next = hp->next;
1879
1880 if (strncasecmp (XXX_FIELD_PRF, hp->name, strlen (XXX_FIELD_PRF))) {
1881 if (hp == old->c_last_hf) {
1882 if (hp == old->c_first_hf) {
1883 old->c_last_hf = old->c_first_hf = NULL;
1884 } else {
1885 hp_prev->next = NULL;
1886 old->c_last_hf = hp_prev;
1887 }
1888 } else {
1889 if (hp == old->c_first_hf) {
1890 old->c_first_hf = next;
1891 } else {
1892 hp_prev->next = next;
1893 }
1894 }
1895
1896 /* Put node hp in the new CT. */
1897 if (new->c_first_hf == NULL) {
1898 new->c_first_hf = hp;
1899 } else {
1900 new->c_last_hf->next = hp;
1901 }
1902 new->c_last_hf = hp;
1903 } else {
1904 /* A Content- header, leave in old. */
1905 hp_prev = hp;
1906 }
1907
1908 hp = next;
1909 }
1910 }
1911
1912
1913 /*
1914 * Set content type.
1915 */
1916 static int
1917 set_ct_type (CT ct, int type, int subtype, int encoding) {
1918 char *typename = ct_type_str (type);
1919 char *subtypename = ct_subtype_str (type, subtype);
1920 /* E.g, " text/plain" */
1921 char *type_subtypename = concat (" ", typename, "/", subtypename, NULL);
1922 /* E.g, " text/plain\n" */
1923 char *name_plus_nl = concat (type_subtypename, "\n", NULL);
1924 int found_content_type = 0;
1925 HF hf;
1926 const char *cp = NULL;
1927 char *ctline;
1928 int status;
1929
1930 /* Update/add Content-Type header field. */
1931 for (hf = ct->c_first_hf; hf; hf = hf->next) {
1932 if (! strcasecmp (TYPE_FIELD, hf->name)) {
1933 found_content_type = 1;
1934 free (hf->value);
1935 hf->value = (cp = strchr (ct->c_ctline, ';'))
1936 ? concat (type_subtypename, cp, "\n", NULL)
1937 : mh_xstrdup (name_plus_nl);
1938 }
1939 }
1940 if (! found_content_type) {
1941 add_header (ct, mh_xstrdup (TYPE_FIELD),
1942 (cp = strchr (ct->c_ctline, ';'))
1943 ? concat (type_subtypename, cp, "\n", NULL)
1944 : mh_xstrdup (name_plus_nl));
1945 }
1946
1947 /* Some of these might not be used, but set them anyway. */
1948 ctline = cp
1949 ? concat (type_subtypename, cp, NULL)
1950 : concat (type_subtypename, NULL);
1951 free (ct->c_ctline);
1952 ct->c_ctline = ctline;
1953 /* Leave other ctinfo members as they were. */
1954 free (ct->c_ctinfo.ci_type);
1955 ct->c_ctinfo.ci_type = mh_xstrdup (typename);
1956 free (ct->c_ctinfo.ci_subtype);
1957 ct->c_ctinfo.ci_subtype = mh_xstrdup (subtypename);
1958 ct->c_type = type;
1959 ct->c_subtype = subtype;
1960
1961 free (name_plus_nl);
1962 free (type_subtypename);
1963
1964 status = set_ce (ct, encoding);
1965
1966 return status;
1967 }
1968
1969
1970 /*
1971 * It's not necessary to update the charset parameter of a Content-Type
1972 * header for a text part. According to RFC 2045 Sec. 6.4, the body
1973 * (content) was originally in the specified charset, "and will be in
1974 * that character set again after decoding."
1975 */
1976 static int
1977 decode_text_parts (CT ct, int encoding, const char *decodetypes,
1978 int *message_mods) {
1979 int status = OK;
1980 int lf_line_endings = 0;
1981
1982 switch (ct->c_type) {
1983 case CT_MULTIPART: {
1984 struct multipart *m = (struct multipart *) ct->c_ctparams;
1985 struct part *part;
1986
1987 /* Should check to see if the body for this part is encoded?
1988 For now, it gets passed along as-is by InitMultiPart(). */
1989 for (part = m->mp_parts; status == OK && part; part = part->mp_next) {
1990 status = decode_text_parts (part->mp_part, encoding, decodetypes,
1991 message_mods);
1992 }
1993 break;
1994 }
1995
1996 case CT_MESSAGE:
1997 if (ct->c_subtype == MESSAGE_EXTERNAL) {
1998 struct exbody *e = (struct exbody *) ct->c_ctparams;
1999
2000 status = decode_text_parts (e->eb_content, encoding, decodetypes,
2001 message_mods);
2002 }
2003 break;
2004
2005 default:
2006 if (! should_decode(decodetypes, ct->c_ctinfo.ci_type, ct->c_ctinfo.ci_subtype)) {
2007 break;
2008 }
2009
2010 lf_line_endings =
2011 ct->c_ctparams && ((struct text *) ct->c_ctparams)->lf_line_endings;
2012
2013 switch (ct->c_encoding) {
2014 case CE_BASE64:
2015 case CE_QUOTED: {
2016 int ct_encoding;
2017
2018 if (decode_part (ct) == OK && ct->c_cefile.ce_file) {
2019 const char *reason = NULL;
2020
2021 if ((ct_encoding = content_encoding (ct, &reason)) == CE_BINARY
2022 && encoding != CE_BINARY) {
2023 /* The decoding isn't acceptable so discard it.
2024 Leave status as OK to allow other transformations. */
2025 if (verbosw) {
2026 report (NULL, ct->c_partno, ct->c_file,
2027 "will not decode%s because it is binary (%s)",
2028 ct->c_partno ? ""
2029 : ct->c_ctline ? ct->c_ctline
2030 : "",
2031 reason);
2032 }
2033 (void) m_unlink (ct->c_cefile.ce_file);
2034 free (ct->c_cefile.ce_file);
2035 ct->c_cefile.ce_file = NULL;
2036 } else if (ct->c_encoding == CE_QUOTED &&
2037 ct_encoding == CE_8BIT && encoding == CE_7BIT) {
2038 /* The decoding isn't acceptable so discard it.
2039 Leave status as OK to allow other transformations. */
2040 if (verbosw) {
2041 report (NULL, ct->c_partno, ct->c_file,
2042 "will not decode%s because it is 8bit",
2043 ct->c_partno ? ""
2044 : ct->c_ctline ? ct->c_ctline
2045 : "");
2046 }
2047 (void) m_unlink (ct->c_cefile.ce_file);
2048 free (ct->c_cefile.ce_file);
2049 ct->c_cefile.ce_file = NULL;
2050 } else {
2051 int enc;
2052
2053 if (ct_encoding == CE_BINARY) {
2054 enc = CE_BINARY;
2055 } else if (ct_encoding == CE_8BIT && encoding == CE_7BIT) {
2056 enc = CE_QUOTED;
2057 } else {
2058 enc = ct_encoding;
2059 }
2060 if (set_ce (ct, enc) == OK) {
2061 ++*message_mods;
2062 if (verbosw) {
2063 report (NULL, ct->c_partno, ct->c_file, "decode%s",
2064 ct->c_ctline ? ct->c_ctline : "");
2065 }
2066 if (lf_line_endings) {
2067 strip_crs (ct, message_mods);
2068 }
2069 } else {
2070 status = NOTOK;
2071 }
2072 }
2073 } else {
2074 status = NOTOK;
2075 }
2076 break;
2077 }
2078 case CE_8BIT:
2079 case CE_7BIT:
2080 if (lf_line_endings) {
2081 strip_crs (ct, message_mods);
2082 }
2083 break;
2084 default:
2085 break;
2086 }
2087
2088 break;
2089 }
2090
2091 return status;
2092 }
2093
2094
/*
 * Determine if the part with type[/subtype] should be decoded, according to
 * decodetypes (which came from the -decodetypes switch).
 *
 * Returns 1 if either type or type/subtype is an element of the
 * comma-separated decodetypes, ignoring case, and 0 otherwise.
 */
static int
should_decode(const char *decodetypes, const char *type, const char *subtype) {
    /* Quick search:  with commas around both the list and the candidate,
       a substring match can only be a match of a whole element. */
    char *list = concat(",", decodetypes, ",", NULL);
    char *candidate = concat(",", type, ",", NULL);
    int matched = nmh_strcasestr(list, candidate) != NULL;

    if (! matched  &&  subtype != NULL) {
        /* The bare type isn't listed, try type/subtype. */
        char *full_candidate = concat(",", type, "/", subtype, ",", NULL);

        matched = nmh_strcasestr(list, full_candidate) != NULL;
        free(full_candidate);
    }

    free(candidate);
    free(list);

    return matched;
}
2126
2127
2128 /*
2129 * See if the decoded content is 7bit, 8bit, or binary. It's binary
2130 * if it has any NUL characters, a CR not followed by a LF, or lines
2131 * greater than 998 characters in length. If binary, reason is set
2132 * to a string explaining why.
2133 */
2134 static int
2135 content_encoding (CT ct, const char **reason) {
2136 CE ce = &ct->c_cefile;
2137 int encoding = CE_7BIT;
2138
2139 if (ce->ce_file) {
2140 size_t line_len = 0;
2141 char buffer[NMH_BUFSIZ];
2142 size_t inbytes;
2143
2144 if (! ce->ce_fp && (ce->ce_fp = fopen (ce->ce_file, "r")) == NULL) {
2145 advise (ce->ce_file, "unable to open for reading");
2146 return CE_UNKNOWN;
2147 }
2148
2149 fseeko (ce->ce_fp, 0L, SEEK_SET);
2150 while (encoding != CE_BINARY &&
2151 (inbytes = fread (buffer, 1, sizeof buffer, ce->ce_fp)) > 0) {
2152 char *cp;
2153 size_t i;
2154 int last_char_was_cr = 0;
2155
2156 for (i = 0, cp = buffer; i < inbytes; ++i, ++cp) {
2157 if (*cp == '\0' || ++line_len > 998 ||
2158 (*cp != '\n' && last_char_was_cr)) {
2159 encoding = CE_BINARY;
2160 if (*cp == '\0') {
2161 *reason = "null character";
2162 } else if (line_len > 998) {
2163 *reason = "line length > 998";
2164 } else if (*cp != '\n' && last_char_was_cr) {
2165 *reason = "CR not followed by LF";
2166 } else {
2167 /* Should not reach this. */
2168 *reason = "";
2169 }
2170 break;
2171 } else if (*cp == '\n') {
2172 line_len = 0;
2173 } else if (! isascii ((unsigned char) *cp)) {
2174 encoding = CE_8BIT;
2175 }
2176
2177 last_char_was_cr = *cp == '\r' ? 1 : 0;
2178 }
2179 }
2180
2181 fclose (ce->ce_fp);
2182 ce->ce_fp = NULL;
2183 } /* else should never happen */
2184
2185 return encoding;
2186 }
2187
2188
2189 /*
2190 * Strip carriage returns from content.
2191 */
2192 static int
2193 strip_crs (CT ct, int *message_mods) {
2194 char *charset = content_charset (ct);
2195 int status = OK;
2196
2197 /* Only strip carriage returns if content is ASCII or another
2198 charset that has the same readily recognizable CR followed by a
2199 LF. We can include UTF-8 here because if the high-order bit of
2200 a UTF-8 byte is 0, then it must be a single-byte ASCII
2201 character. */
2202 if (! strcasecmp (charset, "US-ASCII") ||
2203 ! strcasecmp (charset, "UTF-8") ||
2204 ! strncasecmp (charset, "ISO-8859-", 9) ||
2205 ! strncasecmp (charset, "WINDOWS-12", 10)) {
2206 char **file = NULL;
2207 FILE **fp = NULL;
2208 size_t begin;
2209 size_t end;
2210 int has_crs = 0;
2211 int opened_input_file = 0;
2212
2213 if (ct->c_cefile.ce_file) {
2214 file = &ct->c_cefile.ce_file;
2215 fp = &ct->c_cefile.ce_fp;
2216 begin = end = 0;
2217 } else if (ct->c_file) {
2218 file = &ct->c_file;
2219 fp = &ct->c_fp;
2220 begin = (size_t) ct->c_begin;
2221 end = (size_t) ct->c_end;
2222 } /* else don't know where the content is */
2223
2224 if (file && *file && fp) {
2225 if (! *fp) {
2226 if ((*fp = fopen (*file, "r")) == NULL) {
2227 advise (*file, "unable to open for reading");
2228 status = NOTOK;
2229 } else {
2230 opened_input_file = 1;
2231 }
2232 }
2233 }
2234
2235 if (fp && *fp) {
2236 char buffer[NMH_BUFSIZ];
2237 size_t bytes_read;
2238 size_t bytes_to_read =
2239 end > 0 && end > begin ? end - begin : sizeof buffer;
2240
2241 fseeko (*fp, begin, SEEK_SET);
2242 while ((bytes_read = fread (buffer, 1,
2243 min (bytes_to_read, sizeof buffer),
2244 *fp)) > 0) {
2245 /* Look for CR followed by a LF. This is supposed to
2246 be text so there should be LF's. If not, don't
2247 modify the content. */
2248 char *cp;
2249 size_t i;
2250 int last_char_was_cr = 0;
2251
2252 if (end > 0) { bytes_to_read -= bytes_read; }
2253
2254 for (i = 0, cp = buffer; i < bytes_read; ++i, ++cp) {
2255 if (*cp == '\n' && last_char_was_cr) {
2256 has_crs = 1;
2257 break;
2258 }
2259
2260 last_char_was_cr = *cp == '\r' ? 1 : 0;
2261 }
2262 }
2263
2264 if (has_crs) {
2265 int fd;
2266 char *stripped_content_file;
2267 char *tempfile = m_mktemp2 (NULL, invo_name, &fd, NULL);
2268
2269 if (tempfile == NULL) {
2270 adios (NULL, "unable to create temporary file in %s",
2271 get_temp_dir());
2272 }
2273 stripped_content_file = mh_xstrdup (tempfile);
2274
2275 /* Strip each CR before a LF from the content. */
2276 fseeko (*fp, begin, SEEK_SET);
2277 while ((bytes_read = fread (buffer, 1, sizeof buffer, *fp)) >
2278 0) {
2279 char *cp;
2280 size_t i;
2281 int last_char_was_cr = 0;
2282
2283 for (i = 0, cp = buffer; i < bytes_read; ++i, ++cp) {
2284 if (*cp == '\r') {
2285 last_char_was_cr = 1;
2286 } else if (last_char_was_cr) {
2287 if (*cp != '\n') {
2288 if (write (fd, "\r", 1) < 0) {
2289 advise (tempfile, "CR write");
2290 }
2291 }
2292 if (write (fd, cp, 1) < 0) {
2293 advise (tempfile, "write");
2294 }
2295 last_char_was_cr = 0;
2296 } else {
2297 if (write (fd, cp, 1) < 0) {
2298 advise (tempfile, "write");
2299 }
2300 last_char_was_cr = 0;
2301 }
2302 }
2303 }
2304
2305 if (close (fd)) {
2306 admonish (NULL, "unable to write temporary file %s",
2307 stripped_content_file);
2308 (void) m_unlink (stripped_content_file);
2309 status = NOTOK;
2310 } else {
2311 /* Replace the decoded file with the converted one. */
2312 if (ct->c_cefile.ce_file && ct->c_cefile.ce_unlink)
2313 (void) m_unlink (ct->c_cefile.ce_file);
2314
2315 mh_xfree(ct->c_cefile.ce_file);
2316 ct->c_cefile.ce_file = stripped_content_file;
2317 ct->c_cefile.ce_unlink = 1;
2318
2319 ++*message_mods;
2320 if (verbosw) {
2321 report (NULL, ct->c_partno,
2322 begin == 0 && end == 0 ? "" : *file,
2323 "stripped CRs");
2324 }
2325 }
2326 }
2327
2328 if (opened_input_file) {
2329 fclose (*fp);
2330 *fp = NULL;
2331 }
2332 }
2333 }
2334
2335 free (charset);
2336
2337 return status;
2338 }
2339
2340
2341 /*
2342 * Add/update, if necessary, the message C-T-E, based on the least restrictive
2343 * of the part C-T-E's.
2344 */
2345 static void
2346 update_cte (CT ct) {
2347 const int least_restrictive_enc = least_restrictive_encoding (ct);
2348
2349 if (least_restrictive_enc != CE_UNKNOWN &&
2350 least_restrictive_enc != CE_7BIT) {
2351 char *cte = concat (" ", ce_str (least_restrictive_enc), "\n", NULL);
2352 HF hf;
2353 int found_cte = 0;
2354
2355 /* Update/add Content-Transfer-Encoding header field. */
2356 for (hf = ct->c_first_hf; hf; hf = hf->next) {
2357 if (! strcasecmp (ENCODING_FIELD, hf->name)) {
2358 found_cte = 1;
2359 free (hf->value);
2360 hf->value = cte;
2361 }
2362 }
2363 if (! found_cte) {
2364 add_header (ct, mh_xstrdup (ENCODING_FIELD), cte);
2365 }
2366 }
2367 }
2368
2369
2370 /*
2371 * Find the least restrictive encoding (7bit, 8bit, binary) of the parts
2372 * within a message.
2373 */
2374 static int
2375 least_restrictive_encoding (CT ct) {
2376 int encoding = CE_UNKNOWN;
2377
2378 switch (ct->c_type) {
2379 case CT_MULTIPART: {
2380 struct multipart *m = (struct multipart *) ct->c_ctparams;
2381 struct part *part;
2382
2383 for (part = m->mp_parts; part; part = part->mp_next) {
2384 const int part_encoding =
2385 least_restrictive_encoding (part->mp_part);
2386
2387 if (less_restrictive (encoding, part_encoding)) {
2388 encoding = part_encoding;
2389 }
2390 }
2391 break;
2392 }
2393
2394 case CT_MESSAGE:
2395 if (ct->c_subtype == MESSAGE_EXTERNAL) {
2396 struct exbody *e = (struct exbody *) ct->c_ctparams;
2397 const int part_encoding =
2398 least_restrictive_encoding (e->eb_content);
2399
2400 if (less_restrictive (encoding, part_encoding)) {
2401 encoding = part_encoding;
2402 }
2403 }
2404 break;
2405
2406 default: {
2407 if (less_restrictive (encoding, ct->c_encoding)) {
2408 encoding = ct->c_encoding;
2409 }
2410 }}
2411
2412 return encoding;
2413 }
2414
2415
2416 /*
2417 * Return whether the second encoding is less restrictive than the first, where
2418 * "less restrictive" is in the sense used by RFC 2045 Secs. 6.1 and 6.4. So,
2419 * CE_BINARY is less restrictive than CE_8BIT and
2420 * CE_8BIT is less restrictive than CE_7BIT.
2421 */
2422 static int
2423 less_restrictive (int encoding, int second_encoding) {
2424 switch (second_encoding) {
2425 case CE_BINARY:
2426 return encoding != CE_BINARY;
2427 case CE_8BIT:
2428 return encoding != CE_BINARY && encoding != CE_8BIT;
2429 case CE_7BIT:
2430 return encoding != CE_BINARY && encoding != CE_8BIT &&
2431 encoding != CE_7BIT;
2432 default :
2433 return 0;
2434 }
2435 }
2436
2437
2438 /*
2439 * Convert character set of each part.
2440 */
2441 static int
2442 convert_charsets (CT ct, char *dest_charset, int *message_mods) {
2443 int status = OK;
2444
2445 switch (ct->c_type) {
2446 case CT_TEXT:
2447 if (ct->c_subtype == TEXT_PLAIN) {
2448 status = convert_charset (ct, dest_charset, message_mods);
2449 if (status == OK) {
2450 if (verbosw) {
2451 char *ct_charset = content_charset (ct);
2452
2453 report (NULL, ct->c_partno, ct->c_file,
2454 "convert %s to %s", ct_charset, dest_charset);
2455 free (ct_charset);
2456 }
2457 } else {
2458 char *ct_charset = content_charset (ct);
2459
2460 report ("iconv", ct->c_partno, ct->c_file,
2461 "failed to convert %s to %s", ct_charset, dest_charset);
2462 free (ct_charset);
2463 }
2464 }
2465 break;
2466
2467 case CT_MULTIPART: {
2468 struct multipart *m = (struct multipart *) ct->c_ctparams;
2469 struct part *part;
2470
2471 /* Should check to see if the body for this part is encoded?
2472 For now, it gets passed along as-is by InitMultiPart(). */
2473 for (part = m->mp_parts; status == OK && part; part = part->mp_next) {
2474 status =
2475 convert_charsets (part->mp_part, dest_charset, message_mods);
2476 }
2477 break;
2478 }
2479
2480 case CT_MESSAGE:
2481 if (ct->c_subtype == MESSAGE_EXTERNAL) {
2482 struct exbody *e = (struct exbody *) ct->c_ctparams;
2483
2484 status =
2485 convert_charsets (e->eb_content, dest_charset, message_mods);
2486 }
2487 break;
2488
2489 default:
2490 break;
2491 }
2492
2493 return status;
2494 }
2495
2496
2497 /*
2498 * Fix various problems that aren't handled elsewhere. These
2499 * are fixed unconditionally: there are no switches to disable
2500 * them. Currently, "problems" are these:
2501 * 1) remove extraneous semicolon at the end of a header parameter list
2502 * 2) replace RFC 2047 encoding with RFC 2231 encoding of name and
2503 * filename parameters in Content-Type and Content-Disposition
2504 * headers, respectively.
2505 */
2506 static int
2507 fix_always (CT ct, int *message_mods) {
2508 int status = OK;
2509
2510 switch (ct->c_type) {
2511 case CT_MULTIPART: {
2512 struct multipart *m = (struct multipart *) ct->c_ctparams;
2513 struct part *part;
2514
2515 for (part = m->mp_parts; status == OK && part; part = part->mp_next) {
2516 status = fix_always (part->mp_part, message_mods);
2517 }
2518 break;
2519 }
2520
2521 case CT_MESSAGE:
2522 if (ct->c_subtype == MESSAGE_EXTERNAL) {
2523 struct exbody *e = (struct exbody *) ct->c_ctparams;
2524
2525 status = fix_always (e->eb_content, message_mods);
2526 }
2527 break;
2528
2529 default: {
2530 HF hf;
2531
2532 if (ct->c_first_hf) {
2533 fix_filename_encoding (ct);
2534 }
2535
2536 for (hf = ct->c_first_hf; hf; hf = hf->next) {
2537 size_t len = strlen (hf->value);
2538
2539 if (strcasecmp (hf->name, TYPE_FIELD) != 0 &&
2540 strcasecmp (hf->name, DISPO_FIELD) != 0) {
2541 /* Only do this for Content-Type and
2542 Content-Disposition fields because those are the
2543 only headers that parse_mime() warns about. */
2544 continue;
2545 }
2546
2547 /* whitespace following a trailing ';' will be nuked as well */
2548 if (hf->value[len - 1] == '\n') {
2549 while (isspace((unsigned char)(hf->value[len - 2]))) {
2550 if (len-- == 0) { break; }
2551 }
2552 }
2553
2554 if (hf->value[len - 2] == ';') {
2555 /* Remove trailing ';' from parameter value. */
2556 hf->value[len - 2] = '\n';
2557 hf->value[len - 1] = '\0';
2558
2559 /* Also, if Content-Type parameter, remove trailing ';'
2560 from ct->c_ctline. This probably isn't necessary
2561 but can't hurt. */
2562 if (strcasecmp(hf->name, TYPE_FIELD) == 0 && ct->c_ctline) {
2563 size_t l = strlen(ct->c_ctline) - 1;
2564 while (isspace((unsigned char)(ct->c_ctline[l])) ||
2565 ct->c_ctline[l] == ';') {
2566 ct->c_ctline[l--] = '\0';
2567 if (l == 0) { break; }
2568 }
2569 }
2570
2571 ++*message_mods;
2572 if (verbosw) {
2573 report (NULL, ct->c_partno, ct->c_file,
2574 "remove trailing ; from %s parameter value",
2575 hf->name);
2576 }
2577 }
2578 }
2579 }}
2580
2581 return status;
2582 }
2583
2584
2585 /*
2586 * Factor out common code for loops in fix_filename_encoding().
2587 */
2588 static int
2589 fix_filename_param (char *name, char *value, PM *first_pm, PM *last_pm) {
2590 int fixed = 0;
2591
2592 if (HasPrefix(value, "=?") && HasSuffix(value, "?=")) {
2593 /* Looks like an RFC 2047 encoded parameter. */
2594 char decoded[PATH_MAX + 1];
2595
2596 if (decode_rfc2047 (value, decoded, sizeof decoded)) {
2597 /* Encode using RFC 2231. */
2598 replace_param (first_pm, last_pm, name, decoded, 0);
2599 fixed = 1;
2600 } else {
2601 advise (NULL, "failed to decode %s parameter %s", name, value);
2602 }
2603 }
2604
2605 return fixed;
2606 }
2607
2608
2609 /*
2610 * Replace RFC 2047 encoding with RFC 2231 encoding of name and
2611 * filename parameters in Content-Type and Content-Disposition
2612 * headers, respectively.
2613 */
2614 static int
2615 fix_filename_encoding (CT ct) {
2616 PM pm;
2617 HF hf;
2618 int fixed = 0;
2619
2620 for (pm = ct->c_ctinfo.ci_first_pm; pm; pm = pm->pm_next) {
2621 if (pm->pm_name && pm->pm_value &&
2622 strcasecmp (pm->pm_name, "name") == 0) {
2623 fixed = fix_filename_param (pm->pm_name, pm->pm_value,
2624 &ct->c_ctinfo.ci_first_pm,
2625 &ct->c_ctinfo.ci_last_pm);
2626 }
2627 }
2628
2629 for (pm = ct->c_dispo_first; pm; pm = pm->pm_next) {
2630 if (pm->pm_name && pm->pm_value &&
2631 strcasecmp (pm->pm_name, "filename") == 0) {
2632 fixed = fix_filename_param (pm->pm_name, pm->pm_value,
2633 &ct->c_dispo_first,
2634 &ct->c_dispo_last);
2635 }
2636 }
2637
2638 /* Fix hf values to correspond. */
2639 for (hf = ct->c_first_hf; fixed && hf; hf = hf->next) {
2640 enum { OTHER, TYPE_HEADER, DISPO_HEADER } field = OTHER;
2641
2642 if (strcasecmp (hf->name, TYPE_FIELD) == 0) {
2643 field = TYPE_HEADER;
2644 } else if (strcasecmp (hf->name, DISPO_FIELD) == 0) {
2645 field = DISPO_HEADER;
2646 }
2647
2648 if (field != OTHER) {
2649 const char *const semicolon_loc = strchr (hf->value, ';');
2650
2651 if (semicolon_loc) {
2652 const size_t len =
2653 strlen (hf->name) + 1 + semicolon_loc - hf->value;
2654 const char *const params =
2655 output_params (len,
2656 field == TYPE_HEADER
2657 ? ct->c_ctinfo.ci_first_pm
2658 : ct->c_dispo_first,
2659 NULL, 0);
2660 const char *const new_params = concat (params, "\n", NULL);
2661
2662 replace_substring (&hf->value, semicolon_loc, new_params);
2663 free((void *)new_params); /* Cast away const. Sigh. */
2664 free((void *)params);
2665 } else {
2666 advise (NULL, "did not find semicolon in %s:%s\n",
2667 hf->name, hf->value);
2668 }
2669 }
2670 }
2671
2672 return OK;
2673 }
2674
2675
2676 /*
2677 * Output content in input file to output file.
2678 */
2679 static int
2680 write_content (CT ct, const char *input_filename, char *outfile, FILE *outfp,
2681 int modify_inplace, int message_mods) {
2682 int status = OK;
2683
2684 if (modify_inplace) {
2685 if (message_mods > 0) {
2686 if ((status = output_message_fp (ct, outfp, outfile)) == OK) {
2687 char *infile = input_filename
2688 ? mh_xstrdup (input_filename)
2689 : mh_xstrdup (ct->c_file ? ct->c_file : "-");
2690
2691 if (remove_file (infile) == OK) {
2692 if (rename (outfile, infile)) {
2693 /* Rename didn't work, possibly because of an
2694 attempt to rename across filesystems. Try
2695 brute force copy. */
2696 int old = open (outfile, O_RDONLY);
2697 int new =
2698 open (infile, O_WRONLY | O_CREAT, m_gmprot ());
2699 int i = -1;
2700
2701 if (old != -1 && new != -1) {
2702 char buffer[NMH_BUFSIZ];
2703
2704 while ((i = read (old, buffer, sizeof buffer)) >
2705 0) {
2706 if (write (new, buffer, i) != i) {
2707 i = -1;
2708 break;
2709 }
2710 }
2711 }
2712 if (new != -1) { close (new); }
2713 if (old != -1) { close (old); }
2714 (void) m_unlink (outfile);
2715
2716 if (i < 0) {
2717 /* The -file argument processing used path() to
2718 expand filename to absolute path. */
2719 int file = ct->c_file && ct->c_file[0] == '/';
2720
2721 admonish (NULL, "unable to rename %s %s to %s",
2722 file ? "file" : "message", outfile,
2723 infile);
2724 status = NOTOK;
2725 }
2726 }
2727 } else {
2728 admonish (NULL, "unable to remove input file %s, "
2729 "not modifying it", infile);
2730 (void) m_unlink (outfile);
2731 status = NOTOK;
2732 }
2733
2734 free (infile);
2735 } else {
2736 status = NOTOK;
2737 }
2738 } else {
2739 /* No modifications and didn't need the tmp outfile. */
2740 (void) m_unlink (outfile);
2741 }
2742 } else {
2743 /* Output is going to some file. Produce it whether or not
2744 there were modifications. */
2745 status = output_message_fp (ct, outfp, outfile);
2746 }
2747
2748 flush_errors ();
2749 return status;
2750 }
2751
2752
2753 /*
2754 * parse_mime() does not set lf_line_endings in struct text, so use this
2755 * function to do it. It touches the parts the decodetypes identifies.
2756 */
2757 static void
2758 set_text_ctparams(CT ct, char *decodetypes, int lf_line_endings) {
2759 switch (ct->c_type) {
2760 case CT_MULTIPART: {
2761 struct multipart *m = (struct multipart *) ct->c_ctparams;
2762 struct part *part;
2763
2764 for (part = m->mp_parts; part; part = part->mp_next) {
2765 set_text_ctparams(part->mp_part, decodetypes, lf_line_endings);
2766 }
2767 break;
2768 }
2769
2770 case CT_MESSAGE:
2771 if (ct->c_subtype == MESSAGE_EXTERNAL) {
2772 struct exbody *e = (struct exbody *) ct->c_ctparams;
2773
2774 set_text_ctparams(e->eb_content, decodetypes, lf_line_endings);
2775 }
2776 break;
2777
2778 default:
2779 if (should_decode(decodetypes, ct->c_ctinfo.ci_type, ct->c_ctinfo.ci_subtype)) {
2780 if (ct->c_ctparams == NULL) {
2781 ct->c_ctparams = mh_xcalloc(1, sizeof (struct text));
2782 }
2783 ((struct text *) ct->c_ctparams)->lf_line_endings = lf_line_endings;
2784 }
2785 }
2786 }
2787
2788
2789 /*
2790 * If "rmmproc" is defined, call that to remove the file. Otherwise,
2791 * use the standard MH backup file.
2792 */
2793 static int
2794 remove_file (const char *file) {
2795 if (rmmproc) {
2796 char *rmm_command = concat (rmmproc, " ", file, NULL);
2797 int status = system (rmm_command);
2798
2799 free (rmm_command);
2800 return WIFEXITED (status) ? WEXITSTATUS (status) : NOTOK;
2801 }
2802 /* This is OK for a non-message file, it still uses the
2803 BACKUP_PREFIX form. The backup file will be in the same
2804 directory as file. */
2805 return rename (file, m_backup (file));
2806 }
2807
2808
2809 /*
2810 * Output formatted message to user.
2811 */
2812 static void
2813 report (char *what, char *partno, char *filename, char *message, ...) {
2814 va_list args;
2815 char *fmt;
2816
2817 if (verbosw) {
2818 va_start (args, message);
2819 fmt = concat (filename, partno ? " part " : ", ",
2820 partno ? partno : "", partno ? ", " : "", message, NULL);
2821
2822 advertise (what, NULL, fmt, args);
2823
2824 free (fmt);
2825 va_end (args);
2826 }
2827 }
2828
2829
/*
 * Signal handler:  terminate via done (1).  For SIGQUIT, stdout is
 * flushed and a newline is written to stderr first.
 */
static void
pipeser (int i)
{
    const int got_quit = (i == SIGQUIT);

    if (got_quit) {
        fflush (stdout);
        fputs ("\n", stderr);
        fflush (stderr);
    }

    done (1);
    /* NOTREACHED */
}