]> diplodocus.org Git - nmh/blob - uip/mhfixmsg.c
picksbr.c: Specify parameters of nexus's n_action function pointer.
[nmh] / uip / mhfixmsg.c
1 /* mhfixmsg.c -- rewrite a message with various transformations
2 *
3 * This code is Copyright (c) 2002 and 2013, by the authors of nmh.
4 * See the COPYRIGHT file in the root directory of the nmh
5 * distribution for complete copyright information.
6 */
7
8 #include <h/mh.h>
9 #include <h/fmt_scan.h>
10 #include <h/mime.h>
11 #include <h/mhparse.h>
12 #include <h/utils.h>
13 #include <h/signals.h>
14 #include "../sbr/m_maildir.h"
15 #include "../sbr/m_mktemp.h"
16 #include "../sbr/mime_type.h"
17 #include "mhfree.h"
18 #include "mhoutsbr.h"
19 #include "mhshowsbr.h"
20 #include <fcntl.h>
21
/*
 * Command-line switch table, written as an X-macro list.  Each X() entry
 * carries the switch text (followed by a placeholder when the switch takes
 * an argument), a minchars value (0 for every entry here), and the
 * identifier that main() uses as the case label for that switch.
 */
22 #define MHFIXMSG_SWITCHES \
23 X("decodetext 8bit|7bit|binary", 0, DECODETEXTSW) \
24 X("nodecodetext", 0, NDECODETEXTSW) \
25 X("decodetypes", 0, DECODETYPESW) \
26 X("crlflinebreaks", 0, CRLFLINEBREAKSSW) \
27 X("nocrlflinebreaks", 0, NCRLFLINEBREAKSSW) \
28 X("textcharset", 0, TEXTCHARSETSW) \
29 X("notextcharset", 0, NTEXTCHARSETSW) \
30 X("reformat", 0, REFORMATSW) \
31 X("noreformat", 0, NREFORMATSW) \
32 X("replacetextplain", 0, REPLACETEXTPLAINSW) \
33 X("noreplacetextplain", 0, NREPLACETEXTPLAINSW) \
34 X("fixboundary", 0, FIXBOUNDARYSW) \
35 X("nofixboundary", 0, NFIXBOUNDARYSW) \
36 X("fixcte", 0, FIXCOMPOSITECTESW) \
37 X("nofixcte", 0, NFIXCOMPOSITECTESW) \
38 X("fixtype mimetype", 0, FIXTYPESW) \
39 X("file file", 0, FILESW) \
40 X("outfile file", 0, OUTFILESW) \
41 X("rmmproc program", 0, RPROCSW) \
42 X("normmproc", 0, NRPRCSW) \
43 X("changecur", 0, CHGSW) \
44 X("nochangecur", 0, NCHGSW) \
45 X("verbose", 0, VERBSW) \
46 X("noverbose", 0, NVERBSW) \
47 X("version", 0, VERSIONSW) \
48 X("help", 0, HELPSW) \
49
/* First expansion: X() yields only the identifier, one per switch. */
50 #define X(sw, minchars, id) id,
51 DEFINE_SWITCH_ENUM(MHFIXMSG);
52 #undef X
53
/* Second expansion: X() yields a { sw, minchars, id } initializer for the
   `switches' table that main() passes to smatch(), ambigsw() and
   print_help(). */
54 #define X(sw, minchars, id) { sw, minchars, id },
55 DEFINE_SWITCH_ARRAY(MHFIXMSG, switches);
56 #undef X
57
58
/* Set to 1 by -verbose and to 0 by -noverbose; when set, the fix_*()
   functions report() each modification they make. */
59 int verbosw;
60 int debugsw; /* Needed by mhparse.c. */
61
/* SIGQUIT is handled by the same function as SIGPIPE. */
62 #define quitser pipeser
63
64 /* mhparse.c */
65 extern int skip_mp_cte_check; /* flag to InitMultiPart */
66 extern int suppress_bogus_mp_content_warning; /* flag to InitMultiPart */
67 extern int bogus_mp_content; /* flag from InitMultiPart */
68 /* flags to/from parse_header_attrs */
69 extern int suppress_extraneous_trailing_semicolon_warning;
70
71 /* mhmisc.c */
72 void flush_errors (void);
73
74 /*
75 * transformation settings and static prototypes
76 */
/*
 * The set of transformations selected on the command line.  main() fills
 * one of these in and passes it, read-only, to mhfixmsgsbr() for every
 * message.
 */
77 typedef struct fix_transformations {
/* Nonzero (default) to repair a mismatched multipart boundary; cleared by
   -nofixboundary. */
78 int fixboundary;
/* Nonzero (default) to repair an invalid Content-Transfer-Encoding on
   message and multipart parts; cleared by -nofixcte. */
79 int fixcompositecte;
/* type/subtype strings collected from -fixtype switches, or NULL if none
   were given. */
80 svector_t fixtypes;
/* Nonzero (default) to run ensure_text_plain(); cleared by -noreformat. */
81 int reformat;
/* Set by -replacetextplain; passed through to ensure_text_plain(). */
82 int replacetextplain;
/* CE_8BIT (default), CE_7BIT or CE_BINARY as chosen by -decodetext, or 0
   for -nodecodetext. */
83 int decodetext;
/* Argument of -decodetypes; defaults to "text,application/ics". */
84 char *decodetypes;
85 /* Nonzero to use LF instead of CRLF line breaks: set to 1 by
   -nocrlflinebreaks and to 0 (the default) by -crlflinebreaks.  See
   RFC 2046 Sec. 4.1.1, par.1. */
86 int lf_line_endings;
/* Argument of -textcharset, or NULL (default, and after -notextcharset)
   for no charset conversion. */
87 char *textcharset;
88 } fix_transformations;
89
/*
 * Forward declarations.  mhfixmsgsbr() is the per-message driver and is
 * the only one with external linkage; the rest are file-local helpers for
 * the individual transformations, followed by output, reporting and signal
 * handling.
 */
90 int mhfixmsgsbr (CT *, char *, const fix_transformations *, FILE **, char *,
91 FILE **);
92 static int fix_boundary (CT *, int *);
93 static int copy_input_to_output (const char *, FILE *, const char *, FILE *);
94 static int get_multipart_boundary (CT, char **);
95 static int replace_boundary (CT, char *, char *);
96 static int fix_types (CT, svector_t, int *);
97 static char *replace_substring (char **, const char *, const char *);
98 static char *remove_parameter (char *, const char *);
99 static int fix_composite_cte (CT, int *);
100 static int set_ce (CT, int);
101 static int ensure_text_plain (CT *, CT, int *, int);
102 static int find_textplain_sibling (CT, int, int *);
103 static int insert_new_text_plain_part (CT, int, CT);
104 static CT build_text_plain_part (CT);
105 static int insert_into_new_mp_alt (CT *, int *);
106 static CT divide_part (CT);
107 static void copy_ctinfo (CI, CI);
108 static int decode_part (CT);
109 static int reformat_part (CT, char *, char *, char *, int);
110 static CT build_multipart_alt (CT, CT, int, int);
111 static int boundary_in_content (FILE **, char *, const char *);
112 static void transfer_noncontent_headers (CT, CT);
113 static int set_ct_type (CT, int type, int subtype, int encoding);
114 static int decode_text_parts (CT, int, const char *, int *);
115 static int should_decode(const char *, const char *, const char *);
116 static int content_encoding (CT, const char **);
117 static int strip_crs (CT, int *);
118 static void update_cte (CT);
119 static int least_restrictive_encoding (CT);
120 static int less_restrictive (int, int);
121 static int convert_charsets (CT, char *, int *);
122 static int fix_always (CT, int *);
123 static int fix_filename_param (char *, char *, PM *, PM *);
124 static int fix_filename_encoding (CT);
125 static int write_content (CT, const char *, char *, FILE *, int, int);
126 static void set_text_ctparams(CT, char *, int);
127 static int remove_file (const char *);
128 static void report (char *, char *, char *, char *, ...);
129 static void pipeser (int);
130
131
/*
 * Program entry point.  Parses the switches into a fix_transformations
 * struct, parses the input (a single file, stdin via "-", or the selected
 * messages of a folder) into the NULL-terminated `cts' array, runs
 * mhfixmsgsbr() on every parsed message, releases resources and exits
 * through done() with the accumulated status.
 */
132 int
133 main (int argc, char **argv) {
134 int msgnum;
135 char *cp, *file = NULL, *folder = NULL;
136 char *maildir = NULL, buf[100], *outfile = NULL;
137 char **argp, **arguments;
138 struct msgs_array msgs = { 0, 0, NULL };
139 struct msgs *mp = NULL;
140 CT *ctp;
141 FILE *fp, *infp = NULL, *outfp = NULL;
142 int using_stdin = 0;
143 int chgflag = 1;
144 int status = OK;
145 fix_transformations fx;
/* Defaults: reformat, fix composite C-T-E and fix boundary are enabled;
   text is decoded to 8bit; no -fixtype list, no charset conversion. */
146 fx.reformat = fx.fixcompositecte = fx.fixboundary = 1;
147 fx.fixtypes = NULL;
148 fx.replacetextplain = 0;
149 fx.decodetext = CE_8BIT;
150 fx.decodetypes = "text,application/ics"; /* Default, per man page. */
151 fx.lf_line_endings = 0;
152 fx.textcharset = NULL;
153
154 if (nmh_init(argv[0], 2)) { return 1; }
155
156 arguments = getarguments (invo_name, argc, argv, 1);
157 argp = arguments;
158
159 /*
160 * Parse arguments
161 */
/* For switches that take an argument, argp has already been advanced past
   both the switch and its argument when the error is reported, so
   argp[-2] is the switch itself.  Where the test is
   (*cp == '-' && cp[1]), a lone "-" is accepted as the argument. */
162 while ((cp = *argp++)) {
163 if (*cp == '-') {
164 switch (smatch (++cp, switches)) {
165 case AMBIGSW:
166 ambigsw (cp, switches);
167 done (1);
168 case UNKWNSW:
169 adios (NULL, "-%s unknown", cp);
170
171 case HELPSW:
172 snprintf (buf, sizeof buf, "%s [+folder] [msgs] [switches]",
173 invo_name);
174 print_help (buf, switches, 1);
175 done (0);
176 case VERSIONSW:
177 print_version(invo_name);
178 done (0);
179
180 case DECODETEXTSW:
181 if (! (cp = *argp++) || *cp == '-') {
182 adios (NULL, "missing argument to %s", argp[-2]);
183 }
184 if (! strcasecmp (cp, "8bit")) {
185 fx.decodetext = CE_8BIT;
186 } else if (! strcasecmp (cp, "7bit")) {
187 fx.decodetext = CE_7BIT;
188 } else if (! strcasecmp (cp, "binary")) {
189 fx.decodetext = CE_BINARY;
190 } else {
191 adios (NULL, "invalid argument to %s", argp[-2]);
192 }
193 continue;
194 case NDECODETEXTSW:
195 fx.decodetext = 0;
196 continue;
197 case DECODETYPESW:
198 if (! (cp = *argp++) || *cp == '-') {
199 adios (NULL, "missing argument to %s", argp[-2]);
200 }
201 fx.decodetypes = cp;
202 continue;
203 case CRLFLINEBREAKSSW:
204 fx.lf_line_endings = 0;
205 continue;
206 case NCRLFLINEBREAKSSW:
207 fx.lf_line_endings = 1;
208 continue;
209 case TEXTCHARSETSW:
210 if (! (cp = *argp++) || (*cp == '-' && cp[1])) {
211 adios (NULL, "missing argument to %s", argp[-2]);
212 }
213 fx.textcharset = cp;
214 continue;
215 case NTEXTCHARSETSW:
216 fx.textcharset = 0;
217 continue;
218 case FIXBOUNDARYSW:
219 fx.fixboundary = 1;
220 continue;
221 case NFIXBOUNDARYSW:
222 fx.fixboundary = 0;
223 continue;
224 case FIXCOMPOSITECTESW:
225 fx.fixcompositecte = 1;
226 continue;
227 case NFIXCOMPOSITECTESW:
228 fx.fixcompositecte = 0;
229 continue;
/* -fixtype may be repeated; each type/subtype is appended to fx.fixtypes.
   multipart/ and message/ types are rejected. */
230 case FIXTYPESW:
231 if (! (cp = *argp++) || (*cp == '-' && cp[1])) {
232 adios (NULL, "missing argument to %s", argp[-2]);
233 }
234 if (! strncasecmp (cp, "multipart/", 10) ||
235 ! strncasecmp (cp, "message/", 8))
236 adios (NULL, "-fixtype %s not allowed", cp);
237 if (! strchr (cp, '/'))
238 adios (NULL, "-fixtype requires type/subtype");
239 if (fx.fixtypes == NULL) { fx.fixtypes = svector_create (10); }
240 svector_push_back (fx.fixtypes, cp);
241 continue;
242 case REFORMATSW:
243 fx.reformat = 1;
244 continue;
245 case NREFORMATSW:
246 fx.reformat = 0;
247 continue;
248 case REPLACETEXTPLAINSW:
249 fx.replacetextplain = 1;
250 continue;
251 case NREPLACETEXTPLAINSW:
252 fx.replacetextplain = 0;
253 continue;
/* For -file and -outfile an argument starting with '-' (i.e. "-") is kept
   verbatim; anything else is expanded with path(). */
254 case FILESW:
255 if (! (cp = *argp++) || (*cp == '-' && cp[1])) {
256 adios (NULL, "missing argument to %s", argp[-2]);
257 }
258 file = *cp == '-' ? mh_xstrdup (cp) : path (cp, TFILE);
259 continue;
260 case OUTFILESW:
261 if (! (cp = *argp++) || (*cp == '-' && cp[1])) {
262 adios (NULL, "missing argument to %s", argp[-2]);
263 }
264 outfile = *cp == '-' ? mh_xstrdup (cp) : path (cp, TFILE);
265 continue;
266 case RPROCSW:
267 if (!(rmmproc = *argp++) || *rmmproc == '-') {
268 adios (NULL, "missing argument to %s", argp[-2]);
269 }
270 continue;
271 case NRPRCSW:
272 rmmproc = NULL;
273 continue;
274 case CHGSW:
275 chgflag = 1;
276 continue;
277 case NCHGSW:
278 chgflag = 0;
279 continue;
280 case VERBSW:
281 verbosw = 1;
282 continue;
283 case NVERBSW:
284 verbosw = 0;
285 continue;
286 }
287 }
/* Non-switch argument: +folder / @folder, an absolute path (treated as an
   input file), or a message specification. */
288 if (*cp == '+' || *cp == '@') {
289 if (folder)
290 adios (NULL, "only one folder at a time!");
291 folder = pluspath (cp);
292 } else {
293 if (*cp == '/') {
294 /* Interpret a full path as a filename, not a message. */
295 file = mh_xstrdup (cp);
296 } else {
297 app_msgarg (&msgs, cp);
298 }
299 }
300 }
301
302 SIGNAL (SIGQUIT, quitser);
303 SIGNAL (SIGPIPE, pipeser);
304
305 /*
306 * Read the standard profile setup
307 */
308 if ((fp = fopen (cp = etcpath ("mhn.defaults"), "r"))) {
309 readconfig ((struct node **) 0, fp, cp, 0);
310 fclose (fp);
311 }
312
/* Set the mhparse.c flags declared above before any parse_mime() call. */
313 suppress_bogus_mp_content_warning = skip_mp_cte_check = 1;
314 suppress_extraneous_trailing_semicolon_warning = 1;
315
316 if (! context_find ("path")) {
317 free (path ("./", TFOLDER));
318 }
319
320 if (file && msgs.size) {
321 adios (NULL, "cannot specify msg and file at same time!");
322 }
323
324 if (outfile) {
325 /* Open the outfile now, so we don't have to risk opening it
326 after running out of fds. */
327 if (strcmp (outfile, "-") == 0) {
328 outfp = stdout;
329 } else if ((outfp = fopen (outfile, "w")) == NULL) {
330 adios (outfile, "unable to open for writing");
331 }
332 }
333
334 /*
335 * check if message is coming from file
336 */
337 if (file) {
338 /* If file is stdin, create a tmp file name before parse_mime()
339 has a chance, because it might put in on a different
340 filesystem than the output file. Instead, put it in the
341 user's preferred tmp directory. */
342 CT ct;
343
344 if (! strcmp ("-", file)) {
345 int fd;
346 char *cp;
347
348 using_stdin = 1;
349
350 if ((cp = m_mktemp2 (NULL, invo_name, &fd, NULL)) == NULL) {
351 adios (NULL, "unable to create temporary file in %s",
352 get_temp_dir());
353 } else {
/* Replace "-" with the temporary file name and copy stdin into it. */
354 free (file);
355 file = mh_xstrdup (cp);
356 cpydata (STDIN_FILENO, fd, "-", file);
357 }
358
359 if (close (fd)) {
360 (void) m_unlink (file);
361 adios (NULL, "failed to write temporary file");
362 }
363 }
364
/* Room for one content plus the NULL terminator that ends the array. */
365 cts = mh_xcalloc(2, sizeof *cts);
366 ctp = cts;
367
368 if ((ct = parse_mime (file))) {
369 set_text_ctparams(ct, fx.decodetypes, fx.lf_line_endings);
370 *ctp++ = ct;
371 } else {
372 inform("unable to parse message from file %s", file);
373 status = NOTOK;
374
375 /* If there's an outfile, pass the input message unchanged, so the
376 message won't get dropped from a pipeline. */
377 if (outfile) {
378 /* Something went wrong. Output might be expected, such as if
379 this were run as a filter. Just copy the input to the
380 output. */
381 if ((infp = fopen (file, "r")) == NULL) {
382 adios (file, "unable to open for reading");
383 }
384
385 if (copy_input_to_output (file, infp, outfile, outfp) != OK) {
386 inform("unable to copy message to %s, "
387 "it might be lost\n", outfile);
388 }
389
390 fclose (infp);
391 infp = NULL;
392 }
393 }
394 } else {
395 /*
396 * message(s) are coming from a folder
397 */
398 CT ct;
399
400 if (! msgs.size) {
401 app_msgarg(&msgs, "cur");
402 }
403 if (! folder) {
404 folder = getfolder (1);
405 }
406 maildir = mh_xstrdup(m_maildir (folder));
407
408 /* chdir so that error messages, esp. from MIME parser, just
409 refer to the message and not its path. */
410 if (chdir (maildir) == NOTOK) {
411 adios (maildir, "unable to change directory to");
412 }
413
414 /* read folder and create message structure */
415 if (! (mp = folder_read (folder, 1))) {
416 adios (NULL, "unable to read folder %s", folder);
417 }
418
419 /* check for empty folder */
420 if (mp->nummsg == 0) {
421 adios (NULL, "no messages in %s", folder);
422 }
423
424 /* parse all the message ranges/sequences and set SELECTED */
425 for (msgnum = 0; msgnum < msgs.size; msgnum++)
426 if (! m_convert (mp, msgs.msgs[msgnum])) {
427 done (1);
428 }
429 seq_setprev (mp); /* set the previous-sequence */
430
/* One slot per selected message plus the NULL terminator. */
431 cts = mh_xcalloc(mp->numsel + 1, sizeof *cts);
432 ctp = cts;
433
434 for (msgnum = mp->lowsel; msgnum <= mp->hghsel; msgnum++) {
435 if (is_selected(mp, msgnum)) {
436 char *msgnam = m_name (msgnum);
437
438 if ((ct = parse_mime (msgnam))) {
439 set_text_ctparams(ct, fx.decodetypes, fx.lf_line_endings);
440 *ctp++ = ct;
441 } else {
442 inform("unable to parse message %s", msgnam);
443 status = NOTOK;
444
445 /* If there's an outfile, pass the input message
446 unchanged, so the message won't get dropped from a
447 pipeline. */
448 if (outfile) {
449 /* Something went wrong. Output might be expected,
450 such as if this were run as a filter. Just copy
451 the input to the output. */
452 /* Can't use path() here because 1) it might have been
453 called before and it caches the pwd, and 2) we call
454 chdir() after that. */
455 char *input_filename =
456 concat (maildir, "/", msgnam, NULL);
457
458 if ((infp = fopen (input_filename, "r")) == NULL) {
459 adios (input_filename,
460 "unable to open for reading");
461 }
462
463 if (copy_input_to_output (input_filename, infp,
464 outfile, outfp) != OK) {
465 inform("unable to copy message to %s, "
466 "it might be lost\n", outfile);
467 }
468
469 fclose (infp);
470 infp = NULL;
471 free (input_filename);
472 }
473 }
474 }
475 }
476
477 if (chgflag) {
478 seq_setcur (mp, mp->hghsel); /* update current message */
479 }
480 seq_save (mp); /* synchronize sequences */
481 context_replace (pfolder, folder);/* update current folder */
482 context_save (); /* save the context file */
483 }
484
/* Apply the transformations to every message that parsed.  If none did,
   the exit status is forced to 1. */
485 if (*cts) {
486 for (ctp = cts; *ctp; ++ctp) {
487 status += mhfixmsgsbr (ctp, maildir, &fx, &infp, outfile, &outfp);
488 free_content (*ctp);
489
490 if (using_stdin) {
491 (void) m_unlink (file);
492
493 if (! outfile) {
494 /* Just calling m_backup() unlinks the backup file. */
495 (void) m_backup (file);
496 }
497 }
498 }
499 } else {
500 status = 1;
501 }
502
503 mh_xfree(maildir);
504 free (cts);
505
506 if (fx.fixtypes != NULL) { svector_free (fx.fixtypes); }
507 if (infp) { fclose (infp); } /* even if stdin */
508 if (outfp) { fclose (outfp); } /* even if stdout */
509 free (outfile);
510 free (file);
511 free (folder);
512 free (arguments);
513
514 done (status);
515 return NOTOK;
516 }
517
518
519 /*
520 * Apply transformations to one message.
521 */
522 int
523 mhfixmsgsbr (CT *ctp, char *maildir, const fix_transformations *fx,
524 FILE **infp, char *outfile, FILE **outfp) {
525 /* Store input filename in case one of the transformations, i.e.,
526 fix_boundary(), rewrites to a tmp file. */
527 char *input_filename = maildir
528 ? concat (maildir, "/", (*ctp)->c_file, NULL)
529 : mh_xstrdup ((*ctp)->c_file);
530 int modify_inplace = 0;
531 int message_mods = 0;
532 int status = OK;
533
534 /* Though the input file won't need to be opened if everything goes
535 well, do it here just in case there's a failure, and that failure is
536 running out of file descriptors. */
537 if ((*infp = fopen (input_filename, "r")) == NULL) {
538 adios (input_filename, "unable to open for reading");
539 }
540
541 if (outfile == NULL) {
542 modify_inplace = 1;
543
544 if ((*ctp)->c_file) {
545 char *tempfile;
546 /* outfp will be closed by the caller */
547 if ((tempfile = m_mktemp2 (NULL, invo_name, NULL, outfp)) ==
548 NULL) {
549 adios (NULL, "unable to create temporary file in %s",
550 get_temp_dir());
551 }
552 outfile = mh_xstrdup (tempfile);
553 } else {
554 adios (NULL, "missing both input and output filenames\n");
555 }
556 } /* else *outfp was defined by caller */
557
558 reverse_alternative_parts (*ctp);
559 status = fix_always (*ctp, &message_mods);
560 if (status == OK && fx->fixboundary) {
561 status = fix_boundary (ctp, &message_mods);
562 }
563 if (status == OK && fx->fixtypes != NULL) {
564 status = fix_types (*ctp, fx->fixtypes, &message_mods);
565 }
566 if (status == OK && fx->fixcompositecte) {
567 status = fix_composite_cte (*ctp, &message_mods);
568 }
569 if (status == OK && fx->reformat) {
570 status =
571 ensure_text_plain (ctp, NULL, &message_mods, fx->replacetextplain);
572 }
573 if (status == OK && fx->decodetext) {
574 status = decode_text_parts (*ctp, fx->decodetext, fx->decodetypes,
575 &message_mods);
576 update_cte (*ctp);
577 }
578 if (status == OK && fx->textcharset != NULL) {
579 status = convert_charsets (*ctp, fx->textcharset, &message_mods);
580 }
581
582 if (status == OK && ! (*ctp)->c_umask) {
583 /* Set the umask for the contents file. This currently
584 isn't used but just in case it is in the future. */
585 struct stat st;
586
587 if (stat ((*ctp)->c_file, &st) != NOTOK) {
588 (*ctp)->c_umask = ~(st.st_mode & 0777);
589 } else {
590 (*ctp)->c_umask = ~m_gmprot();
591 }
592 }
593
594 /*
595 * Write the content to a file
596 */
597 if (status == OK) {
598 status = write_content (*ctp, input_filename, outfile, *outfp,
599 modify_inplace, message_mods);
600 } else if (! modify_inplace) {
601 /* Something went wrong. Output might be expected, such
602 as if this were run as a filter. Just copy the input
603 to the output. */
604 if (copy_input_to_output (input_filename, *infp, outfile,
605 *outfp) != OK) {
606 inform("unable to copy message to %s, it might be lost\n",
607 outfile);
608 }
609 }
610
611 if (modify_inplace) {
612 if (status != OK) { (void) m_unlink (outfile); }
613 free (outfile);
614 outfile = NULL;
615 }
616
617 fclose (*infp);
618 *infp = NULL;
619 free (input_filename);
620
621 return status;
622 }
623
624
625 /*
626 * Copy input message to output. Assumes not modifying in place, so this
627 * might be running as part of a pipeline.
628 */
629 static int
630 copy_input_to_output (const char *input_filename, FILE *infp,
631 const char *output_filename, FILE *outfp) {
632 int in = fileno (infp);
633 int out = fileno (outfp);
634 int status = OK;
635
636 if (in != -1 && out != -1) {
637 cpydata (in, out, input_filename, output_filename);
638 } else {
639 status = NOTOK;
640 }
641
642 return status;
643 }
644
645
646 /*
647 * Fix mismatched outer level boundary.
648 */
649 static int
650 fix_boundary (CT *ct, int *message_mods) {
651 struct multipart *mp;
652 int status = OK;
653
654 if (ct && (*ct)->c_type == CT_MULTIPART && bogus_mp_content) {
655 mp = (struct multipart *) (*ct)->c_ctparams;
656
657 /*
658 * 1) Get boundary at end of part.
659 * 2) Get boundary at beginning of part and compare to the end-of-part
660 * boundary.
661 * 3) Write out contents of ct to tmp file, replacing boundary in
662 * header with boundary from part. Set c_unlink to 1.
663 * 4) Free ct.
664 * 5) Call parse_mime() on the tmp file, replacing ct.
665 */
666
667 if (mp && mp->mp_start) {
668 char *part_boundary;
669
670 if (get_multipart_boundary (*ct, &part_boundary) == OK) {
671 char *fixed;
672
673 if ((fixed = m_mktemp2 (NULL, invo_name, NULL, &(*ct)->c_fp))) {
674 if (replace_boundary (*ct, fixed, part_boundary) == OK) {
675 char *filename = mh_xstrdup ((*ct)->c_file);
676 CT fixed_ct;
677
678 free_content (*ct);
679 if ((fixed_ct = parse_mime (fixed))) {
680 *ct = fixed_ct;
681 (*ct)->c_unlink = 1;
682
683 ++*message_mods;
684 if (verbosw) {
685 report (NULL, NULL, filename,
686 "fix multipart boundary");
687 }
688 } else {
689 *ct = NULL;
690 inform("unable to parse fixed part");
691 status = NOTOK;
692 }
693 free (filename);
694 } else {
695 inform("unable to replace broken boundary");
696 status = NOTOK;
697 }
698 } else {
699 inform("unable to create temporary file in %s",
700 get_temp_dir());
701 status = NOTOK;
702 }
703
704 free (part_boundary);
705 } else {
706 /* Couldn't fix the boundary. Report failure so that mhfixmsg
707 doesn't modify the message. */
708 status = NOTOK;
709 }
710 } else {
711 /* No multipart struct, even though the content type is
712 CT_MULTIPART. Report failure so that mhfixmsg doesn't modify
713 the message. */
714 status = NOTOK;
715 }
716 }
717
718 return status;
719 }
720
721
722 /*
723 * Find boundary at end of multipart.
724 */
725 static int
726 get_multipart_boundary (CT ct, char **part_boundary) {
727 char buffer[NMH_BUFSIZ];
728 char *end_boundary = NULL;
729 off_t begin = (off_t) ct->c_end > (off_t) (ct->c_begin + sizeof buffer)
730 ? (off_t) (ct->c_end - sizeof buffer)
731 : (off_t) ct->c_begin;
732 size_t bytes_read;
733 int status = OK;
734
735 /* This will fail if the boundary spans fread() calls. NMH_BUFSIZ should
736 be big enough, even if it's just 1024, to make that unlikely. */
737
738 /* free_content() will close ct->c_fp if bogus MP boundary is fixed. */
739 if (! ct->c_fp && (ct->c_fp = fopen (ct->c_file, "r")) == NULL) {
740 advise (ct->c_file, "unable to open for reading");
741 return NOTOK;
742 }
743
744 /* Get boundary at end of multipart. */
745 while (begin >= (off_t) ct->c_begin) {
746 fseeko (ct->c_fp, begin, SEEK_SET);
747 while ((bytes_read = fread (buffer, 1, sizeof buffer, ct->c_fp)) > 0) {
748 char *cp = rfind_str (buffer, bytes_read, "--");
749
750 if (cp) {
751 char *end;
752
753 /* Trim off trailing "--" and anything beyond. */
754 *cp-- = '\0';
755 if ((end = rfind_str (buffer, cp - buffer, "\n"))) {
756 if (strlen (end) > 3 && *end++ == '\n' &&
757 *end++ == '-' && *end++ == '-') {
758 end_boundary = mh_xstrdup (end);
759 break;
760 }
761 }
762 }
763 }
764
765 if (end_boundary || begin <= (off_t) (ct->c_begin + sizeof buffer))
766 break;
767 begin -= sizeof buffer;
768 }
769
770 /* Get boundary at beginning of multipart. */
771 if (end_boundary) {
772 fseeko (ct->c_fp, ct->c_begin, SEEK_SET);
773 while ((bytes_read = fread (buffer, 1, sizeof buffer, ct->c_fp)) > 0) {
774 if (bytes_read >= strlen (end_boundary)) {
775 char *cp = find_str (buffer, bytes_read, end_boundary);
776
777 if (cp && cp - buffer >= 2 && *--cp == '-' &&
778 *--cp == '-' && (cp > buffer && *--cp == '\n')) {
779 status = OK;
780 break;
781 }
782 } else {
783 /* The start and end boundaries didn't match, or the
784 start boundary doesn't begin with "\n--" (or "--"
785 if at the beginning of buffer). Keep trying. */
786 status = NOTOK;
787 }
788 }
789 } else {
790 status = NOTOK;
791 }
792
793 if (ct->c_fp) {
794 fclose (ct->c_fp);
795 ct->c_fp = NULL;
796 }
797
798 if (status == OK) {
799 *part_boundary = end_boundary;
800 } else {
801 *part_boundary = NULL;
802 free (end_boundary);
803 }
804
805 return status;
806 }
807
808
809 /*
810 * Open and copy ct->c_file to file, replacing the multipart boundary.
811 */
812 static int
813 replace_boundary (CT ct, char *file, char *boundary) {
814 FILE *fpin, *fpout;
815 int compnum, state;
816 char buf[NMH_BUFSIZ], name[NAMESZ];
817 char *np, *vp;
818 m_getfld_state_t gstate;
819 int status = OK;
820
821 if (ct->c_file == NULL) {
822 inform("missing input filename");
823 return NOTOK;
824 }
825
826 if ((fpin = fopen (ct->c_file, "r")) == NULL) {
827 advise (ct->c_file, "unable to open for reading");
828 return NOTOK;
829 }
830
831 if ((fpout = fopen (file, "w")) == NULL) {
832 fclose (fpin);
833 advise (file, "unable to open for writing");
834 return NOTOK;
835 }
836
837 gstate = m_getfld_state_init(fpin);
838 for (compnum = 1;;) {
839 int bufsz = (int) sizeof buf;
840
841 switch (state = m_getfld2(&gstate, name, buf, &bufsz)) {
842 case FLD:
843 case FLDPLUS:
844 compnum++;
845
846 /* get copies of the buffers */
847 np = mh_xstrdup (name);
848 vp = mh_xstrdup (buf);
849
850 /* if necessary, get rest of field */
851 while (state == FLDPLUS) {
852 bufsz = sizeof buf;
853 state = m_getfld2(&gstate, name, buf, &bufsz);
854 vp = add (buf, vp); /* add to previous value */
855 }
856
857 if (strcasecmp (TYPE_FIELD, np)) {
858 fprintf (fpout, "%s:%s", np, vp);
859 } else {
860 char *new_ctline, *new_params;
861
862 replace_param(&ct->c_ctinfo.ci_first_pm,
863 &ct->c_ctinfo.ci_last_pm, "boundary",
864 boundary, 0);
865
866 new_ctline = concat(" ", ct->c_ctinfo.ci_type, "/",
867 ct->c_ctinfo.ci_subtype, NULL);
868 new_params = output_params(strlen(TYPE_FIELD) +
869 strlen(new_ctline) + 1,
870 ct->c_ctinfo.ci_first_pm, NULL, 0);
871 fprintf (fpout, "%s:%s%s\n", np, new_ctline,
872 FENDNULL(new_params));
873 free(new_ctline);
874 mh_xfree(new_params);
875 }
876
877 free (vp);
878 free (np);
879
880 continue;
881
882 case BODY:
883 putc('\n', fpout);
884 /* buf will have a terminating NULL, skip it. */
885 if ((int) fwrite (buf, 1, bufsz-1, fpout) < bufsz-1) {
886 advise (file, "fwrite");
887 }
888 continue;
889
890 case FILEEOF:
891 break;
892
893 case LENERR:
894 case FMTERR:
895 inform("message format error in component #%d", compnum);
896 status = NOTOK;
897 break;
898
899 default:
900 inform("getfld() returned %d", state);
901 status = NOTOK;
902 break;
903 }
904
905 break;
906 }
907
908 m_getfld_state_destroy (&gstate);
909 fclose (fpout);
910 fclose (fpin);
911
912 return status;
913 }
914
915
916 /*
917 * Fix Content-Type header to reflect the content of its part.
918 */
919 static int
920 fix_types (CT ct, svector_t fixtypes, int *message_mods) {
921 int status = OK;
922
923 switch (ct->c_type) {
924 case CT_MULTIPART: {
925 struct multipart *m = (struct multipart *) ct->c_ctparams;
926 struct part *part;
927
928 for (part = m->mp_parts; status == OK && part; part = part->mp_next) {
929 status = fix_types (part->mp_part, fixtypes, message_mods);
930 }
931 break;
932 }
933
934 case CT_MESSAGE:
935 if (ct->c_subtype == MESSAGE_EXTERNAL) {
936 struct exbody *e = (struct exbody *) ct->c_ctparams;
937
938 status = fix_types (e->eb_content, fixtypes, message_mods);
939 }
940 break;
941
942 default: {
943 char **typep, *type;
944
945 if (ct->c_ctinfo.ci_type && ct->c_ctinfo.ci_subtype) {
946 for (typep = svector_strs (fixtypes);
947 typep && (type = *typep);
948 ++typep) {
949 char *type_subtype =
950 concat (ct->c_ctinfo.ci_type, "/", ct->c_ctinfo.ci_subtype,
951 NULL);
952
953 if (! strcasecmp (type, type_subtype) &&
954 decode_part (ct) == OK &&
955 ct->c_cefile.ce_file != NULL) {
956 char *ct_type_subtype = mime_type (ct->c_cefile.ce_file);
957 char *cp;
958
959 if ((cp = strchr (ct_type_subtype, ';'))) {
960 /* Truncate to remove any parameter list from
961 mime_type () result. */
962 *cp = '\0';
963 }
964
965 if (strcasecmp (type, ct_type_subtype)) {
966 char *ct_type, *ct_subtype;
967 HF hf;
968
969 /* The Content-Type header does not match the
970 content, so update these struct Content
971 fields to match:
972 * c_type, c_subtype
973 * c_ctinfo.ci_type, c_ctinfo.ci_subtype
974 * c_ctline
975 */
976 /* Extract type and subtype from type/subtype. */
977 ct_type = mh_xstrdup(ct_type_subtype);
978 if ((cp = strchr (ct_type, '/'))) {
979 *cp = '\0';
980 ct_subtype = mh_xstrdup(++cp);
981 } else {
982 inform("missing / in MIME type of %s %s",
983 ct->c_file, ct->c_partno);
984 free (ct_type);
985 return NOTOK;
986 }
987
988 ct->c_type = ct_str_type (ct_type);
989 ct->c_subtype = ct_str_subtype (ct->c_type, ct_subtype);
990
991 free (ct->c_ctinfo.ci_type);
992 ct->c_ctinfo.ci_type = ct_type;
993 free (ct->c_ctinfo.ci_subtype);
994 ct->c_ctinfo.ci_subtype = ct_subtype;
995 if (! replace_substring (&ct->c_ctline, type,
996 ct_type_subtype)) {
997 inform("did not find %s in %s",
998 type, ct->c_ctline);
999 }
1000
1001 /* Update Content-Type header field. */
1002 for (hf = ct->c_first_hf; hf; hf = hf->next) {
1003 if (! strcasecmp (TYPE_FIELD, hf->name)) {
1004 if (replace_substring (&hf->value, type,
1005 ct_type_subtype)) {
1006 ++*message_mods;
1007 if (verbosw) {
1008 report (NULL, ct->c_partno, ct->c_file,
1009 "change Content-Type in header "
1010 "from %s to %s",
1011 type, ct_type_subtype);
1012 }
1013 break;
1014 }
1015 inform("did not find %s in %s", type, hf->value);
1016 }
1017 }
1018 }
1019 free (ct_type_subtype);
1020 }
1021 free (type_subtype);
1022 }
1023 }
1024 }}
1025
1026 return status;
1027 }
1028
1029
/*
 * Replace the first occurrence of old in *str with new.
 *
 * The result is built in freshly allocated storage; the previous *str is
 * freed and *str is updated to point at the result.
 *
 * Returns the new string, or NULL, leaving *str untouched, if old does not
 * occur in *str.
 */
char *
replace_substring (char **str, const char *old, const char *new) {
    char *match = strstr (*str, old);
    char *tail, *result;

    if (match == NULL) {
        return NULL;
    }

    tail = match + strlen (old);

    if (match == *str) {
        /* Match at the very start: there is no leading text to keep. */
        result = concat (new, tail, NULL);
    } else {
        /* Copy the string and cut it at the match to get the head. */
        char *head = mh_xstrdup(*str);

        head[match - *str] = '\0';
        result = concat (head, new, tail, NULL);
        free (head);
    }

    free (*str);
    *str = result;

    return result;
}
1057
1058
/*
 * Remove a name=value parameter, given just its name, from a header value.
 *
 * The first occurrence of name immediately followed by "=" is removed in
 * place, together with the whitespace and a single ';' in front of it.
 * A value that starts with '"' extends through the closing quote; any
 * other value extends up to the next whitespace character.  If the value
 * is empty or its closing quote is missing, removal stops at the end of
 * the string.
 *
 * Based on the BNF in RFC 2045, no whitespace is expected between the
 * parameter name and the "=", or between the "=" and the value.
 *
 * Returns str, modified in place if the parameter was found.
 */
char *
remove_parameter (char *str, const char *name) {
    const size_t name_len = strlen (name);
    char *cp = NULL;
    char *scan;

    /* Find the first occurrence of name that is directly followed by '='. */
    for (scan = str; (scan = strstr (scan, name)) != NULL; ++scan) {
        if (scan[name_len] == '=') {
            cp = scan;
            break;
        }
        if (*scan == '\0') {
            /* Only reachable with an empty name: end of string. */
            break;
        }
    }

    if (cp != NULL) {
        char *start, *end;

        /* Remove any leading spaces, before the parameter name. */
        for (start = cp;
             start > str  &&  isspace ((unsigned char) start[-1]);
             --start) {
            continue;
        }
        /* Remove a leading semicolon. */
        if (start > str  &&  start[-1] == ';') { --start; }

        end = cp + name_len + 1;
        if (*end == '"') {
            /* Skip past the quoted value, and then the final quote if
               there is one. */
            for (++end; *end  &&  *end != '"'; ++end) { continue; }
            if (*end == '"') { ++end; }
        } else {
            /* Skip past the value.  Its first character is skipped
               unconditionally, but never the terminating null. */
            if (*end) { ++end; }
            while (*end  &&  ! isspace ((unsigned char) *end)) { ++end; }
        }

        /* Move the rest of the string, including its trailing null, down
           over the removed parameter. */
        (void) memmove (start, end, strlen (end) + 1);
    }

    return str;
}
1104
1105
1106 /*
1107 * Fix Content-Transfer-Encoding of composite,, e.g., message or multipart, part.
1108 * According to RFC 2045 Sec. 6.4, it must be 7bit, 8bit, or binary. Set it to
1109 * 8 bit.
1110 */
1111 static int
1112 fix_composite_cte (CT ct, int *message_mods) {
1113 int status = OK;
1114
1115 if (ct->c_type == CT_MESSAGE || ct->c_type == CT_MULTIPART) {
1116 if (ct->c_encoding != CE_7BIT && ct->c_encoding != CE_8BIT &&
1117 ct->c_encoding != CE_BINARY) {
1118 HF hf;
1119
1120 for (hf = ct->c_first_hf; hf; hf = hf->next) {
1121 char *name = hf->name;
1122 for (; *name && isspace ((unsigned char) *name); ++name) {
1123 continue;
1124 }
1125
1126 if (! strncasecmp (name, ENCODING_FIELD,
1127 strlen (ENCODING_FIELD))) {
1128 char *prefix = "Nmh-REPLACED-INVALID-";
1129 HF h;
1130
1131 NEW(h);
1132 h->name = mh_xstrdup (hf->name);
1133 h->hf_encoding = hf->hf_encoding;
1134 h->next = hf->next;
1135 hf->next = h;
1136
1137 /* Retain old header but prefix its name. */
1138 free (hf->name);
1139 hf->name = concat (prefix, h->name, NULL);
1140
1141 ++*message_mods;
1142 if (verbosw) {
1143 char *encoding = cpytrim (hf->value);
1144 report (NULL, ct->c_partno, ct->c_file,
1145 "replace Content-Transfer-Encoding of %s "
1146 "with 8 bit", encoding);
1147 free (encoding);
1148 }
1149
1150 h->value = mh_xstrdup (" 8bit\n");
1151
1152 /* Don't need to warn for multiple C-T-E header
1153 fields, parse_mime() already does that. But
1154 if there are any, fix them all as necessary. */
1155 hf = h;
1156 }
1157 }
1158
1159 set_ce (ct, CE_8BIT);
1160 }
1161
1162 if (ct->c_type == CT_MULTIPART) {
1163 struct multipart *m;
1164 struct part *part;
1165
1166 m = (struct multipart *) ct->c_ctparams;
1167 for (part = m->mp_parts; part; part = part->mp_next) {
1168 if (fix_composite_cte (part->mp_part, message_mods) != OK) {
1169 status = NOTOK;
1170 break;
1171 }
1172 }
1173 }
1174 }
1175
1176 return status;
1177 }
1178
1179
1180 /*
1181 * Set content encoding.
1182 */
1183 static int
1184 set_ce (CT ct, int encoding) {
1185 const char *ce = ce_str (encoding);
1186 const struct str2init *ctinit = get_ce_method (ce);
1187
1188 if (ctinit) {
1189 char *cte = concat (" ", ce, "\n", NULL);
1190 int found_cte = 0;
1191 HF hf;
1192 /* Decoded contents might be in ct->c_cefile.ce_file, if the
1193 caller is decode_text_parts (). Save because we'll
1194 overwrite below. */
1195 struct cefile decoded_content_info = ct->c_cefile;
1196
1197 ct->c_encoding = encoding;
1198
1199 ct->c_ctinitfnx = ctinit->si_init;
1200 /* This will assign ct->c_cefile with an all-0 struct, which
1201 is what we want. */
1202 (*ctinit->si_init) (ct);
1203 /* After returning, the caller should set
1204 ct->c_cefile.ce_file to the name of the file containing
1205 the contents. */
1206
1207 if (ct->c_ceclosefnx) {
1208 (*ct->c_ceclosefnx) (ct);
1209 }
1210
1211 /* Restore the cefile. */
1212 ct->c_cefile = decoded_content_info;
1213
1214 /* Update/add Content-Transfer-Encoding header field. */
1215 for (hf = ct->c_first_hf; hf; hf = hf->next) {
1216 if (! strcasecmp (ENCODING_FIELD, hf->name)) {
1217 found_cte = 1;
1218 free (hf->value);
1219 hf->value = cte;
1220 }
1221 }
1222 if (! found_cte) {
1223 add_header (ct, mh_xstrdup (ENCODING_FIELD), cte);
1224 }
1225
1226 /* Update c_celine. It's used only by mhlist -debug. */
1227 free (ct->c_celine);
1228 ct->c_celine = mh_xstrdup (cte);
1229
1230 return OK;
1231 }
1232
1233 return NOTOK;
1234 }
1235
1236
1237 /*
1238 * Make sure each text part has a corresponding text/plain part.
1239 */
1240 static int
1241 ensure_text_plain (CT *ct, CT parent, int *message_mods, int replacetextplain) {
1242 int status = OK;
1243
1244 switch ((*ct)->c_type) {
1245 case CT_TEXT: {
1246 /* Nothing to do for text/plain. */
1247 if ((*ct)->c_subtype == TEXT_PLAIN) { return OK; }
1248
1249 if (parent && parent->c_type == CT_MULTIPART &&
1250 parent->c_subtype == MULTI_ALTERNATE) {
1251 int new_subpart_number = 1;
1252 int has_text_plain =
1253 find_textplain_sibling (parent, replacetextplain,
1254 &new_subpart_number);
1255
1256 if (! has_text_plain) {
1257 /* Parent is a multipart/alternative. Insert a new
1258 text/plain subpart. */
1259 const int inserted =
1260 insert_new_text_plain_part (*ct, new_subpart_number,
1261 parent);
1262 if (inserted) {
1263 ++*message_mods;
1264 if (verbosw) {
1265 report (NULL, parent->c_partno, parent->c_file,
1266 "insert text/plain part");
1267 }
1268 } else {
1269 status = NOTOK;
1270 }
1271 }
1272 } else if (parent && parent->c_type == CT_MULTIPART &&
1273 parent->c_subtype == MULTI_RELATED) {
1274 char *type_subtype =
1275 concat ((*ct)->c_ctinfo.ci_type, "/",
1276 (*ct)->c_ctinfo.ci_subtype, NULL);
1277 const char *parent_type =
1278 get_param (parent->c_ctinfo.ci_first_pm, "type", '?', 1);
1279 int new_subpart_number = 1;
1280 int has_text_plain = 0;
1281
1282 /* Have to do string comparison on the subtype because we
1283 don't enumerate all of them in c_subtype values.
1284 parent_type will be NULL if the multipart/related part
1285 doesn't have a type parameter. The type parameter must
1286 be specified according to RFC 2387 Sec. 3.1 but not all
1287 messages comply. */
1288 if (parent_type && strcasecmp (type_subtype, parent_type) == 0) {
1289 /* The type of this part matches the root type of the
1290 parent multipart/related. Look to see if there's
1291 text/plain sibling. */
1292 has_text_plain =
1293 find_textplain_sibling (parent, replacetextplain,
1294 &new_subpart_number);
1295 }
1296
1297 free (type_subtype);
1298
1299 if (! has_text_plain) {
1300 struct multipart *mp = (struct multipart *) parent->c_ctparams;
1301 struct part *part;
1302 int siblings = 0;
1303
1304 for (part = mp->mp_parts; part; part = part->mp_next) {
1305 if (*ct != part->mp_part) {
1306 ++siblings;
1307 }
1308 }
1309
1310 if (siblings) {
1311 /* Parent is a multipart/related. Insert a new
1312 text/plain subpart in a new multipart/alternative. */
1313 if (insert_into_new_mp_alt (ct, message_mods)) {
1314 /* Not an error if text/plain couldn't be added. */
1315 }
1316 } else {
1317 /* There are no siblings, so insert a new text/plain
1318 subpart, and change the parent type from
1319 multipart/related to multipart/alternative. */
1320 const int inserted =
1321 insert_new_text_plain_part (*ct, new_subpart_number,
1322 parent);
1323
1324 if (inserted) {
1325 HF hf;
1326
1327 parent->c_subtype = MULTI_ALTERNATE;
1328 free (parent->c_ctinfo.ci_subtype);
1329 parent->c_ctinfo.ci_subtype = mh_xstrdup("alternative");
1330 if (! replace_substring (&parent->c_ctline, "/related",
1331 "/alternative")) {
1332 inform("did not find multipart/related in %s",
1333 parent->c_ctline);
1334 }
1335
1336 /* Update Content-Type header field. */
1337 for (hf = parent->c_first_hf; hf; hf = hf->next) {
1338 if (! strcasecmp (TYPE_FIELD, hf->name)) {
1339 if (replace_substring (&hf->value, "/related",
1340 "/alternative")) {
1341 ++*message_mods;
1342 if (verbosw) {
1343 report (NULL, parent->c_partno,
1344 parent->c_file,
1345 "insert text/plain part");
1346 }
1347
1348 /* Remove, e.g., type="text/html" from
1349 multipart/alternative. */
1350 remove_parameter (hf->value, "type");
1351 break;
1352 }
1353 inform("did not find multipart/"
1354 "related in header %s", hf->value);
1355 }
1356 }
1357 } else {
1358 /* Not an error if text/plain couldn't be inserted. */
1359 }
1360 }
1361 }
1362 } else {
1363 if (insert_into_new_mp_alt (ct, message_mods)) {
1364 status = NOTOK;
1365 }
1366 }
1367 break;
1368 }
1369
1370 case CT_MULTIPART: {
1371 struct multipart *mp = (struct multipart *) (*ct)->c_ctparams;
1372 struct part *part;
1373
1374 for (part = mp->mp_parts; status == OK && part; part = part->mp_next) {
1375 if ((*ct)->c_type == CT_MULTIPART) {
1376 status = ensure_text_plain (&part->mp_part, *ct, message_mods,
1377 replacetextplain);
1378 }
1379 }
1380 break;
1381 }
1382
1383 case CT_MESSAGE:
1384 if ((*ct)->c_subtype == MESSAGE_EXTERNAL) {
1385 struct exbody *e = (struct exbody *) (*ct)->c_ctparams;
1386
1387 status = ensure_text_plain (&e->eb_content, *ct, message_mods,
1388 replacetextplain);
1389 }
1390 break;
1391 }
1392
1393 return status;
1394 }
1395
1396
1397 /*
1398 * See if there is a sibling text/plain, and return its subpart number.
1399 */
1400 static int
1401 find_textplain_sibling (CT parent, int replacetextplain,
1402 int *new_subpart_number) {
1403 struct multipart *mp = (struct multipart *) parent->c_ctparams;
1404 struct part *part, *prev;
1405 int has_text_plain = 0;
1406
1407 for (prev = part = mp->mp_parts; part; part = part->mp_next) {
1408 ++*new_subpart_number;
1409 if (part->mp_part->c_type == CT_TEXT &&
1410 part->mp_part->c_subtype == TEXT_PLAIN) {
1411 if (replacetextplain) {
1412 struct part *old_part;
1413 if (part == mp->mp_parts) {
1414 old_part = mp->mp_parts;
1415 mp->mp_parts = part->mp_next;
1416 } else {
1417 old_part = prev->mp_next;
1418 prev->mp_next = part->mp_next;
1419 }
1420 if (verbosw) {
1421 report (NULL, parent->c_partno, parent->c_file,
1422 "remove text/plain part %s",
1423 old_part->mp_part->c_partno);
1424 }
1425 free_content (old_part->mp_part);
1426 free (old_part);
1427 } else {
1428 has_text_plain = 1;
1429 }
1430 break;
1431 }
1432 prev = part;
1433 }
1434
1435 return has_text_plain;
1436 }
1437
1438
1439 /*
1440 * Insert a new text/plain part.
1441 */
1442 static int
1443 insert_new_text_plain_part (CT ct, int new_subpart_number, CT parent) {
1444 struct multipart *mp = (struct multipart *) parent->c_ctparams;
1445 struct part *new_part;
1446
1447 NEW(new_part);
1448 if ((new_part->mp_part = build_text_plain_part (ct))) {
1449 char buffer[16];
1450 snprintf (buffer, sizeof buffer, "%d", new_subpart_number);
1451
1452 new_part->mp_next = mp->mp_parts;
1453 mp->mp_parts = new_part;
1454 new_part->mp_part->c_partno =
1455 concat (parent->c_partno ? parent->c_partno : "1", ".",
1456 buffer, NULL);
1457
1458 return 1;
1459 }
1460
1461 free_content (new_part->mp_part);
1462 free (new_part);
1463
1464 return 0;
1465 }
1466
1467
1468 /*
1469 * Create a text/plain part to go along with non-plain sibling part.
1470 */
1471 static CT
1472 build_text_plain_part (CT encoded_part) {
1473 CT tp_part = divide_part (encoded_part);
1474 char *tmp_plain_file = NULL;
1475
1476 if (decode_part (tp_part) == OK) {
1477 /* Now, tp_part->c_cefile.ce_file is the name of the tmp file that
1478 contains the decoded contents. And the decoding function, such
1479 as openQuoted, will have set ...->ce_unlink to 1 so that it will
1480 be unlinked by free_content (). */
1481 char *tempfile;
1482
1483 /* This m_mktemp2() call closes the temp file. */
1484 if ((tempfile = m_mktemp2 (NULL, invo_name, NULL, NULL)) == NULL) {
1485 inform("unable to create temporary file in %s",
1486 get_temp_dir());
1487 } else {
1488 tmp_plain_file = mh_xstrdup (tempfile);
1489 if (reformat_part (tp_part, tmp_plain_file,
1490 tp_part->c_ctinfo.ci_type,
1491 tp_part->c_ctinfo.ci_subtype,
1492 tp_part->c_type) == OK) {
1493 return tp_part;
1494 }
1495 }
1496 }
1497
1498 free_content (tp_part);
1499 if (tmp_plain_file) { (void) m_unlink (tmp_plain_file); }
1500 free (tmp_plain_file);
1501
1502 return NULL;
1503 }
1504
1505
1506 /*
1507 * Slip new text/plain part into a new multipart/alternative.
1508 */
1509 static int
1510 insert_into_new_mp_alt (CT *ct, int *message_mods) {
1511 CT tp_part = build_text_plain_part (*ct);
1512 int status = OK;
1513
1514 if (tp_part) {
1515 CT mp_alt = build_multipart_alt (*ct, tp_part, CT_MULTIPART,
1516 MULTI_ALTERNATE);
1517 if (mp_alt) {
1518 struct multipart *mp = (struct multipart *) mp_alt->c_ctparams;
1519
1520 if (mp && mp->mp_parts) {
1521 mp->mp_parts->mp_part = tp_part;
1522 /* Make the new multipart/alternative the parent. */
1523 *ct = mp_alt;
1524
1525 ++*message_mods;
1526 if (verbosw) {
1527 report (NULL, (*ct)->c_partno, (*ct)->c_file,
1528 "insert text/plain part");
1529 }
1530 } else {
1531 free_content (tp_part);
1532 free_content (mp_alt);
1533 status = NOTOK;
1534 }
1535 } else {
1536 status = NOTOK;
1537 }
1538 } else {
1539 /* Not an error if text/plain couldn't be built. */
1540 }
1541
1542 return status;
1543 }
1544
1545
1546 /*
1547 * Clone a MIME part.
1548 */
1549 static CT
1550 divide_part (CT ct) {
1551 CT new_part;
1552
1553 NEW0(new_part);
1554 /* Just copy over what is needed for decoding. c_vrsn and
1555 c_celine aren't necessary. */
1556 new_part->c_file = mh_xstrdup (ct->c_file);
1557 new_part->c_begin = ct->c_begin;
1558 new_part->c_end = ct->c_end;
1559 copy_ctinfo (&new_part->c_ctinfo, &ct->c_ctinfo);
1560 new_part->c_type = ct->c_type;
1561 new_part->c_cefile = ct->c_cefile;
1562 new_part->c_encoding = ct->c_encoding;
1563 new_part->c_ctinitfnx = ct->c_ctinitfnx;
1564 new_part->c_ceopenfnx = ct->c_ceopenfnx;
1565 new_part->c_ceclosefnx = ct->c_ceclosefnx;
1566 new_part->c_cesizefnx = ct->c_cesizefnx;
1567
1568 /* c_ctline is used by reformat__part(), so it can preserve
1569 anything after the type/subtype. */
1570 new_part->c_ctline = mh_xstrdup (ct->c_ctline);
1571
1572 return new_part;
1573 }
1574
1575
1576 /*
1577 * Copy the content info from one part to another.
1578 */
1579 static void
1580 copy_ctinfo (CI dest, CI src) {
1581 PM s_pm, d_pm;
1582
1583 dest->ci_type = src->ci_type ? mh_xstrdup (src->ci_type) : NULL;
1584 dest->ci_subtype = src->ci_subtype ? mh_xstrdup (src->ci_subtype) : NULL;
1585
1586 for (s_pm = src->ci_first_pm; s_pm; s_pm = s_pm->pm_next) {
1587 d_pm = add_param(&dest->ci_first_pm, &dest->ci_last_pm, s_pm->pm_name,
1588 s_pm->pm_value, 0);
1589 if (s_pm->pm_charset) {
1590 d_pm->pm_charset = mh_xstrdup(s_pm->pm_charset);
1591 }
1592 if (s_pm->pm_lang) {
1593 d_pm->pm_lang = mh_xstrdup(s_pm->pm_lang);
1594 }
1595 }
1596
1597 dest->ci_comment = src->ci_comment ? mh_xstrdup (src->ci_comment) : NULL;
1598 dest->ci_magic = src->ci_magic ? mh_xstrdup (src->ci_magic) : NULL;
1599 }
1600
1601
1602 /*
1603 * Decode content.
1604 */
1605 static int
1606 decode_part (CT ct) {
1607 char *tmp_decoded;
1608 int status;
1609 FILE *file;
1610 char *tempfile;
1611
1612 if ((tempfile = m_mktemp2 (NULL, invo_name, NULL, &file)) == NULL) {
1613 adios (NULL, "unable to create temporary file in %s", get_temp_dir());
1614 }
1615 tmp_decoded = mh_xstrdup (tempfile);
1616 /* The following call will load ct->c_cefile.ce_file with the tmp
1617 filename of the decoded content. tmp_decoded will contain the
1618 encoded output, get rid of that. */
1619 status = output_message_fp (ct, file, tmp_decoded);
1620 (void) m_unlink (tmp_decoded);
1621 free (tmp_decoded);
1622 if (fclose (file)) {
1623 inform("unable to close temporary file %s, continuing...", tempfile);
1624 }
1625
1626 return status;
1627 }
1628
1629
1630 /*
1631 * Reformat content as plain text.
1632 * Some of the arguments aren't really needed now, but maybe will
1633 * be in the future for other than text types.
1634 */
1635 static int
1636 reformat_part (CT ct, char *file, char *type, char *subtype, int c_type) {
1637 int output_subtype, output_encoding;
1638 const char *reason = NULL;
1639 char *cp, *cf;
1640 int status;
1641
1642 /* Hacky: this redirects the output from whatever command is used
1643 to show the part to a file. So, the user can't have any output
1644 redirection in that command.
1645 Could show_multi() in mhshowsbr.c avoid this? */
1646
1647 /* Check for invo_name-format-type/subtype. */
1648 if ((cf = context_find_by_type ("format", type, subtype)) == NULL) {
1649 if (verbosw) {
1650 inform("Don't know how to convert %s, there is no "
1651 "%s-format-%s/%s profile entry",
1652 ct->c_file, invo_name, type, subtype);
1653 }
1654 return NOTOK;
1655 }
1656 if (strchr (cf, '>')) {
1657 inform("'>' prohibited in \"%s\",\nplease fix your "
1658 "%s-format-%s/%s profile entry", cf, invo_name, type,
1659 FENDNULL(subtype));
1660
1661 return NOTOK;
1662 }
1663
1664 cp = concat (cf, " >", file, NULL);
1665 status = show_content_aux (ct, 0, cp, NULL, NULL);
1666 free (cp);
1667
1668 /* Unlink decoded content tmp file and free its filename to avoid
1669 leaks. The file stream should already have been closed. */
1670 if (ct->c_cefile.ce_unlink) {
1671 (void) m_unlink (ct->c_cefile.ce_file);
1672 free (ct->c_cefile.ce_file);
1673 ct->c_cefile.ce_file = NULL;
1674 ct->c_cefile.ce_unlink = 0;
1675 }
1676
1677 if (c_type == CT_TEXT) {
1678 output_subtype = TEXT_PLAIN;
1679 } else {
1680 /* Set subtype to 0, which is always an UNKNOWN subtype. */
1681 output_subtype = 0;
1682 }
1683
1684 output_encoding = content_encoding (ct, &reason);
1685 if (status == OK &&
1686 set_ct_type (ct, c_type, output_subtype, output_encoding) == OK) {
1687 ct->c_cefile.ce_file = file;
1688 ct->c_cefile.ce_unlink = 1;
1689 } else {
1690 ct->c_cefile.ce_unlink = 0;
1691 status = NOTOK;
1692 }
1693
1694 return status;
1695 }
1696
1697
1698 /*
1699 * Fill in a multipart/alternative part.
1700 */
1701 static CT
1702 build_multipart_alt (CT first_alt, CT new_part, int type, int subtype) {
1703 char *boundary_prefix = "----=_nmh-multipart";
1704 char *boundary = concat (boundary_prefix, first_alt->c_partno, NULL);
1705 char *boundary_indicator = "; boundary=";
1706 char *typename, *subtypename, *name;
1707 CT ct;
1708 struct part *p;
1709 struct multipart *m;
1710 const struct str2init *ctinit;
1711
1712 NEW0(ct);
1713
1714 /* Set up the multipart/alternative part. These fields of *ct were
1715 initialized to 0 by mh_xcalloc():
1716 c_fp, c_unlink, c_begin, c_end,
1717 c_vrsn, c_ctline, c_celine,
1718 c_id, c_descr, c_dispo, c_partno,
1719 c_ctinfo.ci_comment, c_ctinfo.ci_magic,
1720 c_cefile, c_encoding,
1721 c_digested, c_digest[16], c_ctexbody,
1722 c_ctinitfnx, c_ceopenfnx, c_ceclosefnx, c_cesizefnx,
1723 c_umask, c_rfc934,
1724 c_showproc, c_termproc, c_storeproc, c_storage, c_folder
1725 */
1726
1727 ct->c_file = mh_xstrdup (first_alt->c_file);
1728 ct->c_type = type;
1729 ct->c_subtype = subtype;
1730
1731 ctinit = get_ct_init (ct->c_type);
1732
1733 typename = ct_type_str (type);
1734 subtypename = ct_subtype_str (type, subtype);
1735
1736 {
1737 int serial = 0;
1738 int found_boundary = 1;
1739
1740 while (found_boundary && serial < 1000000) {
1741 found_boundary = 0;
1742
1743 /* Ensure that the boundary doesn't appear in the decoded
1744 content. */
1745 if (new_part->c_cefile.ce_file) {
1746 if ((found_boundary =
1747 boundary_in_content (&new_part->c_cefile.ce_fp,
1748 new_part->c_cefile.ce_file,
1749 boundary)) == NOTOK) {
1750 free_content (ct);
1751 return NULL;
1752 }
1753 }
1754
1755 /* Ensure that the boundary doesn't appear in the encoded
1756 content. */
1757 if (! found_boundary && new_part->c_file) {
1758 if ((found_boundary =
1759 boundary_in_content (&new_part->c_fp,
1760 new_part->c_file,
1761 boundary)) == NOTOK) {
1762 free_content (ct);
1763 return NULL;
1764 }
1765 }
1766
1767 if (found_boundary) {
1768 /* Try a slightly different boundary. */
1769 char buffer2[16];
1770
1771 free (boundary);
1772 ++serial;
1773 snprintf (buffer2, sizeof buffer2, "%d", serial);
1774 boundary =
1775 concat (boundary_prefix,
1776 FENDNULL(first_alt->c_partno),
1777 "-", buffer2, NULL);
1778 }
1779 }
1780
1781 if (found_boundary) {
1782 inform("giving up trying to find a unique boundary");
1783 free_content (ct);
1784 return NULL;
1785 }
1786 }
1787
1788 name = concat (" ", typename, "/", subtypename, boundary_indicator, "\"",
1789 boundary, "\"", NULL);
1790
1791 /* Load c_first_hf and c_last_hf. */
1792 transfer_noncontent_headers (first_alt, ct);
1793 add_header (ct, mh_xstrdup (TYPE_FIELD), concat (name, "\n", NULL));
1794 free (name);
1795
1796 /* Load c_partno. */
1797 if (first_alt->c_partno) {
1798 ct->c_partno = mh_xstrdup (first_alt->c_partno);
1799 free (first_alt->c_partno);
1800 first_alt->c_partno = concat (ct->c_partno, ".1", NULL);
1801 new_part->c_partno = concat (ct->c_partno, ".2", NULL);
1802 } else {
1803 first_alt->c_partno = mh_xstrdup ("1");
1804 new_part->c_partno = mh_xstrdup ("2");
1805 }
1806
1807 if (ctinit) {
1808 ct->c_ctinfo.ci_type = mh_xstrdup (typename);
1809 ct->c_ctinfo.ci_subtype = mh_xstrdup (subtypename);
1810 }
1811
1812 add_param(&ct->c_ctinfo.ci_first_pm, &ct->c_ctinfo.ci_last_pm,
1813 "boundary", boundary, 0);
1814
1815 NEW(p);
1816 NEW(p->mp_next);
1817 p->mp_next->mp_next = NULL;
1818 p->mp_next->mp_part = first_alt;
1819
1820 NEW0(m);
1821 m->mp_start = concat (boundary, "\n", NULL);
1822 m->mp_stop = concat (boundary, "--\n", NULL);
1823 m->mp_parts = p;
1824 ct->c_ctparams = m;
1825
1826 free (boundary);
1827
1828 return ct;
1829 }
1830
1831
1832 /*
1833 * Check that the boundary does not appear in the content.
1834 */
1835 static int
1836 boundary_in_content (FILE **fp, char *file, const char *boundary) {
1837 char buffer[NMH_BUFSIZ];
1838 size_t bytes_read;
1839 int found_boundary = 0;
1840
1841 /* free_content() will close *fp if we fopen it here. */
1842 if (! *fp && (*fp = fopen (file, "r")) == NULL) {
1843 advise (file, "unable to open %s for reading", file);
1844 return NOTOK;
1845 }
1846
1847 fseeko (*fp, 0L, SEEK_SET);
1848 while ((bytes_read = fread (buffer, 1, sizeof buffer, *fp)) > 0) {
1849 if (find_str (buffer, bytes_read, boundary)) {
1850 found_boundary = 1;
1851 break;
1852 }
1853 }
1854
1855 return found_boundary;
1856 }
1857
1858
1859 /*
1860 * Remove all non-Content headers.
1861 */
1862 static void
1863 transfer_noncontent_headers (CT old, CT new) {
1864 HF hp, hp_prev;
1865
1866 hp_prev = hp = old->c_first_hf;
1867 while (hp) {
1868 HF next = hp->next;
1869
1870 if (strncasecmp (XXX_FIELD_PRF, hp->name, strlen (XXX_FIELD_PRF))) {
1871 if (hp == old->c_last_hf) {
1872 if (hp == old->c_first_hf) {
1873 old->c_last_hf = old->c_first_hf = NULL;
1874 } else {
1875 hp_prev->next = NULL;
1876 old->c_last_hf = hp_prev;
1877 }
1878 } else {
1879 if (hp == old->c_first_hf) {
1880 old->c_first_hf = next;
1881 } else {
1882 hp_prev->next = next;
1883 }
1884 }
1885
1886 /* Put node hp in the new CT. */
1887 if (new->c_first_hf == NULL) {
1888 new->c_first_hf = hp;
1889 } else {
1890 new->c_last_hf->next = hp;
1891 }
1892 new->c_last_hf = hp;
1893 } else {
1894 /* A Content- header, leave in old. */
1895 hp_prev = hp;
1896 }
1897
1898 hp = next;
1899 }
1900 }
1901
1902
1903 /*
1904 * Set content type.
1905 */
1906 static int
1907 set_ct_type (CT ct, int type, int subtype, int encoding) {
1908 char *typename = ct_type_str (type);
1909 char *subtypename = ct_subtype_str (type, subtype);
1910 /* E.g, " text/plain" */
1911 char *type_subtypename = concat (" ", typename, "/", subtypename, NULL);
1912 /* E.g, " text/plain\n" */
1913 char *name_plus_nl = concat (type_subtypename, "\n", NULL);
1914 int found_content_type = 0;
1915 HF hf;
1916 const char *cp = NULL;
1917 char *ctline;
1918 int status;
1919
1920 /* Update/add Content-Type header field. */
1921 for (hf = ct->c_first_hf; hf; hf = hf->next) {
1922 if (! strcasecmp (TYPE_FIELD, hf->name)) {
1923 found_content_type = 1;
1924 free (hf->value);
1925 hf->value = (cp = strchr (ct->c_ctline, ';'))
1926 ? concat (type_subtypename, cp, "\n", NULL)
1927 : mh_xstrdup (name_plus_nl);
1928 }
1929 }
1930 if (! found_content_type) {
1931 add_header (ct, mh_xstrdup (TYPE_FIELD),
1932 (cp = strchr (ct->c_ctline, ';'))
1933 ? concat (type_subtypename, cp, "\n", NULL)
1934 : mh_xstrdup (name_plus_nl));
1935 }
1936
1937 /* Some of these might not be used, but set them anyway. */
1938 ctline = cp
1939 ? concat (type_subtypename, cp, NULL)
1940 : concat (type_subtypename, NULL);
1941 free (ct->c_ctline);
1942 ct->c_ctline = ctline;
1943 /* Leave other ctinfo members as they were. */
1944 free (ct->c_ctinfo.ci_type);
1945 ct->c_ctinfo.ci_type = mh_xstrdup (typename);
1946 free (ct->c_ctinfo.ci_subtype);
1947 ct->c_ctinfo.ci_subtype = mh_xstrdup (subtypename);
1948 ct->c_type = type;
1949 ct->c_subtype = subtype;
1950
1951 free (name_plus_nl);
1952 free (type_subtypename);
1953
1954 status = set_ce (ct, encoding);
1955
1956 return status;
1957 }
1958
1959
1960 /*
1961 * It's not necessary to update the charset parameter of a Content-Type
1962 * header for a text part. According to RFC 2045 Sec. 6.4, the body
1963 * (content) was originally in the specified charset, "and will be in
1964 * that character set again after decoding."
1965 */
1966 static int
1967 decode_text_parts (CT ct, int encoding, const char *decodetypes,
1968 int *message_mods) {
1969 int status = OK;
1970 int lf_line_endings = 0;
1971
1972 switch (ct->c_type) {
1973 case CT_MULTIPART: {
1974 struct multipart *m = (struct multipart *) ct->c_ctparams;
1975 struct part *part;
1976
1977 /* Should check to see if the body for this part is encoded?
1978 For now, it gets passed along as-is by InitMultiPart(). */
1979 for (part = m->mp_parts; status == OK && part; part = part->mp_next) {
1980 status = decode_text_parts (part->mp_part, encoding, decodetypes,
1981 message_mods);
1982 }
1983 break;
1984 }
1985
1986 case CT_MESSAGE:
1987 if (ct->c_subtype == MESSAGE_EXTERNAL) {
1988 struct exbody *e = (struct exbody *) ct->c_ctparams;
1989
1990 status = decode_text_parts (e->eb_content, encoding, decodetypes,
1991 message_mods);
1992 }
1993 break;
1994
1995 default:
1996 if (! should_decode(decodetypes, ct->c_ctinfo.ci_type, ct->c_ctinfo.ci_subtype)) {
1997 break;
1998 }
1999
2000 lf_line_endings =
2001 ct->c_ctparams && ((struct text *) ct->c_ctparams)->lf_line_endings;
2002
2003 switch (ct->c_encoding) {
2004 case CE_BASE64:
2005 case CE_QUOTED: {
2006 int ct_encoding;
2007
2008 if (decode_part (ct) == OK && ct->c_cefile.ce_file) {
2009 const char *reason = NULL;
2010
2011 if ((ct_encoding = content_encoding (ct, &reason)) == CE_BINARY
2012 && encoding != CE_BINARY) {
2013 /* The decoding isn't acceptable so discard it.
2014 Leave status as OK to allow other transformations. */
2015 if (verbosw) {
2016 report (NULL, ct->c_partno, ct->c_file,
2017 "will not decode%s because it is binary (%s)",
2018 ct->c_partno ? ""
2019 : (FENDNULL(ct->c_ctline)),
2020 reason);
2021 }
2022 (void) m_unlink (ct->c_cefile.ce_file);
2023 free (ct->c_cefile.ce_file);
2024 ct->c_cefile.ce_file = NULL;
2025 } else if (ct->c_encoding == CE_QUOTED &&
2026 ct_encoding == CE_8BIT && encoding == CE_7BIT) {
2027 /* The decoding isn't acceptable so discard it.
2028 Leave status as OK to allow other transformations. */
2029 if (verbosw) {
2030 report (NULL, ct->c_partno, ct->c_file,
2031 "will not decode%s because it is 8bit",
2032 ct->c_partno ? ""
2033 : (FENDNULL(ct->c_ctline)));
2034 }
2035 (void) m_unlink (ct->c_cefile.ce_file);
2036 free (ct->c_cefile.ce_file);
2037 ct->c_cefile.ce_file = NULL;
2038 } else {
2039 int enc;
2040
2041 if (ct_encoding == CE_BINARY) {
2042 enc = CE_BINARY;
2043 } else if (ct_encoding == CE_8BIT && encoding == CE_7BIT) {
2044 enc = CE_QUOTED;
2045 } else {
2046 enc = ct_encoding;
2047 }
2048 if (set_ce (ct, enc) == OK) {
2049 ++*message_mods;
2050 if (verbosw) {
2051 report (NULL, ct->c_partno, ct->c_file, "decode%s",
2052 FENDNULL(ct->c_ctline));
2053 }
2054 if (lf_line_endings) {
2055 strip_crs (ct, message_mods);
2056 }
2057 } else {
2058 status = NOTOK;
2059 }
2060 }
2061 } else {
2062 status = NOTOK;
2063 }
2064 break;
2065 }
2066 case CE_8BIT:
2067 case CE_7BIT:
2068 if (lf_line_endings) {
2069 strip_crs (ct, message_mods);
2070 }
2071 break;
2072 default:
2073 break;
2074 }
2075
2076 break;
2077 }
2078
2079 return status;
2080 }
2081
2082
/*
 * Determine if the part with type[/subtype] should be decoded, according to
 * decodetypes (which came from the -decodetypes switch).
 *
 * decodetypes is a comma-separated list.  Returns 1 if one of its entries
 * equals, ignoring case, either type or type/subtype, and 0 otherwise.
 * A NULL decodetypes or a NULL type never matches; subtype may be NULL.
 */
static int
should_decode(const char *decodetypes, const char *type, const char *subtype) {
    size_t type_len;
    size_t subtype_len;
    const char *entry;

    if (decodetypes == NULL || type == NULL) {
        return 0;
    }

    type_len = strlen (type);
    subtype_len = subtype ? strlen (subtype) : 0;

    /* Walk the list one entry at a time, without copying anything. */
    for (entry = decodetypes; ; ) {
        const char *comma = strchr (entry, ',');
        const size_t entry_len =
            comma ? (size_t) (comma - entry) : strlen (entry);

        /* Entry is just "type". */
        if (entry_len == type_len &&
            ! strncasecmp (entry, type, type_len)) {
            return 1;
        }

        /* Entry is "type/subtype". */
        if (subtype &&
            entry_len == type_len + 1 + subtype_len &&
            ! strncasecmp (entry, type, type_len) &&
            entry[type_len] == '/' &&
            ! strncasecmp (entry + type_len + 1, subtype, subtype_len)) {
            return 1;
        }

        if (comma == NULL) {
            break;
        }
        entry = comma + 1;
    }

    return 0;
}
2114
2115
2116 /*
2117 * See if the decoded content is 7bit, 8bit, or binary. It's binary
2118 * if it has any NUL characters, a CR not followed by a LF, or lines
2119 * greater than 998 characters in length. If binary, reason is set
2120 * to a string explaining why.
2121 */
2122 static int
2123 content_encoding (CT ct, const char **reason) {
2124 CE ce = &ct->c_cefile;
2125 int encoding = CE_7BIT;
2126
2127 if (ce->ce_file) {
2128 size_t line_len = 0;
2129 char buffer[NMH_BUFSIZ];
2130 size_t inbytes;
2131
2132 if (! ce->ce_fp && (ce->ce_fp = fopen (ce->ce_file, "r")) == NULL) {
2133 advise (ce->ce_file, "unable to open for reading");
2134 return CE_UNKNOWN;
2135 }
2136
2137 fseeko (ce->ce_fp, 0L, SEEK_SET);
2138 while (encoding != CE_BINARY &&
2139 (inbytes = fread (buffer, 1, sizeof buffer, ce->ce_fp)) > 0) {
2140 char *cp;
2141 size_t i;
2142 int last_char_was_cr = 0;
2143
2144 for (i = 0, cp = buffer; i < inbytes; ++i, ++cp) {
2145 if (*cp == '\0' || ++line_len > 998 ||
2146 (*cp != '\n' && last_char_was_cr)) {
2147 encoding = CE_BINARY;
2148 if (*cp == '\0') {
2149 *reason = "null character";
2150 } else if (line_len > 998) {
2151 *reason = "line length > 998";
2152 } else if (*cp != '\n' && last_char_was_cr) {
2153 *reason = "CR not followed by LF";
2154 } else {
2155 /* Should not reach this. */
2156 *reason = "";
2157 }
2158 break;
2159 }
2160 if (*cp == '\n') {
2161 line_len = 0;
2162 } else if (! isascii ((unsigned char) *cp)) {
2163 encoding = CE_8BIT;
2164 }
2165
2166 last_char_was_cr = *cp == '\r';
2167 }
2168 }
2169
2170 fclose (ce->ce_fp);
2171 ce->ce_fp = NULL;
2172 } /* else should never happen */
2173
2174 return encoding;
2175 }
2176
2177
2178 /*
2179 * Strip carriage returns from content.
2180 */
2181 static int
2182 strip_crs (CT ct, int *message_mods) {
2183 char *charset = content_charset (ct);
2184 int status = OK;
2185
2186 /* Only strip carriage returns if content is ASCII or another
2187 charset that has the same readily recognizable CR followed by a
2188 LF. We can include UTF-8 here because if the high-order bit of
2189 a UTF-8 byte is 0, then it must be a single-byte ASCII
2190 character. */
2191 if (! strcasecmp (charset, "US-ASCII") ||
2192 ! strcasecmp (charset, "UTF-8") ||
2193 ! strncasecmp (charset, "ISO-8859-", 9) ||
2194 ! strncasecmp (charset, "WINDOWS-12", 10)) {
2195 char **file = NULL;
2196 FILE **fp = NULL;
2197 size_t begin;
2198 size_t end;
2199 int has_crs = 0;
2200 int opened_input_file = 0;
2201
2202 if (ct->c_cefile.ce_file) {
2203 file = &ct->c_cefile.ce_file;
2204 fp = &ct->c_cefile.ce_fp;
2205 begin = end = 0;
2206 } else if (ct->c_file) {
2207 file = &ct->c_file;
2208 fp = &ct->c_fp;
2209 begin = (size_t) ct->c_begin;
2210 end = (size_t) ct->c_end;
2211 } /* else don't know where the content is */
2212
2213 if (file && *file && fp) {
2214 if (! *fp) {
2215 if ((*fp = fopen (*file, "r")) == NULL) {
2216 advise (*file, "unable to open for reading");
2217 status = NOTOK;
2218 } else {
2219 opened_input_file = 1;
2220 }
2221 }
2222 }
2223
2224 if (fp && *fp) {
2225 char buffer[NMH_BUFSIZ];
2226 size_t bytes_read;
2227 size_t bytes_to_read =
2228 end > 0 && end > begin ? end - begin : sizeof buffer;
2229
2230 fseeko (*fp, begin, SEEK_SET);
2231 while ((bytes_read = fread (buffer, 1,
2232 min (bytes_to_read, sizeof buffer),
2233 *fp)) > 0) {
2234 /* Look for CR followed by a LF. This is supposed to
2235 be text so there should be LF's. If not, don't
2236 modify the content. */
2237 char *cp;
2238 size_t i;
2239 int last_char_was_cr = 0;
2240
2241 if (end > 0) { bytes_to_read -= bytes_read; }
2242
2243 for (i = 0, cp = buffer; i < bytes_read; ++i, ++cp) {
2244 if (*cp == '\n' && last_char_was_cr) {
2245 has_crs = 1;
2246 break;
2247 }
2248
2249 last_char_was_cr = *cp == '\r';
2250 }
2251 }
2252
2253 if (has_crs) {
2254 int fd;
2255 char *stripped_content_file;
2256 char *tempfile = m_mktemp2 (NULL, invo_name, &fd, NULL);
2257
2258 if (tempfile == NULL) {
2259 adios (NULL, "unable to create temporary file in %s",
2260 get_temp_dir());
2261 }
2262 stripped_content_file = mh_xstrdup (tempfile);
2263
2264 /* Strip each CR before a LF from the content. */
2265 fseeko (*fp, begin, SEEK_SET);
2266 while ((bytes_read = fread (buffer, 1, sizeof buffer, *fp)) >
2267 0) {
2268 char *cp;
2269 size_t i;
2270 int last_char_was_cr = 0;
2271
2272 for (i = 0, cp = buffer; i < bytes_read; ++i, ++cp) {
2273 if (*cp == '\r') {
2274 last_char_was_cr = 1;
2275 } else if (last_char_was_cr) {
2276 if (*cp != '\n') {
2277 if (write (fd, "\r", 1) < 0) {
2278 advise (tempfile, "CR write");
2279 }
2280 }
2281 if (write (fd, cp, 1) < 0) {
2282 advise (tempfile, "write");
2283 }
2284 last_char_was_cr = 0;
2285 } else {
2286 if (write (fd, cp, 1) < 0) {
2287 advise (tempfile, "write");
2288 }
2289 last_char_was_cr = 0;
2290 }
2291 }
2292 }
2293
2294 if (close (fd)) {
2295 inform("unable to write temporary file %s, continuing...",
2296 stripped_content_file);
2297 (void) m_unlink (stripped_content_file);
2298 status = NOTOK;
2299 } else {
2300 /* Replace the decoded file with the converted one. */
2301 if (ct->c_cefile.ce_file && ct->c_cefile.ce_unlink)
2302 (void) m_unlink (ct->c_cefile.ce_file);
2303
2304 mh_xfree(ct->c_cefile.ce_file);
2305 ct->c_cefile.ce_file = stripped_content_file;
2306 ct->c_cefile.ce_unlink = 1;
2307
2308 ++*message_mods;
2309 if (verbosw) {
2310 report (NULL, ct->c_partno,
2311 begin == 0 && end == 0 ? "" : *file,
2312 "stripped CRs");
2313 }
2314 }
2315 }
2316
2317 if (opened_input_file) {
2318 fclose (*fp);
2319 *fp = NULL;
2320 }
2321 }
2322 }
2323
2324 free (charset);
2325
2326 return status;
2327 }
2328
2329
2330 /*
2331 * Add/update, if necessary, the message C-T-E, based on the least restrictive
2332 * of the part C-T-E's.
2333 */
2334 static void
2335 update_cte (CT ct) {
2336 const int least_restrictive_enc = least_restrictive_encoding (ct);
2337
2338 if (least_restrictive_enc != CE_UNKNOWN &&
2339 least_restrictive_enc != CE_7BIT) {
2340 char *cte = concat (" ", ce_str (least_restrictive_enc), "\n", NULL);
2341 HF hf;
2342 int found_cte = 0;
2343
2344 /* Update/add Content-Transfer-Encoding header field. */
2345 for (hf = ct->c_first_hf; hf; hf = hf->next) {
2346 if (! strcasecmp (ENCODING_FIELD, hf->name)) {
2347 found_cte = 1;
2348 free (hf->value);
2349 hf->value = cte;
2350 }
2351 }
2352 if (! found_cte) {
2353 add_header (ct, mh_xstrdup (ENCODING_FIELD), cte);
2354 }
2355 }
2356 }
2357
2358
2359 /*
2360 * Find the least restrictive encoding (7bit, 8bit, binary) of the parts
2361 * within a message.
2362 */
2363 static int
2364 least_restrictive_encoding (CT ct) {
2365 int encoding = CE_UNKNOWN;
2366
2367 switch (ct->c_type) {
2368 case CT_MULTIPART: {
2369 struct multipart *m = (struct multipart *) ct->c_ctparams;
2370 struct part *part;
2371
2372 for (part = m->mp_parts; part; part = part->mp_next) {
2373 const int part_encoding =
2374 least_restrictive_encoding (part->mp_part);
2375
2376 if (less_restrictive (encoding, part_encoding)) {
2377 encoding = part_encoding;
2378 }
2379 }
2380 break;
2381 }
2382
2383 case CT_MESSAGE:
2384 if (ct->c_subtype == MESSAGE_EXTERNAL) {
2385 struct exbody *e = (struct exbody *) ct->c_ctparams;
2386 const int part_encoding =
2387 least_restrictive_encoding (e->eb_content);
2388
2389 if (less_restrictive (encoding, part_encoding)) {
2390 encoding = part_encoding;
2391 }
2392 }
2393 break;
2394
2395 default: {
2396 if (less_restrictive (encoding, ct->c_encoding)) {
2397 encoding = ct->c_encoding;
2398 }
2399 }}
2400
2401 return encoding;
2402 }
2403
2404
2405 /*
2406 * Return whether the second encoding is less restrictive than the first, where
2407 * "less restrictive" is in the sense used by RFC 2045 Secs. 6.1 and 6.4. So,
2408 * CE_BINARY is less restrictive than CE_8BIT and
2409 * CE_8BIT is less restrictive than CE_7BIT.
2410 */
2411 static int
2412 less_restrictive (int encoding, int second_encoding) {
2413 switch (second_encoding) {
2414 case CE_BINARY:
2415 return encoding != CE_BINARY;
2416 case CE_8BIT:
2417 return encoding != CE_BINARY && encoding != CE_8BIT;
2418 case CE_7BIT:
2419 return encoding != CE_BINARY && encoding != CE_8BIT &&
2420 encoding != CE_7BIT;
2421 default :
2422 return 0;
2423 }
2424 }
2425
2426
2427 /*
2428 * Convert character set of each part.
2429 */
2430 static int
2431 convert_charsets (CT ct, char *dest_charset, int *message_mods) {
2432 int status = OK;
2433
2434 switch (ct->c_type) {
2435 case CT_TEXT:
2436 if (ct->c_subtype == TEXT_PLAIN) {
2437 status = convert_charset (ct, dest_charset, message_mods);
2438 if (status == OK) {
2439 if (verbosw) {
2440 char *ct_charset = content_charset (ct);
2441
2442 report (NULL, ct->c_partno, ct->c_file,
2443 "convert %s to %s", ct_charset, dest_charset);
2444 free (ct_charset);
2445 }
2446 } else {
2447 char *ct_charset = content_charset (ct);
2448
2449 report ("iconv", ct->c_partno, ct->c_file,
2450 "failed to convert %s to %s", ct_charset, dest_charset);
2451 free (ct_charset);
2452 }
2453 }
2454 break;
2455
2456 case CT_MULTIPART: {
2457 struct multipart *m = (struct multipart *) ct->c_ctparams;
2458 struct part *part;
2459
2460 /* Should check to see if the body for this part is encoded?
2461 For now, it gets passed along as-is by InitMultiPart(). */
2462 for (part = m->mp_parts; status == OK && part; part = part->mp_next) {
2463 status =
2464 convert_charsets (part->mp_part, dest_charset, message_mods);
2465 }
2466 break;
2467 }
2468
2469 case CT_MESSAGE:
2470 if (ct->c_subtype == MESSAGE_EXTERNAL) {
2471 struct exbody *e = (struct exbody *) ct->c_ctparams;
2472
2473 status =
2474 convert_charsets (e->eb_content, dest_charset, message_mods);
2475 }
2476 break;
2477
2478 default:
2479 break;
2480 }
2481
2482 return status;
2483 }
2484
2485
2486 /*
2487 * Fix various problems that aren't handled elsewhere. These
2488 * are fixed unconditionally: there are no switches to disable
2489 * them. Currently, "problems" are these:
2490 * 1) remove extraneous semicolon at the end of a header parameter list
2491 * 2) replace RFC 2047 encoding with RFC 2231 encoding of name and
2492 * filename parameters in Content-Type and Content-Disposition
2493 * headers, respectively.
2494 */
2495 static int
2496 fix_always (CT ct, int *message_mods) {
2497 int status = OK;
2498
2499 switch (ct->c_type) {
2500 case CT_MULTIPART: {
2501 struct multipart *m = (struct multipart *) ct->c_ctparams;
2502 struct part *part;
2503
2504 for (part = m->mp_parts; status == OK && part; part = part->mp_next) {
2505 status = fix_always (part->mp_part, message_mods);
2506 }
2507 break;
2508 }
2509
2510 case CT_MESSAGE:
2511 if (ct->c_subtype == MESSAGE_EXTERNAL) {
2512 struct exbody *e = (struct exbody *) ct->c_ctparams;
2513
2514 status = fix_always (e->eb_content, message_mods);
2515 }
2516 break;
2517
2518 default: {
2519 HF hf;
2520
2521 if (ct->c_first_hf) {
2522 fix_filename_encoding (ct);
2523 }
2524
2525 for (hf = ct->c_first_hf; hf; hf = hf->next) {
2526 size_t len = strlen (hf->value);
2527
2528 if (strcasecmp (hf->name, TYPE_FIELD) != 0 &&
2529 strcasecmp (hf->name, DISPO_FIELD) != 0) {
2530 /* Only do this for Content-Type and
2531 Content-Disposition fields because those are the
2532 only headers that parse_mime() warns about. */
2533 continue;
2534 }
2535
2536 /* whitespace following a trailing ';' will be nuked as well */
2537 if (hf->value[len - 1] == '\n') {
2538 while (isspace((unsigned char)(hf->value[len - 2]))) {
2539 if (len-- == 0) { break; }
2540 }
2541 }
2542
2543 if (hf->value[len - 2] == ';') {
2544 /* Remove trailing ';' from parameter value. */
2545 hf->value[len - 2] = '\n';
2546 hf->value[len - 1] = '\0';
2547
2548 /* Also, if Content-Type parameter, remove trailing ';'
2549 from ct->c_ctline. This probably isn't necessary
2550 but can't hurt. */
2551 if (strcasecmp(hf->name, TYPE_FIELD) == 0 && ct->c_ctline) {
2552 size_t l = strlen(ct->c_ctline) - 1;
2553 while (isspace((unsigned char)(ct->c_ctline[l])) ||
2554 ct->c_ctline[l] == ';') {
2555 ct->c_ctline[l--] = '\0';
2556 if (l == 0) { break; }
2557 }
2558 }
2559
2560 ++*message_mods;
2561 if (verbosw) {
2562 report (NULL, ct->c_partno, ct->c_file,
2563 "remove trailing ; from %s parameter value",
2564 hf->name);
2565 }
2566 }
2567 }
2568 }}
2569
2570 return status;
2571 }
2572
2573
2574 /*
2575 * Factor out common code for loops in fix_filename_encoding().
2576 */
2577 static int
2578 fix_filename_param (char *name, char *value, PM *first_pm, PM *last_pm) {
2579 int fixed = 0;
2580
2581 if (has_prefix(value, "=?") && has_suffix(value, "?=")) {
2582 /* Looks like an RFC 2047 encoded parameter. */
2583 char decoded[PATH_MAX + 1];
2584
2585 if (decode_rfc2047 (value, decoded, sizeof decoded)) {
2586 /* Encode using RFC 2231. */
2587 replace_param (first_pm, last_pm, name, decoded, 0);
2588 fixed = 1;
2589 } else {
2590 inform("failed to decode %s parameter %s", name, value);
2591 }
2592 }
2593
2594 return fixed;
2595 }
2596
2597
2598 /*
2599 * Replace RFC 2047 encoding with RFC 2231 encoding of name and
2600 * filename parameters in Content-Type and Content-Disposition
2601 * headers, respectively.
2602 */
2603 static int
2604 fix_filename_encoding (CT ct) {
2605 PM pm;
2606 HF hf;
2607 int fixed = 0;
2608
2609 for (pm = ct->c_ctinfo.ci_first_pm; pm; pm = pm->pm_next) {
2610 if (pm->pm_name && pm->pm_value &&
2611 strcasecmp (pm->pm_name, "name") == 0) {
2612 fixed = fix_filename_param (pm->pm_name, pm->pm_value,
2613 &ct->c_ctinfo.ci_first_pm,
2614 &ct->c_ctinfo.ci_last_pm);
2615 }
2616 }
2617
2618 for (pm = ct->c_dispo_first; pm; pm = pm->pm_next) {
2619 if (pm->pm_name && pm->pm_value &&
2620 strcasecmp (pm->pm_name, "filename") == 0) {
2621 fixed = fix_filename_param (pm->pm_name, pm->pm_value,
2622 &ct->c_dispo_first,
2623 &ct->c_dispo_last);
2624 }
2625 }
2626
2627 /* Fix hf values to correspond. */
2628 for (hf = ct->c_first_hf; fixed && hf; hf = hf->next) {
2629 enum { OTHER, TYPE_HEADER, DISPO_HEADER } field = OTHER;
2630
2631 if (strcasecmp (hf->name, TYPE_FIELD) == 0) {
2632 field = TYPE_HEADER;
2633 } else if (strcasecmp (hf->name, DISPO_FIELD) == 0) {
2634 field = DISPO_HEADER;
2635 }
2636
2637 if (field != OTHER) {
2638 const char *const semicolon_loc = strchr (hf->value, ';');
2639
2640 if (semicolon_loc) {
2641 const size_t len =
2642 strlen (hf->name) + 1 + semicolon_loc - hf->value;
2643 const char *const params =
2644 output_params (len,
2645 field == TYPE_HEADER
2646 ? ct->c_ctinfo.ci_first_pm
2647 : ct->c_dispo_first,
2648 NULL, 0);
2649 const char *const new_params = concat (params, "\n", NULL);
2650
2651 replace_substring (&hf->value, semicolon_loc, new_params);
2652 free((void *)new_params); /* Cast away const. Sigh. */
2653 free((void *)params);
2654 } else {
2655 inform("did not find semicolon in %s:%s\n",
2656 hf->name, hf->value);
2657 }
2658 }
2659 }
2660
2661 return OK;
2662 }
2663
2664
2665 /*
2666 * Output content in input file to output file.
2667 */
2668 static int
2669 write_content (CT ct, const char *input_filename, char *outfile, FILE *outfp,
2670 int modify_inplace, int message_mods) {
2671 int status = OK;
2672
2673 if (modify_inplace) {
2674 if (message_mods > 0) {
2675 if ((status = output_message_fp (ct, outfp, outfile)) == OK) {
2676 char *infile = input_filename
2677 ? mh_xstrdup (input_filename)
2678 : mh_xstrdup (ct->c_file ? ct->c_file : "-");
2679
2680 if (remove_file (infile) == OK) {
2681 if (rename (outfile, infile)) {
2682 /* Rename didn't work, possibly because of an
2683 attempt to rename across filesystems. Try
2684 brute force copy. */
2685 int old = open (outfile, O_RDONLY);
2686 int new =
2687 open (infile, O_WRONLY | O_CREAT, m_gmprot ());
2688 int i = -1;
2689
2690 if (old != -1 && new != -1) {
2691 char buffer[NMH_BUFSIZ];
2692
2693 while ((i = read (old, buffer, sizeof buffer)) >
2694 0) {
2695 if (write (new, buffer, i) != i) {
2696 i = -1;
2697 break;
2698 }
2699 }
2700 }
2701 if (new != -1) { close (new); }
2702 if (old != -1) { close (old); }
2703 (void) m_unlink (outfile);
2704
2705 if (i < 0) {
2706 /* The -file argument processing used path() to
2707 expand filename to absolute path. */
2708 int file = ct->c_file && ct->c_file[0] == '/';
2709
2710 inform("unable to rename %s %s to %s, continuing...",
2711 file ? "file" : "message", outfile,
2712 infile);
2713 status = NOTOK;
2714 }
2715 }
2716 } else {
2717 inform("unable to remove input file %s, "
2718 "not modifying it, continuing...", infile);
2719 (void) m_unlink (outfile);
2720 status = NOTOK;
2721 }
2722
2723 free (infile);
2724 } else {
2725 status = NOTOK;
2726 }
2727 } else {
2728 /* No modifications and didn't need the tmp outfile. */
2729 (void) m_unlink (outfile);
2730 }
2731 } else {
2732 /* Output is going to some file. Produce it whether or not
2733 there were modifications. */
2734 status = output_message_fp (ct, outfp, outfile);
2735 }
2736
2737 flush_errors ();
2738 return status;
2739 }
2740
2741
2742 /*
2743 * parse_mime() does not set lf_line_endings in struct text, so use this
2744 * function to do it. It touches the parts the decodetypes identifies.
2745 */
2746 static void
2747 set_text_ctparams(CT ct, char *decodetypes, int lf_line_endings) {
2748 switch (ct->c_type) {
2749 case CT_MULTIPART: {
2750 struct multipart *m = (struct multipart *) ct->c_ctparams;
2751 struct part *part;
2752
2753 for (part = m->mp_parts; part; part = part->mp_next) {
2754 set_text_ctparams(part->mp_part, decodetypes, lf_line_endings);
2755 }
2756 break;
2757 }
2758
2759 case CT_MESSAGE:
2760 if (ct->c_subtype == MESSAGE_EXTERNAL) {
2761 struct exbody *e = (struct exbody *) ct->c_ctparams;
2762
2763 set_text_ctparams(e->eb_content, decodetypes, lf_line_endings);
2764 }
2765 break;
2766
2767 default:
2768 if (should_decode(decodetypes, ct->c_ctinfo.ci_type, ct->c_ctinfo.ci_subtype)) {
2769 if (ct->c_ctparams == NULL) {
2770 ct->c_ctparams = mh_xcalloc(1, sizeof (struct text));
2771 }
2772 ((struct text *) ct->c_ctparams)->lf_line_endings = lf_line_endings;
2773 }
2774 }
2775 }
2776
2777
2778 /*
2779 * If "rmmproc" is defined, call that to remove the file. Otherwise,
2780 * use the standard MH backup file.
2781 */
2782 static int
2783 remove_file (const char *file) {
2784 if (rmmproc) {
2785 char *rmm_command = concat (rmmproc, " ", file, NULL);
2786 int status = system (rmm_command);
2787
2788 free (rmm_command);
2789 return WIFEXITED (status) ? WEXITSTATUS (status) : NOTOK;
2790 }
2791 /* This is OK for a non-message file, it still uses the
2792 BACKUP_PREFIX form. The backup file will be in the same
2793 directory as file. */
2794 return rename (file, m_backup (file));
2795 }
2796
2797
2798 /*
2799 * Output formatted message to user.
2800 */
2801 static void
2802 report (char *what, char *partno, char *filename, char *message, ...) {
2803 va_list args;
2804 char *fmt;
2805
2806 if (verbosw) {
2807 va_start (args, message);
2808 fmt = concat (filename, partno ? " part " : ", ",
2809 FENDNULL(partno), partno ? ", " : "", message, NULL);
2810
2811 advertise (what, NULL, fmt, args);
2812
2813 free (fmt);
2814 va_end (args);
2815 }
2816 }
2817
2818
/*
 * Signal handler: finish with status 1.  For SIGQUIT, stdout is flushed
 * and a newline is written to stderr first.
 */
static void
pipeser (int i)
{
    const int quit_requested = (i == SIGQUIT);

    if (quit_requested) {
        fflush (stdout);
        fputs ("\n", stderr);
        fflush (stderr);
    }

    done (1);
    /* NOTREACHED */
}