]> diplodocus.org Git - nmh/blob - uip/mhfixmsg.c
Added mhshow-suffix-text entry.
[nmh] / uip / mhfixmsg.c
1 /*
2 * mhfixmsg.c -- rewrite a message with various transformations
3 *
4 * This code is Copyright (c) 2002 and 2013, by the authors of nmh.
5 * See the COPYRIGHT file in the root directory of the nmh
6 * distribution for complete copyright information.
7 */
8
9 #include <h/mh.h>
10 #include <h/mime.h>
11 #include <h/mhparse.h>
12 #include <h/utils.h>
13 #include <h/signals.h>
14 #include <fcntl.h>
15
/*
 * Command-line switch table, written as an X-macro list.  Each entry is
 * X(switch text, minchars, identifier).  The list is expanded twice
 * below: once with X() yielding only the identifier, to build the
 * switch enum, and once with X() yielding a brace-enclosed initializer,
 * to build the `switches' array that main() hands to smatch().
 */
#define MHFIXMSG_SWITCHES \
    X("decodetext 8bit|7bit|binary", 0, DECODETEXTSW) \
    X("nodecodetext", 0, NDECODETEXTSW) \
    X("decodetypes", 0, DECODETYPESW) \
    X("crlflinebreaks", 0, CRLFLINEBREAKSSW) \
    X("nocrlflinebreaks", 0, NCRLFLINEBREAKSSW) \
    X("textcharset", 0, TEXTCHARSETSW) \
    X("notextcharset", 0, NTEXTCHARSETSW) \
    X("reformat", 0, REFORMATSW) \
    X("noreformat", 0, NREFORMATSW) \
    X("replacetextplain", 0, REPLACETEXTPLAINSW) \
    X("noreplacetextplain", 0, NREPLACETEXTPLAINSW) \
    X("fixboundary", 0, FIXBOUNDARYSW) \
    X("nofixboundary", 0, NFIXBOUNDARYSW) \
    X("fixcte", 0, FIXCOMPOSITECTESW) \
    X("nofixcte", 0, NFIXCOMPOSITECTESW) \
    X("fixtype mimetype", 0, FIXTYPESW) \
    X("file file", 0, FILESW) \
    X("outfile file", 0, OUTFILESW) \
    X("rmmproc program", 0, RPROCSW) \
    X("normmproc", 0, NRPRCSW) \
    X("changecur", 0, CHGSW) \
    X("nochangecur", 0, NCHGSW) \
    X("verbose", 0, VERBSW) \
    X("noverbose", 0, NVERBSW) \
    X("version", 0, VERSIONSW) \
    X("help", 0, HELPSW) \

/* First expansion: the switch identifiers become enum constants. */
#define X(sw, minchars, id) id,
DEFINE_SWITCH_ENUM(MHFIXMSG);
#undef X

/* Second expansion: the full entries populate the `switches' array. */
#define X(sw, minchars, id) { sw, minchars, id },
DEFINE_SWITCH_ARRAY(MHFIXMSG, switches);
#undef X
51
52
53 int verbosw;
54 int debugsw; /* Needed by mhparse.c. */
55
56 #define quitser pipeser
57
58 /* mhparse.c */
59 extern int skip_mp_cte_check; /* flag to InitMultiPart */
60 extern int suppress_bogus_mp_content_warning; /* flag to InitMultiPart */
61 extern int bogus_mp_content; /* flag from InitMultiPart */
62 /* flags to/from parse_header_attrs */
63 extern int suppress_extraneous_trailing_semicolon_warning;
64
65 /* mhoutsbr.c */
66 int output_message (CT, char *);
67
68 /* mhmisc.c */
69 void flush_errors (void);
70
71 /* mhfree.c */
72 extern CT *cts;
73
74 /*
75 * static prototypes
76 */
/*
 * The set of transformations requested on the command line.  main()
 * fills one of these in from the switches and passes it to
 * mhfixmsgsbr() for every message.
 */
typedef struct fix_transformations {
    int fixboundary;        /* -[no]fixboundary */
    int fixcompositecte;    /* -[no]fixcte */
    svector_t fixtypes;     /* type/subtype strings from -fixtype, or NULL */
    int reformat;           /* -[no]reformat */
    int replacetextplain;   /* -[no]replacetextplain */
    int decodetext;         /* CE_8BIT, CE_7BIT, or CE_BINARY from
                               -decodetext; 0 for -nodecodetext */
    char *decodetypes;      /* argument of -decodetypes */
    /* Line-ending selection, cf. RFC 2046 Sec. 4.1.1, par.1:  0 after
       -crlflinebreaks (the default), 1 after -nocrlflinebreaks. */
    int lf_line_endings;
    char *textcharset;      /* argument of -textcharset, or NULL */
} fix_transformations;
89
90 int mhfixmsgsbr (CT *, char *, const fix_transformations *, char *);
91 static int fix_boundary (CT *, int *);
92 static int copy_input_to_output (const char *, const char *);
93 static int get_multipart_boundary (CT, char **);
94 static int replace_boundary (CT, char *, char *);
95 static int fix_types (CT, svector_t, int *);
96 static char *replace_substring (char **, const char *, const char *);
97 static char *remove_parameter (char *, const char *);
98 static int fix_composite_cte (CT, int *);
99 static int set_ce (CT, int);
100 static int ensure_text_plain (CT *, CT, int *, int);
101 static int find_textplain_sibling (CT, int, int *);
102 static int insert_new_text_plain_part (CT, int, CT);
103 static CT build_text_plain_part (CT);
104 static int insert_into_new_mp_alt (CT *, int *);
105 static CT divide_part (CT);
106 static void copy_ctinfo (CI, CI);
107 static int decode_part (CT);
108 static int reformat_part (CT, char *, char *, char *, int);
109 static CT build_multipart_alt (CT, CT, int, int);
110 static int boundary_in_content (FILE **, char *, const char *);
111 static void transfer_noncontent_headers (CT, CT);
112 static int set_ct_type (CT, int type, int subtype, int encoding);
113 static int decode_text_parts (CT, int, const char *, int *);
114 static int should_decode(const char *, const char *, const char *);
115 static int content_encoding (CT, const char **);
116 static int strip_crs (CT, int *);
117 static void update_cte (CT);
118 static int least_restrictive_encoding (CT);
119 static int less_restrictive (int, int);
120 static int convert_charsets (CT, char *, int *);
121 static int fix_always (CT, int *);
122 static int fix_filename_param (char *, char *, PM *, PM *);
123 static int fix_filename_encoding (CT);
124 static int write_content (CT, const char *, char *, int, int);
125 static void set_text_ctparams(CT, char *, int);
126 static int remove_file (const char *);
127 static void report (char *, char *, char *, char *, ...);
128 static void pipeser (int);
129
130
131 int
132 main (int argc, char **argv) {
133 int msgnum;
134 char *cp, *file = NULL, *folder = NULL;
135 char *maildir = NULL, buf[100], *outfile = NULL;
136 char **argp, **arguments;
137 struct msgs_array msgs = { 0, 0, NULL };
138 struct msgs *mp = NULL;
139 CT *ctp;
140 FILE *fp;
141 int using_stdin = 0;
142 int chgflag = 1;
143 int status = OK;
144 fix_transformations fx;
145 fx.reformat = fx.fixcompositecte = fx.fixboundary = 1;
146 fx.fixtypes = NULL;
147 fx.replacetextplain = 0;
148 fx.decodetext = CE_8BIT;
149 fx.decodetypes = "text,application/ics"; /* Default, per man page. */
150 fx.lf_line_endings = 0;
151 fx.textcharset = NULL;
152
153 if (nmh_init(argv[0], 2)) { return 1; }
154
155 arguments = getarguments (invo_name, argc, argv, 1);
156 argp = arguments;
157
158 /*
159 * Parse arguments
160 */
161 while ((cp = *argp++)) {
162 if (*cp == '-') {
163 switch (smatch (++cp, switches)) {
164 case AMBIGSW:
165 ambigsw (cp, switches);
166 done (1);
167 case UNKWNSW:
168 adios (NULL, "-%s unknown", cp);
169
170 case HELPSW:
171 snprintf (buf, sizeof buf, "%s [+folder] [msgs] [switches]",
172 invo_name);
173 print_help (buf, switches, 1);
174 done (0);
175 case VERSIONSW:
176 print_version(invo_name);
177 done (0);
178
179 case DECODETEXTSW:
180 if (! (cp = *argp++) || *cp == '-') {
181 adios (NULL, "missing argument to %s", argp[-2]);
182 }
183 if (! strcasecmp (cp, "8bit")) {
184 fx.decodetext = CE_8BIT;
185 } else if (! strcasecmp (cp, "7bit")) {
186 fx.decodetext = CE_7BIT;
187 } else if (! strcasecmp (cp, "binary")) {
188 fx.decodetext = CE_BINARY;
189 } else {
190 adios (NULL, "invalid argument to %s", argp[-2]);
191 }
192 continue;
193 case NDECODETEXTSW:
194 fx.decodetext = 0;
195 continue;
196 case DECODETYPESW:
197 if (! (cp = *argp++) || *cp == '-') {
198 adios (NULL, "missing argument to %s", argp[-2]);
199 }
200 fx.decodetypes = cp;
201 continue;
202 case CRLFLINEBREAKSSW:
203 fx.lf_line_endings = 0;
204 continue;
205 case NCRLFLINEBREAKSSW:
206 fx.lf_line_endings = 1;
207 continue;
208 case TEXTCHARSETSW:
209 if (! (cp = *argp++) || (*cp == '-' && cp[1])) {
210 adios (NULL, "missing argument to %s", argp[-2]);
211 }
212 fx.textcharset = cp;
213 continue;
214 case NTEXTCHARSETSW:
215 fx.textcharset = 0;
216 continue;
217 case FIXBOUNDARYSW:
218 fx.fixboundary = 1;
219 continue;
220 case NFIXBOUNDARYSW:
221 fx.fixboundary = 0;
222 continue;
223 case FIXCOMPOSITECTESW:
224 fx.fixcompositecte = 1;
225 continue;
226 case NFIXCOMPOSITECTESW:
227 fx.fixcompositecte = 0;
228 continue;
229 case FIXTYPESW:
230 if (! (cp = *argp++) || (*cp == '-' && cp[1])) {
231 adios (NULL, "missing argument to %s", argp[-2]);
232 }
233 if (! strncasecmp (cp, "multipart/", 10) ||
234 ! strncasecmp (cp, "message/", 8)) {
235 adios (NULL, "-fixtype %s not allowed", cp);
236 } else if (! strchr (cp, '/')) {
237 adios (NULL, "-fixtype requires type/subtype");
238 }
239 if (fx.fixtypes == NULL) { fx.fixtypes = svector_create (10); }
240 svector_push_back (fx.fixtypes, cp);
241 continue;
242 case REFORMATSW:
243 fx.reformat = 1;
244 continue;
245 case NREFORMATSW:
246 fx.reformat = 0;
247 continue;
248 case REPLACETEXTPLAINSW:
249 fx.replacetextplain = 1;
250 continue;
251 case NREPLACETEXTPLAINSW:
252 fx.replacetextplain = 0;
253 continue;
254 case FILESW:
255 if (! (cp = *argp++) || (*cp == '-' && cp[1])) {
256 adios (NULL, "missing argument to %s", argp[-2]);
257 }
258 file = *cp == '-' ? mh_xstrdup (cp) : path (cp, TFILE);
259 continue;
260 case OUTFILESW:
261 if (! (cp = *argp++) || (*cp == '-' && cp[1])) {
262 adios (NULL, "missing argument to %s", argp[-2]);
263 }
264 outfile = *cp == '-' ? mh_xstrdup (cp) : path (cp, TFILE);
265 continue;
266 case RPROCSW:
267 if (!(rmmproc = *argp++) || *rmmproc == '-') {
268 adios (NULL, "missing argument to %s", argp[-2]);
269 }
270 continue;
271 case NRPRCSW:
272 rmmproc = NULL;
273 continue;
274 case CHGSW:
275 chgflag = 1;
276 continue;
277 case NCHGSW:
278 chgflag = 0;
279 continue;
280 case VERBSW:
281 verbosw = 1;
282 continue;
283 case NVERBSW:
284 verbosw = 0;
285 continue;
286 }
287 }
288 if (*cp == '+' || *cp == '@') {
289 if (folder) {
290 adios (NULL, "only one folder at a time!");
291 } else {
292 folder = pluspath (cp);
293 }
294 } else {
295 if (*cp == '/') {
296 /* Interpret a full path as a filename, not a message. */
297 file = mh_xstrdup (cp);
298 } else {
299 app_msgarg (&msgs, cp);
300 }
301 }
302 }
303
304 SIGNAL (SIGQUIT, quitser);
305 SIGNAL (SIGPIPE, pipeser);
306
307 /*
308 * Read the standard profile setup
309 */
310 if ((fp = fopen (cp = etcpath ("mhn.defaults"), "r"))) {
311 readconfig ((struct node **) 0, fp, cp, 0);
312 fclose (fp);
313 }
314
315 suppress_bogus_mp_content_warning = skip_mp_cte_check = 1;
316 suppress_extraneous_trailing_semicolon_warning = 1;
317
318 if (! context_find ("path")) {
319 free (path ("./", TFOLDER));
320 }
321
322 if (file && msgs.size) {
323 adios (NULL, "cannot specify msg and file at same time!");
324 }
325
326 /*
327 * check if message is coming from file
328 */
329 if (file) {
330 /* If file is stdin, create a tmp file name before parse_mime()
331 has a chance, because it might put in on a different
332 filesystem than the output file. Instead, put it in the
333 user's preferred tmp directory. */
334 CT ct;
335
336 if (! strcmp ("-", file)) {
337 int fd;
338 char *cp;
339
340 using_stdin = 1;
341
342 if ((cp = m_mktemp2 (NULL, invo_name, &fd, NULL)) == NULL) {
343 adios (NULL, "unable to create temporary file in %s",
344 get_temp_dir());
345 } else {
346 free (file);
347 file = mh_xstrdup (cp);
348 cpydata (STDIN_FILENO, fd, "-", file);
349 }
350
351 if (close (fd)) {
352 (void) m_unlink (file);
353 adios (NULL, "failed to write temporary file");
354 }
355 }
356
357 cts = mh_xcalloc(2, sizeof *cts);
358 ctp = cts;
359
360 if ((ct = parse_mime (file))) {
361 set_text_ctparams(ct, fx.decodetypes, fx.lf_line_endings);
362 *ctp++ = ct;
363 } else {
364 advise (NULL, "unable to parse message from file %s", file);
365 status = NOTOK;
366
367 /* If there's an outfile, pass the input message unchanged, so the message won't
368 get dropped from a pipeline. */
369 if (outfile) {
370 /* Something went wrong. Output might be expected, such as if this were run
371 as a filter. Just copy the input to the output. */
372 if (copy_input_to_output (file, outfile) != OK) {
373 advise (NULL, "unable to copy message to %s, it might be lost\n", outfile);
374 }
375 }
376 }
377 } else {
378 /*
379 * message(s) are coming from a folder
380 */
381 CT ct;
382
383 if (! msgs.size) {
384 app_msgarg(&msgs, "cur");
385 }
386 if (! folder) {
387 folder = getfolder (1);
388 }
389 maildir = m_maildir (folder);
390
391 /* chdir so that error messages, esp. from MIME parser, just
392 refer to the message and not its path. */
393 if (chdir (maildir) == NOTOK) {
394 adios (maildir, "unable to change directory to");
395 }
396
397 /* read folder and create message structure */
398 if (! (mp = folder_read (folder, 1))) {
399 adios (NULL, "unable to read folder %s", folder);
400 }
401
402 /* check for empty folder */
403 if (mp->nummsg == 0) {
404 adios (NULL, "no messages in %s", folder);
405 }
406
407 /* parse all the message ranges/sequences and set SELECTED */
408 for (msgnum = 0; msgnum < msgs.size; msgnum++)
409 if (! m_convert (mp, msgs.msgs[msgnum])) {
410 done (1);
411 }
412 seq_setprev (mp); /* set the previous-sequence */
413
414 cts = mh_xcalloc(mp->numsel + 1, sizeof *cts);
415 ctp = cts;
416
417 for (msgnum = mp->lowsel; msgnum <= mp->hghsel; msgnum++) {
418 if (is_selected(mp, msgnum)) {
419 char *msgnam = m_name (msgnum);
420
421 if ((ct = parse_mime (msgnam))) {
422 set_text_ctparams(ct, fx.decodetypes, fx.lf_line_endings);
423 *ctp++ = ct;
424 } else {
425 advise (NULL, "unable to parse message %s", msgnam);
426 status = NOTOK;
427
428 /* If there's an outfile, pass the input message
429 unchanged, so the message won't get dropped from a
430 pipeline. */
431 if (outfile) {
432 /* Something went wrong. Output might be expected,
433 such as if this were run as a filter. Just copy
434 the input to the output. */
435 /* Can't use path() here because 1) it might have been
436 called before and it caches the pwd, and 2) we call
437 chdir() after that. */
438 char *input_filename =
439 concat (maildir, "/", msgnam, NULL);
440
441 if (copy_input_to_output (input_filename, outfile) != OK) {
442 advise (NULL,
443 "unable to copy message to %s, it might be lost\n",
444 outfile);
445 }
446 free (input_filename);
447 }
448 }
449 }
450 }
451
452 if (chgflag) {
453 seq_setcur (mp, mp->hghsel); /* update current message */
454 }
455 seq_save (mp); /* synchronize sequences */
456 context_replace (pfolder, folder);/* update current folder */
457 context_save (); /* save the context file */
458 }
459
460 if (*cts) {
461 for (ctp = cts; *ctp; ++ctp) {
462 status += mhfixmsgsbr (ctp, maildir, &fx, outfile);
463 free_content (*ctp);
464
465 if (using_stdin) {
466 (void) m_unlink (file);
467
468 if (! outfile) {
469 /* Just calling m_backup() unlinks the backup file. */
470 (void) m_backup (file);
471 }
472 }
473 }
474 } else {
475 status = 1;
476 }
477
478 free (cts);
479
480 if (fx.fixtypes != NULL) { svector_free (fx.fixtypes); }
481 free (outfile);
482 free (file);
483 free (folder);
484 free (arguments);
485
486 done (status);
487 return NOTOK;
488 }
489
490
491 /*
492 * Apply transformations to one message.
493 */
494 int
495 mhfixmsgsbr (CT *ctp, char *maildir, const fix_transformations *fx,
496 char *outfile) {
497 /* Store input filename in case one of the transformations, i.e.,
498 fix_boundary(), rewrites to a tmp file. */
499 char *input_filename = maildir
500 ? concat (maildir, "/", (*ctp)->c_file, NULL)
501 : mh_xstrdup ((*ctp)->c_file);
502 int modify_inplace = 0;
503 int message_mods = 0;
504 int status = OK;
505
506 if (outfile == NULL) {
507 modify_inplace = 1;
508
509 if ((*ctp)->c_file) {
510 char *tempfile;
511 if ((tempfile = m_mktemp2 (NULL, invo_name, NULL, NULL)) == NULL) {
512 adios (NULL, "unable to create temporary file in %s",
513 get_temp_dir());
514 }
515 outfile = mh_xstrdup (tempfile);
516 } else {
517 adios (NULL, "missing both input and output filenames\n");
518 }
519 }
520
521 reverse_alternative_parts (*ctp);
522 status = fix_always (*ctp, &message_mods);
523 if (status == OK && fx->fixboundary) {
524 status = fix_boundary (ctp, &message_mods);
525 }
526 if (status == OK && fx->fixtypes != NULL) {
527 status = fix_types (*ctp, fx->fixtypes, &message_mods);
528 }
529 if (status == OK && fx->fixcompositecte) {
530 status = fix_composite_cte (*ctp, &message_mods);
531 }
532 if (status == OK && fx->reformat) {
533 status =
534 ensure_text_plain (ctp, NULL, &message_mods, fx->replacetextplain);
535 }
536 if (status == OK && fx->decodetext) {
537 status = decode_text_parts (*ctp, fx->decodetext, fx->decodetypes,
538 &message_mods);
539 update_cte (*ctp);
540 }
541 if (status == OK && fx->textcharset != NULL) {
542 status = convert_charsets (*ctp, fx->textcharset, &message_mods);
543 }
544
545 if (status == OK && ! (*ctp)->c_umask) {
546 /* Set the umask for the contents file. This currently
547 isn't used but just in case it is in the future. */
548 struct stat st;
549
550 if (stat ((*ctp)->c_file, &st) != NOTOK) {
551 (*ctp)->c_umask = ~(st.st_mode & 0777);
552 } else {
553 (*ctp)->c_umask = ~m_gmprot();
554 }
555 }
556
557 /*
558 * Write the content to a file
559 */
560 if (status == OK) {
561 status = write_content (*ctp, input_filename, outfile, modify_inplace,
562 message_mods);
563 } else if (! modify_inplace) {
564 /* Something went wrong. Output might be expected, such
565 as if this were run as a filter. Just copy the input
566 to the output. */
567 if (copy_input_to_output (input_filename, outfile) != OK) {
568 advise (NULL, "unable to copy message to %s, it might be lost\n", outfile);
569 }
570 }
571
572 if (modify_inplace) {
573 if (status != OK) { (void) m_unlink (outfile); }
574 free (outfile);
575 outfile = NULL;
576 }
577
578 free (input_filename);
579
580 return status;
581 }
582
583
584 /*
585 * Copy input message to output. Assumes not modifying in place, so this
586 * might be running as part of a pipeline.
587 */
588 static int
589 copy_input_to_output (const char *input_filename, const char *output_filename) {
590 int in = open (input_filename, O_RDONLY);
591 int out = strcmp (output_filename, "-")
592 ? open (output_filename, O_WRONLY | O_CREAT, m_gmprot ())
593 : STDOUT_FILENO;
594 int status = OK;
595
596 if (in != -1 && out != -1) {
597 cpydata (in, out, input_filename, output_filename);
598 } else {
599 status = NOTOK;
600 }
601
602 close (out);
603 close (in);
604
605 return status;
606 }
607
608
609 /*
610 * Fix mismatched outer level boundary.
611 */
612 static int
613 fix_boundary (CT *ct, int *message_mods) {
614 struct multipart *mp;
615 int status = OK;
616
617 if (ct && (*ct)->c_type == CT_MULTIPART && bogus_mp_content) {
618 mp = (struct multipart *) (*ct)->c_ctparams;
619
620 /*
621 * 1) Get boundary at end of part.
622 * 2) Get boundary at beginning of part and compare to the end-of-part
623 * boundary.
624 * 3) Write out contents of ct to tmp file, replacing boundary in
625 * header with boundary from part. Set c_unlink to 1.
626 * 4) Free ct.
627 * 5) Call parse_mime() on the tmp file, replacing ct.
628 */
629
630 if (mp && mp->mp_start) {
631 char *part_boundary;
632
633 if (get_multipart_boundary (*ct, &part_boundary) == OK) {
634 char *fixed;
635
636 if ((fixed = m_mktemp2 (NULL, invo_name, NULL, &(*ct)->c_fp))) {
637 if (replace_boundary (*ct, fixed, part_boundary) == OK) {
638 char *filename = mh_xstrdup ((*ct)->c_file);
639 CT fixed_ct;
640
641 free_content (*ct);
642 if ((fixed_ct = parse_mime (fixed))) {
643 *ct = fixed_ct;
644 (*ct)->c_unlink = 1;
645
646 ++*message_mods;
647 if (verbosw) {
648 report (NULL, NULL, filename,
649 "fix multipart boundary");
650 }
651 } else {
652 *ct = NULL;
653 advise (NULL, "unable to parse fixed part");
654 status = NOTOK;
655 }
656 free (filename);
657 } else {
658 advise (NULL, "unable to replace broken boundary");
659 status = NOTOK;
660 }
661 } else {
662 advise (NULL, "unable to create temporary file in %s",
663 get_temp_dir());
664 status = NOTOK;
665 }
666
667 free (part_boundary);
668 } else {
669 /* Couldn't fix the boundary. Report failure so that mhfixmsg
670 doesn't modify the message. */
671 status = NOTOK;
672 }
673 } else {
674 /* No multipart struct, even though the content type is
675 CT_MULTIPART. Report failure so that mhfixmsg doesn't modify
676 the message. */
677 status = NOTOK;
678 }
679 }
680
681 return status;
682 }
683
684
685 /*
686 * Find boundary at end of multipart.
687 */
688 static int
689 get_multipart_boundary (CT ct, char **part_boundary) {
690 char buffer[NMH_BUFSIZ];
691 char *end_boundary = NULL;
692 off_t begin = (off_t) ct->c_end > (off_t) (ct->c_begin + sizeof buffer)
693 ? (off_t) (ct->c_end - sizeof buffer)
694 : (off_t) ct->c_begin;
695 size_t bytes_read;
696 int status = OK;
697
698 /* This will fail if the boundary spans fread() calls. NMH_BUFSIZ should
699 be big enough, even if it's just 1024, to make that unlikely. */
700
701 /* free_content() will close ct->c_fp if bogus MP boundary is fixed. */
702 if (! ct->c_fp && (ct->c_fp = fopen (ct->c_file, "r")) == NULL) {
703 advise (ct->c_file, "unable to open for reading");
704 return NOTOK;
705 }
706
707 /* Get boundary at end of multipart. */
708 while (begin >= (off_t) ct->c_begin) {
709 fseeko (ct->c_fp, begin, SEEK_SET);
710 while ((bytes_read = fread (buffer, 1, sizeof buffer, ct->c_fp)) > 0) {
711 char *cp = rfind_str (buffer, bytes_read, "--");
712
713 if (cp) {
714 char *end;
715
716 /* Trim off trailing "--" and anything beyond. */
717 *cp-- = '\0';
718 if ((end = rfind_str (buffer, cp - buffer, "\n"))) {
719 if (strlen (end) > 3 && *end++ == '\n' &&
720 *end++ == '-' && *end++ == '-') {
721 end_boundary = mh_xstrdup (end);
722 break;
723 }
724 }
725 }
726 }
727
728 if (! end_boundary && begin > (off_t) (ct->c_begin + sizeof buffer)) {
729 begin -= sizeof buffer;
730 } else {
731 break;
732 }
733 }
734
735 /* Get boundary at beginning of multipart. */
736 if (end_boundary) {
737 fseeko (ct->c_fp, ct->c_begin, SEEK_SET);
738 while ((bytes_read = fread (buffer, 1, sizeof buffer, ct->c_fp)) > 0) {
739 if (bytes_read >= strlen (end_boundary)) {
740 char *cp = find_str (buffer, bytes_read, end_boundary);
741
742 if (cp && cp - buffer >= 2 && *--cp == '-' &&
743 *--cp == '-' && (cp > buffer && *--cp == '\n')) {
744 status = OK;
745 break;
746 }
747 } else {
748 /* The start and end boundaries didn't match, or the
749 start boundary doesn't begin with "\n--" (or "--"
750 if at the beginning of buffer). Keep trying. */
751 status = NOTOK;
752 }
753 }
754 } else {
755 status = NOTOK;
756 }
757
758 if (ct->c_fp) {
759 fclose (ct->c_fp);
760 ct->c_fp = NULL;
761 }
762
763 if (status == OK) {
764 *part_boundary = end_boundary;
765 } else {
766 *part_boundary = NULL;
767 free (end_boundary);
768 }
769
770 return status;
771 }
772
773
774 /*
775 * Open and copy ct->c_file to file, replacing the multipart boundary.
776 */
777 static int
778 replace_boundary (CT ct, char *file, char *boundary) {
779 FILE *fpin, *fpout;
780 int compnum, state;
781 char buf[NMH_BUFSIZ], name[NAMESZ];
782 char *np, *vp;
783 m_getfld_state_t gstate = 0;
784 int status = OK;
785
786 if (ct->c_file == NULL) {
787 advise (NULL, "missing input filename");
788 return NOTOK;
789 }
790
791 if ((fpin = fopen (ct->c_file, "r")) == NULL) {
792 advise (ct->c_file, "unable to open for reading");
793 return NOTOK;
794 }
795
796 if ((fpout = fopen (file, "w")) == NULL) {
797 fclose (fpin);
798 advise (file, "unable to open for writing");
799 return NOTOK;
800 }
801
802 for (compnum = 1;;) {
803 int bufsz = (int) sizeof buf;
804
805 switch (state = m_getfld (&gstate, name, buf, &bufsz, fpin)) {
806 case FLD:
807 case FLDPLUS:
808 compnum++;
809
810 /* get copies of the buffers */
811 np = mh_xstrdup (name);
812 vp = mh_xstrdup (buf);
813
814 /* if necessary, get rest of field */
815 while (state == FLDPLUS) {
816 bufsz = sizeof buf;
817 state = m_getfld (&gstate, name, buf, &bufsz, fpin);
818 vp = add (buf, vp); /* add to previous value */
819 }
820
821 if (strcasecmp (TYPE_FIELD, np)) {
822 fprintf (fpout, "%s:%s", np, vp);
823 } else {
824 char *new_ctline, *new_params;
825
826 replace_param(&ct->c_ctinfo.ci_first_pm,
827 &ct->c_ctinfo.ci_last_pm, "boundary",
828 boundary, 0);
829
830 new_ctline = concat(" ", ct->c_ctinfo.ci_type, "/",
831 ct->c_ctinfo.ci_subtype, NULL);
832 new_params = output_params(strlen(TYPE_FIELD) +
833 strlen(new_ctline) + 1,
834 ct->c_ctinfo.ci_first_pm, NULL, 0);
835 fprintf (fpout, "%s:%s%s\n", np, new_ctline,
836 new_params ? new_params : "");
837 free(new_ctline);
838 mh_xfree(new_params);
839 }
840
841 free (vp);
842 free (np);
843
844 continue;
845
846 case BODY:
847 putc('\n', fpout);
848 /* buf will have a terminating NULL, skip it. */
849 if ((int) fwrite (buf, 1, bufsz-1, fpout) < bufsz-1) {
850 advise (file, "fwrite");
851 }
852 continue;
853
854 case FILEEOF:
855 break;
856
857 case LENERR:
858 case FMTERR:
859 advise (NULL, "message format error in component #%d", compnum);
860 status = NOTOK;
861 break;
862
863 default:
864 advise (NULL, "getfld() returned %d", state);
865 status = NOTOK;
866 break;
867 }
868
869 break;
870 }
871
872 m_getfld_state_destroy (&gstate);
873 fclose (fpout);
874 fclose (fpin);
875
876 return status;
877 }
878
879
880 /*
881 * Fix Content-Type header to reflect the content of its part.
882 */
883 static int
884 fix_types (CT ct, svector_t fixtypes, int *message_mods) {
885 int status = OK;
886
887 switch (ct->c_type) {
888 case CT_MULTIPART: {
889 struct multipart *m = (struct multipart *) ct->c_ctparams;
890 struct part *part;
891
892 for (part = m->mp_parts; status == OK && part; part = part->mp_next) {
893 status = fix_types (part->mp_part, fixtypes, message_mods);
894 }
895 break;
896 }
897
898 case CT_MESSAGE:
899 if (ct->c_subtype == MESSAGE_EXTERNAL) {
900 struct exbody *e = (struct exbody *) ct->c_ctparams;
901
902 status = fix_types (e->eb_content, fixtypes, message_mods);
903 }
904 break;
905
906 default: {
907 char **typep, *type;
908
909 if (ct->c_ctinfo.ci_type && ct->c_ctinfo.ci_subtype) {
910 for (typep = svector_strs (fixtypes);
911 typep && (type = *typep);
912 ++typep) {
913 char *type_subtype =
914 concat (ct->c_ctinfo.ci_type, "/", ct->c_ctinfo.ci_subtype,
915 NULL);
916
917 if (! strcasecmp (type, type_subtype) &&
918 decode_part (ct) == OK &&
919 ct->c_cefile.ce_file != NULL) {
920 char *ct_type_subtype = mime_type (ct->c_cefile.ce_file);
921 char *cp;
922
923 if ((cp = strchr (ct_type_subtype, ';'))) {
924 /* Truncate to remove any parameter list from
925 mime_type () result. */
926 *cp = '\0';
927 }
928
929 if (strcasecmp (type, ct_type_subtype)) {
930 char *ct_type, *ct_subtype;
931 HF hf;
932
933 /* The Content-Type header does not match the
934 content, so update these struct Content
935 fields to match:
936 * c_type, c_subtype
937 * c_ctinfo.ci_type, c_ctinfo.ci_subtype
938 * c_ctline
939 */
940 /* Extract type and subtype from type/subtype. */
941 ct_type = mh_xstrdup(ct_type_subtype);
942 if ((cp = strchr (ct_type, '/'))) {
943 *cp = '\0';
944 ct_subtype = mh_xstrdup(++cp);
945 } else {
946 advise (NULL, "missing / in MIME type of %s %s",
947 ct->c_file, ct->c_partno);
948 free (ct_type);
949 return NOTOK;
950 }
951
952 ct->c_type = ct_str_type (ct_type);
953 ct->c_subtype = ct_str_subtype (ct->c_type, ct_subtype);
954
955 free (ct->c_ctinfo.ci_type);
956 ct->c_ctinfo.ci_type = ct_type;
957 free (ct->c_ctinfo.ci_subtype);
958 ct->c_ctinfo.ci_subtype = ct_subtype;
959 if (! replace_substring (&ct->c_ctline, type,
960 ct_type_subtype)) {
961 advise (NULL, "did not find %s in %s",
962 type, ct->c_ctline);
963 }
964
965 /* Update Content-Type header field. */
966 for (hf = ct->c_first_hf; hf; hf = hf->next) {
967 if (! strcasecmp (TYPE_FIELD, hf->name)) {
968 if (replace_substring (&hf->value, type,
969 ct_type_subtype)) {
970 ++*message_mods;
971 if (verbosw) {
972 report (NULL, ct->c_partno, ct->c_file,
973 "change Content-Type in header "
974 "from %s to %s",
975 type, ct_type_subtype);
976 }
977 break;
978 } else {
979 advise (NULL, "did not find %s in %s",
980 type, hf->value);
981 }
982 }
983 }
984 }
985 free (ct_type_subtype);
986 }
987 free (type_subtype);
988 }
989 }
990 }}
991
992 return status;
993 }
994
995
/*
 * Replace a substring, allocating space to hold the new one.
 *
 * Only the first occurrence of `old' in *str is replaced.  On success
 * the original *str is freed, *str is set to the newly allocated
 * string, and that string is returned.  If `old' does not occur, *str
 * is left alone and NULL is returned.
 */
char *
replace_substring (char **str, const char *old, const char *new) {
    char *match = strstr (*str, old);
    char *tail, *result;
    size_t head_len;

    if (match == NULL) {
        return NULL;
    }

    tail = match + strlen (old);
    head_len = (size_t) (match - *str);

    if (head_len == 0) {
        /* The match is at the very beginning:  nothing precedes it. */
        result = concat (new, tail, NULL);
    } else {
        /* Copy the original and cut it off where the match begins. */
        char *head = mh_xstrdup(*str);

        head[head_len] = '\0';
        result = concat (head, new, tail, NULL);
        free (head);
    }

    free (*str);
    *str = result;

    return result;
}
1023
1024
/*
 * Remove a name=value parameter, given just its name, from a header value.
 *
 * The first occurrence of "name=" is removed in place, together with
 * its value, the whitespace before it, and one preceding semicolon.
 * A quoted value runs to its closing quote, or to the end of the
 * string if the quote is unterminated.  An unquoted value runs to the
 * next whitespace or semicolon, so that a following parameter keeps
 * its separator.
 *
 * Returns str, whether or not anything was removed.
 */
char *
remove_parameter (char *str, const char *name) {
    /* It looks to me, based on the BNF in RFC 2045, that there can't
       be whitespace between the parameter name and the "=", or
       between the "=" and the parameter value. */
    const size_t name_len = strlen (name);
    char *cp = str;
    char *start, *end;

    /* Find the first occurrence of name immediately followed by "=". */
    while ((cp = strstr (cp, name)) != NULL  &&  cp[name_len] != '=') {
        if (*cp == '\0') {
            /* Only reachable with an empty name, at end of string. */
            cp = NULL;
            break;
        }
        ++cp;
    }

    if (cp == NULL) {
        return str;
    }

    /* Remove any leading spaces, before the parameter name. */
    for (start = cp;
         start > str  &&  isspace ((unsigned char) *(start-1));
         --start) {
        continue;
    }
    /* Remove a leading semicolon. */
    if (start > str  &&  *(start-1) == ';') { --start; }

    end = cp + name_len + 1;
    if (*end == '"') {
        /* Skip past the quoted value, and then the final quote, if
           there is one.  Never step over the terminating null. */
        for (++end; *end  &&  *end != '"'; ++end) { continue; }
        if (*end == '"') { ++end; }
    } else {
        /* Skip past the value.  Its first character is always taken
           as part of it, unless it is the end of the string or a
           semicolon, i.e., the value is empty. */
        if (*end  &&  *end != ';') { ++end; }
        while (*end  &&  *end != ';'  &&  ! isspace ((unsigned char) *end)) {
            ++end;
        }
    }

    /* Close the gap, moving the trailing null along with the rest. */
    (void) memmove (start, end, strlen (end) + 1);

    return str;
}
1070
1071
1072 /*
1073 * Fix Content-Transfer-Encoding of composite,, e.g., message or multipart, part.
1074 * According to RFC 2045 Sec. 6.4, it must be 7bit, 8bit, or binary. Set it to
1075 * 8 bit.
1076 */
1077 static int
1078 fix_composite_cte (CT ct, int *message_mods) {
1079 int status = OK;
1080
1081 if (ct->c_type == CT_MESSAGE || ct->c_type == CT_MULTIPART) {
1082 if (ct->c_encoding != CE_7BIT && ct->c_encoding != CE_8BIT &&
1083 ct->c_encoding != CE_BINARY) {
1084 HF hf;
1085
1086 for (hf = ct->c_first_hf; hf; hf = hf->next) {
1087 char *name = hf->name;
1088 for (; *name && isspace ((unsigned char) *name); ++name) {
1089 continue;
1090 }
1091
1092 if (! strncasecmp (name, ENCODING_FIELD,
1093 strlen (ENCODING_FIELD))) {
1094 char *prefix = "Nmh-REPLACED-INVALID-";
1095 HF h;
1096
1097 NEW(h);
1098 h->name = mh_xstrdup (hf->name);
1099 h->hf_encoding = hf->hf_encoding;
1100 h->next = hf->next;
1101 hf->next = h;
1102
1103 /* Retain old header but prefix its name. */
1104 free (hf->name);
1105 hf->name = concat (prefix, h->name, NULL);
1106
1107 ++*message_mods;
1108 if (verbosw) {
1109 char *encoding = cpytrim (hf->value);
1110 report (NULL, ct->c_partno, ct->c_file,
1111 "replace Content-Transfer-Encoding of %s "
1112 "with 8 bit", encoding);
1113 free (encoding);
1114 }
1115
1116 h->value = mh_xstrdup (" 8bit\n");
1117
1118 /* Don't need to warn for multiple C-T-E header
1119 fields, parse_mime() already does that. But
1120 if there are any, fix them all as necessary. */
1121 hf = h;
1122 }
1123 }
1124
1125 set_ce (ct, CE_8BIT);
1126 }
1127
1128 if (ct->c_type == CT_MULTIPART) {
1129 struct multipart *m;
1130 struct part *part;
1131
1132 m = (struct multipart *) ct->c_ctparams;
1133 for (part = m->mp_parts; part; part = part->mp_next) {
1134 if (fix_composite_cte (part->mp_part, message_mods) != OK) {
1135 status = NOTOK;
1136 break;
1137 }
1138 }
1139 }
1140 }
1141
1142 return status;
1143 }
1144
1145
1146 /*
1147 * Set content encoding.
1148 */
1149 static int
1150 set_ce (CT ct, int encoding) {
1151 const char *ce = ce_str (encoding);
1152 const struct str2init *ctinit = get_ce_method (ce);
1153
1154 if (ctinit) {
1155 char *cte = concat (" ", ce, "\n", NULL);
1156 int found_cte = 0;
1157 HF hf;
1158 /* Decoded contents might be in ct->c_cefile.ce_file, if the
1159 caller is decode_text_parts (). Save because we'll
1160 overwrite below. */
1161 struct cefile decoded_content_info = ct->c_cefile;
1162
1163 ct->c_encoding = encoding;
1164
1165 ct->c_ctinitfnx = ctinit->si_init;
1166 /* This will assign ct->c_cefile with an all-0 struct, which
1167 is what we want. */
1168 (*ctinit->si_init) (ct);
1169 /* After returning, the caller should set
1170 ct->c_cefile.ce_file to the name of the file containing
1171 the contents. */
1172
1173 if (ct->c_ceclosefnx) {
1174 (*ct->c_ceclosefnx) (ct);
1175 }
1176
1177 /* Restore the cefile. */
1178 ct->c_cefile = decoded_content_info;
1179
1180 /* Update/add Content-Transfer-Encoding header field. */
1181 for (hf = ct->c_first_hf; hf; hf = hf->next) {
1182 if (! strcasecmp (ENCODING_FIELD, hf->name)) {
1183 found_cte = 1;
1184 free (hf->value);
1185 hf->value = cte;
1186 }
1187 }
1188 if (! found_cte) {
1189 add_header (ct, mh_xstrdup (ENCODING_FIELD), cte);
1190 }
1191
1192 /* Update c_celine. It's used only by mhlist -debug. */
1193 free (ct->c_celine);
1194 ct->c_celine = mh_xstrdup (cte);
1195
1196 return OK;
1197 }
1198
1199 return NOTOK;
1200 }
1201
1202
1203 /*
1204 * Make sure each text part has a corresponding text/plain part.
1205 */
1206 static int
1207 ensure_text_plain (CT *ct, CT parent, int *message_mods, int replacetextplain) {
1208 int status = OK;
1209
1210 switch ((*ct)->c_type) {
1211 case CT_TEXT: {
1212 /* Nothing to do for text/plain. */
1213 if ((*ct)->c_subtype == TEXT_PLAIN) { return OK; }
1214
1215 if (parent && parent->c_type == CT_MULTIPART &&
1216 parent->c_subtype == MULTI_ALTERNATE) {
1217 int new_subpart_number = 1;
1218 int has_text_plain =
1219 find_textplain_sibling (parent, replacetextplain,
1220 &new_subpart_number);
1221
1222 if (! has_text_plain) {
1223 /* Parent is a multipart/alternative. Insert a new
1224 text/plain subpart. */
1225 const int inserted =
1226 insert_new_text_plain_part (*ct, new_subpart_number,
1227 parent);
1228 if (inserted) {
1229 ++*message_mods;
1230 if (verbosw) {
1231 report (NULL, parent->c_partno, parent->c_file,
1232 "insert text/plain part");
1233 }
1234 } else {
1235 status = NOTOK;
1236 }
1237 }
1238 } else if (parent && parent->c_type == CT_MULTIPART &&
1239 parent->c_subtype == MULTI_RELATED) {
1240 char *type_subtype =
1241 concat ((*ct)->c_ctinfo.ci_type, "/",
1242 (*ct)->c_ctinfo.ci_subtype, NULL);
1243 const char *parent_type =
1244 get_param (parent->c_ctinfo.ci_first_pm, "type", '?', 1);
1245 int new_subpart_number = 1;
1246 int has_text_plain = 0;
1247
1248 /* Have to do string comparison on the subtype because we
1249 don't enumerate all of them in c_subtype values.
1250 parent_type will be NULL if the multipart/related part
1251 doesn't have a type parameter. The type parameter must
1252 be specified according to RFC 2387 Sec. 3.1 but not all
1253 messages comply. */
1254 if (parent_type && strcasecmp (type_subtype, parent_type) == 0) {
1255 /* The type of this part matches the root type of the
1256 parent multipart/related. Look to see if there's
1257 text/plain sibling. */
1258 has_text_plain =
1259 find_textplain_sibling (parent, replacetextplain,
1260 &new_subpart_number);
1261 }
1262
1263 free (type_subtype);
1264
1265 if (! has_text_plain) {
1266 struct multipart *mp = (struct multipart *) parent->c_ctparams;
1267 struct part *part;
1268 int siblings = 0;
1269
1270 for (part = mp->mp_parts; part; part = part->mp_next) {
1271 if (*ct != part->mp_part) {
1272 ++siblings;
1273 }
1274 }
1275
1276 if (siblings) {
1277 /* Parent is a multipart/related. Insert a new
1278 text/plain subpart in a new multipart/alternative. */
1279 if (insert_into_new_mp_alt (ct, message_mods)) {
1280 /* Not an error if text/plain couldn't be added. */
1281 }
1282 } else {
1283 /* There are no siblings, so insert a new text/plain
1284 subpart, and change the parent type from
1285 multipart/related to multipart/alternative. */
1286 const int inserted =
1287 insert_new_text_plain_part (*ct, new_subpart_number,
1288 parent);
1289
1290 if (inserted) {
1291 HF hf;
1292
1293 parent->c_subtype = MULTI_ALTERNATE;
1294 free (parent->c_ctinfo.ci_subtype);
1295 parent->c_ctinfo.ci_subtype = mh_xstrdup("alternative");
1296 if (! replace_substring (&parent->c_ctline, "/related",
1297 "/alternative")) {
1298 advise (NULL,
1299 "did not find multipart/related in %s",
1300 parent->c_ctline);
1301 }
1302
1303 /* Update Content-Type header field. */
1304 for (hf = parent->c_first_hf; hf; hf = hf->next) {
1305 if (! strcasecmp (TYPE_FIELD, hf->name)) {
1306 if (replace_substring (&hf->value, "/related",
1307 "/alternative")) {
1308 ++*message_mods;
1309 if (verbosw) {
1310 report (NULL, parent->c_partno,
1311 parent->c_file,
1312 "insert text/plain part");
1313 }
1314
1315 /* Remove, e.g., type="text/html" from
1316 multipart/alternative. */
1317 remove_parameter (hf->value, "type");
1318 break;
1319 } else {
1320 advise (NULL, "did not find multipart/"
1321 "related in header %s",
1322 hf->value);
1323 }
1324 }
1325 }
1326 } else {
1327 /* Not an error if text/plain couldn't be inserted. */
1328 }
1329 }
1330 }
1331 } else {
1332 if (insert_into_new_mp_alt (ct, message_mods)) {
1333 status = NOTOK;
1334 }
1335 }
1336 break;
1337 }
1338
1339 case CT_MULTIPART: {
1340 struct multipart *mp = (struct multipart *) (*ct)->c_ctparams;
1341 struct part *part;
1342
1343 for (part = mp->mp_parts; status == OK && part; part = part->mp_next) {
1344 if ((*ct)->c_type == CT_MULTIPART) {
1345 status = ensure_text_plain (&part->mp_part, *ct, message_mods,
1346 replacetextplain);
1347 }
1348 }
1349 break;
1350 }
1351
1352 case CT_MESSAGE:
1353 if ((*ct)->c_subtype == MESSAGE_EXTERNAL) {
1354 struct exbody *e = (struct exbody *) (*ct)->c_ctparams;
1355
1356 status = ensure_text_plain (&e->eb_content, *ct, message_mods,
1357 replacetextplain);
1358 }
1359 break;
1360 }
1361
1362 return status;
1363 }
1364
1365
1366 /*
1367 * See if there is a sibling text/plain, and return its subpart number.
1368 */
1369 static int
1370 find_textplain_sibling (CT parent, int replacetextplain,
1371 int *new_subpart_number) {
1372 struct multipart *mp = (struct multipart *) parent->c_ctparams;
1373 struct part *part, *prev;
1374 int has_text_plain = 0;
1375
1376 for (prev = part = mp->mp_parts; part; part = part->mp_next) {
1377 ++*new_subpart_number;
1378 if (part->mp_part->c_type == CT_TEXT &&
1379 part->mp_part->c_subtype == TEXT_PLAIN) {
1380 if (replacetextplain) {
1381 struct part *old_part;
1382 if (part == mp->mp_parts) {
1383 old_part = mp->mp_parts;
1384 mp->mp_parts = part->mp_next;
1385 } else {
1386 old_part = prev->mp_next;
1387 prev->mp_next = part->mp_next;
1388 }
1389 if (verbosw) {
1390 report (NULL, parent->c_partno, parent->c_file,
1391 "remove text/plain part %s",
1392 old_part->mp_part->c_partno);
1393 }
1394 free_content (old_part->mp_part);
1395 free (old_part);
1396 } else {
1397 has_text_plain = 1;
1398 }
1399 break;
1400 }
1401 prev = part;
1402 }
1403
1404 return has_text_plain;
1405 }
1406
1407
1408 /*
1409 * Insert a new text/plain part.
1410 */
1411 static int
1412 insert_new_text_plain_part (CT ct, int new_subpart_number, CT parent) {
1413 struct multipart *mp = (struct multipart *) parent->c_ctparams;
1414 struct part *new_part;
1415
1416 NEW(new_part);
1417 if ((new_part->mp_part = build_text_plain_part (ct))) {
1418 char buffer[16];
1419 snprintf (buffer, sizeof buffer, "%d", new_subpart_number);
1420
1421 new_part->mp_next = mp->mp_parts;
1422 mp->mp_parts = new_part;
1423 new_part->mp_part->c_partno =
1424 concat (parent->c_partno ? parent->c_partno : "1", ".",
1425 buffer, NULL);
1426
1427 return 1;
1428 }
1429
1430 free_content (new_part->mp_part);
1431 free (new_part);
1432
1433 return 0;
1434 }
1435
1436
1437 /*
1438 * Create a text/plain part to go along with non-plain sibling part.
1439 */
1440 static CT
1441 build_text_plain_part (CT encoded_part) {
1442 CT tp_part = divide_part (encoded_part);
1443 char *tmp_plain_file = NULL;
1444
1445 if (decode_part (tp_part) == OK) {
1446 /* Now, tp_part->c_cefile.ce_file is the name of the tmp file that
1447 contains the decoded contents. And the decoding function, such
1448 as openQuoted, will have set ...->ce_unlink to 1 so that it will
1449 be unlinked by free_content (). */
1450 char *tempfile;
1451
1452 if ((tempfile = m_mktemp2 (NULL, invo_name, NULL, NULL)) == NULL) {
1453 advise (NULL, "unable to create temporary file in %s",
1454 get_temp_dir());
1455 } else {
1456 tmp_plain_file = mh_xstrdup (tempfile);
1457 if (reformat_part (tp_part, tmp_plain_file,
1458 tp_part->c_ctinfo.ci_type,
1459 tp_part->c_ctinfo.ci_subtype,
1460 tp_part->c_type) == OK) {
1461 return tp_part;
1462 }
1463 }
1464 }
1465
1466 free_content (tp_part);
1467 if (tmp_plain_file) { (void) m_unlink (tmp_plain_file); }
1468 free (tmp_plain_file);
1469
1470 return NULL;
1471 }
1472
1473
1474 /*
1475 * Slip new text/plain part into a new multipart/alternative.
1476 */
1477 static int
1478 insert_into_new_mp_alt (CT *ct, int *message_mods) {
1479 CT tp_part = build_text_plain_part (*ct);
1480 int status = OK;
1481
1482 if (tp_part) {
1483 CT mp_alt = build_multipart_alt (*ct, tp_part, CT_MULTIPART,
1484 MULTI_ALTERNATE);
1485 if (mp_alt) {
1486 struct multipart *mp = (struct multipart *) mp_alt->c_ctparams;
1487
1488 if (mp && mp->mp_parts) {
1489 mp->mp_parts->mp_part = tp_part;
1490 /* Make the new multipart/alternative the parent. */
1491 *ct = mp_alt;
1492
1493 ++*message_mods;
1494 if (verbosw) {
1495 report (NULL, (*ct)->c_partno, (*ct)->c_file,
1496 "insert text/plain part");
1497 }
1498 } else {
1499 free_content (tp_part);
1500 free_content (mp_alt);
1501 status = NOTOK;
1502 }
1503 } else {
1504 status = NOTOK;
1505 }
1506 } else {
1507 /* Not an error if text/plain couldn't be built. */
1508 }
1509
1510 return status;
1511 }
1512
1513
1514 /*
1515 * Clone a MIME part.
1516 */
1517 static CT
1518 divide_part (CT ct) {
1519 CT new_part;
1520
1521 NEW0(new_part);
1522 /* Just copy over what is needed for decoding. c_vrsn and
1523 c_celine aren't necessary. */
1524 new_part->c_file = mh_xstrdup (ct->c_file);
1525 new_part->c_begin = ct->c_begin;
1526 new_part->c_end = ct->c_end;
1527 copy_ctinfo (&new_part->c_ctinfo, &ct->c_ctinfo);
1528 new_part->c_type = ct->c_type;
1529 new_part->c_cefile = ct->c_cefile;
1530 new_part->c_encoding = ct->c_encoding;
1531 new_part->c_ctinitfnx = ct->c_ctinitfnx;
1532 new_part->c_ceopenfnx = ct->c_ceopenfnx;
1533 new_part->c_ceclosefnx = ct->c_ceclosefnx;
1534 new_part->c_cesizefnx = ct->c_cesizefnx;
1535
1536 /* c_ctline is used by reformat__part(), so it can preserve
1537 anything after the type/subtype. */
1538 new_part->c_ctline = mh_xstrdup (ct->c_ctline);
1539
1540 return new_part;
1541 }
1542
1543
1544 /*
1545 * Copy the content info from one part to another.
1546 */
1547 static void
1548 copy_ctinfo (CI dest, CI src) {
1549 PM s_pm, d_pm;
1550
1551 dest->ci_type = src->ci_type ? mh_xstrdup (src->ci_type) : NULL;
1552 dest->ci_subtype = src->ci_subtype ? mh_xstrdup (src->ci_subtype) : NULL;
1553
1554 for (s_pm = src->ci_first_pm; s_pm; s_pm = s_pm->pm_next) {
1555 d_pm = add_param(&dest->ci_first_pm, &dest->ci_last_pm, s_pm->pm_name,
1556 s_pm->pm_value, 0);
1557 if (s_pm->pm_charset) {
1558 d_pm->pm_charset = mh_xstrdup(s_pm->pm_charset);
1559 }
1560 if (s_pm->pm_lang) {
1561 d_pm->pm_lang = mh_xstrdup(s_pm->pm_lang);
1562 }
1563 }
1564
1565 dest->ci_comment = src->ci_comment ? mh_xstrdup (src->ci_comment) : NULL;
1566 dest->ci_magic = src->ci_magic ? mh_xstrdup (src->ci_magic) : NULL;
1567 }
1568
1569
1570 /*
1571 * Decode content.
1572 */
1573 static int
1574 decode_part (CT ct) {
1575 char *tmp_decoded;
1576 int status;
1577 char *tempfile;
1578
1579 if ((tempfile = m_mktemp2 (NULL, invo_name, NULL, NULL)) == NULL) {
1580 adios (NULL, "unable to create temporary file in %s", get_temp_dir());
1581 }
1582 tmp_decoded = mh_xstrdup (tempfile);
1583 /* The following call will load ct->c_cefile.ce_file with the tmp
1584 filename of the decoded content. tmp_decoded will contain the
1585 encoded output, get rid of that. */
1586 status = output_message (ct, tmp_decoded);
1587 (void) m_unlink (tmp_decoded);
1588 free (tmp_decoded);
1589
1590 return status;
1591 }
1592
1593
1594 /*
1595 * Reformat content as plain text.
1596 * Some of the arguments aren't really needed now, but maybe will
1597 * be in the future for other than text types.
1598 */
1599 static int
1600 reformat_part (CT ct, char *file, char *type, char *subtype, int c_type) {
1601 int output_subtype, output_encoding;
1602 const char *reason = NULL;
1603 char *cp, *cf;
1604 int status;
1605
1606 /* Hacky: this redirects the output from whatever command is used
1607 to show the part to a file. So, the user can't have any output
1608 redirection in that command.
1609 Could show_multi() in mhshowsbr.c avoid this? */
1610
1611 /* Check for invo_name-format-type/subtype. */
1612 if ((cf = context_find_by_type ("format", type, subtype)) == NULL) {
1613 if (verbosw) {
1614 advise (NULL, "Don't know how to convert %s, there is no "
1615 "%s-format-%s/%s profile entry",
1616 ct->c_file, invo_name, type, subtype);
1617 }
1618 return NOTOK;
1619 }
1620 if (strchr (cf, '>')) {
1621 advise (NULL, "'>' prohibited in \"%s\",\nplease fix your "
1622 "%s-format-%s/%s profile entry", cf, invo_name, type,
1623 subtype ? subtype : "");
1624
1625 return NOTOK;
1626 }
1627
1628 cp = concat (cf, " >", file, NULL);
1629 status = show_content_aux (ct, 0, cp, NULL, NULL);
1630 free (cp);
1631
1632 /* Unlink decoded content tmp file and free its filename to avoid
1633 leaks. The file stream should already have been closed. */
1634 if (ct->c_cefile.ce_unlink) {
1635 (void) m_unlink (ct->c_cefile.ce_file);
1636 free (ct->c_cefile.ce_file);
1637 ct->c_cefile.ce_file = NULL;
1638 ct->c_cefile.ce_unlink = 0;
1639 }
1640
1641 if (c_type == CT_TEXT) {
1642 output_subtype = TEXT_PLAIN;
1643 } else {
1644 /* Set subtype to 0, which is always an UNKNOWN subtype. */
1645 output_subtype = 0;
1646 }
1647
1648 output_encoding = content_encoding (ct, &reason);
1649 if (status == OK &&
1650 set_ct_type (ct, c_type, output_subtype, output_encoding) == OK) {
1651 ct->c_cefile.ce_file = file;
1652 ct->c_cefile.ce_unlink = 1;
1653 } else {
1654 ct->c_cefile.ce_unlink = 0;
1655 status = NOTOK;
1656 }
1657
1658 return status;
1659 }
1660
1661
1662 /*
1663 * Fill in a multipart/alternative part.
1664 */
1665 static CT
1666 build_multipart_alt (CT first_alt, CT new_part, int type, int subtype) {
1667 char *boundary_prefix = "----=_nmh-multipart";
1668 char *boundary = concat (boundary_prefix, first_alt->c_partno, NULL);
1669 char *boundary_indicator = "; boundary=";
1670 char *typename, *subtypename, *name;
1671 CT ct;
1672 struct part *p;
1673 struct multipart *m;
1674 const struct str2init *ctinit;
1675
1676 NEW0(ct);
1677
1678 /* Set up the multipart/alternative part. These fields of *ct were
1679 initialized to 0 by mh_xcalloc():
1680 c_fp, c_unlink, c_begin, c_end,
1681 c_vrsn, c_ctline, c_celine,
1682 c_id, c_descr, c_dispo, c_partno,
1683 c_ctinfo.ci_comment, c_ctinfo.ci_magic,
1684 c_cefile, c_encoding,
1685 c_digested, c_digest[16], c_ctexbody,
1686 c_ctinitfnx, c_ceopenfnx, c_ceclosefnx, c_cesizefnx,
1687 c_umask, c_rfc934,
1688 c_showproc, c_termproc, c_storeproc, c_storage, c_folder
1689 */
1690
1691 ct->c_file = mh_xstrdup (first_alt->c_file);
1692 ct->c_type = type;
1693 ct->c_subtype = subtype;
1694
1695 ctinit = get_ct_init (ct->c_type);
1696
1697 typename = ct_type_str (type);
1698 subtypename = ct_subtype_str (type, subtype);
1699
1700 {
1701 int serial = 0;
1702 int found_boundary = 1;
1703
1704 while (found_boundary && serial < 1000000) {
1705 found_boundary = 0;
1706
1707 /* Ensure that the boundary doesn't appear in the decoded
1708 content. */
1709 if (new_part->c_cefile.ce_file) {
1710 if ((found_boundary =
1711 boundary_in_content (&new_part->c_cefile.ce_fp,
1712 new_part->c_cefile.ce_file,
1713 boundary)) == NOTOK) {
1714 free_content (ct);
1715 return NULL;
1716 }
1717 }
1718
1719 /* Ensure that the boundary doesn't appear in the encoded
1720 content. */
1721 if (! found_boundary && new_part->c_file) {
1722 if ((found_boundary =
1723 boundary_in_content (&new_part->c_fp,
1724 new_part->c_file,
1725 boundary)) == NOTOK) {
1726 free_content (ct);
1727 return NULL;
1728 }
1729 }
1730
1731 if (found_boundary) {
1732 /* Try a slightly different boundary. */
1733 char buffer2[16];
1734
1735 free (boundary);
1736 ++serial;
1737 snprintf (buffer2, sizeof buffer2, "%d", serial);
1738 boundary =
1739 concat (boundary_prefix,
1740 first_alt->c_partno ? first_alt->c_partno : "",
1741 "-", buffer2, NULL);
1742 }
1743 }
1744
1745 if (found_boundary) {
1746 advise (NULL, "giving up trying to find a unique boundary");
1747 free_content (ct);
1748 return NULL;
1749 }
1750 }
1751
1752 name = concat (" ", typename, "/", subtypename, boundary_indicator, "\"",
1753 boundary, "\"", NULL);
1754
1755 /* Load c_first_hf and c_last_hf. */
1756 transfer_noncontent_headers (first_alt, ct);
1757 add_header (ct, mh_xstrdup (TYPE_FIELD), concat (name, "\n", NULL));
1758 free (name);
1759
1760 /* Load c_partno. */
1761 if (first_alt->c_partno) {
1762 ct->c_partno = mh_xstrdup (first_alt->c_partno);
1763 free (first_alt->c_partno);
1764 first_alt->c_partno = concat (ct->c_partno, ".1", NULL);
1765 new_part->c_partno = concat (ct->c_partno, ".2", NULL);
1766 } else {
1767 first_alt->c_partno = mh_xstrdup ("1");
1768 new_part->c_partno = mh_xstrdup ("2");
1769 }
1770
1771 if (ctinit) {
1772 ct->c_ctinfo.ci_type = mh_xstrdup (typename);
1773 ct->c_ctinfo.ci_subtype = mh_xstrdup (subtypename);
1774 }
1775
1776 add_param(&ct->c_ctinfo.ci_first_pm, &ct->c_ctinfo.ci_last_pm,
1777 "boundary", boundary, 0);
1778
1779 NEW(p);
1780 NEW(p->mp_next);
1781 p->mp_next->mp_next = NULL;
1782 p->mp_next->mp_part = first_alt;
1783
1784 NEW0(m);
1785 m->mp_start = concat (boundary, "\n", NULL);
1786 m->mp_stop = concat (boundary, "--\n", NULL);
1787 m->mp_parts = p;
1788 ct->c_ctparams = m;
1789
1790 free (boundary);
1791
1792 return ct;
1793 }
1794
1795
1796 /*
1797 * Check that the boundary does not appear in the content.
1798 */
1799 static int
1800 boundary_in_content (FILE **fp, char *file, const char *boundary) {
1801 char buffer[NMH_BUFSIZ];
1802 size_t bytes_read;
1803 int found_boundary = 0;
1804
1805 /* free_content() will close *fp if we fopen it here. */
1806 if (! *fp && (*fp = fopen (file, "r")) == NULL) {
1807 advise (file, "unable to open %s for reading", file);
1808 return NOTOK;
1809 }
1810
1811 fseeko (*fp, 0L, SEEK_SET);
1812 while ((bytes_read = fread (buffer, 1, sizeof buffer, *fp)) > 0) {
1813 if (find_str (buffer, bytes_read, boundary)) {
1814 found_boundary = 1;
1815 break;
1816 }
1817 }
1818
1819 return found_boundary;
1820 }
1821
1822
1823 /*
1824 * Remove all non-Content headers.
1825 */
1826 static void
1827 transfer_noncontent_headers (CT old, CT new) {
1828 HF hp, hp_prev;
1829
1830 hp_prev = hp = old->c_first_hf;
1831 while (hp) {
1832 HF next = hp->next;
1833
1834 if (strncasecmp (XXX_FIELD_PRF, hp->name, strlen (XXX_FIELD_PRF))) {
1835 if (hp == old->c_last_hf) {
1836 if (hp == old->c_first_hf) {
1837 old->c_last_hf = old->c_first_hf = NULL;
1838 } else {
1839 hp_prev->next = NULL;
1840 old->c_last_hf = hp_prev;
1841 }
1842 } else {
1843 if (hp == old->c_first_hf) {
1844 old->c_first_hf = next;
1845 } else {
1846 hp_prev->next = next;
1847 }
1848 }
1849
1850 /* Put node hp in the new CT. */
1851 if (new->c_first_hf == NULL) {
1852 new->c_first_hf = hp;
1853 } else {
1854 new->c_last_hf->next = hp;
1855 }
1856 new->c_last_hf = hp;
1857 } else {
1858 /* A Content- header, leave in old. */
1859 hp_prev = hp;
1860 }
1861
1862 hp = next;
1863 }
1864 }
1865
1866
1867 /*
1868 * Set content type.
1869 */
1870 static int
1871 set_ct_type (CT ct, int type, int subtype, int encoding) {
1872 char *typename = ct_type_str (type);
1873 char *subtypename = ct_subtype_str (type, subtype);
1874 /* E.g, " text/plain" */
1875 char *type_subtypename = concat (" ", typename, "/", subtypename, NULL);
1876 /* E.g, " text/plain\n" */
1877 char *name_plus_nl = concat (type_subtypename, "\n", NULL);
1878 int found_content_type = 0;
1879 HF hf;
1880 const char *cp = NULL;
1881 char *ctline;
1882 int status;
1883
1884 /* Update/add Content-Type header field. */
1885 for (hf = ct->c_first_hf; hf; hf = hf->next) {
1886 if (! strcasecmp (TYPE_FIELD, hf->name)) {
1887 found_content_type = 1;
1888 free (hf->value);
1889 hf->value = (cp = strchr (ct->c_ctline, ';'))
1890 ? concat (type_subtypename, cp, "\n", NULL)
1891 : mh_xstrdup (name_plus_nl);
1892 }
1893 }
1894 if (! found_content_type) {
1895 add_header (ct, mh_xstrdup (TYPE_FIELD),
1896 (cp = strchr (ct->c_ctline, ';'))
1897 ? concat (type_subtypename, cp, "\n", NULL)
1898 : mh_xstrdup (name_plus_nl));
1899 }
1900
1901 /* Some of these might not be used, but set them anyway. */
1902 ctline = cp
1903 ? concat (type_subtypename, cp, NULL)
1904 : concat (type_subtypename, NULL);
1905 free (ct->c_ctline);
1906 ct->c_ctline = ctline;
1907 /* Leave other ctinfo members as they were. */
1908 free (ct->c_ctinfo.ci_type);
1909 ct->c_ctinfo.ci_type = mh_xstrdup (typename);
1910 free (ct->c_ctinfo.ci_subtype);
1911 ct->c_ctinfo.ci_subtype = mh_xstrdup (subtypename);
1912 ct->c_type = type;
1913 ct->c_subtype = subtype;
1914
1915 free (name_plus_nl);
1916 free (type_subtypename);
1917
1918 status = set_ce (ct, encoding);
1919
1920 return status;
1921 }
1922
1923
1924 /*
1925 * It's not necessary to update the charset parameter of a Content-Type
1926 * header for a text part. According to RFC 2045 Sec. 6.4, the body
1927 * (content) was originally in the specified charset, "and will be in
1928 * that character set again after decoding."
1929 */
1930 static int
1931 decode_text_parts (CT ct, int encoding, const char *decodetypes,
1932 int *message_mods) {
1933 int status = OK;
1934 int lf_line_endings = 0;
1935
1936 switch (ct->c_type) {
1937 case CT_MULTIPART: {
1938 struct multipart *m = (struct multipart *) ct->c_ctparams;
1939 struct part *part;
1940
1941 /* Should check to see if the body for this part is encoded?
1942 For now, it gets passed along as-is by InitMultiPart(). */
1943 for (part = m->mp_parts; status == OK && part; part = part->mp_next) {
1944 status = decode_text_parts (part->mp_part, encoding, decodetypes,
1945 message_mods);
1946 }
1947 break;
1948 }
1949
1950 case CT_MESSAGE:
1951 if (ct->c_subtype == MESSAGE_EXTERNAL) {
1952 struct exbody *e = (struct exbody *) ct->c_ctparams;
1953
1954 status = decode_text_parts (e->eb_content, encoding, decodetypes,
1955 message_mods);
1956 }
1957 break;
1958
1959 default:
1960 if (! should_decode(decodetypes, ct->c_ctinfo.ci_type, ct->c_ctinfo.ci_subtype)) {
1961 break;
1962 }
1963
1964 lf_line_endings =
1965 ct->c_ctparams && ((struct text *) ct->c_ctparams)->lf_line_endings;
1966
1967 switch (ct->c_encoding) {
1968 case CE_BASE64:
1969 case CE_QUOTED: {
1970 int ct_encoding;
1971
1972 if (decode_part (ct) == OK && ct->c_cefile.ce_file) {
1973 const char *reason = NULL;
1974
1975 if ((ct_encoding = content_encoding (ct, &reason)) == CE_BINARY
1976 && encoding != CE_BINARY) {
1977 /* The decoding isn't acceptable so discard it.
1978 Leave status as OK to allow other transformations. */
1979 if (verbosw) {
1980 report (NULL, ct->c_partno, ct->c_file,
1981 "will not decode%s because it is binary (%s)",
1982 ct->c_partno ? ""
1983 : ct->c_ctline ? ct->c_ctline
1984 : "",
1985 reason);
1986 }
1987 (void) m_unlink (ct->c_cefile.ce_file);
1988 free (ct->c_cefile.ce_file);
1989 ct->c_cefile.ce_file = NULL;
1990 } else if (ct->c_encoding == CE_QUOTED &&
1991 ct_encoding == CE_8BIT && encoding == CE_7BIT) {
1992 /* The decoding isn't acceptable so discard it.
1993 Leave status as OK to allow other transformations. */
1994 if (verbosw) {
1995 report (NULL, ct->c_partno, ct->c_file,
1996 "will not decode%s because it is 8bit",
1997 ct->c_partno ? ""
1998 : ct->c_ctline ? ct->c_ctline
1999 : "");
2000 }
2001 (void) m_unlink (ct->c_cefile.ce_file);
2002 free (ct->c_cefile.ce_file);
2003 ct->c_cefile.ce_file = NULL;
2004 } else {
2005 int enc;
2006
2007 if (ct_encoding == CE_BINARY) {
2008 enc = CE_BINARY;
2009 } else if (ct_encoding == CE_8BIT && encoding == CE_7BIT) {
2010 enc = CE_QUOTED;
2011 } else {
2012 enc = ct_encoding;
2013 }
2014 if (set_ce (ct, enc) == OK) {
2015 ++*message_mods;
2016 if (verbosw) {
2017 report (NULL, ct->c_partno, ct->c_file, "decode%s",
2018 ct->c_ctline ? ct->c_ctline : "");
2019 }
2020 if (lf_line_endings) {
2021 strip_crs (ct, message_mods);
2022 }
2023 } else {
2024 status = NOTOK;
2025 }
2026 }
2027 } else {
2028 status = NOTOK;
2029 }
2030 break;
2031 }
2032 case CE_8BIT:
2033 case CE_7BIT:
2034 if (lf_line_endings) {
2035 strip_crs (ct, message_mods);
2036 }
2037 break;
2038 default:
2039 break;
2040 }
2041
2042 break;
2043 }
2044
2045 return status;
2046 }
2047
2048
/*
 * Determine if the part with type[/subtype] should be decoded, according to
 * decodetypes (which came from the -decodetypes switch).
 *
 * Returns 1 if either type or type/subtype is an element of the
 * comma-separated decodetypes list, compared without regard to case,
 * and 0 otherwise.
 */
static int
should_decode(const char *decodetypes, const char *type, const char *subtype) {
    /* Bracketing both the list and the candidate with commas turns
       whole-element matching into a simple substring search. */
    char *haystack = concat(",", decodetypes, ",", NULL);
    char *needle = concat(",", type, ",", NULL);
    int matched = nmh_strcasestr(haystack, needle) != NULL;

    free(needle);

    if (! matched  &&  subtype != NULL) {
        /* The bare type isn't listed, try type/subtype. */
        needle = concat(",", type, "/", subtype, ",", NULL);
        matched = nmh_strcasestr(haystack, needle) != NULL;
        free(needle);
    }

    free(haystack);

    return matched;
}
2080
2081
2082 /*
2083 * See if the decoded content is 7bit, 8bit, or binary. It's binary
2084 * if it has any NUL characters, a CR not followed by a LF, or lines
2085 * greater than 998 characters in length. If binary, reason is set
2086 * to a string explaining why.
2087 */
2088 static int
2089 content_encoding (CT ct, const char **reason) {
2090 CE ce = &ct->c_cefile;
2091 int encoding = CE_7BIT;
2092
2093 if (ce->ce_file) {
2094 size_t line_len = 0;
2095 char buffer[NMH_BUFSIZ];
2096 size_t inbytes;
2097
2098 if (! ce->ce_fp && (ce->ce_fp = fopen (ce->ce_file, "r")) == NULL) {
2099 advise (ce->ce_file, "unable to open for reading");
2100 return CE_UNKNOWN;
2101 }
2102
2103 fseeko (ce->ce_fp, 0L, SEEK_SET);
2104 while (encoding != CE_BINARY &&
2105 (inbytes = fread (buffer, 1, sizeof buffer, ce->ce_fp)) > 0) {
2106 char *cp;
2107 size_t i;
2108 int last_char_was_cr = 0;
2109
2110 for (i = 0, cp = buffer; i < inbytes; ++i, ++cp) {
2111 if (*cp == '\0' || ++line_len > 998 ||
2112 (*cp != '\n' && last_char_was_cr)) {
2113 encoding = CE_BINARY;
2114 if (*cp == '\0') {
2115 *reason = "null character";
2116 } else if (line_len > 998) {
2117 *reason = "line length > 998";
2118 } else if (*cp != '\n' && last_char_was_cr) {
2119 *reason = "CR not followed by LF";
2120 } else {
2121 /* Should not reach this. */
2122 *reason = "";
2123 }
2124 break;
2125 } else if (*cp == '\n') {
2126 line_len = 0;
2127 } else if (! isascii ((unsigned char) *cp)) {
2128 encoding = CE_8BIT;
2129 }
2130
2131 last_char_was_cr = *cp == '\r' ? 1 : 0;
2132 }
2133 }
2134
2135 fclose (ce->ce_fp);
2136 ce->ce_fp = NULL;
2137 } /* else should never happen */
2138
2139 return encoding;
2140 }
2141
2142
2143 /*
2144 * Strip carriage returns from content.
2145 */
2146 static int
2147 strip_crs (CT ct, int *message_mods) {
2148 char *charset = content_charset (ct);
2149 int status = OK;
2150
2151 /* Only strip carriage returns if content is ASCII or another
2152 charset that has the same readily recognizable CR followed by a
2153 LF. We can include UTF-8 here because if the high-order bit of
2154 a UTF-8 byte is 0, then it must be a single-byte ASCII
2155 character. */
2156 if (! strcasecmp (charset, "US-ASCII") ||
2157 ! strcasecmp (charset, "UTF-8") ||
2158 ! strncasecmp (charset, "ISO-8859-", 9) ||
2159 ! strncasecmp (charset, "WINDOWS-12", 10)) {
2160 char **file = NULL;
2161 FILE **fp = NULL;
2162 size_t begin;
2163 size_t end;
2164 int has_crs = 0;
2165 int opened_input_file = 0;
2166
2167 if (ct->c_cefile.ce_file) {
2168 file = &ct->c_cefile.ce_file;
2169 fp = &ct->c_cefile.ce_fp;
2170 begin = end = 0;
2171 } else if (ct->c_file) {
2172 file = &ct->c_file;
2173 fp = &ct->c_fp;
2174 begin = (size_t) ct->c_begin;
2175 end = (size_t) ct->c_end;
2176 } /* else don't know where the content is */
2177
2178 if (file && *file && fp) {
2179 if (! *fp) {
2180 if ((*fp = fopen (*file, "r")) == NULL) {
2181 advise (*file, "unable to open for reading");
2182 status = NOTOK;
2183 } else {
2184 opened_input_file = 1;
2185 }
2186 }
2187 }
2188
2189 if (fp && *fp) {
2190 char buffer[NMH_BUFSIZ];
2191 size_t bytes_read;
2192 size_t bytes_to_read =
2193 end > 0 && end > begin ? end - begin : sizeof buffer;
2194
2195 fseeko (*fp, begin, SEEK_SET);
2196 while ((bytes_read = fread (buffer, 1,
2197 min (bytes_to_read, sizeof buffer),
2198 *fp)) > 0) {
2199 /* Look for CR followed by a LF. This is supposed to
2200 be text so there should be LF's. If not, don't
2201 modify the content. */
2202 char *cp;
2203 size_t i;
2204 int last_char_was_cr = 0;
2205
2206 if (end > 0) { bytes_to_read -= bytes_read; }
2207
2208 for (i = 0, cp = buffer; i < bytes_read; ++i, ++cp) {
2209 if (*cp == '\n' && last_char_was_cr) {
2210 has_crs = 1;
2211 break;
2212 }
2213
2214 last_char_was_cr = *cp == '\r' ? 1 : 0;
2215 }
2216 }
2217
2218 if (has_crs) {
2219 int fd;
2220 char *stripped_content_file;
2221 char *tempfile = m_mktemp2 (NULL, invo_name, &fd, NULL);
2222
2223 if (tempfile == NULL) {
2224 adios (NULL, "unable to create temporary file in %s",
2225 get_temp_dir());
2226 }
2227 stripped_content_file = mh_xstrdup (tempfile);
2228
2229 /* Strip each CR before a LF from the content. */
2230 fseeko (*fp, begin, SEEK_SET);
2231 while ((bytes_read = fread (buffer, 1, sizeof buffer, *fp)) >
2232 0) {
2233 char *cp;
2234 size_t i;
2235 int last_char_was_cr = 0;
2236
2237 for (i = 0, cp = buffer; i < bytes_read; ++i, ++cp) {
2238 if (*cp == '\r') {
2239 last_char_was_cr = 1;
2240 } else if (last_char_was_cr) {
2241 if (*cp != '\n') {
2242 if (write (fd, "\r", 1) < 0) {
2243 advise (tempfile, "CR write");
2244 }
2245 }
2246 if (write (fd, cp, 1) < 0) {
2247 advise (tempfile, "write");
2248 }
2249 last_char_was_cr = 0;
2250 } else {
2251 if (write (fd, cp, 1) < 0) {
2252 advise (tempfile, "write");
2253 }
2254 last_char_was_cr = 0;
2255 }
2256 }
2257 }
2258
2259 if (close (fd)) {
2260 admonish (NULL, "unable to write temporary file %s",
2261 stripped_content_file);
2262 (void) m_unlink (stripped_content_file);
2263 status = NOTOK;
2264 } else {
2265 /* Replace the decoded file with the converted one. */
2266 if (ct->c_cefile.ce_file && ct->c_cefile.ce_unlink)
2267 (void) m_unlink (ct->c_cefile.ce_file);
2268
2269 mh_xfree(ct->c_cefile.ce_file);
2270 ct->c_cefile.ce_file = stripped_content_file;
2271 ct->c_cefile.ce_unlink = 1;
2272
2273 ++*message_mods;
2274 if (verbosw) {
2275 report (NULL, ct->c_partno,
2276 begin == 0 && end == 0 ? "" : *file,
2277 "stripped CRs");
2278 }
2279 }
2280 }
2281
2282 if (opened_input_file) {
2283 fclose (*fp);
2284 *fp = NULL;
2285 }
2286 }
2287 }
2288
2289 free (charset);
2290
2291 return status;
2292 }
2293
2294
2295 /*
2296 * Add/update, if necessary, the message C-T-E, based on the least restrictive
2297 * of the part C-T-E's.
2298 */
2299 static void
2300 update_cte (CT ct) {
2301 const int least_restrictive_enc = least_restrictive_encoding (ct);
2302
2303 if (least_restrictive_enc != CE_UNKNOWN &&
2304 least_restrictive_enc != CE_7BIT) {
2305 char *cte = concat (" ", ce_str (least_restrictive_enc), "\n", NULL);
2306 HF hf;
2307 int found_cte = 0;
2308
2309 /* Update/add Content-Transfer-Encoding header field. */
2310 for (hf = ct->c_first_hf; hf; hf = hf->next) {
2311 if (! strcasecmp (ENCODING_FIELD, hf->name)) {
2312 found_cte = 1;
2313 free (hf->value);
2314 hf->value = cte;
2315 }
2316 }
2317 if (! found_cte) {
2318 add_header (ct, mh_xstrdup (ENCODING_FIELD), cte);
2319 }
2320 }
2321 }
2322
2323
2324 /*
2325 * Find the least restrictive encoding (7bit, 8bit, binary) of the parts
2326 * within a message.
2327 */
2328 static int
2329 least_restrictive_encoding (CT ct) {
2330 int encoding = CE_UNKNOWN;
2331
2332 switch (ct->c_type) {
2333 case CT_MULTIPART: {
2334 struct multipart *m = (struct multipart *) ct->c_ctparams;
2335 struct part *part;
2336
2337 for (part = m->mp_parts; part; part = part->mp_next) {
2338 const int part_encoding =
2339 least_restrictive_encoding (part->mp_part);
2340
2341 if (less_restrictive (encoding, part_encoding)) {
2342 encoding = part_encoding;
2343 }
2344 }
2345 break;
2346 }
2347
2348 case CT_MESSAGE:
2349 if (ct->c_subtype == MESSAGE_EXTERNAL) {
2350 struct exbody *e = (struct exbody *) ct->c_ctparams;
2351 const int part_encoding =
2352 least_restrictive_encoding (e->eb_content);
2353
2354 if (less_restrictive (encoding, part_encoding)) {
2355 encoding = part_encoding;
2356 }
2357 }
2358 break;
2359
2360 default: {
2361 if (less_restrictive (encoding, ct->c_encoding)) {
2362 encoding = ct->c_encoding;
2363 }
2364 }}
2365
2366 return encoding;
2367 }
2368
2369
2370 /*
2371 * Return whether the second encoding is less restrictive than the first, where
2372 * "less restrictive" is in the sense used by RFC 2045 Secs. 6.1 and 6.4. So,
2373 * CE_BINARY is less restrictive than CE_8BIT and
2374 * CE_8BIT is less restrictive than CE_7BIT.
2375 */
2376 static int
2377 less_restrictive (int encoding, int second_encoding) {
2378 switch (second_encoding) {
2379 case CE_BINARY:
2380 return encoding != CE_BINARY;
2381 case CE_8BIT:
2382 return encoding != CE_BINARY && encoding != CE_8BIT;
2383 case CE_7BIT:
2384 return encoding != CE_BINARY && encoding != CE_8BIT &&
2385 encoding != CE_7BIT;
2386 default :
2387 return 0;
2388 }
2389 }
2390
2391
2392 /*
2393 * Convert character set of each part.
2394 */
2395 static int
2396 convert_charsets (CT ct, char *dest_charset, int *message_mods) {
2397 int status = OK;
2398
2399 switch (ct->c_type) {
2400 case CT_TEXT:
2401 if (ct->c_subtype == TEXT_PLAIN) {
2402 status = convert_charset (ct, dest_charset, message_mods);
2403 if (status == OK) {
2404 if (verbosw) {
2405 char *ct_charset = content_charset (ct);
2406
2407 report (NULL, ct->c_partno, ct->c_file,
2408 "convert %s to %s", ct_charset, dest_charset);
2409 free (ct_charset);
2410 }
2411 } else {
2412 char *ct_charset = content_charset (ct);
2413
2414 report ("iconv", ct->c_partno, ct->c_file,
2415 "failed to convert %s to %s", ct_charset, dest_charset);
2416 free (ct_charset);
2417 }
2418 }
2419 break;
2420
2421 case CT_MULTIPART: {
2422 struct multipart *m = (struct multipart *) ct->c_ctparams;
2423 struct part *part;
2424
2425 /* Should check to see if the body for this part is encoded?
2426 For now, it gets passed along as-is by InitMultiPart(). */
2427 for (part = m->mp_parts; status == OK && part; part = part->mp_next) {
2428 status =
2429 convert_charsets (part->mp_part, dest_charset, message_mods);
2430 }
2431 break;
2432 }
2433
2434 case CT_MESSAGE:
2435 if (ct->c_subtype == MESSAGE_EXTERNAL) {
2436 struct exbody *e = (struct exbody *) ct->c_ctparams;
2437
2438 status =
2439 convert_charsets (e->eb_content, dest_charset, message_mods);
2440 }
2441 break;
2442
2443 default:
2444 break;
2445 }
2446
2447 return status;
2448 }
2449
2450
2451 /*
2452 * Fix various problems that aren't handled elsewhere. These
2453 * are fixed unconditionally: there are no switches to disable
2454 * them. Currently, "problems" are these:
2455 * 1) remove extraneous semicolon at the end of a header parameter list
2456 * 2) replace RFC 2047 encoding with RFC 2231 encoding of name and
2457 * filename parameters in Content-Type and Content-Disposition
2458 * headers, respectively.
2459 */
2460 static int
2461 fix_always (CT ct, int *message_mods) {
2462 int status = OK;
2463
2464 switch (ct->c_type) {
2465 case CT_MULTIPART: {
2466 struct multipart *m = (struct multipart *) ct->c_ctparams;
2467 struct part *part;
2468
2469 for (part = m->mp_parts; status == OK && part; part = part->mp_next) {
2470 status = fix_always (part->mp_part, message_mods);
2471 }
2472 break;
2473 }
2474
2475 case CT_MESSAGE:
2476 if (ct->c_subtype == MESSAGE_EXTERNAL) {
2477 struct exbody *e = (struct exbody *) ct->c_ctparams;
2478
2479 status = fix_always (e->eb_content, message_mods);
2480 }
2481 break;
2482
2483 default: {
2484 HF hf;
2485
2486 if (ct->c_first_hf) {
2487 fix_filename_encoding (ct);
2488 }
2489
2490 for (hf = ct->c_first_hf; hf; hf = hf->next) {
2491 size_t len = strlen (hf->value);
2492
2493 if (strcasecmp (hf->name, TYPE_FIELD) != 0 &&
2494 strcasecmp (hf->name, DISPO_FIELD) != 0) {
2495 /* Only do this for Content-Type and
2496 Content-Disposition fields because those are the
2497 only headers that parse_mime() warns about. */
2498 continue;
2499 }
2500
2501 /* whitespace following a trailing ';' will be nuked as well */
2502 if (hf->value[len - 1] == '\n') {
2503 while (isspace((unsigned char)(hf->value[len - 2]))) {
2504 if (len-- == 0) { break; }
2505 }
2506 }
2507
2508 if (hf->value[len - 2] == ';') {
2509 /* Remove trailing ';' from parameter value. */
2510 hf->value[len - 2] = '\n';
2511 hf->value[len - 1] = '\0';
2512
2513 /* Also, if Content-Type parameter, remove trailing ';'
2514 from ct->c_ctline. This probably isn't necessary
2515 but can't hurt. */
2516 if (strcasecmp(hf->name, TYPE_FIELD) == 0 && ct->c_ctline) {
2517 size_t l = strlen(ct->c_ctline) - 1;
2518 while (isspace((unsigned char)(ct->c_ctline[l])) ||
2519 ct->c_ctline[l] == ';') {
2520 ct->c_ctline[l--] = '\0';
2521 if (l == 0) { break; }
2522 }
2523 }
2524
2525 ++*message_mods;
2526 if (verbosw) {
2527 report (NULL, ct->c_partno, ct->c_file,
2528 "remove trailing ; from %s parameter value",
2529 hf->name);
2530 }
2531 }
2532 }
2533 }}
2534
2535 return status;
2536 }
2537
2538
2539 /*
2540 * Factor out common code for loops in fix_filename_encoding().
2541 */
2542 static int
2543 fix_filename_param (char *name, char *value, PM *first_pm, PM *last_pm) {
2544 int fixed = 0;
2545
2546 if (HasPrefix(value, "=?") && HasSuffix(value, "?=")) {
2547 /* Looks like an RFC 2047 encoded parameter. */
2548 char decoded[PATH_MAX + 1];
2549
2550 if (decode_rfc2047 (value, decoded, sizeof decoded)) {
2551 /* Encode using RFC 2231. */
2552 replace_param (first_pm, last_pm, name, decoded, 0);
2553 fixed = 1;
2554 } else {
2555 advise (NULL, "failed to decode %s parameter %s", name, value);
2556 }
2557 }
2558
2559 return fixed;
2560 }
2561
2562
2563 /*
2564 * Replace RFC 2047 encoding with RFC 2231 encoding of name and
2565 * filename parameters in Content-Type and Content-Disposition
2566 * headers, respectively.
2567 */
2568 static int
2569 fix_filename_encoding (CT ct) {
2570 PM pm;
2571 HF hf;
2572 int fixed = 0;
2573
2574 for (pm = ct->c_ctinfo.ci_first_pm; pm; pm = pm->pm_next) {
2575 if (pm->pm_name && pm->pm_value &&
2576 strcasecmp (pm->pm_name, "name") == 0) {
2577 fixed = fix_filename_param (pm->pm_name, pm->pm_value,
2578 &ct->c_ctinfo.ci_first_pm,
2579 &ct->c_ctinfo.ci_last_pm);
2580 }
2581 }
2582
2583 for (pm = ct->c_dispo_first; pm; pm = pm->pm_next) {
2584 if (pm->pm_name && pm->pm_value &&
2585 strcasecmp (pm->pm_name, "filename") == 0) {
2586 fixed = fix_filename_param (pm->pm_name, pm->pm_value,
2587 &ct->c_dispo_first,
2588 &ct->c_dispo_last);
2589 }
2590 }
2591
2592 /* Fix hf values to correspond. */
2593 for (hf = ct->c_first_hf; fixed && hf; hf = hf->next) {
2594 enum { OTHER, TYPE_HEADER, DISPO_HEADER } field = OTHER;
2595
2596 if (strcasecmp (hf->name, TYPE_FIELD) == 0) {
2597 field = TYPE_HEADER;
2598 } else if (strcasecmp (hf->name, DISPO_FIELD) == 0) {
2599 field = DISPO_HEADER;
2600 }
2601
2602 if (field != OTHER) {
2603 const char *const semicolon_loc = strchr (hf->value, ';');
2604
2605 if (semicolon_loc) {
2606 const size_t len =
2607 strlen (hf->name) + 1 + semicolon_loc - hf->value;
2608 const char *const params =
2609 output_params (len,
2610 field == TYPE_HEADER
2611 ? ct->c_ctinfo.ci_first_pm
2612 : ct->c_dispo_first,
2613 NULL, 0);
2614 const char *const new_params = concat (params, "\n", NULL);
2615
2616 replace_substring (&hf->value, semicolon_loc, new_params);
2617 free((void *)new_params); /* Cast away const. Sigh. */
2618 free((void *)params);
2619 } else {
2620 advise (NULL, "did not find semicolon in %s:%s\n",
2621 hf->name, hf->value);
2622 }
2623 }
2624 }
2625
2626 return OK;
2627 }
2628
2629
2630 /*
2631 * Output content in input file to output file.
2632 */
2633 static int
2634 write_content (CT ct, const char *input_filename, char *outfile, int modify_inplace,
2635 int message_mods) {
2636 int status = OK;
2637
2638 if (modify_inplace) {
2639 if (message_mods > 0) {
2640 if ((status = output_message (ct, outfile)) == OK) {
2641 char *infile = input_filename
2642 ? mh_xstrdup (input_filename)
2643 : mh_xstrdup (ct->c_file ? ct->c_file : "-");
2644
2645 if (remove_file (infile) == OK) {
2646 if (rename (outfile, infile)) {
2647 /* Rename didn't work, possibly because of an
2648 attempt to rename across filesystems. Try
2649 brute force copy. */
2650 int old = open (outfile, O_RDONLY);
2651 int new =
2652 open (infile, O_WRONLY | O_CREAT, m_gmprot ());
2653 int i = -1;
2654
2655 if (old != -1 && new != -1) {
2656 char buffer[NMH_BUFSIZ];
2657
2658 while ((i = read (old, buffer, sizeof buffer)) >
2659 0) {
2660 if (write (new, buffer, i) != i) {
2661 i = -1;
2662 break;
2663 }
2664 }
2665 }
2666 if (new != -1) { close (new); }
2667 if (old != -1) { close (old); }
2668 (void) m_unlink (outfile);
2669
2670 if (i < 0) {
2671 /* The -file argument processing used path() to
2672 expand filename to absolute path. */
2673 int file = ct->c_file && ct->c_file[0] == '/';
2674
2675 admonish (NULL, "unable to rename %s %s to %s",
2676 file ? "file" : "message", outfile,
2677 infile);
2678 status = NOTOK;
2679 }
2680 }
2681 } else {
2682 admonish (NULL, "unable to remove input file %s, "
2683 "not modifying it", infile);
2684 (void) m_unlink (outfile);
2685 status = NOTOK;
2686 }
2687
2688 free (infile);
2689 } else {
2690 status = NOTOK;
2691 }
2692 } else {
2693 /* No modifications and didn't need the tmp outfile. */
2694 (void) m_unlink (outfile);
2695 }
2696 } else {
2697 /* Output is going to some file. Produce it whether or not
2698 there were modifications. */
2699 status = output_message (ct, outfile);
2700 }
2701
2702 flush_errors ();
2703 return status;
2704 }
2705
2706
2707 /*
2708 * parse_mime() does not set lf_line_endings in struct text, so use this
2709 * function to do it. It touches the parts the decodetypes identifies.
2710 */
2711 static void
2712 set_text_ctparams(CT ct, char *decodetypes, int lf_line_endings) {
2713 switch (ct->c_type) {
2714 case CT_MULTIPART: {
2715 struct multipart *m = (struct multipart *) ct->c_ctparams;
2716 struct part *part;
2717
2718 for (part = m->mp_parts; part; part = part->mp_next) {
2719 set_text_ctparams(part->mp_part, decodetypes, lf_line_endings);
2720 }
2721 break;
2722 }
2723
2724 case CT_MESSAGE:
2725 if (ct->c_subtype == MESSAGE_EXTERNAL) {
2726 struct exbody *e = (struct exbody *) ct->c_ctparams;
2727
2728 set_text_ctparams(e->eb_content, decodetypes, lf_line_endings);
2729 }
2730 break;
2731
2732 default:
2733 if (should_decode(decodetypes, ct->c_ctinfo.ci_type, ct->c_ctinfo.ci_subtype)) {
2734 if (ct->c_ctparams == NULL) {
2735 ct->c_ctparams = mh_xcalloc(1, sizeof (struct text));
2736 }
2737 ((struct text *) ct->c_ctparams)->lf_line_endings = lf_line_endings;
2738 }
2739 }
2740 }
2741
2742
2743 /*
2744 * If "rmmproc" is defined, call that to remove the file. Otherwise,
2745 * use the standard MH backup file.
2746 */
2747 static int
2748 remove_file (const char *file) {
2749 if (rmmproc) {
2750 char *rmm_command = concat (rmmproc, " ", file, NULL);
2751 int status = system (rmm_command);
2752
2753 free (rmm_command);
2754 return WIFEXITED (status) ? WEXITSTATUS (status) : NOTOK;
2755 }
2756 /* This is OK for a non-message file, it still uses the
2757 BACKUP_PREFIX form. The backup file will be in the same
2758 directory as file. */
2759 return rename (file, m_backup (file));
2760 }
2761
2762
2763 /*
2764 * Output formatted message to user.
2765 */
2766 static void
2767 report (char *what, char *partno, char *filename, char *message, ...) {
2768 va_list args;
2769 char *fmt;
2770
2771 if (verbosw) {
2772 va_start (args, message);
2773 fmt = concat (filename, partno ? " part " : ", ",
2774 partno ? partno : "", partno ? ", " : "", message, NULL);
2775
2776 advertise (what, NULL, fmt, args);
2777
2778 free (fmt);
2779 va_end (args);
2780 }
2781 }
2782
2783
/*
 * Signal handler:  on SIGQUIT, flush stdout and emit a newline on stderr;
 * for every signal, finish by calling done (1).
 */
static void
pipeser (int i)
{
    switch (i) {
    case SIGQUIT:
        fflush (stdout);
        fprintf (stderr, "\n");
        fflush (stderr);
        break;

    default:
        break;
    }

    done (1);
    /* NOTREACHED */
}