]> diplodocus.org Git - nmh/blob - uip/mhfixmsg.c
sbr/vector.c: Replace Nbby with <limits.h>'s CHAR_BIT.
[nmh] / uip / mhfixmsg.c
1 /* mhfixmsg.c -- rewrite a message with various transformations
2 *
3 * This code is Copyright (c) 2002 and 2013, by the authors of nmh.
4 * See the COPYRIGHT file in the root directory of the nmh
5 * distribution for complete copyright information.
6 */
7
8 #include <h/mh.h>
9 #include <h/mime.h>
10 #include <h/mhparse.h>
11 #include <h/utils.h>
12 #include <h/signals.h>
13 #include <fcntl.h>
14
/*
 * Command-line switch table, written as an X-macro list.  Each row is
 * X(switch text, minchars, enum identifier).  The list is expanded
 * twice below with different definitions of X: once to produce the
 * identifiers handed to DEFINE_SWITCH_ENUM, and once to produce the
 * { text, minchars, id } initializers handed to DEFINE_SWITCH_ARRAY,
 * so the two always stay in the same order.
 */
15 #define MHFIXMSG_SWITCHES \
16 X("decodetext 8bit|7bit|binary", 0, DECODETEXTSW) \
17 X("nodecodetext", 0, NDECODETEXTSW) \
18 X("decodetypes", 0, DECODETYPESW) \
19 X("crlflinebreaks", 0, CRLFLINEBREAKSSW) \
20 X("nocrlflinebreaks", 0, NCRLFLINEBREAKSSW) \
21 X("textcharset", 0, TEXTCHARSETSW) \
22 X("notextcharset", 0, NTEXTCHARSETSW) \
23 X("reformat", 0, REFORMATSW) \
24 X("noreformat", 0, NREFORMATSW) \
25 X("replacetextplain", 0, REPLACETEXTPLAINSW) \
26 X("noreplacetextplain", 0, NREPLACETEXTPLAINSW) \
27 X("fixboundary", 0, FIXBOUNDARYSW) \
28 X("nofixboundary", 0, NFIXBOUNDARYSW) \
29 X("fixcte", 0, FIXCOMPOSITECTESW) \
30 X("nofixcte", 0, NFIXCOMPOSITECTESW) \
31 X("fixtype mimetype", 0, FIXTYPESW) \
32 X("file file", 0, FILESW) \
33 X("outfile file", 0, OUTFILESW) \
34 X("rmmproc program", 0, RPROCSW) \
35 X("normmproc", 0, NRPRCSW) \
36 X("changecur", 0, CHGSW) \
37 X("nochangecur", 0, NCHGSW) \
38 X("verbose", 0, VERBSW) \
39 X("noverbose", 0, NVERBSW) \
40 X("version", 0, VERSIONSW) \
41 X("help", 0, HELPSW) \
42
/* First expansion: keep only the enum identifier of each row. */
43 #define X(sw, minchars, id) id,
44 DEFINE_SWITCH_ENUM(MHFIXMSG);
45 #undef X
46
/* Second expansion: one initializer per row for the array named
   "switches", which main() passes to smatch(). */
47 #define X(sw, minchars, id) { sw, minchars, id },
48 DEFINE_SWITCH_ARRAY(MHFIXMSG, switches);
49 #undef X
50
51
/* Set to 1 by -verbose and to 0 by -noverbose in main(); the fix_*
   routines call report() only when it is nonzero. */
52 int verbosw;
53 int debugsw; /* Needed by mhparse.c. */
54
/* main() installs quitser for SIGQUIT and pipeser for SIGPIPE; this
   alias makes both signals share the one handler. */
55 #define quitser pipeser
56
57 /* mhparse.c */
58 extern int skip_mp_cte_check; /* flag to InitMultiPart */
59 extern int suppress_bogus_mp_content_warning; /* flag to InitMultiPart */
60 extern int bogus_mp_content; /* flag from InitMultiPart */
61 /* flags to/from parse_header_attrs */
62 extern int suppress_extraneous_trailing_semicolon_warning;
63
64 /* mhoutsbr.c */
65 int output_message_fp (CT, FILE *, char *);
66
67 /* mhmisc.c */
68 void flush_errors (void);
69
70 /* mhfree.c */
/* NULL-terminated array of parsed messages; main() allocates it with
   one spare zeroed slot and walks it until the first NULL entry. */
71 extern CT *cts;
72
73 /*
74 * static prototypes
75 */
/*
 * The set of transformations requested on the command line.  main()
 * fills one of these in from the switches and passes it, read-only,
 * to mhfixmsgsbr().
 */
76 typedef struct fix_transformations {
77 int fixboundary; /* 1 for -fixboundary (default), 0 for -nofixboundary */
78 int fixcompositecte; /* 1 for -fixcte (default), 0 for -nofixcte */
79 svector_t fixtypes; /* -fixtype arguments, or NULL if none were given */
80 int reformat; /* 1 for -reformat (default), 0 for -noreformat */
81 int replacetextplain; /* 1 for -replacetextplain, 0 otherwise (default) */
82 int decodetext; /* CE_8BIT (default), CE_7BIT or CE_BINARY; 0 for -nodecodetext */
83 char *decodetypes; /* -decodetypes argument; defaults to "text,application/ics" */
84 /* 0 (default, -crlflinebreaks) keeps CRLF line breaks, per RFC 2046 Sec. 4.1.1, par.1; 1 is set by -nocrlflinebreaks. */
85 int lf_line_endings;
86 char *textcharset; /* -textcharset argument, or NULL for none / -notextcharset */
87 } fix_transformations;
88
/*
 * Forward declarations.  mhfixmsgsbr() is the per-message driver and
 * is the only one declared without "static"; everything else is a
 * file-local helper used by the individual transformations.
 */
89 int mhfixmsgsbr (CT *, char *, const fix_transformations *, FILE **, char *,
90 FILE **);
91 static int fix_boundary (CT *, int *);
92 static int copy_input_to_output (const char *, FILE *, const char *, FILE *);
93 static int get_multipart_boundary (CT, char **);
94 static int replace_boundary (CT, char *, char *);
95 static int fix_types (CT, svector_t, int *);
96 static char *replace_substring (char **, const char *, const char *);
97 static char *remove_parameter (char *, const char *);
98 static int fix_composite_cte (CT, int *);
99 static int set_ce (CT, int);
100 static int ensure_text_plain (CT *, CT, int *, int);
101 static int find_textplain_sibling (CT, int, int *);
102 static int insert_new_text_plain_part (CT, int, CT);
103 static CT build_text_plain_part (CT);
104 static int insert_into_new_mp_alt (CT *, int *);
105 static CT divide_part (CT);
106 static void copy_ctinfo (CI, CI);
107 static int decode_part (CT);
108 static int reformat_part (CT, char *, char *, char *, int);
109 static CT build_multipart_alt (CT, CT, int, int);
110 static int boundary_in_content (FILE **, char *, const char *);
111 static void transfer_noncontent_headers (CT, CT);
112 static int set_ct_type (CT, int type, int subtype, int encoding);
113 static int decode_text_parts (CT, int, const char *, int *);
114 static int should_decode(const char *, const char *, const char *);
115 static int content_encoding (CT, const char **);
116 static int strip_crs (CT, int *);
117 static void update_cte (CT);
118 static int least_restrictive_encoding (CT);
119 static int less_restrictive (int, int);
120 static int convert_charsets (CT, char *, int *);
121 static int fix_always (CT, int *);
122 static int fix_filename_param (char *, char *, PM *, PM *);
123 static int fix_filename_encoding (CT);
124 static int write_content (CT, const char *, char *, FILE *, int, int);
125 static void set_text_ctparams(CT, char *, int);
126 static int remove_file (const char *);
127 static void report (char *, char *, char *, char *, ...);
128 static void pipeser (int);
129
130
131 int
132 main (int argc, char **argv) {
133 int msgnum;
134 char *cp, *file = NULL, *folder = NULL;
135 char *maildir = NULL, buf[100], *outfile = NULL;
136 char **argp, **arguments;
137 struct msgs_array msgs = { 0, 0, NULL };
138 struct msgs *mp = NULL;
139 CT *ctp;
140 FILE *fp, *infp = NULL, *outfp = NULL;
141 int using_stdin = 0;
142 int chgflag = 1;
143 int status = OK;
144 fix_transformations fx;
145 fx.reformat = fx.fixcompositecte = fx.fixboundary = 1;
146 fx.fixtypes = NULL;
147 fx.replacetextplain = 0;
148 fx.decodetext = CE_8BIT;
149 fx.decodetypes = "text,application/ics"; /* Default, per man page. */
150 fx.lf_line_endings = 0;
151 fx.textcharset = NULL;
152
153 if (nmh_init(argv[0], 2)) { return 1; }
154
155 arguments = getarguments (invo_name, argc, argv, 1);
156 argp = arguments;
157
158 /*
159 * Parse arguments
160 */
161 while ((cp = *argp++)) {
162 if (*cp == '-') {
163 switch (smatch (++cp, switches)) {
164 case AMBIGSW:
165 ambigsw (cp, switches);
166 done (1);
167 case UNKWNSW:
168 adios (NULL, "-%s unknown", cp);
169
170 case HELPSW:
171 snprintf (buf, sizeof buf, "%s [+folder] [msgs] [switches]",
172 invo_name);
173 print_help (buf, switches, 1);
174 done (0);
175 case VERSIONSW:
176 print_version(invo_name);
177 done (0);
178
179 case DECODETEXTSW:
180 if (! (cp = *argp++) || *cp == '-') {
181 adios (NULL, "missing argument to %s", argp[-2]);
182 }
183 if (! strcasecmp (cp, "8bit")) {
184 fx.decodetext = CE_8BIT;
185 } else if (! strcasecmp (cp, "7bit")) {
186 fx.decodetext = CE_7BIT;
187 } else if (! strcasecmp (cp, "binary")) {
188 fx.decodetext = CE_BINARY;
189 } else {
190 adios (NULL, "invalid argument to %s", argp[-2]);
191 }
192 continue;
193 case NDECODETEXTSW:
194 fx.decodetext = 0;
195 continue;
196 case DECODETYPESW:
197 if (! (cp = *argp++) || *cp == '-') {
198 adios (NULL, "missing argument to %s", argp[-2]);
199 }
200 fx.decodetypes = cp;
201 continue;
202 case CRLFLINEBREAKSSW:
203 fx.lf_line_endings = 0;
204 continue;
205 case NCRLFLINEBREAKSSW:
206 fx.lf_line_endings = 1;
207 continue;
208 case TEXTCHARSETSW:
209 if (! (cp = *argp++) || (*cp == '-' && cp[1])) {
210 adios (NULL, "missing argument to %s", argp[-2]);
211 }
212 fx.textcharset = cp;
213 continue;
214 case NTEXTCHARSETSW:
215 fx.textcharset = 0;
216 continue;
217 case FIXBOUNDARYSW:
218 fx.fixboundary = 1;
219 continue;
220 case NFIXBOUNDARYSW:
221 fx.fixboundary = 0;
222 continue;
223 case FIXCOMPOSITECTESW:
224 fx.fixcompositecte = 1;
225 continue;
226 case NFIXCOMPOSITECTESW:
227 fx.fixcompositecte = 0;
228 continue;
229 case FIXTYPESW:
230 if (! (cp = *argp++) || (*cp == '-' && cp[1])) {
231 adios (NULL, "missing argument to %s", argp[-2]);
232 }
233 if (! strncasecmp (cp, "multipart/", 10) ||
234 ! strncasecmp (cp, "message/", 8)) {
235 adios (NULL, "-fixtype %s not allowed", cp);
236 } else if (! strchr (cp, '/')) {
237 adios (NULL, "-fixtype requires type/subtype");
238 }
239 if (fx.fixtypes == NULL) { fx.fixtypes = svector_create (10); }
240 svector_push_back (fx.fixtypes, cp);
241 continue;
242 case REFORMATSW:
243 fx.reformat = 1;
244 continue;
245 case NREFORMATSW:
246 fx.reformat = 0;
247 continue;
248 case REPLACETEXTPLAINSW:
249 fx.replacetextplain = 1;
250 continue;
251 case NREPLACETEXTPLAINSW:
252 fx.replacetextplain = 0;
253 continue;
254 case FILESW:
255 if (! (cp = *argp++) || (*cp == '-' && cp[1])) {
256 adios (NULL, "missing argument to %s", argp[-2]);
257 }
258 file = *cp == '-' ? mh_xstrdup (cp) : path (cp, TFILE);
259 continue;
260 case OUTFILESW:
261 if (! (cp = *argp++) || (*cp == '-' && cp[1])) {
262 adios (NULL, "missing argument to %s", argp[-2]);
263 }
264 outfile = *cp == '-' ? mh_xstrdup (cp) : path (cp, TFILE);
265 continue;
266 case RPROCSW:
267 if (!(rmmproc = *argp++) || *rmmproc == '-') {
268 adios (NULL, "missing argument to %s", argp[-2]);
269 }
270 continue;
271 case NRPRCSW:
272 rmmproc = NULL;
273 continue;
274 case CHGSW:
275 chgflag = 1;
276 continue;
277 case NCHGSW:
278 chgflag = 0;
279 continue;
280 case VERBSW:
281 verbosw = 1;
282 continue;
283 case NVERBSW:
284 verbosw = 0;
285 continue;
286 }
287 }
288 if (*cp == '+' || *cp == '@') {
289 if (folder) {
290 adios (NULL, "only one folder at a time!");
291 } else {
292 folder = pluspath (cp);
293 }
294 } else {
295 if (*cp == '/') {
296 /* Interpret a full path as a filename, not a message. */
297 file = mh_xstrdup (cp);
298 } else {
299 app_msgarg (&msgs, cp);
300 }
301 }
302 }
303
304 SIGNAL (SIGQUIT, quitser);
305 SIGNAL (SIGPIPE, pipeser);
306
307 /*
308 * Read the standard profile setup
309 */
310 if ((fp = fopen (cp = etcpath ("mhn.defaults"), "r"))) {
311 readconfig ((struct node **) 0, fp, cp, 0);
312 fclose (fp);
313 }
314
315 suppress_bogus_mp_content_warning = skip_mp_cte_check = 1;
316 suppress_extraneous_trailing_semicolon_warning = 1;
317
318 if (! context_find ("path")) {
319 free (path ("./", TFOLDER));
320 }
321
322 if (file && msgs.size) {
323 adios (NULL, "cannot specify msg and file at same time!");
324 }
325
326 if (outfile) {
327 /* Open the outfile now, so we don't have to risk opening it
328 after running out of fds. */
329 if (strcmp (outfile, "-") == 0) {
330 outfp = stdout;
331 } else if ((outfp = fopen (outfile, "w")) == NULL) {
332 adios (outfile, "unable to open for writing");
333 }
334 }
335
336 /*
337 * check if message is coming from file
338 */
339 if (file) {
340 /* If file is stdin, create a tmp file name before parse_mime()
341 has a chance, because it might put in on a different
342 filesystem than the output file. Instead, put it in the
343 user's preferred tmp directory. */
344 CT ct;
345
346 if (! strcmp ("-", file)) {
347 int fd;
348 char *cp;
349
350 using_stdin = 1;
351
352 if ((cp = m_mktemp2 (NULL, invo_name, &fd, NULL)) == NULL) {
353 adios (NULL, "unable to create temporary file in %s",
354 get_temp_dir());
355 } else {
356 free (file);
357 file = mh_xstrdup (cp);
358 cpydata (STDIN_FILENO, fd, "-", file);
359 }
360
361 if (close (fd)) {
362 (void) m_unlink (file);
363 adios (NULL, "failed to write temporary file");
364 }
365 }
366
367 cts = mh_xcalloc(2, sizeof *cts);
368 ctp = cts;
369
370 if ((ct = parse_mime (file))) {
371 set_text_ctparams(ct, fx.decodetypes, fx.lf_line_endings);
372 *ctp++ = ct;
373 } else {
374 inform("unable to parse message from file %s", file);
375 status = NOTOK;
376
377 /* If there's an outfile, pass the input message unchanged, so the
378 message won't get dropped from a pipeline. */
379 if (outfile) {
380 /* Something went wrong. Output might be expected, such as if
381 this were run as a filter. Just copy the input to the
382 output. */
383 if ((infp = fopen (file, "r")) == NULL) {
384 adios (file, "unable to open for reading");
385 }
386
387 if (copy_input_to_output (file, infp, outfile, outfp) != OK) {
388 inform("unable to copy message to %s, "
389 "it might be lost\n", outfile);
390 }
391
392 fclose (infp);
393 infp = NULL;
394 }
395 }
396 } else {
397 /*
398 * message(s) are coming from a folder
399 */
400 CT ct;
401
402 if (! msgs.size) {
403 app_msgarg(&msgs, "cur");
404 }
405 if (! folder) {
406 folder = getfolder (1);
407 }
408 maildir = mh_xstrdup(m_maildir (folder));
409
410 /* chdir so that error messages, esp. from MIME parser, just
411 refer to the message and not its path. */
412 if (chdir (maildir) == NOTOK) {
413 adios (maildir, "unable to change directory to");
414 }
415
416 /* read folder and create message structure */
417 if (! (mp = folder_read (folder, 1))) {
418 adios (NULL, "unable to read folder %s", folder);
419 }
420
421 /* check for empty folder */
422 if (mp->nummsg == 0) {
423 adios (NULL, "no messages in %s", folder);
424 }
425
426 /* parse all the message ranges/sequences and set SELECTED */
427 for (msgnum = 0; msgnum < msgs.size; msgnum++)
428 if (! m_convert (mp, msgs.msgs[msgnum])) {
429 done (1);
430 }
431 seq_setprev (mp); /* set the previous-sequence */
432
433 cts = mh_xcalloc(mp->numsel + 1, sizeof *cts);
434 ctp = cts;
435
436 for (msgnum = mp->lowsel; msgnum <= mp->hghsel; msgnum++) {
437 if (is_selected(mp, msgnum)) {
438 char *msgnam = m_name (msgnum);
439
440 if ((ct = parse_mime (msgnam))) {
441 set_text_ctparams(ct, fx.decodetypes, fx.lf_line_endings);
442 *ctp++ = ct;
443 } else {
444 inform("unable to parse message %s", msgnam);
445 status = NOTOK;
446
447 /* If there's an outfile, pass the input message
448 unchanged, so the message won't get dropped from a
449 pipeline. */
450 if (outfile) {
451 /* Something went wrong. Output might be expected,
452 such as if this were run as a filter. Just copy
453 the input to the output. */
454 /* Can't use path() here because 1) it might have been
455 called before and it caches the pwd, and 2) we call
456 chdir() after that. */
457 char *input_filename =
458 concat (maildir, "/", msgnam, NULL);
459
460 if ((infp = fopen (input_filename, "r")) == NULL) {
461 adios (input_filename,
462 "unable to open for reading");
463 }
464
465 if (copy_input_to_output (input_filename, infp,
466 outfile, outfp) != OK) {
467 inform("unable to copy message to %s, "
468 "it might be lost\n", outfile);
469 }
470
471 fclose (infp);
472 infp = NULL;
473 free (input_filename);
474 }
475 }
476 }
477 }
478
479 if (chgflag) {
480 seq_setcur (mp, mp->hghsel); /* update current message */
481 }
482 seq_save (mp); /* synchronize sequences */
483 context_replace (pfolder, folder);/* update current folder */
484 context_save (); /* save the context file */
485 }
486
487 if (*cts) {
488 for (ctp = cts; *ctp; ++ctp) {
489 status += mhfixmsgsbr (ctp, maildir, &fx, &infp, outfile, &outfp);
490 free_content (*ctp);
491
492 if (using_stdin) {
493 (void) m_unlink (file);
494
495 if (! outfile) {
496 /* Just calling m_backup() unlinks the backup file. */
497 (void) m_backup (file);
498 }
499 }
500 }
501 } else {
502 status = 1;
503 }
504
505 mh_xfree(maildir);
506 free (cts);
507
508 if (fx.fixtypes != NULL) { svector_free (fx.fixtypes); }
509 if (infp) { fclose (infp); } /* even if stdin */
510 if (outfp) { fclose (outfp); } /* even if stdout */
511 free (outfile);
512 free (file);
513 free (folder);
514 free (arguments);
515
516 done (status);
517 return NOTOK;
518 }
519
520
521 /*
522 * Apply transformations to one message.
523 */
524 int
525 mhfixmsgsbr (CT *ctp, char *maildir, const fix_transformations *fx,
526 FILE **infp, char *outfile, FILE **outfp) {
527 /* Store input filename in case one of the transformations, i.e.,
528 fix_boundary(), rewrites to a tmp file. */
529 char *input_filename = maildir
530 ? concat (maildir, "/", (*ctp)->c_file, NULL)
531 : mh_xstrdup ((*ctp)->c_file);
532 int modify_inplace = 0;
533 int message_mods = 0;
534 int status = OK;
535
536 /* Though the input file won't need to be opened if everything goes
537 well, do it here just in case there's a failure, and that failure is
538 running out of file descriptors. */
539 if ((*infp = fopen (input_filename, "r")) == NULL) {
540 adios (input_filename, "unable to open for reading");
541 }
542
543 if (outfile == NULL) {
544 modify_inplace = 1;
545
546 if ((*ctp)->c_file) {
547 char *tempfile;
548 /* outfp will be closed by the caller */
549 if ((tempfile = m_mktemp2 (NULL, invo_name, NULL, outfp)) ==
550 NULL) {
551 adios (NULL, "unable to create temporary file in %s",
552 get_temp_dir());
553 }
554 outfile = mh_xstrdup (tempfile);
555 } else {
556 adios (NULL, "missing both input and output filenames\n");
557 }
558 } /* else *outfp was defined by caller */
559
560 reverse_alternative_parts (*ctp);
561 status = fix_always (*ctp, &message_mods);
562 if (status == OK && fx->fixboundary) {
563 status = fix_boundary (ctp, &message_mods);
564 }
565 if (status == OK && fx->fixtypes != NULL) {
566 status = fix_types (*ctp, fx->fixtypes, &message_mods);
567 }
568 if (status == OK && fx->fixcompositecte) {
569 status = fix_composite_cte (*ctp, &message_mods);
570 }
571 if (status == OK && fx->reformat) {
572 status =
573 ensure_text_plain (ctp, NULL, &message_mods, fx->replacetextplain);
574 }
575 if (status == OK && fx->decodetext) {
576 status = decode_text_parts (*ctp, fx->decodetext, fx->decodetypes,
577 &message_mods);
578 update_cte (*ctp);
579 }
580 if (status == OK && fx->textcharset != NULL) {
581 status = convert_charsets (*ctp, fx->textcharset, &message_mods);
582 }
583
584 if (status == OK && ! (*ctp)->c_umask) {
585 /* Set the umask for the contents file. This currently
586 isn't used but just in case it is in the future. */
587 struct stat st;
588
589 if (stat ((*ctp)->c_file, &st) != NOTOK) {
590 (*ctp)->c_umask = ~(st.st_mode & 0777);
591 } else {
592 (*ctp)->c_umask = ~m_gmprot();
593 }
594 }
595
596 /*
597 * Write the content to a file
598 */
599 if (status == OK) {
600 status = write_content (*ctp, input_filename, outfile, *outfp,
601 modify_inplace, message_mods);
602 } else if (! modify_inplace) {
603 /* Something went wrong. Output might be expected, such
604 as if this were run as a filter. Just copy the input
605 to the output. */
606 if (copy_input_to_output (input_filename, *infp, outfile,
607 *outfp) != OK) {
608 inform("unable to copy message to %s, it might be lost\n",
609 outfile);
610 }
611 }
612
613 if (modify_inplace) {
614 if (status != OK) { (void) m_unlink (outfile); }
615 free (outfile);
616 outfile = NULL;
617 }
618
619 fclose (*infp);
620 *infp = NULL;
621 free (input_filename);
622
623 return status;
624 }
625
626
627 /*
628 * Copy input message to output. Assumes not modifying in place, so this
629 * might be running as part of a pipeline.
630 */
631 static int
632 copy_input_to_output (const char *input_filename, FILE *infp,
633 const char *output_filename, FILE *outfp) {
634 int in = fileno (infp);
635 int out = fileno (outfp);
636 int status = OK;
637
638 if (in != -1 && out != -1) {
639 cpydata (in, out, input_filename, output_filename);
640 } else {
641 status = NOTOK;
642 }
643
644 return status;
645 }
646
647
648 /*
649 * Fix mismatched outer level boundary.
650 */
651 static int
652 fix_boundary (CT *ct, int *message_mods) {
653 struct multipart *mp;
654 int status = OK;
655
656 if (ct && (*ct)->c_type == CT_MULTIPART && bogus_mp_content) {
657 mp = (struct multipart *) (*ct)->c_ctparams;
658
659 /*
660 * 1) Get boundary at end of part.
661 * 2) Get boundary at beginning of part and compare to the end-of-part
662 * boundary.
663 * 3) Write out contents of ct to tmp file, replacing boundary in
664 * header with boundary from part. Set c_unlink to 1.
665 * 4) Free ct.
666 * 5) Call parse_mime() on the tmp file, replacing ct.
667 */
668
669 if (mp && mp->mp_start) {
670 char *part_boundary;
671
672 if (get_multipart_boundary (*ct, &part_boundary) == OK) {
673 char *fixed;
674
675 if ((fixed = m_mktemp2 (NULL, invo_name, NULL, &(*ct)->c_fp))) {
676 if (replace_boundary (*ct, fixed, part_boundary) == OK) {
677 char *filename = mh_xstrdup ((*ct)->c_file);
678 CT fixed_ct;
679
680 free_content (*ct);
681 if ((fixed_ct = parse_mime (fixed))) {
682 *ct = fixed_ct;
683 (*ct)->c_unlink = 1;
684
685 ++*message_mods;
686 if (verbosw) {
687 report (NULL, NULL, filename,
688 "fix multipart boundary");
689 }
690 } else {
691 *ct = NULL;
692 inform("unable to parse fixed part");
693 status = NOTOK;
694 }
695 free (filename);
696 } else {
697 inform("unable to replace broken boundary");
698 status = NOTOK;
699 }
700 } else {
701 inform("unable to create temporary file in %s",
702 get_temp_dir());
703 status = NOTOK;
704 }
705
706 free (part_boundary);
707 } else {
708 /* Couldn't fix the boundary. Report failure so that mhfixmsg
709 doesn't modify the message. */
710 status = NOTOK;
711 }
712 } else {
713 /* No multipart struct, even though the content type is
714 CT_MULTIPART. Report failure so that mhfixmsg doesn't modify
715 the message. */
716 status = NOTOK;
717 }
718 }
719
720 return status;
721 }
722
723
724 /*
725 * Find boundary at end of multipart.
726 */
727 static int
728 get_multipart_boundary (CT ct, char **part_boundary) {
729 char buffer[NMH_BUFSIZ];
730 char *end_boundary = NULL;
731 off_t begin = (off_t) ct->c_end > (off_t) (ct->c_begin + sizeof buffer)
732 ? (off_t) (ct->c_end - sizeof buffer)
733 : (off_t) ct->c_begin;
734 size_t bytes_read;
735 int status = OK;
736
737 /* This will fail if the boundary spans fread() calls. NMH_BUFSIZ should
738 be big enough, even if it's just 1024, to make that unlikely. */
739
740 /* free_content() will close ct->c_fp if bogus MP boundary is fixed. */
741 if (! ct->c_fp && (ct->c_fp = fopen (ct->c_file, "r")) == NULL) {
742 advise (ct->c_file, "unable to open for reading");
743 return NOTOK;
744 }
745
746 /* Get boundary at end of multipart. */
747 while (begin >= (off_t) ct->c_begin) {
748 fseeko (ct->c_fp, begin, SEEK_SET);
749 while ((bytes_read = fread (buffer, 1, sizeof buffer, ct->c_fp)) > 0) {
750 char *cp = rfind_str (buffer, bytes_read, "--");
751
752 if (cp) {
753 char *end;
754
755 /* Trim off trailing "--" and anything beyond. */
756 *cp-- = '\0';
757 if ((end = rfind_str (buffer, cp - buffer, "\n"))) {
758 if (strlen (end) > 3 && *end++ == '\n' &&
759 *end++ == '-' && *end++ == '-') {
760 end_boundary = mh_xstrdup (end);
761 break;
762 }
763 }
764 }
765 }
766
767 if (! end_boundary && begin > (off_t) (ct->c_begin + sizeof buffer)) {
768 begin -= sizeof buffer;
769 } else {
770 break;
771 }
772 }
773
774 /* Get boundary at beginning of multipart. */
775 if (end_boundary) {
776 fseeko (ct->c_fp, ct->c_begin, SEEK_SET);
777 while ((bytes_read = fread (buffer, 1, sizeof buffer, ct->c_fp)) > 0) {
778 if (bytes_read >= strlen (end_boundary)) {
779 char *cp = find_str (buffer, bytes_read, end_boundary);
780
781 if (cp && cp - buffer >= 2 && *--cp == '-' &&
782 *--cp == '-' && (cp > buffer && *--cp == '\n')) {
783 status = OK;
784 break;
785 }
786 } else {
787 /* The start and end boundaries didn't match, or the
788 start boundary doesn't begin with "\n--" (or "--"
789 if at the beginning of buffer). Keep trying. */
790 status = NOTOK;
791 }
792 }
793 } else {
794 status = NOTOK;
795 }
796
797 if (ct->c_fp) {
798 fclose (ct->c_fp);
799 ct->c_fp = NULL;
800 }
801
802 if (status == OK) {
803 *part_boundary = end_boundary;
804 } else {
805 *part_boundary = NULL;
806 free (end_boundary);
807 }
808
809 return status;
810 }
811
812
813 /*
814 * Open and copy ct->c_file to file, replacing the multipart boundary.
815 */
816 static int
817 replace_boundary (CT ct, char *file, char *boundary) {
818 FILE *fpin, *fpout;
819 int compnum, state;
820 char buf[NMH_BUFSIZ], name[NAMESZ];
821 char *np, *vp;
822 m_getfld_state_t gstate = 0;
823 int status = OK;
824
825 if (ct->c_file == NULL) {
826 inform("missing input filename");
827 return NOTOK;
828 }
829
830 if ((fpin = fopen (ct->c_file, "r")) == NULL) {
831 advise (ct->c_file, "unable to open for reading");
832 return NOTOK;
833 }
834
835 if ((fpout = fopen (file, "w")) == NULL) {
836 fclose (fpin);
837 advise (file, "unable to open for writing");
838 return NOTOK;
839 }
840
841 for (compnum = 1;;) {
842 int bufsz = (int) sizeof buf;
843
844 switch (state = m_getfld (&gstate, name, buf, &bufsz, fpin)) {
845 case FLD:
846 case FLDPLUS:
847 compnum++;
848
849 /* get copies of the buffers */
850 np = mh_xstrdup (name);
851 vp = mh_xstrdup (buf);
852
853 /* if necessary, get rest of field */
854 while (state == FLDPLUS) {
855 bufsz = sizeof buf;
856 state = m_getfld (&gstate, name, buf, &bufsz, fpin);
857 vp = add (buf, vp); /* add to previous value */
858 }
859
860 if (strcasecmp (TYPE_FIELD, np)) {
861 fprintf (fpout, "%s:%s", np, vp);
862 } else {
863 char *new_ctline, *new_params;
864
865 replace_param(&ct->c_ctinfo.ci_first_pm,
866 &ct->c_ctinfo.ci_last_pm, "boundary",
867 boundary, 0);
868
869 new_ctline = concat(" ", ct->c_ctinfo.ci_type, "/",
870 ct->c_ctinfo.ci_subtype, NULL);
871 new_params = output_params(strlen(TYPE_FIELD) +
872 strlen(new_ctline) + 1,
873 ct->c_ctinfo.ci_first_pm, NULL, 0);
874 fprintf (fpout, "%s:%s%s\n", np, new_ctline,
875 new_params ? new_params : "");
876 free(new_ctline);
877 mh_xfree(new_params);
878 }
879
880 free (vp);
881 free (np);
882
883 continue;
884
885 case BODY:
886 putc('\n', fpout);
887 /* buf will have a terminating NULL, skip it. */
888 if ((int) fwrite (buf, 1, bufsz-1, fpout) < bufsz-1) {
889 advise (file, "fwrite");
890 }
891 continue;
892
893 case FILEEOF:
894 break;
895
896 case LENERR:
897 case FMTERR:
898 inform("message format error in component #%d", compnum);
899 status = NOTOK;
900 break;
901
902 default:
903 inform("getfld() returned %d", state);
904 status = NOTOK;
905 break;
906 }
907
908 break;
909 }
910
911 m_getfld_state_destroy (&gstate);
912 fclose (fpout);
913 fclose (fpin);
914
915 return status;
916 }
917
918
919 /*
920 * Fix Content-Type header to reflect the content of its part.
921 */
922 static int
923 fix_types (CT ct, svector_t fixtypes, int *message_mods) {
924 int status = OK;
925
926 switch (ct->c_type) {
927 case CT_MULTIPART: {
928 struct multipart *m = (struct multipart *) ct->c_ctparams;
929 struct part *part;
930
931 for (part = m->mp_parts; status == OK && part; part = part->mp_next) {
932 status = fix_types (part->mp_part, fixtypes, message_mods);
933 }
934 break;
935 }
936
937 case CT_MESSAGE:
938 if (ct->c_subtype == MESSAGE_EXTERNAL) {
939 struct exbody *e = (struct exbody *) ct->c_ctparams;
940
941 status = fix_types (e->eb_content, fixtypes, message_mods);
942 }
943 break;
944
945 default: {
946 char **typep, *type;
947
948 if (ct->c_ctinfo.ci_type && ct->c_ctinfo.ci_subtype) {
949 for (typep = svector_strs (fixtypes);
950 typep && (type = *typep);
951 ++typep) {
952 char *type_subtype =
953 concat (ct->c_ctinfo.ci_type, "/", ct->c_ctinfo.ci_subtype,
954 NULL);
955
956 if (! strcasecmp (type, type_subtype) &&
957 decode_part (ct) == OK &&
958 ct->c_cefile.ce_file != NULL) {
959 char *ct_type_subtype = mime_type (ct->c_cefile.ce_file);
960 char *cp;
961
962 if ((cp = strchr (ct_type_subtype, ';'))) {
963 /* Truncate to remove any parameter list from
964 mime_type () result. */
965 *cp = '\0';
966 }
967
968 if (strcasecmp (type, ct_type_subtype)) {
969 char *ct_type, *ct_subtype;
970 HF hf;
971
972 /* The Content-Type header does not match the
973 content, so update these struct Content
974 fields to match:
975 * c_type, c_subtype
976 * c_ctinfo.ci_type, c_ctinfo.ci_subtype
977 * c_ctline
978 */
979 /* Extract type and subtype from type/subtype. */
980 ct_type = mh_xstrdup(ct_type_subtype);
981 if ((cp = strchr (ct_type, '/'))) {
982 *cp = '\0';
983 ct_subtype = mh_xstrdup(++cp);
984 } else {
985 inform("missing / in MIME type of %s %s",
986 ct->c_file, ct->c_partno);
987 free (ct_type);
988 return NOTOK;
989 }
990
991 ct->c_type = ct_str_type (ct_type);
992 ct->c_subtype = ct_str_subtype (ct->c_type, ct_subtype);
993
994 free (ct->c_ctinfo.ci_type);
995 ct->c_ctinfo.ci_type = ct_type;
996 free (ct->c_ctinfo.ci_subtype);
997 ct->c_ctinfo.ci_subtype = ct_subtype;
998 if (! replace_substring (&ct->c_ctline, type,
999 ct_type_subtype)) {
1000 inform("did not find %s in %s",
1001 type, ct->c_ctline);
1002 }
1003
1004 /* Update Content-Type header field. */
1005 for (hf = ct->c_first_hf; hf; hf = hf->next) {
1006 if (! strcasecmp (TYPE_FIELD, hf->name)) {
1007 if (replace_substring (&hf->value, type,
1008 ct_type_subtype)) {
1009 ++*message_mods;
1010 if (verbosw) {
1011 report (NULL, ct->c_partno, ct->c_file,
1012 "change Content-Type in header "
1013 "from %s to %s",
1014 type, ct_type_subtype);
1015 }
1016 break;
1017 } else {
1018 inform("did not find %s in %s",
1019 type, hf->value);
1020 }
1021 }
1022 }
1023 }
1024 free (ct_type_subtype);
1025 }
1026 free (type_subtype);
1027 }
1028 }
1029 }}
1030
1031 return status;
1032 }
1033
1034
/*
 * Replace the first occurrence of old in *str with new.
 *
 * On a match the result is built in newly allocated storage, the
 * previous *str is freed, *str is pointed at the result, and the
 * result is returned.  If old does not occur in *str, *str is left
 * alone and NULL is returned.
 */
char *
replace_substring (char **str, const char *old, const char *new) {
    char *match = strstr (*str, old);
    const char *tail;
    char *result;

    if (match == NULL) {
        return NULL;
    }

    /* Everything after the matched text is kept as is. */
    tail = match + strlen (old);

    if (match == *str) {
        /* The match is at the very start, so there is no prefix. */
        result = concat (new, tail, NULL);
    } else {
        /* Cut a copy of the original off right where the match
           begins to get the prefix. */
        char *head = mh_xstrdup(*str);

        head[match - *str] = '\0';
        result = concat (head, new, tail, NULL);
        free (head);
    }

    free (*str);
    *str = result;

    return result;
}
1062
1063
/*
 * Remove a name=value parameter, given just its name, from a header value.
 *
 * The first occurrence of "name=" in str is removed in place,
 * together with any whitespace and one semicolon directly in front of
 * it.  A value that starts with a double quote extends through the
 * closing quote, or to the end of the string if the quote is never
 * closed.  Any other value extends to the next whitespace character
 * or the end of the string.  If "name=" does not occur, str is left
 * unchanged.
 *
 * Returns str.
 */
static char *
remove_parameter (char *str, const char *name) {
    /* It looks to me, based on the BNF in RFC 2045, that there can't
       be whitespace between the parameter name and the "=", or
       between the "=" and the parameter value. */
    const size_t name_len = strlen (name);
    char *cp;

    /* Find the first occurrence of name that is directly followed by
       "=".  cp[name_len] is always within the string because name was
       matched starting at cp. */
    for (cp = strstr (str, name); cp  &&  cp[name_len] != '='; ) {
        cp = *cp ? strstr (cp + 1, name) : NULL;
    }

    if (cp) {
        char *start, *end;

        /* Remove any leading spaces, before the parameter name. */
        for (start = cp;
             start > str  &&  isspace ((unsigned char) *(start-1));
             --start) {
            continue;
        }
        /* Remove a leading semicolon. */
        if (start > str  &&  *(start-1) == ';') { --start; }

        /* end starts out at the first character of the value. */
        end = cp + name_len + 1;
        if (*end == '"') {
            /* Skip past the quoted value, and then the final quote. */
            for (++end ; *end  &&  *end != '"'; ++end) { continue; }
            /* Stay on the terminator if the quote was never closed. */
            if (*end == '"') { ++end; }
        } else {
            /* Skip past the value.  Its first character is always
               part of it, unless the value is empty because the
               string ends right after the "=". */
            if (*end) { ++end; }
            for ( ; *end  &&  ! isspace ((unsigned char) *end); ++end) {
                continue;
            }
        }

        /* Close the gap, moving the rest of the string including its
           terminating null. */
        (void) memmove (start, end, strlen (end) + 1);
    }

    return str;
}
1109
1110
1111 /*
1112 * Fix Content-Transfer-Encoding of composite,, e.g., message or multipart, part.
1113 * According to RFC 2045 Sec. 6.4, it must be 7bit, 8bit, or binary. Set it to
1114 * 8 bit.
1115 */
1116 static int
1117 fix_composite_cte (CT ct, int *message_mods) {
1118 int status = OK;
1119
1120 if (ct->c_type == CT_MESSAGE || ct->c_type == CT_MULTIPART) {
1121 if (ct->c_encoding != CE_7BIT && ct->c_encoding != CE_8BIT &&
1122 ct->c_encoding != CE_BINARY) {
1123 HF hf;
1124
1125 for (hf = ct->c_first_hf; hf; hf = hf->next) {
1126 char *name = hf->name;
1127 for (; *name && isspace ((unsigned char) *name); ++name) {
1128 continue;
1129 }
1130
1131 if (! strncasecmp (name, ENCODING_FIELD,
1132 strlen (ENCODING_FIELD))) {
1133 char *prefix = "Nmh-REPLACED-INVALID-";
1134 HF h;
1135
1136 NEW(h);
1137 h->name = mh_xstrdup (hf->name);
1138 h->hf_encoding = hf->hf_encoding;
1139 h->next = hf->next;
1140 hf->next = h;
1141
1142 /* Retain old header but prefix its name. */
1143 free (hf->name);
1144 hf->name = concat (prefix, h->name, NULL);
1145
1146 ++*message_mods;
1147 if (verbosw) {
1148 char *encoding = cpytrim (hf->value);
1149 report (NULL, ct->c_partno, ct->c_file,
1150 "replace Content-Transfer-Encoding of %s "
1151 "with 8 bit", encoding);
1152 free (encoding);
1153 }
1154
1155 h->value = mh_xstrdup (" 8bit\n");
1156
1157 /* Don't need to warn for multiple C-T-E header
1158 fields, parse_mime() already does that. But
1159 if there are any, fix them all as necessary. */
1160 hf = h;
1161 }
1162 }
1163
1164 set_ce (ct, CE_8BIT);
1165 }
1166
1167 if (ct->c_type == CT_MULTIPART) {
1168 struct multipart *m;
1169 struct part *part;
1170
1171 m = (struct multipart *) ct->c_ctparams;
1172 for (part = m->mp_parts; part; part = part->mp_next) {
1173 if (fix_composite_cte (part->mp_part, message_mods) != OK) {
1174 status = NOTOK;
1175 break;
1176 }
1177 }
1178 }
1179 }
1180
1181 return status;
1182 }
1183
1184
1185 /*
1186 * Set content encoding.
1187 */
1188 static int
1189 set_ce (CT ct, int encoding) {
1190 const char *ce = ce_str (encoding);
1191 const struct str2init *ctinit = get_ce_method (ce);
1192
1193 if (ctinit) {
1194 char *cte = concat (" ", ce, "\n", NULL);
1195 int found_cte = 0;
1196 HF hf;
1197 /* Decoded contents might be in ct->c_cefile.ce_file, if the
1198 caller is decode_text_parts (). Save because we'll
1199 overwrite below. */
1200 struct cefile decoded_content_info = ct->c_cefile;
1201
1202 ct->c_encoding = encoding;
1203
1204 ct->c_ctinitfnx = ctinit->si_init;
1205 /* This will assign ct->c_cefile with an all-0 struct, which
1206 is what we want. */
1207 (*ctinit->si_init) (ct);
1208 /* After returning, the caller should set
1209 ct->c_cefile.ce_file to the name of the file containing
1210 the contents. */
1211
1212 if (ct->c_ceclosefnx) {
1213 (*ct->c_ceclosefnx) (ct);
1214 }
1215
1216 /* Restore the cefile. */
1217 ct->c_cefile = decoded_content_info;
1218
1219 /* Update/add Content-Transfer-Encoding header field. */
1220 for (hf = ct->c_first_hf; hf; hf = hf->next) {
1221 if (! strcasecmp (ENCODING_FIELD, hf->name)) {
1222 found_cte = 1;
1223 free (hf->value);
1224 hf->value = cte;
1225 }
1226 }
1227 if (! found_cte) {
1228 add_header (ct, mh_xstrdup (ENCODING_FIELD), cte);
1229 }
1230
1231 /* Update c_celine. It's used only by mhlist -debug. */
1232 free (ct->c_celine);
1233 ct->c_celine = mh_xstrdup (cte);
1234
1235 return OK;
1236 }
1237
1238 return NOTOK;
1239 }
1240
1241
1242 /*
1243 * Make sure each text part has a corresponding text/plain part.
1244 */
1245 static int
1246 ensure_text_plain (CT *ct, CT parent, int *message_mods, int replacetextplain) {
1247 int status = OK;
1248
1249 switch ((*ct)->c_type) {
1250 case CT_TEXT: {
1251 /* Nothing to do for text/plain. */
1252 if ((*ct)->c_subtype == TEXT_PLAIN) { return OK; }
1253
1254 if (parent && parent->c_type == CT_MULTIPART &&
1255 parent->c_subtype == MULTI_ALTERNATE) {
1256 int new_subpart_number = 1;
1257 int has_text_plain =
1258 find_textplain_sibling (parent, replacetextplain,
1259 &new_subpart_number);
1260
1261 if (! has_text_plain) {
1262 /* Parent is a multipart/alternative. Insert a new
1263 text/plain subpart. */
1264 const int inserted =
1265 insert_new_text_plain_part (*ct, new_subpart_number,
1266 parent);
1267 if (inserted) {
1268 ++*message_mods;
1269 if (verbosw) {
1270 report (NULL, parent->c_partno, parent->c_file,
1271 "insert text/plain part");
1272 }
1273 } else {
1274 status = NOTOK;
1275 }
1276 }
1277 } else if (parent && parent->c_type == CT_MULTIPART &&
1278 parent->c_subtype == MULTI_RELATED) {
1279 char *type_subtype =
1280 concat ((*ct)->c_ctinfo.ci_type, "/",
1281 (*ct)->c_ctinfo.ci_subtype, NULL);
1282 const char *parent_type =
1283 get_param (parent->c_ctinfo.ci_first_pm, "type", '?', 1);
1284 int new_subpart_number = 1;
1285 int has_text_plain = 0;
1286
1287 /* Have to do string comparison on the subtype because we
1288 don't enumerate all of them in c_subtype values.
1289 parent_type will be NULL if the multipart/related part
1290 doesn't have a type parameter. The type parameter must
1291 be specified according to RFC 2387 Sec. 3.1 but not all
1292 messages comply. */
1293 if (parent_type && strcasecmp (type_subtype, parent_type) == 0) {
1294 /* The type of this part matches the root type of the
1295 parent multipart/related. Look to see if there's
1296 text/plain sibling. */
1297 has_text_plain =
1298 find_textplain_sibling (parent, replacetextplain,
1299 &new_subpart_number);
1300 }
1301
1302 free (type_subtype);
1303
1304 if (! has_text_plain) {
1305 struct multipart *mp = (struct multipart *) parent->c_ctparams;
1306 struct part *part;
1307 int siblings = 0;
1308
1309 for (part = mp->mp_parts; part; part = part->mp_next) {
1310 if (*ct != part->mp_part) {
1311 ++siblings;
1312 }
1313 }
1314
1315 if (siblings) {
1316 /* Parent is a multipart/related. Insert a new
1317 text/plain subpart in a new multipart/alternative. */
1318 if (insert_into_new_mp_alt (ct, message_mods)) {
1319 /* Not an error if text/plain couldn't be added. */
1320 }
1321 } else {
1322 /* There are no siblings, so insert a new text/plain
1323 subpart, and change the parent type from
1324 multipart/related to multipart/alternative. */
1325 const int inserted =
1326 insert_new_text_plain_part (*ct, new_subpart_number,
1327 parent);
1328
1329 if (inserted) {
1330 HF hf;
1331
1332 parent->c_subtype = MULTI_ALTERNATE;
1333 free (parent->c_ctinfo.ci_subtype);
1334 parent->c_ctinfo.ci_subtype = mh_xstrdup("alternative");
1335 if (! replace_substring (&parent->c_ctline, "/related",
1336 "/alternative")) {
1337 inform("did not find multipart/related in %s",
1338 parent->c_ctline);
1339 }
1340
1341 /* Update Content-Type header field. */
1342 for (hf = parent->c_first_hf; hf; hf = hf->next) {
1343 if (! strcasecmp (TYPE_FIELD, hf->name)) {
1344 if (replace_substring (&hf->value, "/related",
1345 "/alternative")) {
1346 ++*message_mods;
1347 if (verbosw) {
1348 report (NULL, parent->c_partno,
1349 parent->c_file,
1350 "insert text/plain part");
1351 }
1352
1353 /* Remove, e.g., type="text/html" from
1354 multipart/alternative. */
1355 remove_parameter (hf->value, "type");
1356 break;
1357 } else {
1358 inform("did not find multipart/"
1359 "related in header %s",
1360 hf->value);
1361 }
1362 }
1363 }
1364 } else {
1365 /* Not an error if text/plain couldn't be inserted. */
1366 }
1367 }
1368 }
1369 } else {
1370 if (insert_into_new_mp_alt (ct, message_mods)) {
1371 status = NOTOK;
1372 }
1373 }
1374 break;
1375 }
1376
1377 case CT_MULTIPART: {
1378 struct multipart *mp = (struct multipart *) (*ct)->c_ctparams;
1379 struct part *part;
1380
1381 for (part = mp->mp_parts; status == OK && part; part = part->mp_next) {
1382 if ((*ct)->c_type == CT_MULTIPART) {
1383 status = ensure_text_plain (&part->mp_part, *ct, message_mods,
1384 replacetextplain);
1385 }
1386 }
1387 break;
1388 }
1389
1390 case CT_MESSAGE:
1391 if ((*ct)->c_subtype == MESSAGE_EXTERNAL) {
1392 struct exbody *e = (struct exbody *) (*ct)->c_ctparams;
1393
1394 status = ensure_text_plain (&e->eb_content, *ct, message_mods,
1395 replacetextplain);
1396 }
1397 break;
1398 }
1399
1400 return status;
1401 }
1402
1403
1404 /*
1405 * See if there is a sibling text/plain, and return its subpart number.
1406 */
1407 static int
1408 find_textplain_sibling (CT parent, int replacetextplain,
1409 int *new_subpart_number) {
1410 struct multipart *mp = (struct multipart *) parent->c_ctparams;
1411 struct part *part, *prev;
1412 int has_text_plain = 0;
1413
1414 for (prev = part = mp->mp_parts; part; part = part->mp_next) {
1415 ++*new_subpart_number;
1416 if (part->mp_part->c_type == CT_TEXT &&
1417 part->mp_part->c_subtype == TEXT_PLAIN) {
1418 if (replacetextplain) {
1419 struct part *old_part;
1420 if (part == mp->mp_parts) {
1421 old_part = mp->mp_parts;
1422 mp->mp_parts = part->mp_next;
1423 } else {
1424 old_part = prev->mp_next;
1425 prev->mp_next = part->mp_next;
1426 }
1427 if (verbosw) {
1428 report (NULL, parent->c_partno, parent->c_file,
1429 "remove text/plain part %s",
1430 old_part->mp_part->c_partno);
1431 }
1432 free_content (old_part->mp_part);
1433 free (old_part);
1434 } else {
1435 has_text_plain = 1;
1436 }
1437 break;
1438 }
1439 prev = part;
1440 }
1441
1442 return has_text_plain;
1443 }
1444
1445
1446 /*
1447 * Insert a new text/plain part.
1448 */
1449 static int
1450 insert_new_text_plain_part (CT ct, int new_subpart_number, CT parent) {
1451 struct multipart *mp = (struct multipart *) parent->c_ctparams;
1452 struct part *new_part;
1453
1454 NEW(new_part);
1455 if ((new_part->mp_part = build_text_plain_part (ct))) {
1456 char buffer[16];
1457 snprintf (buffer, sizeof buffer, "%d", new_subpart_number);
1458
1459 new_part->mp_next = mp->mp_parts;
1460 mp->mp_parts = new_part;
1461 new_part->mp_part->c_partno =
1462 concat (parent->c_partno ? parent->c_partno : "1", ".",
1463 buffer, NULL);
1464
1465 return 1;
1466 }
1467
1468 free_content (new_part->mp_part);
1469 free (new_part);
1470
1471 return 0;
1472 }
1473
1474
1475 /*
1476 * Create a text/plain part to go along with non-plain sibling part.
1477 */
1478 static CT
1479 build_text_plain_part (CT encoded_part) {
1480 CT tp_part = divide_part (encoded_part);
1481 char *tmp_plain_file = NULL;
1482
1483 if (decode_part (tp_part) == OK) {
1484 /* Now, tp_part->c_cefile.ce_file is the name of the tmp file that
1485 contains the decoded contents. And the decoding function, such
1486 as openQuoted, will have set ...->ce_unlink to 1 so that it will
1487 be unlinked by free_content (). */
1488 char *tempfile;
1489
1490 /* This m_mktemp2() call closes the temp file. */
1491 if ((tempfile = m_mktemp2 (NULL, invo_name, NULL, NULL)) == NULL) {
1492 inform("unable to create temporary file in %s",
1493 get_temp_dir());
1494 } else {
1495 tmp_plain_file = mh_xstrdup (tempfile);
1496 if (reformat_part (tp_part, tmp_plain_file,
1497 tp_part->c_ctinfo.ci_type,
1498 tp_part->c_ctinfo.ci_subtype,
1499 tp_part->c_type) == OK) {
1500 return tp_part;
1501 }
1502 }
1503 }
1504
1505 free_content (tp_part);
1506 if (tmp_plain_file) { (void) m_unlink (tmp_plain_file); }
1507 free (tmp_plain_file);
1508
1509 return NULL;
1510 }
1511
1512
1513 /*
1514 * Slip new text/plain part into a new multipart/alternative.
1515 */
1516 static int
1517 insert_into_new_mp_alt (CT *ct, int *message_mods) {
1518 CT tp_part = build_text_plain_part (*ct);
1519 int status = OK;
1520
1521 if (tp_part) {
1522 CT mp_alt = build_multipart_alt (*ct, tp_part, CT_MULTIPART,
1523 MULTI_ALTERNATE);
1524 if (mp_alt) {
1525 struct multipart *mp = (struct multipart *) mp_alt->c_ctparams;
1526
1527 if (mp && mp->mp_parts) {
1528 mp->mp_parts->mp_part = tp_part;
1529 /* Make the new multipart/alternative the parent. */
1530 *ct = mp_alt;
1531
1532 ++*message_mods;
1533 if (verbosw) {
1534 report (NULL, (*ct)->c_partno, (*ct)->c_file,
1535 "insert text/plain part");
1536 }
1537 } else {
1538 free_content (tp_part);
1539 free_content (mp_alt);
1540 status = NOTOK;
1541 }
1542 } else {
1543 status = NOTOK;
1544 }
1545 } else {
1546 /* Not an error if text/plain couldn't be built. */
1547 }
1548
1549 return status;
1550 }
1551
1552
1553 /*
1554 * Clone a MIME part.
1555 */
1556 static CT
1557 divide_part (CT ct) {
1558 CT new_part;
1559
1560 NEW0(new_part);
1561 /* Just copy over what is needed for decoding. c_vrsn and
1562 c_celine aren't necessary. */
1563 new_part->c_file = mh_xstrdup (ct->c_file);
1564 new_part->c_begin = ct->c_begin;
1565 new_part->c_end = ct->c_end;
1566 copy_ctinfo (&new_part->c_ctinfo, &ct->c_ctinfo);
1567 new_part->c_type = ct->c_type;
1568 new_part->c_cefile = ct->c_cefile;
1569 new_part->c_encoding = ct->c_encoding;
1570 new_part->c_ctinitfnx = ct->c_ctinitfnx;
1571 new_part->c_ceopenfnx = ct->c_ceopenfnx;
1572 new_part->c_ceclosefnx = ct->c_ceclosefnx;
1573 new_part->c_cesizefnx = ct->c_cesizefnx;
1574
1575 /* c_ctline is used by reformat__part(), so it can preserve
1576 anything after the type/subtype. */
1577 new_part->c_ctline = mh_xstrdup (ct->c_ctline);
1578
1579 return new_part;
1580 }
1581
1582
1583 /*
1584 * Copy the content info from one part to another.
1585 */
1586 static void
1587 copy_ctinfo (CI dest, CI src) {
1588 PM s_pm, d_pm;
1589
1590 dest->ci_type = src->ci_type ? mh_xstrdup (src->ci_type) : NULL;
1591 dest->ci_subtype = src->ci_subtype ? mh_xstrdup (src->ci_subtype) : NULL;
1592
1593 for (s_pm = src->ci_first_pm; s_pm; s_pm = s_pm->pm_next) {
1594 d_pm = add_param(&dest->ci_first_pm, &dest->ci_last_pm, s_pm->pm_name,
1595 s_pm->pm_value, 0);
1596 if (s_pm->pm_charset) {
1597 d_pm->pm_charset = mh_xstrdup(s_pm->pm_charset);
1598 }
1599 if (s_pm->pm_lang) {
1600 d_pm->pm_lang = mh_xstrdup(s_pm->pm_lang);
1601 }
1602 }
1603
1604 dest->ci_comment = src->ci_comment ? mh_xstrdup (src->ci_comment) : NULL;
1605 dest->ci_magic = src->ci_magic ? mh_xstrdup (src->ci_magic) : NULL;
1606 }
1607
1608
1609 /*
1610 * Decode content.
1611 */
1612 static int
1613 decode_part (CT ct) {
1614 char *tmp_decoded;
1615 int status;
1616 FILE *file;
1617 char *tempfile;
1618
1619 if ((tempfile = m_mktemp2 (NULL, invo_name, NULL, &file)) == NULL) {
1620 adios (NULL, "unable to create temporary file in %s", get_temp_dir());
1621 }
1622 tmp_decoded = mh_xstrdup (tempfile);
1623 /* The following call will load ct->c_cefile.ce_file with the tmp
1624 filename of the decoded content. tmp_decoded will contain the
1625 encoded output, get rid of that. */
1626 status = output_message_fp (ct, file, tmp_decoded);
1627 (void) m_unlink (tmp_decoded);
1628 free (tmp_decoded);
1629 if (fclose (file)) {
1630 inform("unable to close temporary file %s, continuing...", tempfile);
1631 }
1632
1633 return status;
1634 }
1635
1636
1637 /*
1638 * Reformat content as plain text.
1639 * Some of the arguments aren't really needed now, but maybe will
1640 * be in the future for other than text types.
1641 */
1642 static int
1643 reformat_part (CT ct, char *file, char *type, char *subtype, int c_type) {
1644 int output_subtype, output_encoding;
1645 const char *reason = NULL;
1646 char *cp, *cf;
1647 int status;
1648
1649 /* Hacky: this redirects the output from whatever command is used
1650 to show the part to a file. So, the user can't have any output
1651 redirection in that command.
1652 Could show_multi() in mhshowsbr.c avoid this? */
1653
1654 /* Check for invo_name-format-type/subtype. */
1655 if ((cf = context_find_by_type ("format", type, subtype)) == NULL) {
1656 if (verbosw) {
1657 inform("Don't know how to convert %s, there is no "
1658 "%s-format-%s/%s profile entry",
1659 ct->c_file, invo_name, type, subtype);
1660 }
1661 return NOTOK;
1662 }
1663 if (strchr (cf, '>')) {
1664 inform("'>' prohibited in \"%s\",\nplease fix your "
1665 "%s-format-%s/%s profile entry", cf, invo_name, type,
1666 subtype ? subtype : "");
1667
1668 return NOTOK;
1669 }
1670
1671 cp = concat (cf, " >", file, NULL);
1672 status = show_content_aux (ct, 0, cp, NULL, NULL);
1673 free (cp);
1674
1675 /* Unlink decoded content tmp file and free its filename to avoid
1676 leaks. The file stream should already have been closed. */
1677 if (ct->c_cefile.ce_unlink) {
1678 (void) m_unlink (ct->c_cefile.ce_file);
1679 free (ct->c_cefile.ce_file);
1680 ct->c_cefile.ce_file = NULL;
1681 ct->c_cefile.ce_unlink = 0;
1682 }
1683
1684 if (c_type == CT_TEXT) {
1685 output_subtype = TEXT_PLAIN;
1686 } else {
1687 /* Set subtype to 0, which is always an UNKNOWN subtype. */
1688 output_subtype = 0;
1689 }
1690
1691 output_encoding = content_encoding (ct, &reason);
1692 if (status == OK &&
1693 set_ct_type (ct, c_type, output_subtype, output_encoding) == OK) {
1694 ct->c_cefile.ce_file = file;
1695 ct->c_cefile.ce_unlink = 1;
1696 } else {
1697 ct->c_cefile.ce_unlink = 0;
1698 status = NOTOK;
1699 }
1700
1701 return status;
1702 }
1703
1704
1705 /*
1706 * Fill in a multipart/alternative part.
1707 */
1708 static CT
1709 build_multipart_alt (CT first_alt, CT new_part, int type, int subtype) {
1710 char *boundary_prefix = "----=_nmh-multipart";
1711 char *boundary = concat (boundary_prefix, first_alt->c_partno, NULL);
1712 char *boundary_indicator = "; boundary=";
1713 char *typename, *subtypename, *name;
1714 CT ct;
1715 struct part *p;
1716 struct multipart *m;
1717 const struct str2init *ctinit;
1718
1719 NEW0(ct);
1720
1721 /* Set up the multipart/alternative part. These fields of *ct were
1722 initialized to 0 by mh_xcalloc():
1723 c_fp, c_unlink, c_begin, c_end,
1724 c_vrsn, c_ctline, c_celine,
1725 c_id, c_descr, c_dispo, c_partno,
1726 c_ctinfo.ci_comment, c_ctinfo.ci_magic,
1727 c_cefile, c_encoding,
1728 c_digested, c_digest[16], c_ctexbody,
1729 c_ctinitfnx, c_ceopenfnx, c_ceclosefnx, c_cesizefnx,
1730 c_umask, c_rfc934,
1731 c_showproc, c_termproc, c_storeproc, c_storage, c_folder
1732 */
1733
1734 ct->c_file = mh_xstrdup (first_alt->c_file);
1735 ct->c_type = type;
1736 ct->c_subtype = subtype;
1737
1738 ctinit = get_ct_init (ct->c_type);
1739
1740 typename = ct_type_str (type);
1741 subtypename = ct_subtype_str (type, subtype);
1742
1743 {
1744 int serial = 0;
1745 int found_boundary = 1;
1746
1747 while (found_boundary && serial < 1000000) {
1748 found_boundary = 0;
1749
1750 /* Ensure that the boundary doesn't appear in the decoded
1751 content. */
1752 if (new_part->c_cefile.ce_file) {
1753 if ((found_boundary =
1754 boundary_in_content (&new_part->c_cefile.ce_fp,
1755 new_part->c_cefile.ce_file,
1756 boundary)) == NOTOK) {
1757 free_content (ct);
1758 return NULL;
1759 }
1760 }
1761
1762 /* Ensure that the boundary doesn't appear in the encoded
1763 content. */
1764 if (! found_boundary && new_part->c_file) {
1765 if ((found_boundary =
1766 boundary_in_content (&new_part->c_fp,
1767 new_part->c_file,
1768 boundary)) == NOTOK) {
1769 free_content (ct);
1770 return NULL;
1771 }
1772 }
1773
1774 if (found_boundary) {
1775 /* Try a slightly different boundary. */
1776 char buffer2[16];
1777
1778 free (boundary);
1779 ++serial;
1780 snprintf (buffer2, sizeof buffer2, "%d", serial);
1781 boundary =
1782 concat (boundary_prefix,
1783 first_alt->c_partno ? first_alt->c_partno : "",
1784 "-", buffer2, NULL);
1785 }
1786 }
1787
1788 if (found_boundary) {
1789 inform("giving up trying to find a unique boundary");
1790 free_content (ct);
1791 return NULL;
1792 }
1793 }
1794
1795 name = concat (" ", typename, "/", subtypename, boundary_indicator, "\"",
1796 boundary, "\"", NULL);
1797
1798 /* Load c_first_hf and c_last_hf. */
1799 transfer_noncontent_headers (first_alt, ct);
1800 add_header (ct, mh_xstrdup (TYPE_FIELD), concat (name, "\n", NULL));
1801 free (name);
1802
1803 /* Load c_partno. */
1804 if (first_alt->c_partno) {
1805 ct->c_partno = mh_xstrdup (first_alt->c_partno);
1806 free (first_alt->c_partno);
1807 first_alt->c_partno = concat (ct->c_partno, ".1", NULL);
1808 new_part->c_partno = concat (ct->c_partno, ".2", NULL);
1809 } else {
1810 first_alt->c_partno = mh_xstrdup ("1");
1811 new_part->c_partno = mh_xstrdup ("2");
1812 }
1813
1814 if (ctinit) {
1815 ct->c_ctinfo.ci_type = mh_xstrdup (typename);
1816 ct->c_ctinfo.ci_subtype = mh_xstrdup (subtypename);
1817 }
1818
1819 add_param(&ct->c_ctinfo.ci_first_pm, &ct->c_ctinfo.ci_last_pm,
1820 "boundary", boundary, 0);
1821
1822 NEW(p);
1823 NEW(p->mp_next);
1824 p->mp_next->mp_next = NULL;
1825 p->mp_next->mp_part = first_alt;
1826
1827 NEW0(m);
1828 m->mp_start = concat (boundary, "\n", NULL);
1829 m->mp_stop = concat (boundary, "--\n", NULL);
1830 m->mp_parts = p;
1831 ct->c_ctparams = m;
1832
1833 free (boundary);
1834
1835 return ct;
1836 }
1837
1838
1839 /*
1840 * Check that the boundary does not appear in the content.
1841 */
1842 static int
1843 boundary_in_content (FILE **fp, char *file, const char *boundary) {
1844 char buffer[NMH_BUFSIZ];
1845 size_t bytes_read;
1846 int found_boundary = 0;
1847
1848 /* free_content() will close *fp if we fopen it here. */
1849 if (! *fp && (*fp = fopen (file, "r")) == NULL) {
1850 advise (file, "unable to open %s for reading", file);
1851 return NOTOK;
1852 }
1853
1854 fseeko (*fp, 0L, SEEK_SET);
1855 while ((bytes_read = fread (buffer, 1, sizeof buffer, *fp)) > 0) {
1856 if (find_str (buffer, bytes_read, boundary)) {
1857 found_boundary = 1;
1858 break;
1859 }
1860 }
1861
1862 return found_boundary;
1863 }
1864
1865
1866 /*
1867 * Remove all non-Content headers.
1868 */
1869 static void
1870 transfer_noncontent_headers (CT old, CT new) {
1871 HF hp, hp_prev;
1872
1873 hp_prev = hp = old->c_first_hf;
1874 while (hp) {
1875 HF next = hp->next;
1876
1877 if (strncasecmp (XXX_FIELD_PRF, hp->name, strlen (XXX_FIELD_PRF))) {
1878 if (hp == old->c_last_hf) {
1879 if (hp == old->c_first_hf) {
1880 old->c_last_hf = old->c_first_hf = NULL;
1881 } else {
1882 hp_prev->next = NULL;
1883 old->c_last_hf = hp_prev;
1884 }
1885 } else {
1886 if (hp == old->c_first_hf) {
1887 old->c_first_hf = next;
1888 } else {
1889 hp_prev->next = next;
1890 }
1891 }
1892
1893 /* Put node hp in the new CT. */
1894 if (new->c_first_hf == NULL) {
1895 new->c_first_hf = hp;
1896 } else {
1897 new->c_last_hf->next = hp;
1898 }
1899 new->c_last_hf = hp;
1900 } else {
1901 /* A Content- header, leave in old. */
1902 hp_prev = hp;
1903 }
1904
1905 hp = next;
1906 }
1907 }
1908
1909
1910 /*
1911 * Set content type.
1912 */
1913 static int
1914 set_ct_type (CT ct, int type, int subtype, int encoding) {
1915 char *typename = ct_type_str (type);
1916 char *subtypename = ct_subtype_str (type, subtype);
1917 /* E.g, " text/plain" */
1918 char *type_subtypename = concat (" ", typename, "/", subtypename, NULL);
1919 /* E.g, " text/plain\n" */
1920 char *name_plus_nl = concat (type_subtypename, "\n", NULL);
1921 int found_content_type = 0;
1922 HF hf;
1923 const char *cp = NULL;
1924 char *ctline;
1925 int status;
1926
1927 /* Update/add Content-Type header field. */
1928 for (hf = ct->c_first_hf; hf; hf = hf->next) {
1929 if (! strcasecmp (TYPE_FIELD, hf->name)) {
1930 found_content_type = 1;
1931 free (hf->value);
1932 hf->value = (cp = strchr (ct->c_ctline, ';'))
1933 ? concat (type_subtypename, cp, "\n", NULL)
1934 : mh_xstrdup (name_plus_nl);
1935 }
1936 }
1937 if (! found_content_type) {
1938 add_header (ct, mh_xstrdup (TYPE_FIELD),
1939 (cp = strchr (ct->c_ctline, ';'))
1940 ? concat (type_subtypename, cp, "\n", NULL)
1941 : mh_xstrdup (name_plus_nl));
1942 }
1943
1944 /* Some of these might not be used, but set them anyway. */
1945 ctline = cp
1946 ? concat (type_subtypename, cp, NULL)
1947 : concat (type_subtypename, NULL);
1948 free (ct->c_ctline);
1949 ct->c_ctline = ctline;
1950 /* Leave other ctinfo members as they were. */
1951 free (ct->c_ctinfo.ci_type);
1952 ct->c_ctinfo.ci_type = mh_xstrdup (typename);
1953 free (ct->c_ctinfo.ci_subtype);
1954 ct->c_ctinfo.ci_subtype = mh_xstrdup (subtypename);
1955 ct->c_type = type;
1956 ct->c_subtype = subtype;
1957
1958 free (name_plus_nl);
1959 free (type_subtypename);
1960
1961 status = set_ce (ct, encoding);
1962
1963 return status;
1964 }
1965
1966
1967 /*
1968 * It's not necessary to update the charset parameter of a Content-Type
1969 * header for a text part. According to RFC 2045 Sec. 6.4, the body
1970 * (content) was originally in the specified charset, "and will be in
1971 * that character set again after decoding."
1972 */
1973 static int
1974 decode_text_parts (CT ct, int encoding, const char *decodetypes,
1975 int *message_mods) {
1976 int status = OK;
1977 int lf_line_endings = 0;
1978
1979 switch (ct->c_type) {
1980 case CT_MULTIPART: {
1981 struct multipart *m = (struct multipart *) ct->c_ctparams;
1982 struct part *part;
1983
1984 /* Should check to see if the body for this part is encoded?
1985 For now, it gets passed along as-is by InitMultiPart(). */
1986 for (part = m->mp_parts; status == OK && part; part = part->mp_next) {
1987 status = decode_text_parts (part->mp_part, encoding, decodetypes,
1988 message_mods);
1989 }
1990 break;
1991 }
1992
1993 case CT_MESSAGE:
1994 if (ct->c_subtype == MESSAGE_EXTERNAL) {
1995 struct exbody *e = (struct exbody *) ct->c_ctparams;
1996
1997 status = decode_text_parts (e->eb_content, encoding, decodetypes,
1998 message_mods);
1999 }
2000 break;
2001
2002 default:
2003 if (! should_decode(decodetypes, ct->c_ctinfo.ci_type, ct->c_ctinfo.ci_subtype)) {
2004 break;
2005 }
2006
2007 lf_line_endings =
2008 ct->c_ctparams && ((struct text *) ct->c_ctparams)->lf_line_endings;
2009
2010 switch (ct->c_encoding) {
2011 case CE_BASE64:
2012 case CE_QUOTED: {
2013 int ct_encoding;
2014
2015 if (decode_part (ct) == OK && ct->c_cefile.ce_file) {
2016 const char *reason = NULL;
2017
2018 if ((ct_encoding = content_encoding (ct, &reason)) == CE_BINARY
2019 && encoding != CE_BINARY) {
2020 /* The decoding isn't acceptable so discard it.
2021 Leave status as OK to allow other transformations. */
2022 if (verbosw) {
2023 report (NULL, ct->c_partno, ct->c_file,
2024 "will not decode%s because it is binary (%s)",
2025 ct->c_partno ? ""
2026 : ct->c_ctline ? ct->c_ctline
2027 : "",
2028 reason);
2029 }
2030 (void) m_unlink (ct->c_cefile.ce_file);
2031 free (ct->c_cefile.ce_file);
2032 ct->c_cefile.ce_file = NULL;
2033 } else if (ct->c_encoding == CE_QUOTED &&
2034 ct_encoding == CE_8BIT && encoding == CE_7BIT) {
2035 /* The decoding isn't acceptable so discard it.
2036 Leave status as OK to allow other transformations. */
2037 if (verbosw) {
2038 report (NULL, ct->c_partno, ct->c_file,
2039 "will not decode%s because it is 8bit",
2040 ct->c_partno ? ""
2041 : ct->c_ctline ? ct->c_ctline
2042 : "");
2043 }
2044 (void) m_unlink (ct->c_cefile.ce_file);
2045 free (ct->c_cefile.ce_file);
2046 ct->c_cefile.ce_file = NULL;
2047 } else {
2048 int enc;
2049
2050 if (ct_encoding == CE_BINARY) {
2051 enc = CE_BINARY;
2052 } else if (ct_encoding == CE_8BIT && encoding == CE_7BIT) {
2053 enc = CE_QUOTED;
2054 } else {
2055 enc = ct_encoding;
2056 }
2057 if (set_ce (ct, enc) == OK) {
2058 ++*message_mods;
2059 if (verbosw) {
2060 report (NULL, ct->c_partno, ct->c_file, "decode%s",
2061 ct->c_ctline ? ct->c_ctline : "");
2062 }
2063 if (lf_line_endings) {
2064 strip_crs (ct, message_mods);
2065 }
2066 } else {
2067 status = NOTOK;
2068 }
2069 }
2070 } else {
2071 status = NOTOK;
2072 }
2073 break;
2074 }
2075 case CE_8BIT:
2076 case CE_7BIT:
2077 if (lf_line_endings) {
2078 strip_crs (ct, message_mods);
2079 }
2080 break;
2081 default:
2082 break;
2083 }
2084
2085 break;
2086 }
2087
2088 return status;
2089 }
2090
2091
/*
 * Determine if the part with type[/subtype] should be decoded, according to
 * decodetypes (which came from the -decodetypes switch).  The comparison
 * is case insensitive.  Returns 1 if so, 0 if not.
 */
static int
should_decode(const char *decodetypes, const char *type, const char *subtype) {
    /* Quick search for matching type[/subtype] in decodetypes:  bracket
       decodetypes with commas, then search for ,type, and ,type/subtype,
       in it. */
    char *haystack = concat(",", decodetypes, ",", NULL);
    char *needle = concat(",", type, ",", NULL);
    int matched = nmh_strcasestr(haystack, needle) != NULL;

    free(needle);

    if (! matched && subtype != NULL) {
        needle = concat(",", type, "/", subtype, ",", NULL);
        matched = nmh_strcasestr(haystack, needle) != NULL;
        free(needle);
    }

    free(haystack);

    return matched;
}
2123
2124
2125 /*
2126 * See if the decoded content is 7bit, 8bit, or binary. It's binary
2127 * if it has any NUL characters, a CR not followed by a LF, or lines
2128 * greater than 998 characters in length. If binary, reason is set
2129 * to a string explaining why.
2130 */
2131 static int
2132 content_encoding (CT ct, const char **reason) {
2133 CE ce = &ct->c_cefile;
2134 int encoding = CE_7BIT;
2135
2136 if (ce->ce_file) {
2137 size_t line_len = 0;
2138 char buffer[NMH_BUFSIZ];
2139 size_t inbytes;
2140
2141 if (! ce->ce_fp && (ce->ce_fp = fopen (ce->ce_file, "r")) == NULL) {
2142 advise (ce->ce_file, "unable to open for reading");
2143 return CE_UNKNOWN;
2144 }
2145
2146 fseeko (ce->ce_fp, 0L, SEEK_SET);
2147 while (encoding != CE_BINARY &&
2148 (inbytes = fread (buffer, 1, sizeof buffer, ce->ce_fp)) > 0) {
2149 char *cp;
2150 size_t i;
2151 int last_char_was_cr = 0;
2152
2153 for (i = 0, cp = buffer; i < inbytes; ++i, ++cp) {
2154 if (*cp == '\0' || ++line_len > 998 ||
2155 (*cp != '\n' && last_char_was_cr)) {
2156 encoding = CE_BINARY;
2157 if (*cp == '\0') {
2158 *reason = "null character";
2159 } else if (line_len > 998) {
2160 *reason = "line length > 998";
2161 } else if (*cp != '\n' && last_char_was_cr) {
2162 *reason = "CR not followed by LF";
2163 } else {
2164 /* Should not reach this. */
2165 *reason = "";
2166 }
2167 break;
2168 } else if (*cp == '\n') {
2169 line_len = 0;
2170 } else if (! isascii ((unsigned char) *cp)) {
2171 encoding = CE_8BIT;
2172 }
2173
2174 last_char_was_cr = *cp == '\r' ? 1 : 0;
2175 }
2176 }
2177
2178 fclose (ce->ce_fp);
2179 ce->ce_fp = NULL;
2180 } /* else should never happen */
2181
2182 return encoding;
2183 }
2184
2185
2186 /*
2187 * Strip carriage returns from content.
2188 */
2189 static int
2190 strip_crs (CT ct, int *message_mods) {
2191 char *charset = content_charset (ct);
2192 int status = OK;
2193
2194 /* Only strip carriage returns if content is ASCII or another
2195 charset that has the same readily recognizable CR followed by a
2196 LF. We can include UTF-8 here because if the high-order bit of
2197 a UTF-8 byte is 0, then it must be a single-byte ASCII
2198 character. */
2199 if (! strcasecmp (charset, "US-ASCII") ||
2200 ! strcasecmp (charset, "UTF-8") ||
2201 ! strncasecmp (charset, "ISO-8859-", 9) ||
2202 ! strncasecmp (charset, "WINDOWS-12", 10)) {
2203 char **file = NULL;
2204 FILE **fp = NULL;
2205 size_t begin;
2206 size_t end;
2207 int has_crs = 0;
2208 int opened_input_file = 0;
2209
2210 if (ct->c_cefile.ce_file) {
2211 file = &ct->c_cefile.ce_file;
2212 fp = &ct->c_cefile.ce_fp;
2213 begin = end = 0;
2214 } else if (ct->c_file) {
2215 file = &ct->c_file;
2216 fp = &ct->c_fp;
2217 begin = (size_t) ct->c_begin;
2218 end = (size_t) ct->c_end;
2219 } /* else don't know where the content is */
2220
2221 if (file && *file && fp) {
2222 if (! *fp) {
2223 if ((*fp = fopen (*file, "r")) == NULL) {
2224 advise (*file, "unable to open for reading");
2225 status = NOTOK;
2226 } else {
2227 opened_input_file = 1;
2228 }
2229 }
2230 }
2231
2232 if (fp && *fp) {
2233 char buffer[NMH_BUFSIZ];
2234 size_t bytes_read;
2235 size_t bytes_to_read =
2236 end > 0 && end > begin ? end - begin : sizeof buffer;
2237
2238 fseeko (*fp, begin, SEEK_SET);
2239 while ((bytes_read = fread (buffer, 1,
2240 min (bytes_to_read, sizeof buffer),
2241 *fp)) > 0) {
2242 /* Look for CR followed by a LF. This is supposed to
2243 be text so there should be LF's. If not, don't
2244 modify the content. */
2245 char *cp;
2246 size_t i;
2247 int last_char_was_cr = 0;
2248
2249 if (end > 0) { bytes_to_read -= bytes_read; }
2250
2251 for (i = 0, cp = buffer; i < bytes_read; ++i, ++cp) {
2252 if (*cp == '\n' && last_char_was_cr) {
2253 has_crs = 1;
2254 break;
2255 }
2256
2257 last_char_was_cr = *cp == '\r' ? 1 : 0;
2258 }
2259 }
2260
2261 if (has_crs) {
2262 int fd;
2263 char *stripped_content_file;
2264 char *tempfile = m_mktemp2 (NULL, invo_name, &fd, NULL);
2265
2266 if (tempfile == NULL) {
2267 adios (NULL, "unable to create temporary file in %s",
2268 get_temp_dir());
2269 }
2270 stripped_content_file = mh_xstrdup (tempfile);
2271
2272 /* Strip each CR before a LF from the content. */
2273 fseeko (*fp, begin, SEEK_SET);
2274 while ((bytes_read = fread (buffer, 1, sizeof buffer, *fp)) >
2275 0) {
2276 char *cp;
2277 size_t i;
2278 int last_char_was_cr = 0;
2279
2280 for (i = 0, cp = buffer; i < bytes_read; ++i, ++cp) {
2281 if (*cp == '\r') {
2282 last_char_was_cr = 1;
2283 } else if (last_char_was_cr) {
2284 if (*cp != '\n') {
2285 if (write (fd, "\r", 1) < 0) {
2286 advise (tempfile, "CR write");
2287 }
2288 }
2289 if (write (fd, cp, 1) < 0) {
2290 advise (tempfile, "write");
2291 }
2292 last_char_was_cr = 0;
2293 } else {
2294 if (write (fd, cp, 1) < 0) {
2295 advise (tempfile, "write");
2296 }
2297 last_char_was_cr = 0;
2298 }
2299 }
2300 }
2301
2302 if (close (fd)) {
2303 inform("unable to write temporary file %s, continuing...",
2304 stripped_content_file);
2305 (void) m_unlink (stripped_content_file);
2306 status = NOTOK;
2307 } else {
2308 /* Replace the decoded file with the converted one. */
2309 if (ct->c_cefile.ce_file && ct->c_cefile.ce_unlink)
2310 (void) m_unlink (ct->c_cefile.ce_file);
2311
2312 mh_xfree(ct->c_cefile.ce_file);
2313 ct->c_cefile.ce_file = stripped_content_file;
2314 ct->c_cefile.ce_unlink = 1;
2315
2316 ++*message_mods;
2317 if (verbosw) {
2318 report (NULL, ct->c_partno,
2319 begin == 0 && end == 0 ? "" : *file,
2320 "stripped CRs");
2321 }
2322 }
2323 }
2324
2325 if (opened_input_file) {
2326 fclose (*fp);
2327 *fp = NULL;
2328 }
2329 }
2330 }
2331
2332 free (charset);
2333
2334 return status;
2335 }
2336
2337
2338 /*
2339 * Add/update, if necessary, the message C-T-E, based on the least restrictive
2340 * of the part C-T-E's.
2341 */
2342 static void
2343 update_cte (CT ct) {
2344 const int least_restrictive_enc = least_restrictive_encoding (ct);
2345
2346 if (least_restrictive_enc != CE_UNKNOWN &&
2347 least_restrictive_enc != CE_7BIT) {
2348 char *cte = concat (" ", ce_str (least_restrictive_enc), "\n", NULL);
2349 HF hf;
2350 int found_cte = 0;
2351
2352 /* Update/add Content-Transfer-Encoding header field. */
2353 for (hf = ct->c_first_hf; hf; hf = hf->next) {
2354 if (! strcasecmp (ENCODING_FIELD, hf->name)) {
2355 found_cte = 1;
2356 free (hf->value);
2357 hf->value = cte;
2358 }
2359 }
2360 if (! found_cte) {
2361 add_header (ct, mh_xstrdup (ENCODING_FIELD), cte);
2362 }
2363 }
2364 }
2365
2366
2367 /*
2368 * Find the least restrictive encoding (7bit, 8bit, binary) of the parts
2369 * within a message.
2370 */
2371 static int
2372 least_restrictive_encoding (CT ct) {
2373 int encoding = CE_UNKNOWN;
2374
2375 switch (ct->c_type) {
2376 case CT_MULTIPART: {
2377 struct multipart *m = (struct multipart *) ct->c_ctparams;
2378 struct part *part;
2379
2380 for (part = m->mp_parts; part; part = part->mp_next) {
2381 const int part_encoding =
2382 least_restrictive_encoding (part->mp_part);
2383
2384 if (less_restrictive (encoding, part_encoding)) {
2385 encoding = part_encoding;
2386 }
2387 }
2388 break;
2389 }
2390
2391 case CT_MESSAGE:
2392 if (ct->c_subtype == MESSAGE_EXTERNAL) {
2393 struct exbody *e = (struct exbody *) ct->c_ctparams;
2394 const int part_encoding =
2395 least_restrictive_encoding (e->eb_content);
2396
2397 if (less_restrictive (encoding, part_encoding)) {
2398 encoding = part_encoding;
2399 }
2400 }
2401 break;
2402
2403 default: {
2404 if (less_restrictive (encoding, ct->c_encoding)) {
2405 encoding = ct->c_encoding;
2406 }
2407 }}
2408
2409 return encoding;
2410 }
2411
2412
2413 /*
2414 * Return whether the second encoding is less restrictive than the first, where
2415 * "less restrictive" is in the sense used by RFC 2045 Secs. 6.1 and 6.4. So,
2416 * CE_BINARY is less restrictive than CE_8BIT and
2417 * CE_8BIT is less restrictive than CE_7BIT.
2418 */
2419 static int
2420 less_restrictive (int encoding, int second_encoding) {
2421 switch (second_encoding) {
2422 case CE_BINARY:
2423 return encoding != CE_BINARY;
2424 case CE_8BIT:
2425 return encoding != CE_BINARY && encoding != CE_8BIT;
2426 case CE_7BIT:
2427 return encoding != CE_BINARY && encoding != CE_8BIT &&
2428 encoding != CE_7BIT;
2429 default :
2430 return 0;
2431 }
2432 }
2433
2434
2435 /*
2436 * Convert character set of each part.
2437 */
2438 static int
2439 convert_charsets (CT ct, char *dest_charset, int *message_mods) {
2440 int status = OK;
2441
2442 switch (ct->c_type) {
2443 case CT_TEXT:
2444 if (ct->c_subtype == TEXT_PLAIN) {
2445 status = convert_charset (ct, dest_charset, message_mods);
2446 if (status == OK) {
2447 if (verbosw) {
2448 char *ct_charset = content_charset (ct);
2449
2450 report (NULL, ct->c_partno, ct->c_file,
2451 "convert %s to %s", ct_charset, dest_charset);
2452 free (ct_charset);
2453 }
2454 } else {
2455 char *ct_charset = content_charset (ct);
2456
2457 report ("iconv", ct->c_partno, ct->c_file,
2458 "failed to convert %s to %s", ct_charset, dest_charset);
2459 free (ct_charset);
2460 }
2461 }
2462 break;
2463
2464 case CT_MULTIPART: {
2465 struct multipart *m = (struct multipart *) ct->c_ctparams;
2466 struct part *part;
2467
2468 /* Should check to see if the body for this part is encoded?
2469 For now, it gets passed along as-is by InitMultiPart(). */
2470 for (part = m->mp_parts; status == OK && part; part = part->mp_next) {
2471 status =
2472 convert_charsets (part->mp_part, dest_charset, message_mods);
2473 }
2474 break;
2475 }
2476
2477 case CT_MESSAGE:
2478 if (ct->c_subtype == MESSAGE_EXTERNAL) {
2479 struct exbody *e = (struct exbody *) ct->c_ctparams;
2480
2481 status =
2482 convert_charsets (e->eb_content, dest_charset, message_mods);
2483 }
2484 break;
2485
2486 default:
2487 break;
2488 }
2489
2490 return status;
2491 }
2492
2493
2494 /*
2495 * Fix various problems that aren't handled elsewhere. These
2496 * are fixed unconditionally: there are no switches to disable
2497 * them. Currently, "problems" are these:
2498 * 1) remove extraneous semicolon at the end of a header parameter list
2499 * 2) replace RFC 2047 encoding with RFC 2231 encoding of name and
2500 * filename parameters in Content-Type and Content-Disposition
2501 * headers, respectively.
2502 */
2503 static int
2504 fix_always (CT ct, int *message_mods) {
2505 int status = OK;
2506
2507 switch (ct->c_type) {
2508 case CT_MULTIPART: {
2509 struct multipart *m = (struct multipart *) ct->c_ctparams;
2510 struct part *part;
2511
2512 for (part = m->mp_parts; status == OK && part; part = part->mp_next) {
2513 status = fix_always (part->mp_part, message_mods);
2514 }
2515 break;
2516 }
2517
2518 case CT_MESSAGE:
2519 if (ct->c_subtype == MESSAGE_EXTERNAL) {
2520 struct exbody *e = (struct exbody *) ct->c_ctparams;
2521
2522 status = fix_always (e->eb_content, message_mods);
2523 }
2524 break;
2525
2526 default: {
2527 HF hf;
2528
2529 if (ct->c_first_hf) {
2530 fix_filename_encoding (ct);
2531 }
2532
2533 for (hf = ct->c_first_hf; hf; hf = hf->next) {
2534 size_t len = strlen (hf->value);
2535
2536 if (strcasecmp (hf->name, TYPE_FIELD) != 0 &&
2537 strcasecmp (hf->name, DISPO_FIELD) != 0) {
2538 /* Only do this for Content-Type and
2539 Content-Disposition fields because those are the
2540 only headers that parse_mime() warns about. */
2541 continue;
2542 }
2543
2544 /* whitespace following a trailing ';' will be nuked as well */
2545 if (hf->value[len - 1] == '\n') {
2546 while (isspace((unsigned char)(hf->value[len - 2]))) {
2547 if (len-- == 0) { break; }
2548 }
2549 }
2550
2551 if (hf->value[len - 2] == ';') {
2552 /* Remove trailing ';' from parameter value. */
2553 hf->value[len - 2] = '\n';
2554 hf->value[len - 1] = '\0';
2555
2556 /* Also, if Content-Type parameter, remove trailing ';'
2557 from ct->c_ctline. This probably isn't necessary
2558 but can't hurt. */
2559 if (strcasecmp(hf->name, TYPE_FIELD) == 0 && ct->c_ctline) {
2560 size_t l = strlen(ct->c_ctline) - 1;
2561 while (isspace((unsigned char)(ct->c_ctline[l])) ||
2562 ct->c_ctline[l] == ';') {
2563 ct->c_ctline[l--] = '\0';
2564 if (l == 0) { break; }
2565 }
2566 }
2567
2568 ++*message_mods;
2569 if (verbosw) {
2570 report (NULL, ct->c_partno, ct->c_file,
2571 "remove trailing ; from %s parameter value",
2572 hf->name);
2573 }
2574 }
2575 }
2576 }}
2577
2578 return status;
2579 }
2580
2581
2582 /*
2583 * Factor out common code for loops in fix_filename_encoding().
2584 */
2585 static int
2586 fix_filename_param (char *name, char *value, PM *first_pm, PM *last_pm) {
2587 int fixed = 0;
2588
2589 if (has_prefix(value, "=?") && has_suffix(value, "?=")) {
2590 /* Looks like an RFC 2047 encoded parameter. */
2591 char decoded[PATH_MAX + 1];
2592
2593 if (decode_rfc2047 (value, decoded, sizeof decoded)) {
2594 /* Encode using RFC 2231. */
2595 replace_param (first_pm, last_pm, name, decoded, 0);
2596 fixed = 1;
2597 } else {
2598 inform("failed to decode %s parameter %s", name, value);
2599 }
2600 }
2601
2602 return fixed;
2603 }
2604
2605
2606 /*
2607 * Replace RFC 2047 encoding with RFC 2231 encoding of name and
2608 * filename parameters in Content-Type and Content-Disposition
2609 * headers, respectively.
2610 */
2611 static int
2612 fix_filename_encoding (CT ct) {
2613 PM pm;
2614 HF hf;
2615 int fixed = 0;
2616
2617 for (pm = ct->c_ctinfo.ci_first_pm; pm; pm = pm->pm_next) {
2618 if (pm->pm_name && pm->pm_value &&
2619 strcasecmp (pm->pm_name, "name") == 0) {
2620 fixed = fix_filename_param (pm->pm_name, pm->pm_value,
2621 &ct->c_ctinfo.ci_first_pm,
2622 &ct->c_ctinfo.ci_last_pm);
2623 }
2624 }
2625
2626 for (pm = ct->c_dispo_first; pm; pm = pm->pm_next) {
2627 if (pm->pm_name && pm->pm_value &&
2628 strcasecmp (pm->pm_name, "filename") == 0) {
2629 fixed = fix_filename_param (pm->pm_name, pm->pm_value,
2630 &ct->c_dispo_first,
2631 &ct->c_dispo_last);
2632 }
2633 }
2634
2635 /* Fix hf values to correspond. */
2636 for (hf = ct->c_first_hf; fixed && hf; hf = hf->next) {
2637 enum { OTHER, TYPE_HEADER, DISPO_HEADER } field = OTHER;
2638
2639 if (strcasecmp (hf->name, TYPE_FIELD) == 0) {
2640 field = TYPE_HEADER;
2641 } else if (strcasecmp (hf->name, DISPO_FIELD) == 0) {
2642 field = DISPO_HEADER;
2643 }
2644
2645 if (field != OTHER) {
2646 const char *const semicolon_loc = strchr (hf->value, ';');
2647
2648 if (semicolon_loc) {
2649 const size_t len =
2650 strlen (hf->name) + 1 + semicolon_loc - hf->value;
2651 const char *const params =
2652 output_params (len,
2653 field == TYPE_HEADER
2654 ? ct->c_ctinfo.ci_first_pm
2655 : ct->c_dispo_first,
2656 NULL, 0);
2657 const char *const new_params = concat (params, "\n", NULL);
2658
2659 replace_substring (&hf->value, semicolon_loc, new_params);
2660 free((void *)new_params); /* Cast away const. Sigh. */
2661 free((void *)params);
2662 } else {
2663 inform("did not find semicolon in %s:%s\n",
2664 hf->name, hf->value);
2665 }
2666 }
2667 }
2668
2669 return OK;
2670 }
2671
2672
2673 /*
2674 * Output content in input file to output file.
2675 */
2676 static int
2677 write_content (CT ct, const char *input_filename, char *outfile, FILE *outfp,
2678 int modify_inplace, int message_mods) {
2679 int status = OK;
2680
2681 if (modify_inplace) {
2682 if (message_mods > 0) {
2683 if ((status = output_message_fp (ct, outfp, outfile)) == OK) {
2684 char *infile = input_filename
2685 ? mh_xstrdup (input_filename)
2686 : mh_xstrdup (ct->c_file ? ct->c_file : "-");
2687
2688 if (remove_file (infile) == OK) {
2689 if (rename (outfile, infile)) {
2690 /* Rename didn't work, possibly because of an
2691 attempt to rename across filesystems. Try
2692 brute force copy. */
2693 int old = open (outfile, O_RDONLY);
2694 int new =
2695 open (infile, O_WRONLY | O_CREAT, m_gmprot ());
2696 int i = -1;
2697
2698 if (old != -1 && new != -1) {
2699 char buffer[NMH_BUFSIZ];
2700
2701 while ((i = read (old, buffer, sizeof buffer)) >
2702 0) {
2703 if (write (new, buffer, i) != i) {
2704 i = -1;
2705 break;
2706 }
2707 }
2708 }
2709 if (new != -1) { close (new); }
2710 if (old != -1) { close (old); }
2711 (void) m_unlink (outfile);
2712
2713 if (i < 0) {
2714 /* The -file argument processing used path() to
2715 expand filename to absolute path. */
2716 int file = ct->c_file && ct->c_file[0] == '/';
2717
2718 inform("unable to rename %s %s to %s, continuing...",
2719 file ? "file" : "message", outfile,
2720 infile);
2721 status = NOTOK;
2722 }
2723 }
2724 } else {
2725 inform("unable to remove input file %s, "
2726 "not modifying it, continuing...", infile);
2727 (void) m_unlink (outfile);
2728 status = NOTOK;
2729 }
2730
2731 free (infile);
2732 } else {
2733 status = NOTOK;
2734 }
2735 } else {
2736 /* No modifications and didn't need the tmp outfile. */
2737 (void) m_unlink (outfile);
2738 }
2739 } else {
2740 /* Output is going to some file. Produce it whether or not
2741 there were modifications. */
2742 status = output_message_fp (ct, outfp, outfile);
2743 }
2744
2745 flush_errors ();
2746 return status;
2747 }
2748
2749
2750 /*
2751 * parse_mime() does not set lf_line_endings in struct text, so use this
2752 * function to do it. It touches the parts the decodetypes identifies.
2753 */
2754 static void
2755 set_text_ctparams(CT ct, char *decodetypes, int lf_line_endings) {
2756 switch (ct->c_type) {
2757 case CT_MULTIPART: {
2758 struct multipart *m = (struct multipart *) ct->c_ctparams;
2759 struct part *part;
2760
2761 for (part = m->mp_parts; part; part = part->mp_next) {
2762 set_text_ctparams(part->mp_part, decodetypes, lf_line_endings);
2763 }
2764 break;
2765 }
2766
2767 case CT_MESSAGE:
2768 if (ct->c_subtype == MESSAGE_EXTERNAL) {
2769 struct exbody *e = (struct exbody *) ct->c_ctparams;
2770
2771 set_text_ctparams(e->eb_content, decodetypes, lf_line_endings);
2772 }
2773 break;
2774
2775 default:
2776 if (should_decode(decodetypes, ct->c_ctinfo.ci_type, ct->c_ctinfo.ci_subtype)) {
2777 if (ct->c_ctparams == NULL) {
2778 ct->c_ctparams = mh_xcalloc(1, sizeof (struct text));
2779 }
2780 ((struct text *) ct->c_ctparams)->lf_line_endings = lf_line_endings;
2781 }
2782 }
2783 }
2784
2785
2786 /*
2787 * If "rmmproc" is defined, call that to remove the file. Otherwise,
2788 * use the standard MH backup file.
2789 */
2790 static int
2791 remove_file (const char *file) {
2792 if (rmmproc) {
2793 char *rmm_command = concat (rmmproc, " ", file, NULL);
2794 int status = system (rmm_command);
2795
2796 free (rmm_command);
2797 return WIFEXITED (status) ? WEXITSTATUS (status) : NOTOK;
2798 }
2799 /* This is OK for a non-message file, it still uses the
2800 BACKUP_PREFIX form. The backup file will be in the same
2801 directory as file. */
2802 return rename (file, m_backup (file));
2803 }
2804
2805
2806 /*
2807 * Output formatted message to user.
2808 */
2809 static void
2810 report (char *what, char *partno, char *filename, char *message, ...) {
2811 va_list args;
2812 char *fmt;
2813
2814 if (verbosw) {
2815 va_start (args, message);
2816 fmt = concat (filename, partno ? " part " : ", ",
2817 partno ? partno : "", partno ? ", " : "", message, NULL);
2818
2819 advertise (what, NULL, fmt, args);
2820
2821 free (fmt);
2822 va_end (args);
2823 }
2824 }
2825
2826
/*
 * Signal handler: for SIGQUIT, flush stdout and emit a newline on
 * stderr first; in every case finish by calling done (1).
 */
static void
pipeser (int i)
{
    const int is_quit = (i == SIGQUIT);

    if (is_quit) {
        fflush (stdout);
        fputs ("\n", stderr);
        fflush (stderr);
    }

    done (1);
    /* NOTREACHED */
}