]> diplodocus.org Git - nmh/blob - uip/mhfixmsg.c
datetime.c: Replace some int with bool.
[nmh] / uip / mhfixmsg.c
1 /* mhfixmsg.c -- rewrite a message with various transformations
2 *
3 * This code is Copyright (c) 2002 and 2013, by the authors of nmh.
4 * See the COPYRIGHT file in the root directory of the nmh
5 * distribution for complete copyright information.
6 */
7
8 #include <h/mh.h>
9 #include <h/fmt_scan.h>
10 #include <h/mime.h>
11 #include <h/mhparse.h>
12 #include <h/utils.h>
13 #include <h/signals.h>
14 #include "../sbr/m_maildir.h"
15 #include "../sbr/m_mktemp.h"
16 #include "../sbr/mime_type.h"
17 #include "mhmisc.h"
18 #include "mhfree.h"
19 #include "mhoutsbr.h"
20 #include "mhshowsbr.h"
21 #include <fcntl.h>
22
/*
 * Command-line switch table.  Each X(name, minchars, id) entry is expanded
 * twice below with different definitions of X: once yielding the bare ids
 * (presumably collected into an enum by DEFINE_SWITCH_ENUM) and once yielding
 * { name, minchars, id } initializers for the switches[] array that main()
 * passes to smatch().
 *
 * The final entry ends in a line continuation, so the blank line that
 * follows it is part of the macro definition and must stay.
 */
#define MHFIXMSG_SWITCHES \
    X("decodetext 8bit|7bit|binary", 0, DECODETEXTSW) \
    X("nodecodetext", 0, NDECODETEXTSW) \
    X("decodetypes", 0, DECODETYPESW) \
    X("crlflinebreaks", 0, CRLFLINEBREAKSSW) \
    X("nocrlflinebreaks", 0, NCRLFLINEBREAKSSW) \
    X("textcharset", 0, TEXTCHARSETSW) \
    X("notextcharset", 0, NTEXTCHARSETSW) \
    X("reformat", 0, REFORMATSW) \
    X("noreformat", 0, NREFORMATSW) \
    X("replacetextplain", 0, REPLACETEXTPLAINSW) \
    X("noreplacetextplain", 0, NREPLACETEXTPLAINSW) \
    X("fixboundary", 0, FIXBOUNDARYSW) \
    X("nofixboundary", 0, NFIXBOUNDARYSW) \
    X("fixcte", 0, FIXCOMPOSITECTESW) \
    X("nofixcte", 0, NFIXCOMPOSITECTESW) \
    X("fixtype mimetype", 0, FIXTYPESW) \
    X("file file", 0, FILESW) \
    X("outfile file", 0, OUTFILESW) \
    X("rmmproc program", 0, RPROCSW) \
    X("normmproc", 0, NRPRCSW) \
    X("changecur", 0, CHGSW) \
    X("nochangecur", 0, NCHGSW) \
    X("verbose", 0, VERBSW) \
    X("noverbose", 0, NVERBSW) \
    X("version", 0, VERSIONSW) \
    X("help", 0, HELPSW) \

/* First expansion: just the switch ids. */
#define X(sw, minchars, id) id,
DEFINE_SWITCH_ENUM(MHFIXMSG);
#undef X

/* Second expansion: the switches[] table entries. */
#define X(sw, minchars, id) { sw, minchars, id },
DEFINE_SWITCH_ARRAY(MHFIXMSG, switches);
#undef X


/* Set by -verbose / cleared by -noverbose; when nonzero the fix routines
   call report() for each change they make. */
int verbosw;
int debugsw; /* Needed by mhparse.c. */

/* main() installs quitser for SIGQUIT; it is the same handler as SIGPIPE's. */
#define quitser pipeser
64
/*
 * The set of transformations selected on the command line.  main() fills
 * one of these in and passes it to mhfixmsgsbr() for every message.
 */
typedef struct fix_transformations {
    int fixboundary;        /* -fixboundary / -nofixboundary */
    int fixcompositecte;    /* -fixcte / -nofixcte */
    svector_t fixtypes;     /* -fixtype arguments, or NULL if none given */
    int reformat;           /* -reformat / -noreformat */
    int replacetextplain;   /* -replacetextplain / -noreplacetextplain */
    int decodetext;         /* CE_8BIT, CE_7BIT, or CE_BINARY; 0 for -nodecodetext */
    char *decodetypes;      /* -decodetypes argument */
    /* Nonzero to use LF instead of CRLF linebreaks (-nocrlflinebreaks).
       CRLF is what RFC 2046 Sec. 4.1.1, par.1 calls for, and is the default
       (0, also set by -crlflinebreaks). */
    int lf_line_endings;
    char *textcharset;      /* -textcharset argument, or NULL */
} fix_transformations;

/*
 * static prototypes
 */
static int mhfixmsgsbr (CT *, char *, const fix_transformations *,
                        FILE **, char *, FILE **);
static int fix_boundary (CT *, int *);
static int copy_input_to_output (const char *, FILE *, const char *, FILE *);
static int get_multipart_boundary (CT, char **);
static int replace_boundary (CT, char *, char *);
static int fix_types (CT, svector_t, int *);
static char *replace_substring (char **, const char *, const char *);
static char *remove_parameter (char *, const char *);
static int fix_composite_cte (CT, int *);
static int set_ce (CT, int);
static int ensure_text_plain (CT *, CT, int *, int);
static int find_textplain_sibling (CT, int, int *);
static int insert_new_text_plain_part (CT, int, CT);
static CT build_text_plain_part (CT);
static int insert_into_new_mp_alt (CT *, int *);
static CT divide_part (CT);
static void copy_ctinfo (CI, CI);
static int decode_part (CT);
static int reformat_part (CT, char *, char *, char *, int);
static CT build_multipart_alt (CT, CT, int, int);
static int boundary_in_content (FILE **, char *, const char *);
static void transfer_noncontent_headers (CT, CT);
static int set_ct_type (CT, int type, int subtype, int encoding);
static int decode_text_parts (CT, int, const char *, int *);
static int should_decode(const char *, const char *, const char *);
static int content_encoding (CT, const char **);
static int strip_crs (CT, int *);
static void update_cte (CT);
static int least_restrictive_encoding (CT) PURE;
static int less_restrictive (int, int);
static int convert_charsets (CT, char *, int *);
static int fix_always (CT, int *);
static int fix_filename_param (char *, char *, PM *, PM *);
static int fix_filename_encoding (CT);
static int write_content (CT, const char *, char *, FILE *, int, int);
static void set_text_ctparams(CT, char *, int);
static int remove_file (const char *);
static void report (char *, char *, char *, char *, ...)
    CHECK_PRINTF(4, 5);
static void pipeser (int);
122
123
124 int
125 main (int argc, char **argv) {
126 int msgnum;
127 char *cp, *file = NULL, *folder = NULL;
128 char *maildir = NULL, buf[100], *outfile = NULL;
129 char **argp, **arguments;
130 struct msgs_array msgs = { 0, 0, NULL };
131 struct msgs *mp = NULL;
132 CT *ctp;
133 FILE *fp, *infp = NULL, *outfp = NULL;
134 int using_stdin = 0;
135 int chgflag = 1;
136 int status = OK;
137 fix_transformations fx;
138 fx.reformat = fx.fixcompositecte = fx.fixboundary = 1;
139 fx.fixtypes = NULL;
140 fx.replacetextplain = 0;
141 fx.decodetext = CE_8BIT;
142 fx.decodetypes = "text,application/ics"; /* Default, per man page. */
143 fx.lf_line_endings = 0;
144 fx.textcharset = NULL;
145
146 if (nmh_init(argv[0], 2)) { return 1; }
147
148 arguments = getarguments (invo_name, argc, argv, 1);
149 argp = arguments;
150
151 /*
152 * Parse arguments
153 */
154 while ((cp = *argp++)) {
155 if (*cp == '-') {
156 switch (smatch (++cp, switches)) {
157 case AMBIGSW:
158 ambigsw (cp, switches);
159 done (1);
160 case UNKWNSW:
161 adios (NULL, "-%s unknown", cp);
162
163 case HELPSW:
164 snprintf (buf, sizeof buf, "%s [+folder] [msgs] [switches]",
165 invo_name);
166 print_help (buf, switches, 1);
167 done (0);
168 case VERSIONSW:
169 print_version(invo_name);
170 done (0);
171
172 case DECODETEXTSW:
173 if (! (cp = *argp++) || *cp == '-') {
174 adios (NULL, "missing argument to %s", argp[-2]);
175 }
176 if (! strcasecmp (cp, "8bit")) {
177 fx.decodetext = CE_8BIT;
178 } else if (! strcasecmp (cp, "7bit")) {
179 fx.decodetext = CE_7BIT;
180 } else if (! strcasecmp (cp, "binary")) {
181 fx.decodetext = CE_BINARY;
182 } else {
183 adios (NULL, "invalid argument to %s", argp[-2]);
184 }
185 continue;
186 case NDECODETEXTSW:
187 fx.decodetext = 0;
188 continue;
189 case DECODETYPESW:
190 if (! (cp = *argp++) || *cp == '-') {
191 adios (NULL, "missing argument to %s", argp[-2]);
192 }
193 fx.decodetypes = cp;
194 continue;
195 case CRLFLINEBREAKSSW:
196 fx.lf_line_endings = 0;
197 continue;
198 case NCRLFLINEBREAKSSW:
199 fx.lf_line_endings = 1;
200 continue;
201 case TEXTCHARSETSW:
202 if (! (cp = *argp++) || (*cp == '-' && cp[1])) {
203 adios (NULL, "missing argument to %s", argp[-2]);
204 }
205 fx.textcharset = cp;
206 continue;
207 case NTEXTCHARSETSW:
208 fx.textcharset = 0;
209 continue;
210 case FIXBOUNDARYSW:
211 fx.fixboundary = 1;
212 continue;
213 case NFIXBOUNDARYSW:
214 fx.fixboundary = 0;
215 continue;
216 case FIXCOMPOSITECTESW:
217 fx.fixcompositecte = 1;
218 continue;
219 case NFIXCOMPOSITECTESW:
220 fx.fixcompositecte = 0;
221 continue;
222 case FIXTYPESW:
223 if (! (cp = *argp++) || (*cp == '-' && cp[1])) {
224 adios (NULL, "missing argument to %s", argp[-2]);
225 }
226 if (! strncasecmp (cp, "multipart/", 10) ||
227 ! strncasecmp (cp, "message/", 8))
228 adios (NULL, "-fixtype %s not allowed", cp);
229 if (! strchr (cp, '/'))
230 adios (NULL, "-fixtype requires type/subtype");
231 if (fx.fixtypes == NULL) { fx.fixtypes = svector_create (10); }
232 svector_push_back (fx.fixtypes, cp);
233 continue;
234 case REFORMATSW:
235 fx.reformat = 1;
236 continue;
237 case NREFORMATSW:
238 fx.reformat = 0;
239 continue;
240 case REPLACETEXTPLAINSW:
241 fx.replacetextplain = 1;
242 continue;
243 case NREPLACETEXTPLAINSW:
244 fx.replacetextplain = 0;
245 continue;
246 case FILESW:
247 if (! (cp = *argp++) || (*cp == '-' && cp[1])) {
248 adios (NULL, "missing argument to %s", argp[-2]);
249 }
250 file = *cp == '-' ? mh_xstrdup (cp) : path (cp, TFILE);
251 continue;
252 case OUTFILESW:
253 if (! (cp = *argp++) || (*cp == '-' && cp[1])) {
254 adios (NULL, "missing argument to %s", argp[-2]);
255 }
256 outfile = *cp == '-' ? mh_xstrdup (cp) : path (cp, TFILE);
257 continue;
258 case RPROCSW:
259 if (!(rmmproc = *argp++) || *rmmproc == '-') {
260 adios (NULL, "missing argument to %s", argp[-2]);
261 }
262 continue;
263 case NRPRCSW:
264 rmmproc = NULL;
265 continue;
266 case CHGSW:
267 chgflag = 1;
268 continue;
269 case NCHGSW:
270 chgflag = 0;
271 continue;
272 case VERBSW:
273 verbosw = 1;
274 continue;
275 case NVERBSW:
276 verbosw = 0;
277 continue;
278 }
279 }
280 if (*cp == '+' || *cp == '@') {
281 if (folder)
282 adios (NULL, "only one folder at a time!");
283 folder = pluspath (cp);
284 } else {
285 if (*cp == '/') {
286 /* Interpret a full path as a filename, not a message. */
287 file = mh_xstrdup (cp);
288 } else {
289 app_msgarg (&msgs, cp);
290 }
291 }
292 }
293
294 SIGNAL (SIGQUIT, quitser);
295 SIGNAL (SIGPIPE, pipeser);
296
297 /*
298 * Read the standard profile setup
299 */
300 if ((fp = fopen (cp = etcpath ("mhn.defaults"), "r"))) {
301 readconfig(NULL, fp, cp, 0);
302 fclose (fp);
303 }
304
305 suppress_bogus_mp_content_warning = skip_mp_cte_check = true;
306 suppress_extraneous_trailing_semicolon_warning = true;
307
308 if (! context_find ("path")) {
309 free (path ("./", TFOLDER));
310 }
311
312 if (file && msgs.size) {
313 adios (NULL, "cannot specify msg and file at same time!");
314 }
315
316 if (outfile) {
317 /* Open the outfile now, so we don't have to risk opening it
318 after running out of fds. */
319 if (strcmp (outfile, "-") == 0) {
320 outfp = stdout;
321 } else if ((outfp = fopen (outfile, "w")) == NULL) {
322 adios (outfile, "unable to open for writing");
323 }
324 }
325
326 /*
327 * check if message is coming from file
328 */
329 if (file) {
330 /* If file is stdin, create a tmp file name before parse_mime()
331 has a chance, because it might put in on a different
332 filesystem than the output file. Instead, put it in the
333 user's preferred tmp directory. */
334 CT ct;
335
336 if (! strcmp ("-", file)) {
337 int fd;
338 char *cp;
339
340 using_stdin = 1;
341
342 if ((cp = m_mktemp2 (NULL, invo_name, &fd, NULL)) == NULL) {
343 adios (NULL, "unable to create temporary file in %s",
344 get_temp_dir());
345 } else {
346 free (file);
347 file = mh_xstrdup (cp);
348 cpydata (STDIN_FILENO, fd, "-", file);
349 }
350
351 if (close (fd)) {
352 (void) m_unlink (file);
353 adios (NULL, "failed to write temporary file");
354 }
355 }
356
357 cts = mh_xcalloc(2, sizeof *cts);
358 ctp = cts;
359
360 if ((ct = parse_mime (file))) {
361 set_text_ctparams(ct, fx.decodetypes, fx.lf_line_endings);
362 *ctp++ = ct;
363 } else {
364 inform("unable to parse message from file %s", file);
365 status = NOTOK;
366
367 /* If there's an outfile, pass the input message unchanged, so the
368 message won't get dropped from a pipeline. */
369 if (outfile) {
370 /* Something went wrong. Output might be expected, such as if
371 this were run as a filter. Just copy the input to the
372 output. */
373 if ((infp = fopen (file, "r")) == NULL) {
374 adios (file, "unable to open for reading");
375 }
376
377 if (copy_input_to_output (file, infp, outfile, outfp) != OK) {
378 inform("unable to copy message to %s, "
379 "it might be lost\n", outfile);
380 }
381
382 fclose (infp);
383 infp = NULL;
384 }
385 }
386 } else {
387 /*
388 * message(s) are coming from a folder
389 */
390 CT ct;
391
392 if (! msgs.size) {
393 app_msgarg(&msgs, "cur");
394 }
395 if (! folder) {
396 folder = getfolder (1);
397 }
398 maildir = mh_xstrdup(m_maildir (folder));
399
400 /* chdir so that error messages, esp. from MIME parser, just
401 refer to the message and not its path. */
402 if (chdir (maildir) == NOTOK) {
403 adios (maildir, "unable to change directory to");
404 }
405
406 /* read folder and create message structure */
407 if (! (mp = folder_read (folder, 1))) {
408 adios (NULL, "unable to read folder %s", folder);
409 }
410
411 /* check for empty folder */
412 if (mp->nummsg == 0) {
413 adios (NULL, "no messages in %s", folder);
414 }
415
416 /* parse all the message ranges/sequences and set SELECTED */
417 for (msgnum = 0; msgnum < msgs.size; msgnum++)
418 if (! m_convert (mp, msgs.msgs[msgnum])) {
419 done (1);
420 }
421 seq_setprev (mp); /* set the previous-sequence */
422
423 cts = mh_xcalloc(mp->numsel + 1, sizeof *cts);
424 ctp = cts;
425
426 for (msgnum = mp->lowsel; msgnum <= mp->hghsel; msgnum++) {
427 if (is_selected(mp, msgnum)) {
428 char *msgnam = m_name (msgnum);
429
430 if ((ct = parse_mime (msgnam))) {
431 set_text_ctparams(ct, fx.decodetypes, fx.lf_line_endings);
432 *ctp++ = ct;
433 } else {
434 inform("unable to parse message %s", msgnam);
435 status = NOTOK;
436
437 /* If there's an outfile, pass the input message
438 unchanged, so the message won't get dropped from a
439 pipeline. */
440 if (outfile) {
441 /* Something went wrong. Output might be expected,
442 such as if this were run as a filter. Just copy
443 the input to the output. */
444 /* Can't use path() here because 1) it might have been
445 called before and it caches the pwd, and 2) we call
446 chdir() after that. */
447 char *input_filename =
448 concat (maildir, "/", msgnam, NULL);
449
450 if ((infp = fopen (input_filename, "r")) == NULL) {
451 adios (input_filename,
452 "unable to open for reading");
453 }
454
455 if (copy_input_to_output (input_filename, infp,
456 outfile, outfp) != OK) {
457 inform("unable to copy message to %s, "
458 "it might be lost\n", outfile);
459 }
460
461 fclose (infp);
462 infp = NULL;
463 free (input_filename);
464 }
465 }
466 }
467 }
468
469 if (chgflag) {
470 seq_setcur (mp, mp->hghsel); /* update current message */
471 }
472 seq_save (mp); /* synchronize sequences */
473 context_replace (pfolder, folder);/* update current folder */
474 context_save (); /* save the context file */
475 }
476
477 if (*cts) {
478 for (ctp = cts; *ctp; ++ctp) {
479 status =
480 mhfixmsgsbr (ctp, maildir, &fx, &infp, outfile, &outfp) == OK
481 ? 0
482 : 1;
483 free_content (*ctp);
484
485 if (using_stdin) {
486 (void) m_unlink (file);
487
488 if (! outfile) {
489 /* Just calling m_backup() unlinks the backup file. */
490 (void) m_backup (file);
491 }
492 }
493 }
494 } else {
495 status = 1;
496 }
497
498 mh_xfree(maildir);
499 free (cts);
500
501 if (fx.fixtypes != NULL) { svector_free (fx.fixtypes); }
502 if (infp) { fclose (infp); } /* even if stdin */
503 if (outfp) { fclose (outfp); } /* even if stdout */
504 free (outfile);
505 free (file);
506 free (folder);
507 free (arguments);
508
509 done (status == OK ? 0 : 1);
510 return NOTOK;
511 }
512
513
514 /*
515 * Apply transformations to one message.
516 */
517 static int
518 mhfixmsgsbr (CT *ctp, char *maildir, const fix_transformations *fx,
519 FILE **infp, char *outfile, FILE **outfp) {
520 /* Store input filename in case one of the transformations, i.e.,
521 fix_boundary(), rewrites to a tmp file. */
522 char *input_filename = maildir
523 ? concat (maildir, "/", (*ctp)->c_file, NULL)
524 : mh_xstrdup ((*ctp)->c_file);
525 int modify_inplace = 0;
526 int message_mods = 0;
527 int status = OK;
528
529 /* Though the input file won't need to be opened if everything goes
530 well, do it here just in case there's a failure, and that failure is
531 running out of file descriptors. */
532 if ((*infp = fopen (input_filename, "r")) == NULL) {
533 adios (input_filename, "unable to open for reading");
534 }
535
536 if (outfile == NULL) {
537 modify_inplace = 1;
538
539 if ((*ctp)->c_file) {
540 char *tempfile;
541 /* outfp will be closed by the caller */
542 if ((tempfile = m_mktemp2 (NULL, invo_name, NULL, outfp)) ==
543 NULL) {
544 adios (NULL, "unable to create temporary file in %s",
545 get_temp_dir());
546 }
547 outfile = mh_xstrdup (tempfile);
548 } else {
549 adios (NULL, "missing both input and output filenames\n");
550 }
551 } /* else *outfp was defined by caller */
552
553 reverse_alternative_parts (*ctp);
554 status = fix_always (*ctp, &message_mods);
555 if (status == OK && fx->fixboundary) {
556 status = fix_boundary (ctp, &message_mods);
557 }
558 if (status == OK && fx->fixtypes != NULL) {
559 status = fix_types (*ctp, fx->fixtypes, &message_mods);
560 }
561 if (status == OK && fx->fixcompositecte) {
562 status = fix_composite_cte (*ctp, &message_mods);
563 }
564 if (status == OK && fx->reformat) {
565 status =
566 ensure_text_plain (ctp, NULL, &message_mods, fx->replacetextplain);
567 }
568 if (status == OK && fx->decodetext) {
569 status = decode_text_parts (*ctp, fx->decodetext, fx->decodetypes,
570 &message_mods);
571 update_cte (*ctp);
572 }
573 if (status == OK && fx->textcharset != NULL) {
574 status = convert_charsets (*ctp, fx->textcharset, &message_mods);
575 }
576
577 if (status == OK && ! (*ctp)->c_umask) {
578 /* Set the umask for the contents file. This currently
579 isn't used but just in case it is in the future. */
580 struct stat st;
581
582 if (stat ((*ctp)->c_file, &st) != NOTOK) {
583 (*ctp)->c_umask = ~(st.st_mode & 0777);
584 } else {
585 (*ctp)->c_umask = ~m_gmprot();
586 }
587 }
588
589 /*
590 * Write the content to a file
591 */
592 if (status == OK) {
593 status = write_content (*ctp, input_filename, outfile, *outfp,
594 modify_inplace, message_mods);
595 } else if (! modify_inplace) {
596 /* Something went wrong. Output might be expected, such
597 as if this were run as a filter. Just copy the input
598 to the output. */
599 if (copy_input_to_output (input_filename, *infp, outfile,
600 *outfp) != OK) {
601 inform("unable to copy message to %s, it might be lost\n",
602 outfile);
603 }
604 }
605
606 if (modify_inplace) {
607 if (status != OK) { (void) m_unlink (outfile); }
608 free (outfile);
609 outfile = NULL;
610 }
611
612 fclose (*infp);
613 *infp = NULL;
614 free (input_filename);
615
616 return status;
617 }
618
619
620 /*
621 * Copy input message to output. Assumes not modifying in place, so this
622 * might be running as part of a pipeline.
623 */
624 static int
625 copy_input_to_output (const char *input_filename, FILE *infp,
626 const char *output_filename, FILE *outfp) {
627 int in = fileno (infp);
628 int out = fileno (outfp);
629 int status = OK;
630
631 if (in != -1 && out != -1) {
632 cpydata (in, out, input_filename, output_filename);
633 } else {
634 status = NOTOK;
635 }
636
637 return status;
638 }
639
640
641 /*
642 * Fix mismatched outer level boundary.
643 */
644 static int
645 fix_boundary (CT *ct, int *message_mods) {
646 struct multipart *mp;
647 int status = OK;
648
649 if (ct && (*ct)->c_type == CT_MULTIPART && bogus_mp_content) {
650 mp = (struct multipart *) (*ct)->c_ctparams;
651
652 /*
653 * 1) Get boundary at end of part.
654 * 2) Get boundary at beginning of part and compare to the end-of-part
655 * boundary.
656 * 3) Write out contents of ct to tmp file, replacing boundary in
657 * header with boundary from part. Set c_unlink to 1.
658 * 4) Free ct.
659 * 5) Call parse_mime() on the tmp file, replacing ct.
660 */
661
662 if (mp && mp->mp_start) {
663 char *part_boundary;
664
665 if (get_multipart_boundary (*ct, &part_boundary) == OK) {
666 char *fixed;
667
668 if ((fixed = m_mktemp2 (NULL, invo_name, NULL, &(*ct)->c_fp))) {
669 if (replace_boundary (*ct, fixed, part_boundary) == OK) {
670 char *filename = mh_xstrdup ((*ct)->c_file);
671 CT fixed_ct;
672
673 free_content (*ct);
674 if ((fixed_ct = parse_mime (fixed))) {
675 *ct = fixed_ct;
676 (*ct)->c_unlink = 1;
677
678 ++*message_mods;
679 if (verbosw) {
680 report (NULL, NULL, filename,
681 "fix multipart boundary");
682 }
683 } else {
684 *ct = NULL;
685 inform("unable to parse fixed part");
686 status = NOTOK;
687 }
688 free (filename);
689 } else {
690 inform("unable to replace broken boundary");
691 status = NOTOK;
692 }
693 } else {
694 inform("unable to create temporary file in %s",
695 get_temp_dir());
696 status = NOTOK;
697 }
698
699 free (part_boundary);
700 } else {
701 /* Couldn't fix the boundary. Report failure so that mhfixmsg
702 doesn't modify the message. */
703 status = NOTOK;
704 }
705 } else {
706 /* No multipart struct, even though the content type is
707 CT_MULTIPART. Report failure so that mhfixmsg doesn't modify
708 the message. */
709 status = NOTOK;
710 }
711 }
712
713 return status;
714 }
715
716
/*
 * Find boundary at end of multipart.
 *
 * Scans backwards from ct->c_end, one buffer at a time, for the closing
 * boundary line, then scans forwards from ct->c_begin for a line that
 * starts with the same boundary.
 *
 * On OK, *part_boundary is a newly allocated copy of the boundary, without
 * the leading "--"; the caller frees it.  On NOTOK, *part_boundary is NULL.
 * ct->c_fp is opened here if it was not already open, and is closed and set
 * to NULL before returning (except when the open itself fails).
 */
static int
get_multipart_boundary (CT ct, char **part_boundary) {
    char buffer[NMH_BUFSIZ];
    char *end_boundary = NULL;
    /* Start one buffer's worth before the end of the content, or at its
       beginning if the content is shorter than that. */
    off_t begin = (off_t) ct->c_end > (off_t) (ct->c_begin + sizeof buffer)
        ? (off_t) (ct->c_end - sizeof buffer)
        : (off_t) ct->c_begin;
    size_t bytes_read;
    int status = OK;

    /* This will fail if the boundary spans fread() calls.  NMH_BUFSIZ should
       be big enough, even if it's just 1024, to make that unlikely. */

    /* free_content() will close ct->c_fp if bogus MP boundary is fixed. */
    if (! ct->c_fp && (ct->c_fp = fopen (ct->c_file, "r")) == NULL) {
        advise (ct->c_file, "unable to open for reading");
        return NOTOK;
    }

    /* Get boundary at end of multipart. */
    while (begin >= (off_t) ct->c_begin) {
        fseeko (ct->c_fp, begin, SEEK_SET);
        while ((bytes_read = fread (buffer, 1, sizeof buffer, ct->c_fp)) > 0) {
            /* Presumably the last "--" in the buffer, i.e. the tail of the
               closing "--boundary--" line. */
            char *cp = rfind_str (buffer, bytes_read, "--");

            if (cp) {
                char *end;

                /* Trim off trailing "--" and anything beyond. */
                *cp-- = '\0';
                /* Back up to the newline that precedes the boundary line,
                   and require "\n--" followed by at least one more
                   character. */
                if ((end = rfind_str (buffer, cp - buffer, "\n"))) {
                    if (strlen (end) > 3 && *end++ == '\n' &&
                        *end++ == '-' && *end++ == '-') {
                        /* end now points just past "\n--". */
                        end_boundary = mh_xstrdup (end);
                        break;
                    }
                }
            }
        }

        /* Stop once found, or once the start of the content is within one
           buffer of the position just searched from. */
        if (end_boundary || begin <= (off_t) (ct->c_begin + sizeof buffer))
            break;
        begin -= sizeof buffer;
    }

    /* Get boundary at beginning of multipart. */
    if (end_boundary) {
        fseeko (ct->c_fp, ct->c_begin, SEEK_SET);
        while ((bytes_read = fread (buffer, 1, sizeof buffer, ct->c_fp)) > 0) {
            if (bytes_read >= strlen (end_boundary)) {
                char *cp = find_str (buffer, bytes_read, end_boundary);

                /* Accept only an occurrence that is preceded by "\n--". */
                if (cp && cp - buffer >= 2 && *--cp == '-' &&
                    *--cp == '-' && (cp > buffer && *--cp == '\n')) {
                    status = OK;
                    break;
                }
                /* NOTE(review): when a chunk is long enough but contains no
                   match, status is left as it was -- initially OK.  It
                   looks as though OK can be returned without the start
                   boundary ever having been matched; confirm whether that
                   is intended. */
            } else {
                /* The start and end boundaries didn't match, or the
                   start boundary doesn't begin with "\n--" (or "--"
                   if at the beginning of buffer).  Keep trying. */
                status = NOTOK;
            }
        }
    } else {
        status = NOTOK;
    }

    if (ct->c_fp) {
        fclose (ct->c_fp);
        ct->c_fp = NULL;
    }

    if (status == OK) {
        *part_boundary = end_boundary;
    } else {
        *part_boundary = NULL;
        free (end_boundary);
    }

    return status;
}
802
803
804 /*
805 * Open and copy ct->c_file to file, replacing the multipart boundary.
806 */
807 static int
808 replace_boundary (CT ct, char *file, char *boundary) {
809 FILE *fpin, *fpout;
810 int compnum, state;
811 char buf[NMH_BUFSIZ], name[NAMESZ];
812 char *np, *vp;
813 m_getfld_state_t gstate;
814 int status = OK;
815
816 if (ct->c_file == NULL) {
817 inform("missing input filename");
818 return NOTOK;
819 }
820
821 if ((fpin = fopen (ct->c_file, "r")) == NULL) {
822 advise (ct->c_file, "unable to open for reading");
823 return NOTOK;
824 }
825
826 if ((fpout = fopen (file, "w")) == NULL) {
827 fclose (fpin);
828 advise (file, "unable to open for writing");
829 return NOTOK;
830 }
831
832 gstate = m_getfld_state_init(fpin);
833 for (compnum = 1;;) {
834 int bufsz = (int) sizeof buf;
835
836 switch (state = m_getfld2(&gstate, name, buf, &bufsz)) {
837 case FLD:
838 case FLDPLUS:
839 compnum++;
840
841 /* get copies of the buffers */
842 np = mh_xstrdup (name);
843 vp = mh_xstrdup (buf);
844
845 /* if necessary, get rest of field */
846 while (state == FLDPLUS) {
847 bufsz = sizeof buf;
848 state = m_getfld2(&gstate, name, buf, &bufsz);
849 vp = add (buf, vp); /* add to previous value */
850 }
851
852 if (strcasecmp (TYPE_FIELD, np)) {
853 fprintf (fpout, "%s:%s", np, vp);
854 } else {
855 char *new_ctline, *new_params;
856
857 replace_param(&ct->c_ctinfo.ci_first_pm,
858 &ct->c_ctinfo.ci_last_pm, "boundary",
859 boundary, 0);
860
861 new_ctline = concat(" ", ct->c_ctinfo.ci_type, "/",
862 ct->c_ctinfo.ci_subtype, NULL);
863 new_params = output_params(LEN(TYPE_FIELD) +
864 strlen(new_ctline) + 1,
865 ct->c_ctinfo.ci_first_pm, NULL, 0);
866 fprintf (fpout, "%s:%s%s\n", np, new_ctline,
867 FENDNULL(new_params));
868 free(new_ctline);
869 mh_xfree(new_params);
870 }
871
872 free (vp);
873 free (np);
874
875 continue;
876
877 case BODY:
878 putc('\n', fpout);
879 /* buf will have a terminating NULL, skip it. */
880 if ((int) fwrite (buf, 1, bufsz-1, fpout) < bufsz-1) {
881 advise (file, "fwrite");
882 }
883 continue;
884
885 case FILEEOF:
886 break;
887
888 case LENERR:
889 case FMTERR:
890 inform("message format error in component #%d", compnum);
891 status = NOTOK;
892 break;
893
894 default:
895 inform("getfld() returned %d", state);
896 status = NOTOK;
897 break;
898 }
899
900 break;
901 }
902
903 m_getfld_state_destroy (&gstate);
904 fclose (fpout);
905 fclose (fpin);
906
907 return status;
908 }
909
910
911 /*
912 * Fix Content-Type header to reflect the content of its part.
913 */
914 static int
915 fix_types (CT ct, svector_t fixtypes, int *message_mods) {
916 int status = OK;
917
918 switch (ct->c_type) {
919 case CT_MULTIPART: {
920 struct multipart *m = (struct multipart *) ct->c_ctparams;
921 struct part *part;
922
923 for (part = m->mp_parts; status == OK && part; part = part->mp_next) {
924 status = fix_types (part->mp_part, fixtypes, message_mods);
925 }
926 break;
927 }
928
929 case CT_MESSAGE:
930 if (ct->c_subtype == MESSAGE_EXTERNAL) {
931 struct exbody *e = (struct exbody *) ct->c_ctparams;
932
933 status = fix_types (e->eb_content, fixtypes, message_mods);
934 }
935 break;
936
937 default: {
938 char **typep, *type;
939
940 if (ct->c_ctinfo.ci_type && ct->c_ctinfo.ci_subtype) {
941 for (typep = svector_strs (fixtypes);
942 typep && (type = *typep);
943 ++typep) {
944 char *type_subtype =
945 concat (ct->c_ctinfo.ci_type, "/", ct->c_ctinfo.ci_subtype,
946 NULL);
947
948 if (! strcasecmp (type, type_subtype) &&
949 decode_part (ct) == OK &&
950 ct->c_cefile.ce_file != NULL) {
951 char *ct_type_subtype = mime_type (ct->c_cefile.ce_file);
952 char *cp;
953
954 if ((cp = strchr (ct_type_subtype, ';'))) {
955 /* Truncate to remove any parameter list from
956 mime_type () result. */
957 *cp = '\0';
958 }
959
960 if (strcasecmp (type, ct_type_subtype)) {
961 char *ct_type, *ct_subtype;
962 HF hf;
963
964 /* The Content-Type header does not match the
965 content, so update these struct Content
966 fields to match:
967 * c_type, c_subtype
968 * c_ctinfo.ci_type, c_ctinfo.ci_subtype
969 * c_ctline
970 */
971 /* Extract type and subtype from type/subtype. */
972 ct_type = mh_xstrdup(ct_type_subtype);
973 if ((cp = strchr (ct_type, '/'))) {
974 *cp = '\0';
975 ct_subtype = mh_xstrdup(++cp);
976 } else {
977 inform("missing / in MIME type of %s %s",
978 ct->c_file, ct->c_partno);
979 free (ct_type);
980 return NOTOK;
981 }
982
983 ct->c_type = ct_str_type (ct_type);
984 ct->c_subtype = ct_str_subtype (ct->c_type, ct_subtype);
985
986 free (ct->c_ctinfo.ci_type);
987 ct->c_ctinfo.ci_type = ct_type;
988 free (ct->c_ctinfo.ci_subtype);
989 ct->c_ctinfo.ci_subtype = ct_subtype;
990 if (! replace_substring (&ct->c_ctline, type,
991 ct_type_subtype)) {
992 inform("did not find %s in %s",
993 type, ct->c_ctline);
994 }
995
996 /* Update Content-Type header field. */
997 for (hf = ct->c_first_hf; hf; hf = hf->next) {
998 if (! strcasecmp (TYPE_FIELD, hf->name)) {
999 if (replace_substring (&hf->value, type,
1000 ct_type_subtype)) {
1001 ++*message_mods;
1002 if (verbosw) {
1003 report (NULL, ct->c_partno, ct->c_file,
1004 "change Content-Type in header "
1005 "from %s to %s",
1006 type, ct_type_subtype);
1007 }
1008 break;
1009 }
1010 inform("did not find %s in %s", type, hf->value);
1011 }
1012 }
1013 }
1014 free (ct_type_subtype);
1015 }
1016 free (type_subtype);
1017 }
1018 }
1019 }}
1020
1021 return status;
1022 }
1023
1024
/*
 * Replace the first occurrence of old in *str with new.
 *
 * When old is found, a new string is allocated, the previous *str is freed,
 * and *str is set to the new string, which is also returned.  When old is
 * not found, *str is left untouched and NULL is returned.
 */
char *
replace_substring (char **str, const char *old, const char *new) {
    char *match = strstr (*str, old);
    char *tail, *result;

    if (! match) {
        return NULL;
    }

    /* Everything after the matched text is carried over unchanged. */
    tail = match + strlen (old);

    if (match == *str) {
        /* Match is at the very start, so there is no leading text. */
        result = concat (new, tail, NULL);
    } else {
        /* Copy the whole string, then cut the copy off where the match
           begins to get the leading text. */
        char *head = mh_xstrdup(*str);

        head[match - *str] = '\0';
        result = concat (head, new, tail, NULL);
        free (head);
    }

    free (*str);
    *str = result;

    return result;
}
1052
1053
/*
 * Remove a name=value parameter, given just its name, from a header value.
 *
 * The first occurrence of "name=" in str is removed in place, together with
 * its value, any whitespace before it, and the semicolon that introduces
 * it.  The name comparison is case-sensitive.  A quoted value extends to
 * its closing quote, or to the end of the string if the quote is never
 * closed.  An unquoted value extends to the next whitespace or semicolon,
 * so the separator of a following parameter is kept.
 *
 * Returns str, whether or not anything was removed.
 */
char *
remove_parameter (char *str, const char *name) {
    /* It looks to me, based on the BNF in RFC 2045, than there can't
       be whitespace between the parameter name and the "=", or
       between the "=" and the parameter value. */
    const size_t name_len = strlen (name);
    char *cp = str;

    /* Find the first occurrence of name that is directly followed by "=". */
    while ((cp = strstr (cp, name)) != NULL && cp[name_len] != '=') {
        if (*cp == '\0') {
            /* Only reachable with an empty name: the end was reached. */
            cp = NULL;
            break;
        }
        ++cp;
    }

    if (cp) {
        char *start, *end;

        /* Remove any leading spaces, before the parameter name. */
        for (start = cp;
             start > str && isspace ((unsigned char) *(start-1));
             --start) {
            continue;
        }
        /* Remove a leading semicolon. */
        if (start > str && *(start-1) == ';') { --start; }

        end = cp + name_len + 1;
        if (*end == '"') {
            /* Skip past the quoted value, and then the final quote if there
               is one; never step beyond the terminating null. */
            for (++end ; *end && *end != '"'; ++end) { continue; }
            if (*end == '"') { ++end; }
        } else {
            /* Skip past the value, which may be empty.  Stop at a
               semicolon as well as at whitespace so that the next
               parameter keeps its separator. */
            while (*end && *end != ';' && ! isspace ((unsigned char) *end)) {
                ++end;
            }
        }

        /* Close the gap, moving the trailing null along with the rest. */
        (void) memmove (start, end, strlen (end) + 1);
    }

    return str;
}
1099
1100
1101 /*
1102 * Fix Content-Transfer-Encoding of composite,, e.g., message or multipart, part.
1103 * According to RFC 2045 Sec. 6.4, it must be 7bit, 8bit, or binary. Set it to
1104 * 8 bit.
1105 */
1106 static int
1107 fix_composite_cte (CT ct, int *message_mods) {
1108 int status = OK;
1109
1110 if (ct->c_type == CT_MESSAGE || ct->c_type == CT_MULTIPART) {
1111 if (ct->c_encoding != CE_7BIT && ct->c_encoding != CE_8BIT &&
1112 ct->c_encoding != CE_BINARY) {
1113 HF hf;
1114
1115 for (hf = ct->c_first_hf; hf; hf = hf->next) {
1116 char *name = hf->name;
1117 for (; *name && isspace ((unsigned char) *name); ++name) {
1118 continue;
1119 }
1120
1121 if (! strncasecmp (name, ENCODING_FIELD,
1122 LEN(ENCODING_FIELD))) {
1123 char *prefix = "Nmh-REPLACED-INVALID-";
1124 HF h;
1125
1126 NEW(h);
1127 h->name = mh_xstrdup (hf->name);
1128 h->hf_encoding = hf->hf_encoding;
1129 h->next = hf->next;
1130 hf->next = h;
1131
1132 /* Retain old header but prefix its name. */
1133 free (hf->name);
1134 hf->name = concat (prefix, h->name, NULL);
1135
1136 ++*message_mods;
1137 if (verbosw) {
1138 char *encoding = cpytrim (hf->value);
1139 report (NULL, ct->c_partno, ct->c_file,
1140 "replace Content-Transfer-Encoding of %s "
1141 "with 8 bit", encoding);
1142 free (encoding);
1143 }
1144
1145 h->value = mh_xstrdup (" 8bit\n");
1146
1147 /* Don't need to warn for multiple C-T-E header
1148 fields, parse_mime() already does that. But
1149 if there are any, fix them all as necessary. */
1150 hf = h;
1151 }
1152 }
1153
1154 set_ce (ct, CE_8BIT);
1155 }
1156
1157 if (ct->c_type == CT_MULTIPART) {
1158 struct multipart *m;
1159 struct part *part;
1160
1161 m = (struct multipart *) ct->c_ctparams;
1162 for (part = m->mp_parts; part; part = part->mp_next) {
1163 if (fix_composite_cte (part->mp_part, message_mods) != OK) {
1164 status = NOTOK;
1165 break;
1166 }
1167 }
1168 }
1169 }
1170
1171 return status;
1172 }
1173
1174
1175 /*
1176 * Set content encoding.
1177 */
1178 static int
1179 set_ce (CT ct, int encoding) {
1180 const char *ce = ce_str (encoding);
1181 const struct str2init *ctinit = get_ce_method (ce);
1182
1183 if (ctinit) {
1184 char *cte = concat (" ", ce, "\n", NULL);
1185 int found_cte = 0;
1186 HF hf;
1187 /* Decoded contents might be in ct->c_cefile.ce_file, if the
1188 caller is decode_text_parts (). Save because we'll
1189 overwrite below. */
1190 struct cefile decoded_content_info = ct->c_cefile;
1191
1192 ct->c_encoding = encoding;
1193
1194 ct->c_ctinitfnx = ctinit->si_init;
1195 /* This will assign ct->c_cefile with an all-0 struct, which
1196 is what we want. */
1197 (*ctinit->si_init) (ct);
1198 /* After returning, the caller should set
1199 ct->c_cefile.ce_file to the name of the file containing
1200 the contents. */
1201
1202 if (ct->c_ceclosefnx) {
1203 (*ct->c_ceclosefnx) (ct);
1204 }
1205
1206 /* Restore the cefile. */
1207 ct->c_cefile = decoded_content_info;
1208
1209 /* Update/add Content-Transfer-Encoding header field. */
1210 for (hf = ct->c_first_hf; hf; hf = hf->next) {
1211 if (! strcasecmp (ENCODING_FIELD, hf->name)) {
1212 found_cte = 1;
1213 free (hf->value);
1214 hf->value = cte;
1215 }
1216 }
1217 if (! found_cte) {
1218 add_header (ct, mh_xstrdup (ENCODING_FIELD), cte);
1219 }
1220
1221 /* Update c_celine. It's used only by mhlist -debug. */
1222 free (ct->c_celine);
1223 ct->c_celine = mh_xstrdup (cte);
1224
1225 return OK;
1226 }
1227
1228 return NOTOK;
1229 }
1230
1231
1232 /*
1233 * Make sure each text part has a corresponding text/plain part.
1234 */
1235 static int
1236 ensure_text_plain (CT *ct, CT parent, int *message_mods, int replacetextplain) {
1237 int status = OK;
1238
1239 switch ((*ct)->c_type) {
1240 case CT_TEXT: {
1241 /* Nothing to do for text/plain. */
1242 if ((*ct)->c_subtype == TEXT_PLAIN) { return OK; }
1243
1244 if (parent && parent->c_type == CT_MULTIPART &&
1245 parent->c_subtype == MULTI_ALTERNATE) {
1246 int new_subpart_number = 1;
1247 int has_text_plain =
1248 find_textplain_sibling (parent, replacetextplain,
1249 &new_subpart_number);
1250
1251 if (! has_text_plain) {
1252 /* Parent is a multipart/alternative. Insert a new
1253 text/plain subpart. */
1254 const int inserted =
1255 insert_new_text_plain_part (*ct, new_subpart_number,
1256 parent);
1257 if (inserted) {
1258 ++*message_mods;
1259 if (verbosw) {
1260 report (NULL, parent->c_partno, parent->c_file,
1261 "insert text/plain part");
1262 }
1263 } else {
1264 status = NOTOK;
1265 }
1266 }
1267 } else if (parent && parent->c_type == CT_MULTIPART &&
1268 parent->c_subtype == MULTI_RELATED) {
1269 char *type_subtype =
1270 concat ((*ct)->c_ctinfo.ci_type, "/",
1271 (*ct)->c_ctinfo.ci_subtype, NULL);
1272 const char *parent_type =
1273 get_param (parent->c_ctinfo.ci_first_pm, "type", '?', 1);
1274 int new_subpart_number = 1;
1275 int has_text_plain = 0;
1276
1277 /* Have to do string comparison on the subtype because we
1278 don't enumerate all of them in c_subtype values.
1279 parent_type will be NULL if the multipart/related part
1280 doesn't have a type parameter. The type parameter must
1281 be specified according to RFC 2387 Sec. 3.1 but not all
1282 messages comply. */
1283 if (parent_type && strcasecmp (type_subtype, parent_type) == 0) {
1284 /* The type of this part matches the root type of the
1285 parent multipart/related. Look to see if there's
1286 text/plain sibling. */
1287 has_text_plain =
1288 find_textplain_sibling (parent, replacetextplain,
1289 &new_subpart_number);
1290 }
1291
1292 free (type_subtype);
1293
1294 if (! has_text_plain) {
1295 struct multipart *mp = (struct multipart *) parent->c_ctparams;
1296 struct part *part;
1297 int siblings = 0;
1298
1299 for (part = mp->mp_parts; part; part = part->mp_next) {
1300 if (*ct != part->mp_part) {
1301 ++siblings;
1302 }
1303 }
1304
1305 if (siblings) {
1306 /* Parent is a multipart/related. Insert a new
1307 text/plain subpart in a new multipart/alternative. */
1308 if (insert_into_new_mp_alt (ct, message_mods)) {
1309 /* Not an error if text/plain couldn't be added. */
1310 }
1311 } else {
1312 /* There are no siblings, so insert a new text/plain
1313 subpart, and change the parent type from
1314 multipart/related to multipart/alternative. */
1315 const int inserted =
1316 insert_new_text_plain_part (*ct, new_subpart_number,
1317 parent);
1318
1319 if (inserted) {
1320 HF hf;
1321
1322 parent->c_subtype = MULTI_ALTERNATE;
1323 free (parent->c_ctinfo.ci_subtype);
1324 parent->c_ctinfo.ci_subtype = mh_xstrdup("alternative");
1325 if (! replace_substring (&parent->c_ctline, "/related",
1326 "/alternative")) {
1327 inform("did not find multipart/related in %s",
1328 parent->c_ctline);
1329 }
1330
1331 /* Update Content-Type header field. */
1332 for (hf = parent->c_first_hf; hf; hf = hf->next) {
1333 if (! strcasecmp (TYPE_FIELD, hf->name)) {
1334 if (replace_substring (&hf->value, "/related",
1335 "/alternative")) {
1336 ++*message_mods;
1337 if (verbosw) {
1338 report (NULL, parent->c_partno,
1339 parent->c_file,
1340 "insert text/plain part");
1341 }
1342
1343 /* Remove, e.g., type="text/html" from
1344 multipart/alternative. */
1345 remove_parameter (hf->value, "type");
1346 break;
1347 }
1348 inform("did not find multipart/"
1349 "related in header %s", hf->value);
1350 }
1351 }
1352 } else {
1353 /* Not an error if text/plain couldn't be inserted. */
1354 }
1355 }
1356 }
1357 } else {
1358 if (insert_into_new_mp_alt (ct, message_mods)) {
1359 status = NOTOK;
1360 }
1361 }
1362 break;
1363 }
1364
1365 case CT_MULTIPART: {
1366 struct multipart *mp = (struct multipart *) (*ct)->c_ctparams;
1367 struct part *part;
1368
1369 for (part = mp->mp_parts; status == OK && part; part = part->mp_next) {
1370 if ((*ct)->c_type == CT_MULTIPART) {
1371 status = ensure_text_plain (&part->mp_part, *ct, message_mods,
1372 replacetextplain);
1373 }
1374 }
1375 break;
1376 }
1377
1378 case CT_MESSAGE:
1379 if ((*ct)->c_subtype == MESSAGE_EXTERNAL) {
1380 struct exbody *e = (struct exbody *) (*ct)->c_ctparams;
1381
1382 status = ensure_text_plain (&e->eb_content, *ct, message_mods,
1383 replacetextplain);
1384 }
1385 break;
1386 }
1387
1388 return status;
1389 }
1390
1391
1392 /*
1393 * See if there is a sibling text/plain, and return its subpart number.
1394 */
1395 static int
1396 find_textplain_sibling (CT parent, int replacetextplain,
1397 int *new_subpart_number) {
1398 struct multipart *mp = (struct multipart *) parent->c_ctparams;
1399 struct part *part, *prev;
1400 int has_text_plain = 0;
1401
1402 for (prev = part = mp->mp_parts; part; part = part->mp_next) {
1403 ++*new_subpart_number;
1404 if (part->mp_part->c_type == CT_TEXT &&
1405 part->mp_part->c_subtype == TEXT_PLAIN) {
1406 if (replacetextplain) {
1407 struct part *old_part;
1408 if (part == mp->mp_parts) {
1409 old_part = mp->mp_parts;
1410 mp->mp_parts = part->mp_next;
1411 } else {
1412 old_part = prev->mp_next;
1413 prev->mp_next = part->mp_next;
1414 }
1415 if (verbosw) {
1416 report (NULL, parent->c_partno, parent->c_file,
1417 "remove text/plain part %s",
1418 old_part->mp_part->c_partno);
1419 }
1420 free_content (old_part->mp_part);
1421 free (old_part);
1422 } else {
1423 has_text_plain = 1;
1424 }
1425 break;
1426 }
1427 prev = part;
1428 }
1429
1430 return has_text_plain;
1431 }
1432
1433
1434 /*
1435 * Insert a new text/plain part.
1436 */
1437 static int
1438 insert_new_text_plain_part (CT ct, int new_subpart_number, CT parent) {
1439 struct multipart *mp = (struct multipart *) parent->c_ctparams;
1440 struct part *new_part;
1441
1442 NEW(new_part);
1443 if ((new_part->mp_part = build_text_plain_part (ct))) {
1444 char buffer[16];
1445 snprintf (buffer, sizeof buffer, "%d", new_subpart_number);
1446
1447 new_part->mp_next = mp->mp_parts;
1448 mp->mp_parts = new_part;
1449 new_part->mp_part->c_partno =
1450 concat (parent->c_partno ? parent->c_partno : "1", ".",
1451 buffer, NULL);
1452
1453 return 1;
1454 }
1455
1456 free_content (new_part->mp_part);
1457 free (new_part);
1458
1459 return 0;
1460 }
1461
1462
1463 /*
1464 * Create a text/plain part to go along with non-plain sibling part.
1465 */
1466 static CT
1467 build_text_plain_part (CT encoded_part) {
1468 CT tp_part = divide_part (encoded_part);
1469 char *tmp_plain_file = NULL;
1470
1471 if (decode_part (tp_part) == OK) {
1472 /* Now, tp_part->c_cefile.ce_file is the name of the tmp file that
1473 contains the decoded contents. And the decoding function, such
1474 as openQuoted, will have set ...->ce_unlink to 1 so that it will
1475 be unlinked by free_content (). */
1476 char *tempfile;
1477
1478 /* This m_mktemp2() call closes the temp file. */
1479 if ((tempfile = m_mktemp2 (NULL, invo_name, NULL, NULL)) == NULL) {
1480 inform("unable to create temporary file in %s",
1481 get_temp_dir());
1482 } else {
1483 tmp_plain_file = mh_xstrdup (tempfile);
1484 if (reformat_part (tp_part, tmp_plain_file,
1485 tp_part->c_ctinfo.ci_type,
1486 tp_part->c_ctinfo.ci_subtype,
1487 tp_part->c_type) == OK) {
1488 return tp_part;
1489 }
1490 }
1491 }
1492
1493 free_content (tp_part);
1494 if (tmp_plain_file) { (void) m_unlink (tmp_plain_file); }
1495 free (tmp_plain_file);
1496
1497 return NULL;
1498 }
1499
1500
1501 /*
1502 * Slip new text/plain part into a new multipart/alternative.
1503 */
1504 static int
1505 insert_into_new_mp_alt (CT *ct, int *message_mods) {
1506 CT tp_part = build_text_plain_part (*ct);
1507 int status = OK;
1508
1509 if (tp_part) {
1510 CT mp_alt = build_multipart_alt (*ct, tp_part, CT_MULTIPART,
1511 MULTI_ALTERNATE);
1512 if (mp_alt) {
1513 struct multipart *mp = (struct multipart *) mp_alt->c_ctparams;
1514
1515 if (mp && mp->mp_parts) {
1516 mp->mp_parts->mp_part = tp_part;
1517 /* Make the new multipart/alternative the parent. */
1518 *ct = mp_alt;
1519
1520 ++*message_mods;
1521 if (verbosw) {
1522 report (NULL, (*ct)->c_partno, (*ct)->c_file,
1523 "insert text/plain part");
1524 }
1525 } else {
1526 free_content (tp_part);
1527 free_content (mp_alt);
1528 status = NOTOK;
1529 }
1530 } else {
1531 status = NOTOK;
1532 }
1533 } else {
1534 /* Not an error if text/plain couldn't be built. */
1535 }
1536
1537 return status;
1538 }
1539
1540
1541 /*
1542 * Clone a MIME part.
1543 */
1544 static CT
1545 divide_part (CT ct) {
1546 CT new_part;
1547
1548 NEW0(new_part);
1549 /* Just copy over what is needed for decoding. c_vrsn and
1550 c_celine aren't necessary. */
1551 new_part->c_file = mh_xstrdup (ct->c_file);
1552 new_part->c_begin = ct->c_begin;
1553 new_part->c_end = ct->c_end;
1554 copy_ctinfo (&new_part->c_ctinfo, &ct->c_ctinfo);
1555 new_part->c_type = ct->c_type;
1556 new_part->c_cefile = ct->c_cefile;
1557 new_part->c_encoding = ct->c_encoding;
1558 new_part->c_ctinitfnx = ct->c_ctinitfnx;
1559 new_part->c_ceopenfnx = ct->c_ceopenfnx;
1560 new_part->c_ceclosefnx = ct->c_ceclosefnx;
1561 new_part->c_cesizefnx = ct->c_cesizefnx;
1562
1563 /* c_ctline is used by reformat__part(), so it can preserve
1564 anything after the type/subtype. */
1565 new_part->c_ctline = mh_xstrdup (ct->c_ctline);
1566
1567 return new_part;
1568 }
1569
1570
1571 /*
1572 * Copy the content info from one part to another.
1573 */
1574 static void
1575 copy_ctinfo (CI dest, CI src) {
1576 PM s_pm, d_pm;
1577
1578 dest->ci_type = src->ci_type ? mh_xstrdup (src->ci_type) : NULL;
1579 dest->ci_subtype = src->ci_subtype ? mh_xstrdup (src->ci_subtype) : NULL;
1580
1581 for (s_pm = src->ci_first_pm; s_pm; s_pm = s_pm->pm_next) {
1582 d_pm = add_param(&dest->ci_first_pm, &dest->ci_last_pm, s_pm->pm_name,
1583 s_pm->pm_value, 0);
1584 if (s_pm->pm_charset) {
1585 d_pm->pm_charset = mh_xstrdup(s_pm->pm_charset);
1586 }
1587 if (s_pm->pm_lang) {
1588 d_pm->pm_lang = mh_xstrdup(s_pm->pm_lang);
1589 }
1590 }
1591
1592 dest->ci_comment = src->ci_comment ? mh_xstrdup (src->ci_comment) : NULL;
1593 dest->ci_magic = src->ci_magic ? mh_xstrdup (src->ci_magic) : NULL;
1594 }
1595
1596
1597 /*
1598 * Decode content.
1599 */
1600 static int
1601 decode_part (CT ct) {
1602 char *tmp_decoded;
1603 int status;
1604 FILE *file;
1605 char *tempfile;
1606
1607 if ((tempfile = m_mktemp2 (NULL, invo_name, NULL, &file)) == NULL) {
1608 adios (NULL, "unable to create temporary file in %s", get_temp_dir());
1609 }
1610 tmp_decoded = mh_xstrdup (tempfile);
1611 /* The following call will load ct->c_cefile.ce_file with the tmp
1612 filename of the decoded content. tmp_decoded will contain the
1613 encoded output, get rid of that. */
1614 status = output_message_fp (ct, file, tmp_decoded);
1615 (void) m_unlink (tmp_decoded);
1616 free (tmp_decoded);
1617 if (fclose (file)) {
1618 inform("unable to close temporary file %s, continuing...", tempfile);
1619 }
1620
1621 return status;
1622 }
1623
1624
1625 /*
1626 * Reformat content as plain text.
1627 * Some of the arguments aren't really needed now, but maybe will
1628 * be in the future for other than text types.
1629 */
1630 static int
1631 reformat_part (CT ct, char *file, char *type, char *subtype, int c_type) {
1632 int output_subtype, output_encoding;
1633 const char *reason = NULL;
1634 char *cp, *cf;
1635 int status;
1636
1637 /* Hacky: this redirects the output from whatever command is used
1638 to show the part to a file. So, the user can't have any output
1639 redirection in that command.
1640 Could show_multi() in mhshowsbr.c avoid this? */
1641
1642 /* Check for invo_name-format-type/subtype. */
1643 if ((cf = context_find_by_type ("format", type, subtype)) == NULL) {
1644 if (verbosw) {
1645 inform("Don't know how to convert %s, there is no "
1646 "%s-format-%s/%s profile entry",
1647 ct->c_file, invo_name, type, subtype);
1648 }
1649 return NOTOK;
1650 }
1651 if (strchr (cf, '>')) {
1652 inform("'>' prohibited in \"%s\",\nplease fix your "
1653 "%s-format-%s/%s profile entry", cf, invo_name, type,
1654 FENDNULL(subtype));
1655
1656 return NOTOK;
1657 }
1658
1659 cp = concat (cf, " >", file, NULL);
1660 status = show_content_aux (ct, 0, cp, NULL, NULL);
1661 free (cp);
1662
1663 /* Unlink decoded content tmp file and free its filename to avoid
1664 leaks. The file stream should already have been closed. */
1665 if (ct->c_cefile.ce_unlink) {
1666 (void) m_unlink (ct->c_cefile.ce_file);
1667 free (ct->c_cefile.ce_file);
1668 ct->c_cefile.ce_file = NULL;
1669 ct->c_cefile.ce_unlink = 0;
1670 }
1671
1672 if (c_type == CT_TEXT) {
1673 output_subtype = TEXT_PLAIN;
1674 } else {
1675 /* Set subtype to 0, which is always an UNKNOWN subtype. */
1676 output_subtype = 0;
1677 }
1678
1679 output_encoding = content_encoding (ct, &reason);
1680 if (status == OK &&
1681 set_ct_type (ct, c_type, output_subtype, output_encoding) == OK) {
1682 ct->c_cefile.ce_file = file;
1683 ct->c_cefile.ce_unlink = 1;
1684 } else {
1685 ct->c_cefile.ce_unlink = 0;
1686 status = NOTOK;
1687 }
1688
1689 return status;
1690 }
1691
1692
1693 /*
1694 * Fill in a multipart/alternative part.
1695 */
1696 static CT
1697 build_multipart_alt (CT first_alt, CT new_part, int type, int subtype) {
1698 char *boundary_prefix = "----=_nmh-multipart";
1699 char *boundary = concat (boundary_prefix, first_alt->c_partno, NULL);
1700 char *boundary_indicator = "; boundary=";
1701 char *typename, *subtypename, *name;
1702 CT ct;
1703 struct part *p;
1704 struct multipart *m;
1705 const struct str2init *ctinit;
1706
1707 NEW0(ct);
1708
1709 /* Set up the multipart/alternative part. These fields of *ct were
1710 initialized to 0 by mh_xcalloc():
1711 c_fp, c_unlink, c_begin, c_end,
1712 c_vrsn, c_ctline, c_celine,
1713 c_id, c_descr, c_dispo, c_partno,
1714 c_ctinfo.ci_comment, c_ctinfo.ci_magic,
1715 c_cefile, c_encoding,
1716 c_digested, c_digest[16], c_ctexbody,
1717 c_ctinitfnx, c_ceopenfnx, c_ceclosefnx, c_cesizefnx,
1718 c_umask, c_rfc934,
1719 c_showproc, c_termproc, c_storeproc, c_storage, c_folder
1720 */
1721
1722 ct->c_file = mh_xstrdup (first_alt->c_file);
1723 ct->c_type = type;
1724 ct->c_subtype = subtype;
1725
1726 ctinit = get_ct_init (ct->c_type);
1727
1728 typename = ct_type_str (type);
1729 subtypename = ct_subtype_str (type, subtype);
1730
1731 {
1732 int serial = 0;
1733 int found_boundary = 1;
1734
1735 while (found_boundary && serial < 1000000) {
1736 found_boundary = 0;
1737
1738 /* Ensure that the boundary doesn't appear in the decoded
1739 content. */
1740 if (new_part->c_cefile.ce_file) {
1741 if ((found_boundary =
1742 boundary_in_content (&new_part->c_cefile.ce_fp,
1743 new_part->c_cefile.ce_file,
1744 boundary)) == NOTOK) {
1745 free_content (ct);
1746 return NULL;
1747 }
1748 }
1749
1750 /* Ensure that the boundary doesn't appear in the encoded
1751 content. */
1752 if (! found_boundary && new_part->c_file) {
1753 if ((found_boundary =
1754 boundary_in_content (&new_part->c_fp,
1755 new_part->c_file,
1756 boundary)) == NOTOK) {
1757 free_content (ct);
1758 return NULL;
1759 }
1760 }
1761
1762 if (found_boundary) {
1763 /* Try a slightly different boundary. */
1764 char buffer2[16];
1765
1766 free (boundary);
1767 ++serial;
1768 snprintf (buffer2, sizeof buffer2, "%d", serial);
1769 boundary =
1770 concat (boundary_prefix,
1771 FENDNULL(first_alt->c_partno),
1772 "-", buffer2, NULL);
1773 }
1774 }
1775
1776 if (found_boundary) {
1777 inform("giving up trying to find a unique boundary");
1778 free_content (ct);
1779 return NULL;
1780 }
1781 }
1782
1783 name = concat (" ", typename, "/", subtypename, boundary_indicator, "\"",
1784 boundary, "\"", NULL);
1785
1786 /* Load c_first_hf and c_last_hf. */
1787 transfer_noncontent_headers (first_alt, ct);
1788 add_header (ct, mh_xstrdup (TYPE_FIELD), concat (name, "\n", NULL));
1789 free (name);
1790
1791 /* Load c_partno. */
1792 if (first_alt->c_partno) {
1793 ct->c_partno = mh_xstrdup (first_alt->c_partno);
1794 free (first_alt->c_partno);
1795 first_alt->c_partno = concat (ct->c_partno, ".1", NULL);
1796 new_part->c_partno = concat (ct->c_partno, ".2", NULL);
1797 } else {
1798 first_alt->c_partno = mh_xstrdup ("1");
1799 new_part->c_partno = mh_xstrdup ("2");
1800 }
1801
1802 if (ctinit) {
1803 ct->c_ctinfo.ci_type = mh_xstrdup (typename);
1804 ct->c_ctinfo.ci_subtype = mh_xstrdup (subtypename);
1805 }
1806
1807 add_param(&ct->c_ctinfo.ci_first_pm, &ct->c_ctinfo.ci_last_pm,
1808 "boundary", boundary, 0);
1809
1810 NEW(p);
1811 NEW(p->mp_next);
1812 p->mp_next->mp_next = NULL;
1813 p->mp_next->mp_part = first_alt;
1814
1815 NEW0(m);
1816 m->mp_start = concat (boundary, "\n", NULL);
1817 m->mp_stop = concat (boundary, "--\n", NULL);
1818 m->mp_parts = p;
1819 ct->c_ctparams = m;
1820
1821 free (boundary);
1822
1823 return ct;
1824 }
1825
1826
1827 /*
1828 * Check that the boundary does not appear in the content.
1829 */
1830 static int
1831 boundary_in_content (FILE **fp, char *file, const char *boundary) {
1832 char buffer[NMH_BUFSIZ];
1833 size_t bytes_read;
1834 int found_boundary = 0;
1835
1836 /* free_content() will close *fp if we fopen it here. */
1837 if (! *fp && (*fp = fopen (file, "r")) == NULL) {
1838 advise (file, "unable to open %s for reading", file);
1839 return NOTOK;
1840 }
1841
1842 fseeko (*fp, 0L, SEEK_SET);
1843 while ((bytes_read = fread (buffer, 1, sizeof buffer, *fp)) > 0) {
1844 if (find_str (buffer, bytes_read, boundary)) {
1845 found_boundary = 1;
1846 break;
1847 }
1848 }
1849
1850 return found_boundary;
1851 }
1852
1853
1854 /*
1855 * Remove all non-Content headers.
1856 */
1857 static void
1858 transfer_noncontent_headers (CT old, CT new) {
1859 HF hp, hp_prev;
1860
1861 hp_prev = hp = old->c_first_hf;
1862 while (hp) {
1863 HF next = hp->next;
1864
1865 if (strncasecmp (XXX_FIELD_PRF, hp->name, LEN(XXX_FIELD_PRF))) {
1866 if (hp == old->c_last_hf) {
1867 if (hp == old->c_first_hf) {
1868 old->c_last_hf = old->c_first_hf = NULL;
1869 } else {
1870 hp_prev->next = NULL;
1871 old->c_last_hf = hp_prev;
1872 }
1873 } else {
1874 if (hp == old->c_first_hf) {
1875 old->c_first_hf = next;
1876 } else {
1877 hp_prev->next = next;
1878 }
1879 }
1880
1881 /* Put node hp in the new CT. */
1882 if (new->c_first_hf == NULL) {
1883 new->c_first_hf = hp;
1884 } else {
1885 new->c_last_hf->next = hp;
1886 }
1887 new->c_last_hf = hp;
1888 } else {
1889 /* A Content- header, leave in old. */
1890 hp_prev = hp;
1891 }
1892
1893 hp = next;
1894 }
1895 }
1896
1897
1898 /*
1899 * Set content type.
1900 */
1901 static int
1902 set_ct_type (CT ct, int type, int subtype, int encoding) {
1903 char *typename = ct_type_str (type);
1904 char *subtypename = ct_subtype_str (type, subtype);
1905 /* E.g, " text/plain" */
1906 char *type_subtypename = concat (" ", typename, "/", subtypename, NULL);
1907 /* E.g, " text/plain\n" */
1908 char *name_plus_nl = concat (type_subtypename, "\n", NULL);
1909 int found_content_type = 0;
1910 HF hf;
1911 const char *cp = NULL;
1912 char *ctline;
1913 int status;
1914
1915 /* Update/add Content-Type header field. */
1916 for (hf = ct->c_first_hf; hf; hf = hf->next) {
1917 if (! strcasecmp (TYPE_FIELD, hf->name)) {
1918 found_content_type = 1;
1919 free (hf->value);
1920 hf->value = (cp = strchr (ct->c_ctline, ';'))
1921 ? concat (type_subtypename, cp, "\n", NULL)
1922 : mh_xstrdup (name_plus_nl);
1923 }
1924 }
1925 if (! found_content_type) {
1926 add_header (ct, mh_xstrdup (TYPE_FIELD),
1927 (cp = strchr (ct->c_ctline, ';'))
1928 ? concat (type_subtypename, cp, "\n", NULL)
1929 : mh_xstrdup (name_plus_nl));
1930 }
1931
1932 /* Some of these might not be used, but set them anyway. */
1933 ctline = cp
1934 ? concat (type_subtypename, cp, NULL)
1935 : concat (type_subtypename, NULL);
1936 free (ct->c_ctline);
1937 ct->c_ctline = ctline;
1938 /* Leave other ctinfo members as they were. */
1939 free (ct->c_ctinfo.ci_type);
1940 ct->c_ctinfo.ci_type = mh_xstrdup (typename);
1941 free (ct->c_ctinfo.ci_subtype);
1942 ct->c_ctinfo.ci_subtype = mh_xstrdup (subtypename);
1943 ct->c_type = type;
1944 ct->c_subtype = subtype;
1945
1946 free (name_plus_nl);
1947 free (type_subtypename);
1948
1949 status = set_ce (ct, encoding);
1950
1951 return status;
1952 }
1953
1954
1955 /*
1956 * It's not necessary to update the charset parameter of a Content-Type
1957 * header for a text part. According to RFC 2045 Sec. 6.4, the body
1958 * (content) was originally in the specified charset, "and will be in
1959 * that character set again after decoding."
1960 */
1961 static int
1962 decode_text_parts (CT ct, int encoding, const char *decodetypes,
1963 int *message_mods) {
1964 int status = OK;
1965 int lf_line_endings = 0;
1966
1967 switch (ct->c_type) {
1968 case CT_MULTIPART: {
1969 struct multipart *m = (struct multipart *) ct->c_ctparams;
1970 struct part *part;
1971
1972 /* Should check to see if the body for this part is encoded?
1973 For now, it gets passed along as-is by InitMultiPart(). */
1974 for (part = m->mp_parts; status == OK && part; part = part->mp_next) {
1975 status = decode_text_parts (part->mp_part, encoding, decodetypes,
1976 message_mods);
1977 }
1978 break;
1979 }
1980
1981 case CT_MESSAGE:
1982 if (ct->c_subtype == MESSAGE_EXTERNAL) {
1983 struct exbody *e = (struct exbody *) ct->c_ctparams;
1984
1985 status = decode_text_parts (e->eb_content, encoding, decodetypes,
1986 message_mods);
1987 }
1988 break;
1989
1990 default:
1991 if (! should_decode(decodetypes, ct->c_ctinfo.ci_type, ct->c_ctinfo.ci_subtype)) {
1992 break;
1993 }
1994
1995 lf_line_endings =
1996 ct->c_ctparams && ((struct text *) ct->c_ctparams)->lf_line_endings;
1997
1998 switch (ct->c_encoding) {
1999 case CE_BASE64:
2000 case CE_QUOTED: {
2001 int ct_encoding;
2002
2003 if (decode_part (ct) == OK && ct->c_cefile.ce_file) {
2004 const char *reason = NULL;
2005
2006 if ((ct_encoding = content_encoding (ct, &reason)) == CE_BINARY
2007 && encoding != CE_BINARY) {
2008 /* The decoding isn't acceptable so discard it.
2009 Leave status as OK to allow other transformations. */
2010 if (verbosw) {
2011 report (NULL, ct->c_partno, ct->c_file,
2012 "will not decode%s because it is binary (%s)",
2013 ct->c_partno ? ""
2014 : (FENDNULL(ct->c_ctline)),
2015 reason);
2016 }
2017 (void) m_unlink (ct->c_cefile.ce_file);
2018 free (ct->c_cefile.ce_file);
2019 ct->c_cefile.ce_file = NULL;
2020 } else if (ct->c_encoding == CE_QUOTED &&
2021 ct_encoding == CE_8BIT && encoding == CE_7BIT) {
2022 /* The decoding isn't acceptable so discard it.
2023 Leave status as OK to allow other transformations. */
2024 if (verbosw) {
2025 report (NULL, ct->c_partno, ct->c_file,
2026 "will not decode%s because it is 8bit",
2027 ct->c_partno ? ""
2028 : (FENDNULL(ct->c_ctline)));
2029 }
2030 (void) m_unlink (ct->c_cefile.ce_file);
2031 free (ct->c_cefile.ce_file);
2032 ct->c_cefile.ce_file = NULL;
2033 } else {
2034 int enc;
2035
2036 if (ct_encoding == CE_BINARY) {
2037 enc = CE_BINARY;
2038 } else if (ct_encoding == CE_8BIT && encoding == CE_7BIT) {
2039 enc = CE_QUOTED;
2040 } else {
2041 enc = ct_encoding;
2042 }
2043 if (set_ce (ct, enc) == OK) {
2044 ++*message_mods;
2045 if (verbosw) {
2046 report (NULL, ct->c_partno, ct->c_file, "decode%s",
2047 FENDNULL(ct->c_ctline));
2048 }
2049 if (lf_line_endings) {
2050 strip_crs (ct, message_mods);
2051 }
2052 } else {
2053 status = NOTOK;
2054 }
2055 }
2056 } else {
2057 status = NOTOK;
2058 }
2059 break;
2060 }
2061 case CE_8BIT:
2062 case CE_7BIT:
2063 if (lf_line_endings) {
2064 strip_crs (ct, message_mods);
2065 }
2066 break;
2067 default:
2068 break;
2069 }
2070
2071 break;
2072 }
2073
2074 return status;
2075 }
2076
2077
/*
 * Determine if the part with type[/subtype] should be decoded, according to
 * decodetypes (which came from the -decodetypes switch).  The comparison
 * is case-insensitive.  Returns 1 if the part should be decoded, 0 if not.
 */
static int
should_decode(const char *decodetypes, const char *type, const char *subtype) {
    /* Quick search for matching type[/subtype] in decodetypes: bracket
       decodetypes with commas, then search for ,type, and ,type/subtype, in
       it. */
    char *haystack = concat(",", decodetypes, ",", NULL);
    char *needle = concat(",", type, ",", NULL);
    int found_match = nmh_strcasestr(haystack, needle) != NULL;

    if (! found_match && subtype != NULL) {
        /* The bare type isn't listed, try the full type/subtype. */
        free(needle);
        needle = concat(",", type, "/", subtype, ",", NULL);
        found_match = nmh_strcasestr(haystack, needle) != NULL;
    }

    free(needle);
    free(haystack);

    return found_match;
}
2109
2110
2111 /*
2112 * See if the decoded content is 7bit, 8bit, or binary. It's binary
2113 * if it has any NUL characters, a CR not followed by a LF, or lines
2114 * greater than 998 characters in length. If binary, reason is set
2115 * to a string explaining why.
2116 */
2117 static int
2118 content_encoding (CT ct, const char **reason) {
2119 CE ce = &ct->c_cefile;
2120 int encoding = CE_7BIT;
2121
2122 if (ce->ce_file) {
2123 size_t line_len = 0;
2124 char buffer[NMH_BUFSIZ];
2125 size_t inbytes;
2126
2127 if (! ce->ce_fp && (ce->ce_fp = fopen (ce->ce_file, "r")) == NULL) {
2128 advise (ce->ce_file, "unable to open for reading");
2129 return CE_UNKNOWN;
2130 }
2131
2132 fseeko (ce->ce_fp, 0L, SEEK_SET);
2133 while (encoding != CE_BINARY &&
2134 (inbytes = fread (buffer, 1, sizeof buffer, ce->ce_fp)) > 0) {
2135 char *cp;
2136 size_t i;
2137 int last_char_was_cr = 0;
2138
2139 for (i = 0, cp = buffer; i < inbytes; ++i, ++cp) {
2140 if (*cp == '\0' || ++line_len > 998 ||
2141 (*cp != '\n' && last_char_was_cr)) {
2142 encoding = CE_BINARY;
2143 if (*cp == '\0') {
2144 *reason = "null character";
2145 } else if (line_len > 998) {
2146 *reason = "line length > 998";
2147 } else if (*cp != '\n' && last_char_was_cr) {
2148 *reason = "CR not followed by LF";
2149 } else {
2150 /* Should not reach this. */
2151 *reason = "";
2152 }
2153 break;
2154 }
2155 if (*cp == '\n') {
2156 line_len = 0;
2157 } else if (! isascii ((unsigned char) *cp)) {
2158 encoding = CE_8BIT;
2159 }
2160
2161 last_char_was_cr = *cp == '\r';
2162 }
2163 }
2164
2165 fclose (ce->ce_fp);
2166 ce->ce_fp = NULL;
2167 } /* else should never happen */
2168
2169 return encoding;
2170 }
2171
2172
2173 /*
2174 * Strip carriage returns from content.
2175 */
2176 static int
2177 strip_crs (CT ct, int *message_mods) {
2178 char *charset = content_charset (ct);
2179 int status = OK;
2180
2181 /* Only strip carriage returns if content is ASCII or another
2182 charset that has the same readily recognizable CR followed by a
2183 LF. We can include UTF-8 here because if the high-order bit of
2184 a UTF-8 byte is 0, then it must be a single-byte ASCII
2185 character. */
2186 if (! strcasecmp (charset, "US-ASCII") ||
2187 ! strcasecmp (charset, "UTF-8") ||
2188 ! strncasecmp (charset, "ISO-8859-", 9) ||
2189 ! strncasecmp (charset, "WINDOWS-12", 10)) {
2190 char **file = NULL;
2191 FILE **fp = NULL;
2192 size_t begin;
2193 size_t end;
2194 int has_crs = 0;
2195 int opened_input_file = 0;
2196
2197 if (ct->c_cefile.ce_file) {
2198 file = &ct->c_cefile.ce_file;
2199 fp = &ct->c_cefile.ce_fp;
2200 begin = end = 0;
2201 } else if (ct->c_file) {
2202 file = &ct->c_file;
2203 fp = &ct->c_fp;
2204 begin = (size_t) ct->c_begin;
2205 end = (size_t) ct->c_end;
2206 } /* else don't know where the content is */
2207
2208 if (file && *file && fp) {
2209 if (! *fp) {
2210 if ((*fp = fopen (*file, "r")) == NULL) {
2211 advise (*file, "unable to open for reading");
2212 status = NOTOK;
2213 } else {
2214 opened_input_file = 1;
2215 }
2216 }
2217 }
2218
2219 if (fp && *fp) {
2220 char buffer[NMH_BUFSIZ];
2221 size_t bytes_read;
2222 size_t bytes_to_read =
2223 end > 0 && end > begin ? end - begin : sizeof buffer;
2224
2225 fseeko (*fp, begin, SEEK_SET);
2226 while ((bytes_read = fread (buffer, 1,
2227 min (bytes_to_read, sizeof buffer),
2228 *fp)) > 0) {
2229 /* Look for CR followed by a LF. This is supposed to
2230 be text so there should be LF's. If not, don't
2231 modify the content. */
2232 char *cp;
2233 size_t i;
2234 int last_char_was_cr = 0;
2235
2236 if (end > 0) { bytes_to_read -= bytes_read; }
2237
2238 for (i = 0, cp = buffer; i < bytes_read; ++i, ++cp) {
2239 if (*cp == '\n' && last_char_was_cr) {
2240 has_crs = 1;
2241 break;
2242 }
2243
2244 last_char_was_cr = *cp == '\r';
2245 }
2246 }
2247
2248 if (has_crs) {
2249 int fd;
2250 char *stripped_content_file;
2251 char *tempfile = m_mktemp2 (NULL, invo_name, &fd, NULL);
2252
2253 if (tempfile == NULL) {
2254 adios (NULL, "unable to create temporary file in %s",
2255 get_temp_dir());
2256 }
2257 stripped_content_file = mh_xstrdup (tempfile);
2258
2259 /* Strip each CR before a LF from the content. */
2260 fseeko (*fp, begin, SEEK_SET);
2261 while ((bytes_read = fread (buffer, 1, sizeof buffer, *fp)) >
2262 0) {
2263 char *cp;
2264 size_t i;
2265 int last_char_was_cr = 0;
2266
2267 for (i = 0, cp = buffer; i < bytes_read; ++i, ++cp) {
2268 if (*cp == '\r') {
2269 last_char_was_cr = 1;
2270 } else if (last_char_was_cr) {
2271 if (*cp != '\n') {
2272 if (write (fd, "\r", 1) < 0) {
2273 advise (tempfile, "CR write");
2274 }
2275 }
2276 if (write (fd, cp, 1) < 0) {
2277 advise (tempfile, "write");
2278 }
2279 last_char_was_cr = 0;
2280 } else {
2281 if (write (fd, cp, 1) < 0) {
2282 advise (tempfile, "write");
2283 }
2284 last_char_was_cr = 0;
2285 }
2286 }
2287 }
2288
2289 if (close (fd)) {
2290 inform("unable to write temporary file %s, continuing...",
2291 stripped_content_file);
2292 (void) m_unlink (stripped_content_file);
2293 status = NOTOK;
2294 } else {
2295 /* Replace the decoded file with the converted one. */
2296 if (ct->c_cefile.ce_file && ct->c_cefile.ce_unlink)
2297 (void) m_unlink (ct->c_cefile.ce_file);
2298
2299 mh_xfree(ct->c_cefile.ce_file);
2300 ct->c_cefile.ce_file = stripped_content_file;
2301 ct->c_cefile.ce_unlink = 1;
2302
2303 ++*message_mods;
2304 if (verbosw) {
2305 report (NULL, ct->c_partno,
2306 begin == 0 && end == 0 ? "" : *file,
2307 "stripped CRs");
2308 }
2309 }
2310 }
2311
2312 if (opened_input_file) {
2313 fclose (*fp);
2314 *fp = NULL;
2315 }
2316 }
2317 }
2318
2319 free (charset);
2320
2321 return status;
2322 }
2323
2324
2325 /*
2326 * Add/update, if necessary, the message C-T-E, based on the least restrictive
2327 * of the part C-T-E's.
2328 */
2329 static void
2330 update_cte (CT ct) {
2331 const int least_restrictive_enc = least_restrictive_encoding (ct);
2332
2333 if (least_restrictive_enc != CE_UNKNOWN &&
2334 least_restrictive_enc != CE_7BIT) {
2335 char *cte = concat (" ", ce_str (least_restrictive_enc), "\n", NULL);
2336 HF hf;
2337 int found_cte = 0;
2338
2339 /* Update/add Content-Transfer-Encoding header field. */
2340 for (hf = ct->c_first_hf; hf; hf = hf->next) {
2341 if (! strcasecmp (ENCODING_FIELD, hf->name)) {
2342 found_cte = 1;
2343 free (hf->value);
2344 hf->value = cte;
2345 }
2346 }
2347 if (! found_cte) {
2348 add_header (ct, mh_xstrdup (ENCODING_FIELD), cte);
2349 }
2350 }
2351 }
2352
2353
2354 /*
2355 * Find the least restrictive encoding (7bit, 8bit, binary) of the parts
2356 * within a message.
2357 */
2358 static int
2359 least_restrictive_encoding (CT ct) {
2360 int encoding = CE_UNKNOWN;
2361
2362 switch (ct->c_type) {
2363 case CT_MULTIPART: {
2364 struct multipart *m = (struct multipart *) ct->c_ctparams;
2365 struct part *part;
2366
2367 for (part = m->mp_parts; part; part = part->mp_next) {
2368 const int part_encoding =
2369 least_restrictive_encoding (part->mp_part);
2370
2371 if (less_restrictive (encoding, part_encoding)) {
2372 encoding = part_encoding;
2373 }
2374 }
2375 break;
2376 }
2377
2378 case CT_MESSAGE:
2379 if (ct->c_subtype == MESSAGE_EXTERNAL) {
2380 struct exbody *e = (struct exbody *) ct->c_ctparams;
2381 const int part_encoding =
2382 least_restrictive_encoding (e->eb_content);
2383
2384 if (less_restrictive (encoding, part_encoding)) {
2385 encoding = part_encoding;
2386 }
2387 }
2388 break;
2389
2390 default: {
2391 if (less_restrictive (encoding, ct->c_encoding)) {
2392 encoding = ct->c_encoding;
2393 }
2394 }}
2395
2396 return encoding;
2397 }
2398
2399
2400 /*
2401 * Return whether the second encoding is less restrictive than the first, where
2402 * "less restrictive" is in the sense used by RFC 2045 Secs. 6.1 and 6.4. So,
2403 * CE_BINARY is less restrictive than CE_8BIT and
2404 * CE_8BIT is less restrictive than CE_7BIT.
2405 */
2406 static int
2407 less_restrictive (int encoding, int second_encoding) {
2408 switch (second_encoding) {
2409 case CE_BINARY:
2410 return encoding != CE_BINARY;
2411 case CE_8BIT:
2412 return encoding != CE_BINARY && encoding != CE_8BIT;
2413 case CE_7BIT:
2414 return encoding != CE_BINARY && encoding != CE_8BIT &&
2415 encoding != CE_7BIT;
2416 default :
2417 return 0;
2418 }
2419 }
2420
2421
2422 /*
2423 * Convert character set of each part.
2424 */
2425 static int
2426 convert_charsets (CT ct, char *dest_charset, int *message_mods) {
2427 int status = OK;
2428
2429 switch (ct->c_type) {
2430 case CT_TEXT:
2431 if (ct->c_subtype == TEXT_PLAIN) {
2432 status = convert_charset (ct, dest_charset, message_mods);
2433 if (status == OK) {
2434 if (verbosw) {
2435 char *ct_charset = content_charset (ct);
2436
2437 report (NULL, ct->c_partno, ct->c_file,
2438 "convert %s to %s", ct_charset, dest_charset);
2439 free (ct_charset);
2440 }
2441 } else {
2442 char *ct_charset = content_charset (ct);
2443
2444 report ("iconv", ct->c_partno, ct->c_file,
2445 "failed to convert %s to %s", ct_charset, dest_charset);
2446 free (ct_charset);
2447 }
2448 }
2449 break;
2450
2451 case CT_MULTIPART: {
2452 struct multipart *m = (struct multipart *) ct->c_ctparams;
2453 struct part *part;
2454
2455 /* Should check to see if the body for this part is encoded?
2456 For now, it gets passed along as-is by InitMultiPart(). */
2457 for (part = m->mp_parts; status == OK && part; part = part->mp_next) {
2458 status =
2459 convert_charsets (part->mp_part, dest_charset, message_mods);
2460 }
2461 break;
2462 }
2463
2464 case CT_MESSAGE:
2465 if (ct->c_subtype == MESSAGE_EXTERNAL) {
2466 struct exbody *e = (struct exbody *) ct->c_ctparams;
2467
2468 status =
2469 convert_charsets (e->eb_content, dest_charset, message_mods);
2470 }
2471 break;
2472
2473 default:
2474 break;
2475 }
2476
2477 return status;
2478 }
2479
2480
2481 /*
2482 * Fix various problems that aren't handled elsewhere. These
2483 * are fixed unconditionally: there are no switches to disable
2484 * them. Currently, "problems" are these:
2485 * 1) remove extraneous semicolon at the end of a header parameter list
2486 * 2) replace RFC 2047 encoding with RFC 2231 encoding of name and
2487 * filename parameters in Content-Type and Content-Disposition
2488 * headers, respectively.
2489 */
2490 static int
2491 fix_always (CT ct, int *message_mods) {
2492 int status = OK;
2493
2494 switch (ct->c_type) {
2495 case CT_MULTIPART: {
2496 struct multipart *m = (struct multipart *) ct->c_ctparams;
2497 struct part *part;
2498
2499 for (part = m->mp_parts; status == OK && part; part = part->mp_next) {
2500 status = fix_always (part->mp_part, message_mods);
2501 }
2502 break;
2503 }
2504
2505 case CT_MESSAGE:
2506 if (ct->c_subtype == MESSAGE_EXTERNAL) {
2507 struct exbody *e = (struct exbody *) ct->c_ctparams;
2508
2509 status = fix_always (e->eb_content, message_mods);
2510 }
2511 break;
2512
2513 default: {
2514 HF hf;
2515
2516 if (ct->c_first_hf) {
2517 fix_filename_encoding (ct);
2518 }
2519
2520 for (hf = ct->c_first_hf; hf; hf = hf->next) {
2521 size_t len = strlen (hf->value);
2522
2523 if (strcasecmp (hf->name, TYPE_FIELD) != 0 &&
2524 strcasecmp (hf->name, DISPO_FIELD) != 0) {
2525 /* Only do this for Content-Type and
2526 Content-Disposition fields because those are the
2527 only headers that parse_mime() warns about. */
2528 continue;
2529 }
2530
2531 /* whitespace following a trailing ';' will be nuked as well */
2532 if (hf->value[len - 1] == '\n') {
2533 while (isspace((unsigned char)(hf->value[len - 2]))) {
2534 if (len-- == 0) { break; }
2535 }
2536 }
2537
2538 if (hf->value[len - 2] == ';') {
2539 /* Remove trailing ';' from parameter value. */
2540 hf->value[len - 2] = '\n';
2541 hf->value[len - 1] = '\0';
2542
2543 /* Also, if Content-Type parameter, remove trailing ';'
2544 from ct->c_ctline. This probably isn't necessary
2545 but can't hurt. */
2546 if (strcasecmp(hf->name, TYPE_FIELD) == 0 && ct->c_ctline) {
2547 size_t l = strlen(ct->c_ctline) - 1;
2548 while (isspace((unsigned char)(ct->c_ctline[l])) ||
2549 ct->c_ctline[l] == ';') {
2550 ct->c_ctline[l--] = '\0';
2551 if (l == 0) { break; }
2552 }
2553 }
2554
2555 ++*message_mods;
2556 if (verbosw) {
2557 report (NULL, ct->c_partno, ct->c_file,
2558 "remove trailing ; from %s parameter value",
2559 hf->name);
2560 }
2561 }
2562 }
2563 }}
2564
2565 return status;
2566 }
2567
2568
2569 /*
2570 * Factor out common code for loops in fix_filename_encoding().
2571 */
2572 static int
2573 fix_filename_param (char *name, char *value, PM *first_pm, PM *last_pm) {
2574 int fixed = 0;
2575
2576 if (has_prefix(value, "=?") && has_suffix(value, "?=")) {
2577 /* Looks like an RFC 2047 encoded parameter. */
2578 char decoded[PATH_MAX + 1];
2579
2580 if (decode_rfc2047 (value, decoded, sizeof decoded)) {
2581 /* Encode using RFC 2231. */
2582 replace_param (first_pm, last_pm, name, decoded, 0);
2583 fixed = 1;
2584 } else {
2585 inform("failed to decode %s parameter %s", name, value);
2586 }
2587 }
2588
2589 return fixed;
2590 }
2591
2592
2593 /*
2594 * Replace RFC 2047 encoding with RFC 2231 encoding of name and
2595 * filename parameters in Content-Type and Content-Disposition
2596 * headers, respectively.
2597 */
2598 static int
2599 fix_filename_encoding (CT ct) {
2600 PM pm;
2601 HF hf;
2602 int fixed = 0;
2603
2604 for (pm = ct->c_ctinfo.ci_first_pm; pm; pm = pm->pm_next) {
2605 if (pm->pm_name && pm->pm_value &&
2606 strcasecmp (pm->pm_name, "name") == 0) {
2607 fixed = fix_filename_param (pm->pm_name, pm->pm_value,
2608 &ct->c_ctinfo.ci_first_pm,
2609 &ct->c_ctinfo.ci_last_pm);
2610 }
2611 }
2612
2613 for (pm = ct->c_dispo_first; pm; pm = pm->pm_next) {
2614 if (pm->pm_name && pm->pm_value &&
2615 strcasecmp (pm->pm_name, "filename") == 0) {
2616 fixed = fix_filename_param (pm->pm_name, pm->pm_value,
2617 &ct->c_dispo_first,
2618 &ct->c_dispo_last);
2619 }
2620 }
2621
2622 /* Fix hf values to correspond. */
2623 for (hf = ct->c_first_hf; fixed && hf; hf = hf->next) {
2624 enum { OTHER, TYPE_HEADER, DISPO_HEADER } field = OTHER;
2625
2626 if (strcasecmp (hf->name, TYPE_FIELD) == 0) {
2627 field = TYPE_HEADER;
2628 } else if (strcasecmp (hf->name, DISPO_FIELD) == 0) {
2629 field = DISPO_HEADER;
2630 }
2631
2632 if (field != OTHER) {
2633 const char *const semicolon_loc = strchr (hf->value, ';');
2634
2635 if (semicolon_loc) {
2636 const size_t len =
2637 strlen (hf->name) + 1 + semicolon_loc - hf->value;
2638 const char *const params =
2639 output_params (len,
2640 field == TYPE_HEADER
2641 ? ct->c_ctinfo.ci_first_pm
2642 : ct->c_dispo_first,
2643 NULL, 0);
2644 const char *const new_params = concat (params, "\n", NULL);
2645
2646 replace_substring (&hf->value, semicolon_loc, new_params);
2647 free((void *)new_params); /* Cast away const. Sigh. */
2648 free((void *)params);
2649 } else {
2650 inform("did not find semicolon in %s:%s\n",
2651 hf->name, hf->value);
2652 }
2653 }
2654 }
2655
2656 return OK;
2657 }
2658
2659
2660 /*
2661 * Output content in input file to output file.
2662 */
2663 static int
2664 write_content (CT ct, const char *input_filename, char *outfile, FILE *outfp,
2665 int modify_inplace, int message_mods) {
2666 int status = OK;
2667
2668 if (modify_inplace) {
2669 if (message_mods > 0) {
2670 if ((status = output_message_fp (ct, outfp, outfile)) == OK) {
2671 char *infile = input_filename
2672 ? mh_xstrdup (input_filename)
2673 : mh_xstrdup (ct->c_file ? ct->c_file : "-");
2674
2675 if (remove_file (infile) == OK) {
2676 if (rename (outfile, infile)) {
2677 /* Rename didn't work, possibly because of an
2678 attempt to rename across filesystems. Try
2679 brute force copy. */
2680 int old = open (outfile, O_RDONLY);
2681 int new =
2682 open (infile, O_WRONLY | O_CREAT, m_gmprot ());
2683 int i = -1;
2684
2685 if (old != -1 && new != -1) {
2686 char buffer[NMH_BUFSIZ];
2687
2688 while ((i = read (old, buffer, sizeof buffer)) >
2689 0) {
2690 if (write (new, buffer, i) != i) {
2691 i = -1;
2692 break;
2693 }
2694 }
2695 }
2696 if (new != -1) { close (new); }
2697 if (old != -1) { close (old); }
2698 (void) m_unlink (outfile);
2699
2700 if (i < 0) {
2701 /* The -file argument processing used path() to
2702 expand filename to absolute path. */
2703 int file = ct->c_file && ct->c_file[0] == '/';
2704
2705 inform("unable to rename %s %s to %s, continuing...",
2706 file ? "file" : "message", outfile,
2707 infile);
2708 status = NOTOK;
2709 }
2710 }
2711 } else {
2712 inform("unable to remove input file %s, "
2713 "not modifying it, continuing...", infile);
2714 (void) m_unlink (outfile);
2715 status = NOTOK;
2716 }
2717
2718 free (infile);
2719 } else {
2720 status = NOTOK;
2721 }
2722 } else {
2723 /* No modifications and didn't need the tmp outfile. */
2724 (void) m_unlink (outfile);
2725 }
2726 } else {
2727 /* Output is going to some file. Produce it whether or not
2728 there were modifications. */
2729 status = output_message_fp (ct, outfp, outfile);
2730 }
2731
2732 flush_errors ();
2733 return status;
2734 }
2735
2736
2737 /*
2738 * parse_mime() does not set lf_line_endings in struct text, so use this
2739 * function to do it. It touches the parts the decodetypes identifies.
2740 */
2741 static void
2742 set_text_ctparams(CT ct, char *decodetypes, int lf_line_endings) {
2743 switch (ct->c_type) {
2744 case CT_MULTIPART: {
2745 struct multipart *m = (struct multipart *) ct->c_ctparams;
2746 struct part *part;
2747
2748 for (part = m->mp_parts; part; part = part->mp_next) {
2749 set_text_ctparams(part->mp_part, decodetypes, lf_line_endings);
2750 }
2751 break;
2752 }
2753
2754 case CT_MESSAGE:
2755 if (ct->c_subtype == MESSAGE_EXTERNAL) {
2756 struct exbody *e = (struct exbody *) ct->c_ctparams;
2757
2758 set_text_ctparams(e->eb_content, decodetypes, lf_line_endings);
2759 }
2760 break;
2761
2762 default:
2763 if (should_decode(decodetypes, ct->c_ctinfo.ci_type, ct->c_ctinfo.ci_subtype)) {
2764 if (ct->c_ctparams == NULL) {
2765 ct->c_ctparams = mh_xcalloc(1, sizeof (struct text));
2766 }
2767 ((struct text *) ct->c_ctparams)->lf_line_endings = lf_line_endings;
2768 }
2769 }
2770 }
2771
2772
2773 /*
2774 * If "rmmproc" is defined, call that to remove the file. Otherwise,
2775 * use the standard MH backup file.
2776 */
2777 static int
2778 remove_file (const char *file) {
2779 if (rmmproc) {
2780 char *rmm_command = concat (rmmproc, " ", file, NULL);
2781 int status = system (rmm_command);
2782
2783 free (rmm_command);
2784 return WIFEXITED (status) ? WEXITSTATUS (status) : NOTOK;
2785 }
2786 /* This is OK for a non-message file, it still uses the
2787 BACKUP_PREFIX form. The backup file will be in the same
2788 directory as file. */
2789 return rename (file, m_backup (file));
2790 }
2791
2792
2793 /*
2794 * Output formatted message to user.
2795 */
2796 static void
2797 report (char *what, char *partno, char *filename, char *message, ...) {
2798 va_list args;
2799 char *fmt;
2800
2801 if (verbosw) {
2802 va_start (args, message);
2803 fmt = concat (filename, partno ? " part " : ", ",
2804 FENDNULL(partno), partno ? ", " : "", message, NULL);
2805
2806 advertise (what, NULL, fmt, args);
2807
2808 free (fmt);
2809 va_end (args);
2810 }
2811 }
2812
2813
/*
 * Signal handler:  terminate via done (1).  For SIGQUIT, stdout is
 * flushed and a newline is written to stderr first.
 */
static void
pipeser (int i)
{
    const int quit_requested = (i == SIGQUIT);

    if (quit_requested) {
        fflush (stdout);
        fputs ("\n", stderr);
        fflush (stderr);
    }

    done (1);
    /* NOTREACHED */
}