]> diplodocus.org Git - nmh/blob - uip/mhfixmsg.c
lock_file.c: close(2) file descriptor on failure, avoiding leak.
[nmh] / uip / mhfixmsg.c
1 /* mhfixmsg.c -- rewrite a message with various transformations
2 *
3 * This code is Copyright (c) 2002 and 2013, by the authors of nmh.
4 * See the COPYRIGHT file in the root directory of the nmh
5 * distribution for complete copyright information.
6 */
7
8 #include <h/mh.h>
9 #include <h/fmt_scan.h>
10 #include <h/mime.h>
11 #include <h/mhparse.h>
12 #include <h/utils.h>
13 #include <h/signals.h>
14 #include "sbr/m_maildir.h"
15 #include "sbr/m_mktemp.h"
16 #include "sbr/mime_type.h"
17 #include "mhmisc.h"
18 #include "mhfree.h"
19 #include "mhoutsbr.h"
20 #include "mhshowsbr.h"
21 #include <fcntl.h>
22
/*
 * Command-line switch table, written as an X-macro list.  Each X() entry
 * carries the switch text, a minimum-characters value, and the enumerator
 * that main() uses as the corresponding case label.  The list is expanded
 * twice below with two different definitions of X.
 */
23 #define MHFIXMSG_SWITCHES \
24 X("decodetext 8bit|7bit|binary", 0, DECODETEXTSW) \
25 X("nodecodetext", 0, NDECODETEXTSW) \
26 X("decodetypes", 0, DECODETYPESW) \
27 X("crlflinebreaks", 0, CRLFLINEBREAKSSW) \
28 X("nocrlflinebreaks", 0, NCRLFLINEBREAKSSW) \
29 X("textcharset", 0, TEXTCHARSETSW) \
30 X("notextcharset", 0, NTEXTCHARSETSW) \
31 X("reformat", 0, REFORMATSW) \
32 X("noreformat", 0, NREFORMATSW) \
33 X("replacetextplain", 0, REPLACETEXTPLAINSW) \
34 X("noreplacetextplain", 0, NREPLACETEXTPLAINSW) \
35 X("fixboundary", 0, FIXBOUNDARYSW) \
36 X("nofixboundary", 0, NFIXBOUNDARYSW) \
37 X("fixcte", 0, FIXCOMPOSITECTESW) \
38 X("nofixcte", 0, NFIXCOMPOSITECTESW) \
39 X("fixtype mimetype", 0, FIXTYPESW) \
40 X("file file", 0, FILESW) \
41 X("outfile file", 0, OUTFILESW) \
42 X("rmmproc program", 0, RPROCSW) \
43 X("normmproc", 0, NRPRCSW) \
44 X("changecur", 0, CHGSW) \
45 X("nochangecur", 0, NCHGSW) \
46 X("verbose", 0, VERBSW) \
47 X("noverbose", 0, NVERBSW) \
48 X("version", 0, VERSIONSW) \
49 X("help", 0, HELPSW) \
50
/* First expansion: X yields just the enumerator name of each switch. */
51 #define X(sw, minchars, id) id,
52 DEFINE_SWITCH_ENUM(MHFIXMSG);
53 #undef X
54
/* Second expansion: X yields one { text, minchars, id } initializer per
   switch for the array named switches, which main() passes to smatch(). */
55 #define X(sw, minchars, id) { sw, minchars, id },
56 DEFINE_SWITCH_ARRAY(MHFIXMSG, switches);
57 #undef X
58
59
/* Set to 1 by -verbose and 0 by -noverbose; tested before report() calls. */
60 int verbosw;
61 int debugsw; /* Needed by mhparse.c. */
62
/* main() installs quitser for SIGQUIT and pipeser for SIGPIPE; this alias
   makes both signals share the pipeser handler. */
63 #define quitser pipeser
64
65 /*
66 * static prototypes
67 */
/* The set of transformations selected on the command line.  main() fills in
   the defaults and the switch overrides, and mhfixmsgsbr() reads them. */
68 typedef struct fix_transformations {
/* Nonzero (the default) to run fix_boundary(). */
69 int fixboundary;
/* Nonzero (the default) to run fix_composite_cte(). */
70 int fixcompositecte;
/* type/subtype strings collected from -fixtype, or NULL if none given. */
71 svector_t fixtypes;
/* Nonzero (the default) to run ensure_text_plain(). */
72 int reformat;
/* Passed to ensure_text_plain(); set by -replacetextplain. */
73 int replacetextplain;
/* CE_8BIT (default), CE_7BIT or CE_BINARY from -decodetext, or 0 for
   -nodecodetext, which skips decode_text_parts(). */
74 int decodetext;
/* Argument of -decodetypes; defaults to "text,application/ics". */
75 char *decodetypes;
76 /* Nonzero selects LF line endings; zero (the default) keeps CRLF linebreaks, per RFC 2046 Sec. 4.1.1, par.1.  Cleared by -crlflinebreaks and set by -nocrlflinebreaks. */
77 int lf_line_endings;
/* Argument of -textcharset, or NULL to skip convert_charsets(). */
78 char *textcharset;
79 } fix_transformations;
80
/* Prototypes for the file-local functions. */
81 static int mhfixmsgsbr (CT *, char *, const fix_transformations *,
82 FILE **, char *, FILE **);
83 static int fix_boundary (CT *, int *);
84 static int copy_input_to_output (const char *, FILE *, const char *, FILE *);
85 static int get_multipart_boundary (CT, char **);
86 static int replace_boundary (CT, char *, char *);
87 static int fix_types (CT, svector_t, int *);
88 static char *replace_substring (char **, const char *, const char *);
89 static char *remove_parameter (char *, const char *);
90 static int fix_composite_cte (CT, int *);
91 static int set_ce (CT, int);
92 static int ensure_text_plain (CT *, CT, int *, int);
93 static int find_textplain_sibling (CT, int, int *);
94 static int insert_new_text_plain_part (CT, int, CT);
95 static CT build_text_plain_part (CT);
96 static int insert_into_new_mp_alt (CT *, int *);
97 static CT divide_part (CT);
98 static void copy_ctinfo (CI, CI);
99 static int decode_part (CT);
100 static int reformat_part (CT, char *, char *, char *, int);
101 static CT build_multipart_alt (CT, CT, int, int);
102 static int boundary_in_content (FILE **, char *, const char *);
103 static void transfer_noncontent_headers (CT, CT);
104 static int set_ct_type (CT, int type, int subtype, int encoding);
105 static int decode_text_parts (CT, int, const char *, int *);
106 static int should_decode(const char *, const char *, const char *);
107 static int content_encoding (CT, const char **);
108 static int strip_crs (CT, int *);
109 static void update_cte (CT);
110 static int least_restrictive_encoding (CT) PURE;
111 static int less_restrictive (int, int);
112 static int convert_charsets (CT, char *, int *);
113 static int fix_always (CT, int *);
114 static int fix_filename_param (char *, char *, PM *, PM *);
115 static int fix_filename_encoding (CT);
116 static int write_content (CT, const char *, char *, FILE *, int, int);
117 static void set_text_ctparams(CT, char *, int);
118 static int remove_file (const char *);
119 static void report (char *, char *, char *, char *, ...)
120 CHECK_PRINTF(4, 5);
121 static void pipeser (int);
122
123
124 int
125 main (int argc, char **argv) {
126 int msgnum;
127 char *cp, *file = NULL, *folder = NULL;
128 char *maildir = NULL, buf[100], *outfile = NULL;
129 char **argp, **arguments;
130 struct msgs_array msgs = { 0, 0, NULL };
131 struct msgs *mp = NULL;
132 CT *ctp;
133 FILE *fp, *infp = NULL, *outfp = NULL;
134 int using_stdin = 0;
135 int chgflag = 1;
136 int status = OK;
137 fix_transformations fx;
138 fx.reformat = fx.fixcompositecte = fx.fixboundary = 1;
139 fx.fixtypes = NULL;
140 fx.replacetextplain = 0;
141 fx.decodetext = CE_8BIT;
142 fx.decodetypes = "text,application/ics"; /* Default, per man page. */
143 fx.lf_line_endings = 0;
144 fx.textcharset = NULL;
145
146 if (nmh_init(argv[0], 2)) { return 1; }
147
148 arguments = getarguments (invo_name, argc, argv, 1);
149 argp = arguments;
150
151 /*
152 * Parse arguments
153 */
154 while ((cp = *argp++)) {
155 if (*cp == '-') {
156 switch (smatch (++cp, switches)) {
157 case AMBIGSW:
158 ambigsw (cp, switches);
159 done (1);
160 case UNKWNSW:
161 adios (NULL, "-%s unknown", cp);
162
163 case HELPSW:
164 snprintf (buf, sizeof buf, "%s [+folder] [msgs] [switches]",
165 invo_name);
166 print_help (buf, switches, 1);
167 done (0);
168 case VERSIONSW:
169 print_version(invo_name);
170 done (0);
171
172 case DECODETEXTSW:
173 if (! (cp = *argp++) || *cp == '-') {
174 adios (NULL, "missing argument to %s", argp[-2]);
175 }
176 if (! strcasecmp (cp, "8bit")) {
177 fx.decodetext = CE_8BIT;
178 } else if (! strcasecmp (cp, "7bit")) {
179 fx.decodetext = CE_7BIT;
180 } else if (! strcasecmp (cp, "binary")) {
181 fx.decodetext = CE_BINARY;
182 } else {
183 adios (NULL, "invalid argument to %s", argp[-2]);
184 }
185 continue;
186 case NDECODETEXTSW:
187 fx.decodetext = 0;
188 continue;
189 case DECODETYPESW:
190 if (! (cp = *argp++) || *cp == '-') {
191 adios (NULL, "missing argument to %s", argp[-2]);
192 }
193 fx.decodetypes = cp;
194 continue;
195 case CRLFLINEBREAKSSW:
196 fx.lf_line_endings = 0;
197 continue;
198 case NCRLFLINEBREAKSSW:
199 fx.lf_line_endings = 1;
200 continue;
201 case TEXTCHARSETSW:
202 if (! (cp = *argp++) || (*cp == '-' && cp[1])) {
203 adios (NULL, "missing argument to %s", argp[-2]);
204 }
205 fx.textcharset = cp;
206 continue;
207 case NTEXTCHARSETSW:
208 fx.textcharset = 0;
209 continue;
210 case FIXBOUNDARYSW:
211 fx.fixboundary = 1;
212 continue;
213 case NFIXBOUNDARYSW:
214 fx.fixboundary = 0;
215 continue;
216 case FIXCOMPOSITECTESW:
217 fx.fixcompositecte = 1;
218 continue;
219 case NFIXCOMPOSITECTESW:
220 fx.fixcompositecte = 0;
221 continue;
222 case FIXTYPESW:
223 if (! (cp = *argp++) || (*cp == '-' && cp[1])) {
224 adios (NULL, "missing argument to %s", argp[-2]);
225 }
226 if (! strncasecmp (cp, "multipart/", 10) ||
227 ! strncasecmp (cp, "message/", 8))
228 adios (NULL, "-fixtype %s not allowed", cp);
229 if (! strchr (cp, '/'))
230 adios (NULL, "-fixtype requires type/subtype");
231 if (fx.fixtypes == NULL) { fx.fixtypes = svector_create (10); }
232 svector_push_back (fx.fixtypes, cp);
233 continue;
234 case REFORMATSW:
235 fx.reformat = 1;
236 continue;
237 case NREFORMATSW:
238 fx.reformat = 0;
239 continue;
240 case REPLACETEXTPLAINSW:
241 fx.replacetextplain = 1;
242 continue;
243 case NREPLACETEXTPLAINSW:
244 fx.replacetextplain = 0;
245 continue;
246 case FILESW:
247 if (! (cp = *argp++) || (*cp == '-' && cp[1])) {
248 adios (NULL, "missing argument to %s", argp[-2]);
249 }
250 file = *cp == '-' ? mh_xstrdup (cp) : path (cp, TFILE);
251 continue;
252 case OUTFILESW:
253 if (! (cp = *argp++) || (*cp == '-' && cp[1])) {
254 adios (NULL, "missing argument to %s", argp[-2]);
255 }
256 outfile = *cp == '-' ? mh_xstrdup (cp) : path (cp, TFILE);
257 continue;
258 case RPROCSW:
259 if (!(rmmproc = *argp++) || *rmmproc == '-') {
260 adios (NULL, "missing argument to %s", argp[-2]);
261 }
262 continue;
263 case NRPRCSW:
264 rmmproc = NULL;
265 continue;
266 case CHGSW:
267 chgflag = 1;
268 continue;
269 case NCHGSW:
270 chgflag = 0;
271 continue;
272 case VERBSW:
273 verbosw = 1;
274 continue;
275 case NVERBSW:
276 verbosw = 0;
277 continue;
278 }
279 }
280 if (*cp == '+' || *cp == '@') {
281 if (folder)
282 adios (NULL, "only one folder at a time!");
283 folder = pluspath (cp);
284 } else {
285 if (*cp == '/') {
286 /* Interpret a full path as a filename, not a message. */
287 file = mh_xstrdup (cp);
288 } else {
289 app_msgarg (&msgs, cp);
290 }
291 }
292 }
293
294 SIGNAL (SIGQUIT, quitser);
295 SIGNAL (SIGPIPE, pipeser);
296
297 /*
298 * Read the standard profile setup
299 */
300 if ((fp = fopen (cp = etcpath ("mhn.defaults"), "r"))) {
301 readconfig(NULL, fp, cp, 0);
302 fclose (fp);
303 }
304
305 suppress_bogus_mp_content_warning = skip_mp_cte_check = true;
306 suppress_extraneous_trailing_semicolon_warning = true;
307
308 if (! context_find ("path")) {
309 free (path ("./", TFOLDER));
310 }
311
312 if (file && msgs.size) {
313 adios (NULL, "cannot specify msg and file at same time!");
314 }
315
316 if (outfile) {
317 /* Open the outfile now, so we don't have to risk opening it
318 after running out of fds. */
319 if (strcmp (outfile, "-") == 0) {
320 outfp = stdout;
321 } else if ((outfp = fopen (outfile, "w")) == NULL) {
322 adios (outfile, "unable to open for writing");
323 }
324 }
325
326 /*
327 * check if message is coming from file
328 */
329 if (file) {
330 /* If file is stdin, create a tmp file name before parse_mime()
331 has a chance, because it might put in on a different
332 filesystem than the output file. Instead, put it in the
333 user's preferred tmp directory. */
334 CT ct;
335
336 if (! strcmp ("-", file)) {
337 int fd;
338 char *cp;
339
340 using_stdin = 1;
341
342 if ((cp = m_mktemp2 (NULL, invo_name, &fd, NULL)) == NULL) {
343 adios (NULL, "unable to create temporary file in %s",
344 get_temp_dir());
345 } else {
346 free (file);
347 file = mh_xstrdup (cp);
348 cpydata (STDIN_FILENO, fd, "-", file);
349 }
350
351 if (close (fd)) {
352 (void) m_unlink (file);
353 adios (NULL, "failed to write temporary file");
354 }
355 }
356
357 cts = mh_xcalloc(2, sizeof *cts);
358 ctp = cts;
359
360 if ((ct = parse_mime (file))) {
361 set_text_ctparams(ct, fx.decodetypes, fx.lf_line_endings);
362 *ctp++ = ct;
363 } else {
364 inform("unable to parse message from file %s", file);
365 status = NOTOK;
366
367 /* If there's an outfile, pass the input message unchanged, so the
368 message won't get dropped from a pipeline. */
369 if (outfile) {
370 /* Something went wrong. Output might be expected, such as if
371 this were run as a filter. Just copy the input to the
372 output. */
373 if ((infp = fopen (file, "r")) == NULL) {
374 adios (file, "unable to open for reading");
375 }
376
377 if (copy_input_to_output (file, infp, outfile, outfp) != OK) {
378 inform("unable to copy message to %s, "
379 "it might be lost\n", outfile);
380 }
381
382 fclose (infp);
383 infp = NULL;
384 }
385 }
386 } else {
387 /*
388 * message(s) are coming from a folder
389 */
390 CT ct;
391
392 if (! msgs.size) {
393 app_msgarg(&msgs, "cur");
394 }
395 if (! folder) {
396 folder = getfolder (1);
397 }
398 maildir = mh_xstrdup(m_maildir (folder));
399
400 /* chdir so that error messages, esp. from MIME parser, just
401 refer to the message and not its path. */
402 if (chdir (maildir) == NOTOK) {
403 adios (maildir, "unable to change directory to");
404 }
405
406 /* read folder and create message structure */
407 if (! (mp = folder_read (folder, 1))) {
408 adios (NULL, "unable to read folder %s", folder);
409 }
410
411 /* check for empty folder */
412 if (mp->nummsg == 0) {
413 adios (NULL, "no messages in %s", folder);
414 }
415
416 /* parse all the message ranges/sequences and set SELECTED */
417 for (msgnum = 0; msgnum < msgs.size; msgnum++)
418 if (! m_convert (mp, msgs.msgs[msgnum])) {
419 done (1);
420 }
421 seq_setprev (mp); /* set the previous-sequence */
422
423 cts = mh_xcalloc(mp->numsel + 1, sizeof *cts);
424 ctp = cts;
425
426 for (msgnum = mp->lowsel; msgnum <= mp->hghsel; msgnum++) {
427 if (is_selected(mp, msgnum)) {
428 char *msgnam = m_name (msgnum);
429
430 if ((ct = parse_mime (msgnam))) {
431 set_text_ctparams(ct, fx.decodetypes, fx.lf_line_endings);
432 *ctp++ = ct;
433 } else {
434 inform("unable to parse message %s", msgnam);
435 status = NOTOK;
436
437 /* If there's an outfile, pass the input message
438 unchanged, so the message won't get dropped from a
439 pipeline. */
440 if (outfile) {
441 /* Something went wrong. Output might be expected,
442 such as if this were run as a filter. Just copy
443 the input to the output. */
444 /* Can't use path() here because 1) it might have been
445 called before and it caches the pwd, and 2) we call
446 chdir() after that. */
447 char *input_filename =
448 concat (maildir, "/", msgnam, NULL);
449
450 if ((infp = fopen (input_filename, "r")) == NULL) {
451 adios (input_filename,
452 "unable to open for reading");
453 }
454
455 if (copy_input_to_output (input_filename, infp,
456 outfile, outfp) != OK) {
457 inform("unable to copy message to %s, "
458 "it might be lost\n", outfile);
459 }
460
461 fclose (infp);
462 infp = NULL;
463 free (input_filename);
464 }
465 }
466 }
467 }
468
469 if (chgflag) {
470 seq_setcur (mp, mp->hghsel); /* update current message */
471 }
472 seq_save (mp); /* synchronize sequences */
473 context_replace (pfolder, folder);/* update current folder */
474 context_save (); /* save the context file */
475 }
476
477 if (*cts) {
478 for (ctp = cts; *ctp; ++ctp) {
479 status =
480 mhfixmsgsbr (ctp, maildir, &fx, &infp, outfile, &outfp) == OK
481 ? 0
482 : 1;
483 free_content (*ctp);
484
485 if (using_stdin) {
486 (void) m_unlink (file);
487
488 if (! outfile) {
489 /* Just calling m_backup() unlinks the backup file. */
490 (void) m_backup (file);
491 }
492 }
493 }
494 } else {
495 status = 1;
496 }
497
498 free(maildir);
499 free (cts);
500
501 if (fx.fixtypes != NULL) { svector_free (fx.fixtypes); }
502 if (infp) { fclose (infp); } /* even if stdin */
503 if (outfp) { fclose (outfp); } /* even if stdout */
504 free (outfile);
505 free (file);
506 free (folder);
507 free (arguments);
508
509 done (status == OK ? 0 : 1);
510 return NOTOK;
511 }
512
513
514 /*
515 * Apply transformations to one message.
516 */
517 static int
518 mhfixmsgsbr (CT *ctp, char *maildir, const fix_transformations *fx,
519 FILE **infp, char *outfile, FILE **outfp) {
520 /* Store input filename in case one of the transformations, i.e.,
521 fix_boundary(), rewrites to a tmp file. */
522 char *input_filename = maildir
523 ? concat (maildir, "/", (*ctp)->c_file, NULL)
524 : mh_xstrdup ((*ctp)->c_file);
525 int modify_inplace = 0;
526 int message_mods = 0;
527 int status = OK;
528
529 /* Though the input file won't need to be opened if everything goes
530 well, do it here just in case there's a failure, and that failure is
531 running out of file descriptors. */
532 if ((*infp = fopen (input_filename, "r")) == NULL) {
533 adios (input_filename, "unable to open for reading");
534 }
535
536 if (outfile == NULL) {
537 modify_inplace = 1;
538
539 if ((*ctp)->c_file) {
540 char *tempfile;
541 /* outfp will be closed by the caller */
542 if ((tempfile = m_mktemp2 (NULL, invo_name, NULL, outfp)) ==
543 NULL) {
544 adios (NULL, "unable to create temporary file in %s",
545 get_temp_dir());
546 }
547 outfile = mh_xstrdup (tempfile);
548 } else {
549 adios (NULL, "missing both input and output filenames\n");
550 }
551 } /* else *outfp was defined by caller */
552
553 reverse_alternative_parts (*ctp);
554 status = fix_always (*ctp, &message_mods);
555 if (status == OK && fx->fixboundary) {
556 status = fix_boundary (ctp, &message_mods);
557 }
558 if (status == OK && fx->fixtypes != NULL) {
559 status = fix_types (*ctp, fx->fixtypes, &message_mods);
560 }
561 if (status == OK && fx->fixcompositecte) {
562 status = fix_composite_cte (*ctp, &message_mods);
563 }
564 if (status == OK && fx->reformat) {
565 status =
566 ensure_text_plain (ctp, NULL, &message_mods, fx->replacetextplain);
567 }
568 if (status == OK && fx->decodetext) {
569 status = decode_text_parts (*ctp, fx->decodetext, fx->decodetypes,
570 &message_mods);
571 update_cte (*ctp);
572 }
573 if (status == OK && fx->textcharset != NULL) {
574 status = convert_charsets (*ctp, fx->textcharset, &message_mods);
575 }
576
577 if (status == OK && ! (*ctp)->c_umask) {
578 /* Set the umask for the contents file. This currently
579 isn't used but just in case it is in the future. */
580 struct stat st;
581
582 if (stat ((*ctp)->c_file, &st) != NOTOK) {
583 (*ctp)->c_umask = ~(st.st_mode & 0777);
584 } else {
585 (*ctp)->c_umask = ~m_gmprot();
586 }
587 }
588
589 /*
590 * Write the content to a file
591 */
592 if (status == OK) {
593 status = write_content (*ctp, input_filename, outfile, *outfp,
594 modify_inplace, message_mods);
595 } else if (! modify_inplace) {
596 /* Something went wrong. Output might be expected, such
597 as if this were run as a filter. Just copy the input
598 to the output. */
599 if (copy_input_to_output (input_filename, *infp, outfile,
600 *outfp) != OK) {
601 inform("unable to copy message to %s, it might be lost\n",
602 outfile);
603 }
604 }
605
606 if (modify_inplace) {
607 if (status != OK) { (void) m_unlink (outfile); }
608 free (outfile);
609 outfile = NULL;
610 }
611
612 fclose (*infp);
613 *infp = NULL;
614 free (input_filename);
615
616 return status;
617 }
618
619
620 /*
621 * Copy input message to output. Assumes not modifying in place, so this
622 * might be running as part of a pipeline.
623 */
624 static int
625 copy_input_to_output (const char *input_filename, FILE *infp,
626 const char *output_filename, FILE *outfp) {
627 int in = fileno (infp);
628 int out = fileno (outfp);
629 int status = OK;
630
631 if (in != -1 && out != -1) {
632 cpydata (in, out, input_filename, output_filename);
633 } else {
634 status = NOTOK;
635 }
636
637 return status;
638 }
639
640
641 /*
642 * Fix mismatched outer level boundary.
643 */
644 static int
645 fix_boundary (CT *ct, int *message_mods) {
646 struct multipart *mp;
647 int status = OK;
648
649 if (ct && (*ct)->c_type == CT_MULTIPART && bogus_mp_content) {
650 mp = (struct multipart *) (*ct)->c_ctparams;
651
652 /*
653 * 1) Get boundary at end of part.
654 * 2) Get boundary at beginning of part and compare to the end-of-part
655 * boundary.
656 * 3) Write out contents of ct to tmp file, replacing boundary in
657 * header with boundary from part. Set c_unlink to 1.
658 * 4) Free ct.
659 * 5) Call parse_mime() on the tmp file, replacing ct.
660 */
661
662 if (mp && mp->mp_start) {
663 char *part_boundary;
664
665 if (get_multipart_boundary (*ct, &part_boundary) == OK) {
666 char *fixed;
667
668 if ((fixed = m_mktemp2 (NULL, invo_name, NULL, &(*ct)->c_fp))) {
669 if (replace_boundary (*ct, fixed, part_boundary) == OK) {
670 char *filename = mh_xstrdup ((*ct)->c_file);
671 CT fixed_ct;
672
673 free_content (*ct);
674 if ((fixed_ct = parse_mime (fixed))) {
675 *ct = fixed_ct;
676 (*ct)->c_unlink = 1;
677
678 ++*message_mods;
679 if (verbosw) {
680 report (NULL, NULL, filename,
681 "fix multipart boundary");
682 }
683 } else {
684 *ct = NULL;
685 inform("unable to parse fixed part");
686 status = NOTOK;
687 }
688 free (filename);
689 } else {
690 inform("unable to replace broken boundary");
691 status = NOTOK;
692 }
693 } else {
694 inform("unable to create temporary file in %s",
695 get_temp_dir());
696 status = NOTOK;
697 }
698
699 free (part_boundary);
700 } else {
701 /* Couldn't fix the boundary. Report failure so that mhfixmsg
702 doesn't modify the message. */
703 status = NOTOK;
704 }
705 } else {
706 /* No multipart struct, even though the content type is
707 CT_MULTIPART. Report failure so that mhfixmsg doesn't modify
708 the message. */
709 status = NOTOK;
710 }
711 }
712
713 return status;
714 }
715
716
717 /*
718 * Find boundary at end of multipart.
719 */
720 static int
721 get_multipart_boundary (CT ct, char **part_boundary) {
722 char buffer[NMH_BUFSIZ];
723 char *end_boundary = NULL;
724 off_t begin = (off_t) ct->c_end > (off_t) (ct->c_begin + sizeof buffer)
725 ? (off_t) (ct->c_end - sizeof buffer)
726 : (off_t) ct->c_begin;
727 size_t bytes_read;
728 int status = OK;
729
730 /* This will fail if the boundary spans fread() calls. NMH_BUFSIZ should
731 be big enough, even if it's just 1024, to make that unlikely. */
732
733 /* free_content() will close ct->c_fp if bogus MP boundary is fixed. */
734 if (! ct->c_fp && (ct->c_fp = fopen (ct->c_file, "r")) == NULL) {
735 advise (ct->c_file, "unable to open for reading");
736 return NOTOK;
737 }
738
739 /* Get boundary at end of multipart. */
740 while (begin >= (off_t) ct->c_begin) {
741 fseeko (ct->c_fp, begin, SEEK_SET);
742 while ((bytes_read = fread (buffer, 1, sizeof buffer, ct->c_fp)) > 0) {
743 char *cp = rfind_str (buffer, bytes_read, "--");
744
745 if (cp) {
746 char *end;
747
748 /* Trim off trailing "--" and anything beyond. */
749 *cp-- = '\0';
750 if ((end = rfind_str (buffer, cp - buffer, "\n"))) {
751 if (strlen (end) > 3 && *end++ == '\n' &&
752 *end++ == '-' && *end++ == '-') {
753 end_boundary = mh_xstrdup (end);
754 break;
755 }
756 }
757 }
758 }
759
760 if (end_boundary || begin <= (off_t) (ct->c_begin + sizeof buffer))
761 break;
762 begin -= sizeof buffer;
763 }
764
765 /* Get boundary at beginning of multipart. */
766 if (end_boundary) {
767 fseeko (ct->c_fp, ct->c_begin, SEEK_SET);
768 while ((bytes_read = fread (buffer, 1, sizeof buffer, ct->c_fp)) > 0) {
769 if (bytes_read >= strlen (end_boundary)) {
770 char *cp = find_str (buffer, bytes_read, end_boundary);
771
772 if (cp && cp - buffer >= 2 && *--cp == '-' &&
773 *--cp == '-' && (cp > buffer && *--cp == '\n')) {
774 status = OK;
775 break;
776 }
777 } else {
778 /* The start and end boundaries didn't match, or the
779 start boundary doesn't begin with "\n--" (or "--"
780 if at the beginning of buffer). Keep trying. */
781 status = NOTOK;
782 }
783 }
784 } else {
785 status = NOTOK;
786 }
787
788 if (ct->c_fp) {
789 fclose (ct->c_fp);
790 ct->c_fp = NULL;
791 }
792
793 if (status == OK) {
794 *part_boundary = end_boundary;
795 } else {
796 *part_boundary = NULL;
797 free (end_boundary);
798 }
799
800 return status;
801 }
802
803
804 /*
805 * Open and copy ct->c_file to file, replacing the multipart boundary.
806 */
807 static int
808 replace_boundary (CT ct, char *file, char *boundary) {
809 FILE *fpin, *fpout;
810 int compnum, state;
811 char buf[NMH_BUFSIZ], name[NAMESZ];
812 char *np, *vp;
813 m_getfld_state_t gstate;
814 int status = OK;
815
816 if (ct->c_file == NULL) {
817 inform("missing input filename");
818 return NOTOK;
819 }
820
821 if ((fpin = fopen (ct->c_file, "r")) == NULL) {
822 advise (ct->c_file, "unable to open for reading");
823 return NOTOK;
824 }
825
826 if ((fpout = fopen (file, "w")) == NULL) {
827 fclose (fpin);
828 advise (file, "unable to open for writing");
829 return NOTOK;
830 }
831
832 gstate = m_getfld_state_init(fpin);
833 for (compnum = 1;;) {
834 int bufsz = (int) sizeof buf;
835
836 switch (state = m_getfld2(&gstate, name, buf, &bufsz)) {
837 case FLD:
838 case FLDPLUS:
839 compnum++;
840
841 /* get copies of the buffers */
842 np = mh_xstrdup (name);
843 vp = mh_xstrdup (buf);
844
845 /* if necessary, get rest of field */
846 while (state == FLDPLUS) {
847 bufsz = sizeof buf;
848 state = m_getfld2(&gstate, name, buf, &bufsz);
849 vp = add (buf, vp); /* add to previous value */
850 }
851
852 if (strcasecmp (TYPE_FIELD, np)) {
853 fprintf (fpout, "%s:%s", np, vp);
854 } else {
855 char *new_ctline, *new_params;
856
857 replace_param(&ct->c_ctinfo.ci_first_pm,
858 &ct->c_ctinfo.ci_last_pm, "boundary",
859 boundary, 0);
860
861 new_ctline = concat(" ", ct->c_ctinfo.ci_type, "/",
862 ct->c_ctinfo.ci_subtype, NULL);
863 new_params = output_params(LEN(TYPE_FIELD) +
864 strlen(new_ctline) + 1,
865 ct->c_ctinfo.ci_first_pm, NULL, 0);
866 fprintf (fpout, "%s:%s%s\n", np, new_ctline,
867 FENDNULL(new_params));
868 free(new_ctline);
869 free(new_params);
870 }
871
872 free (vp);
873 free (np);
874
875 continue;
876
877 case BODY:
878 putc('\n', fpout);
879 /* buf will have a terminating NULL, skip it. */
880 if ((int) fwrite (buf, 1, bufsz-1, fpout) < bufsz-1) {
881 advise (file, "fwrite");
882 }
883 continue;
884
885 case FILEEOF:
886 break;
887
888 case LENERR:
889 case FMTERR:
890 inform("message format error in component #%d", compnum);
891 status = NOTOK;
892 break;
893
894 default:
895 inform("getfld() returned %d", state);
896 status = NOTOK;
897 break;
898 }
899
900 break;
901 }
902
903 m_getfld_state_destroy (&gstate);
904 fclose (fpout);
905 fclose (fpin);
906
907 return status;
908 }
909
910
911 /*
912 * Fix Content-Type header to reflect the content of its part.
913 */
914 static int
915 fix_types (CT ct, svector_t fixtypes, int *message_mods) {
916 int status = OK;
917
918 switch (ct->c_type) {
919 case CT_MULTIPART: {
920 struct multipart *m = (struct multipart *) ct->c_ctparams;
921 struct part *part;
922
923 for (part = m->mp_parts; status == OK && part; part = part->mp_next) {
924 status = fix_types (part->mp_part, fixtypes, message_mods);
925 }
926 break;
927 }
928
929 case CT_MESSAGE:
930 if (ct->c_subtype == MESSAGE_EXTERNAL) {
931 struct exbody *e = (struct exbody *) ct->c_ctparams;
932
933 status = fix_types (e->eb_content, fixtypes, message_mods);
934 }
935 break;
936
937 default: {
938 char **typep, *type;
939
940 if (ct->c_ctinfo.ci_type && ct->c_ctinfo.ci_subtype) {
941 for (typep = svector_strs (fixtypes);
942 typep && (type = *typep);
943 ++typep) {
944 char *type_subtype =
945 concat (ct->c_ctinfo.ci_type, "/", ct->c_ctinfo.ci_subtype,
946 NULL);
947
948 if (! strcasecmp (type, type_subtype) &&
949 decode_part (ct) == OK &&
950 ct->c_cefile.ce_file != NULL) {
951 char *ct_type_subtype = mime_type (ct->c_cefile.ce_file);
952 char *cp;
953
954 if ((cp = strchr (ct_type_subtype, ';'))) {
955 /* Truncate to remove any parameter list from
956 mime_type () result. */
957 *cp = '\0';
958 }
959
960 if (strcasecmp (type, ct_type_subtype)) {
961 char *ct_type, *ct_subtype;
962 HF hf;
963
964 /* The Content-Type header does not match the
965 content, so update these struct Content
966 fields to match:
967 * c_type, c_subtype
968 * c_ctinfo.ci_type, c_ctinfo.ci_subtype
969 * c_ctline
970 */
971 /* Extract type and subtype from type/subtype. */
972 ct_type = mh_xstrdup(ct_type_subtype);
973 if ((cp = strchr (ct_type, '/'))) {
974 *cp = '\0';
975 ct_subtype = mh_xstrdup(++cp);
976 } else {
977 inform("missing / in MIME type of %s %s",
978 ct->c_file, ct->c_partno);
979 free (ct_type);
980 return NOTOK;
981 }
982
983 ct->c_type = ct_str_type (ct_type);
984 ct->c_subtype = ct_str_subtype (ct->c_type, ct_subtype);
985
986 free (ct->c_ctinfo.ci_type);
987 ct->c_ctinfo.ci_type = ct_type;
988 free (ct->c_ctinfo.ci_subtype);
989 ct->c_ctinfo.ci_subtype = ct_subtype;
990 if (! replace_substring (&ct->c_ctline, type,
991 ct_type_subtype)) {
992 inform("did not find %s in %s",
993 type, ct->c_ctline);
994 }
995
996 /* Update Content-Type header field. */
997 for (hf = ct->c_first_hf; hf; hf = hf->next) {
998 if (! strcasecmp (TYPE_FIELD, hf->name)) {
999 if (replace_substring (&hf->value, type,
1000 ct_type_subtype)) {
1001 ++*message_mods;
1002 if (verbosw) {
1003 report (NULL, ct->c_partno, ct->c_file,
1004 "change Content-Type in header "
1005 "from %s to %s",
1006 type, ct_type_subtype);
1007 }
1008 break;
1009 }
1010 inform("did not find %s in %s", type, hf->value);
1011 }
1012 }
1013 }
1014 free (ct_type_subtype);
1015 }
1016 free (type_subtype);
1017 }
1018 }
1019 }}
1020
1021 return status;
1022 }
1023
1024
/*
 * Replace the first occurrence of old in *str with new.
 *
 * The result is built in newly allocated storage.  When old is found, the
 * previous *str is freed, *str is pointed at the result, and the result is
 * returned.  When old is not found, *str is left alone and NULL is
 * returned.
 */
char *
replace_substring (char **str, const char *old, const char *new) {
    char *match = strstr (*str, old);
    const char *tail;
    char *result;

    if (match == NULL) {
        return NULL;
    }

    /* Everything that follows the matched text is kept as is. */
    tail = match + strlen (old);

    if (match == *str) {
        /* Match at the very beginning, so there is no leading text. */
        result = concat (new, tail, NULL);
    } else {
        /* Cut a copy of the string off right where the match begins. */
        char *head = mh_xstrdup(*str);

        head[match - *str] = '\0';
        result = concat (head, new, tail, NULL);
        free (head);
    }

    free (*str);
    *str = result;

    return result;
}
1052
1053
/*
 * Remove a name=value parameter, given just its name, from a header value.
 *
 * The first occurrence of name that is immediately followed by "=" is
 * removed in place, together with its value, any whitespace in front of it,
 * and one semicolon in front of that.  A value that starts with a double
 * quote runs to the closing quote; any other value runs to the next
 * whitespace character.  str is returned in all cases, and is unchanged if
 * the parameter is not present.
 *
 * An unterminated quoted value, or an empty value at the end of the string,
 * ends at the terminating null; the scan never goes beyond it.
 */
char *
remove_parameter (char *str, const char *name) {
    /* It looks to me, based on the BNF in RFC 2045, than there can't
       be whitespace between the parameter name and the "=", or
       between the "=" and the parameter value. */
    const size_t name_len = strlen (name);
    char *cp;

    /* Step through the occurrences of name until one is directly followed
       by "=".  This finds the same place as searching for "name=" would,
       without having to allocate that string. */
    for (cp = strstr (str, name);
         cp  &&  cp[name_len] != '=';
         cp = *cp ? strstr (cp + 1, name) : NULL) {
        continue;
    }

    if (cp) {
        char *start, *end;

        /* Remove any leading spaces, before the parameter name. */
        for (start = cp;
             start > str  &&  isspace ((unsigned char) *(start-1));
             --start) {
            continue;
        }
        /* Remove a leading semicolon. */
        if (start > str  &&  *(start-1) == ';') { --start; }

        end = cp + name_len + 1;
        if (*end == '"') {
            /* Skip past the quoted value, and then the final quote, but
               only if there is one:  an unterminated value ends at the
               terminating null. */
            for (++end ; *end  &&  *end != '"'; ++end) { continue; }
            if (*end == '"') { ++end; }
        } else {
            /* Skip past the value.  Its first character is consumed
               unconditionally, unless the value is empty because the
               string ends right after the "=". */
            if (*end) { ++end; }
            for ( ; *end  &&  ! isspace ((unsigned char) *end); ++end) {
                continue;
            }
        }

        /* Close the gap, moving the terminating null as well. */
        (void) memmove (start, end, strlen (end) + 1);
    }

    return str;
}
1099
1100
1101 /*
1102 * Fix Content-Transfer-Encoding of composite,, e.g., message or multipart, part.
1103 * According to RFC 2045 Sec. 6.4, it must be 7bit, 8bit, or binary. Set it to
1104 * 8 bit.
1105 */
1106 static int
1107 fix_composite_cte (CT ct, int *message_mods) {
1108 int status = OK;
1109
1110 if (ct->c_type == CT_MESSAGE || ct->c_type == CT_MULTIPART) {
1111 if (ct->c_encoding != CE_7BIT && ct->c_encoding != CE_8BIT &&
1112 ct->c_encoding != CE_BINARY) {
1113 HF hf;
1114
1115 for (hf = ct->c_first_hf; hf; hf = hf->next) {
1116 char *name = hf->name;
1117 for (; isspace((unsigned char)*name); ++name) {
1118 continue;
1119 }
1120
1121 if (! strncasecmp (name, ENCODING_FIELD,
1122 LEN(ENCODING_FIELD))) {
1123 char *prefix = "Nmh-REPLACED-INVALID-";
1124 HF h;
1125
1126 NEW(h);
1127 h->name = mh_xstrdup (hf->name);
1128 h->hf_encoding = hf->hf_encoding;
1129 h->next = hf->next;
1130 hf->next = h;
1131
1132 /* Retain old header but prefix its name. */
1133 free (hf->name);
1134 hf->name = concat (prefix, h->name, NULL);
1135
1136 ++*message_mods;
1137 if (verbosw) {
1138 char *encoding = cpytrim (hf->value);
1139 report (NULL, ct->c_partno, ct->c_file,
1140 "replace Content-Transfer-Encoding of %s "
1141 "with 8 bit", encoding);
1142 free (encoding);
1143 }
1144
1145 h->value = mh_xstrdup (" 8bit\n");
1146
1147 /* Don't need to warn for multiple C-T-E header
1148 fields, parse_mime() already does that. But
1149 if there are any, fix them all as necessary. */
1150 hf = h;
1151 }
1152 }
1153
1154 set_ce (ct, CE_8BIT);
1155 }
1156
1157 if (ct->c_type == CT_MULTIPART) {
1158 struct multipart *m;
1159 struct part *part;
1160
1161 m = (struct multipart *) ct->c_ctparams;
1162 for (part = m->mp_parts; part; part = part->mp_next) {
1163 if (fix_composite_cte (part->mp_part, message_mods) != OK) {
1164 status = NOTOK;
1165 break;
1166 }
1167 }
1168 }
1169 }
1170
1171 return status;
1172 }
1173
1174
1175 /*
1176 * Set content encoding.
1177 */
1178 static int
1179 set_ce (CT ct, int encoding) {
1180 const char *ce = ce_str (encoding);
1181 const struct str2init *ctinit = get_ce_method (ce);
1182
1183 if (ctinit) {
1184 char *cte = concat (" ", ce, "\n", NULL);
1185 int found_cte = 0;
1186 HF hf;
1187 /* Decoded contents might be in ct->c_cefile.ce_file, if the
1188 caller is decode_text_parts (). Save because we'll
1189 overwrite below. */
1190 struct cefile decoded_content_info = ct->c_cefile;
1191
1192 ct->c_encoding = encoding;
1193
1194 ct->c_ctinitfnx = ctinit->si_init;
1195 /* This will assign ct->c_cefile with an all-0 struct, which
1196 is what we want. */
1197 (*ctinit->si_init) (ct);
1198 /* After returning, the caller should set
1199 ct->c_cefile.ce_file to the name of the file containing
1200 the contents. */
1201
1202 if (ct->c_ceclosefnx) {
1203 (*ct->c_ceclosefnx) (ct);
1204 }
1205
1206 /* Restore the cefile. */
1207 ct->c_cefile = decoded_content_info;
1208
1209 /* Update/add Content-Transfer-Encoding header field. */
1210 for (hf = ct->c_first_hf; hf; hf = hf->next) {
1211 if (! strcasecmp (ENCODING_FIELD, hf->name)) {
1212 found_cte = 1;
1213 free (hf->value);
1214 hf->value = cte;
1215 }
1216 }
1217 if (! found_cte) {
1218 add_header (ct, mh_xstrdup (ENCODING_FIELD), cte);
1219 }
1220
1221 /* Update c_celine. It's used only by mhlist -debug. */
1222 free (ct->c_celine);
1223 ct->c_celine = mh_xstrdup (cte);
1224
1225 return OK;
1226 }
1227
1228 return NOTOK;
1229 }
1230
1231
1232 /*
1233 * Make sure each text part has a corresponding text/plain part.
1234 */
1235 static int
1236 ensure_text_plain (CT *ct, CT parent, int *message_mods, int replacetextplain) {
1237 int status = OK;
1238
1239 switch ((*ct)->c_type) {
1240 case CT_TEXT: {
1241 /* Nothing to do for text/plain. */
1242 if ((*ct)->c_subtype == TEXT_PLAIN) { return OK; }
1243
1244 if (parent && parent->c_type == CT_MULTIPART &&
1245 parent->c_subtype == MULTI_ALTERNATE) {
1246 int new_subpart_number = 1;
1247 int has_text_plain =
1248 find_textplain_sibling (parent, replacetextplain,
1249 &new_subpart_number);
1250
1251 if (! has_text_plain) {
1252 /* Parent is a multipart/alternative. Insert a new
1253 text/plain subpart. */
1254 const int inserted =
1255 insert_new_text_plain_part (*ct, new_subpart_number,
1256 parent);
1257 if (inserted) {
1258 ++*message_mods;
1259 if (verbosw) {
1260 report (NULL, parent->c_partno, parent->c_file,
1261 "insert text/plain part");
1262 }
1263 } else {
1264 status = NOTOK;
1265 }
1266 }
1267 } else if (parent && parent->c_type == CT_MULTIPART &&
1268 parent->c_subtype == MULTI_RELATED) {
1269 char *type_subtype =
1270 concat ((*ct)->c_ctinfo.ci_type, "/",
1271 (*ct)->c_ctinfo.ci_subtype, NULL);
1272 const char *parent_type =
1273 get_param (parent->c_ctinfo.ci_first_pm, "type", '?', 1);
1274 int new_subpart_number = 1;
1275 int has_text_plain = 0;
1276
1277 /* Have to do string comparison on the subtype because we
1278 don't enumerate all of them in c_subtype values.
1279 parent_type will be NULL if the multipart/related part
1280 doesn't have a type parameter. The type parameter must
1281 be specified according to RFC 2387 Sec. 3.1 but not all
1282 messages comply. */
1283 if (parent_type && strcasecmp (type_subtype, parent_type) == 0) {
1284 /* The type of this part matches the root type of the
1285 parent multipart/related. Look to see if there's
1286 text/plain sibling. */
1287 has_text_plain =
1288 find_textplain_sibling (parent, replacetextplain,
1289 &new_subpart_number);
1290 }
1291
1292 free (type_subtype);
1293
1294 if (! has_text_plain) {
1295 struct multipart *mp = (struct multipart *) parent->c_ctparams;
1296 struct part *part;
1297 int siblings = 0;
1298
1299 for (part = mp->mp_parts; part; part = part->mp_next) {
1300 if (*ct != part->mp_part) {
1301 ++siblings;
1302 }
1303 }
1304
1305 if (siblings) {
1306 /* Parent is a multipart/related. Insert a new
1307 text/plain subpart in a new multipart/alternative. */
1308 if (insert_into_new_mp_alt (ct, message_mods)) {
1309 /* Not an error if text/plain couldn't be added. */
1310 }
1311 } else {
1312 /* There are no siblings, so insert a new text/plain
1313 subpart, and change the parent type from
1314 multipart/related to multipart/alternative. */
1315 const int inserted =
1316 insert_new_text_plain_part (*ct, new_subpart_number,
1317 parent);
1318
1319 if (inserted) {
1320 HF hf;
1321
1322 parent->c_subtype = MULTI_ALTERNATE;
1323 free (parent->c_ctinfo.ci_subtype);
1324 parent->c_ctinfo.ci_subtype = mh_xstrdup("alternative");
1325 if (! replace_substring (&parent->c_ctline, "/related",
1326 "/alternative")) {
1327 inform("did not find multipart/related in %s",
1328 parent->c_ctline);
1329 }
1330
1331 /* Update Content-Type header field. */
1332 for (hf = parent->c_first_hf; hf; hf = hf->next) {
1333 if (! strcasecmp (TYPE_FIELD, hf->name)) {
1334 if (replace_substring (&hf->value, "/related",
1335 "/alternative")) {
1336 ++*message_mods;
1337 if (verbosw) {
1338 report (NULL, parent->c_partno,
1339 parent->c_file,
1340 "insert text/plain part");
1341 }
1342
1343 /* Remove, e.g., type="text/html" from
1344 multipart/alternative. */
1345 remove_parameter (hf->value, "type");
1346 break;
1347 }
1348 inform("did not find multipart/"
1349 "related in header %s", hf->value);
1350 }
1351 }
1352 } else {
1353 /* Not an error if text/plain couldn't be inserted. */
1354 }
1355 }
1356 }
1357 } else {
1358 if (insert_into_new_mp_alt (ct, message_mods)) {
1359 status = NOTOK;
1360 }
1361 }
1362 break;
1363 }
1364
1365 case CT_MULTIPART: {
1366 struct multipart *mp = (struct multipart *) (*ct)->c_ctparams;
1367 struct part *part;
1368
1369 for (part = mp->mp_parts; status == OK && part; part = part->mp_next) {
1370 if ((*ct)->c_type == CT_MULTIPART) {
1371 status = ensure_text_plain (&part->mp_part, *ct, message_mods,
1372 replacetextplain);
1373 }
1374 }
1375 break;
1376 }
1377
1378 case CT_MESSAGE:
1379 if ((*ct)->c_subtype == MESSAGE_EXTERNAL) {
1380 struct exbody *e = (struct exbody *) (*ct)->c_ctparams;
1381
1382 status = ensure_text_plain (&e->eb_content, *ct, message_mods,
1383 replacetextplain);
1384 }
1385 break;
1386 }
1387
1388 return status;
1389 }
1390
1391
1392 /*
1393 * See if there is a sibling text/plain, and return its subpart number.
1394 */
1395 static int
1396 find_textplain_sibling (CT parent, int replacetextplain,
1397 int *new_subpart_number) {
1398 struct multipart *mp = (struct multipart *) parent->c_ctparams;
1399 struct part *part, *prev;
1400 int has_text_plain = 0;
1401
1402 for (prev = part = mp->mp_parts; part; part = part->mp_next) {
1403 ++*new_subpart_number;
1404 if (part->mp_part->c_type == CT_TEXT &&
1405 part->mp_part->c_subtype == TEXT_PLAIN) {
1406 if (replacetextplain) {
1407 struct part *old_part;
1408 if (part == mp->mp_parts) {
1409 old_part = mp->mp_parts;
1410 mp->mp_parts = part->mp_next;
1411 } else {
1412 old_part = prev->mp_next;
1413 prev->mp_next = part->mp_next;
1414 }
1415 if (verbosw) {
1416 report (NULL, parent->c_partno, parent->c_file,
1417 "remove text/plain part %s",
1418 old_part->mp_part->c_partno);
1419 }
1420 free_content (old_part->mp_part);
1421 free (old_part);
1422 } else {
1423 has_text_plain = 1;
1424 }
1425 break;
1426 }
1427 prev = part;
1428 }
1429
1430 return has_text_plain;
1431 }
1432
1433
1434 /*
1435 * Insert a new text/plain part.
1436 */
1437 static int
1438 insert_new_text_plain_part (CT ct, int new_subpart_number, CT parent) {
1439 struct multipart *mp = (struct multipart *) parent->c_ctparams;
1440 struct part *new_part;
1441
1442 NEW(new_part);
1443 if ((new_part->mp_part = build_text_plain_part (ct))) {
1444 char buffer[16];
1445 snprintf (buffer, sizeof buffer, "%d", new_subpart_number);
1446
1447 new_part->mp_next = mp->mp_parts;
1448 mp->mp_parts = new_part;
1449 new_part->mp_part->c_partno =
1450 concat (parent->c_partno ? parent->c_partno : "1", ".",
1451 buffer, NULL);
1452
1453 return 1;
1454 }
1455
1456 free_content (new_part->mp_part);
1457 free (new_part);
1458
1459 return 0;
1460 }
1461
1462
1463 /*
1464 * Create a text/plain part to go along with non-plain sibling part.
1465 */
1466 static CT
1467 build_text_plain_part (CT encoded_part) {
1468 CT tp_part = divide_part (encoded_part);
1469 char *tmp_plain_file = NULL;
1470
1471 if (decode_part (tp_part) == OK) {
1472 /* Now, tp_part->c_cefile.ce_file is the name of the tmp file that
1473 contains the decoded contents. And the decoding function, such
1474 as openQuoted, will have set ...->ce_unlink to 1 so that it will
1475 be unlinked by free_content (). */
1476 char *tempfile;
1477
1478 /* This m_mktemp2() call closes the temp file. */
1479 if ((tempfile = m_mktemp2 (NULL, invo_name, NULL, NULL)) == NULL) {
1480 inform("unable to create temporary file in %s",
1481 get_temp_dir());
1482 } else {
1483 tmp_plain_file = mh_xstrdup (tempfile);
1484 if (reformat_part (tp_part, tmp_plain_file,
1485 tp_part->c_ctinfo.ci_type,
1486 tp_part->c_ctinfo.ci_subtype,
1487 tp_part->c_type) == OK) {
1488 return tp_part;
1489 }
1490 }
1491 }
1492
1493 free_content (tp_part);
1494 if (tmp_plain_file) { (void) m_unlink (tmp_plain_file); }
1495 free (tmp_plain_file);
1496
1497 return NULL;
1498 }
1499
1500
1501 /*
1502 * Slip new text/plain part into a new multipart/alternative.
1503 */
1504 static int
1505 insert_into_new_mp_alt (CT *ct, int *message_mods) {
1506 CT tp_part = build_text_plain_part (*ct);
1507 int status = OK;
1508
1509 if (tp_part) {
1510 CT mp_alt = build_multipart_alt (*ct, tp_part, CT_MULTIPART,
1511 MULTI_ALTERNATE);
1512 if (mp_alt) {
1513 struct multipart *mp = (struct multipart *) mp_alt->c_ctparams;
1514
1515 if (mp && mp->mp_parts) {
1516 mp->mp_parts->mp_part = tp_part;
1517 /* Make the new multipart/alternative the parent. */
1518 *ct = mp_alt;
1519
1520 ++*message_mods;
1521 if (verbosw) {
1522 report (NULL, (*ct)->c_partno, (*ct)->c_file,
1523 "insert text/plain part");
1524 }
1525 } else {
1526 free_content (tp_part);
1527 free_content (mp_alt);
1528 status = NOTOK;
1529 }
1530 } else {
1531 status = NOTOK;
1532 }
1533 } else {
1534 /* Not an error if text/plain couldn't be built. */
1535 }
1536
1537 return status;
1538 }
1539
1540
1541 /*
1542 * Clone a MIME part.
1543 */
1544 static CT
1545 divide_part (CT ct) {
1546 CT new_part;
1547
1548 NEW0(new_part);
1549 /* Just copy over what is needed for decoding. c_vrsn and
1550 c_celine aren't necessary. */
1551 new_part->c_file = mh_xstrdup (ct->c_file);
1552 new_part->c_begin = ct->c_begin;
1553 new_part->c_end = ct->c_end;
1554 copy_ctinfo (&new_part->c_ctinfo, &ct->c_ctinfo);
1555 new_part->c_type = ct->c_type;
1556 new_part->c_cefile = ct->c_cefile;
1557 new_part->c_encoding = ct->c_encoding;
1558 new_part->c_ctinitfnx = ct->c_ctinitfnx;
1559 new_part->c_ceopenfnx = ct->c_ceopenfnx;
1560 new_part->c_ceclosefnx = ct->c_ceclosefnx;
1561 new_part->c_cesizefnx = ct->c_cesizefnx;
1562
1563 /* c_ctline is used by reformat__part(), so it can preserve
1564 anything after the type/subtype. */
1565 new_part->c_ctline = mh_xstrdup (ct->c_ctline);
1566
1567 return new_part;
1568 }
1569
1570
1571 /*
1572 * Copy the content info from one part to another.
1573 */
1574 static void
1575 copy_ctinfo (CI dest, CI src) {
1576 PM s_pm, d_pm;
1577
1578 dest->ci_type = src->ci_type ? mh_xstrdup (src->ci_type) : NULL;
1579 dest->ci_subtype = src->ci_subtype ? mh_xstrdup (src->ci_subtype) : NULL;
1580
1581 for (s_pm = src->ci_first_pm; s_pm; s_pm = s_pm->pm_next) {
1582 d_pm = add_param(&dest->ci_first_pm, &dest->ci_last_pm, s_pm->pm_name,
1583 s_pm->pm_value, 0);
1584 if (s_pm->pm_charset) {
1585 d_pm->pm_charset = mh_xstrdup(s_pm->pm_charset);
1586 }
1587 if (s_pm->pm_lang) {
1588 d_pm->pm_lang = mh_xstrdup(s_pm->pm_lang);
1589 }
1590 }
1591
1592 dest->ci_comment = src->ci_comment ? mh_xstrdup (src->ci_comment) : NULL;
1593 dest->ci_magic = src->ci_magic ? mh_xstrdup (src->ci_magic) : NULL;
1594 }
1595
1596
1597 /*
1598 * Decode content.
1599 */
1600 static int
1601 decode_part (CT ct) {
1602 char *tmp_decoded;
1603 int status;
1604 FILE *file;
1605 char *tempfile;
1606
1607 if ((tempfile = m_mktemp2 (NULL, invo_name, NULL, &file)) == NULL) {
1608 adios (NULL, "unable to create temporary file in %s", get_temp_dir());
1609 }
1610 tmp_decoded = mh_xstrdup (tempfile);
1611 /* The following call will load ct->c_cefile.ce_file with the tmp
1612 filename of the decoded content. tmp_decoded will contain the
1613 encoded output, get rid of that. */
1614 status = output_message_fp (ct, file, tmp_decoded);
1615 (void) m_unlink (tmp_decoded);
1616 free (tmp_decoded);
1617 if (fclose (file)) {
1618 inform("unable to close temporary file %s, continuing...", tempfile);
1619 }
1620
1621 return status;
1622 }
1623
1624
1625 /*
1626 * Reformat content as plain text.
1627 * Some of the arguments aren't really needed now, but maybe will
1628 * be in the future for other than text types.
1629 */
1630 static int
1631 reformat_part (CT ct, char *file, char *type, char *subtype, int c_type) {
1632 int output_subtype, output_encoding;
1633 const char *reason = NULL;
1634 char *cp, *cf;
1635 int status;
1636
1637 /* Hacky: this redirects the output from whatever command is used
1638 to show the part to a file. So, the user can't have any output
1639 redirection in that command.
1640 Could show_multi() in mhshowsbr.c avoid this? */
1641
1642 /* Check for invo_name-format-type/subtype. */
1643 if ((cf = context_find_by_type ("format", type, subtype)) == NULL) {
1644 if (verbosw) {
1645 inform("Don't know how to convert %s, there is no "
1646 "%s-format-%s/%s profile entry",
1647 ct->c_file, invo_name, type, subtype);
1648 }
1649 return NOTOK;
1650 }
1651 if (strchr (cf, '>')) {
1652 inform("'>' prohibited in \"%s\",\nplease fix your "
1653 "%s-format-%s/%s profile entry", cf, invo_name, type,
1654 FENDNULL(subtype));
1655
1656 return NOTOK;
1657 }
1658
1659 cp = concat (cf, " >", file, NULL);
1660 status = show_content_aux (ct, 0, cp, NULL, NULL);
1661 free (cp);
1662
1663 /* Unlink decoded content tmp file and free its filename to avoid
1664 leaks. The file stream should already have been closed. */
1665 if (ct->c_cefile.ce_unlink) {
1666 (void) m_unlink (ct->c_cefile.ce_file);
1667 free (ct->c_cefile.ce_file);
1668 ct->c_cefile.ce_file = NULL;
1669 ct->c_cefile.ce_unlink = 0;
1670 }
1671
1672 if (c_type == CT_TEXT) {
1673 output_subtype = TEXT_PLAIN;
1674 } else {
1675 /* Set subtype to 0, which is always an UNKNOWN subtype. */
1676 output_subtype = 0;
1677 }
1678
1679 output_encoding = content_encoding (ct, &reason);
1680 if (status == OK &&
1681 set_ct_type (ct, c_type, output_subtype, output_encoding) == OK) {
1682 ct->c_cefile.ce_file = file;
1683 ct->c_cefile.ce_unlink = 1;
1684 } else {
1685 ct->c_cefile.ce_unlink = 0;
1686 status = NOTOK;
1687 }
1688
1689 return status;
1690 }
1691
1692
1693 /*
1694 * Fill in a multipart/alternative part.
1695 */
1696 static CT
1697 build_multipart_alt (CT first_alt, CT new_part, int type, int subtype) {
1698 char *boundary_prefix = "----=_nmh-multipart";
1699 char *boundary = concat (boundary_prefix, first_alt->c_partno, NULL);
1700 char *boundary_indicator = "; boundary=";
1701 char *typename, *subtypename, *name;
1702 CT ct;
1703 struct part *p;
1704 struct multipart *m;
1705 const struct str2init *ctinit;
1706
1707 NEW0(ct);
1708
1709 /* Set up the multipart/alternative part. These fields of *ct were
1710 initialized to 0 by mh_xcalloc():
1711 c_fp, c_unlink, c_begin, c_end,
1712 c_vrsn, c_ctline, c_celine,
1713 c_id, c_descr, c_dispo, c_partno,
1714 c_ctinfo.ci_comment, c_ctinfo.ci_magic,
1715 c_cefile, c_encoding,
1716 c_digested, c_digest[16], c_ctexbody,
1717 c_ctinitfnx, c_ceopenfnx, c_ceclosefnx, c_cesizefnx,
1718 c_umask, c_rfc934,
1719 c_showproc, c_termproc, c_storeproc, c_storage, c_folder
1720 */
1721
1722 ct->c_file = mh_xstrdup (first_alt->c_file);
1723 ct->c_type = type;
1724 ct->c_subtype = subtype;
1725
1726 ctinit = get_ct_init (ct->c_type);
1727
1728 typename = ct_type_str (type);
1729 subtypename = ct_subtype_str (type, subtype);
1730
1731 {
1732 int serial = 0;
1733 int found_boundary = 1;
1734
1735 while (found_boundary && serial < 1000000) {
1736 found_boundary = 0;
1737
1738 /* Ensure that the boundary doesn't appear in the decoded
1739 content. */
1740 if (new_part->c_cefile.ce_file) {
1741 if ((found_boundary =
1742 boundary_in_content (&new_part->c_cefile.ce_fp,
1743 new_part->c_cefile.ce_file,
1744 boundary)) == NOTOK) {
1745 goto return_null;
1746 }
1747 }
1748
1749 /* Ensure that the boundary doesn't appear in the encoded
1750 content. */
1751 if (! found_boundary && new_part->c_file) {
1752 if ((found_boundary =
1753 boundary_in_content (&new_part->c_fp,
1754 new_part->c_file,
1755 boundary)) == NOTOK) {
1756 goto return_null;
1757 }
1758 }
1759
1760 if (found_boundary) {
1761 /* Try a slightly different boundary. */
1762 char buffer2[16];
1763
1764 free (boundary);
1765 ++serial;
1766 snprintf (buffer2, sizeof buffer2, "%d", serial);
1767 boundary =
1768 concat (boundary_prefix,
1769 FENDNULL(first_alt->c_partno),
1770 "-", buffer2, NULL);
1771 }
1772 }
1773
1774 if (found_boundary) {
1775 inform("giving up trying to find a unique boundary");
1776 goto return_null;
1777 }
1778 }
1779
1780 name = concat (" ", typename, "/", subtypename, boundary_indicator, "\"",
1781 boundary, "\"", NULL);
1782
1783 /* Load c_first_hf and c_last_hf. */
1784 transfer_noncontent_headers (first_alt, ct);
1785 add_header (ct, mh_xstrdup (TYPE_FIELD), concat (name, "\n", NULL));
1786 free (name);
1787
1788 /* Load c_partno. */
1789 if (first_alt->c_partno) {
1790 ct->c_partno = mh_xstrdup (first_alt->c_partno);
1791 free (first_alt->c_partno);
1792 first_alt->c_partno = concat (ct->c_partno, ".1", NULL);
1793 new_part->c_partno = concat (ct->c_partno, ".2", NULL);
1794 } else {
1795 first_alt->c_partno = mh_xstrdup ("1");
1796 new_part->c_partno = mh_xstrdup ("2");
1797 }
1798
1799 if (ctinit) {
1800 ct->c_ctinfo.ci_type = mh_xstrdup (typename);
1801 ct->c_ctinfo.ci_subtype = mh_xstrdup (subtypename);
1802 }
1803
1804 add_param(&ct->c_ctinfo.ci_first_pm, &ct->c_ctinfo.ci_last_pm,
1805 "boundary", boundary, 0);
1806
1807 NEW(p);
1808 NEW(p->mp_next);
1809 p->mp_next->mp_next = NULL;
1810 p->mp_next->mp_part = first_alt;
1811
1812 NEW0(m);
1813 m->mp_start = concat (boundary, "\n", NULL);
1814 m->mp_stop = concat (boundary, "--\n", NULL);
1815 m->mp_parts = p;
1816 ct->c_ctparams = m;
1817
1818 free (boundary);
1819
1820 return ct;
1821
1822 return_null:
1823 free_content(ct);
1824 free(boundary);
1825 return NULL;
1826 }
1827
1828
1829 /*
1830 * Check that the boundary does not appear in the content.
1831 */
1832 static int
1833 boundary_in_content (FILE **fp, char *file, const char *boundary) {
1834 char buffer[NMH_BUFSIZ];
1835 size_t bytes_read;
1836 int found_boundary = 0;
1837
1838 /* free_content() will close *fp if we fopen it here. */
1839 if (! *fp && (*fp = fopen (file, "r")) == NULL) {
1840 advise (file, "unable to open %s for reading", file);
1841 return NOTOK;
1842 }
1843
1844 fseeko (*fp, 0L, SEEK_SET);
1845 while ((bytes_read = fread (buffer, 1, sizeof buffer, *fp)) > 0) {
1846 if (find_str (buffer, bytes_read, boundary)) {
1847 found_boundary = 1;
1848 break;
1849 }
1850 }
1851
1852 return found_boundary;
1853 }
1854
1855
1856 /*
1857 * Remove all non-Content headers.
1858 */
1859 static void
1860 transfer_noncontent_headers (CT old, CT new) {
1861 HF hp, hp_prev;
1862
1863 hp_prev = hp = old->c_first_hf;
1864 while (hp) {
1865 HF next = hp->next;
1866
1867 if (strncasecmp (XXX_FIELD_PRF, hp->name, LEN(XXX_FIELD_PRF))) {
1868 if (hp == old->c_last_hf) {
1869 if (hp == old->c_first_hf) {
1870 old->c_last_hf = old->c_first_hf = NULL;
1871 } else {
1872 hp_prev->next = NULL;
1873 old->c_last_hf = hp_prev;
1874 }
1875 } else {
1876 if (hp == old->c_first_hf) {
1877 old->c_first_hf = next;
1878 } else {
1879 hp_prev->next = next;
1880 }
1881 }
1882
1883 /* Put node hp in the new CT. */
1884 if (new->c_first_hf == NULL) {
1885 new->c_first_hf = hp;
1886 } else {
1887 new->c_last_hf->next = hp;
1888 }
1889 new->c_last_hf = hp;
1890 } else {
1891 /* A Content- header, leave in old. */
1892 hp_prev = hp;
1893 }
1894
1895 hp = next;
1896 }
1897 }
1898
1899
1900 /*
1901 * Set content type.
1902 */
1903 static int
1904 set_ct_type (CT ct, int type, int subtype, int encoding) {
1905 char *typename = ct_type_str (type);
1906 char *subtypename = ct_subtype_str (type, subtype);
1907 /* E.g, " text/plain" */
1908 char *type_subtypename = concat (" ", typename, "/", subtypename, NULL);
1909 /* E.g, " text/plain\n" */
1910 char *name_plus_nl = concat (type_subtypename, "\n", NULL);
1911 int found_content_type = 0;
1912 HF hf;
1913 const char *cp = NULL;
1914 char *ctline;
1915 int status;
1916
1917 /* Update/add Content-Type header field. */
1918 for (hf = ct->c_first_hf; hf; hf = hf->next) {
1919 if (! strcasecmp (TYPE_FIELD, hf->name)) {
1920 found_content_type = 1;
1921 free (hf->value);
1922 hf->value = (cp = strchr (ct->c_ctline, ';'))
1923 ? concat (type_subtypename, cp, "\n", NULL)
1924 : mh_xstrdup (name_plus_nl);
1925 }
1926 }
1927 if (! found_content_type) {
1928 add_header (ct, mh_xstrdup (TYPE_FIELD),
1929 (cp = strchr (ct->c_ctline, ';'))
1930 ? concat (type_subtypename, cp, "\n", NULL)
1931 : mh_xstrdup (name_plus_nl));
1932 }
1933
1934 /* Some of these might not be used, but set them anyway. */
1935 ctline = cp
1936 ? concat (type_subtypename, cp, NULL)
1937 : concat (type_subtypename, NULL);
1938 free (ct->c_ctline);
1939 ct->c_ctline = ctline;
1940 /* Leave other ctinfo members as they were. */
1941 free (ct->c_ctinfo.ci_type);
1942 ct->c_ctinfo.ci_type = mh_xstrdup (typename);
1943 free (ct->c_ctinfo.ci_subtype);
1944 ct->c_ctinfo.ci_subtype = mh_xstrdup (subtypename);
1945 ct->c_type = type;
1946 ct->c_subtype = subtype;
1947
1948 free (name_plus_nl);
1949 free (type_subtypename);
1950
1951 status = set_ce (ct, encoding);
1952
1953 return status;
1954 }
1955
1956
1957 /*
1958 * It's not necessary to update the charset parameter of a Content-Type
1959 * header for a text part. According to RFC 2045 Sec. 6.4, the body
1960 * (content) was originally in the specified charset, "and will be in
1961 * that character set again after decoding."
1962 */
1963 static int
1964 decode_text_parts (CT ct, int encoding, const char *decodetypes,
1965 int *message_mods) {
1966 int status = OK;
1967 int lf_line_endings = 0;
1968
1969 switch (ct->c_type) {
1970 case CT_MULTIPART: {
1971 struct multipart *m = (struct multipart *) ct->c_ctparams;
1972 struct part *part;
1973
1974 /* Should check to see if the body for this part is encoded?
1975 For now, it gets passed along as-is by InitMultiPart(). */
1976 for (part = m->mp_parts; status == OK && part; part = part->mp_next) {
1977 status = decode_text_parts (part->mp_part, encoding, decodetypes,
1978 message_mods);
1979 }
1980 break;
1981 }
1982
1983 case CT_MESSAGE:
1984 if (ct->c_subtype == MESSAGE_EXTERNAL) {
1985 struct exbody *e = (struct exbody *) ct->c_ctparams;
1986
1987 status = decode_text_parts (e->eb_content, encoding, decodetypes,
1988 message_mods);
1989 }
1990 break;
1991
1992 default:
1993 if (! should_decode(decodetypes, ct->c_ctinfo.ci_type, ct->c_ctinfo.ci_subtype)) {
1994 break;
1995 }
1996
1997 lf_line_endings =
1998 ct->c_ctparams && ((struct text *) ct->c_ctparams)->lf_line_endings;
1999
2000 switch (ct->c_encoding) {
2001 case CE_BASE64:
2002 case CE_QUOTED: {
2003 int ct_encoding;
2004
2005 if (decode_part (ct) == OK && ct->c_cefile.ce_file) {
2006 const char *reason = NULL;
2007
2008 if ((ct_encoding = content_encoding (ct, &reason)) == CE_BINARY
2009 && encoding != CE_BINARY) {
2010 /* The decoding isn't acceptable so discard it.
2011 Leave status as OK to allow other transformations. */
2012 if (verbosw) {
2013 report (NULL, ct->c_partno, ct->c_file,
2014 "will not decode%s because it is binary (%s)",
2015 ct->c_partno ? ""
2016 : (FENDNULL(ct->c_ctline)),
2017 reason);
2018 }
2019 (void) m_unlink (ct->c_cefile.ce_file);
2020 free (ct->c_cefile.ce_file);
2021 ct->c_cefile.ce_file = NULL;
2022 } else if (ct->c_encoding == CE_QUOTED &&
2023 ct_encoding == CE_8BIT && encoding == CE_7BIT) {
2024 /* The decoding isn't acceptable so discard it.
2025 Leave status as OK to allow other transformations. */
2026 if (verbosw) {
2027 report (NULL, ct->c_partno, ct->c_file,
2028 "will not decode%s because it is 8bit",
2029 ct->c_partno ? ""
2030 : (FENDNULL(ct->c_ctline)));
2031 }
2032 (void) m_unlink (ct->c_cefile.ce_file);
2033 free (ct->c_cefile.ce_file);
2034 ct->c_cefile.ce_file = NULL;
2035 } else {
2036 int enc;
2037
2038 if (ct_encoding == CE_BINARY) {
2039 enc = CE_BINARY;
2040 } else if (ct_encoding == CE_8BIT && encoding == CE_7BIT) {
2041 enc = CE_QUOTED;
2042 } else {
2043 enc = ct_encoding;
2044 }
2045 if (set_ce (ct, enc) == OK) {
2046 ++*message_mods;
2047 if (verbosw) {
2048 report (NULL, ct->c_partno, ct->c_file, "decode%s",
2049 FENDNULL(ct->c_ctline));
2050 }
2051 if (lf_line_endings) {
2052 strip_crs (ct, message_mods);
2053 }
2054 } else {
2055 status = NOTOK;
2056 }
2057 }
2058 } else {
2059 status = NOTOK;
2060 }
2061 break;
2062 }
2063 case CE_8BIT:
2064 case CE_7BIT:
2065 if (lf_line_endings) {
2066 strip_crs (ct, message_mods);
2067 }
2068 break;
2069 default:
2070 break;
2071 }
2072
2073 break;
2074 }
2075
2076 return status;
2077 }
2078
2079
/*
 * Determine if the part with type[/subtype] should be decoded, according to
 * decodetypes (which came from the -decodetypes switch).  Returns 1 if
 * either type or type/subtype is an element of the comma-separated
 * decodetypes, without regard to case, and 0 otherwise.
 */
static int
should_decode(const char *decodetypes, const char *type, const char *subtype) {
    /* Quick search for matching type[/subtype] in decodetypes: bracket
       decodetypes with commas, then search for ,type, and ,type/subtype, in
       it. */
    char *haystack = concat(",", decodetypes, ",", NULL);
    char *needle = concat(",", type, ",", NULL);
    int found_match = nmh_strcasestr(haystack, needle) != NULL;

    free(needle);

    if (! found_match && subtype != NULL) {
        needle = concat(",", type, "/", subtype, ",", NULL);
        found_match = nmh_strcasestr(haystack, needle) != NULL;
        free(needle);
    }

    free(haystack);

    return found_match;
}
2111
2112
2113 /*
2114 * See if the decoded content is 7bit, 8bit, or binary. It's binary
2115 * if it has any NUL characters, a CR not followed by a LF, or lines
2116 * greater than 998 characters in length. If binary, reason is set
2117 * to a string explaining why.
2118 */
2119 static int
2120 content_encoding (CT ct, const char **reason) {
2121 CE ce = &ct->c_cefile;
2122 int encoding = CE_7BIT;
2123
2124 if (ce->ce_file) {
2125 size_t line_len = 0;
2126 char buffer[NMH_BUFSIZ];
2127 size_t inbytes;
2128
2129 if (! ce->ce_fp && (ce->ce_fp = fopen (ce->ce_file, "r")) == NULL) {
2130 advise (ce->ce_file, "unable to open for reading");
2131 return CE_UNKNOWN;
2132 }
2133
2134 fseeko (ce->ce_fp, 0L, SEEK_SET);
2135 while (encoding != CE_BINARY &&
2136 (inbytes = fread (buffer, 1, sizeof buffer, ce->ce_fp)) > 0) {
2137 char *cp;
2138 size_t i;
2139 int last_char_was_cr = 0;
2140
2141 for (i = 0, cp = buffer; i < inbytes; ++i, ++cp) {
2142 if (*cp == '\0' || ++line_len > 998 ||
2143 (*cp != '\n' && last_char_was_cr)) {
2144 encoding = CE_BINARY;
2145 if (*cp == '\0') {
2146 *reason = "null character";
2147 } else if (line_len > 998) {
2148 *reason = "line length > 998";
2149 } else if (*cp != '\n' && last_char_was_cr) {
2150 *reason = "CR not followed by LF";
2151 } else {
2152 /* Should not reach this. */
2153 *reason = "";
2154 }
2155 break;
2156 }
2157 if (*cp == '\n') {
2158 line_len = 0;
2159 } else if (! isascii ((unsigned char) *cp)) {
2160 encoding = CE_8BIT;
2161 }
2162
2163 last_char_was_cr = *cp == '\r';
2164 }
2165 }
2166
2167 fclose (ce->ce_fp);
2168 ce->ce_fp = NULL;
2169 } /* else should never happen */
2170
2171 return encoding;
2172 }
2173
2174
2175 /*
2176 * Strip carriage returns from content.
2177 */
2178 static int
2179 strip_crs (CT ct, int *message_mods) {
2180 char *charset = content_charset (ct);
2181 int status = OK;
2182
2183 /* Only strip carriage returns if content is ASCII or another
2184 charset that has the same readily recognizable CR followed by a
2185 LF. We can include UTF-8 here because if the high-order bit of
2186 a UTF-8 byte is 0, then it must be a single-byte ASCII
2187 character. */
2188 if (! strcasecmp (charset, "US-ASCII") ||
2189 ! strcasecmp (charset, "UTF-8") ||
2190 ! strncasecmp (charset, "ISO-8859-", 9) ||
2191 ! strncasecmp (charset, "WINDOWS-12", 10)) {
2192 char **file = NULL;
2193 FILE **fp = NULL;
2194 size_t begin;
2195 size_t end;
2196 int has_crs = 0;
2197 int opened_input_file = 0;
2198
2199 if (ct->c_cefile.ce_file) {
2200 file = &ct->c_cefile.ce_file;
2201 fp = &ct->c_cefile.ce_fp;
2202 begin = end = 0;
2203 } else if (ct->c_file) {
2204 file = &ct->c_file;
2205 fp = &ct->c_fp;
2206 begin = (size_t) ct->c_begin;
2207 end = (size_t) ct->c_end;
2208 } /* else don't know where the content is */
2209
2210 if (file && *file && fp) {
2211 if (! *fp) {
2212 if ((*fp = fopen (*file, "r")) == NULL) {
2213 advise (*file, "unable to open for reading");
2214 status = NOTOK;
2215 } else {
2216 opened_input_file = 1;
2217 }
2218 }
2219 }
2220
2221 if (fp && *fp) {
2222 char buffer[NMH_BUFSIZ];
2223 size_t bytes_read;
2224 size_t bytes_to_read =
2225 end > 0 && end > begin ? end - begin : sizeof buffer;
2226
2227 fseeko (*fp, begin, SEEK_SET);
2228 while ((bytes_read = fread (buffer, 1,
2229 min (bytes_to_read, sizeof buffer),
2230 *fp)) > 0) {
2231 /* Look for CR followed by a LF. This is supposed to
2232 be text so there should be LF's. If not, don't
2233 modify the content. */
2234 char *cp;
2235 size_t i;
2236 int last_char_was_cr = 0;
2237
2238 if (end > 0) { bytes_to_read -= bytes_read; }
2239
2240 for (i = 0, cp = buffer; i < bytes_read; ++i, ++cp) {
2241 if (*cp == '\n' && last_char_was_cr) {
2242 has_crs = 1;
2243 break;
2244 }
2245
2246 last_char_was_cr = *cp == '\r';
2247 }
2248 }
2249
2250 if (has_crs) {
2251 int fd;
2252 char *stripped_content_file;
2253 char *tempfile = m_mktemp2 (NULL, invo_name, &fd, NULL);
2254
2255 if (tempfile == NULL) {
2256 adios (NULL, "unable to create temporary file in %s",
2257 get_temp_dir());
2258 }
2259 stripped_content_file = mh_xstrdup (tempfile);
2260
2261 /* Strip each CR before a LF from the content. */
2262 fseeko (*fp, begin, SEEK_SET);
2263 while ((bytes_read = fread (buffer, 1, sizeof buffer, *fp)) >
2264 0) {
2265 char *cp;
2266 size_t i;
2267 int last_char_was_cr = 0;
2268
2269 for (i = 0, cp = buffer; i < bytes_read; ++i, ++cp) {
2270 if (*cp == '\r') {
2271 last_char_was_cr = 1;
2272 } else if (last_char_was_cr) {
2273 if (*cp != '\n') {
2274 if (write (fd, "\r", 1) < 0) {
2275 advise (tempfile, "CR write");
2276 }
2277 }
2278 if (write (fd, cp, 1) < 0) {
2279 advise (tempfile, "write");
2280 }
2281 last_char_was_cr = 0;
2282 } else {
2283 if (write (fd, cp, 1) < 0) {
2284 advise (tempfile, "write");
2285 }
2286 last_char_was_cr = 0;
2287 }
2288 }
2289 }
2290
2291 if (close (fd)) {
2292 inform("unable to write temporary file %s, continuing...",
2293 stripped_content_file);
2294 (void) m_unlink (stripped_content_file);
2295 free(stripped_content_file);
2296 status = NOTOK;
2297 } else {
2298 /* Replace the decoded file with the converted one. */
2299 if (ct->c_cefile.ce_file && ct->c_cefile.ce_unlink)
2300 (void) m_unlink (ct->c_cefile.ce_file);
2301
2302 free(ct->c_cefile.ce_file);
2303 ct->c_cefile.ce_file = stripped_content_file;
2304 ct->c_cefile.ce_unlink = 1;
2305
2306 ++*message_mods;
2307 if (verbosw) {
2308 report (NULL, ct->c_partno,
2309 begin == 0 && end == 0 ? "" : *file,
2310 "stripped CRs");
2311 }
2312 }
2313 }
2314
2315 if (opened_input_file) {
2316 fclose (*fp);
2317 *fp = NULL;
2318 }
2319 }
2320 }
2321
2322 free (charset);
2323
2324 return status;
2325 }
2326
2327
2328 /*
2329 * Add/update, if necessary, the message C-T-E, based on the least restrictive
2330 * of the part C-T-E's.
2331 */
2332 static void
2333 update_cte (CT ct) {
2334 const int least_restrictive_enc = least_restrictive_encoding (ct);
2335
2336 if (least_restrictive_enc != CE_UNKNOWN &&
2337 least_restrictive_enc != CE_7BIT) {
2338 char *cte = concat (" ", ce_str (least_restrictive_enc), "\n", NULL);
2339 HF hf;
2340 int found_cte = 0;
2341
2342 /* Update/add Content-Transfer-Encoding header field. */
2343 for (hf = ct->c_first_hf; hf; hf = hf->next) {
2344 if (! strcasecmp (ENCODING_FIELD, hf->name)) {
2345 found_cte = 1;
2346 free (hf->value);
2347 hf->value = cte;
2348 }
2349 }
2350 if (! found_cte) {
2351 add_header (ct, mh_xstrdup (ENCODING_FIELD), cte);
2352 }
2353 }
2354 }
2355
2356
2357 /*
2358 * Find the least restrictive encoding (7bit, 8bit, binary) of the parts
2359 * within a message.
2360 */
2361 static int
2362 least_restrictive_encoding (CT ct) {
2363 int encoding = CE_UNKNOWN;
2364
2365 switch (ct->c_type) {
2366 case CT_MULTIPART: {
2367 struct multipart *m = (struct multipart *) ct->c_ctparams;
2368 struct part *part;
2369
2370 for (part = m->mp_parts; part; part = part->mp_next) {
2371 const int part_encoding =
2372 least_restrictive_encoding (part->mp_part);
2373
2374 if (less_restrictive (encoding, part_encoding)) {
2375 encoding = part_encoding;
2376 }
2377 }
2378 break;
2379 }
2380
2381 case CT_MESSAGE:
2382 if (ct->c_subtype == MESSAGE_EXTERNAL) {
2383 struct exbody *e = (struct exbody *) ct->c_ctparams;
2384 const int part_encoding =
2385 least_restrictive_encoding (e->eb_content);
2386
2387 if (less_restrictive (encoding, part_encoding)) {
2388 encoding = part_encoding;
2389 }
2390 }
2391 break;
2392
2393 default: {
2394 if (less_restrictive (encoding, ct->c_encoding)) {
2395 encoding = ct->c_encoding;
2396 }
2397 }}
2398
2399 return encoding;
2400 }
2401
2402
2403 /*
2404 * Return whether the second encoding is less restrictive than the first, where
2405 * "less restrictive" is in the sense used by RFC 2045 Secs. 6.1 and 6.4. So,
2406 * CE_BINARY is less restrictive than CE_8BIT and
2407 * CE_8BIT is less restrictive than CE_7BIT.
2408 */
2409 static int
2410 less_restrictive (int encoding, int second_encoding) {
2411 switch (second_encoding) {
2412 case CE_BINARY:
2413 return encoding != CE_BINARY;
2414 case CE_8BIT:
2415 return encoding != CE_BINARY && encoding != CE_8BIT;
2416 case CE_7BIT:
2417 return encoding != CE_BINARY && encoding != CE_8BIT &&
2418 encoding != CE_7BIT;
2419 default :
2420 return 0;
2421 }
2422 }
2423
2424
2425 /*
2426 * Convert character set of each part.
2427 */
2428 static int
2429 convert_charsets (CT ct, char *dest_charset, int *message_mods) {
2430 int status = OK;
2431
2432 switch (ct->c_type) {
2433 case CT_TEXT:
2434 if (ct->c_subtype == TEXT_PLAIN) {
2435 status = convert_charset (ct, dest_charset, message_mods);
2436 if (status == OK) {
2437 if (verbosw) {
2438 char *ct_charset = content_charset (ct);
2439
2440 report (NULL, ct->c_partno, ct->c_file,
2441 "convert %s to %s", ct_charset, dest_charset);
2442 free (ct_charset);
2443 }
2444 } else {
2445 char *ct_charset = content_charset (ct);
2446
2447 report ("iconv", ct->c_partno, ct->c_file,
2448 "failed to convert %s to %s", ct_charset, dest_charset);
2449 free (ct_charset);
2450 }
2451 }
2452 break;
2453
2454 case CT_MULTIPART: {
2455 struct multipart *m = (struct multipart *) ct->c_ctparams;
2456 struct part *part;
2457
2458 /* Should check to see if the body for this part is encoded?
2459 For now, it gets passed along as-is by InitMultiPart(). */
2460 for (part = m->mp_parts; status == OK && part; part = part->mp_next) {
2461 status =
2462 convert_charsets (part->mp_part, dest_charset, message_mods);
2463 }
2464 break;
2465 }
2466
2467 case CT_MESSAGE:
2468 if (ct->c_subtype == MESSAGE_EXTERNAL) {
2469 struct exbody *e = (struct exbody *) ct->c_ctparams;
2470
2471 status =
2472 convert_charsets (e->eb_content, dest_charset, message_mods);
2473 }
2474 break;
2475
2476 default:
2477 break;
2478 }
2479
2480 return status;
2481 }
2482
2483
2484 /*
2485 * Fix various problems that aren't handled elsewhere. These
2486 * are fixed unconditionally: there are no switches to disable
2487 * them. Currently, "problems" are these:
2488 * 1) remove extraneous semicolon at the end of a header parameter list
2489 * 2) replace RFC 2047 encoding with RFC 2231 encoding of name and
2490 * filename parameters in Content-Type and Content-Disposition
2491 * headers, respectively.
2492 */
2493 static int
2494 fix_always (CT ct, int *message_mods) {
2495 int status = OK;
2496
2497 switch (ct->c_type) {
2498 case CT_MULTIPART: {
2499 struct multipart *m = (struct multipart *) ct->c_ctparams;
2500 struct part *part;
2501
2502 for (part = m->mp_parts; status == OK && part; part = part->mp_next) {
2503 status = fix_always (part->mp_part, message_mods);
2504 }
2505 break;
2506 }
2507
2508 case CT_MESSAGE:
2509 if (ct->c_subtype == MESSAGE_EXTERNAL) {
2510 struct exbody *e = (struct exbody *) ct->c_ctparams;
2511
2512 status = fix_always (e->eb_content, message_mods);
2513 }
2514 break;
2515
2516 default: {
2517 HF hf;
2518
2519 if (ct->c_first_hf) {
2520 fix_filename_encoding (ct);
2521 }
2522
2523 for (hf = ct->c_first_hf; hf; hf = hf->next) {
2524 size_t len = strlen (hf->value);
2525
2526 if (strcasecmp (hf->name, TYPE_FIELD) != 0 &&
2527 strcasecmp (hf->name, DISPO_FIELD) != 0) {
2528 /* Only do this for Content-Type and
2529 Content-Disposition fields because those are the
2530 only headers that parse_mime() warns about. */
2531 continue;
2532 }
2533
2534 /* whitespace following a trailing ';' will be nuked as well */
2535 if (hf->value[len - 1] == '\n') {
2536 while (isspace((unsigned char)(hf->value[len - 2]))) {
2537 if (len-- == 0) { break; }
2538 }
2539 }
2540
2541 if (hf->value[len - 2] == ';') {
2542 /* Remove trailing ';' from parameter value. */
2543 hf->value[len - 2] = '\n';
2544 hf->value[len - 1] = '\0';
2545
2546 /* Also, if Content-Type parameter, remove trailing ';'
2547 from ct->c_ctline. This probably isn't necessary
2548 but can't hurt. */
2549 if (strcasecmp(hf->name, TYPE_FIELD) == 0 && ct->c_ctline) {
2550 size_t l = strlen(ct->c_ctline) - 1;
2551 while (isspace((unsigned char)(ct->c_ctline[l])) ||
2552 ct->c_ctline[l] == ';') {
2553 ct->c_ctline[l--] = '\0';
2554 if (l == 0) { break; }
2555 }
2556 }
2557
2558 ++*message_mods;
2559 if (verbosw) {
2560 report (NULL, ct->c_partno, ct->c_file,
2561 "remove trailing ; from %s parameter value",
2562 hf->name);
2563 }
2564 }
2565 }
2566 }}
2567
2568 return status;
2569 }
2570
2571
2572 /*
2573 * Factor out common code for loops in fix_filename_encoding().
2574 */
2575 static int
2576 fix_filename_param (char *name, char *value, PM *first_pm, PM *last_pm) {
2577 int fixed = 0;
2578
2579 if (has_prefix(value, "=?") && has_suffix(value, "?=")) {
2580 /* Looks like an RFC 2047 encoded parameter. */
2581 char decoded[PATH_MAX + 1];
2582
2583 if (decode_rfc2047 (value, decoded, sizeof decoded)) {
2584 /* Encode using RFC 2231. */
2585 replace_param (first_pm, last_pm, name, decoded, 0);
2586 fixed = 1;
2587 } else {
2588 inform("failed to decode %s parameter %s", name, value);
2589 }
2590 }
2591
2592 return fixed;
2593 }
2594
2595
2596 /*
2597 * Replace RFC 2047 encoding with RFC 2231 encoding of name and
2598 * filename parameters in Content-Type and Content-Disposition
2599 * headers, respectively.
2600 */
2601 static int
2602 fix_filename_encoding (CT ct) {
2603 PM pm;
2604 HF hf;
2605 int fixed = 0;
2606
2607 for (pm = ct->c_ctinfo.ci_first_pm; pm; pm = pm->pm_next) {
2608 if (pm->pm_name && pm->pm_value &&
2609 strcasecmp (pm->pm_name, "name") == 0) {
2610 fixed = fix_filename_param (pm->pm_name, pm->pm_value,
2611 &ct->c_ctinfo.ci_first_pm,
2612 &ct->c_ctinfo.ci_last_pm);
2613 }
2614 }
2615
2616 for (pm = ct->c_dispo_first; pm; pm = pm->pm_next) {
2617 if (pm->pm_name && pm->pm_value &&
2618 strcasecmp (pm->pm_name, "filename") == 0) {
2619 fixed = fix_filename_param (pm->pm_name, pm->pm_value,
2620 &ct->c_dispo_first,
2621 &ct->c_dispo_last);
2622 }
2623 }
2624
2625 /* Fix hf values to correspond. */
2626 for (hf = ct->c_first_hf; fixed && hf; hf = hf->next) {
2627 enum { OTHER, TYPE_HEADER, DISPO_HEADER } field = OTHER;
2628
2629 if (strcasecmp (hf->name, TYPE_FIELD) == 0) {
2630 field = TYPE_HEADER;
2631 } else if (strcasecmp (hf->name, DISPO_FIELD) == 0) {
2632 field = DISPO_HEADER;
2633 }
2634
2635 if (field != OTHER) {
2636 const char *const semicolon_loc = strchr (hf->value, ';');
2637
2638 if (semicolon_loc) {
2639 const size_t len =
2640 strlen (hf->name) + 1 + semicolon_loc - hf->value;
2641 const char *const params =
2642 output_params (len,
2643 field == TYPE_HEADER
2644 ? ct->c_ctinfo.ci_first_pm
2645 : ct->c_dispo_first,
2646 NULL, 0);
2647 const char *const new_params = concat (params, "\n", NULL);
2648
2649 replace_substring (&hf->value, semicolon_loc, new_params);
2650 free((void *)new_params); /* Cast away const. Sigh. */
2651 free((void *)params);
2652 } else {
2653 inform("did not find semicolon in %s:%s\n",
2654 hf->name, hf->value);
2655 }
2656 }
2657 }
2658
2659 return OK;
2660 }
2661
2662
2663 /*
2664 * Output content in input file to output file.
2665 */
2666 static int
2667 write_content (CT ct, const char *input_filename, char *outfile, FILE *outfp,
2668 int modify_inplace, int message_mods) {
2669 int status = OK;
2670
2671 if (modify_inplace) {
2672 if (message_mods > 0) {
2673 if ((status = output_message_fp (ct, outfp, outfile)) == OK) {
2674 char *infile = input_filename
2675 ? mh_xstrdup (input_filename)
2676 : mh_xstrdup (ct->c_file ? ct->c_file : "-");
2677
2678 if (remove_file (infile) == OK) {
2679 if (rename (outfile, infile)) {
2680 /* Rename didn't work, possibly because of an
2681 attempt to rename across filesystems. Try
2682 brute force copy. */
2683 int old = open (outfile, O_RDONLY);
2684 int new =
2685 open (infile, O_WRONLY | O_CREAT, m_gmprot ());
2686 int i = -1;
2687
2688 if (old != -1 && new != -1) {
2689 char buffer[NMH_BUFSIZ];
2690
2691 while ((i = read (old, buffer, sizeof buffer)) >
2692 0) {
2693 if (write (new, buffer, i) != i) {
2694 i = -1;
2695 break;
2696 }
2697 }
2698 }
2699 if (new != -1) { close (new); }
2700 if (old != -1) { close (old); }
2701 (void) m_unlink (outfile);
2702
2703 if (i < 0) {
2704 /* The -file argument processing used path() to
2705 expand filename to absolute path. */
2706 int file = ct->c_file && ct->c_file[0] == '/';
2707
2708 inform("unable to rename %s %s to %s, continuing...",
2709 file ? "file" : "message", outfile,
2710 infile);
2711 status = NOTOK;
2712 }
2713 }
2714 } else {
2715 inform("unable to remove input file %s, "
2716 "not modifying it, continuing...", infile);
2717 (void) m_unlink (outfile);
2718 status = NOTOK;
2719 }
2720
2721 free (infile);
2722 } else {
2723 status = NOTOK;
2724 }
2725 } else {
2726 /* No modifications and didn't need the tmp outfile. */
2727 (void) m_unlink (outfile);
2728 }
2729 } else {
2730 /* Output is going to some file. Produce it whether or not
2731 there were modifications. */
2732 status = output_message_fp (ct, outfp, outfile);
2733 }
2734
2735 flush_errors ();
2736 return status;
2737 }
2738
2739
2740 /*
2741 * parse_mime() does not set lf_line_endings in struct text, so use this
2742 * function to do it. It touches the parts the decodetypes identifies.
2743 */
2744 static void
2745 set_text_ctparams(CT ct, char *decodetypes, int lf_line_endings) {
2746 switch (ct->c_type) {
2747 case CT_MULTIPART: {
2748 struct multipart *m = (struct multipart *) ct->c_ctparams;
2749 struct part *part;
2750
2751 for (part = m->mp_parts; part; part = part->mp_next) {
2752 set_text_ctparams(part->mp_part, decodetypes, lf_line_endings);
2753 }
2754 break;
2755 }
2756
2757 case CT_MESSAGE:
2758 if (ct->c_subtype == MESSAGE_EXTERNAL) {
2759 struct exbody *e = (struct exbody *) ct->c_ctparams;
2760
2761 set_text_ctparams(e->eb_content, decodetypes, lf_line_endings);
2762 }
2763 break;
2764
2765 default:
2766 if (should_decode(decodetypes, ct->c_ctinfo.ci_type, ct->c_ctinfo.ci_subtype)) {
2767 if (ct->c_ctparams == NULL) {
2768 ct->c_ctparams = mh_xcalloc(1, sizeof (struct text));
2769 }
2770 ((struct text *) ct->c_ctparams)->lf_line_endings = lf_line_endings;
2771 }
2772 }
2773 }
2774
2775
2776 /*
2777 * If "rmmproc" is defined, call that to remove the file. Otherwise,
2778 * use the standard MH backup file.
2779 */
2780 static int
2781 remove_file (const char *file) {
2782 if (rmmproc) {
2783 char *rmm_command = concat (rmmproc, " ", file, NULL);
2784 int status = system (rmm_command);
2785
2786 free (rmm_command);
2787 return WIFEXITED (status) ? WEXITSTATUS (status) : NOTOK;
2788 }
2789 /* This is OK for a non-message file, it still uses the
2790 BACKUP_PREFIX form. The backup file will be in the same
2791 directory as file. */
2792 return rename (file, m_backup (file));
2793 }
2794
2795
2796 /*
2797 * Output formatted message to user.
2798 */
2799 static void
2800 report (char *what, char *partno, char *filename, char *message, ...) {
2801 va_list args;
2802 char *fmt;
2803
2804 if (verbosw) {
2805 va_start (args, message);
2806 fmt = concat (filename, partno ? " part " : ", ",
2807 FENDNULL(partno), partno ? ", " : "", message, NULL);
2808
2809 advertise (what, NULL, fmt, args);
2810
2811 free (fmt);
2812 va_end (args);
2813 }
2814 }
2815
2816
/*
 * Signal handler:  terminate via done (1).  For SIGQUIT, first flush
 * stdout and write a newline to stderr.
 */
static void
pipeser (int i)
{
    const int got_quit = (i == SIGQUIT);

    if (got_quit) {
        fflush (stdout);
        fputs ("\n", stderr);
        fflush (stderr);
    }

    done (1);
    /* NOTREACHED */
}