]> diplodocus.org Git - nmh/blob - uip/mhfixmsg.c
Remove SYNOPSIS from nmh(7).
[nmh] / uip / mhfixmsg.c
1 /*
2 * mhfixmsg.c -- rewrite a message with various transformations
3 *
4 * This code is Copyright (c) 2002 and 2013, by the authors of nmh.
5 * See the COPYRIGHT file in the root directory of the nmh
6 * distribution for complete copyright information.
7 */
8
9 #include <h/mh.h>
10 #include <h/mime.h>
11 #include <h/mhparse.h>
12 #include <h/utils.h>
13 #include <h/signals.h>
14 #include <fcntl.h>
15 #ifdef HAVE_ICONV
16 # include <iconv.h>
17 #endif
18
/* Command-line switch table, written as an X-macro list.  Each
   X(sw, minchars, id) entry gives the switch text, a second field named
   "minchars" (always 0 here), and the identifier that main() uses as a
   case label for that switch. */
19 #define MHFIXMSG_SWITCHES \
20 X("decodetext 8bit|7bit", 0, DECODETEXTSW) \
21 X("nodecodetext", 0, NDECODETEXTSW) \
22 X("textcodeset", 0, TEXTCODESETSW) \
23 X("notextcodeset", 0, NTEXTCODESETSW) \
24 X("reformat", 0, REFORMATSW) \
25 X("noreformat", 0, NREFORMATSW) \
26 X("fixboundary", 0, FIXBOUNDARYSW) \
27 X("nofixboundary", 0, NFIXBOUNDARYSW) \
28 X("fixcte", 0, FIXCTESW) \
29 X("nofixcte", 0, NFIXCTESW) \
30 X("file file", 0, FILESW) \
31 X("outfile file", 0, OUTFILESW) \
32 X("rmmproc program", 0, RPROCSW) \
33 X("normmproc", 0, NRPRCSW) \
34 X("verbose", 0, VERBSW) \
35 X("noverbose", 0, NVERBSW) \
36 X("version", 0, VERSIONSW) \
37 X("help", 0, HELPSW) \
38
/* First expansion: each entry contributes just its identifier. */
39 #define X(sw, minchars, id) id,
40 DEFINE_SWITCH_ENUM(MHFIXMSG);
41 #undef X
42
/* Second expansion: each entry contributes a { sw, minchars, id }
   initializer for the "switches" table passed to smatch() in main(). */
43 #define X(sw, minchars, id) { sw, minchars, id },
44 DEFINE_SWITCH_ARRAY(MHFIXMSG, switches);
45 #undef X
46
47
/* Set to 1 by -verbose and to 0 by -noverbose; the transformations
   call report() only when it is non-zero. */
48 int verbosw;
49 int debugsw; /* Needed by mhparse.c. */
50
/* SIGQUIT is installed with the same handler as SIGPIPE (see main()). */
51 #define quitser pipeser
52
53 /* mhparse.c */
54 extern char *tmp; /* directory to place tmp files */
55 extern int skip_mp_cte_check; /* flag to InitMultiPart */
56 extern int suppress_bogus_mp_content_warning; /* flag to InitMultiPart */
57 extern int bogus_mp_content; /* flag from InitMultiPart */
58 CT parse_mime (char *);
59 void reverse_parts (CT);
60
61 /* mhoutsbr.c */
62 int output_message (CT, char *);
63
64 /* mhshowsbr.c */
65 int show_content_aux (CT, int, int, char *, char *);
66
67 /* mhmisc.c */
68 void flush_errors (void);
69
70 /* mhfree.c */
/* NULL-terminated array of parsed messages; allocated in main(). */
71 extern CT *cts;
72 void freects_done (int) NORETURN;
73
74 /*
75 * static prototypes
76 */
/* The set of transformations requested on the command line.  main()
   fills one of these in and passes it to mhfixmsgsbr(). */
77 typedef struct fix_transformations {
/* Non-zero to repair a mismatched multipart boundary (-fixboundary). */
78 int fixboundary;
/* Non-zero to replace an invalid multipart Content-Transfer-Encoding
   (-fixcte). */
79 int fixcte;
/* Non-zero to add text/plain alternatives for other text parts
   (-reformat). */
80 int reformat;
/* CE_8BIT or CE_7BIT as selected by -decodetext, or 0 for
   -nodecodetext. */
81 int decodetext;
/* Argument of -textcodeset, or NULL when no conversion is requested. */
82 char *textcodeset;
83 } fix_transformations;
84
/* NOTE(review): mhfixmsgsbr() is declared without "static" although it
   is listed under the "static prototypes" heading. */
85 int mhfixmsgsbr (CT *, const fix_transformations *, char *);
86 static void reverse_alternative_parts (CT);
87 static int fix_boundary (CT *, int *);
88 static int get_multipart_boundary (CT, char **);
89 static int replace_boundary (CT, char *, const char *);
90 static char *update_attr (char *, const char *, const char *e);
91 static int fix_multipart_cte (CT, int *);
92 static int set_ce (CT, int);
93 static int ensure_text_plain (CT *, CT, int *);
94 static CT build_text_plain_part (CT);
95 static CT divide_part (CT);
96 static void copy_ctinfo (CI, CI);
97 static int decode_part (CT);
98 static int reformat_part (CT, char *, char *, char *, int);
99 static int charset_encoding (CT);
100 static CT build_multipart_alt (CT, CT, int, int);
101 static int boundary_in_content (FILE **, char *, const char *);
102 static void transfer_noncontent_headers (CT, CT);
103 static int set_ct_type (CT, int type, int subtype, int encoding);
104 static int decode_text_parts (CT, int, int *);
105 static int content_encoding (CT);
106 static int strip_crs (CT, int *);
107 static int convert_codesets (CT, char *, int *);
108 static int convert_codeset (CT, char *, int *);
109 static char *content_codeset (CT);
110 static int write_content (CT, char *, char *, int, int);
111 static int remove_file (char *);
112 static void report (char *, char *, char *, ...);
113 static char *upcase (char *);
114 static void pipeser (int);
115
116
/*
 * Program entry point.
 *
 * Parses the command-line switches into a fix_transformations record,
 * parses either a single file (-file, where "-" means standard input)
 * or the selected messages of a folder into the global cts array, and
 * calls mhfixmsgsbr() on each parsed message.  The accumulated status
 * is handed to done(), which was set to freects_done at the top.
 */
117 int
118 main (int argc, char **argv) {
119 int msgnum;
120 char *cp, *file = NULL, *folder = NULL;
121 char *maildir, buf[100], *outfile = NULL;
122 char **argp, **arguments;
123 struct msgs_array msgs = { 0, 0, NULL };
124 struct msgs *mp = NULL;
125 CT *ctp;
126 FILE *fp;
127 int using_stdin = 0;
128 int status = OK;
129 fix_transformations fx;
/* Defaults: reformat, fixcte and fixboundary on, decode text to 8bit,
   no codeset conversion. */
130 fx.reformat = fx.fixcte = fx.fixboundary = 1;
131 fx.decodetext = CE_8BIT;
132 fx.textcodeset = NULL;
133
134 done = freects_done;
135
136 #ifdef LOCALE
137 setlocale(LC_ALL, "");
138 #endif
139 invo_name = r1bindex (argv[0], '/');
140
141 /* read user profile/context */
142 context_read();
143
144 arguments = getarguments (invo_name, argc, argv, 1);
145 argp = arguments;
146
147 /*
148 * Parse arguments
149 */
150 while ((cp = *argp++)) {
151 if (*cp == '-') {
152 switch (smatch (++cp, switches)) {
153 case AMBIGSW:
154 ambigsw (cp, switches);
155 done (1);
156 case UNKWNSW:
157 adios (NULL, "-%s unknown", cp);
158
159 case HELPSW:
160 snprintf (buf, sizeof buf, "%s [+folder] [msgs] [switches]",
161 invo_name);
162 print_help (buf, switches, 1);
163 done (0);
164 case VERSIONSW:
165 print_version(invo_name);
166 done (0);
167
168 case DECODETEXTSW:
/* argp[-2] is the switch itself, because argp has already been
   advanced past the switch's argument. */
169 if (! (cp = *argp++) || *cp == '-')
170 adios (NULL, "missing argument to %s", argp[-2]);
171 if (! strcasecmp (cp, "8bit")) {
172 fx.decodetext = CE_8BIT;
173 } else if (! strcasecmp (cp, "7bit")) {
174 fx.decodetext = CE_7BIT;
175 } else {
176 adios (NULL, "invalid argument to %s", argp[-2]);
177 }
178 continue;
179 case NDECODETEXTSW:
180 fx.decodetext = 0;
181 continue;
182 case TEXTCODESETSW:
/* A lone "-" is accepted as an argument here; only "-x..." is
   treated as a missing argument. */
183 if (! (cp = *argp++) || (*cp == '-' && cp[1]))
184 adios (NULL, "missing argument to %s", argp[-2]);
185 fx.textcodeset = cp;
186 continue;
187 case NTEXTCODESETSW:
188 fx.textcodeset = 0;
189 continue;
190 case FIXBOUNDARYSW:
191 fx.fixboundary = 1;
192 continue;
193 case NFIXBOUNDARYSW:
194 fx.fixboundary = 0;
195 continue;
196 case FIXCTESW:
197 fx.fixcte = 1;
198 continue;
199 case NFIXCTESW:
200 fx.fixcte = 0;
201 continue;
202 case REFORMATSW:
203 fx.reformat = 1;
204 continue;
205 case NREFORMATSW:
206 fx.reformat = 0;
207 continue;
208
209 case FILESW:
210 if (! (cp = *argp++) || (*cp == '-' && cp[1]))
211 adios (NULL, "missing argument to %s", argp[-2]);
/* "-" is kept literally (it means standard input below); any other
   name is passed through path(). */
212 file = *cp == '-' ? add (cp, NULL) : path (cp, TFILE);
213 continue;
214
215 case OUTFILESW:
216 if (! (cp = *argp++) || (*cp == '-' && cp[1]))
217 adios (NULL, "missing argument to %s", argp[-2]);
218 outfile = *cp == '-' ? add (cp, NULL) : path (cp, TFILE);
219 continue;
220
221 case RPROCSW:
222 if (!(rmmproc = *argp++) || *rmmproc == '-')
223 adios (NULL, "missing argument to %s", argp[-2]);
224 continue;
225 case NRPRCSW:
226 rmmproc = NULL;
227 continue;
228
229 case VERBSW:
230 verbosw = 1;
231 continue;
232 case NVERBSW:
233 verbosw = 0;
234 continue;
235 }
236 }
/* Anything that is not a switch is either a folder or a message
   specification. */
237 if (*cp == '+' || *cp == '@') {
238 if (folder)
239 adios (NULL, "only one folder at a time!");
240 else
241 folder = pluspath (cp);
242 } else
243 app_msgarg(&msgs, cp);
244 }
245
246 SIGNAL (SIGQUIT, quitser);
247 SIGNAL (SIGPIPE, pipeser);
248
249 /*
250 * Read the standard profile setup
251 */
252 if ((fp = fopen (cp = etcpath ("mhn.defaults"), "r"))) {
253 readconfig ((struct node **) 0, fp, cp, 0);
254 fclose (fp);
255 }
256
257 /*
258 * Check for storage directory. If specified,
259 * then store temporary files there. Else we
260 * store them in standard nmh directory.
261 */
262 if ((cp = context_find (nmhstorage)) && *cp)
263 tmp = concat (cp, "/", invo_name, NULL);
264 else
265 tmp = add (m_maildir (invo_name), NULL);
266
/* Tell the parser not to warn about, or reject, the problems that
   this program is meant to fix. */
267 suppress_bogus_mp_content_warning = skip_mp_cte_check = 1;
268
269 if (! context_find ("path"))
270 free (path ("./", TFOLDER));
271
272 if (file && msgs.size)
273 adios (NULL, "cannot specify msg and file at same time!");
274
275 /*
276 * check if message is coming from file
277 */
278 if (file) {
279 /* If file is stdin, create a tmp file name before parse_mime()
280 has a chance, because it might put in on a different
281 filesystem than the output file. Instead, put it in the
282 user's preferred tmp directory. */
283 CT ct;
284
285 if (! strcmp ("-", file)) {
286 int fd;
/* This cp shadows the function-level cp. */
287 char *cp;
288
289 using_stdin = 1;
290
291 if ((cp = m_mktemp2 (tmp, invo_name, &fd, NULL)) == NULL) {
292 adios (NULL, "unable to create temporary file");
293 } else {
294 free (file);
295 file = add (cp, NULL);
296 chmod (file, 0600);
297 cpydata (STDIN_FILENO, fd, "-", file);
298 }
299
300 if (close (fd)) {
301 unlink (file);
302 adios (NULL, "failed to write temporary file");
303 }
304 }
305
/* Room for one message plus the terminating NULL entry. */
306 if (! (cts = (CT *) calloc ((size_t) 2, sizeof *cts)))
307 adios (NULL, "out of memory");
308 ctp = cts;
309
310 if ((ct = parse_mime (file))) *ctp++ = ct;
311 } else {
312 /*
313 * message(s) are coming from a folder
314 */
315 CT ct;
316
317 if (! msgs.size)
318 app_msgarg(&msgs, "cur");
319 if (! folder)
320 folder = getfolder (1);
321 maildir = m_maildir (folder);
322
323 if (chdir (maildir) == NOTOK)
324 adios (maildir, "unable to change directory to");
325
326 /* read folder and create message structure */
327 if (! (mp = folder_read (folder, 1)))
328 adios (NULL, "unable to read folder %s", folder);
329
330 /* check for empty folder */
331 if (mp->nummsg == 0)
332 adios (NULL, "no messages in %s", folder);
333
334 /* parse all the message ranges/sequences and set SELECTED */
335 for (msgnum = 0; msgnum < msgs.size; msgnum++)
336 if (! m_convert (mp, msgs.msgs[msgnum]))
337 done (1);
338 seq_setprev (mp); /* set the previous-sequence */
339
/* One slot per selected message plus the terminating NULL entry. */
340 if (! (cts = (CT *) calloc ((size_t) (mp->numsel + 1), sizeof *cts)))
341 adios (NULL, "out of memory");
342 ctp = cts;
343
/* Messages that fail to parse are simply left out of cts. */
344 for (msgnum = mp->lowsel; msgnum <= mp->hghsel; msgnum++) {
345 if (is_selected(mp, msgnum)) {
346 char *msgnam;
347
348 msgnam = m_name (msgnum);
349 if ((ct = parse_mime (msgnam))) *ctp++ = ct;
350 }
351 }
352
353 seq_setcur (mp, mp->hghsel); /* update current message */
354 seq_save (mp); /* synchronize sequences */
355 context_replace (pfolder, folder);/* update current folder */
356 context_save (); /* save the context file */
357 }
358
359 if (*cts) {
360 for (ctp = cts; *ctp; ++ctp) {
/* NOTE(review): the return values are summed, so the final value
   handed to done() depends on how many messages failed. */
361 status += mhfixmsgsbr (ctp, &fx, outfile);
362
363 if (using_stdin) {
364 unlink (file);
365
366 if (! outfile) {
367 /* Just calling m_backup() unlinks the backup file. */
368 (void) m_backup (file);
369 }
370 }
371 }
372 } else {
/* Nothing could be parsed. */
373 status = 1;
374 }
375
376 free (outfile);
377 free (tmp);
378 free (file);
379
380 /* done is freects_done, which will clean up all of cts. */
381 done (status);
382 return NOTOK;
383 }
384
385
386 int
387 mhfixmsgsbr (CT *ctp, const fix_transformations *fx, char *outfile) {
388 /* Store input filename in case one of the transformations, i.e.,
389 fix_boundary(), rewrites to a tmp file. */
390 char *input_filename = add ((*ctp)->c_file, NULL);
391 int modify_inplace = 0;
392 int message_mods = 0;
393 int status = OK;
394
395 if (outfile == NULL) {
396 modify_inplace = 1;
397
398 if ((*ctp)->c_file) {
399 outfile = add (m_mktemp2 (tmp, invo_name, NULL, NULL), NULL);
400 } else {
401 adios (NULL, "missing both input and output filenames\n");
402 }
403 }
404
405 reverse_alternative_parts (*ctp);
406 if (status == OK && fx->fixboundary) {
407 status = fix_boundary (ctp, &message_mods);
408 }
409 if (status == OK && fx->fixcte) {
410 status = fix_multipart_cte (*ctp, &message_mods);
411 }
412 if (status == OK && fx->reformat) {
413 status = ensure_text_plain (ctp, NULL, &message_mods);
414 }
415 if (status == OK && fx->decodetext) {
416 status = decode_text_parts (*ctp, fx->decodetext, &message_mods);
417 }
418 if (status == OK && fx->textcodeset != NULL) {
419 status = convert_codesets (*ctp, fx->textcodeset, &message_mods);
420 }
421
422 if (! (*ctp)->c_umask) {
423 /* Set the umask for the contents file. This currently
424 isn't used but just in case it is in the future. */
425 struct stat st;
426
427 if (stat ((*ctp)->c_file, &st) != NOTOK) {
428 (*ctp)->c_umask = ~(st.st_mode & 0777);
429 } else {
430 (*ctp)->c_umask = ~m_gmprot();
431 }
432 }
433
434 /*
435 * Write the content to a file
436 */
437 if (status == OK) {
438 status = write_content (*ctp, input_filename, outfile, modify_inplace,
439 message_mods);
440 } else if (! modify_inplace) {
441 /* Something went wrong. Output might be expected, such
442 as if this were run as a filter. Just copy the input
443 to the output. */
444 int in = open (input_filename, O_RDONLY);
445 int out = strcmp (outfile, "-")
446 ? open (outfile, O_WRONLY | O_CREAT, m_gmprot ())
447 : STDOUT_FILENO;
448
449 if (in != -1 && out != -1) {
450 cpydata (in, out, input_filename, outfile);
451 } else {
452 status = NOTOK;
453 }
454
455 close (out);
456 close (in);
457 }
458
459 if (modify_inplace) {
460 if (status != OK) unlink (outfile);
461 free (outfile);
462 outfile = NULL;
463 }
464
465 free (input_filename);
466
467 return status;
468 }
469
470
471 /* parse_mime() arranges alternates in reverse (priority) order, so
472 reverse them back. This will put a text/plain part at the front of
473 a multipart/alternative part, for example, where it belongs. */
474 static void
475 reverse_alternative_parts (CT ct) {
476 if (ct->c_type == CT_MULTIPART) {
477 struct multipart *m = (struct multipart *) ct->c_ctparams;
478 struct part *part;
479
480 if (ct->c_subtype == MULTI_ALTERNATE) {
481 reverse_parts (ct);
482 }
483
484 /* And call recursively on each part of a multipart. */
485 for (part = m->mp_parts; part; part = part->mp_next) {
486 reverse_alternative_parts (part->mp_part);
487 }
488 }
489 }
490
491
492 static int
493 fix_boundary (CT *ct, int *message_mods) {
494 struct multipart *mp;
495 int status = OK;
496
497 if (bogus_mp_content) {
498 mp = (struct multipart *) (*ct)->c_ctparams;
499
500 /*
501 * 1) Get boundary at end of part.
502 * 2) Get boundary at beginning of part and compare to the end-of-part
503 * boundary.
504 * 3) Write out contents of ct to tmp file, replacing boundary in
505 * header with boundary from part. Set c_unlink to 1.
506 * 4) Free ct.
507 * 5) Call parse_mime() on the tmp file, replacing ct.
508 */
509
510 if (mp && mp->mp_start) {
511 char *part_boundary;
512
513 if (get_multipart_boundary (*ct, &part_boundary) == OK) {
514 char *fixed;
515
516 if ((fixed = m_mktemp2 (tmp, invo_name, NULL, &(*ct)->c_fp))) {
517 if (replace_boundary (*ct, fixed, part_boundary) == OK) {
518 char *filename = add ((*ct)->c_file, NULL);
519
520 free_content (*ct);
521 if ((*ct = parse_mime (fixed))) {
522 (*ct)->c_unlink = 1;
523
524 ++*message_mods;
525 if (verbosw) {
526 report (NULL, filename,
527 "fix multipart boundary");
528 }
529 }
530 free (filename);
531 } else {
532 advise (NULL, "unable to replace broken boundary");
533 status = NOTOK;
534 }
535 } else {
536 advise (NULL, "unable to create temporary file");
537 status = NOTOK;
538 }
539
540 free (part_boundary);
541 }
542 }
543 }
544
545 return status;
546 }
547
548
/*
 * Find the boundary string actually used in the body of multipart ct.
 *
 * The end of the part is scanned backwards, one buffer at a time, for a
 * line of the form "--<boundary>--"; the text between the leading and
 * trailing "--" is taken as the boundary.  The part is then scanned
 * forward from c_begin for that same text preceded by "\n--".
 *
 * On OK, *part_boundary is a newly allocated copy of the boundary that
 * the caller must free.  Otherwise *part_boundary is set to NULL and
 * NOTOK is returned.  ct->c_fp is opened here if it was not already
 * open, and is left open.
 */
549 static int
550 get_multipart_boundary (CT ct, char **part_boundary) {
551 char buffer[BUFSIZ];
552 char *end_boundary = NULL;
/* Start one buffer's worth before c_end, but never before c_begin. */
553 off_t begin = (off_t) ct->c_end > (off_t) (ct->c_begin + sizeof buffer)
554 ? (off_t) (ct->c_end - sizeof buffer)
555 : (off_t) ct->c_begin;
556 size_t bytes_read;
557 int status = OK;
558
559 /* This will fail if the boundary spans fread() calls. BUFSIZ should
560 be big enough, even if it's just 1024, to make that unlikely. */
561
562 /* free_content() will close ct->c_fp. */
563 if (! ct->c_fp && (ct->c_fp = fopen (ct->c_file, "r")) == NULL) {
564 advise (ct->c_file, "unable to open for reading");
565 return NOTOK;
566 }
567
568 /* Get boundary at end of multipart. */
569 while (begin >= (off_t) ct->c_begin) {
570 fseeko (ct->c_fp, begin, SEEK_SET);
/* NOTE(review): the inner loop keeps reading to end of file, not just
   to c_end, and its break leaves only this inner loop. */
571 while ((bytes_read = fread (buffer, 1, sizeof buffer, ct->c_fp)) > 0) {
572 char *end = buffer + bytes_read - 1;
573 char *cp;
574
575 if ((cp = rfind_str (buffer, bytes_read, "--"))) {
576 /* Trim off trailing "--" and anything beyond. */
/* NOTE(review): if the match is at the very start of buffer, cp is
   decremented to before the buffer -- confirm that cannot happen. */
577 *cp-- = '\0';
578 if ((end = rfind_str (buffer, cp - buffer, "\n"))) {
/* Require "\n--" followed by at least one boundary character; end is
   left pointing just past the leading "--". */
579 if (strlen (end) > 3 && *end++ == '\n' &&
580 *end++ == '-' && *end++ == '-') {
581 end_boundary = add (end, NULL);
582 break;
583 }
584 }
585 }
586 }
587
/* Not found yet: step back one more buffer if there is room before
   c_begin, otherwise give up. */
588 if (! end_boundary && begin > (off_t) (ct->c_begin + sizeof buffer)) {
589 begin -= sizeof buffer;
590 } else {
591 break;
592 }
593 }
594
595 /* Get boundary at beginning of multipart. */
596 if (end_boundary) {
597 fseeko (ct->c_fp, ct->c_begin, SEEK_SET);
/* NOTE(review): status starts as OK and is set to NOTOK only when a
   chunk shorter than the boundary is read.  A full-size chunk without
   a match leaves status unchanged -- confirm this is intended. */
598 while ((bytes_read = fread (buffer, 1, sizeof buffer, ct->c_fp)) > 0) {
599 if (bytes_read >= strlen (end_boundary)) {
600 char *cp = find_str (buffer, bytes_read, end_boundary);
601
602 if (cp && cp - buffer >= 2 && *--cp == '-' &&
603 *--cp == '-' && (cp > buffer && *--cp == '\n')) {
604 status = OK;
605 break;
606 }
607 } else {
608 /* The start and end boundaries didn't match, or the
609 start boundary doesn't begin with "\n--" (or "--"
610 if at the beginning of buffer). Keep trying. */
611 status = NOTOK;
612 }
613 }
614 } else {
615 status = NOTOK;
616 }
617
/* Hand ownership of the boundary to the caller only on success. */
618 if (status == OK) {
619 *part_boundary = end_boundary;
620 } else {
621 *part_boundary = NULL;
622 free (end_boundary);
623 }
624
625 return status;
626 }
627
628
629 /* Open and copy ct->c_file to file, replacing the multipart boundary. */
630 static int
631 replace_boundary (CT ct, char *file, const char *boundary) {
632 FILE *fpin, *fpout;
633 int compnum, state;
634 char buf[BUFSIZ], name[NAMESZ];
635 char *np, *vp;
636 m_getfld_state_t gstate = 0;
637 int status = OK;
638
639 if (ct->c_file == NULL) {
640 advise (NULL, "missing input filename");
641 return NOTOK;
642 }
643
644 if ((fpin = fopen (ct->c_file, "r")) == NULL) {
645 advise (ct->c_file, "unable to open for reading");
646 return NOTOK;
647 }
648
649 if ((fpout = fopen (file, "w")) == NULL) {
650 fclose (fpin);
651 advise (file, "unable to open for writing");
652 return NOTOK;
653 }
654
655 for (compnum = 1;;) {
656 int bufsz = (int) sizeof buf;
657
658 switch (state = m_getfld (&gstate, name, buf, &bufsz, fpin)) {
659 case FLD:
660 case FLDPLUS:
661 compnum++;
662
663 /* get copies of the buffers */
664 np = add (name, NULL);
665 vp = add (buf, NULL);
666
667 /* if necessary, get rest of field */
668 while (state == FLDPLUS) {
669 bufsz = sizeof buf;
670 state = m_getfld (&gstate, name, buf, &bufsz, fpin);
671 vp = add (buf, vp); /* add to previous value */
672 }
673
674 if (strcasecmp (TYPE_FIELD, np)) {
675 fprintf (fpout, "%s:%s", np, vp);
676 } else {
677 char *new_boundary = update_attr (vp, "boundary=", boundary);
678
679 fprintf (fpout, "%s:%s\n", np, new_boundary);
680 free (new_boundary);
681 }
682
683 free (vp);
684 free (np);
685
686 continue;
687
688 case BODY:
689 fputs ("\n", fpout);
690 /* buf will have a terminating NULL, skip it. */
691 fwrite (buf, 1, bufsz-1, fpout);
692 continue;
693
694 case FILEEOF:
695 break;
696
697 case LENERR:
698 case FMTERR:
699 advise (NULL, "message format error in component #%d", compnum);
700 status = NOTOK;
701 break;
702
703 default:
704 advise (NULL, "getfld() returned %d", state);
705 status = NOTOK;
706 break;
707 }
708
709 break;
710 }
711
712 m_getfld_state_destroy (&gstate);
713 fclose (fpout);
714 fclose (fpin);
715
716 return status;
717 }
718
719
720 /* Change the value of a name=value pair in a header field body.
721 If the name isn't there, append them. In any case, a new
722 string will be allocated and must be free'd by the caller.
723 Trims any trailing newlines. */
/*
 * Change the value of a name=value pair in a header field body.
 *
 *   body   header field body; it is modified in place as scratch space
 *   name   attribute name including the '=', e.g. "boundary="
 *   value  new value; it is written inside double quotes
 *
 * If name is present in body (case-insensitive search), its value is
 * replaced and everything from the next ';' onward is kept.  If name
 * is absent, "; name"value"" is appended after removing one trailing
 * newline and one trailing (extraneous) semicolon.
 *
 * Returns a newly allocated string that the caller must free.
 * Trailing newlines are trimmed from the result.
 */
static char *
update_attr (char *body, const char *name, const char *value) {
    char *bp = nmh_strcasestr (body, name);
    char *new_body;

    if (bp) {
        char *value_start = bp + strlen (name);
        char *other_attrs = strchr (value_start, ';');
        /* With an empty old value ("name=;rest") the terminator below
           lands on the ';' itself. */
        int clobbers_separator = other_attrs == value_start;

        *value_start = '\0';
        new_body = concat (body, "\"", value, "\"", NULL);

        if (other_attrs) {
            size_t len;

            if (clobbers_separator) {
                /* body has been copied; restore the ';' so that the
                   remaining attributes are not lost. */
                *other_attrs = ';';
            }

            /* Trim any trailing newlines. */
            len = strlen (other_attrs);
            while (len > 1  &&  other_attrs[len - 1] == '\n') {
                other_attrs[--len] = '\0';
            }
            new_body = add (other_attrs, new_body);
        }
    } else {
        size_t len = strlen (body);

        /* Append name/value pair, after first removing a final newline
           and (extraneous) semicolon.  The length checks keep an empty
           body from being indexed at -1. */
        if (len > 0  &&  body[len - 1] == '\n') {
            body[--len] = '\0';
        }
        if (len > 0  &&  body[len - 1] == ';') {
            body[--len] = '\0';
        }
        new_body = concat (body, "; ", name, "\"", value, "\"", NULL);
    }

    return new_body;
}
756
757
758 static int
759 fix_multipart_cte (CT ct, int *message_mods) {
760 int status = OK;
761
762 if (ct->c_type == CT_MULTIPART) {
763 struct multipart *m;
764 struct part *part;
765
766 if (ct->c_encoding != CE_7BIT && ct->c_encoding != CE_8BIT &&
767 ct->c_encoding != CE_BINARY) {
768 HF hf;
769
770 for (hf = ct->c_first_hf; hf; hf = hf->next) {
771 char *name = hf->name;
772 for (; *name && isspace ((unsigned char) *name); ++name) {
773 continue;
774 }
775
776 if (! strncasecmp (name, ENCODING_FIELD,
777 strlen (ENCODING_FIELD))) {
778 char *prefix = "Nmh-REPLACED-INVALID-";
779 HF h = mh_xmalloc (sizeof *h);
780
781 h->name = add (hf->name, NULL);
782 h->hf_encoding = hf->hf_encoding;
783 h->next = hf->next;
784 hf->next = h;
785
786 /* Retain old header but prefix its name. */
787 free (hf->name);
788 hf->name = concat (prefix, h->name, NULL);
789
790 ++*message_mods;
791 if (verbosw) {
792 char *encoding = cpytrim (hf->value);
793 report (ct->c_partno, ct->c_file,
794 "replace Content-Transfer-Encoding of %s "
795 "with 8 bit", encoding);
796 free (encoding);
797 }
798
799 h->value = add (" 8bit\n", NULL);
800
801 /* Don't need to warn for multiple C-T-E header
802 fields, parse_mime() already does that. But
803 if there are any, fix them all as necessary. */
804 hf = h;
805 }
806 }
807
808 set_ce (ct, CE_8BIT);
809 }
810
811 m = (struct multipart *) ct->c_ctparams;
812 for (part = m->mp_parts; part; part = part->mp_next) {
813 if (fix_multipart_cte (part->mp_part, message_mods) != OK) {
814 status = NOTOK;
815 break;
816 }
817 }
818 }
819
820 return status;
821 }
822
823
824 static int
825 set_ce (CT ct, int encoding) {
826 const char *ce = ce_str (encoding);
827 const struct str2init *ctinit = get_ce_method (ce);
828
829 if (ctinit) {
830 char *cte = concat (" ", ce, "\n", NULL);
831 int found_cte = 0;
832 HF hf;
833 /* Decoded contents might be in ct->c_cefile.ce_file, if the
834 caller is decode_text_parts (). Save because we'll
835 overwrite below. */
836 struct cefile decoded_content_info = ct->c_cefile;
837
838 ct->c_encoding = encoding;
839
840 ct->c_ctinitfnx = ctinit->si_init;
841 /* This will assign ct->c_cefile with an all-0 struct, which
842 is what we want. */
843 (*ctinit->si_init) (ct);
844 /* After returning, the caller should set
845 ct->c_cefile.ce_file to the name of the file containing
846 the contents. */
847
848 /* Restore the cefile. */
849 ct->c_cefile = decoded_content_info;
850
851 /* Update/add Content-Transfer-Encoding header field. */
852 for (hf = ct->c_first_hf; hf; hf = hf->next) {
853 if (! strcasecmp (ENCODING_FIELD, hf->name)) {
854 found_cte = 1;
855 free (hf->value);
856 hf->value = cte;
857 }
858 }
859 if (! found_cte) {
860 add_header (ct, add (ENCODING_FIELD, NULL), cte);
861 }
862
863 /* Update c_celine. It's used only by mhlist -debug. */
864 free (ct->c_celine);
865 ct->c_celine = add (cte, NULL);
866
867 return OK;
868 } else {
869 return NOTOK;
870 }
871 }
872
873
874 /* Make sure each text part has a corresponding text/plain part. */
875 static int
876 ensure_text_plain (CT *ct, CT parent, int *message_mods) {
877 int status = OK;
878
879 switch ((*ct)->c_type) {
880 case CT_TEXT: {
881 int has_text_plain = 0;
882
883 /* Nothing to do for text/plain. */
884 if ((*ct)->c_subtype == TEXT_PLAIN) return OK;
885
886 if (parent && parent->c_type == CT_MULTIPART &&
887 parent->c_subtype == MULTI_ALTERNATE) {
888 struct multipart *mp = (struct multipart *) parent->c_ctparams;
889 struct part *part;
890 int new_subpart_number = 1;
891
892 /* See if there is a sibling text/plain. */
893 for (part = mp->mp_parts; part; part = part->mp_next) {
894 ++new_subpart_number;
895 if (part->mp_part->c_type == CT_TEXT &&
896 part->mp_part->c_subtype == TEXT_PLAIN) {
897 has_text_plain = 1;
898 break;
899 }
900 }
901
902 if (! has_text_plain) {
903 /* Parent is a multipart/alternative. Insert a new
904 text/plain subpart. */
905 struct part *new_part = mh_xmalloc (sizeof *new_part);
906
907 if ((new_part->mp_part = build_text_plain_part (*ct))) {
908 char buffer[16];
909 snprintf (buffer, sizeof buffer, "%d", new_subpart_number);
910
911 new_part->mp_next = mp->mp_parts;
912 mp->mp_parts = new_part;
913 new_part->mp_part->c_partno =
914 concat (parent->c_partno ? parent->c_partno : "1", ".",
915 buffer, NULL);
916
917 ++*message_mods;
918 if (verbosw) {
919 report (parent->c_partno, parent->c_file,
920 "insert text/plain part");
921 }
922 } else {
923 free_content (new_part->mp_part);
924 free (new_part);
925 status = NOTOK;
926 }
927 }
928 } else {
929 /* Slip new text/plain part into a new multipart/alternative. */
930 CT tp_part = build_text_plain_part (*ct);
931
932 if (tp_part) {
933 CT mp_alt = build_multipart_alt (*ct, tp_part, CT_MULTIPART,
934 MULTI_ALTERNATE);
935 if (mp_alt) {
936 struct multipart *mp =
937 (struct multipart *) mp_alt->c_ctparams;
938
939 if (mp && mp->mp_parts) {
940 mp->mp_parts->mp_part = tp_part;
941 /* Make the new multipart/alternative the parent. */
942 *ct = mp_alt;
943
944 ++*message_mods;
945 if (verbosw) {
946 report ((*ct)->c_partno, (*ct)->c_file,
947 "insert text/plain part");
948 }
949 } else {
950 free_content (tp_part);
951 free_content (mp_alt);
952 status = NOTOK;
953 }
954 } else {
955 status = NOTOK;
956 }
957 } else {
958 status = NOTOK;
959 }
960 }
961 break;
962 }
963
964 case CT_MULTIPART: {
965 struct multipart *mp = (struct multipart *) (*ct)->c_ctparams;
966 struct part *part;
967
968 for (part = mp->mp_parts; status == OK && part; part = part->mp_next) {
969 if ((*ct)->c_type == CT_MULTIPART) {
970 status = ensure_text_plain (&part->mp_part, *ct, message_mods);
971 }
972 }
973 break;
974 }
975
976 case CT_MESSAGE:
977 if ((*ct)->c_subtype == MESSAGE_EXTERNAL) {
978 struct exbody *e;
979
980 e = (struct exbody *) (*ct)->c_ctparams;
981 status = ensure_text_plain (&e->eb_content, *ct, message_mods);
982 }
983 break;
984 }
985
986 return status;
987 }
988
989
990 static CT
991 build_text_plain_part (CT encoded_part) {
992 CT tp_part = divide_part (encoded_part);
993 char *tmp_plain_file = NULL;
994
995 if (decode_part (tp_part) == OK) {
996 /* Now, tp_part->c_cefile.ce_file is the name of the tmp file that
997 contains the decoded contents. And the decoding function, such
998 as openQuoted, will have set ...->ce_unlink to 1 so that it will
999 be unlinked by free_content (). */
1000 tmp_plain_file = add (m_mktemp2 (tmp, invo_name, NULL, NULL), NULL);
1001 if (reformat_part (tp_part, tmp_plain_file,
1002 tp_part->c_ctinfo.ci_type,
1003 tp_part->c_ctinfo.ci_subtype,
1004 tp_part->c_type) == OK) {
1005 return tp_part;
1006 }
1007 }
1008
1009 free_content (tp_part);
1010 unlink (tmp_plain_file);
1011 free (tmp_plain_file);
1012
1013 return NULL;
1014 }
1015
1016
1017 static CT
1018 divide_part (CT ct) {
1019 CT new_part;
1020
1021 if ((new_part = (CT) calloc (1, sizeof *new_part)) == NULL)
1022 adios (NULL, "out of memory");
1023
1024 /* Just copy over what is needed for decoding. c_vrsn and
1025 c_celine aren't necessary. */
1026 new_part->c_file = add (ct->c_file, NULL);
1027 new_part->c_begin = ct->c_begin;
1028 new_part->c_end = ct->c_end;
1029 copy_ctinfo (&new_part->c_ctinfo, &ct->c_ctinfo);
1030 new_part->c_type = ct->c_type;
1031 new_part->c_cefile = ct->c_cefile;
1032 new_part->c_encoding = ct->c_encoding;
1033 new_part->c_ctinitfnx = ct->c_ctinitfnx;
1034 new_part->c_ceopenfnx = ct->c_ceopenfnx;
1035 new_part->c_ceclosefnx = ct->c_ceclosefnx;
1036 new_part->c_cesizefnx = ct->c_cesizefnx;
1037
1038 /* c_ctline is used by reformat__part(), so it can preserve
1039 anything after the type/subtype. */
1040 new_part->c_ctline = add (ct->c_ctline, NULL);
1041
1042 return new_part;
1043 }
1044
1045
1046 static void
1047 copy_ctinfo (CI dest, CI src) {
1048 char **s_ap, **d_ap, **s_vp, **d_vp;
1049
1050 dest->ci_type = src->ci_type ? add (src->ci_type, NULL) : NULL;
1051 dest->ci_subtype = src->ci_subtype ? add (src->ci_subtype, NULL) : NULL;
1052
1053 for (s_ap = src->ci_attrs, d_ap = dest->ci_attrs,
1054 s_vp = src->ci_values, d_vp = dest->ci_values;
1055 *s_ap;
1056 ++s_ap, ++d_ap, ++s_vp, ++d_vp) {
1057 *d_ap = add (*s_ap, NULL);
1058 *d_vp = *s_vp;
1059 }
1060 *d_ap = NULL;
1061
1062 dest->ci_comment = src->ci_comment ? add (src->ci_comment, NULL) : NULL;
1063 dest->ci_magic = src->ci_magic ? add (src->ci_magic, NULL) : NULL;
1064 }
1065
1066
1067 static int
1068 decode_part (CT ct) {
1069 char *tmp_decoded;
1070 int status;
1071
1072 tmp_decoded = add (m_mktemp2 (tmp, invo_name, NULL, NULL), NULL);
1073 /* The following call will load ct->c_cefile.ce_file with the tmp
1074 filename of the decoded content. tmp_decoded will contain the
1075 encoded output, get rid of that. */
1076 status = output_message (ct, tmp_decoded);
1077 unlink (tmp_decoded);
1078 free (tmp_decoded);
1079
1080 return status;
1081 }
1082
1083
1084 /* Some of the arguments aren't really needed now, but maybe will
1085 be in the future for other than text types. */
1086 static int
1087 reformat_part (CT ct, char *file, char *type, char *subtype, int c_type) {
1088 int output_subtype, output_encoding;
1089 char *cp, *cf;
1090 int status;
1091
1092 /* Hacky: this redirects the output from whatever command is used
1093 to show the part to a file. So, the user can't have any output
1094 redirection in that command.
1095 Could show_multi() in mhshowsbr.c avoid this? */
1096
1097 /* Check for invo_name-format-type/subtype. */
1098 cp = concat (invo_name, "-format-", type, "/", subtype, NULL);
1099 if ((cf = context_find (cp)) && *cf != '\0') {
1100 if (strchr (cf, '>')) {
1101 free (cp);
1102 advise (NULL, "'>' prohibited in \"%s\",\nplease fix your "
1103 "%s-format-%s/%s profile entry", cf, invo_name, type,
1104 subtype);
1105 return NOTOK;
1106 }
1107 } else {
1108 free (cp);
1109
1110 /* Check for invo_name-format-type. */
1111 cp = concat (invo_name, "-format-", type, NULL);
1112 if (! (cf = context_find (cp)) || *cf == '\0') {
1113 free (cp);
1114 if (verbosw) {
1115 advise (NULL, "Don't know how to convert %s, there is no "
1116 "%s-format-%s/%s profile entry",
1117 ct->c_file, invo_name, type, subtype);
1118 }
1119 return NOTOK;
1120 }
1121
1122 if (strchr (cf, '>')) {
1123 free (cp);
1124 advise (NULL, "'>' prohibited in \"%s\"", cf);
1125 return NOTOK;
1126 }
1127 }
1128 free (cp);
1129
1130 cp = concat (cf, " >", file, NULL);
1131 status = show_content_aux (ct, 1, 0, cp, NULL);
1132 free (cp);
1133
1134 /* Unlink decoded content tmp file and free its filename to avoid
1135 leaks. The file stream should already have been closed. */
1136 if (ct->c_cefile.ce_unlink) {
1137 unlink (ct->c_cefile.ce_file);
1138 free (ct->c_cefile.ce_file);
1139 ct->c_cefile.ce_file = NULL;
1140 ct->c_cefile.ce_unlink = 0;
1141 }
1142
1143 if (c_type == CT_TEXT) {
1144 output_subtype = TEXT_PLAIN;
1145 } else {
1146 /* Set subtype to 0, which is always an UNKNOWN subtype. */
1147 output_subtype = 0;
1148 }
1149 output_encoding = charset_encoding (ct);
1150
1151 if (set_ct_type (ct, c_type, output_subtype, output_encoding) == OK) {
1152 ct->c_cefile.ce_file = file;
1153 ct->c_cefile.ce_unlink = 1;
1154 } else {
1155 ct->c_cefile.ce_unlink = 0;
1156 status = NOTOK;
1157 }
1158
1159 return status;
1160 }
1161
1162
1163 /* Identifies 7bit or 8bit content based on charset. */
1164 static int
1165 charset_encoding (CT ct) {
1166 /* norm_charmap() is case sensitive. */
1167 char *codeset = upcase (content_codeset (ct));
1168 int encoding =
1169 strcmp (norm_charmap (codeset), "US-ASCII") ? CE_8BIT : CE_7BIT;
1170
1171 free (codeset);
1172 return encoding;
1173 }
1174
1175
1176 static CT
1177 build_multipart_alt (CT first_alt, CT new_part, int type, int subtype) {
1178 char *boundary_prefix = "----=_nmh-multipart";
1179 char *boundary = concat (boundary_prefix, first_alt->c_partno, NULL);
1180 char *boundary_indicator = "; boundary=";
1181 char *typename, *subtypename, *name;
1182 CT ct;
1183 struct part *p;
1184 struct multipart *m;
1185 char *cp;
1186 const struct str2init *ctinit;
1187
1188 if ((ct = (CT) calloc (1, sizeof *ct)) == NULL)
1189 adios (NULL, "out of memory");
1190
1191 /* Set up the multipart/alternative part. These fields of *ct were
1192 initialized to 0 by calloc():
1193 c_fp, c_unlink, c_begin, c_end,
1194 c_vrsn, c_ctline, c_celine,
1195 c_id, c_descr, c_dispo, c_partno,
1196 c_ctinfo.ci_comment, c_ctinfo.ci_magic,
1197 c_cefile, c_encoding,
1198 c_digested, c_digest[16], c_ctexbody,
1199 c_ctinitfnx, c_ceopenfnx, c_ceclosefnx, c_cesizefnx,
1200 c_umask, c_pid, c_rfc934,
1201 c_showproc, c_termproc, c_storeproc, c_storage, c_folder
1202 */
1203
1204 ct->c_file = add (first_alt->c_file, NULL);
1205 ct->c_type = type;
1206 ct->c_subtype = subtype;
1207
1208 ctinit = get_ct_init (ct->c_type);
1209
1210 typename = ct_type_str (type);
1211 subtypename = ct_subtype_str (type, subtype);
1212
1213 {
1214 int serial = 0;
1215 int found_boundary = 1;
1216
1217 while (found_boundary && serial < 1000000) {
1218 found_boundary = 0;
1219
1220 /* Ensure that the boundary doesn't appear in the decoded
1221 content. */
1222 if (new_part->c_cefile.ce_file) {
1223 if ((found_boundary =
1224 boundary_in_content (&new_part->c_cefile.ce_fp,
1225 new_part->c_cefile.ce_file,
1226 boundary)) == -1) {
1227 return NULL;
1228 }
1229 }
1230
1231 /* Ensure that the boundary doesn't appear in the encoded
1232 content. */
1233 if (! found_boundary && new_part->c_file) {
1234 if ((found_boundary = boundary_in_content (&new_part->c_fp,
1235 new_part->c_file,
1236 boundary)) == -1) {
1237 return NULL;
1238 }
1239 }
1240
1241 if (found_boundary) {
1242 /* Try a slightly different boundary. */
1243 char buffer2[16];
1244
1245 free (boundary);
1246 ++serial;
1247 snprintf (buffer2, sizeof buffer2, "%d", serial);
1248 boundary =
1249 concat (boundary_prefix,
1250 first_alt->c_partno ? first_alt->c_partno : "",
1251 "-", buffer2, NULL);
1252 }
1253 }
1254
1255 if (found_boundary) {
1256 advise (NULL, "giving up trying to find a unique boundary");
1257 return NULL;
1258 }
1259 }
1260
1261 name = concat (" ", typename, "/", subtypename, boundary_indicator, "\"",
1262 boundary, "\"", NULL);
1263
1264 /* Load c_first_hf and c_last_hf. */
1265 transfer_noncontent_headers (first_alt, ct);
1266 add_header (ct, add (TYPE_FIELD, NULL), concat (name, "\n", NULL));
1267 free (name);
1268
1269 /* Load c_partno. */
1270 if (first_alt->c_partno) {
1271 ct->c_partno = add (first_alt->c_partno, NULL);
1272 free (first_alt->c_partno);
1273 first_alt->c_partno = concat (ct->c_partno, ".1", NULL);
1274 new_part->c_partno = concat (ct->c_partno, ".2", NULL);
1275 } else {
1276 first_alt->c_partno = add ("1", NULL);
1277 new_part->c_partno = add ("2", NULL);
1278 }
1279
1280 if (ctinit) {
1281 ct->c_ctinfo.ci_type = add (typename, NULL);
1282 ct->c_ctinfo.ci_subtype = add (subtypename, NULL);
1283 }
1284
1285 name = concat (" ", typename, "/", subtypename, boundary_indicator,
1286 boundary, NULL);
1287 if ((cp = strstr (name, boundary_indicator))) {
1288 ct->c_ctinfo.ci_attrs[0] = name;
1289 ct->c_ctinfo.ci_attrs[1] = NULL;
1290 /* ci_values don't get free'd, so point into ci_attrs. */
1291 ct->c_ctinfo.ci_values[0] = cp + strlen (boundary_indicator);
1292 }
1293
1294 p = (struct part *) mh_xmalloc (sizeof *p);
1295 p->mp_next = (struct part *) mh_xmalloc (sizeof *p->mp_next);
1296 p->mp_next->mp_next = NULL;
1297 p->mp_next->mp_part = first_alt;
1298
1299 if ((m = (struct multipart *) calloc (1, sizeof (struct multipart))) ==
1300 NULL)
1301 adios (NULL, "out of memory");
1302 m->mp_start = concat (boundary, "\n", NULL);
1303 m->mp_stop = concat (boundary, "--\n", NULL);
1304 m->mp_parts = p;
1305 ct->c_ctparams = (void *) m;
1306
1307 free (boundary);
1308
1309 return ct;
1310 }
1311
1312
1313 /* Check that the boundary does not appear in the content. */
1314 static int
1315 boundary_in_content (FILE **fp, char *file, const char *boundary) {
1316 char buffer[BUFSIZ];
1317 size_t bytes_read;
1318 int found_boundary = 0;
1319
1320 /* free_content() will close *fp if we fopen it here. */
1321 if (! *fp && (*fp = fopen (file, "r")) == NULL) {
1322 advise (file, "unable to open %s for reading", file);
1323 return NOTOK;
1324 }
1325
1326 fseeko (*fp, 0L, SEEK_SET);
1327 while ((bytes_read = fread (buffer, 1, sizeof buffer, *fp)) > 0) {
1328 if (find_str (buffer, bytes_read, boundary)) {
1329 found_boundary = 1;
1330 break;
1331 }
1332 }
1333
1334 return found_boundary;
1335 }
1336
1337
1338 /* Remove all non-Content headers. */
1339 static void
1340 transfer_noncontent_headers (CT old, CT new) {
1341 HF hp, hp_prev;
1342
1343 hp_prev = hp = old->c_first_hf;
1344 while (hp) {
1345 HF next = hp->next;
1346
1347 if (strncasecmp (XXX_FIELD_PRF, hp->name, strlen (XXX_FIELD_PRF))) {
1348 if (hp == old->c_last_hf) {
1349 if (hp == old->c_first_hf) {
1350 old->c_last_hf = old->c_first_hf = NULL;
1351 } else {
1352 hp_prev->next = NULL;
1353 old->c_last_hf = hp_prev;
1354 }
1355 } else {
1356 if (hp == old->c_first_hf) {
1357 old->c_first_hf = next;
1358 } else {
1359 hp_prev->next = next;
1360 }
1361 }
1362
1363 /* Put node hp in the new CT. */
1364 if (new->c_first_hf == NULL) {
1365 new->c_first_hf = hp;
1366 } else {
1367 new->c_last_hf->next = hp;
1368 }
1369 new->c_last_hf = hp;
1370 } else {
1371 /* A Content- header, leave in old. */
1372 hp_prev = hp;
1373 }
1374
1375 hp = next;
1376 }
1377 }
1378
1379
1380 static int
1381 set_ct_type (CT ct, int type, int subtype, int encoding) {
1382 char *typename = ct_type_str (type);
1383 char *subtypename = ct_subtype_str (type, subtype);
1384 /* E.g, " text/plain" */
1385 char *type_subtypename = concat (" ", typename, "/", subtypename, NULL);
1386 /* E.g, " text/plain\n" */
1387 char *name_plus_nl = concat (type_subtypename, "\n", NULL);
1388 int found_content_type = 0;
1389 HF hf;
1390 const char *cp = NULL;
1391 char *ctline;
1392 int status;
1393
1394 /* Update/add Content-Type header field. */
1395 for (hf = ct->c_first_hf; hf; hf = hf->next) {
1396 if (! strcasecmp (TYPE_FIELD, hf->name)) {
1397 found_content_type = 1;
1398 free (hf->value);
1399 hf->value = (cp = strchr (ct->c_ctline, ';'))
1400 ? concat (type_subtypename, cp, "\n", NULL)
1401 : add (name_plus_nl, NULL);
1402 }
1403 }
1404 if (! found_content_type) {
1405 add_header (ct, add (TYPE_FIELD, NULL),
1406 (cp = strchr (ct->c_ctline, ';'))
1407 ? concat (type_subtypename, cp, "\n", NULL)
1408 : add (name_plus_nl, NULL));
1409 }
1410
1411 /* Some of these might not be used, but set them anyway. */
1412 ctline = cp
1413 ? concat (type_subtypename, cp, NULL)
1414 : concat (type_subtypename, NULL);
1415 free (ct->c_ctline);
1416 ct->c_ctline = ctline;
1417 /* Leave other ctinfo members as they were. */
1418 free (ct->c_ctinfo.ci_type);
1419 ct->c_ctinfo.ci_type = add (typename, NULL);
1420 free (ct->c_ctinfo.ci_subtype);
1421 ct->c_ctinfo.ci_subtype = add (subtypename, NULL);
1422 ct->c_type = type;
1423 ct->c_subtype = subtype;
1424
1425 free (name_plus_nl);
1426 free (type_subtypename);
1427
1428 status = set_ce (ct, encoding);
1429
1430 return status;
1431 }
1432
1433
1434 static int
1435 decode_text_parts (CT ct, int encoding, int *message_mods) {
1436 int status = OK;
1437
1438 switch (ct->c_type) {
1439 case CT_TEXT:
1440 switch (ct->c_encoding) {
1441 case CE_BASE64:
1442 case CE_QUOTED: {
1443 int ct_encoding;
1444
1445 if (decode_part (ct) == OK && ct->c_cefile.ce_file) {
1446 if ((ct_encoding = content_encoding (ct)) == CE_BINARY &&
1447 encoding != CE_BINARY) {
1448 /* The decoding isn't acceptable so discard it.
1449 Leave status as OK to allow other transformations. */
1450 if (verbosw) {
1451 report (ct->c_partno, ct->c_file,
1452 "will not decode%s because it is binary",
1453 ct->c_partno ? ""
1454 : ct->c_ctline ? ct->c_ctline
1455 : "");
1456 }
1457 unlink (ct->c_cefile.ce_file);
1458 free (ct->c_cefile.ce_file);
1459 ct->c_cefile.ce_file = NULL;
1460 } else if (ct->c_encoding == CE_QUOTED &&
1461 ct_encoding == CE_8BIT && encoding == CE_7BIT) {
1462 /* The decoding isn't acceptable so discard it.
1463 Leave status as OK to allow other transformations. */
1464 if (verbosw) {
1465 report (ct->c_partno, ct->c_file,
1466 "will not decode%s because it is 8bit",
1467 ct->c_partno ? ""
1468 : ct->c_ctline ? ct->c_ctline
1469 : "");
1470 }
1471 unlink (ct->c_cefile.ce_file);
1472 free (ct->c_cefile.ce_file);
1473 ct->c_cefile.ce_file = NULL;
1474 } else {
1475 int enc;
1476 if (ct_encoding == CE_BINARY)
1477 enc = CE_BINARY;
1478 else if (ct_encoding == CE_8BIT && encoding == CE_7BIT)
1479 enc = CE_QUOTED;
1480 else
1481 enc = charset_encoding (ct);
1482 if (set_ce (ct, enc) == OK) {
1483 ++*message_mods;
1484 if (verbosw) {
1485 report (ct->c_partno, ct->c_file, "decode%s",
1486 ct->c_ctline ? ct->c_ctline : "");
1487 }
1488 strip_crs (ct, message_mods);
1489 } else {
1490 status = NOTOK;
1491 }
1492 }
1493 } else {
1494 status = NOTOK;
1495 }
1496 break;
1497 }
1498 case CE_8BIT:
1499 case CE_7BIT:
1500 strip_crs (ct, message_mods);
1501 break;
1502 default:
1503 break;
1504 }
1505
1506 break;
1507
1508 case CT_MULTIPART: {
1509 struct multipart *m = (struct multipart *) ct->c_ctparams;
1510 struct part *part;
1511
1512 /* Should check to see if the body for this part is encoded?
1513 For now, it gets passed along as-is by InitMultiPart(). */
1514 for (part = m->mp_parts; status == OK && part; part = part->mp_next) {
1515 status = decode_text_parts (part->mp_part, encoding, message_mods);
1516 }
1517 break;
1518 }
1519
1520 case CT_MESSAGE:
1521 if (ct->c_subtype == MESSAGE_EXTERNAL) {
1522 struct exbody *e;
1523
1524 e = (struct exbody *) ct->c_ctparams;
1525 status = decode_text_parts (e->eb_content, encoding, message_mods);
1526 }
1527 break;
1528
1529 default:
1530 break;
1531 }
1532
1533 return status;
1534 }
1535
1536
1537 /* See if the decoded content is 7bit, 8bit, or binary. It's binary
1538 if it has any NUL characters, a CR not followed by a LF, or lines
1539 greater than 998 characters in length. */
1540 static int
1541 content_encoding (CT ct) {
1542 CE ce = &ct->c_cefile;
1543 int encoding = CE_7BIT;
1544
1545 if (ce->ce_file) {
1546 char buffer[BUFSIZ];
1547 size_t inbytes;
1548
1549 if (! ce->ce_fp && (ce->ce_fp = fopen (ce->ce_file, "r")) == NULL) {
1550 advise (ce->ce_file, "unable to open for reading");
1551 return CE_UNKNOWN;
1552 }
1553
1554 fseeko (ce->ce_fp, 0L, SEEK_SET);
1555 while (encoding != CE_BINARY &&
1556 (inbytes = fread (buffer, 1, sizeof buffer, ce->ce_fp)) > 0) {
1557 char *cp;
1558 size_t i;
1559 size_t line_len = 0;
1560 int last_char_was_cr = 0;
1561
1562 for (i = 0, cp = buffer; i < inbytes; ++i, ++cp) {
1563 if (*cp == '\0' || ++line_len > 998 ||
1564 (*cp != '\n' && last_char_was_cr)) {
1565 encoding = CE_BINARY;
1566 break;
1567 } else if (*cp == '\n') {
1568 line_len = 0;
1569 } else if (! isascii ((unsigned char) *cp)) {
1570 encoding = CE_8BIT;
1571 }
1572
1573 last_char_was_cr = *cp == '\r' ? 1 : 0;
1574 }
1575 }
1576
1577 fclose (ce->ce_fp);
1578 ce->ce_fp = NULL;
1579 } /* else should never happen */
1580
1581 return encoding;
1582 }
1583
1584
1585 static int
1586 strip_crs (CT ct, int *message_mods) {
1587 /* norm_charmap() is case sensitive. */
1588 char *codeset = upcase (content_codeset (ct));
1589 int status = OK;
1590
1591 /* Only strip carriage returns if content is ASCII. */
1592 if (! strcmp (norm_charmap (codeset), "US-ASCII")) {
1593 char **file = NULL;
1594 FILE **fp = NULL;
1595 size_t begin;
1596 size_t end;
1597 int has_crs = 0;
1598 int opened_input_file = 0;
1599
1600 if (ct->c_cefile.ce_file) {
1601 file = &ct->c_cefile.ce_file;
1602 fp = &ct->c_cefile.ce_fp;
1603 begin = end = 0;
1604 } else if (ct->c_file) {
1605 file = &ct->c_file;
1606 fp = &ct->c_fp;
1607 begin = (size_t) ct->c_begin;
1608 end = (size_t) ct->c_end;
1609 } /* else don't know where the content is */
1610
1611 if (file && *file && fp) {
1612 if (! *fp) {
1613 if ((*fp = fopen (*file, "r")) == NULL) {
1614 advise (*file, "unable to open for reading");
1615 status = NOTOK;
1616 } else {
1617 opened_input_file = 1;
1618 }
1619 }
1620 }
1621
1622 if (fp && *fp) {
1623 char buffer[BUFSIZ];
1624 size_t bytes_read;
1625 size_t bytes_to_read =
1626 end > 0 && end > begin ? end - begin : sizeof buffer;
1627
1628 fseeko (*fp, begin, SEEK_SET);
1629 while ((bytes_read = fread (buffer, 1,
1630 min (bytes_to_read, sizeof buffer),
1631 *fp)) > 0) {
1632 /* Look for CR followed by a LF. This is supposed to
1633 be text so there should be LF's. If not, don't
1634 modify the content. */
1635 char *cp;
1636 size_t i;
1637 int last_char_was_cr = 0;
1638
1639 if (end > 0) bytes_to_read -= bytes_read;
1640
1641 for (i = 0, cp = buffer; i < bytes_read; ++i, ++cp) {
1642 if (*cp == '\n' && last_char_was_cr) {
1643 has_crs = 1;
1644 break;
1645 }
1646
1647 last_char_was_cr = *cp == '\r' ? 1 : 0;
1648 }
1649 }
1650
1651 if (has_crs) {
1652 int fd;
1653 char *stripped_content_file =
1654 add (m_mktemp2 (tmp, invo_name, &fd, NULL), NULL);
1655
1656 /* Strip each CR before a LF from the content. */
1657 fseeko (*fp, begin, SEEK_SET);
1658 while ((bytes_read = fread (buffer, 1, sizeof buffer, *fp)) >
1659 0) {
1660 char *cp;
1661 size_t i;
1662 int last_char_was_cr = 0;
1663
1664 for (i = 0, cp = buffer; i < bytes_read; ++i, ++cp) {
1665 if (*cp == '\r') {
1666 last_char_was_cr = 1;
1667 } else if (last_char_was_cr) {
1668 if (*cp != '\n') write (fd, "\r", 1);
1669 write (fd, cp, 1);
1670 last_char_was_cr = 0;
1671 } else {
1672 write (fd, cp, 1);
1673 last_char_was_cr = 0;
1674 }
1675
1676 }
1677 }
1678
1679 if (close (fd)) {
1680 admonish (NULL, "unable to write temporaty file %s",
1681 stripped_content_file);
1682 unlink (stripped_content_file);
1683 status = NOTOK;
1684 } else {
1685 /* Replace the decoded file with the converted one. */
1686 if (ct->c_cefile.ce_file) {
1687 if (ct->c_cefile.ce_unlink) {
1688 unlink (ct->c_cefile.ce_file);
1689 }
1690 free (ct->c_cefile.ce_file);
1691 }
1692 ct->c_cefile.ce_file = stripped_content_file;
1693 ct->c_cefile.ce_unlink = 1;
1694
1695 ++*message_mods;
1696 if (verbosw) {
1697 report (NULL, *file, "stripped CRs");
1698 }
1699 }
1700 }
1701
1702 if (opened_input_file) {
1703 fclose (*fp);
1704 *fp = NULL;
1705 }
1706 }
1707 }
1708
1709 free (codeset);
1710 return status;
1711 }
1712
1713
1714 char *
1715 content_codeset (CT ct) {
1716 const char *const charset = "charset";
1717 char *default_codeset = NULL;
1718 CI ctinfo = &ct->c_ctinfo;
1719 char **ap, **vp;
1720 char **src_codeset = NULL;
1721
1722 for (ap = ctinfo->ci_attrs, vp = ctinfo->ci_values; *ap; ++ap, ++vp) {
1723 if (! strcasecmp (*ap, charset)) {
1724 src_codeset = vp;
1725 break;
1726 }
1727 }
1728
1729 /* RFC 2045, Sec. 5.2: default to us-ascii. */
1730 if (src_codeset == NULL) src_codeset = &default_codeset;
1731 if (*src_codeset == NULL) *src_codeset = "US-ASCII";
1732
1733 return *src_codeset;
1734 }
1735
1736
1737 static int
1738 convert_codesets (CT ct, char *dest_codeset, int *message_mods) {
1739 int status = OK;
1740
1741 switch (ct->c_type) {
1742 case CT_TEXT:
1743 if (ct->c_subtype == TEXT_PLAIN) {
1744 status = convert_codeset (ct, dest_codeset, message_mods);
1745 }
1746 break;
1747
1748 case CT_MULTIPART: {
1749 struct multipart *m = (struct multipart *) ct->c_ctparams;
1750 struct part *part;
1751
1752 /* Should check to see if the body for this part is encoded?
1753 For now, it gets passed along as-is by InitMultiPart(). */
1754 for (part = m->mp_parts; status == OK && part; part = part->mp_next) {
1755 status =
1756 convert_codesets (part->mp_part, dest_codeset, message_mods);
1757 }
1758 break;
1759 }
1760
1761 case CT_MESSAGE:
1762 if (ct->c_subtype == MESSAGE_EXTERNAL) {
1763 struct exbody *e;
1764
1765 e = (struct exbody *) ct->c_ctparams;
1766 status =
1767 convert_codesets (e->eb_content, dest_codeset, message_mods);
1768 }
1769 break;
1770
1771 default:
1772 break;
1773 }
1774
1775 return status;
1776 }
1777
1778
1779 static int
1780 convert_codeset (CT ct, char *dest_codeset, int *message_mods) {
1781 char *src_codeset = content_codeset (ct);
1782 int status = OK;
1783
1784 /* norm_charmap() is case sensitive. */
1785 char *src_codeset_u = upcase (src_codeset);
1786 char *dest_codeset_u = upcase (dest_codeset);
1787 int different_codesets =
1788 strcmp (norm_charmap (src_codeset), norm_charmap (dest_codeset));
1789
1790 free (dest_codeset_u);
1791 free (src_codeset_u);
1792
1793 if (different_codesets) {
1794 #ifdef HAVE_ICONV
1795 iconv_t conv_desc = NULL;
1796 char *dest;
1797 int fd = -1;
1798 char **file = NULL;
1799 FILE **fp = NULL;
1800 size_t begin;
1801 size_t end;
1802 int opened_input_file = 0;
1803 char src_buffer[BUFSIZ];
1804 HF hf;
1805
1806 if ((conv_desc = iconv_open (dest_codeset, src_codeset)) ==
1807 (iconv_t) -1) {
1808 advise (NULL, "Can't convert %s to %s", src_codeset, dest_codeset);
1809 return -1;
1810 }
1811
1812 dest = add (m_mktemp2 (tmp, invo_name, &fd, NULL), NULL);
1813
1814 if (ct->c_cefile.ce_file) {
1815 file = &ct->c_cefile.ce_file;
1816 fp = &ct->c_cefile.ce_fp;
1817 begin = end = 0;
1818 } else if (ct->c_file) {
1819 file = &ct->c_file;
1820 fp = &ct->c_fp;
1821 begin = (size_t) ct->c_begin;
1822 end = (size_t) ct->c_end;
1823 } /* else no input file: shouldn't happen */
1824
1825 if (file && *file && fp) {
1826 if (! *fp) {
1827 if ((*fp = fopen (*file, "r")) == NULL) {
1828 advise (*file, "unable to open for reading");
1829 status = NOTOK;
1830 } else {
1831 opened_input_file = 1;
1832 }
1833 }
1834 }
1835
1836 if (fp && *fp) {
1837 size_t inbytes;
1838 size_t bytes_to_read =
1839 end > 0 && end > begin ? end - begin : sizeof src_buffer;
1840
1841 fseeko (*fp, begin, SEEK_SET);
1842 while ((inbytes = fread (src_buffer, 1,
1843 min (bytes_to_read, sizeof src_buffer),
1844 *fp)) > 0) {
1845 char dest_buffer[BUFSIZ];
1846 char *ib = src_buffer, *ob = dest_buffer;
1847 size_t outbytes = sizeof dest_buffer;
1848 size_t outbytes_before = outbytes;
1849
1850 if (end > 0) bytes_to_read -= inbytes;
1851
1852 if (iconv (conv_desc, &ib, &inbytes, &ob, &outbytes) ==
1853 (size_t) -1) {
1854 status = NOTOK;
1855 break;
1856 } else {
1857 write (fd, dest_buffer, outbytes_before - outbytes);
1858 }
1859 }
1860
1861 if (opened_input_file) {
1862 fclose (*fp);
1863 *fp = NULL;
1864 }
1865 }
1866
1867 iconv_close (conv_desc);
1868 close (fd);
1869
1870 if (status == OK) {
1871 /* Replace the decoded file with the converted one. */
1872 if (ct->c_cefile.ce_file) {
1873 if (ct->c_cefile.ce_unlink) {
1874 unlink (ct->c_cefile.ce_file);
1875 }
1876 free (ct->c_cefile.ce_file);
1877 }
1878 ct->c_cefile.ce_file = dest;
1879 ct->c_cefile.ce_unlink = 1;
1880
1881 ++*message_mods;
1882 if (verbosw) {
1883 report (ct->c_partno, ct->c_file, "convert %s to %s",
1884 src_codeset, dest_codeset);
1885 }
1886
1887 /* Update ci_attrs. */
1888 src_codeset = dest_codeset;
1889
1890 /* Update ct->c_ctline. */
1891 if (ct->c_ctline) {
1892 char *ctline =
1893 update_attr (ct->c_ctline, "charset=", dest_codeset);
1894
1895 free (ct->c_ctline);
1896 ct->c_ctline = ctline;
1897 } /* else no CT line, which is odd */
1898
1899 /* Update Content-Type header field. */
1900 for (hf = ct->c_first_hf; hf; hf = hf->next) {
1901 if (! strcasecmp (TYPE_FIELD, hf->name)) {
1902 char *ctline_less_newline =
1903 update_attr (hf->value, "charset=", dest_codeset);
1904 char *ctline = concat (ctline_less_newline, "\n", NULL);
1905 free (ctline_less_newline);
1906
1907 free (hf->value);
1908 hf->value = ctline;
1909 break;
1910 }
1911 }
1912 } else {
1913 unlink (dest);
1914 }
1915 #else /* ! HAVE_ICONV */
1916 NMH_UNUSED (message_mods);
1917
1918 advise (NULL, "Can't convert %s to %s without iconv", src_codeset,
1919 dest_codeset);
1920 status = NOTOK;
1921 #endif /* ! HAVE_ICONV */
1922 }
1923
1924 return status;
1925 }
1926
1927
1928 static int
1929 write_content (CT ct, char *input_filename, char *outfile, int modify_inplace,
1930 int message_mods) {
1931 int status = OK;
1932
1933 if (modify_inplace) {
1934 if (message_mods > 0) {
1935 if ((status = output_message (ct, outfile)) == OK) {
1936 char *infile = input_filename
1937 ? add (input_filename, NULL)
1938 : add (ct->c_file ? ct->c_file : "-", NULL);
1939
1940 if (remove_file (infile) == OK) {
1941 if (rename (outfile, infile)) {
1942 /* Rename didn't work, possibly because of an
1943 attempt to rename across filesystems. Try
1944 brute force copy. */
1945 int old = open (outfile, O_RDONLY);
1946 int new =
1947 open (infile, O_WRONLY | O_CREAT, m_gmprot ());
1948 int i = -1;
1949
1950 if (old != -1 && new != -1) {
1951 char buffer[BUFSIZ];
1952
1953 while ((i = read (old, buffer, sizeof buffer)) >
1954 0) {
1955 if (write (new, buffer, i) != i) {
1956 i = -1;
1957 break;
1958 }
1959 }
1960 }
1961 if (new != -1) close (new);
1962 if (old != -1) close (old);
1963 unlink (outfile);
1964
1965 if (i < 0) {
1966 /* The -file argument processing used path() to
1967 expand filename to absolute path. */
1968 int file = ct->c_file && ct->c_file[0] == '/';
1969
1970 admonish (NULL, "unable to rename %s %s to %s",
1971 file ? "file" : "message", outfile,
1972 infile);
1973 status = NOTOK;
1974 }
1975 }
1976 } else {
1977 admonish (NULL, "unable to remove input file %s, "
1978 "not modifying it", infile);
1979 unlink (outfile);
1980 status = NOTOK;
1981 }
1982
1983 free (infile);
1984 } else {
1985 status = NOTOK;
1986 }
1987 } else {
1988 /* No modifications and didn't need the tmp outfile. */
1989 unlink (outfile);
1990 }
1991 } else {
1992 /* Output is going to some file. Produce it whether or not
1993 there were modifications. */
1994 status = output_message (ct, outfile);
1995 }
1996
1997 flush_errors ();
1998 return status;
1999 }
2000
2001
2002 /*
2003 * If "rmmproc" is defined, call that to remove the file. Otherwise,
2004 * use the standard MH backup file.
2005 */
2006 static int
2007 remove_file (char *file) {
2008 if (rmmproc) {
2009 char *rmm_command = concat (rmmproc, " ", file, NULL);
2010 int status = system (rmm_command);
2011
2012 free (rmm_command);
2013 return WIFEXITED (status) ? WEXITSTATUS (status) : NOTOK;
2014 } else {
2015 /* This is OK for a non-message file, it still uses the
2016 BACKUP_PREFIX form. The backup file will be in the same
2017 directory as file. */
2018 return rename (file, m_backup (file));
2019 }
2020 }
2021
2022
2023 static void
2024 report (char *partno, char *filename, char *message, ...) {
2025 va_list args;
2026 char *fmt;
2027
2028 if (verbosw) {
2029 va_start (args, message);
2030 fmt = concat (filename, partno ? " part " : ", ",
2031 partno ? partno : "", partno ? ", " : "", message, NULL);
2032
2033 advertise (NULL, NULL, fmt, args);
2034
2035 free (fmt);
2036 va_end (args);
2037 }
2038 }
2039
2040
/* Return the copy of str produced by cpytrim(), with every character
   converted to upper case.  str itself is not modified. */
static char *
upcase (char *str) {
    char *up = cpytrim (str);
    size_t i = 0;

    while (up[i] != '\0') {
        up[i] = toupper ((unsigned char) up[i]);
        ++i;
    }

    return up;
}
2050
2051
/* Signal handler: finish with status 1.  For SIGQUIT, first flush
   stdout and put a newline on stderr. */
static void
pipeser (int i)
{
    const int is_quit = i == SIGQUIT;

    if (is_quit) {
        fflush (stdout);
        fputs ("\n", stderr);
        fflush (stderr);
    }

    done (1);
    /* NOTREACHED */
}