]> diplodocus.org Git - nmh/blob - uip/mhfixmsg.c
Fix encoder so at least one case of LF -> CR LF conversion happens correctly.
[nmh] / uip / mhfixmsg.c
1 /*
2 * mhfixmsg.c -- rewrite a message with various transformations
3 *
4 * This code is Copyright (c) 2002 and 2013, by the authors of nmh.
5 * See the COPYRIGHT file in the root directory of the nmh
6 * distribution for complete copyright information.
7 */
8
9 #include <h/mh.h>
10 #include <h/mime.h>
11 #include <h/mhparse.h>
12 #include <h/utils.h>
13 #include <h/signals.h>
14 #include <fcntl.h>
15 #ifdef HAVE_ICONV
16 # include <iconv.h>
17 #endif
18
/*
 * Command-line switches, written once as an X-macro list.  The list is
 * expanded twice below: first with X() yielding just the switch id (fed
 * to DEFINE_SWITCH_ENUM), then with X() yielding a { name, minchars, id }
 * initializer (fed to DEFINE_SWITCH_ARRAY, which is given the table name
 * "switches" that main() passes to smatch()).
 */
19 #define MHFIXMSG_SWITCHES \
20 X("decodetext 8bit|7bit", 0, DECODETEXTSW) \
21 X("nodecodetext", 0, NDECODETEXTSW) \
22 X("textcodeset", 0, TEXTCODESETSW) \
23 X("notextcodeset", 0, NTEXTCODESETSW) \
24 X("reformat", 0, REFORMATSW) \
25 X("noreformat", 0, NREFORMATSW) \
26 X("replacetextplain", 0, REPLACETEXTPLAINSW) \
27 X("noreplacetextplain", 0, NREPLACETEXTPLAINSW) \
28 X("fixboundary", 0, FIXBOUNDARYSW) \
29 X("nofixboundary", 0, NFIXBOUNDARYSW) \
30 X("fixcte", 0, FIXCTESW) \
31 X("nofixcte", 0, NFIXCTESW) \
32 X("file file", 0, FILESW) \
33 X("outfile file", 0, OUTFILESW) \
34 X("rmmproc program", 0, RPROCSW) \
35 X("normmproc", 0, NRPRCSW) \
36 X("verbose", 0, VERBSW) \
37 X("noverbose", 0, NVERBSW) \
38 X("version", 0, VERSIONSW) \
39 X("help", 0, HELPSW) \
40
/* First expansion: each entry contributes its id to the switch enum. */
41 #define X(sw, minchars, id) id,
42 DEFINE_SWITCH_ENUM(MHFIXMSG);
43 #undef X
44
/* Second expansion: each entry contributes one row of the switches table. */
45 #define X(sw, minchars, id) { sw, minchars, id },
46 DEFINE_SWITCH_ARRAY(MHFIXMSG, switches);
47 #undef X
48
49
/* Set by -verbose / cleared by -noverbose in main(); when nonzero the
   transformations describe what they change through report(). */
50 int verbosw;
51 int debugsw; /* Needed by mhparse.c. */
52
/* SIGQUIT is installed with the same handler as SIGPIPE (see main()). */
53 #define quitser pipeser
54
55 /* mhparse.c */
56 extern int skip_mp_cte_check; /* flag to InitMultiPart */
57 extern int suppress_bogus_mp_content_warning; /* flag to InitMultiPart */
58 extern int bogus_mp_content; /* flag from InitMultiPart */
59 CT parse_mime (char *);
60 void reverse_parts (CT);
61
62 /* mhoutsbr.c */
63 int output_message (CT, char *);
64
65 /* mhshowsbr.c */
66 int show_content_aux (CT, int, int, char *, char *);
67
68 /* mhmisc.c */
69 void flush_errors (void);
70
71 /* mhfree.c */
72 extern CT *cts;
73 void freects_done (int) NORETURN;
74
75 /*
76 * static prototypes
77 */
/*
 * The transformations selected on the command line.  main() fills this
 * in from the switches and mhfixmsgsbr() consults it to decide which
 * steps to run on each message.
 */
78 typedef struct fix_transformations {
79 int fixboundary; /* nonzero: run fix_boundary() (-[no]fixboundary) */
80 int fixcte; /* nonzero: run fix_multipart_cte() (-[no]fixcte) */
81 int reformat; /* nonzero: run ensure_text_plain() (-[no]reformat) */
82 int replacetextplain; /* passed to ensure_text_plain(): replace an
existing text/plain sibling instead of
keeping it (-[no]replacetextplain) */
83 int decodetext; /* 0 to skip decode_text_parts(), else the target
encoding: CE_8BIT or CE_7BIT (-[no]decodetext) */
84 char *textcodeset; /* target codeset for convert_codesets(), or NULL
to skip the conversion (-[no]textcodeset) */
85 } fix_transformations;
86
87 int mhfixmsgsbr (CT *, const fix_transformations *, char *);
88 static void reverse_alternative_parts (CT);
89 static int fix_boundary (CT *, int *);
90 static int get_multipart_boundary (CT, char **);
91 static int replace_boundary (CT, char *, const char *);
92 static char *update_attr (char *, const char *, const char *e);
93 static int fix_multipart_cte (CT, int *);
94 static int set_ce (CT, int);
95 static int ensure_text_plain (CT *, CT, int *, int);
96 static CT build_text_plain_part (CT);
97 static CT divide_part (CT);
98 static void copy_ctinfo (CI, CI);
99 static int decode_part (CT);
100 static int reformat_part (CT, char *, char *, char *, int);
101 static int charset_encoding (CT);
102 static CT build_multipart_alt (CT, CT, int, int);
103 static int boundary_in_content (FILE **, char *, const char *);
104 static void transfer_noncontent_headers (CT, CT);
105 static int set_ct_type (CT, int type, int subtype, int encoding);
106 static int decode_text_parts (CT, int, int *);
107 static int content_encoding (CT);
108 static int strip_crs (CT, int *);
109 static int convert_codesets (CT, char *, int *);
110 static int convert_codeset (CT, char *, int *);
111 static char *content_codeset (CT);
112 static int write_content (CT, char *, char *, int, int);
113 static int remove_file (char *);
114 static void report (char *, char *, char *, ...);
115 static char *upcase (char *);
116 static void pipeser (int);
117
118
/*
 * Program entry point.
 *
 * Parses the switches, then collects the message(s) to fix either from
 * a file (-file, an argument that is a full path, or "-" for standard
 * input) or from a folder.  Each message is parsed with parse_mime() and
 * the resulting CTs are stored in the NULL-terminated global array cts.
 * mhfixmsgsbr() is then run on every parsed message.
 *
 * Exits through done(), which is set to freects_done here, passing the
 * sum of the mhfixmsgsbr() results (or 1 if no message could be parsed).
 */
119 int
120 main (int argc, char **argv) {
121 int msgnum;
122 char *cp, *file = NULL, *folder = NULL;
123 char *maildir, buf[100], *outfile = NULL;
124 char **argp, **arguments;
125 struct msgs_array msgs = { 0, 0, NULL };
126 struct msgs *mp = NULL;
127 CT *ctp;
128 FILE *fp;
129 int using_stdin = 0;
130 int status = OK;
131 fix_transformations fx;
/* Defaults: reformat, fixcte and fixboundary are on, replacetextplain
   is off, text is decoded to 8bit, and no codeset conversion is done. */
132 fx.reformat = fx.fixcte = fx.fixboundary = 1;
133 fx.replacetextplain = 0;
134 fx.decodetext = CE_8BIT;
135 fx.textcodeset = NULL;
136
137 if (nmh_init(argv[0], 1)) { return 1; }
138
139 done = freects_done;
140
141 arguments = getarguments (invo_name, argc, argv, 1);
142 argp = arguments;
143
144 /*
145 * Parse arguments
146 */
147 while ((cp = *argp++)) {
148 if (*cp == '-') {
149 switch (smatch (++cp, switches)) {
150 case AMBIGSW:
151 ambigsw (cp, switches);
152 done (1);
153 case UNKWNSW:
154 adios (NULL, "-%s unknown", cp);
155
156 case HELPSW:
157 snprintf (buf, sizeof buf, "%s [+folder] [msgs] [switches]",
158 invo_name);
159 print_help (buf, switches, 1);
160 done (0);
161 case VERSIONSW:
162 print_version(invo_name);
163 done (0);
164
/* For switches that take a value, argp has been advanced past both the
   switch and its value by the time of the error check, so argp[-2] is
   the switch itself. */
165 case DECODETEXTSW:
166 if (! (cp = *argp++) || *cp == '-')
167 adios (NULL, "missing argument to %s", argp[-2]);
168 if (! strcasecmp (cp, "8bit")) {
169 fx.decodetext = CE_8BIT;
170 } else if (! strcasecmp (cp, "7bit")) {
171 fx.decodetext = CE_7BIT;
172 } else {
173 adios (NULL, "invalid argument to %s", argp[-2]);
174 }
175 continue;
176 case NDECODETEXTSW:
177 fx.decodetext = 0;
178 continue;
179 case TEXTCODESETSW:
180 if (! (cp = *argp++) || (*cp == '-' && cp[1]))
181 adios (NULL, "missing argument to %s", argp[-2]);
182 fx.textcodeset = cp;
183 continue;
184 case NTEXTCODESETSW:
185 fx.textcodeset = 0;
186 continue;
187 case FIXBOUNDARYSW:
188 fx.fixboundary = 1;
189 continue;
190 case NFIXBOUNDARYSW:
191 fx.fixboundary = 0;
192 continue;
193 case FIXCTESW:
194 fx.fixcte = 1;
195 continue;
196 case NFIXCTESW:
197 fx.fixcte = 0;
198 continue;
199 case REFORMATSW:
200 fx.reformat = 1;
201 continue;
202 case NREFORMATSW:
203 fx.reformat = 0;
204 continue;
205 case REPLACETEXTPLAINSW:
206 fx.replacetextplain = 1;
207 continue;
208 case NREPLACETEXTPLAINSW:
209 fx.replacetextplain = 0;
210 continue;
/* A lone "-" is accepted as the value of -file and -outfile and is kept
   verbatim; any other value is expanded with path(). */
211 case FILESW:
212 if (! (cp = *argp++) || (*cp == '-' && cp[1]))
213 adios (NULL, "missing argument to %s", argp[-2]);
214 file = *cp == '-' ? add (cp, NULL) : path (cp, TFILE);
215 continue;
216 case OUTFILESW:
217 if (! (cp = *argp++) || (*cp == '-' && cp[1]))
218 adios (NULL, "missing argument to %s", argp[-2]);
219 outfile = *cp == '-' ? add (cp, NULL) : path (cp, TFILE);
220 continue;
221 case RPROCSW:
222 if (!(rmmproc = *argp++) || *rmmproc == '-')
223 adios (NULL, "missing argument to %s", argp[-2]);
224 continue;
225 case NRPRCSW:
226 rmmproc = NULL;
227 continue;
228 case VERBSW:
229 verbosw = 1;
230 continue;
231 case NVERBSW:
232 verbosw = 0;
233 continue;
234 }
235 }
/* Not a switch: a folder (+name or @name), a full path treated as a
   file, or otherwise a message specification. */
236 if (*cp == '+' || *cp == '@') {
237 if (folder)
238 adios (NULL, "only one folder at a time!");
239 else
240 folder = pluspath (cp);
241 } else {
242 if (*cp == '/') {
243 /* Interpret a full path as a filename, not a message. */
244 file = add (cp, NULL);
245 } else {
246 app_msgarg (&msgs, cp);
247 }
248 }
249 }
250
251 SIGNAL (SIGQUIT, quitser);
252 SIGNAL (SIGPIPE, pipeser);
253
254 /*
255 * Read the standard profile setup
256 */
257 if ((fp = fopen (cp = etcpath ("mhn.defaults"), "r"))) {
258 readconfig ((struct node **) 0, fp, cp, 0);
259 fclose (fp);
260 }
261
/* Set both mhparse.c flags declared above ("flag to InitMultiPart"). */
262 suppress_bogus_mp_content_warning = skip_mp_cte_check = 1;
263
264 if (! context_find ("path"))
265 free (path ("./", TFOLDER));
266
267 if (file && msgs.size)
268 adios (NULL, "cannot specify msg and file at same time!");
269
270 /*
271 * check if message is coming from file
272 */
273 if (file) {
274 /* If file is stdin, create a tmp file name before parse_mime()
275 has a chance, because it might put in on a different
276 filesystem than the output file. Instead, put it in the
277 user's preferred tmp directory. */
278 CT ct;
279
280 if (! strcmp ("-", file)) {
281 int fd;
282 char *cp;
283
284 using_stdin = 1;
285
/* Copy standard input into the temporary file; from here on "file"
   names that temporary file. */
286 if ((cp = m_mktemp2 (NULL, invo_name, &fd, NULL)) == NULL) {
287 adios (NULL, "unable to create temporary file in %s",
288 get_temp_dir());
289 } else {
290 free (file);
291 file = add (cp, NULL);
292 cpydata (STDIN_FILENO, fd, "-", file);
293 }
294
295 if (close (fd)) {
296 (void) m_unlink (file);
297 adios (NULL, "failed to write temporary file");
298 }
299 }
300
/* Two slots: the single message plus the terminating NULL. */
301 if (! (cts = (CT *) calloc ((size_t) 2, sizeof *cts)))
302 adios (NULL, "out of memory");
303 ctp = cts;
304
305 if ((ct = parse_mime (file))) *ctp++ = ct;
306 } else {
307 /*
308 * message(s) are coming from a folder
309 */
310 CT ct;
311
312 if (! msgs.size)
313 app_msgarg(&msgs, "cur");
314 if (! folder)
315 folder = getfolder (1);
316 maildir = m_maildir (folder);
317
318 if (chdir (maildir) == NOTOK)
319 adios (maildir, "unable to change directory to");
320
321 /* read folder and create message structure */
322 if (! (mp = folder_read (folder, 1)))
323 adios (NULL, "unable to read folder %s", folder);
324
325 /* check for empty folder */
326 if (mp->nummsg == 0)
327 adios (NULL, "no messages in %s", folder);
328
329 /* parse all the message ranges/sequences and set SELECTED */
330 for (msgnum = 0; msgnum < msgs.size; msgnum++)
331 if (! m_convert (mp, msgs.msgs[msgnum]))
332 done (1);
333 seq_setprev (mp); /* set the previous-sequence */
334
/* One slot per selected message plus the terminating NULL. */
335 if (! (cts = (CT *) calloc ((size_t) (mp->numsel + 1), sizeof *cts)))
336 adios (NULL, "out of memory");
337 ctp = cts;
338
/* Messages that fail to parse are simply left out of cts. */
339 for (msgnum = mp->lowsel; msgnum <= mp->hghsel; msgnum++) {
340 if (is_selected(mp, msgnum)) {
341 char *msgnam;
342
343 msgnam = m_name (msgnum);
344 if ((ct = parse_mime (msgnam))) *ctp++ = ct;
345 }
346 }
347
348 seq_setcur (mp, mp->hghsel); /* update current message */
349 seq_save (mp); /* synchronize sequences */
350 context_replace (pfolder, folder);/* update current folder */
351 context_save (); /* save the context file */
352 }
353
/* Fix every parsed message; if nothing parsed at all, report failure. */
354 if (*cts) {
355 for (ctp = cts; *ctp; ++ctp) {
356 status += mhfixmsgsbr (ctp, &fx, outfile);
357
/* The copy of standard input is a scratch file: remove it once the
   message has been processed. */
358 if (using_stdin) {
359 (void) m_unlink (file);
360
361 if (! outfile) {
362 /* Just calling m_backup() unlinks the backup file. */
363 (void) m_backup (file);
364 }
365 }
366 }
367 } else {
368 status = 1;
369 }
370
371 free (outfile);
372 free (file);
373
374 /* done is freects_done, which will clean up all of cts. */
375 done (status);
376 return NOTOK;
377 }
378
379
380 int
381 mhfixmsgsbr (CT *ctp, const fix_transformations *fx, char *outfile) {
382 /* Store input filename in case one of the transformations, i.e.,
383 fix_boundary(), rewrites to a tmp file. */
384 char *input_filename = add ((*ctp)->c_file, NULL);
385 int modify_inplace = 0;
386 int message_mods = 0;
387 int status = OK;
388
389 if (outfile == NULL) {
390 modify_inplace = 1;
391
392 if ((*ctp)->c_file) {
393 char *tempfile;
394 if ((tempfile = m_mktemp2 (NULL, invo_name, NULL, NULL)) == NULL) {
395 adios (NULL, "unable to create temporary file in %s",
396 get_temp_dir());
397 }
398 outfile = add (tempfile, NULL);
399 } else {
400 adios (NULL, "missing both input and output filenames\n");
401 }
402 }
403
404 reverse_alternative_parts (*ctp);
405 if (status == OK && fx->fixboundary) {
406 status = fix_boundary (ctp, &message_mods);
407 }
408 if (status == OK && fx->fixcte) {
409 status = fix_multipart_cte (*ctp, &message_mods);
410 }
411 if (status == OK && fx->reformat) {
412 status =
413 ensure_text_plain (ctp, NULL, &message_mods, fx->replacetextplain);
414 }
415 if (status == OK && fx->decodetext) {
416 status = decode_text_parts (*ctp, fx->decodetext, &message_mods);
417 }
418 if (status == OK && fx->textcodeset != NULL) {
419 status = convert_codesets (*ctp, fx->textcodeset, &message_mods);
420 }
421
422 if (! (*ctp)->c_umask) {
423 /* Set the umask for the contents file. This currently
424 isn't used but just in case it is in the future. */
425 struct stat st;
426
427 if (stat ((*ctp)->c_file, &st) != NOTOK) {
428 (*ctp)->c_umask = ~(st.st_mode & 0777);
429 } else {
430 (*ctp)->c_umask = ~m_gmprot();
431 }
432 }
433
434 /*
435 * Write the content to a file
436 */
437 if (status == OK) {
438 status = write_content (*ctp, input_filename, outfile, modify_inplace,
439 message_mods);
440 } else if (! modify_inplace) {
441 /* Something went wrong. Output might be expected, such
442 as if this were run as a filter. Just copy the input
443 to the output. */
444 int in = open (input_filename, O_RDONLY);
445 int out = strcmp (outfile, "-")
446 ? open (outfile, O_WRONLY | O_CREAT, m_gmprot ())
447 : STDOUT_FILENO;
448
449 if (in != -1 && out != -1) {
450 cpydata (in, out, input_filename, outfile);
451 } else {
452 status = NOTOK;
453 }
454
455 close (out);
456 close (in);
457 }
458
459 if (modify_inplace) {
460 if (status != OK) (void) m_unlink (outfile);
461 free (outfile);
462 outfile = NULL;
463 }
464
465 free (input_filename);
466
467 return status;
468 }
469
470
471 /* parse_mime() arranges alternates in reverse (priority) order, so
472 reverse them back. This will put a text/plain part at the front of
473 a multipart/alternative part, for example, where it belongs. */
474 static void
475 reverse_alternative_parts (CT ct) {
476 if (ct->c_type == CT_MULTIPART) {
477 struct multipart *m = (struct multipart *) ct->c_ctparams;
478 struct part *part;
479
480 if (ct->c_subtype == MULTI_ALTERNATE) {
481 reverse_parts (ct);
482 }
483
484 /* And call recursively on each part of a multipart. */
485 for (part = m->mp_parts; part; part = part->mp_next) {
486 reverse_alternative_parts (part->mp_part);
487 }
488 }
489 }
490
491
492 static int
493 fix_boundary (CT *ct, int *message_mods) {
494 struct multipart *mp;
495 int status = OK;
496
497 if (bogus_mp_content) {
498 mp = (struct multipart *) (*ct)->c_ctparams;
499
500 /*
501 * 1) Get boundary at end of part.
502 * 2) Get boundary at beginning of part and compare to the end-of-part
503 * boundary.
504 * 3) Write out contents of ct to tmp file, replacing boundary in
505 * header with boundary from part. Set c_unlink to 1.
506 * 4) Free ct.
507 * 5) Call parse_mime() on the tmp file, replacing ct.
508 */
509
510 if (mp && mp->mp_start) {
511 char *part_boundary;
512
513 if (get_multipart_boundary (*ct, &part_boundary) == OK) {
514 char *fixed;
515
516 if ((fixed = m_mktemp2 (NULL, invo_name, NULL, &(*ct)->c_fp))) {
517 if (replace_boundary (*ct, fixed, part_boundary) == OK) {
518 char *filename = add ((*ct)->c_file, NULL);
519
520 free_content (*ct);
521 if ((*ct = parse_mime (fixed))) {
522 (*ct)->c_unlink = 1;
523
524 ++*message_mods;
525 if (verbosw) {
526 report (NULL, filename,
527 "fix multipart boundary");
528 }
529 }
530 free (filename);
531 } else {
532 advise (NULL, "unable to replace broken boundary");
533 status = NOTOK;
534 }
535 } else {
536 advise (NULL, "unable to create temporary file in %s",
537 get_temp_dir());
538 status = NOTOK;
539 }
540
541 free (part_boundary);
542 }
543 }
544 }
545
546 return status;
547 }
548
549
/*
 * Determine the boundary string actually used in the body of ct.
 *
 * The part is scanned backward from its end, one buffer at a time, for
 * the last "--" in the buffer; the text between the preceding "\n--" and
 * that "--" is taken as the end boundary.  The beginning of the part is
 * then searched for the same string preceded by "\n--".
 *
 * On an OK return *part_boundary is a newly allocated copy of the
 * boundary, which the caller frees.  Otherwise *part_boundary is set to
 * NULL and NOTOK is returned.  ct->c_fp is opened here if it was not
 * already open and is left open.
 */
550 static int
551 get_multipart_boundary (CT ct, char **part_boundary) {
552 char buffer[BUFSIZ];
553 char *end_boundary = NULL;
/* Start one buffer before the end of the part, or at its beginning if
   the part is not longer than one buffer. */
554 off_t begin = (off_t) ct->c_end > (off_t) (ct->c_begin + sizeof buffer)
555 ? (off_t) (ct->c_end - sizeof buffer)
556 : (off_t) ct->c_begin;
557 size_t bytes_read;
558 int status = OK;
559
560 /* This will fail if the boundary spans fread() calls. BUFSIZ should
561 be big enough, even if it's just 1024, to make that unlikely. */
562
563 /* free_content() will close ct->c_fp. */
564 if (! ct->c_fp && (ct->c_fp = fopen (ct->c_file, "r")) == NULL) {
565 advise (ct->c_file, "unable to open for reading");
566 return NOTOK;
567 }
568
569 /* Get boundary at end of multipart. */
570 while (begin >= (off_t) ct->c_begin) {
571 fseeko (ct->c_fp, begin, SEEK_SET);
572 while ((bytes_read = fread (buffer, 1, sizeof buffer, ct->c_fp)) > 0) {
573 char *end = buffer + bytes_read - 1;
574 char *cp;
575
576 if ((cp = rfind_str (buffer, bytes_read, "--"))) {
577 /* Trim off trailing "--" and anything beyond. */
578 *cp-- = '\0';
/* Look for the newline that starts the boundary line; the candidate is
   accepted only if "--" follows it and something comes after that. */
579 if ((end = rfind_str (buffer, cp - buffer, "\n"))) {
580 if (strlen (end) > 3 && *end++ == '\n' &&
581 *end++ == '-' && *end++ == '-') {
582 end_boundary = add (end, NULL);
583 break;
584 }
585 }
586 }
587 }
588
/* Not found yet: step back one more buffer, unless that would pass the
   beginning of the part. */
589 if (! end_boundary && begin > (off_t) (ct->c_begin + sizeof buffer)) {
590 begin -= sizeof buffer;
591 } else {
592 break;
593 }
594 }
595
596 /* Get boundary at beginning of multipart. */
597 if (end_boundary) {
598 fseeko (ct->c_fp, ct->c_begin, SEEK_SET);
599 while ((bytes_read = fread (buffer, 1, sizeof buffer, ct->c_fp)) > 0) {
/* NOTE(review): status is set to NOTOK only when a read returns fewer
   bytes than the boundary length.  If every buffer is long enough but
   none contains the boundary, status keeps its initial OK -- confirm
   that this is intended. */
600 if (bytes_read >= strlen (end_boundary)) {
601 char *cp = find_str (buffer, bytes_read, end_boundary);
602
603 if (cp && cp - buffer >= 2 && *--cp == '-' &&
604 *--cp == '-' && (cp > buffer && *--cp == '\n')) {
605 status = OK;
606 break;
607 }
608 } else {
609 /* The start and end boundaries didn't match, or the
610 start boundary doesn't begin with "\n--" (or "--"
611 if at the beginning of buffer). Keep trying. */
612 status = NOTOK;
613 }
614 }
615 } else {
616 status = NOTOK;
617 }
618
/* Hand the allocated boundary to the caller only on success. */
619 if (status == OK) {
620 *part_boundary = end_boundary;
621 } else {
622 *part_boundary = NULL;
623 free (end_boundary);
624 }
625
626 return status;
627 }
628
629
630 /* Open and copy ct->c_file to file, replacing the multipart boundary. */
631 static int
632 replace_boundary (CT ct, char *file, const char *boundary) {
633 FILE *fpin, *fpout;
634 int compnum, state;
635 char buf[BUFSIZ], name[NAMESZ];
636 char *np, *vp;
637 m_getfld_state_t gstate = 0;
638 int status = OK;
639
640 if (ct->c_file == NULL) {
641 advise (NULL, "missing input filename");
642 return NOTOK;
643 }
644
645 if ((fpin = fopen (ct->c_file, "r")) == NULL) {
646 advise (ct->c_file, "unable to open for reading");
647 return NOTOK;
648 }
649
650 if ((fpout = fopen (file, "w")) == NULL) {
651 fclose (fpin);
652 advise (file, "unable to open for writing");
653 return NOTOK;
654 }
655
656 for (compnum = 1;;) {
657 int bufsz = (int) sizeof buf;
658
659 switch (state = m_getfld (&gstate, name, buf, &bufsz, fpin)) {
660 case FLD:
661 case FLDPLUS:
662 compnum++;
663
664 /* get copies of the buffers */
665 np = add (name, NULL);
666 vp = add (buf, NULL);
667
668 /* if necessary, get rest of field */
669 while (state == FLDPLUS) {
670 bufsz = sizeof buf;
671 state = m_getfld (&gstate, name, buf, &bufsz, fpin);
672 vp = add (buf, vp); /* add to previous value */
673 }
674
675 if (strcasecmp (TYPE_FIELD, np)) {
676 fprintf (fpout, "%s:%s", np, vp);
677 } else {
678 char *new_boundary = update_attr (vp, "boundary=", boundary);
679
680 fprintf (fpout, "%s:%s\n", np, new_boundary);
681 free (new_boundary);
682 }
683
684 free (vp);
685 free (np);
686
687 continue;
688
689 case BODY:
690 fputs ("\n", fpout);
691 /* buf will have a terminating NULL, skip it. */
692 fwrite (buf, 1, bufsz-1, fpout);
693 continue;
694
695 case FILEEOF:
696 break;
697
698 case LENERR:
699 case FMTERR:
700 advise (NULL, "message format error in component #%d", compnum);
701 status = NOTOK;
702 break;
703
704 default:
705 advise (NULL, "getfld() returned %d", state);
706 status = NOTOK;
707 break;
708 }
709
710 break;
711 }
712
713 m_getfld_state_destroy (&gstate);
714 fclose (fpout);
715 fclose (fpin);
716
717 return status;
718 }
719
720
/*
 * Change the value of a name=value pair in a header field body.
 * If the name isn't there, append the pair.  In any case, a new
 * string is allocated and must be free'd by the caller.
 * Trims any trailing newlines.
 *
 * body   The header field body; it is modified in place (truncated)
 *        while the new string is assembled.
 * name   The attribute name including the trailing '=', e.g. "boundary=".
 *        It is matched case-insensitively.
 * value  The new value; it is written enclosed in double quotes.
 *
 * Any attributes that follow the replaced one (from the next ';' on)
 * are kept.
 */
static char *
update_attr (char *body, const char *name, const char *value) {
    char *bp = nmh_strcasestr (body, name);
    char *new_body;

    if (bp) {
        /* Must be located before body is truncated below. */
        char *other_attrs = strchr (bp, ';');

        /* Cut body right after "name=" and append the quoted value. */
        *(bp + strlen (name)) = '\0';
        new_body = concat (body, "\"", value, "\"", NULL);

        if (other_attrs) {
            char *cp;

            /* Trim any trailing newlines. */
            for (cp = &other_attrs[strlen (other_attrs) - 1];
                 cp > other_attrs  &&  *cp == '\n';
                 *cp-- = '\0') continue;
            new_body = add (other_attrs, new_body);
        }
    } else {
        size_t len = strlen (body);

        /* Append name/value pair, after first removing a final newline
           and (extraneous) semicolon.  The length checks keep an empty
           body from being indexed at -1. */
        if (len > 0  &&  body[len - 1] == '\n') body[--len] = '\0';
        if (len > 0  &&  body[len - 1] == ';') body[--len] = '\0';
        new_body = concat (body, "; ", name, "\"", value, "\"", NULL);
    }

    return new_body;
}
757
758
759 static int
760 fix_multipart_cte (CT ct, int *message_mods) {
761 int status = OK;
762
763 if (ct->c_type == CT_MULTIPART) {
764 struct multipart *m;
765 struct part *part;
766
767 if (ct->c_encoding != CE_7BIT && ct->c_encoding != CE_8BIT &&
768 ct->c_encoding != CE_BINARY) {
769 HF hf;
770
771 for (hf = ct->c_first_hf; hf; hf = hf->next) {
772 char *name = hf->name;
773 for (; *name && isspace ((unsigned char) *name); ++name) {
774 continue;
775 }
776
777 if (! strncasecmp (name, ENCODING_FIELD,
778 strlen (ENCODING_FIELD))) {
779 char *prefix = "Nmh-REPLACED-INVALID-";
780 HF h = mh_xmalloc (sizeof *h);
781
782 h->name = add (hf->name, NULL);
783 h->hf_encoding = hf->hf_encoding;
784 h->next = hf->next;
785 hf->next = h;
786
787 /* Retain old header but prefix its name. */
788 free (hf->name);
789 hf->name = concat (prefix, h->name, NULL);
790
791 ++*message_mods;
792 if (verbosw) {
793 char *encoding = cpytrim (hf->value);
794 report (ct->c_partno, ct->c_file,
795 "replace Content-Transfer-Encoding of %s "
796 "with 8 bit", encoding);
797 free (encoding);
798 }
799
800 h->value = add (" 8bit\n", NULL);
801
802 /* Don't need to warn for multiple C-T-E header
803 fields, parse_mime() already does that. But
804 if there are any, fix them all as necessary. */
805 hf = h;
806 }
807 }
808
809 set_ce (ct, CE_8BIT);
810 }
811
812 m = (struct multipart *) ct->c_ctparams;
813 for (part = m->mp_parts; part; part = part->mp_next) {
814 if (fix_multipart_cte (part->mp_part, message_mods) != OK) {
815 status = NOTOK;
816 break;
817 }
818 }
819 }
820
821 return status;
822 }
823
824
825 static int
826 set_ce (CT ct, int encoding) {
827 const char *ce = ce_str (encoding);
828 const struct str2init *ctinit = get_ce_method (ce);
829
830 if (ctinit) {
831 char *cte = concat (" ", ce, "\n", NULL);
832 int found_cte = 0;
833 HF hf;
834 /* Decoded contents might be in ct->c_cefile.ce_file, if the
835 caller is decode_text_parts (). Save because we'll
836 overwrite below. */
837 struct cefile decoded_content_info = ct->c_cefile;
838
839 ct->c_encoding = encoding;
840
841 ct->c_ctinitfnx = ctinit->si_init;
842 /* This will assign ct->c_cefile with an all-0 struct, which
843 is what we want. */
844 (*ctinit->si_init) (ct);
845 /* After returning, the caller should set
846 ct->c_cefile.ce_file to the name of the file containing
847 the contents. */
848
849 /* Restore the cefile. */
850 ct->c_cefile = decoded_content_info;
851
852 /* Update/add Content-Transfer-Encoding header field. */
853 for (hf = ct->c_first_hf; hf; hf = hf->next) {
854 if (! strcasecmp (ENCODING_FIELD, hf->name)) {
855 found_cte = 1;
856 free (hf->value);
857 hf->value = cte;
858 }
859 }
860 if (! found_cte) {
861 add_header (ct, add (ENCODING_FIELD, NULL), cte);
862 }
863
864 /* Update c_celine. It's used only by mhlist -debug. */
865 free (ct->c_celine);
866 ct->c_celine = add (cte, NULL);
867
868 return OK;
869 } else {
870 return NOTOK;
871 }
872 }
873
874
875 /* Make sure each text part has a corresponding text/plain part. */
876 static int
877 ensure_text_plain (CT *ct, CT parent, int *message_mods, int replacetextplain) {
878 int status = OK;
879
880 switch ((*ct)->c_type) {
881 case CT_TEXT: {
882 int has_text_plain = 0;
883
884 /* Nothing to do for text/plain. */
885 if ((*ct)->c_subtype == TEXT_PLAIN) return OK;
886
887 if (parent && parent->c_type == CT_MULTIPART &&
888 parent->c_subtype == MULTI_ALTERNATE) {
889 struct multipart *mp = (struct multipart *) parent->c_ctparams;
890 struct part *part, *prev;
891 int new_subpart_number = 1;
892
893 /* See if there is a sibling text/plain. */
894 for (prev = part = mp->mp_parts; part; part = part->mp_next) {
895 ++new_subpart_number;
896 if (part->mp_part->c_type == CT_TEXT &&
897 part->mp_part->c_subtype == TEXT_PLAIN) {
898 if (replacetextplain) {
899 struct part *old_part;
900 if (part == mp->mp_parts) {
901 old_part = mp->mp_parts;
902 mp->mp_parts = part->mp_next;
903 } else {
904 old_part = prev->mp_next;
905 prev->mp_next = part->mp_next;
906 }
907 if (verbosw) {
908 report (parent->c_partno, parent->c_file,
909 "remove text/plain part %s",
910 old_part->mp_part->c_partno);
911 }
912 free_content (old_part->mp_part);
913 free (old_part);
914 } else {
915 has_text_plain = 1;
916 }
917 break;
918 }
919 prev = part;
920 }
921
922 if (! has_text_plain) {
923 /* Parent is a multipart/alternative. Insert a new
924 text/plain subpart. */
925 struct part *new_part = mh_xmalloc (sizeof *new_part);
926
927 if ((new_part->mp_part = build_text_plain_part (*ct))) {
928 char buffer[16];
929 snprintf (buffer, sizeof buffer, "%d", new_subpart_number);
930
931 new_part->mp_next = mp->mp_parts;
932 mp->mp_parts = new_part;
933 new_part->mp_part->c_partno =
934 concat (parent->c_partno ? parent->c_partno : "1", ".",
935 buffer, NULL);
936
937 ++*message_mods;
938 if (verbosw) {
939 report (parent->c_partno, parent->c_file,
940 "insert text/plain part");
941 }
942 } else {
943 free_content (new_part->mp_part);
944 free (new_part);
945 status = NOTOK;
946 }
947 }
948 } else {
949 /* Slip new text/plain part into a new multipart/alternative. */
950 CT tp_part = build_text_plain_part (*ct);
951
952 if (tp_part) {
953 CT mp_alt = build_multipart_alt (*ct, tp_part, CT_MULTIPART,
954 MULTI_ALTERNATE);
955 if (mp_alt) {
956 struct multipart *mp =
957 (struct multipart *) mp_alt->c_ctparams;
958
959 if (mp && mp->mp_parts) {
960 mp->mp_parts->mp_part = tp_part;
961 /* Make the new multipart/alternative the parent. */
962 *ct = mp_alt;
963
964 ++*message_mods;
965 if (verbosw) {
966 report ((*ct)->c_partno, (*ct)->c_file,
967 "insert text/plain part");
968 }
969 } else {
970 free_content (tp_part);
971 free_content (mp_alt);
972 status = NOTOK;
973 }
974 } else {
975 status = NOTOK;
976 }
977 } else {
978 status = NOTOK;
979 }
980 }
981 break;
982 }
983
984 case CT_MULTIPART: {
985 struct multipart *mp = (struct multipart *) (*ct)->c_ctparams;
986 struct part *part;
987
988 for (part = mp->mp_parts; status == OK && part; part = part->mp_next) {
989 if ((*ct)->c_type == CT_MULTIPART) {
990 status = ensure_text_plain (&part->mp_part, *ct, message_mods,
991 replacetextplain);
992 }
993 }
994 break;
995 }
996
997 case CT_MESSAGE:
998 if ((*ct)->c_subtype == MESSAGE_EXTERNAL) {
999 struct exbody *e;
1000
1001 e = (struct exbody *) (*ct)->c_ctparams;
1002 status = ensure_text_plain (&e->eb_content, *ct, message_mods,
1003 replacetextplain);
1004 }
1005 break;
1006 }
1007
1008 return status;
1009 }
1010
1011
1012 static CT
1013 build_text_plain_part (CT encoded_part) {
1014 CT tp_part = divide_part (encoded_part);
1015 char *tmp_plain_file = NULL;
1016
1017 if (decode_part (tp_part) == OK) {
1018 /* Now, tp_part->c_cefile.ce_file is the name of the tmp file that
1019 contains the decoded contents. And the decoding function, such
1020 as openQuoted, will have set ...->ce_unlink to 1 so that it will
1021 be unlinked by free_content (). */
1022 char *tempfile;
1023
1024 if ((tempfile = m_mktemp2 (NULL, invo_name, NULL, NULL)) == NULL) {
1025 advise (NULL, "unable to create temporary file in %s",
1026 get_temp_dir());
1027 }
1028 tmp_plain_file = add (tempfile, NULL);
1029 if (reformat_part (tp_part, tmp_plain_file,
1030 tp_part->c_ctinfo.ci_type,
1031 tp_part->c_ctinfo.ci_subtype,
1032 tp_part->c_type) == OK) {
1033 return tp_part;
1034 }
1035 }
1036
1037 free_content (tp_part);
1038 (void) m_unlink (tmp_plain_file);
1039 free (tmp_plain_file);
1040
1041 return NULL;
1042 }
1043
1044
1045 static CT
1046 divide_part (CT ct) {
1047 CT new_part;
1048
1049 if ((new_part = (CT) calloc (1, sizeof *new_part)) == NULL)
1050 adios (NULL, "out of memory");
1051
1052 /* Just copy over what is needed for decoding. c_vrsn and
1053 c_celine aren't necessary. */
1054 new_part->c_file = add (ct->c_file, NULL);
1055 new_part->c_begin = ct->c_begin;
1056 new_part->c_end = ct->c_end;
1057 copy_ctinfo (&new_part->c_ctinfo, &ct->c_ctinfo);
1058 new_part->c_type = ct->c_type;
1059 new_part->c_cefile = ct->c_cefile;
1060 new_part->c_encoding = ct->c_encoding;
1061 new_part->c_ctinitfnx = ct->c_ctinitfnx;
1062 new_part->c_ceopenfnx = ct->c_ceopenfnx;
1063 new_part->c_ceclosefnx = ct->c_ceclosefnx;
1064 new_part->c_cesizefnx = ct->c_cesizefnx;
1065
1066 /* c_ctline is used by reformat__part(), so it can preserve
1067 anything after the type/subtype. */
1068 new_part->c_ctline = add (ct->c_ctline, NULL);
1069
1070 return new_part;
1071 }
1072
1073
1074 static void
1075 copy_ctinfo (CI dest, CI src) {
1076 char **s_ap, **d_ap, **s_vp, **d_vp;
1077
1078 dest->ci_type = src->ci_type ? add (src->ci_type, NULL) : NULL;
1079 dest->ci_subtype = src->ci_subtype ? add (src->ci_subtype, NULL) : NULL;
1080
1081 for (s_ap = src->ci_attrs, d_ap = dest->ci_attrs,
1082 s_vp = src->ci_values, d_vp = dest->ci_values;
1083 *s_ap;
1084 ++s_ap, ++d_ap, ++s_vp, ++d_vp) {
1085 *d_ap = add (*s_ap, NULL);
1086 *d_vp = *s_vp;
1087 }
1088 *d_ap = NULL;
1089
1090 dest->ci_comment = src->ci_comment ? add (src->ci_comment, NULL) : NULL;
1091 dest->ci_magic = src->ci_magic ? add (src->ci_magic, NULL) : NULL;
1092 }
1093
1094
1095 static int
1096 decode_part (CT ct) {
1097 char *tmp_decoded;
1098 int status;
1099 char *tempfile;
1100
1101 if ((tempfile = m_mktemp2 (NULL, invo_name, NULL, NULL)) == NULL) {
1102 adios (NULL, "unable to create temporary file in %s", get_temp_dir());
1103 }
1104 tmp_decoded = add (tempfile, NULL);
1105 /* The following call will load ct->c_cefile.ce_file with the tmp
1106 filename of the decoded content. tmp_decoded will contain the
1107 encoded output, get rid of that. */
1108 status = output_message (ct, tmp_decoded);
1109 (void) m_unlink (tmp_decoded);
1110 free (tmp_decoded);
1111
1112 return status;
1113 }
1114
1115
1116 /* Some of the arguments aren't really needed now, but maybe will
1117 be in the future for other than text types. */
1118 static int
1119 reformat_part (CT ct, char *file, char *type, char *subtype, int c_type) {
1120 int output_subtype, output_encoding;
1121 char *cp, *cf;
1122 int status;
1123
1124 /* Hacky: this redirects the output from whatever command is used
1125 to show the part to a file. So, the user can't have any output
1126 redirection in that command.
1127 Could show_multi() in mhshowsbr.c avoid this? */
1128
1129 /* Check for invo_name-format-type/subtype. */
1130 cp = concat (invo_name, "-format-", type, "/", subtype, NULL);
1131 if ((cf = context_find (cp)) && *cf != '\0') {
1132 if (strchr (cf, '>')) {
1133 free (cp);
1134 advise (NULL, "'>' prohibited in \"%s\",\nplease fix your "
1135 "%s-format-%s/%s profile entry", cf, invo_name, type,
1136 subtype);
1137 return NOTOK;
1138 }
1139 } else {
1140 free (cp);
1141
1142 /* Check for invo_name-format-type. */
1143 cp = concat (invo_name, "-format-", type, NULL);
1144 if (! (cf = context_find (cp)) || *cf == '\0') {
1145 free (cp);
1146 if (verbosw) {
1147 advise (NULL, "Don't know how to convert %s, there is no "
1148 "%s-format-%s/%s profile entry",
1149 ct->c_file, invo_name, type, subtype);
1150 }
1151 return NOTOK;
1152 }
1153
1154 if (strchr (cf, '>')) {
1155 free (cp);
1156 advise (NULL, "'>' prohibited in \"%s\"", cf);
1157 return NOTOK;
1158 }
1159 }
1160 free (cp);
1161
1162 cp = concat (cf, " >", file, NULL);
1163 status = show_content_aux (ct, 1, 0, cp, NULL);
1164 free (cp);
1165
1166 /* Unlink decoded content tmp file and free its filename to avoid
1167 leaks. The file stream should already have been closed. */
1168 if (ct->c_cefile.ce_unlink) {
1169 (void) m_unlink (ct->c_cefile.ce_file);
1170 free (ct->c_cefile.ce_file);
1171 ct->c_cefile.ce_file = NULL;
1172 ct->c_cefile.ce_unlink = 0;
1173 }
1174
1175 if (c_type == CT_TEXT) {
1176 output_subtype = TEXT_PLAIN;
1177 } else {
1178 /* Set subtype to 0, which is always an UNKNOWN subtype. */
1179 output_subtype = 0;
1180 }
1181 output_encoding = charset_encoding (ct);
1182
1183 if (set_ct_type (ct, c_type, output_subtype, output_encoding) == OK) {
1184 ct->c_cefile.ce_file = file;
1185 ct->c_cefile.ce_unlink = 1;
1186 } else {
1187 ct->c_cefile.ce_unlink = 0;
1188 status = NOTOK;
1189 }
1190
1191 return status;
1192 }
1193
1194
1195 /* Identifies 7bit or 8bit content based on charset. */
1196 static int
1197 charset_encoding (CT ct) {
1198 /* norm_charmap() is case sensitive. */
1199 char *codeset = upcase (content_codeset (ct));
1200 int encoding =
1201 strcmp (norm_charmap (codeset), "US-ASCII") ? CE_8BIT : CE_7BIT;
1202
1203 free (codeset);
1204 return encoding;
1205 }
1206
1207
1208 static CT
1209 build_multipart_alt (CT first_alt, CT new_part, int type, int subtype) {
1210 char *boundary_prefix = "----=_nmh-multipart";
1211 char *boundary = concat (boundary_prefix, first_alt->c_partno, NULL);
1212 char *boundary_indicator = "; boundary=";
1213 char *typename, *subtypename, *name;
1214 CT ct;
1215 struct part *p;
1216 struct multipart *m;
1217 char *cp;
1218 const struct str2init *ctinit;
1219
1220 if ((ct = (CT) calloc (1, sizeof *ct)) == NULL)
1221 adios (NULL, "out of memory");
1222
1223 /* Set up the multipart/alternative part. These fields of *ct were
1224 initialized to 0 by calloc():
1225 c_fp, c_unlink, c_begin, c_end,
1226 c_vrsn, c_ctline, c_celine,
1227 c_id, c_descr, c_dispo, c_partno,
1228 c_ctinfo.ci_comment, c_ctinfo.ci_magic,
1229 c_cefile, c_encoding,
1230 c_digested, c_digest[16], c_ctexbody,
1231 c_ctinitfnx, c_ceopenfnx, c_ceclosefnx, c_cesizefnx,
1232 c_umask, c_pid, c_rfc934,
1233 c_showproc, c_termproc, c_storeproc, c_storage, c_folder
1234 */
1235
1236 ct->c_file = add (first_alt->c_file, NULL);
1237 ct->c_type = type;
1238 ct->c_subtype = subtype;
1239
1240 ctinit = get_ct_init (ct->c_type);
1241
1242 typename = ct_type_str (type);
1243 subtypename = ct_subtype_str (type, subtype);
1244
1245 {
1246 int serial = 0;
1247 int found_boundary = 1;
1248
1249 while (found_boundary && serial < 1000000) {
1250 found_boundary = 0;
1251
1252 /* Ensure that the boundary doesn't appear in the decoded
1253 content. */
1254 if (new_part->c_cefile.ce_file) {
1255 if ((found_boundary =
1256 boundary_in_content (&new_part->c_cefile.ce_fp,
1257 new_part->c_cefile.ce_file,
1258 boundary)) == -1) {
1259 return NULL;
1260 }
1261 }
1262
1263 /* Ensure that the boundary doesn't appear in the encoded
1264 content. */
1265 if (! found_boundary && new_part->c_file) {
1266 if ((found_boundary = boundary_in_content (&new_part->c_fp,
1267 new_part->c_file,
1268 boundary)) == -1) {
1269 return NULL;
1270 }
1271 }
1272
1273 if (found_boundary) {
1274 /* Try a slightly different boundary. */
1275 char buffer2[16];
1276
1277 free (boundary);
1278 ++serial;
1279 snprintf (buffer2, sizeof buffer2, "%d", serial);
1280 boundary =
1281 concat (boundary_prefix,
1282 first_alt->c_partno ? first_alt->c_partno : "",
1283 "-", buffer2, NULL);
1284 }
1285 }
1286
1287 if (found_boundary) {
1288 advise (NULL, "giving up trying to find a unique boundary");
1289 return NULL;
1290 }
1291 }
1292
1293 name = concat (" ", typename, "/", subtypename, boundary_indicator, "\"",
1294 boundary, "\"", NULL);
1295
1296 /* Load c_first_hf and c_last_hf. */
1297 transfer_noncontent_headers (first_alt, ct);
1298 add_header (ct, add (TYPE_FIELD, NULL), concat (name, "\n", NULL));
1299 free (name);
1300
1301 /* Load c_partno. */
1302 if (first_alt->c_partno) {
1303 ct->c_partno = add (first_alt->c_partno, NULL);
1304 free (first_alt->c_partno);
1305 first_alt->c_partno = concat (ct->c_partno, ".1", NULL);
1306 new_part->c_partno = concat (ct->c_partno, ".2", NULL);
1307 } else {
1308 first_alt->c_partno = add ("1", NULL);
1309 new_part->c_partno = add ("2", NULL);
1310 }
1311
1312 if (ctinit) {
1313 ct->c_ctinfo.ci_type = add (typename, NULL);
1314 ct->c_ctinfo.ci_subtype = add (subtypename, NULL);
1315 }
1316
1317 name = concat (" ", typename, "/", subtypename, boundary_indicator,
1318 boundary, NULL);
1319 if ((cp = strstr (name, boundary_indicator))) {
1320 ct->c_ctinfo.ci_attrs[0] = name;
1321 ct->c_ctinfo.ci_attrs[1] = NULL;
1322 /* ci_values don't get free'd, so point into ci_attrs. */
1323 ct->c_ctinfo.ci_values[0] = cp + strlen (boundary_indicator);
1324 }
1325
1326 p = (struct part *) mh_xmalloc (sizeof *p);
1327 p->mp_next = (struct part *) mh_xmalloc (sizeof *p->mp_next);
1328 p->mp_next->mp_next = NULL;
1329 p->mp_next->mp_part = first_alt;
1330
1331 if ((m = (struct multipart *) calloc (1, sizeof (struct multipart))) ==
1332 NULL)
1333 adios (NULL, "out of memory");
1334 m->mp_start = concat (boundary, "\n", NULL);
1335 m->mp_stop = concat (boundary, "--\n", NULL);
1336 m->mp_parts = p;
1337 ct->c_ctparams = (void *) m;
1338
1339 free (boundary);
1340
1341 return ct;
1342 }
1343
1344
1345 /* Check that the boundary does not appear in the content. */
1346 static int
1347 boundary_in_content (FILE **fp, char *file, const char *boundary) {
1348 char buffer[BUFSIZ];
1349 size_t bytes_read;
1350 int found_boundary = 0;
1351
1352 /* free_content() will close *fp if we fopen it here. */
1353 if (! *fp && (*fp = fopen (file, "r")) == NULL) {
1354 advise (file, "unable to open %s for reading", file);
1355 return NOTOK;
1356 }
1357
1358 fseeko (*fp, 0L, SEEK_SET);
1359 while ((bytes_read = fread (buffer, 1, sizeof buffer, *fp)) > 0) {
1360 if (find_str (buffer, bytes_read, boundary)) {
1361 found_boundary = 1;
1362 break;
1363 }
1364 }
1365
1366 return found_boundary;
1367 }
1368
1369
1370 /* Remove all non-Content headers. */
1371 static void
1372 transfer_noncontent_headers (CT old, CT new) {
1373 HF hp, hp_prev;
1374
1375 hp_prev = hp = old->c_first_hf;
1376 while (hp) {
1377 HF next = hp->next;
1378
1379 if (strncasecmp (XXX_FIELD_PRF, hp->name, strlen (XXX_FIELD_PRF))) {
1380 if (hp == old->c_last_hf) {
1381 if (hp == old->c_first_hf) {
1382 old->c_last_hf = old->c_first_hf = NULL;
1383 } else {
1384 hp_prev->next = NULL;
1385 old->c_last_hf = hp_prev;
1386 }
1387 } else {
1388 if (hp == old->c_first_hf) {
1389 old->c_first_hf = next;
1390 } else {
1391 hp_prev->next = next;
1392 }
1393 }
1394
1395 /* Put node hp in the new CT. */
1396 if (new->c_first_hf == NULL) {
1397 new->c_first_hf = hp;
1398 } else {
1399 new->c_last_hf->next = hp;
1400 }
1401 new->c_last_hf = hp;
1402 } else {
1403 /* A Content- header, leave in old. */
1404 hp_prev = hp;
1405 }
1406
1407 hp = next;
1408 }
1409 }
1410
1411
1412 static int
1413 set_ct_type (CT ct, int type, int subtype, int encoding) {
1414 char *typename = ct_type_str (type);
1415 char *subtypename = ct_subtype_str (type, subtype);
1416 /* E.g, " text/plain" */
1417 char *type_subtypename = concat (" ", typename, "/", subtypename, NULL);
1418 /* E.g, " text/plain\n" */
1419 char *name_plus_nl = concat (type_subtypename, "\n", NULL);
1420 int found_content_type = 0;
1421 HF hf;
1422 const char *cp = NULL;
1423 char *ctline;
1424 int status;
1425
1426 /* Update/add Content-Type header field. */
1427 for (hf = ct->c_first_hf; hf; hf = hf->next) {
1428 if (! strcasecmp (TYPE_FIELD, hf->name)) {
1429 found_content_type = 1;
1430 free (hf->value);
1431 hf->value = (cp = strchr (ct->c_ctline, ';'))
1432 ? concat (type_subtypename, cp, "\n", NULL)
1433 : add (name_plus_nl, NULL);
1434 }
1435 }
1436 if (! found_content_type) {
1437 add_header (ct, add (TYPE_FIELD, NULL),
1438 (cp = strchr (ct->c_ctline, ';'))
1439 ? concat (type_subtypename, cp, "\n", NULL)
1440 : add (name_plus_nl, NULL));
1441 }
1442
1443 /* Some of these might not be used, but set them anyway. */
1444 ctline = cp
1445 ? concat (type_subtypename, cp, NULL)
1446 : concat (type_subtypename, NULL);
1447 free (ct->c_ctline);
1448 ct->c_ctline = ctline;
1449 /* Leave other ctinfo members as they were. */
1450 free (ct->c_ctinfo.ci_type);
1451 ct->c_ctinfo.ci_type = add (typename, NULL);
1452 free (ct->c_ctinfo.ci_subtype);
1453 ct->c_ctinfo.ci_subtype = add (subtypename, NULL);
1454 ct->c_type = type;
1455 ct->c_subtype = subtype;
1456
1457 free (name_plus_nl);
1458 free (type_subtypename);
1459
1460 status = set_ce (ct, encoding);
1461
1462 return status;
1463 }
1464
1465
1466 static int
1467 decode_text_parts (CT ct, int encoding, int *message_mods) {
1468 int status = OK;
1469
1470 switch (ct->c_type) {
1471 case CT_TEXT:
1472 switch (ct->c_encoding) {
1473 case CE_BASE64:
1474 case CE_QUOTED: {
1475 int ct_encoding;
1476
1477 if (decode_part (ct) == OK && ct->c_cefile.ce_file) {
1478 if ((ct_encoding = content_encoding (ct)) == CE_BINARY &&
1479 encoding != CE_BINARY) {
1480 /* The decoding isn't acceptable so discard it.
1481 Leave status as OK to allow other transformations. */
1482 if (verbosw) {
1483 report (ct->c_partno, ct->c_file,
1484 "will not decode%s because it is binary",
1485 ct->c_partno ? ""
1486 : ct->c_ctline ? ct->c_ctline
1487 : "");
1488 }
1489 (void) m_unlink (ct->c_cefile.ce_file);
1490 free (ct->c_cefile.ce_file);
1491 ct->c_cefile.ce_file = NULL;
1492 } else if (ct->c_encoding == CE_QUOTED &&
1493 ct_encoding == CE_8BIT && encoding == CE_7BIT) {
1494 /* The decoding isn't acceptable so discard it.
1495 Leave status as OK to allow other transformations. */
1496 if (verbosw) {
1497 report (ct->c_partno, ct->c_file,
1498 "will not decode%s because it is 8bit",
1499 ct->c_partno ? ""
1500 : ct->c_ctline ? ct->c_ctline
1501 : "");
1502 }
1503 (void) m_unlink (ct->c_cefile.ce_file);
1504 free (ct->c_cefile.ce_file);
1505 ct->c_cefile.ce_file = NULL;
1506 } else {
1507 int enc;
1508 if (ct_encoding == CE_BINARY)
1509 enc = CE_BINARY;
1510 else if (ct_encoding == CE_8BIT && encoding == CE_7BIT)
1511 enc = CE_QUOTED;
1512 else
1513 enc = charset_encoding (ct);
1514 if (set_ce (ct, enc) == OK) {
1515 ++*message_mods;
1516 if (verbosw) {
1517 report (ct->c_partno, ct->c_file, "decode%s",
1518 ct->c_ctline ? ct->c_ctline : "");
1519 }
1520 strip_crs (ct, message_mods);
1521 } else {
1522 status = NOTOK;
1523 }
1524 }
1525 } else {
1526 status = NOTOK;
1527 }
1528 break;
1529 }
1530 case CE_8BIT:
1531 case CE_7BIT:
1532 strip_crs (ct, message_mods);
1533 break;
1534 default:
1535 break;
1536 }
1537
1538 break;
1539
1540 case CT_MULTIPART: {
1541 struct multipart *m = (struct multipart *) ct->c_ctparams;
1542 struct part *part;
1543
1544 /* Should check to see if the body for this part is encoded?
1545 For now, it gets passed along as-is by InitMultiPart(). */
1546 for (part = m->mp_parts; status == OK && part; part = part->mp_next) {
1547 status = decode_text_parts (part->mp_part, encoding, message_mods);
1548 }
1549 break;
1550 }
1551
1552 case CT_MESSAGE:
1553 if (ct->c_subtype == MESSAGE_EXTERNAL) {
1554 struct exbody *e;
1555
1556 e = (struct exbody *) ct->c_ctparams;
1557 status = decode_text_parts (e->eb_content, encoding, message_mods);
1558 }
1559 break;
1560
1561 default:
1562 break;
1563 }
1564
1565 return status;
1566 }
1567
1568
1569 /* See if the decoded content is 7bit, 8bit, or binary. It's binary
1570 if it has any NUL characters, a CR not followed by a LF, or lines
1571 greater than 998 characters in length. */
1572 static int
1573 content_encoding (CT ct) {
1574 CE ce = &ct->c_cefile;
1575 int encoding = CE_7BIT;
1576
1577 if (ce->ce_file) {
1578 size_t line_len = 0;
1579 char buffer[BUFSIZ];
1580 size_t inbytes;
1581
1582 if (! ce->ce_fp && (ce->ce_fp = fopen (ce->ce_file, "r")) == NULL) {
1583 advise (ce->ce_file, "unable to open for reading");
1584 return CE_UNKNOWN;
1585 }
1586
1587 fseeko (ce->ce_fp, 0L, SEEK_SET);
1588 while (encoding != CE_BINARY &&
1589 (inbytes = fread (buffer, 1, sizeof buffer, ce->ce_fp)) > 0) {
1590 char *cp;
1591 size_t i;
1592 int last_char_was_cr = 0;
1593
1594 for (i = 0, cp = buffer; i < inbytes; ++i, ++cp) {
1595 if (*cp == '\0' || ++line_len > 998 ||
1596 (*cp != '\n' && last_char_was_cr)) {
1597 encoding = CE_BINARY;
1598 break;
1599 } else if (*cp == '\n') {
1600 line_len = 0;
1601 } else if (! isascii ((unsigned char) *cp)) {
1602 encoding = CE_8BIT;
1603 }
1604
1605 last_char_was_cr = *cp == '\r' ? 1 : 0;
1606 }
1607 }
1608
1609 fclose (ce->ce_fp);
1610 ce->ce_fp = NULL;
1611 } /* else should never happen */
1612
1613 return encoding;
1614 }
1615
1616
1617 static int
1618 strip_crs (CT ct, int *message_mods) {
1619 /* norm_charmap() is case sensitive. */
1620 char *codeset = upcase (content_codeset (ct));
1621 int status = OK;
1622
1623 /* Only strip carriage returns if content is ASCII or another
1624 codeset that has the same readily recognizable CR followed by a
1625 LF. We can include UTF-8 here because if the high-order bit of
1626 a UTF-8 byte is 0, then it must be a single-byte ASCII
1627 character. */
1628 if (! strcmp (norm_charmap (codeset), "US-ASCII") ||
1629 ! strncmp (norm_charmap (codeset), "ISO-8859-", 9) ||
1630 ! strncmp (norm_charmap (codeset), "UTF-8", 5) ||
1631 ! strncmp (norm_charmap (codeset), "WINDOWS-12", 10)) {
1632 char **file = NULL;
1633 FILE **fp = NULL;
1634 size_t begin;
1635 size_t end;
1636 int has_crs = 0;
1637 int opened_input_file = 0;
1638
1639 if (ct->c_cefile.ce_file) {
1640 file = &ct->c_cefile.ce_file;
1641 fp = &ct->c_cefile.ce_fp;
1642 begin = end = 0;
1643 } else if (ct->c_file) {
1644 file = &ct->c_file;
1645 fp = &ct->c_fp;
1646 begin = (size_t) ct->c_begin;
1647 end = (size_t) ct->c_end;
1648 } /* else don't know where the content is */
1649
1650 if (file && *file && fp) {
1651 if (! *fp) {
1652 if ((*fp = fopen (*file, "r")) == NULL) {
1653 advise (*file, "unable to open for reading");
1654 status = NOTOK;
1655 } else {
1656 opened_input_file = 1;
1657 }
1658 }
1659 }
1660
1661 if (fp && *fp) {
1662 char buffer[BUFSIZ];
1663 size_t bytes_read;
1664 size_t bytes_to_read =
1665 end > 0 && end > begin ? end - begin : sizeof buffer;
1666
1667 fseeko (*fp, begin, SEEK_SET);
1668 while ((bytes_read = fread (buffer, 1,
1669 min (bytes_to_read, sizeof buffer),
1670 *fp)) > 0) {
1671 /* Look for CR followed by a LF. This is supposed to
1672 be text so there should be LF's. If not, don't
1673 modify the content. */
1674 char *cp;
1675 size_t i;
1676 int last_char_was_cr = 0;
1677
1678 if (end > 0) bytes_to_read -= bytes_read;
1679
1680 for (i = 0, cp = buffer; i < bytes_read; ++i, ++cp) {
1681 if (*cp == '\n' && last_char_was_cr) {
1682 has_crs = 1;
1683 break;
1684 }
1685
1686 last_char_was_cr = *cp == '\r' ? 1 : 0;
1687 }
1688 }
1689
1690 if (has_crs) {
1691 int fd;
1692 char *stripped_content_file;
1693 char *tempfile = m_mktemp2 (NULL, invo_name, &fd, NULL);
1694
1695 if (tempfile == NULL) {
1696 adios (NULL, "unable to create temporary file in %s",
1697 get_temp_dir());
1698 }
1699 stripped_content_file = add (tempfile, NULL);
1700
1701 /* Strip each CR before a LF from the content. */
1702 fseeko (*fp, begin, SEEK_SET);
1703 while ((bytes_read = fread (buffer, 1, sizeof buffer, *fp)) >
1704 0) {
1705 char *cp;
1706 size_t i;
1707 int last_char_was_cr = 0;
1708
1709 for (i = 0, cp = buffer; i < bytes_read; ++i, ++cp) {
1710 if (*cp == '\r') {
1711 last_char_was_cr = 1;
1712 } else if (last_char_was_cr) {
1713 if (*cp != '\n') write (fd, "\r", 1);
1714 write (fd, cp, 1);
1715 last_char_was_cr = 0;
1716 } else {
1717 write (fd, cp, 1);
1718 last_char_was_cr = 0;
1719 }
1720 }
1721 }
1722
1723 if (close (fd)) {
1724 admonish (NULL, "unable to write temporary file %s",
1725 stripped_content_file);
1726 (void) m_unlink (stripped_content_file);
1727 status = NOTOK;
1728 } else {
1729 /* Replace the decoded file with the converted one. */
1730 if (ct->c_cefile.ce_file) {
1731 if (ct->c_cefile.ce_unlink) {
1732 (void) m_unlink (ct->c_cefile.ce_file);
1733 }
1734 free (ct->c_cefile.ce_file);
1735 }
1736 ct->c_cefile.ce_file = stripped_content_file;
1737 ct->c_cefile.ce_unlink = 1;
1738
1739 ++*message_mods;
1740 if (verbosw) {
1741 report (ct->c_partno,
1742 begin == 0 && end == 0 ? "" : *file,
1743 "stripped CRs");
1744 }
1745 }
1746 }
1747
1748 if (opened_input_file) {
1749 fclose (*fp);
1750 *fp = NULL;
1751 }
1752 }
1753 }
1754
1755 free (codeset);
1756 return status;
1757 }
1758
1759
1760 char *
1761 content_codeset (CT ct) {
1762 const char *const charset = "charset";
1763 char *default_codeset = NULL;
1764 CI ctinfo = &ct->c_ctinfo;
1765 char **ap, **vp;
1766 char **src_codeset = NULL;
1767
1768 for (ap = ctinfo->ci_attrs, vp = ctinfo->ci_values; *ap; ++ap, ++vp) {
1769 if (! strcasecmp (*ap, charset)) {
1770 src_codeset = vp;
1771 break;
1772 }
1773 }
1774
1775 /* RFC 2045, Sec. 5.2: default to us-ascii. */
1776 if (src_codeset == NULL) src_codeset = &default_codeset;
1777 if (*src_codeset == NULL) *src_codeset = "US-ASCII";
1778
1779 return *src_codeset;
1780 }
1781
1782
1783 static int
1784 convert_codesets (CT ct, char *dest_codeset, int *message_mods) {
1785 int status = OK;
1786
1787 switch (ct->c_type) {
1788 case CT_TEXT:
1789 if (ct->c_subtype == TEXT_PLAIN) {
1790 status = convert_codeset (ct, dest_codeset, message_mods);
1791 }
1792 break;
1793
1794 case CT_MULTIPART: {
1795 struct multipart *m = (struct multipart *) ct->c_ctparams;
1796 struct part *part;
1797
1798 /* Should check to see if the body for this part is encoded?
1799 For now, it gets passed along as-is by InitMultiPart(). */
1800 for (part = m->mp_parts; status == OK && part; part = part->mp_next) {
1801 status =
1802 convert_codesets (part->mp_part, dest_codeset, message_mods);
1803 }
1804 break;
1805 }
1806
1807 case CT_MESSAGE:
1808 if (ct->c_subtype == MESSAGE_EXTERNAL) {
1809 struct exbody *e;
1810
1811 e = (struct exbody *) ct->c_ctparams;
1812 status =
1813 convert_codesets (e->eb_content, dest_codeset, message_mods);
1814 }
1815 break;
1816
1817 default:
1818 break;
1819 }
1820
1821 return status;
1822 }
1823
1824
1825 static int
1826 convert_codeset (CT ct, char *dest_codeset, int *message_mods) {
1827 char *src_codeset = content_codeset (ct);
1828 int status = OK;
1829
1830 /* norm_charmap() is case sensitive. */
1831 char *src_codeset_u = upcase (src_codeset);
1832 char *dest_codeset_u = upcase (dest_codeset);
1833 int different_codesets =
1834 strcmp (norm_charmap (src_codeset), norm_charmap (dest_codeset));
1835
1836 free (dest_codeset_u);
1837 free (src_codeset_u);
1838
1839 if (different_codesets) {
1840 #ifdef HAVE_ICONV
1841 iconv_t conv_desc = NULL;
1842 char *dest;
1843 int fd = -1;
1844 char **file = NULL;
1845 FILE **fp = NULL;
1846 size_t begin;
1847 size_t end;
1848 int opened_input_file = 0;
1849 char src_buffer[BUFSIZ];
1850 HF hf;
1851 char *tempfile;
1852
1853 if ((conv_desc = iconv_open (dest_codeset, src_codeset)) ==
1854 (iconv_t) -1) {
1855 advise (NULL, "Can't convert %s to %s", src_codeset, dest_codeset);
1856 return -1;
1857 }
1858
1859 if ((tempfile = m_mktemp2 (NULL, invo_name, &fd, NULL)) == NULL) {
1860 adios (NULL, "unable to create temporary file in %s",
1861 get_temp_dir());
1862 }
1863 dest = add (tempfile, NULL);
1864
1865 if (ct->c_cefile.ce_file) {
1866 file = &ct->c_cefile.ce_file;
1867 fp = &ct->c_cefile.ce_fp;
1868 begin = end = 0;
1869 } else if (ct->c_file) {
1870 file = &ct->c_file;
1871 fp = &ct->c_fp;
1872 begin = (size_t) ct->c_begin;
1873 end = (size_t) ct->c_end;
1874 } /* else no input file: shouldn't happen */
1875
1876 if (file && *file && fp) {
1877 if (! *fp) {
1878 if ((*fp = fopen (*file, "r")) == NULL) {
1879 advise (*file, "unable to open for reading");
1880 status = NOTOK;
1881 } else {
1882 opened_input_file = 1;
1883 }
1884 }
1885 }
1886
1887 if (fp && *fp) {
1888 size_t inbytes;
1889 size_t bytes_to_read =
1890 end > 0 && end > begin ? end - begin : sizeof src_buffer;
1891
1892 fseeko (*fp, begin, SEEK_SET);
1893 while ((inbytes = fread (src_buffer, 1,
1894 min (bytes_to_read, sizeof src_buffer),
1895 *fp)) > 0) {
1896 char dest_buffer[BUFSIZ];
1897 ICONV_CONST char *ib = src_buffer;
1898 char *ob = dest_buffer;
1899 size_t outbytes = sizeof dest_buffer;
1900 size_t outbytes_before = outbytes;
1901
1902 if (end > 0) bytes_to_read -= inbytes;
1903
1904 if (iconv (conv_desc, &ib, &inbytes, &ob, &outbytes) ==
1905 (size_t) -1) {
1906 status = NOTOK;
1907 break;
1908 } else {
1909 write (fd, dest_buffer, outbytes_before - outbytes);
1910 }
1911 }
1912
1913 if (opened_input_file) {
1914 fclose (*fp);
1915 *fp = NULL;
1916 }
1917 }
1918
1919 iconv_close (conv_desc);
1920 close (fd);
1921
1922 if (status == OK) {
1923 /* Replace the decoded file with the converted one. */
1924 if (ct->c_cefile.ce_file) {
1925 if (ct->c_cefile.ce_unlink) {
1926 (void) m_unlink (ct->c_cefile.ce_file);
1927 }
1928 free (ct->c_cefile.ce_file);
1929 }
1930 ct->c_cefile.ce_file = dest;
1931 ct->c_cefile.ce_unlink = 1;
1932
1933 ++*message_mods;
1934 if (verbosw) {
1935 report (ct->c_partno, ct->c_file, "convert %s to %s",
1936 src_codeset, dest_codeset);
1937 }
1938
1939 /* Update ci_attrs. */
1940 src_codeset = dest_codeset;
1941
1942 /* Update ct->c_ctline. */
1943 if (ct->c_ctline) {
1944 char *ctline =
1945 update_attr (ct->c_ctline, "charset=", dest_codeset);
1946
1947 free (ct->c_ctline);
1948 ct->c_ctline = ctline;
1949 } /* else no CT line, which is odd */
1950
1951 /* Update Content-Type header field. */
1952 for (hf = ct->c_first_hf; hf; hf = hf->next) {
1953 if (! strcasecmp (TYPE_FIELD, hf->name)) {
1954 char *ctline_less_newline =
1955 update_attr (hf->value, "charset=", dest_codeset);
1956 char *ctline = concat (ctline_less_newline, "\n", NULL);
1957 free (ctline_less_newline);
1958
1959 free (hf->value);
1960 hf->value = ctline;
1961 break;
1962 }
1963 }
1964 } else {
1965 (void) m_unlink (dest);
1966 }
1967 #else /* ! HAVE_ICONV */
1968 NMH_UNUSED (message_mods);
1969
1970 advise (NULL, "Can't convert %s to %s without iconv", src_codeset,
1971 dest_codeset);
1972 status = NOTOK;
1973 #endif /* ! HAVE_ICONV */
1974 }
1975
1976 return status;
1977 }
1978
1979
1980 static int
1981 write_content (CT ct, char *input_filename, char *outfile, int modify_inplace,
1982 int message_mods) {
1983 int status = OK;
1984
1985 if (modify_inplace) {
1986 if (message_mods > 0) {
1987 if ((status = output_message (ct, outfile)) == OK) {
1988 char *infile = input_filename
1989 ? add (input_filename, NULL)
1990 : add (ct->c_file ? ct->c_file : "-", NULL);
1991
1992 if (remove_file (infile) == OK) {
1993 if (rename (outfile, infile)) {
1994 /* Rename didn't work, possibly because of an
1995 attempt to rename across filesystems. Try
1996 brute force copy. */
1997 int old = open (outfile, O_RDONLY);
1998 int new =
1999 open (infile, O_WRONLY | O_CREAT, m_gmprot ());
2000 int i = -1;
2001
2002 if (old != -1 && new != -1) {
2003 char buffer[BUFSIZ];
2004
2005 while ((i = read (old, buffer, sizeof buffer)) >
2006 0) {
2007 if (write (new, buffer, i) != i) {
2008 i = -1;
2009 break;
2010 }
2011 }
2012 }
2013 if (new != -1) close (new);
2014 if (old != -1) close (old);
2015 (void) m_unlink (outfile);
2016
2017 if (i < 0) {
2018 /* The -file argument processing used path() to
2019 expand filename to absolute path. */
2020 int file = ct->c_file && ct->c_file[0] == '/';
2021
2022 admonish (NULL, "unable to rename %s %s to %s",
2023 file ? "file" : "message", outfile,
2024 infile);
2025 status = NOTOK;
2026 }
2027 }
2028 } else {
2029 admonish (NULL, "unable to remove input file %s, "
2030 "not modifying it", infile);
2031 (void) m_unlink (outfile);
2032 status = NOTOK;
2033 }
2034
2035 free (infile);
2036 } else {
2037 status = NOTOK;
2038 }
2039 } else {
2040 /* No modifications and didn't need the tmp outfile. */
2041 (void) m_unlink (outfile);
2042 }
2043 } else {
2044 /* Output is going to some file. Produce it whether or not
2045 there were modifications. */
2046 status = output_message (ct, outfile);
2047 }
2048
2049 flush_errors ();
2050 return status;
2051 }
2052
2053
2054 /*
2055 * If "rmmproc" is defined, call that to remove the file. Otherwise,
2056 * use the standard MH backup file.
2057 */
2058 static int
2059 remove_file (char *file) {
2060 if (rmmproc) {
2061 char *rmm_command = concat (rmmproc, " ", file, NULL);
2062 int status = system (rmm_command);
2063
2064 free (rmm_command);
2065 return WIFEXITED (status) ? WEXITSTATUS (status) : NOTOK;
2066 } else {
2067 /* This is OK for a non-message file, it still uses the
2068 BACKUP_PREFIX form. The backup file will be in the same
2069 directory as file. */
2070 return rename (file, m_backup (file));
2071 }
2072 }
2073
2074
2075 static void
2076 report (char *partno, char *filename, char *message, ...) {
2077 va_list args;
2078 char *fmt;
2079
2080 if (verbosw) {
2081 va_start (args, message);
2082 fmt = concat (filename, partno ? " part " : ", ",
2083 partno ? partno : "", partno ? ", " : "", message, NULL);
2084
2085 advertise (NULL, NULL, fmt, args);
2086
2087 free (fmt);
2088 va_end (args);
2089 }
2090 }
2091
2092
/* Return the result of cpytrim (str) with every character converted to
   upper case.  The caller frees the result. */
static char *
upcase (char *str) {
    char *result = cpytrim (str);
    size_t i;

    for (i = 0; result[i] != '\0'; ++i) {
        result[i] = toupper ((unsigned char) result[i]);
    }

    return result;
}
2102
2103
/* Signal handler: on SIGQUIT, flush stdout and end the current line on
   stderr; then, for any signal, finish with done (1). */
static void
pipeser (int i)
{
    switch (i) {
    case SIGQUIT:
        fflush (stdout);
        fprintf (stderr, "\n");
        fflush (stderr);
        break;
    default:
        break;
    }

    done (1);
    /* NOTREACHED */
}