]> diplodocus.org Git - nmh/blob - uip/mhfixmsg.c
Added temporary probes to see what file --mime and file -i do on
[nmh] / uip / mhfixmsg.c
1 /*
2 * mhfixmsg.c -- rewrite a message with various transformations
3 *
4 * This code is Copyright (c) 2002 and 2013, by the authors of nmh.
5 * See the COPYRIGHT file in the root directory of the nmh
6 * distribution for complete copyright information.
7 */
8
9 #include <h/mh.h>
10 #include <h/mime.h>
11 #include <h/mhparse.h>
12 #include <h/utils.h>
13 #include <h/signals.h>
14 #include <fcntl.h>
15 #ifdef HAVE_ICONV
16 # include <iconv.h>
17 #endif
18
/*
 * Command-line switches.  They live in a single X-macro list so that
 * the enum of switch ids and the switch table passed to smatch() are
 * both generated from the same entries.  Each entry is
 * X(switch text, minchars, id).
 */
#define MHFIXMSG_SWITCHES \
    X("decodetext 8bit|7bit", 0, DECODETEXTSW) \
    X("nodecodetext", 0, NDECODETEXTSW) \
    X("textcodeset", 0, TEXTCODESETSW) \
    X("notextcodeset", 0, NTEXTCODESETSW) \
    X("reformat", 0, REFORMATSW) \
    X("noreformat", 0, NREFORMATSW) \
    X("replacetextplain", 0, REPLACETEXTPLAINSW) \
    X("noreplacetextplain", 0, NREPLACETEXTPLAINSW) \
    X("fixboundary", 0, FIXBOUNDARYSW) \
    X("nofixboundary", 0, NFIXBOUNDARYSW) \
    X("fixcte", 0, FIXCTESW) \
    X("nofixcte", 0, NFIXCTESW) \
    X("file file", 0, FILESW) \
    X("outfile file", 0, OUTFILESW) \
    X("rmmproc program", 0, RPROCSW) \
    X("normmproc", 0, NRPRCSW) \
    X("verbose", 0, VERBSW) \
    X("noverbose", 0, NVERBSW) \
    X("version", 0, VERSIONSW) \
    X("help", 0, HELPSW) \

/* First expansion: keep only the id of each entry. */
#define X(text, minlen, tag) tag,
DEFINE_SWITCH_ENUM(MHFIXMSG);
#undef X

/* Second expansion: one { text, minchars, id } initializer per entry. */
#define X(text, minlen, tag) { text, minlen, tag },
DEFINE_SWITCH_ARRAY(MHFIXMSG, switches);
#undef X
48
49
50 int verbosw;
51 int debugsw; /* Needed by mhparse.c. */
52
53 #define quitser pipeser
54
55 /* mhparse.c */
56 extern char *tmp; /* directory to place tmp files */
57 extern int skip_mp_cte_check; /* flag to InitMultiPart */
58 extern int suppress_bogus_mp_content_warning; /* flag to InitMultiPart */
59 extern int bogus_mp_content; /* flag from InitMultiPart */
60 CT parse_mime (char *);
61 void reverse_parts (CT);
62
63 /* mhoutsbr.c */
64 int output_message (CT, char *);
65
66 /* mhshowsbr.c */
67 int show_content_aux (CT, int, int, char *, char *);
68
69 /* mhmisc.c */
70 void flush_errors (void);
71
72 /* mhfree.c */
73 extern CT *cts;
74 void freects_done (int) NORETURN;
75
76 /*
77 * static prototypes
78 */
79 typedef struct fix_transformations {
80 int fixboundary;
81 int fixcte;
82 int reformat;
83 int replacetextplain;
84 int decodetext;
85 char *textcodeset;
86 } fix_transformations;
87
88 int mhfixmsgsbr (CT *, const fix_transformations *, char *);
89 static void reverse_alternative_parts (CT);
90 static int fix_boundary (CT *, int *);
91 static int get_multipart_boundary (CT, char **);
92 static int replace_boundary (CT, char *, const char *);
93 static char *update_attr (char *, const char *, const char *e);
94 static int fix_multipart_cte (CT, int *);
95 static int set_ce (CT, int);
96 static int ensure_text_plain (CT *, CT, int *, int);
97 static CT build_text_plain_part (CT);
98 static CT divide_part (CT);
99 static void copy_ctinfo (CI, CI);
100 static int decode_part (CT);
101 static int reformat_part (CT, char *, char *, char *, int);
102 static int charset_encoding (CT);
103 static CT build_multipart_alt (CT, CT, int, int);
104 static int boundary_in_content (FILE **, char *, const char *);
105 static void transfer_noncontent_headers (CT, CT);
106 static int set_ct_type (CT, int type, int subtype, int encoding);
107 static int decode_text_parts (CT, int, int *);
108 static int content_encoding (CT);
109 static int strip_crs (CT, int *);
110 static int convert_codesets (CT, char *, int *);
111 static int convert_codeset (CT, char *, int *);
112 static char *content_codeset (CT);
113 static int write_content (CT, char *, char *, int, int);
114 static int remove_file (char *);
115 static void report (char *, char *, char *, ...);
116 static char *upcase (char *);
117 static void pipeser (int);
118
119
120 int
121 main (int argc, char **argv) {
122 int msgnum;
123 char *cp, *file = NULL, *folder = NULL;
124 char *maildir, buf[100], *outfile = NULL;
125 char **argp, **arguments;
126 struct msgs_array msgs = { 0, 0, NULL };
127 struct msgs *mp = NULL;
128 CT *ctp;
129 FILE *fp;
130 int using_stdin = 0;
131 int status = OK;
132 fix_transformations fx;
133 fx.reformat = fx.fixcte = fx.fixboundary = 1;
134 fx.replacetextplain = 0;
135 fx.decodetext = CE_8BIT;
136 fx.textcodeset = NULL;
137
138 done = freects_done;
139
140 #ifdef LOCALE
141 setlocale(LC_ALL, "");
142 #endif
143 invo_name = r1bindex (argv[0], '/');
144
145 /* read user profile/context */
146 context_read();
147
148 arguments = getarguments (invo_name, argc, argv, 1);
149 argp = arguments;
150
151 /*
152 * Parse arguments
153 */
154 while ((cp = *argp++)) {
155 if (*cp == '-') {
156 switch (smatch (++cp, switches)) {
157 case AMBIGSW:
158 ambigsw (cp, switches);
159 done (1);
160 case UNKWNSW:
161 adios (NULL, "-%s unknown", cp);
162
163 case HELPSW:
164 snprintf (buf, sizeof buf, "%s [+folder] [msgs] [switches]",
165 invo_name);
166 print_help (buf, switches, 1);
167 done (0);
168 case VERSIONSW:
169 print_version(invo_name);
170 done (0);
171
172 case DECODETEXTSW:
173 if (! (cp = *argp++) || *cp == '-')
174 adios (NULL, "missing argument to %s", argp[-2]);
175 if (! strcasecmp (cp, "8bit")) {
176 fx.decodetext = CE_8BIT;
177 } else if (! strcasecmp (cp, "7bit")) {
178 fx.decodetext = CE_7BIT;
179 } else {
180 adios (NULL, "invalid argument to %s", argp[-2]);
181 }
182 continue;
183 case NDECODETEXTSW:
184 fx.decodetext = 0;
185 continue;
186 case TEXTCODESETSW:
187 if (! (cp = *argp++) || (*cp == '-' && cp[1]))
188 adios (NULL, "missing argument to %s", argp[-2]);
189 fx.textcodeset = cp;
190 continue;
191 case NTEXTCODESETSW:
192 fx.textcodeset = 0;
193 continue;
194 case FIXBOUNDARYSW:
195 fx.fixboundary = 1;
196 continue;
197 case NFIXBOUNDARYSW:
198 fx.fixboundary = 0;
199 continue;
200 case FIXCTESW:
201 fx.fixcte = 1;
202 continue;
203 case NFIXCTESW:
204 fx.fixcte = 0;
205 continue;
206 case REFORMATSW:
207 fx.reformat = 1;
208 continue;
209 case NREFORMATSW:
210 fx.reformat = 0;
211 continue;
212 case REPLACETEXTPLAINSW:
213 fx.replacetextplain = 1;
214 continue;
215 case NREPLACETEXTPLAINSW:
216 fx.replacetextplain = 0;
217 continue;
218 case FILESW:
219 if (! (cp = *argp++) || (*cp == '-' && cp[1]))
220 adios (NULL, "missing argument to %s", argp[-2]);
221 file = *cp == '-' ? add (cp, NULL) : path (cp, TFILE);
222 continue;
223 case OUTFILESW:
224 if (! (cp = *argp++) || (*cp == '-' && cp[1]))
225 adios (NULL, "missing argument to %s", argp[-2]);
226 outfile = *cp == '-' ? add (cp, NULL) : path (cp, TFILE);
227 continue;
228 case RPROCSW:
229 if (!(rmmproc = *argp++) || *rmmproc == '-')
230 adios (NULL, "missing argument to %s", argp[-2]);
231 continue;
232 case NRPRCSW:
233 rmmproc = NULL;
234 continue;
235 case VERBSW:
236 verbosw = 1;
237 continue;
238 case NVERBSW:
239 verbosw = 0;
240 continue;
241 }
242 }
243 if (*cp == '+' || *cp == '@') {
244 if (folder)
245 adios (NULL, "only one folder at a time!");
246 else
247 folder = pluspath (cp);
248 } else {
249 if (*cp == '/') {
250 /* Interpret a full path as a filename, not a message. */
251 file = add (cp, NULL);
252 } else {
253 app_msgarg (&msgs, cp);
254 }
255 }
256 }
257
258 SIGNAL (SIGQUIT, quitser);
259 SIGNAL (SIGPIPE, pipeser);
260
261 /*
262 * Read the standard profile setup
263 */
264 if ((fp = fopen (cp = etcpath ("mhn.defaults"), "r"))) {
265 readconfig ((struct node **) 0, fp, cp, 0);
266 fclose (fp);
267 }
268
269 /*
270 * Check for storage directory. If specified,
271 * then store temporary files there. Else we
272 * store them in standard nmh directory.
273 */
274 if ((cp = context_find (nmhstorage)) && *cp)
275 tmp = concat (cp, "/", invo_name, NULL);
276 else
277 tmp = add (m_maildir (invo_name), NULL);
278
279 suppress_bogus_mp_content_warning = skip_mp_cte_check = 1;
280
281 if (! context_find ("path"))
282 free (path ("./", TFOLDER));
283
284 if (file && msgs.size)
285 adios (NULL, "cannot specify msg and file at same time!");
286
287 /*
288 * check if message is coming from file
289 */
290 if (file) {
291 /* If file is stdin, create a tmp file name before parse_mime()
292 has a chance, because it might put in on a different
293 filesystem than the output file. Instead, put it in the
294 user's preferred tmp directory. */
295 CT ct;
296
297 if (! strcmp ("-", file)) {
298 int fd;
299 char *cp;
300
301 using_stdin = 1;
302
303 if ((cp = m_mktemp2 (tmp, invo_name, &fd, NULL)) == NULL) {
304 adios (NULL, "unable to create temporary file");
305 } else {
306 free (file);
307 file = add (cp, NULL);
308 chmod (file, 0600);
309 cpydata (STDIN_FILENO, fd, "-", file);
310 }
311
312 if (close (fd)) {
313 unlink (file);
314 adios (NULL, "failed to write temporary file");
315 }
316 }
317
318 if (! (cts = (CT *) calloc ((size_t) 2, sizeof *cts)))
319 adios (NULL, "out of memory");
320 ctp = cts;
321
322 if ((ct = parse_mime (file))) *ctp++ = ct;
323 } else {
324 /*
325 * message(s) are coming from a folder
326 */
327 CT ct;
328
329 if (! msgs.size)
330 app_msgarg(&msgs, "cur");
331 if (! folder)
332 folder = getfolder (1);
333 maildir = m_maildir (folder);
334
335 if (chdir (maildir) == NOTOK)
336 adios (maildir, "unable to change directory to");
337
338 /* read folder and create message structure */
339 if (! (mp = folder_read (folder, 1)))
340 adios (NULL, "unable to read folder %s", folder);
341
342 /* check for empty folder */
343 if (mp->nummsg == 0)
344 adios (NULL, "no messages in %s", folder);
345
346 /* parse all the message ranges/sequences and set SELECTED */
347 for (msgnum = 0; msgnum < msgs.size; msgnum++)
348 if (! m_convert (mp, msgs.msgs[msgnum]))
349 done (1);
350 seq_setprev (mp); /* set the previous-sequence */
351
352 if (! (cts = (CT *) calloc ((size_t) (mp->numsel + 1), sizeof *cts)))
353 adios (NULL, "out of memory");
354 ctp = cts;
355
356 for (msgnum = mp->lowsel; msgnum <= mp->hghsel; msgnum++) {
357 if (is_selected(mp, msgnum)) {
358 char *msgnam;
359
360 msgnam = m_name (msgnum);
361 if ((ct = parse_mime (msgnam))) *ctp++ = ct;
362 }
363 }
364
365 seq_setcur (mp, mp->hghsel); /* update current message */
366 seq_save (mp); /* synchronize sequences */
367 context_replace (pfolder, folder);/* update current folder */
368 context_save (); /* save the context file */
369 }
370
371 if (*cts) {
372 for (ctp = cts; *ctp; ++ctp) {
373 status += mhfixmsgsbr (ctp, &fx, outfile);
374
375 if (using_stdin) {
376 unlink (file);
377
378 if (! outfile) {
379 /* Just calling m_backup() unlinks the backup file. */
380 (void) m_backup (file);
381 }
382 }
383 }
384 } else {
385 status = 1;
386 }
387
388 free (outfile);
389 free (tmp);
390 free (file);
391
392 /* done is freects_done, which will clean up all of cts. */
393 done (status);
394 return NOTOK;
395 }
396
397
398 int
399 mhfixmsgsbr (CT *ctp, const fix_transformations *fx, char *outfile) {
400 /* Store input filename in case one of the transformations, i.e.,
401 fix_boundary(), rewrites to a tmp file. */
402 char *input_filename = add ((*ctp)->c_file, NULL);
403 int modify_inplace = 0;
404 int message_mods = 0;
405 int status = OK;
406
407 if (outfile == NULL) {
408 modify_inplace = 1;
409
410 if ((*ctp)->c_file) {
411 outfile = add (m_mktemp2 (tmp, invo_name, NULL, NULL), NULL);
412 } else {
413 adios (NULL, "missing both input and output filenames\n");
414 }
415 }
416
417 reverse_alternative_parts (*ctp);
418 if (status == OK && fx->fixboundary) {
419 status = fix_boundary (ctp, &message_mods);
420 }
421 if (status == OK && fx->fixcte) {
422 status = fix_multipart_cte (*ctp, &message_mods);
423 }
424 if (status == OK && fx->reformat) {
425 status =
426 ensure_text_plain (ctp, NULL, &message_mods, fx->replacetextplain);
427 }
428 if (status == OK && fx->decodetext) {
429 status = decode_text_parts (*ctp, fx->decodetext, &message_mods);
430 }
431 if (status == OK && fx->textcodeset != NULL) {
432 status = convert_codesets (*ctp, fx->textcodeset, &message_mods);
433 }
434
435 if (! (*ctp)->c_umask) {
436 /* Set the umask for the contents file. This currently
437 isn't used but just in case it is in the future. */
438 struct stat st;
439
440 if (stat ((*ctp)->c_file, &st) != NOTOK) {
441 (*ctp)->c_umask = ~(st.st_mode & 0777);
442 } else {
443 (*ctp)->c_umask = ~m_gmprot();
444 }
445 }
446
447 /*
448 * Write the content to a file
449 */
450 if (status == OK) {
451 status = write_content (*ctp, input_filename, outfile, modify_inplace,
452 message_mods);
453 } else if (! modify_inplace) {
454 /* Something went wrong. Output might be expected, such
455 as if this were run as a filter. Just copy the input
456 to the output. */
457 int in = open (input_filename, O_RDONLY);
458 int out = strcmp (outfile, "-")
459 ? open (outfile, O_WRONLY | O_CREAT, m_gmprot ())
460 : STDOUT_FILENO;
461
462 if (in != -1 && out != -1) {
463 cpydata (in, out, input_filename, outfile);
464 } else {
465 status = NOTOK;
466 }
467
468 close (out);
469 close (in);
470 }
471
472 if (modify_inplace) {
473 if (status != OK) unlink (outfile);
474 free (outfile);
475 outfile = NULL;
476 }
477
478 free (input_filename);
479
480 return status;
481 }
482
483
484 /* parse_mime() arranges alternates in reverse (priority) order, so
485 reverse them back. This will put a text/plain part at the front of
486 a multipart/alternative part, for example, where it belongs. */
487 static void
488 reverse_alternative_parts (CT ct) {
489 if (ct->c_type == CT_MULTIPART) {
490 struct multipart *m = (struct multipart *) ct->c_ctparams;
491 struct part *part;
492
493 if (ct->c_subtype == MULTI_ALTERNATE) {
494 reverse_parts (ct);
495 }
496
497 /* And call recursively on each part of a multipart. */
498 for (part = m->mp_parts; part; part = part->mp_next) {
499 reverse_alternative_parts (part->mp_part);
500 }
501 }
502 }
503
504
505 static int
506 fix_boundary (CT *ct, int *message_mods) {
507 struct multipart *mp;
508 int status = OK;
509
510 if (bogus_mp_content) {
511 mp = (struct multipart *) (*ct)->c_ctparams;
512
513 /*
514 * 1) Get boundary at end of part.
515 * 2) Get boundary at beginning of part and compare to the end-of-part
516 * boundary.
517 * 3) Write out contents of ct to tmp file, replacing boundary in
518 * header with boundary from part. Set c_unlink to 1.
519 * 4) Free ct.
520 * 5) Call parse_mime() on the tmp file, replacing ct.
521 */
522
523 if (mp && mp->mp_start) {
524 char *part_boundary;
525
526 if (get_multipart_boundary (*ct, &part_boundary) == OK) {
527 char *fixed;
528
529 if ((fixed = m_mktemp2 (tmp, invo_name, NULL, &(*ct)->c_fp))) {
530 if (replace_boundary (*ct, fixed, part_boundary) == OK) {
531 char *filename = add ((*ct)->c_file, NULL);
532
533 free_content (*ct);
534 if ((*ct = parse_mime (fixed))) {
535 (*ct)->c_unlink = 1;
536
537 ++*message_mods;
538 if (verbosw) {
539 report (NULL, filename,
540 "fix multipart boundary");
541 }
542 }
543 free (filename);
544 } else {
545 advise (NULL, "unable to replace broken boundary");
546 status = NOTOK;
547 }
548 } else {
549 advise (NULL, "unable to create temporary file");
550 status = NOTOK;
551 }
552
553 free (part_boundary);
554 }
555 }
556 }
557
558 return status;
559 }
560
561
562 static int
563 get_multipart_boundary (CT ct, char **part_boundary) {
564 char buffer[BUFSIZ];
565 char *end_boundary = NULL;
566 off_t begin = (off_t) ct->c_end > (off_t) (ct->c_begin + sizeof buffer)
567 ? (off_t) (ct->c_end - sizeof buffer)
568 : (off_t) ct->c_begin;
569 size_t bytes_read;
570 int status = OK;
571
572 /* This will fail if the boundary spans fread() calls. BUFSIZ should
573 be big enough, even if it's just 1024, to make that unlikely. */
574
575 /* free_content() will close ct->c_fp. */
576 if (! ct->c_fp && (ct->c_fp = fopen (ct->c_file, "r")) == NULL) {
577 advise (ct->c_file, "unable to open for reading");
578 return NOTOK;
579 }
580
581 /* Get boundary at end of multipart. */
582 while (begin >= (off_t) ct->c_begin) {
583 fseeko (ct->c_fp, begin, SEEK_SET);
584 while ((bytes_read = fread (buffer, 1, sizeof buffer, ct->c_fp)) > 0) {
585 char *end = buffer + bytes_read - 1;
586 char *cp;
587
588 if ((cp = rfind_str (buffer, bytes_read, "--"))) {
589 /* Trim off trailing "--" and anything beyond. */
590 *cp-- = '\0';
591 if ((end = rfind_str (buffer, cp - buffer, "\n"))) {
592 if (strlen (end) > 3 && *end++ == '\n' &&
593 *end++ == '-' && *end++ == '-') {
594 end_boundary = add (end, NULL);
595 break;
596 }
597 }
598 }
599 }
600
601 if (! end_boundary && begin > (off_t) (ct->c_begin + sizeof buffer)) {
602 begin -= sizeof buffer;
603 } else {
604 break;
605 }
606 }
607
608 /* Get boundary at beginning of multipart. */
609 if (end_boundary) {
610 fseeko (ct->c_fp, ct->c_begin, SEEK_SET);
611 while ((bytes_read = fread (buffer, 1, sizeof buffer, ct->c_fp)) > 0) {
612 if (bytes_read >= strlen (end_boundary)) {
613 char *cp = find_str (buffer, bytes_read, end_boundary);
614
615 if (cp && cp - buffer >= 2 && *--cp == '-' &&
616 *--cp == '-' && (cp > buffer && *--cp == '\n')) {
617 status = OK;
618 break;
619 }
620 } else {
621 /* The start and end boundaries didn't match, or the
622 start boundary doesn't begin with "\n--" (or "--"
623 if at the beginning of buffer). Keep trying. */
624 status = NOTOK;
625 }
626 }
627 } else {
628 status = NOTOK;
629 }
630
631 if (status == OK) {
632 *part_boundary = end_boundary;
633 } else {
634 *part_boundary = NULL;
635 free (end_boundary);
636 }
637
638 return status;
639 }
640
641
642 /* Open and copy ct->c_file to file, replacing the multipart boundary. */
643 static int
644 replace_boundary (CT ct, char *file, const char *boundary) {
645 FILE *fpin, *fpout;
646 int compnum, state;
647 char buf[BUFSIZ], name[NAMESZ];
648 char *np, *vp;
649 m_getfld_state_t gstate = 0;
650 int status = OK;
651
652 if (ct->c_file == NULL) {
653 advise (NULL, "missing input filename");
654 return NOTOK;
655 }
656
657 if ((fpin = fopen (ct->c_file, "r")) == NULL) {
658 advise (ct->c_file, "unable to open for reading");
659 return NOTOK;
660 }
661
662 if ((fpout = fopen (file, "w")) == NULL) {
663 fclose (fpin);
664 advise (file, "unable to open for writing");
665 return NOTOK;
666 }
667
668 for (compnum = 1;;) {
669 int bufsz = (int) sizeof buf;
670
671 switch (state = m_getfld (&gstate, name, buf, &bufsz, fpin)) {
672 case FLD:
673 case FLDPLUS:
674 compnum++;
675
676 /* get copies of the buffers */
677 np = add (name, NULL);
678 vp = add (buf, NULL);
679
680 /* if necessary, get rest of field */
681 while (state == FLDPLUS) {
682 bufsz = sizeof buf;
683 state = m_getfld (&gstate, name, buf, &bufsz, fpin);
684 vp = add (buf, vp); /* add to previous value */
685 }
686
687 if (strcasecmp (TYPE_FIELD, np)) {
688 fprintf (fpout, "%s:%s", np, vp);
689 } else {
690 char *new_boundary = update_attr (vp, "boundary=", boundary);
691
692 fprintf (fpout, "%s:%s\n", np, new_boundary);
693 free (new_boundary);
694 }
695
696 free (vp);
697 free (np);
698
699 continue;
700
701 case BODY:
702 fputs ("\n", fpout);
703 /* buf will have a terminating NULL, skip it. */
704 fwrite (buf, 1, bufsz-1, fpout);
705 continue;
706
707 case FILEEOF:
708 break;
709
710 case LENERR:
711 case FMTERR:
712 advise (NULL, "message format error in component #%d", compnum);
713 status = NOTOK;
714 break;
715
716 default:
717 advise (NULL, "getfld() returned %d", state);
718 status = NOTOK;
719 break;
720 }
721
722 break;
723 }
724
725 m_getfld_state_destroy (&gstate);
726 fclose (fpout);
727 fclose (fpin);
728
729 return status;
730 }
731
732
/* Change the value of a name=value pair in a header field body.
   If the name isn't there, append the pair.  In any case, a new
   string will be allocated and must be free'd by the caller.
   Trims any trailing newlines.

   name includes the trailing '=' (e.g. "boundary=").  The new value
   is always written in double quotes.  body may be modified: trailing
   newlines, and a trailing semicolon when the pair is appended, are
   cut off in place. */
static char *
update_attr (char *body, const char *name, const char *value) {
    char *bp = nmh_strcasestr (body, name);
    char *new_body;

    if (bp) {
        char *other_attrs = strchr (bp, ';');
        char *value_start = bp + strlen (name);
        char saved = *value_start;

        /* Cut body right after "name" just long enough to build the
           prefix, then restore the byte.  If the old value was empty,
           that byte is the ';' that other_attrs points at, and it
           must survive. */
        *value_start = '\0';
        new_body = concat (body, "\"", value, "\"", NULL);
        *value_start = saved;

        if (other_attrs) {
            size_t len = strlen (other_attrs);

            /* Trim any trailing newlines. */
            while (len > 1 && other_attrs[len - 1] == '\n') {
                other_attrs[--len] = '\0';
            }
            new_body = add (other_attrs, new_body);
        }
    } else {
        size_t len = strlen (body);

        /* Append name/value pair, after first removing a final newline
           and (extraneous) semicolon.  The length checks keep an empty
           body from being indexed at -1. */
        if (len > 0 && body[len - 1] == '\n') body[--len] = '\0';
        if (len > 0 && body[len - 1] == ';') body[--len] = '\0';
        new_body = concat (body, "; ", name, "\"", value, "\"", NULL);
    }

    return new_body;
}
769
770
771 static int
772 fix_multipart_cte (CT ct, int *message_mods) {
773 int status = OK;
774
775 if (ct->c_type == CT_MULTIPART) {
776 struct multipart *m;
777 struct part *part;
778
779 if (ct->c_encoding != CE_7BIT && ct->c_encoding != CE_8BIT &&
780 ct->c_encoding != CE_BINARY) {
781 HF hf;
782
783 for (hf = ct->c_first_hf; hf; hf = hf->next) {
784 char *name = hf->name;
785 for (; *name && isspace ((unsigned char) *name); ++name) {
786 continue;
787 }
788
789 if (! strncasecmp (name, ENCODING_FIELD,
790 strlen (ENCODING_FIELD))) {
791 char *prefix = "Nmh-REPLACED-INVALID-";
792 HF h = mh_xmalloc (sizeof *h);
793
794 h->name = add (hf->name, NULL);
795 h->hf_encoding = hf->hf_encoding;
796 h->next = hf->next;
797 hf->next = h;
798
799 /* Retain old header but prefix its name. */
800 free (hf->name);
801 hf->name = concat (prefix, h->name, NULL);
802
803 ++*message_mods;
804 if (verbosw) {
805 char *encoding = cpytrim (hf->value);
806 report (ct->c_partno, ct->c_file,
807 "replace Content-Transfer-Encoding of %s "
808 "with 8 bit", encoding);
809 free (encoding);
810 }
811
812 h->value = add (" 8bit\n", NULL);
813
814 /* Don't need to warn for multiple C-T-E header
815 fields, parse_mime() already does that. But
816 if there are any, fix them all as necessary. */
817 hf = h;
818 }
819 }
820
821 set_ce (ct, CE_8BIT);
822 }
823
824 m = (struct multipart *) ct->c_ctparams;
825 for (part = m->mp_parts; part; part = part->mp_next) {
826 if (fix_multipart_cte (part->mp_part, message_mods) != OK) {
827 status = NOTOK;
828 break;
829 }
830 }
831 }
832
833 return status;
834 }
835
836
837 static int
838 set_ce (CT ct, int encoding) {
839 const char *ce = ce_str (encoding);
840 const struct str2init *ctinit = get_ce_method (ce);
841
842 if (ctinit) {
843 char *cte = concat (" ", ce, "\n", NULL);
844 int found_cte = 0;
845 HF hf;
846 /* Decoded contents might be in ct->c_cefile.ce_file, if the
847 caller is decode_text_parts (). Save because we'll
848 overwrite below. */
849 struct cefile decoded_content_info = ct->c_cefile;
850
851 ct->c_encoding = encoding;
852
853 ct->c_ctinitfnx = ctinit->si_init;
854 /* This will assign ct->c_cefile with an all-0 struct, which
855 is what we want. */
856 (*ctinit->si_init) (ct);
857 /* After returning, the caller should set
858 ct->c_cefile.ce_file to the name of the file containing
859 the contents. */
860
861 /* Restore the cefile. */
862 ct->c_cefile = decoded_content_info;
863
864 /* Update/add Content-Transfer-Encoding header field. */
865 for (hf = ct->c_first_hf; hf; hf = hf->next) {
866 if (! strcasecmp (ENCODING_FIELD, hf->name)) {
867 found_cte = 1;
868 free (hf->value);
869 hf->value = cte;
870 }
871 }
872 if (! found_cte) {
873 add_header (ct, add (ENCODING_FIELD, NULL), cte);
874 }
875
876 /* Update c_celine. It's used only by mhlist -debug. */
877 free (ct->c_celine);
878 ct->c_celine = add (cte, NULL);
879
880 return OK;
881 } else {
882 return NOTOK;
883 }
884 }
885
886
887 /* Make sure each text part has a corresponding text/plain part. */
888 static int
889 ensure_text_plain (CT *ct, CT parent, int *message_mods, int replacetextplain) {
890 int status = OK;
891
892 switch ((*ct)->c_type) {
893 case CT_TEXT: {
894 int has_text_plain = 0;
895
896 /* Nothing to do for text/plain. */
897 if ((*ct)->c_subtype == TEXT_PLAIN) return OK;
898
899 if (parent && parent->c_type == CT_MULTIPART &&
900 parent->c_subtype == MULTI_ALTERNATE) {
901 struct multipart *mp = (struct multipart *) parent->c_ctparams;
902 struct part *part, *prev;
903 int new_subpart_number = 1;
904
905 /* See if there is a sibling text/plain. */
906 for (prev = part = mp->mp_parts; part; part = part->mp_next) {
907 ++new_subpart_number;
908 if (part->mp_part->c_type == CT_TEXT &&
909 part->mp_part->c_subtype == TEXT_PLAIN) {
910 if (replacetextplain) {
911 struct part *old_part;
912 if (part == mp->mp_parts) {
913 old_part = mp->mp_parts;
914 mp->mp_parts = part->mp_next;
915 } else {
916 old_part = prev->mp_next;
917 prev->mp_next = part->mp_next;
918 }
919 if (verbosw) {
920 report (parent->c_partno, parent->c_file,
921 "remove text/plain part %s",
922 old_part->mp_part->c_partno);
923 }
924 free_content (old_part->mp_part);
925 free (old_part);
926 } else {
927 has_text_plain = 1;
928 }
929 break;
930 }
931 prev = part;
932 }
933
934 if (! has_text_plain) {
935 /* Parent is a multipart/alternative. Insert a new
936 text/plain subpart. */
937 struct part *new_part = mh_xmalloc (sizeof *new_part);
938
939 if ((new_part->mp_part = build_text_plain_part (*ct))) {
940 char buffer[16];
941 snprintf (buffer, sizeof buffer, "%d", new_subpart_number);
942
943 new_part->mp_next = mp->mp_parts;
944 mp->mp_parts = new_part;
945 new_part->mp_part->c_partno =
946 concat (parent->c_partno ? parent->c_partno : "1", ".",
947 buffer, NULL);
948
949 ++*message_mods;
950 if (verbosw) {
951 report (parent->c_partno, parent->c_file,
952 "insert text/plain part");
953 }
954 } else {
955 free_content (new_part->mp_part);
956 free (new_part);
957 status = NOTOK;
958 }
959 }
960 } else {
961 /* Slip new text/plain part into a new multipart/alternative. */
962 CT tp_part = build_text_plain_part (*ct);
963
964 if (tp_part) {
965 CT mp_alt = build_multipart_alt (*ct, tp_part, CT_MULTIPART,
966 MULTI_ALTERNATE);
967 if (mp_alt) {
968 struct multipart *mp =
969 (struct multipart *) mp_alt->c_ctparams;
970
971 if (mp && mp->mp_parts) {
972 mp->mp_parts->mp_part = tp_part;
973 /* Make the new multipart/alternative the parent. */
974 *ct = mp_alt;
975
976 ++*message_mods;
977 if (verbosw) {
978 report ((*ct)->c_partno, (*ct)->c_file,
979 "insert text/plain part");
980 }
981 } else {
982 free_content (tp_part);
983 free_content (mp_alt);
984 status = NOTOK;
985 }
986 } else {
987 status = NOTOK;
988 }
989 } else {
990 status = NOTOK;
991 }
992 }
993 break;
994 }
995
996 case CT_MULTIPART: {
997 struct multipart *mp = (struct multipart *) (*ct)->c_ctparams;
998 struct part *part;
999
1000 for (part = mp->mp_parts; status == OK && part; part = part->mp_next) {
1001 if ((*ct)->c_type == CT_MULTIPART) {
1002 status = ensure_text_plain (&part->mp_part, *ct, message_mods,
1003 replacetextplain);
1004 }
1005 }
1006 break;
1007 }
1008
1009 case CT_MESSAGE:
1010 if ((*ct)->c_subtype == MESSAGE_EXTERNAL) {
1011 struct exbody *e;
1012
1013 e = (struct exbody *) (*ct)->c_ctparams;
1014 status = ensure_text_plain (&e->eb_content, *ct, message_mods,
1015 replacetextplain);
1016 }
1017 break;
1018 }
1019
1020 return status;
1021 }
1022
1023
1024 static CT
1025 build_text_plain_part (CT encoded_part) {
1026 CT tp_part = divide_part (encoded_part);
1027 char *tmp_plain_file = NULL;
1028
1029 if (decode_part (tp_part) == OK) {
1030 /* Now, tp_part->c_cefile.ce_file is the name of the tmp file that
1031 contains the decoded contents. And the decoding function, such
1032 as openQuoted, will have set ...->ce_unlink to 1 so that it will
1033 be unlinked by free_content (). */
1034 tmp_plain_file = add (m_mktemp2 (tmp, invo_name, NULL, NULL), NULL);
1035 if (reformat_part (tp_part, tmp_plain_file,
1036 tp_part->c_ctinfo.ci_type,
1037 tp_part->c_ctinfo.ci_subtype,
1038 tp_part->c_type) == OK) {
1039 return tp_part;
1040 }
1041 }
1042
1043 free_content (tp_part);
1044 unlink (tmp_plain_file);
1045 free (tmp_plain_file);
1046
1047 return NULL;
1048 }
1049
1050
1051 static CT
1052 divide_part (CT ct) {
1053 CT new_part;
1054
1055 if ((new_part = (CT) calloc (1, sizeof *new_part)) == NULL)
1056 adios (NULL, "out of memory");
1057
1058 /* Just copy over what is needed for decoding. c_vrsn and
1059 c_celine aren't necessary. */
1060 new_part->c_file = add (ct->c_file, NULL);
1061 new_part->c_begin = ct->c_begin;
1062 new_part->c_end = ct->c_end;
1063 copy_ctinfo (&new_part->c_ctinfo, &ct->c_ctinfo);
1064 new_part->c_type = ct->c_type;
1065 new_part->c_cefile = ct->c_cefile;
1066 new_part->c_encoding = ct->c_encoding;
1067 new_part->c_ctinitfnx = ct->c_ctinitfnx;
1068 new_part->c_ceopenfnx = ct->c_ceopenfnx;
1069 new_part->c_ceclosefnx = ct->c_ceclosefnx;
1070 new_part->c_cesizefnx = ct->c_cesizefnx;
1071
1072 /* c_ctline is used by reformat__part(), so it can preserve
1073 anything after the type/subtype. */
1074 new_part->c_ctline = add (ct->c_ctline, NULL);
1075
1076 return new_part;
1077 }
1078
1079
1080 static void
1081 copy_ctinfo (CI dest, CI src) {
1082 char **s_ap, **d_ap, **s_vp, **d_vp;
1083
1084 dest->ci_type = src->ci_type ? add (src->ci_type, NULL) : NULL;
1085 dest->ci_subtype = src->ci_subtype ? add (src->ci_subtype, NULL) : NULL;
1086
1087 for (s_ap = src->ci_attrs, d_ap = dest->ci_attrs,
1088 s_vp = src->ci_values, d_vp = dest->ci_values;
1089 *s_ap;
1090 ++s_ap, ++d_ap, ++s_vp, ++d_vp) {
1091 *d_ap = add (*s_ap, NULL);
1092 *d_vp = *s_vp;
1093 }
1094 *d_ap = NULL;
1095
1096 dest->ci_comment = src->ci_comment ? add (src->ci_comment, NULL) : NULL;
1097 dest->ci_magic = src->ci_magic ? add (src->ci_magic, NULL) : NULL;
1098 }
1099
1100
1101 static int
1102 decode_part (CT ct) {
1103 char *tmp_decoded;
1104 int status;
1105
1106 tmp_decoded = add (m_mktemp2 (tmp, invo_name, NULL, NULL), NULL);
1107 /* The following call will load ct->c_cefile.ce_file with the tmp
1108 filename of the decoded content. tmp_decoded will contain the
1109 encoded output, get rid of that. */
1110 status = output_message (ct, tmp_decoded);
1111 unlink (tmp_decoded);
1112 free (tmp_decoded);
1113
1114 return status;
1115 }
1116
1117
1118 /* Some of the arguments aren't really needed now, but maybe will
1119 be in the future for other than text types. */
1120 static int
1121 reformat_part (CT ct, char *file, char *type, char *subtype, int c_type) {
1122 int output_subtype, output_encoding;
1123 char *cp, *cf;
1124 int status;
1125
1126 /* Hacky: this redirects the output from whatever command is used
1127 to show the part to a file. So, the user can't have any output
1128 redirection in that command.
1129 Could show_multi() in mhshowsbr.c avoid this? */
1130
1131 /* Check for invo_name-format-type/subtype. */
1132 cp = concat (invo_name, "-format-", type, "/", subtype, NULL);
1133 if ((cf = context_find (cp)) && *cf != '\0') {
1134 if (strchr (cf, '>')) {
1135 free (cp);
1136 advise (NULL, "'>' prohibited in \"%s\",\nplease fix your "
1137 "%s-format-%s/%s profile entry", cf, invo_name, type,
1138 subtype);
1139 return NOTOK;
1140 }
1141 } else {
1142 free (cp);
1143
1144 /* Check for invo_name-format-type. */
1145 cp = concat (invo_name, "-format-", type, NULL);
1146 if (! (cf = context_find (cp)) || *cf == '\0') {
1147 free (cp);
1148 if (verbosw) {
1149 advise (NULL, "Don't know how to convert %s, there is no "
1150 "%s-format-%s/%s profile entry",
1151 ct->c_file, invo_name, type, subtype);
1152 }
1153 return NOTOK;
1154 }
1155
1156 if (strchr (cf, '>')) {
1157 free (cp);
1158 advise (NULL, "'>' prohibited in \"%s\"", cf);
1159 return NOTOK;
1160 }
1161 }
1162 free (cp);
1163
1164 cp = concat (cf, " >", file, NULL);
1165 status = show_content_aux (ct, 1, 0, cp, NULL);
1166 free (cp);
1167
1168 /* Unlink decoded content tmp file and free its filename to avoid
1169 leaks. The file stream should already have been closed. */
1170 if (ct->c_cefile.ce_unlink) {
1171 unlink (ct->c_cefile.ce_file);
1172 free (ct->c_cefile.ce_file);
1173 ct->c_cefile.ce_file = NULL;
1174 ct->c_cefile.ce_unlink = 0;
1175 }
1176
1177 if (c_type == CT_TEXT) {
1178 output_subtype = TEXT_PLAIN;
1179 } else {
1180 /* Set subtype to 0, which is always an UNKNOWN subtype. */
1181 output_subtype = 0;
1182 }
1183 output_encoding = charset_encoding (ct);
1184
1185 if (set_ct_type (ct, c_type, output_subtype, output_encoding) == OK) {
1186 ct->c_cefile.ce_file = file;
1187 ct->c_cefile.ce_unlink = 1;
1188 } else {
1189 ct->c_cefile.ce_unlink = 0;
1190 status = NOTOK;
1191 }
1192
1193 return status;
1194 }
1195
1196
1197 /* Identifies 7bit or 8bit content based on charset. */
1198 static int
1199 charset_encoding (CT ct) {
1200 /* norm_charmap() is case sensitive. */
1201 char *codeset = upcase (content_codeset (ct));
1202 int encoding =
1203 strcmp (norm_charmap (codeset), "US-ASCII") ? CE_8BIT : CE_7BIT;
1204
1205 free (codeset);
1206 return encoding;
1207 }
1208
1209
1210 static CT
1211 build_multipart_alt (CT first_alt, CT new_part, int type, int subtype) {
1212 char *boundary_prefix = "----=_nmh-multipart";
1213 char *boundary = concat (boundary_prefix, first_alt->c_partno, NULL);
1214 char *boundary_indicator = "; boundary=";
1215 char *typename, *subtypename, *name;
1216 CT ct;
1217 struct part *p;
1218 struct multipart *m;
1219 char *cp;
1220 const struct str2init *ctinit;
1221
1222 if ((ct = (CT) calloc (1, sizeof *ct)) == NULL)
1223 adios (NULL, "out of memory");
1224
1225 /* Set up the multipart/alternative part. These fields of *ct were
1226 initialized to 0 by calloc():
1227 c_fp, c_unlink, c_begin, c_end,
1228 c_vrsn, c_ctline, c_celine,
1229 c_id, c_descr, c_dispo, c_partno,
1230 c_ctinfo.ci_comment, c_ctinfo.ci_magic,
1231 c_cefile, c_encoding,
1232 c_digested, c_digest[16], c_ctexbody,
1233 c_ctinitfnx, c_ceopenfnx, c_ceclosefnx, c_cesizefnx,
1234 c_umask, c_pid, c_rfc934,
1235 c_showproc, c_termproc, c_storeproc, c_storage, c_folder
1236 */
1237
1238 ct->c_file = add (first_alt->c_file, NULL);
1239 ct->c_type = type;
1240 ct->c_subtype = subtype;
1241
1242 ctinit = get_ct_init (ct->c_type);
1243
1244 typename = ct_type_str (type);
1245 subtypename = ct_subtype_str (type, subtype);
1246
1247 {
1248 int serial = 0;
1249 int found_boundary = 1;
1250
1251 while (found_boundary && serial < 1000000) {
1252 found_boundary = 0;
1253
1254 /* Ensure that the boundary doesn't appear in the decoded
1255 content. */
1256 if (new_part->c_cefile.ce_file) {
1257 if ((found_boundary =
1258 boundary_in_content (&new_part->c_cefile.ce_fp,
1259 new_part->c_cefile.ce_file,
1260 boundary)) == -1) {
1261 return NULL;
1262 }
1263 }
1264
1265 /* Ensure that the boundary doesn't appear in the encoded
1266 content. */
1267 if (! found_boundary && new_part->c_file) {
1268 if ((found_boundary = boundary_in_content (&new_part->c_fp,
1269 new_part->c_file,
1270 boundary)) == -1) {
1271 return NULL;
1272 }
1273 }
1274
1275 if (found_boundary) {
1276 /* Try a slightly different boundary. */
1277 char buffer2[16];
1278
1279 free (boundary);
1280 ++serial;
1281 snprintf (buffer2, sizeof buffer2, "%d", serial);
1282 boundary =
1283 concat (boundary_prefix,
1284 first_alt->c_partno ? first_alt->c_partno : "",
1285 "-", buffer2, NULL);
1286 }
1287 }
1288
1289 if (found_boundary) {
1290 advise (NULL, "giving up trying to find a unique boundary");
1291 return NULL;
1292 }
1293 }
1294
1295 name = concat (" ", typename, "/", subtypename, boundary_indicator, "\"",
1296 boundary, "\"", NULL);
1297
1298 /* Load c_first_hf and c_last_hf. */
1299 transfer_noncontent_headers (first_alt, ct);
1300 add_header (ct, add (TYPE_FIELD, NULL), concat (name, "\n", NULL));
1301 free (name);
1302
1303 /* Load c_partno. */
1304 if (first_alt->c_partno) {
1305 ct->c_partno = add (first_alt->c_partno, NULL);
1306 free (first_alt->c_partno);
1307 first_alt->c_partno = concat (ct->c_partno, ".1", NULL);
1308 new_part->c_partno = concat (ct->c_partno, ".2", NULL);
1309 } else {
1310 first_alt->c_partno = add ("1", NULL);
1311 new_part->c_partno = add ("2", NULL);
1312 }
1313
1314 if (ctinit) {
1315 ct->c_ctinfo.ci_type = add (typename, NULL);
1316 ct->c_ctinfo.ci_subtype = add (subtypename, NULL);
1317 }
1318
1319 name = concat (" ", typename, "/", subtypename, boundary_indicator,
1320 boundary, NULL);
1321 if ((cp = strstr (name, boundary_indicator))) {
1322 ct->c_ctinfo.ci_attrs[0] = name;
1323 ct->c_ctinfo.ci_attrs[1] = NULL;
1324 /* ci_values don't get free'd, so point into ci_attrs. */
1325 ct->c_ctinfo.ci_values[0] = cp + strlen (boundary_indicator);
1326 }
1327
1328 p = (struct part *) mh_xmalloc (sizeof *p);
1329 p->mp_next = (struct part *) mh_xmalloc (sizeof *p->mp_next);
1330 p->mp_next->mp_next = NULL;
1331 p->mp_next->mp_part = first_alt;
1332
1333 if ((m = (struct multipart *) calloc (1, sizeof (struct multipart))) ==
1334 NULL)
1335 adios (NULL, "out of memory");
1336 m->mp_start = concat (boundary, "\n", NULL);
1337 m->mp_stop = concat (boundary, "--\n", NULL);
1338 m->mp_parts = p;
1339 ct->c_ctparams = (void *) m;
1340
1341 free (boundary);
1342
1343 return ct;
1344 }
1345
1346
1347 /* Check that the boundary does not appear in the content. */
1348 static int
1349 boundary_in_content (FILE **fp, char *file, const char *boundary) {
1350 char buffer[BUFSIZ];
1351 size_t bytes_read;
1352 int found_boundary = 0;
1353
1354 /* free_content() will close *fp if we fopen it here. */
1355 if (! *fp && (*fp = fopen (file, "r")) == NULL) {
1356 advise (file, "unable to open %s for reading", file);
1357 return NOTOK;
1358 }
1359
1360 fseeko (*fp, 0L, SEEK_SET);
1361 while ((bytes_read = fread (buffer, 1, sizeof buffer, *fp)) > 0) {
1362 if (find_str (buffer, bytes_read, boundary)) {
1363 found_boundary = 1;
1364 break;
1365 }
1366 }
1367
1368 return found_boundary;
1369 }
1370
1371
1372 /* Remove all non-Content headers. */
1373 static void
1374 transfer_noncontent_headers (CT old, CT new) {
1375 HF hp, hp_prev;
1376
1377 hp_prev = hp = old->c_first_hf;
1378 while (hp) {
1379 HF next = hp->next;
1380
1381 if (strncasecmp (XXX_FIELD_PRF, hp->name, strlen (XXX_FIELD_PRF))) {
1382 if (hp == old->c_last_hf) {
1383 if (hp == old->c_first_hf) {
1384 old->c_last_hf = old->c_first_hf = NULL;
1385 } else {
1386 hp_prev->next = NULL;
1387 old->c_last_hf = hp_prev;
1388 }
1389 } else {
1390 if (hp == old->c_first_hf) {
1391 old->c_first_hf = next;
1392 } else {
1393 hp_prev->next = next;
1394 }
1395 }
1396
1397 /* Put node hp in the new CT. */
1398 if (new->c_first_hf == NULL) {
1399 new->c_first_hf = hp;
1400 } else {
1401 new->c_last_hf->next = hp;
1402 }
1403 new->c_last_hf = hp;
1404 } else {
1405 /* A Content- header, leave in old. */
1406 hp_prev = hp;
1407 }
1408
1409 hp = next;
1410 }
1411 }
1412
1413
1414 static int
1415 set_ct_type (CT ct, int type, int subtype, int encoding) {
1416 char *typename = ct_type_str (type);
1417 char *subtypename = ct_subtype_str (type, subtype);
1418 /* E.g, " text/plain" */
1419 char *type_subtypename = concat (" ", typename, "/", subtypename, NULL);
1420 /* E.g, " text/plain\n" */
1421 char *name_plus_nl = concat (type_subtypename, "\n", NULL);
1422 int found_content_type = 0;
1423 HF hf;
1424 const char *cp = NULL;
1425 char *ctline;
1426 int status;
1427
1428 /* Update/add Content-Type header field. */
1429 for (hf = ct->c_first_hf; hf; hf = hf->next) {
1430 if (! strcasecmp (TYPE_FIELD, hf->name)) {
1431 found_content_type = 1;
1432 free (hf->value);
1433 hf->value = (cp = strchr (ct->c_ctline, ';'))
1434 ? concat (type_subtypename, cp, "\n", NULL)
1435 : add (name_plus_nl, NULL);
1436 }
1437 }
1438 if (! found_content_type) {
1439 add_header (ct, add (TYPE_FIELD, NULL),
1440 (cp = strchr (ct->c_ctline, ';'))
1441 ? concat (type_subtypename, cp, "\n", NULL)
1442 : add (name_plus_nl, NULL));
1443 }
1444
1445 /* Some of these might not be used, but set them anyway. */
1446 ctline = cp
1447 ? concat (type_subtypename, cp, NULL)
1448 : concat (type_subtypename, NULL);
1449 free (ct->c_ctline);
1450 ct->c_ctline = ctline;
1451 /* Leave other ctinfo members as they were. */
1452 free (ct->c_ctinfo.ci_type);
1453 ct->c_ctinfo.ci_type = add (typename, NULL);
1454 free (ct->c_ctinfo.ci_subtype);
1455 ct->c_ctinfo.ci_subtype = add (subtypename, NULL);
1456 ct->c_type = type;
1457 ct->c_subtype = subtype;
1458
1459 free (name_plus_nl);
1460 free (type_subtypename);
1461
1462 status = set_ce (ct, encoding);
1463
1464 return status;
1465 }
1466
1467
1468 static int
1469 decode_text_parts (CT ct, int encoding, int *message_mods) {
1470 int status = OK;
1471
1472 switch (ct->c_type) {
1473 case CT_TEXT:
1474 switch (ct->c_encoding) {
1475 case CE_BASE64:
1476 case CE_QUOTED: {
1477 int ct_encoding;
1478
1479 if (decode_part (ct) == OK && ct->c_cefile.ce_file) {
1480 if ((ct_encoding = content_encoding (ct)) == CE_BINARY &&
1481 encoding != CE_BINARY) {
1482 /* The decoding isn't acceptable so discard it.
1483 Leave status as OK to allow other transformations. */
1484 if (verbosw) {
1485 report (ct->c_partno, ct->c_file,
1486 "will not decode%s because it is binary",
1487 ct->c_partno ? ""
1488 : ct->c_ctline ? ct->c_ctline
1489 : "");
1490 }
1491 unlink (ct->c_cefile.ce_file);
1492 free (ct->c_cefile.ce_file);
1493 ct->c_cefile.ce_file = NULL;
1494 } else if (ct->c_encoding == CE_QUOTED &&
1495 ct_encoding == CE_8BIT && encoding == CE_7BIT) {
1496 /* The decoding isn't acceptable so discard it.
1497 Leave status as OK to allow other transformations. */
1498 if (verbosw) {
1499 report (ct->c_partno, ct->c_file,
1500 "will not decode%s because it is 8bit",
1501 ct->c_partno ? ""
1502 : ct->c_ctline ? ct->c_ctline
1503 : "");
1504 }
1505 unlink (ct->c_cefile.ce_file);
1506 free (ct->c_cefile.ce_file);
1507 ct->c_cefile.ce_file = NULL;
1508 } else {
1509 int enc;
1510 if (ct_encoding == CE_BINARY)
1511 enc = CE_BINARY;
1512 else if (ct_encoding == CE_8BIT && encoding == CE_7BIT)
1513 enc = CE_QUOTED;
1514 else
1515 enc = charset_encoding (ct);
1516 if (set_ce (ct, enc) == OK) {
1517 ++*message_mods;
1518 if (verbosw) {
1519 report (ct->c_partno, ct->c_file, "decode%s",
1520 ct->c_ctline ? ct->c_ctline : "");
1521 }
1522 strip_crs (ct, message_mods);
1523 } else {
1524 status = NOTOK;
1525 }
1526 }
1527 } else {
1528 status = NOTOK;
1529 }
1530 break;
1531 }
1532 case CE_8BIT:
1533 case CE_7BIT:
1534 strip_crs (ct, message_mods);
1535 break;
1536 default:
1537 break;
1538 }
1539
1540 break;
1541
1542 case CT_MULTIPART: {
1543 struct multipart *m = (struct multipart *) ct->c_ctparams;
1544 struct part *part;
1545
1546 /* Should check to see if the body for this part is encoded?
1547 For now, it gets passed along as-is by InitMultiPart(). */
1548 for (part = m->mp_parts; status == OK && part; part = part->mp_next) {
1549 status = decode_text_parts (part->mp_part, encoding, message_mods);
1550 }
1551 break;
1552 }
1553
1554 case CT_MESSAGE:
1555 if (ct->c_subtype == MESSAGE_EXTERNAL) {
1556 struct exbody *e;
1557
1558 e = (struct exbody *) ct->c_ctparams;
1559 status = decode_text_parts (e->eb_content, encoding, message_mods);
1560 }
1561 break;
1562
1563 default:
1564 break;
1565 }
1566
1567 return status;
1568 }
1569
1570
1571 /* See if the decoded content is 7bit, 8bit, or binary. It's binary
1572 if it has any NUL characters, a CR not followed by a LF, or lines
1573 greater than 998 characters in length. */
1574 static int
1575 content_encoding (CT ct) {
1576 CE ce = &ct->c_cefile;
1577 int encoding = CE_7BIT;
1578
1579 if (ce->ce_file) {
1580 size_t line_len = 0;
1581 char buffer[BUFSIZ];
1582 size_t inbytes;
1583
1584 if (! ce->ce_fp && (ce->ce_fp = fopen (ce->ce_file, "r")) == NULL) {
1585 advise (ce->ce_file, "unable to open for reading");
1586 return CE_UNKNOWN;
1587 }
1588
1589 fseeko (ce->ce_fp, 0L, SEEK_SET);
1590 while (encoding != CE_BINARY &&
1591 (inbytes = fread (buffer, 1, sizeof buffer, ce->ce_fp)) > 0) {
1592 char *cp;
1593 size_t i;
1594 int last_char_was_cr = 0;
1595
1596 for (i = 0, cp = buffer; i < inbytes; ++i, ++cp) {
1597 if (*cp == '\0' || ++line_len > 998 ||
1598 (*cp != '\n' && last_char_was_cr)) {
1599 encoding = CE_BINARY;
1600 break;
1601 } else if (*cp == '\n') {
1602 line_len = 0;
1603 } else if (! isascii ((unsigned char) *cp)) {
1604 encoding = CE_8BIT;
1605 }
1606
1607 last_char_was_cr = *cp == '\r' ? 1 : 0;
1608 }
1609 }
1610
1611 fclose (ce->ce_fp);
1612 ce->ce_fp = NULL;
1613 } /* else should never happen */
1614
1615 return encoding;
1616 }
1617
1618
1619 static int
1620 strip_crs (CT ct, int *message_mods) {
1621 /* norm_charmap() is case sensitive. */
1622 char *codeset = upcase (content_codeset (ct));
1623 int status = OK;
1624
1625 /* Only strip carriage returns if content is ASCII or another
1626 codeset that has the same readily recognizable CR followed by a
1627 LF. We can include UTF-8 here because if the high-order bit of
1628 a UTF-8 byte is 0, then it must be a single-byte ASCII
1629 character. */
1630 if (! strcmp (norm_charmap (codeset), "US-ASCII") ||
1631 ! strncmp (norm_charmap (codeset), "ISO-8859-", 9) ||
1632 ! strncmp (norm_charmap (codeset), "UTF-8", 5) ||
1633 ! strncmp (norm_charmap (codeset), "WINDOWS-12", 10)) {
1634 char **file = NULL;
1635 FILE **fp = NULL;
1636 size_t begin;
1637 size_t end;
1638 int has_crs = 0;
1639 int opened_input_file = 0;
1640
1641 if (ct->c_cefile.ce_file) {
1642 file = &ct->c_cefile.ce_file;
1643 fp = &ct->c_cefile.ce_fp;
1644 begin = end = 0;
1645 } else if (ct->c_file) {
1646 file = &ct->c_file;
1647 fp = &ct->c_fp;
1648 begin = (size_t) ct->c_begin;
1649 end = (size_t) ct->c_end;
1650 } /* else don't know where the content is */
1651
1652 if (file && *file && fp) {
1653 if (! *fp) {
1654 if ((*fp = fopen (*file, "r")) == NULL) {
1655 advise (*file, "unable to open for reading");
1656 status = NOTOK;
1657 } else {
1658 opened_input_file = 1;
1659 }
1660 }
1661 }
1662
1663 if (fp && *fp) {
1664 char buffer[BUFSIZ];
1665 size_t bytes_read;
1666 size_t bytes_to_read =
1667 end > 0 && end > begin ? end - begin : sizeof buffer;
1668
1669 fseeko (*fp, begin, SEEK_SET);
1670 while ((bytes_read = fread (buffer, 1,
1671 min (bytes_to_read, sizeof buffer),
1672 *fp)) > 0) {
1673 /* Look for CR followed by a LF. This is supposed to
1674 be text so there should be LF's. If not, don't
1675 modify the content. */
1676 char *cp;
1677 size_t i;
1678 int last_char_was_cr = 0;
1679
1680 if (end > 0) bytes_to_read -= bytes_read;
1681
1682 for (i = 0, cp = buffer; i < bytes_read; ++i, ++cp) {
1683 if (*cp == '\n' && last_char_was_cr) {
1684 has_crs = 1;
1685 break;
1686 }
1687
1688 last_char_was_cr = *cp == '\r' ? 1 : 0;
1689 }
1690 }
1691
1692 if (has_crs) {
1693 int fd;
1694 char *stripped_content_file =
1695 add (m_mktemp2 (tmp, invo_name, &fd, NULL), NULL);
1696
1697 /* Strip each CR before a LF from the content. */
1698 fseeko (*fp, begin, SEEK_SET);
1699 while ((bytes_read = fread (buffer, 1, sizeof buffer, *fp)) >
1700 0) {
1701 char *cp;
1702 size_t i;
1703 int last_char_was_cr = 0;
1704
1705 for (i = 0, cp = buffer; i < bytes_read; ++i, ++cp) {
1706 if (*cp == '\r') {
1707 last_char_was_cr = 1;
1708 } else if (last_char_was_cr) {
1709 if (*cp != '\n') write (fd, "\r", 1);
1710 write (fd, cp, 1);
1711 last_char_was_cr = 0;
1712 } else {
1713 write (fd, cp, 1);
1714 last_char_was_cr = 0;
1715 }
1716 }
1717 }
1718
1719 if (close (fd)) {
1720 admonish (NULL, "unable to write temporary file %s",
1721 stripped_content_file);
1722 unlink (stripped_content_file);
1723 status = NOTOK;
1724 } else {
1725 /* Replace the decoded file with the converted one. */
1726 if (ct->c_cefile.ce_file) {
1727 if (ct->c_cefile.ce_unlink) {
1728 unlink (ct->c_cefile.ce_file);
1729 }
1730 free (ct->c_cefile.ce_file);
1731 }
1732 ct->c_cefile.ce_file = stripped_content_file;
1733 ct->c_cefile.ce_unlink = 1;
1734
1735 ++*message_mods;
1736 if (verbosw) {
1737 report (ct->c_partno,
1738 begin == 0 && end == 0 ? "" : *file,
1739 "stripped CRs");
1740 }
1741 }
1742 }
1743
1744 if (opened_input_file) {
1745 fclose (*fp);
1746 *fp = NULL;
1747 }
1748 }
1749 }
1750
1751 free (codeset);
1752 return status;
1753 }
1754
1755
1756 char *
1757 content_codeset (CT ct) {
1758 const char *const charset = "charset";
1759 char *default_codeset = NULL;
1760 CI ctinfo = &ct->c_ctinfo;
1761 char **ap, **vp;
1762 char **src_codeset = NULL;
1763
1764 for (ap = ctinfo->ci_attrs, vp = ctinfo->ci_values; *ap; ++ap, ++vp) {
1765 if (! strcasecmp (*ap, charset)) {
1766 src_codeset = vp;
1767 break;
1768 }
1769 }
1770
1771 /* RFC 2045, Sec. 5.2: default to us-ascii. */
1772 if (src_codeset == NULL) src_codeset = &default_codeset;
1773 if (*src_codeset == NULL) *src_codeset = "US-ASCII";
1774
1775 return *src_codeset;
1776 }
1777
1778
1779 static int
1780 convert_codesets (CT ct, char *dest_codeset, int *message_mods) {
1781 int status = OK;
1782
1783 switch (ct->c_type) {
1784 case CT_TEXT:
1785 if (ct->c_subtype == TEXT_PLAIN) {
1786 status = convert_codeset (ct, dest_codeset, message_mods);
1787 }
1788 break;
1789
1790 case CT_MULTIPART: {
1791 struct multipart *m = (struct multipart *) ct->c_ctparams;
1792 struct part *part;
1793
1794 /* Should check to see if the body for this part is encoded?
1795 For now, it gets passed along as-is by InitMultiPart(). */
1796 for (part = m->mp_parts; status == OK && part; part = part->mp_next) {
1797 status =
1798 convert_codesets (part->mp_part, dest_codeset, message_mods);
1799 }
1800 break;
1801 }
1802
1803 case CT_MESSAGE:
1804 if (ct->c_subtype == MESSAGE_EXTERNAL) {
1805 struct exbody *e;
1806
1807 e = (struct exbody *) ct->c_ctparams;
1808 status =
1809 convert_codesets (e->eb_content, dest_codeset, message_mods);
1810 }
1811 break;
1812
1813 default:
1814 break;
1815 }
1816
1817 return status;
1818 }
1819
1820
1821 static int
1822 convert_codeset (CT ct, char *dest_codeset, int *message_mods) {
1823 char *src_codeset = content_codeset (ct);
1824 int status = OK;
1825
1826 /* norm_charmap() is case sensitive. */
1827 char *src_codeset_u = upcase (src_codeset);
1828 char *dest_codeset_u = upcase (dest_codeset);
1829 int different_codesets =
1830 strcmp (norm_charmap (src_codeset), norm_charmap (dest_codeset));
1831
1832 free (dest_codeset_u);
1833 free (src_codeset_u);
1834
1835 if (different_codesets) {
1836 #ifdef HAVE_ICONV
1837 iconv_t conv_desc = NULL;
1838 char *dest;
1839 int fd = -1;
1840 char **file = NULL;
1841 FILE **fp = NULL;
1842 size_t begin;
1843 size_t end;
1844 int opened_input_file = 0;
1845 char src_buffer[BUFSIZ];
1846 HF hf;
1847
1848 if ((conv_desc = iconv_open (dest_codeset, src_codeset)) ==
1849 (iconv_t) -1) {
1850 advise (NULL, "Can't convert %s to %s", src_codeset, dest_codeset);
1851 return -1;
1852 }
1853
1854 dest = add (m_mktemp2 (tmp, invo_name, &fd, NULL), NULL);
1855
1856 if (ct->c_cefile.ce_file) {
1857 file = &ct->c_cefile.ce_file;
1858 fp = &ct->c_cefile.ce_fp;
1859 begin = end = 0;
1860 } else if (ct->c_file) {
1861 file = &ct->c_file;
1862 fp = &ct->c_fp;
1863 begin = (size_t) ct->c_begin;
1864 end = (size_t) ct->c_end;
1865 } /* else no input file: shouldn't happen */
1866
1867 if (file && *file && fp) {
1868 if (! *fp) {
1869 if ((*fp = fopen (*file, "r")) == NULL) {
1870 advise (*file, "unable to open for reading");
1871 status = NOTOK;
1872 } else {
1873 opened_input_file = 1;
1874 }
1875 }
1876 }
1877
1878 if (fp && *fp) {
1879 size_t inbytes;
1880 size_t bytes_to_read =
1881 end > 0 && end > begin ? end - begin : sizeof src_buffer;
1882
1883 fseeko (*fp, begin, SEEK_SET);
1884 while ((inbytes = fread (src_buffer, 1,
1885 min (bytes_to_read, sizeof src_buffer),
1886 *fp)) > 0) {
1887 char dest_buffer[BUFSIZ];
1888 ICONV_CONST char *ib = src_buffer;
1889 char *ob = dest_buffer;
1890 size_t outbytes = sizeof dest_buffer;
1891 size_t outbytes_before = outbytes;
1892
1893 if (end > 0) bytes_to_read -= inbytes;
1894
1895 if (iconv (conv_desc, &ib, &inbytes, &ob, &outbytes) ==
1896 (size_t) -1) {
1897 status = NOTOK;
1898 break;
1899 } else {
1900 write (fd, dest_buffer, outbytes_before - outbytes);
1901 }
1902 }
1903
1904 if (opened_input_file) {
1905 fclose (*fp);
1906 *fp = NULL;
1907 }
1908 }
1909
1910 iconv_close (conv_desc);
1911 close (fd);
1912
1913 if (status == OK) {
1914 /* Replace the decoded file with the converted one. */
1915 if (ct->c_cefile.ce_file) {
1916 if (ct->c_cefile.ce_unlink) {
1917 unlink (ct->c_cefile.ce_file);
1918 }
1919 free (ct->c_cefile.ce_file);
1920 }
1921 ct->c_cefile.ce_file = dest;
1922 ct->c_cefile.ce_unlink = 1;
1923
1924 ++*message_mods;
1925 if (verbosw) {
1926 report (ct->c_partno, ct->c_file, "convert %s to %s",
1927 src_codeset, dest_codeset);
1928 }
1929
1930 /* Update ci_attrs. */
1931 src_codeset = dest_codeset;
1932
1933 /* Update ct->c_ctline. */
1934 if (ct->c_ctline) {
1935 char *ctline =
1936 update_attr (ct->c_ctline, "charset=", dest_codeset);
1937
1938 free (ct->c_ctline);
1939 ct->c_ctline = ctline;
1940 } /* else no CT line, which is odd */
1941
1942 /* Update Content-Type header field. */
1943 for (hf = ct->c_first_hf; hf; hf = hf->next) {
1944 if (! strcasecmp (TYPE_FIELD, hf->name)) {
1945 char *ctline_less_newline =
1946 update_attr (hf->value, "charset=", dest_codeset);
1947 char *ctline = concat (ctline_less_newline, "\n", NULL);
1948 free (ctline_less_newline);
1949
1950 free (hf->value);
1951 hf->value = ctline;
1952 break;
1953 }
1954 }
1955 } else {
1956 unlink (dest);
1957 }
1958 #else /* ! HAVE_ICONV */
1959 NMH_UNUSED (message_mods);
1960
1961 advise (NULL, "Can't convert %s to %s without iconv", src_codeset,
1962 dest_codeset);
1963 status = NOTOK;
1964 #endif /* ! HAVE_ICONV */
1965 }
1966
1967 return status;
1968 }
1969
1970
1971 static int
1972 write_content (CT ct, char *input_filename, char *outfile, int modify_inplace,
1973 int message_mods) {
1974 int status = OK;
1975
1976 if (modify_inplace) {
1977 if (message_mods > 0) {
1978 if ((status = output_message (ct, outfile)) == OK) {
1979 char *infile = input_filename
1980 ? add (input_filename, NULL)
1981 : add (ct->c_file ? ct->c_file : "-", NULL);
1982
1983 if (remove_file (infile) == OK) {
1984 if (rename (outfile, infile)) {
1985 /* Rename didn't work, possibly because of an
1986 attempt to rename across filesystems. Try
1987 brute force copy. */
1988 int old = open (outfile, O_RDONLY);
1989 int new =
1990 open (infile, O_WRONLY | O_CREAT, m_gmprot ());
1991 int i = -1;
1992
1993 if (old != -1 && new != -1) {
1994 char buffer[BUFSIZ];
1995
1996 while ((i = read (old, buffer, sizeof buffer)) >
1997 0) {
1998 if (write (new, buffer, i) != i) {
1999 i = -1;
2000 break;
2001 }
2002 }
2003 }
2004 if (new != -1) close (new);
2005 if (old != -1) close (old);
2006 unlink (outfile);
2007
2008 if (i < 0) {
2009 /* The -file argument processing used path() to
2010 expand filename to absolute path. */
2011 int file = ct->c_file && ct->c_file[0] == '/';
2012
2013 admonish (NULL, "unable to rename %s %s to %s",
2014 file ? "file" : "message", outfile,
2015 infile);
2016 status = NOTOK;
2017 }
2018 }
2019 } else {
2020 admonish (NULL, "unable to remove input file %s, "
2021 "not modifying it", infile);
2022 unlink (outfile);
2023 status = NOTOK;
2024 }
2025
2026 free (infile);
2027 } else {
2028 status = NOTOK;
2029 }
2030 } else {
2031 /* No modifications and didn't need the tmp outfile. */
2032 unlink (outfile);
2033 }
2034 } else {
2035 /* Output is going to some file. Produce it whether or not
2036 there were modifications. */
2037 status = output_message (ct, outfile);
2038 }
2039
2040 flush_errors ();
2041 return status;
2042 }
2043
2044
2045 /*
2046 * If "rmmproc" is defined, call that to remove the file. Otherwise,
2047 * use the standard MH backup file.
2048 */
2049 static int
2050 remove_file (char *file) {
2051 if (rmmproc) {
2052 char *rmm_command = concat (rmmproc, " ", file, NULL);
2053 int status = system (rmm_command);
2054
2055 free (rmm_command);
2056 return WIFEXITED (status) ? WEXITSTATUS (status) : NOTOK;
2057 } else {
2058 /* This is OK for a non-message file, it still uses the
2059 BACKUP_PREFIX form. The backup file will be in the same
2060 directory as file. */
2061 return rename (file, m_backup (file));
2062 }
2063 }
2064
2065
2066 static void
2067 report (char *partno, char *filename, char *message, ...) {
2068 va_list args;
2069 char *fmt;
2070
2071 if (verbosw) {
2072 va_start (args, message);
2073 fmt = concat (filename, partno ? " part " : ", ",
2074 partno ? partno : "", partno ? ", " : "", message, NULL);
2075
2076 advertise (NULL, NULL, fmt, args);
2077
2078 free (fmt);
2079 va_end (args);
2080 }
2081 }
2082
2083
/* Return a copy of str, as made by cpytrim(), with every character
   converted to upper case.  The caller frees the result. */
static char *
upcase (char *str) {
    char *result = cpytrim (str);
    char *p = result;

    while (*p) {
        *p = toupper ((unsigned char) *p);
        ++p;
    }

    return result;
}
2093
2094
/* Signal handler (also used under the name quitser): on SIGQUIT,
   flush stdout and put a newline on stderr, then in all cases leave
   through done (1). */
static void
pipeser (int i)
{
    if (i == SIGQUIT) {
        fflush (stdout);
        fputs ("\n", stderr);
        fflush (stderr);
    }

    done (1);
    /* NOTREACHED */
}