]> diplodocus.org Git - nmh/blob - uip/mhfixmsg.c
Compare character with EOF using signed comparison because
[nmh] / uip / mhfixmsg.c
1 /*
2 * mhfixmsg.c -- rewrite a message with various transformations
3 *
4 * This code is Copyright (c) 2002 and 2013, by the authors of nmh.
5 * See the COPYRIGHT file in the root directory of the nmh
6 * distribution for complete copyright information.
7 */
8
9 #include <h/mh.h>
10 #include <h/mime.h>
11 #include <h/mhparse.h>
12 #include <h/utils.h>
13 #include <h/signals.h>
14 #include <fcntl.h>
15
/*
 * Command-line switches understood by mhfixmsg.  Each X() entry supplies
 * the switch text, the second field of the switch table entry (0 for
 * every switch listed here), and the enumerator that main() matches on
 * after calling smatch().
 */
#define MHFIXMSG_SWITCHES \
    X("decodetext 8bit|7bit", 0, DECODETEXTSW) \
    X("nodecodetext", 0, NDECODETEXTSW) \
    X("textcharset", 0, TEXTCHARSETSW) \
    X("notextcharset", 0, NTEXTCHARSETSW) \
    X("reformat", 0, REFORMATSW) \
    X("noreformat", 0, NREFORMATSW) \
    X("replacetextplain", 0, REPLACETEXTPLAINSW) \
    X("noreplacetextplain", 0, NREPLACETEXTPLAINSW) \
    X("fixboundary", 0, FIXBOUNDARYSW) \
    X("nofixboundary", 0, NFIXBOUNDARYSW) \
    X("fixcte", 0, FIXCTESW) \
    X("nofixcte", 0, NFIXCTESW) \
    X("file file", 0, FILESW) \
    X("outfile file", 0, OUTFILESW) \
    X("rmmproc program", 0, RPROCSW) \
    X("normmproc", 0, NRPRCSW) \
    X("verbose", 0, VERBSW) \
    X("noverbose", 0, NVERBSW) \
    X("version", 0, VERSIONSW) \
    X("help", 0, HELPSW) \

/* First expansion: one enumerator per switch. */
#define X(text, abbrev, ident) ident,
DEFINE_SWITCH_ENUM(MHFIXMSG);
#undef X

/* Second expansion: the "switches" table passed to smatch(). */
#define X(text, abbrev, ident) { text, abbrev, ident },
DEFINE_SWITCH_ARRAY(MHFIXMSG, switches);
#undef X
45
46
47 int verbosw;
48 int debugsw; /* Needed by mhparse.c. */
49
50 #define quitser pipeser
51
52 /* mhparse.c */
53 extern int skip_mp_cte_check; /* flag to InitMultiPart */
54 extern int suppress_bogus_mp_content_warning; /* flag to InitMultiPart */
55 extern int bogus_mp_content; /* flag from InitMultiPart */
56 CT parse_mime (char *);
57 void reverse_parts (CT);
58
59 /* mhoutsbr.c */
60 int output_message (CT, char *);
61
62 /* mhshowsbr.c */
63 int show_content_aux (CT, int, int, char *, char *);
64
65 /* mhmisc.c */
66 void flush_errors (void);
67
68 /* mhfree.c */
69 extern CT *cts;
70 void freects_done (int) NORETURN;
71
72 /*
73 * static prototypes
74 */
/*
 * Selects which repairs mhfixmsgsbr() applies to a message.
 */
struct fix_transformations {
    int fixboundary;        /* nonzero: run fix_boundary() */
    int fixcte;             /* nonzero: run fix_multipart_cte() */
    int reformat;           /* nonzero: run ensure_text_plain() */
    int replacetextplain;   /* handed through to ensure_text_plain() */
    int decodetext;         /* 0: skip; otherwise the encoding handed to
                               decode_text_parts() */
    char *textcharset;      /* NULL: skip; otherwise the charset handed
                               to convert_charsets() */
};
typedef struct fix_transformations fix_transformations;
83
84 int mhfixmsgsbr (CT *, const fix_transformations *, char *);
85 static void reverse_alternative_parts (CT);
86 static int fix_boundary (CT *, int *);
87 static int get_multipart_boundary (CT, char **);
88 static int replace_boundary (CT, char *, char *);
89 static int fix_multipart_cte (CT, int *);
90 static int set_ce (CT, int);
91 static int ensure_text_plain (CT *, CT, int *, int);
92 static CT build_text_plain_part (CT);
93 static CT divide_part (CT);
94 static void copy_ctinfo (CI, CI);
95 static int decode_part (CT);
96 static int reformat_part (CT, char *, char *, char *, int);
97 static int charset_encoding (CT);
98 static CT build_multipart_alt (CT, CT, int, int);
99 static int boundary_in_content (FILE **, char *, const char *);
100 static void transfer_noncontent_headers (CT, CT);
101 static int set_ct_type (CT, int type, int subtype, int encoding);
102 static int decode_text_parts (CT, int, int *);
103 static int content_encoding (CT, const char **);
104 static int strip_crs (CT, int *);
105 static int convert_charsets (CT, char *, int *);
106 static int write_content (CT, char *, char *, int, int);
107 static int remove_file (char *);
108 static void report (char *, char *, char *, ...);
109 static void pipeser (int);
110
111
112 int
113 main (int argc, char **argv) {
114 int msgnum;
115 char *cp, *file = NULL, *folder = NULL;
116 char *maildir, buf[100], *outfile = NULL;
117 char **argp, **arguments;
118 struct msgs_array msgs = { 0, 0, NULL };
119 struct msgs *mp = NULL;
120 CT *ctp;
121 FILE *fp;
122 int using_stdin = 0;
123 int status = OK;
124 fix_transformations fx;
125 fx.reformat = fx.fixcte = fx.fixboundary = 1;
126 fx.replacetextplain = 0;
127 fx.decodetext = CE_8BIT;
128 fx.textcharset = NULL;
129
130 if (nmh_init(argv[0], 1)) { return 1; }
131
132 done = freects_done;
133
134 arguments = getarguments (invo_name, argc, argv, 1);
135 argp = arguments;
136
137 /*
138 * Parse arguments
139 */
140 while ((cp = *argp++)) {
141 if (*cp == '-') {
142 switch (smatch (++cp, switches)) {
143 case AMBIGSW:
144 ambigsw (cp, switches);
145 done (1);
146 case UNKWNSW:
147 adios (NULL, "-%s unknown", cp);
148
149 case HELPSW:
150 snprintf (buf, sizeof buf, "%s [+folder] [msgs] [switches]",
151 invo_name);
152 print_help (buf, switches, 1);
153 done (0);
154 case VERSIONSW:
155 print_version(invo_name);
156 done (0);
157
158 case DECODETEXTSW:
159 if (! (cp = *argp++) || *cp == '-')
160 adios (NULL, "missing argument to %s", argp[-2]);
161 if (! strcasecmp (cp, "8bit")) {
162 fx.decodetext = CE_8BIT;
163 } else if (! strcasecmp (cp, "7bit")) {
164 fx.decodetext = CE_7BIT;
165 } else {
166 adios (NULL, "invalid argument to %s", argp[-2]);
167 }
168 continue;
169 case NDECODETEXTSW:
170 fx.decodetext = 0;
171 continue;
172 case TEXTCHARSETSW:
173 if (! (cp = *argp++) || (*cp == '-' && cp[1]))
174 adios (NULL, "missing argument to %s", argp[-2]);
175 fx.textcharset = cp;
176 continue;
177 case NTEXTCHARSETSW:
178 fx.textcharset = 0;
179 continue;
180 case FIXBOUNDARYSW:
181 fx.fixboundary = 1;
182 continue;
183 case NFIXBOUNDARYSW:
184 fx.fixboundary = 0;
185 continue;
186 case FIXCTESW:
187 fx.fixcte = 1;
188 continue;
189 case NFIXCTESW:
190 fx.fixcte = 0;
191 continue;
192 case REFORMATSW:
193 fx.reformat = 1;
194 continue;
195 case NREFORMATSW:
196 fx.reformat = 0;
197 continue;
198 case REPLACETEXTPLAINSW:
199 fx.replacetextplain = 1;
200 continue;
201 case NREPLACETEXTPLAINSW:
202 fx.replacetextplain = 0;
203 continue;
204 case FILESW:
205 if (! (cp = *argp++) || (*cp == '-' && cp[1]))
206 adios (NULL, "missing argument to %s", argp[-2]);
207 file = *cp == '-' ? add (cp, NULL) : path (cp, TFILE);
208 continue;
209 case OUTFILESW:
210 if (! (cp = *argp++) || (*cp == '-' && cp[1]))
211 adios (NULL, "missing argument to %s", argp[-2]);
212 outfile = *cp == '-' ? add (cp, NULL) : path (cp, TFILE);
213 continue;
214 case RPROCSW:
215 if (!(rmmproc = *argp++) || *rmmproc == '-')
216 adios (NULL, "missing argument to %s", argp[-2]);
217 continue;
218 case NRPRCSW:
219 rmmproc = NULL;
220 continue;
221 case VERBSW:
222 verbosw = 1;
223 continue;
224 case NVERBSW:
225 verbosw = 0;
226 continue;
227 }
228 }
229 if (*cp == '+' || *cp == '@') {
230 if (folder)
231 adios (NULL, "only one folder at a time!");
232 else
233 folder = pluspath (cp);
234 } else {
235 if (*cp == '/') {
236 /* Interpret a full path as a filename, not a message. */
237 file = add (cp, NULL);
238 } else {
239 app_msgarg (&msgs, cp);
240 }
241 }
242 }
243
244 SIGNAL (SIGQUIT, quitser);
245 SIGNAL (SIGPIPE, pipeser);
246
247 /*
248 * Read the standard profile setup
249 */
250 if ((fp = fopen (cp = etcpath ("mhn.defaults"), "r"))) {
251 readconfig ((struct node **) 0, fp, cp, 0);
252 fclose (fp);
253 }
254
255 suppress_bogus_mp_content_warning = skip_mp_cte_check = 1;
256
257 if (! context_find ("path"))
258 free (path ("./", TFOLDER));
259
260 if (file && msgs.size)
261 adios (NULL, "cannot specify msg and file at same time!");
262
263 /*
264 * check if message is coming from file
265 */
266 if (file) {
267 /* If file is stdin, create a tmp file name before parse_mime()
268 has a chance, because it might put in on a different
269 filesystem than the output file. Instead, put it in the
270 user's preferred tmp directory. */
271 CT ct;
272
273 if (! strcmp ("-", file)) {
274 int fd;
275 char *cp;
276
277 using_stdin = 1;
278
279 if ((cp = m_mktemp2 (NULL, invo_name, &fd, NULL)) == NULL) {
280 adios (NULL, "unable to create temporary file in %s",
281 get_temp_dir());
282 } else {
283 free (file);
284 file = add (cp, NULL);
285 cpydata (STDIN_FILENO, fd, "-", file);
286 }
287
288 if (close (fd)) {
289 (void) m_unlink (file);
290 adios (NULL, "failed to write temporary file");
291 }
292 }
293
294 if (! (cts = (CT *) calloc ((size_t) 2, sizeof *cts)))
295 adios (NULL, "out of memory");
296 ctp = cts;
297
298 if ((ct = parse_mime (file))) *ctp++ = ct;
299 } else {
300 /*
301 * message(s) are coming from a folder
302 */
303 CT ct;
304
305 if (! msgs.size)
306 app_msgarg(&msgs, "cur");
307 if (! folder)
308 folder = getfolder (1);
309 maildir = m_maildir (folder);
310
311 if (chdir (maildir) == NOTOK)
312 adios (maildir, "unable to change directory to");
313
314 /* read folder and create message structure */
315 if (! (mp = folder_read (folder, 1)))
316 adios (NULL, "unable to read folder %s", folder);
317
318 /* check for empty folder */
319 if (mp->nummsg == 0)
320 adios (NULL, "no messages in %s", folder);
321
322 /* parse all the message ranges/sequences and set SELECTED */
323 for (msgnum = 0; msgnum < msgs.size; msgnum++)
324 if (! m_convert (mp, msgs.msgs[msgnum]))
325 done (1);
326 seq_setprev (mp); /* set the previous-sequence */
327
328 if (! (cts = (CT *) calloc ((size_t) (mp->numsel + 1), sizeof *cts)))
329 adios (NULL, "out of memory");
330 ctp = cts;
331
332 for (msgnum = mp->lowsel; msgnum <= mp->hghsel; msgnum++) {
333 if (is_selected(mp, msgnum)) {
334 char *msgnam;
335
336 msgnam = m_name (msgnum);
337 if ((ct = parse_mime (msgnam))) *ctp++ = ct;
338 }
339 }
340
341 seq_setcur (mp, mp->hghsel); /* update current message */
342 seq_save (mp); /* synchronize sequences */
343 context_replace (pfolder, folder);/* update current folder */
344 context_save (); /* save the context file */
345 }
346
347 if (*cts) {
348 for (ctp = cts; *ctp; ++ctp) {
349 status += mhfixmsgsbr (ctp, &fx, outfile);
350
351 if (using_stdin) {
352 (void) m_unlink (file);
353
354 if (! outfile) {
355 /* Just calling m_backup() unlinks the backup file. */
356 (void) m_backup (file);
357 }
358 }
359 }
360 } else {
361 status = 1;
362 }
363
364 free (outfile);
365 free (file);
366
367 /* done is freects_done, which will clean up all of cts. */
368 done (status);
369 return NOTOK;
370 }
371
372
373 int
374 mhfixmsgsbr (CT *ctp, const fix_transformations *fx, char *outfile) {
375 /* Store input filename in case one of the transformations, i.e.,
376 fix_boundary(), rewrites to a tmp file. */
377 char *input_filename = add ((*ctp)->c_file, NULL);
378 int modify_inplace = 0;
379 int message_mods = 0;
380 int status = OK;
381
382 if (outfile == NULL) {
383 modify_inplace = 1;
384
385 if ((*ctp)->c_file) {
386 char *tempfile;
387 if ((tempfile = m_mktemp2 (NULL, invo_name, NULL, NULL)) == NULL) {
388 adios (NULL, "unable to create temporary file in %s",
389 get_temp_dir());
390 }
391 outfile = add (tempfile, NULL);
392 } else {
393 adios (NULL, "missing both input and output filenames\n");
394 }
395 }
396
397 reverse_alternative_parts (*ctp);
398 if (status == OK && fx->fixboundary) {
399 status = fix_boundary (ctp, &message_mods);
400 }
401 if (status == OK && fx->fixcte) {
402 status = fix_multipart_cte (*ctp, &message_mods);
403 }
404 if (status == OK && fx->reformat) {
405 status =
406 ensure_text_plain (ctp, NULL, &message_mods, fx->replacetextplain);
407 }
408 if (status == OK && fx->decodetext) {
409 status = decode_text_parts (*ctp, fx->decodetext, &message_mods);
410 }
411 if (status == OK && fx->textcharset != NULL) {
412 status = convert_charsets (*ctp, fx->textcharset, &message_mods);
413 }
414
415 if (! (*ctp)->c_umask) {
416 /* Set the umask for the contents file. This currently
417 isn't used but just in case it is in the future. */
418 struct stat st;
419
420 if (stat ((*ctp)->c_file, &st) != NOTOK) {
421 (*ctp)->c_umask = ~(st.st_mode & 0777);
422 } else {
423 (*ctp)->c_umask = ~m_gmprot();
424 }
425 }
426
427 /*
428 * Write the content to a file
429 */
430 if (status == OK) {
431 status = write_content (*ctp, input_filename, outfile, modify_inplace,
432 message_mods);
433 } else if (! modify_inplace) {
434 /* Something went wrong. Output might be expected, such
435 as if this were run as a filter. Just copy the input
436 to the output. */
437 int in = open (input_filename, O_RDONLY);
438 int out = strcmp (outfile, "-")
439 ? open (outfile, O_WRONLY | O_CREAT, m_gmprot ())
440 : STDOUT_FILENO;
441
442 if (in != -1 && out != -1) {
443 cpydata (in, out, input_filename, outfile);
444 } else {
445 status = NOTOK;
446 }
447
448 close (out);
449 close (in);
450 }
451
452 if (modify_inplace) {
453 if (status != OK) (void) m_unlink (outfile);
454 free (outfile);
455 outfile = NULL;
456 }
457
458 free (input_filename);
459
460 return status;
461 }
462
463
464 /* parse_mime() arranges alternates in reverse (priority) order, so
465 reverse them back. This will put a text/plain part at the front of
466 a multipart/alternative part, for example, where it belongs. */
467 static void
468 reverse_alternative_parts (CT ct) {
469 if (ct->c_type == CT_MULTIPART) {
470 struct multipart *m = (struct multipart *) ct->c_ctparams;
471 struct part *part;
472
473 if (ct->c_subtype == MULTI_ALTERNATE) {
474 reverse_parts (ct);
475 }
476
477 /* And call recursively on each part of a multipart. */
478 for (part = m->mp_parts; part; part = part->mp_next) {
479 reverse_alternative_parts (part->mp_part);
480 }
481 }
482 }
483
484
485 static int
486 fix_boundary (CT *ct, int *message_mods) {
487 struct multipart *mp;
488 int status = OK;
489
490 if (bogus_mp_content) {
491 mp = (struct multipart *) (*ct)->c_ctparams;
492
493 /*
494 * 1) Get boundary at end of part.
495 * 2) Get boundary at beginning of part and compare to the end-of-part
496 * boundary.
497 * 3) Write out contents of ct to tmp file, replacing boundary in
498 * header with boundary from part. Set c_unlink to 1.
499 * 4) Free ct.
500 * 5) Call parse_mime() on the tmp file, replacing ct.
501 */
502
503 if (mp && mp->mp_start) {
504 char *part_boundary;
505
506 if (get_multipart_boundary (*ct, &part_boundary) == OK) {
507 char *fixed;
508
509 if ((fixed = m_mktemp2 (NULL, invo_name, NULL, &(*ct)->c_fp))) {
510 if (replace_boundary (*ct, fixed, part_boundary) == OK) {
511 char *filename = add ((*ct)->c_file, NULL);
512
513 free_content (*ct);
514 if ((*ct = parse_mime (fixed))) {
515 (*ct)->c_unlink = 1;
516
517 ++*message_mods;
518 if (verbosw) {
519 report (NULL, filename,
520 "fix multipart boundary");
521 }
522 }
523 free (filename);
524 } else {
525 advise (NULL, "unable to replace broken boundary");
526 status = NOTOK;
527 }
528 } else {
529 advise (NULL, "unable to create temporary file in %s",
530 get_temp_dir());
531 status = NOTOK;
532 }
533
534 free (part_boundary);
535 }
536 }
537 }
538
539 return status;
540 }
541
542
543 static int
544 get_multipart_boundary (CT ct, char **part_boundary) {
545 char buffer[BUFSIZ];
546 char *end_boundary = NULL;
547 off_t begin = (off_t) ct->c_end > (off_t) (ct->c_begin + sizeof buffer)
548 ? (off_t) (ct->c_end - sizeof buffer)
549 : (off_t) ct->c_begin;
550 size_t bytes_read;
551 int status = OK;
552
553 /* This will fail if the boundary spans fread() calls. BUFSIZ should
554 be big enough, even if it's just 1024, to make that unlikely. */
555
556 /* free_content() will close ct->c_fp. */
557 if (! ct->c_fp && (ct->c_fp = fopen (ct->c_file, "r")) == NULL) {
558 advise (ct->c_file, "unable to open for reading");
559 return NOTOK;
560 }
561
562 /* Get boundary at end of multipart. */
563 while (begin >= (off_t) ct->c_begin) {
564 fseeko (ct->c_fp, begin, SEEK_SET);
565 while ((bytes_read = fread (buffer, 1, sizeof buffer, ct->c_fp)) > 0) {
566 char *end = buffer + bytes_read - 1;
567 char *cp;
568
569 if ((cp = rfind_str (buffer, bytes_read, "--"))) {
570 /* Trim off trailing "--" and anything beyond. */
571 *cp-- = '\0';
572 if ((end = rfind_str (buffer, cp - buffer, "\n"))) {
573 if (strlen (end) > 3 && *end++ == '\n' &&
574 *end++ == '-' && *end++ == '-') {
575 end_boundary = add (end, NULL);
576 break;
577 }
578 }
579 }
580 }
581
582 if (! end_boundary && begin > (off_t) (ct->c_begin + sizeof buffer)) {
583 begin -= sizeof buffer;
584 } else {
585 break;
586 }
587 }
588
589 /* Get boundary at beginning of multipart. */
590 if (end_boundary) {
591 fseeko (ct->c_fp, ct->c_begin, SEEK_SET);
592 while ((bytes_read = fread (buffer, 1, sizeof buffer, ct->c_fp)) > 0) {
593 if (bytes_read >= strlen (end_boundary)) {
594 char *cp = find_str (buffer, bytes_read, end_boundary);
595
596 if (cp && cp - buffer >= 2 && *--cp == '-' &&
597 *--cp == '-' && (cp > buffer && *--cp == '\n')) {
598 status = OK;
599 break;
600 }
601 } else {
602 /* The start and end boundaries didn't match, or the
603 start boundary doesn't begin with "\n--" (or "--"
604 if at the beginning of buffer). Keep trying. */
605 status = NOTOK;
606 }
607 }
608 } else {
609 status = NOTOK;
610 }
611
612 if (status == OK) {
613 *part_boundary = end_boundary;
614 } else {
615 *part_boundary = NULL;
616 free (end_boundary);
617 }
618
619 return status;
620 }
621
622
623 /* Open and copy ct->c_file to file, replacing the multipart boundary. */
624 static int
625 replace_boundary (CT ct, char *file, char *boundary) {
626 FILE *fpin, *fpout;
627 int compnum, state;
628 char buf[BUFSIZ], name[NAMESZ];
629 char *np, *vp;
630 m_getfld_state_t gstate = 0;
631 int status = OK;
632
633 if (ct->c_file == NULL) {
634 advise (NULL, "missing input filename");
635 return NOTOK;
636 }
637
638 if ((fpin = fopen (ct->c_file, "r")) == NULL) {
639 advise (ct->c_file, "unable to open for reading");
640 return NOTOK;
641 }
642
643 if ((fpout = fopen (file, "w")) == NULL) {
644 fclose (fpin);
645 advise (file, "unable to open for writing");
646 return NOTOK;
647 }
648
649 for (compnum = 1;;) {
650 int bufsz = (int) sizeof buf;
651
652 switch (state = m_getfld (&gstate, name, buf, &bufsz, fpin)) {
653 case FLD:
654 case FLDPLUS:
655 compnum++;
656
657 /* get copies of the buffers */
658 np = add (name, NULL);
659 vp = add (buf, NULL);
660
661 /* if necessary, get rest of field */
662 while (state == FLDPLUS) {
663 bufsz = sizeof buf;
664 state = m_getfld (&gstate, name, buf, &bufsz, fpin);
665 vp = add (buf, vp); /* add to previous value */
666 }
667
668 if (strcasecmp (TYPE_FIELD, np)) {
669 fprintf (fpout, "%s:%s", np, vp);
670 } else {
671 char *new_ctline, *new_params;
672
673 replace_param(&ct->c_ctinfo.ci_first_pm,
674 &ct->c_ctinfo.ci_last_pm, "boundary",
675 boundary, 0);
676
677 new_ctline = concat(" ", ct->c_ctinfo.ci_type, "/",
678 ct->c_ctinfo.ci_subtype, NULL);
679 new_params = output_params(strlen(TYPE_FIELD) +
680 strlen(new_ctline) + 1,
681 ct->c_ctinfo.ci_first_pm, NULL, 0);
682 fprintf (fpout, "%s:%s%s\n", np, new_ctline,
683 new_params ? new_params : "");
684 free(new_ctline);
685 if (new_params)
686 free(new_params);
687 }
688
689 free (vp);
690 free (np);
691
692 continue;
693
694 case BODY:
695 fputs ("\n", fpout);
696 /* buf will have a terminating NULL, skip it. */
697 fwrite (buf, 1, bufsz-1, fpout);
698 continue;
699
700 case FILEEOF:
701 break;
702
703 case LENERR:
704 case FMTERR:
705 advise (NULL, "message format error in component #%d", compnum);
706 status = NOTOK;
707 break;
708
709 default:
710 advise (NULL, "getfld() returned %d", state);
711 status = NOTOK;
712 break;
713 }
714
715 break;
716 }
717
718 m_getfld_state_destroy (&gstate);
719 fclose (fpout);
720 fclose (fpin);
721
722 return status;
723 }
724
725
726 static int
727 fix_multipart_cte (CT ct, int *message_mods) {
728 int status = OK;
729
730 if (ct->c_type == CT_MULTIPART) {
731 struct multipart *m;
732 struct part *part;
733
734 if (ct->c_encoding != CE_7BIT && ct->c_encoding != CE_8BIT &&
735 ct->c_encoding != CE_BINARY) {
736 HF hf;
737
738 for (hf = ct->c_first_hf; hf; hf = hf->next) {
739 char *name = hf->name;
740 for (; *name && isspace ((unsigned char) *name); ++name) {
741 continue;
742 }
743
744 if (! strncasecmp (name, ENCODING_FIELD,
745 strlen (ENCODING_FIELD))) {
746 char *prefix = "Nmh-REPLACED-INVALID-";
747 HF h = mh_xmalloc (sizeof *h);
748
749 h->name = add (hf->name, NULL);
750 h->hf_encoding = hf->hf_encoding;
751 h->next = hf->next;
752 hf->next = h;
753
754 /* Retain old header but prefix its name. */
755 free (hf->name);
756 hf->name = concat (prefix, h->name, NULL);
757
758 ++*message_mods;
759 if (verbosw) {
760 char *encoding = cpytrim (hf->value);
761 report (ct->c_partno, ct->c_file,
762 "replace Content-Transfer-Encoding of %s "
763 "with 8 bit", encoding);
764 free (encoding);
765 }
766
767 h->value = add (" 8bit\n", NULL);
768
769 /* Don't need to warn for multiple C-T-E header
770 fields, parse_mime() already does that. But
771 if there are any, fix them all as necessary. */
772 hf = h;
773 }
774 }
775
776 set_ce (ct, CE_8BIT);
777 }
778
779 m = (struct multipart *) ct->c_ctparams;
780 for (part = m->mp_parts; part; part = part->mp_next) {
781 if (fix_multipart_cte (part->mp_part, message_mods) != OK) {
782 status = NOTOK;
783 break;
784 }
785 }
786 }
787
788 return status;
789 }
790
791
792 static int
793 set_ce (CT ct, int encoding) {
794 const char *ce = ce_str (encoding);
795 const struct str2init *ctinit = get_ce_method (ce);
796
797 if (ctinit) {
798 char *cte = concat (" ", ce, "\n", NULL);
799 int found_cte = 0;
800 HF hf;
801 /* Decoded contents might be in ct->c_cefile.ce_file, if the
802 caller is decode_text_parts (). Save because we'll
803 overwrite below. */
804 struct cefile decoded_content_info = ct->c_cefile;
805
806 ct->c_encoding = encoding;
807
808 ct->c_ctinitfnx = ctinit->si_init;
809 /* This will assign ct->c_cefile with an all-0 struct, which
810 is what we want. */
811 (*ctinit->si_init) (ct);
812 /* After returning, the caller should set
813 ct->c_cefile.ce_file to the name of the file containing
814 the contents. */
815
816 /* Restore the cefile. */
817 ct->c_cefile = decoded_content_info;
818
819 /* Update/add Content-Transfer-Encoding header field. */
820 for (hf = ct->c_first_hf; hf; hf = hf->next) {
821 if (! strcasecmp (ENCODING_FIELD, hf->name)) {
822 found_cte = 1;
823 free (hf->value);
824 hf->value = cte;
825 }
826 }
827 if (! found_cte) {
828 add_header (ct, add (ENCODING_FIELD, NULL), cte);
829 }
830
831 /* Update c_celine. It's used only by mhlist -debug. */
832 free (ct->c_celine);
833 ct->c_celine = add (cte, NULL);
834
835 return OK;
836 } else {
837 return NOTOK;
838 }
839 }
840
841
842 /* Make sure each text part has a corresponding text/plain part. */
843 static int
844 ensure_text_plain (CT *ct, CT parent, int *message_mods, int replacetextplain) {
845 int status = OK;
846
847 switch ((*ct)->c_type) {
848 case CT_TEXT: {
849 int has_text_plain = 0;
850
851 /* Nothing to do for text/plain. */
852 if ((*ct)->c_subtype == TEXT_PLAIN) return OK;
853
854 if (parent && parent->c_type == CT_MULTIPART &&
855 parent->c_subtype == MULTI_ALTERNATE) {
856 struct multipart *mp = (struct multipart *) parent->c_ctparams;
857 struct part *part, *prev;
858 int new_subpart_number = 1;
859
860 /* See if there is a sibling text/plain. */
861 for (prev = part = mp->mp_parts; part; part = part->mp_next) {
862 ++new_subpart_number;
863 if (part->mp_part->c_type == CT_TEXT &&
864 part->mp_part->c_subtype == TEXT_PLAIN) {
865 if (replacetextplain) {
866 struct part *old_part;
867 if (part == mp->mp_parts) {
868 old_part = mp->mp_parts;
869 mp->mp_parts = part->mp_next;
870 } else {
871 old_part = prev->mp_next;
872 prev->mp_next = part->mp_next;
873 }
874 if (verbosw) {
875 report (parent->c_partno, parent->c_file,
876 "remove text/plain part %s",
877 old_part->mp_part->c_partno);
878 }
879 free_content (old_part->mp_part);
880 free (old_part);
881 } else {
882 has_text_plain = 1;
883 }
884 break;
885 }
886 prev = part;
887 }
888
889 if (! has_text_plain) {
890 /* Parent is a multipart/alternative. Insert a new
891 text/plain subpart. */
892 struct part *new_part = mh_xmalloc (sizeof *new_part);
893
894 if ((new_part->mp_part = build_text_plain_part (*ct))) {
895 char buffer[16];
896 snprintf (buffer, sizeof buffer, "%d", new_subpart_number);
897
898 new_part->mp_next = mp->mp_parts;
899 mp->mp_parts = new_part;
900 new_part->mp_part->c_partno =
901 concat (parent->c_partno ? parent->c_partno : "1", ".",
902 buffer, NULL);
903
904 ++*message_mods;
905 if (verbosw) {
906 report (parent->c_partno, parent->c_file,
907 "insert text/plain part");
908 }
909 } else {
910 free_content (new_part->mp_part);
911 free (new_part);
912 status = NOTOK;
913 }
914 }
915 } else {
916 /* Slip new text/plain part into a new multipart/alternative. */
917 CT tp_part = build_text_plain_part (*ct);
918
919 if (tp_part) {
920 CT mp_alt = build_multipart_alt (*ct, tp_part, CT_MULTIPART,
921 MULTI_ALTERNATE);
922 if (mp_alt) {
923 struct multipart *mp =
924 (struct multipart *) mp_alt->c_ctparams;
925
926 if (mp && mp->mp_parts) {
927 mp->mp_parts->mp_part = tp_part;
928 /* Make the new multipart/alternative the parent. */
929 *ct = mp_alt;
930
931 ++*message_mods;
932 if (verbosw) {
933 report ((*ct)->c_partno, (*ct)->c_file,
934 "insert text/plain part");
935 }
936 } else {
937 free_content (tp_part);
938 free_content (mp_alt);
939 status = NOTOK;
940 }
941 } else {
942 status = NOTOK;
943 }
944 } else {
945 status = NOTOK;
946 }
947 }
948 break;
949 }
950
951 case CT_MULTIPART: {
952 struct multipart *mp = (struct multipart *) (*ct)->c_ctparams;
953 struct part *part;
954
955 for (part = mp->mp_parts; status == OK && part; part = part->mp_next) {
956 if ((*ct)->c_type == CT_MULTIPART) {
957 status = ensure_text_plain (&part->mp_part, *ct, message_mods,
958 replacetextplain);
959 }
960 }
961 break;
962 }
963
964 case CT_MESSAGE:
965 if ((*ct)->c_subtype == MESSAGE_EXTERNAL) {
966 struct exbody *e;
967
968 e = (struct exbody *) (*ct)->c_ctparams;
969 status = ensure_text_plain (&e->eb_content, *ct, message_mods,
970 replacetextplain);
971 }
972 break;
973 }
974
975 return status;
976 }
977
978
979 static CT
980 build_text_plain_part (CT encoded_part) {
981 CT tp_part = divide_part (encoded_part);
982 char *tmp_plain_file = NULL;
983
984 if (decode_part (tp_part) == OK) {
985 /* Now, tp_part->c_cefile.ce_file is the name of the tmp file that
986 contains the decoded contents. And the decoding function, such
987 as openQuoted, will have set ...->ce_unlink to 1 so that it will
988 be unlinked by free_content (). */
989 char *tempfile;
990
991 if ((tempfile = m_mktemp2 (NULL, invo_name, NULL, NULL)) == NULL) {
992 advise (NULL, "unable to create temporary file in %s",
993 get_temp_dir());
994 }
995 tmp_plain_file = add (tempfile, NULL);
996 if (reformat_part (tp_part, tmp_plain_file,
997 tp_part->c_ctinfo.ci_type,
998 tp_part->c_ctinfo.ci_subtype,
999 tp_part->c_type) == OK) {
1000 return tp_part;
1001 }
1002 }
1003
1004 free_content (tp_part);
1005 (void) m_unlink (tmp_plain_file);
1006 free (tmp_plain_file);
1007
1008 return NULL;
1009 }
1010
1011
1012 static CT
1013 divide_part (CT ct) {
1014 CT new_part;
1015
1016 if ((new_part = (CT) calloc (1, sizeof *new_part)) == NULL)
1017 adios (NULL, "out of memory");
1018
1019 /* Just copy over what is needed for decoding. c_vrsn and
1020 c_celine aren't necessary. */
1021 new_part->c_file = add (ct->c_file, NULL);
1022 new_part->c_begin = ct->c_begin;
1023 new_part->c_end = ct->c_end;
1024 copy_ctinfo (&new_part->c_ctinfo, &ct->c_ctinfo);
1025 new_part->c_type = ct->c_type;
1026 new_part->c_cefile = ct->c_cefile;
1027 new_part->c_encoding = ct->c_encoding;
1028 new_part->c_ctinitfnx = ct->c_ctinitfnx;
1029 new_part->c_ceopenfnx = ct->c_ceopenfnx;
1030 new_part->c_ceclosefnx = ct->c_ceclosefnx;
1031 new_part->c_cesizefnx = ct->c_cesizefnx;
1032
1033 /* c_ctline is used by reformat__part(), so it can preserve
1034 anything after the type/subtype. */
1035 new_part->c_ctline = add (ct->c_ctline, NULL);
1036
1037 return new_part;
1038 }
1039
1040
1041 static void
1042 copy_ctinfo (CI dest, CI src) {
1043 PM s_pm, d_pm;
1044
1045 dest->ci_type = src->ci_type ? add (src->ci_type, NULL) : NULL;
1046 dest->ci_subtype = src->ci_subtype ? add (src->ci_subtype, NULL) : NULL;
1047
1048 for (s_pm = src->ci_first_pm; s_pm; s_pm = s_pm->pm_next) {
1049 d_pm = add_param(&dest->ci_first_pm, &dest->ci_last_pm, s_pm->pm_name,
1050 s_pm->pm_value, 0);
1051 if (s_pm->pm_charset)
1052 d_pm->pm_charset = getcpy(s_pm->pm_charset);
1053 if (s_pm->pm_lang)
1054 d_pm->pm_lang = getcpy(s_pm->pm_lang);
1055 }
1056
1057 dest->ci_comment = src->ci_comment ? add (src->ci_comment, NULL) : NULL;
1058 dest->ci_magic = src->ci_magic ? add (src->ci_magic, NULL) : NULL;
1059 }
1060
1061
1062 static int
1063 decode_part (CT ct) {
1064 char *tmp_decoded;
1065 int status;
1066 char *tempfile;
1067
1068 if ((tempfile = m_mktemp2 (NULL, invo_name, NULL, NULL)) == NULL) {
1069 adios (NULL, "unable to create temporary file in %s", get_temp_dir());
1070 }
1071 tmp_decoded = add (tempfile, NULL);
1072 /* The following call will load ct->c_cefile.ce_file with the tmp
1073 filename of the decoded content. tmp_decoded will contain the
1074 encoded output, get rid of that. */
1075 status = output_message (ct, tmp_decoded);
1076 (void) m_unlink (tmp_decoded);
1077 free (tmp_decoded);
1078
1079 return status;
1080 }
1081
1082
1083 /* Some of the arguments aren't really needed now, but maybe will
1084 be in the future for other than text types. */
1085 static int
1086 reformat_part (CT ct, char *file, char *type, char *subtype, int c_type) {
1087 int output_subtype, output_encoding;
1088 char *cp, *cf;
1089 int status;
1090
1091 /* Hacky: this redirects the output from whatever command is used
1092 to show the part to a file. So, the user can't have any output
1093 redirection in that command.
1094 Could show_multi() in mhshowsbr.c avoid this? */
1095
1096 /* Check for invo_name-format-type/subtype. */
1097 cp = concat (invo_name, "-format-", type, "/", subtype, NULL);
1098 if ((cf = context_find (cp)) && *cf != '\0') {
1099 if (strchr (cf, '>')) {
1100 free (cp);
1101 advise (NULL, "'>' prohibited in \"%s\",\nplease fix your "
1102 "%s-format-%s/%s profile entry", cf, invo_name, type,
1103 subtype);
1104 return NOTOK;
1105 }
1106 } else {
1107 free (cp);
1108
1109 /* Check for invo_name-format-type. */
1110 cp = concat (invo_name, "-format-", type, NULL);
1111 if (! (cf = context_find (cp)) || *cf == '\0') {
1112 free (cp);
1113 if (verbosw) {
1114 advise (NULL, "Don't know how to convert %s, there is no "
1115 "%s-format-%s/%s profile entry",
1116 ct->c_file, invo_name, type, subtype);
1117 }
1118 return NOTOK;
1119 }
1120
1121 if (strchr (cf, '>')) {
1122 free (cp);
1123 advise (NULL, "'>' prohibited in \"%s\"", cf);
1124 return NOTOK;
1125 }
1126 }
1127 free (cp);
1128
1129 cp = concat (cf, " >", file, NULL);
1130 status = show_content_aux (ct, 1, 0, cp, NULL);
1131 free (cp);
1132
1133 /* Unlink decoded content tmp file and free its filename to avoid
1134 leaks. The file stream should already have been closed. */
1135 if (ct->c_cefile.ce_unlink) {
1136 (void) m_unlink (ct->c_cefile.ce_file);
1137 free (ct->c_cefile.ce_file);
1138 ct->c_cefile.ce_file = NULL;
1139 ct->c_cefile.ce_unlink = 0;
1140 }
1141
1142 if (c_type == CT_TEXT) {
1143 output_subtype = TEXT_PLAIN;
1144 } else {
1145 /* Set subtype to 0, which is always an UNKNOWN subtype. */
1146 output_subtype = 0;
1147 }
1148 output_encoding = charset_encoding (ct);
1149
1150 if (set_ct_type (ct, c_type, output_subtype, output_encoding) == OK) {
1151 ct->c_cefile.ce_file = file;
1152 ct->c_cefile.ce_unlink = 1;
1153 } else {
1154 ct->c_cefile.ce_unlink = 0;
1155 status = NOTOK;
1156 }
1157
1158 return status;
1159 }
1160
1161
1162 /* Identifies 7bit or 8bit content based on charset. */
1163 static int
1164 charset_encoding (CT ct) {
1165 /* norm_charmap() is case sensitive. */
1166 char *charset = upcase (content_charset (ct));
1167 int encoding =
1168 strcmp (norm_charmap (charset), "US-ASCII") ? CE_8BIT : CE_7BIT;
1169
1170 free (charset);
1171 return encoding;
1172 }
1173
1174
1175 static CT
1176 build_multipart_alt (CT first_alt, CT new_part, int type, int subtype) {
1177 char *boundary_prefix = "----=_nmh-multipart";
1178 char *boundary = concat (boundary_prefix, first_alt->c_partno, NULL);
1179 char *boundary_indicator = "; boundary=";
1180 char *typename, *subtypename, *name;
1181 CT ct;
1182 struct part *p;
1183 struct multipart *m;
1184 const struct str2init *ctinit;
1185
1186 if ((ct = (CT) calloc (1, sizeof *ct)) == NULL)
1187 adios (NULL, "out of memory");
1188
1189 /* Set up the multipart/alternative part. These fields of *ct were
1190 initialized to 0 by calloc():
1191 c_fp, c_unlink, c_begin, c_end,
1192 c_vrsn, c_ctline, c_celine,
1193 c_id, c_descr, c_dispo, c_partno,
1194 c_ctinfo.ci_comment, c_ctinfo.ci_magic,
1195 c_cefile, c_encoding,
1196 c_digested, c_digest[16], c_ctexbody,
1197 c_ctinitfnx, c_ceopenfnx, c_ceclosefnx, c_cesizefnx,
1198 c_umask, c_pid, c_rfc934,
1199 c_showproc, c_termproc, c_storeproc, c_storage, c_folder
1200 */
1201
1202 ct->c_file = add (first_alt->c_file, NULL);
1203 ct->c_type = type;
1204 ct->c_subtype = subtype;
1205
1206 ctinit = get_ct_init (ct->c_type);
1207
1208 typename = ct_type_str (type);
1209 subtypename = ct_subtype_str (type, subtype);
1210
1211 {
1212 int serial = 0;
1213 int found_boundary = 1;
1214
1215 while (found_boundary && serial < 1000000) {
1216 found_boundary = 0;
1217
1218 /* Ensure that the boundary doesn't appear in the decoded
1219 content. */
1220 if (new_part->c_cefile.ce_file) {
1221 if ((found_boundary =
1222 boundary_in_content (&new_part->c_cefile.ce_fp,
1223 new_part->c_cefile.ce_file,
1224 boundary)) == -1) {
1225 return NULL;
1226 }
1227 }
1228
1229 /* Ensure that the boundary doesn't appear in the encoded
1230 content. */
1231 if (! found_boundary && new_part->c_file) {
1232 if ((found_boundary = boundary_in_content (&new_part->c_fp,
1233 new_part->c_file,
1234 boundary)) == -1) {
1235 return NULL;
1236 }
1237 }
1238
1239 if (found_boundary) {
1240 /* Try a slightly different boundary. */
1241 char buffer2[16];
1242
1243 free (boundary);
1244 ++serial;
1245 snprintf (buffer2, sizeof buffer2, "%d", serial);
1246 boundary =
1247 concat (boundary_prefix,
1248 first_alt->c_partno ? first_alt->c_partno : "",
1249 "-", buffer2, NULL);
1250 }
1251 }
1252
1253 if (found_boundary) {
1254 advise (NULL, "giving up trying to find a unique boundary");
1255 return NULL;
1256 }
1257 }
1258
1259 name = concat (" ", typename, "/", subtypename, boundary_indicator, "\"",
1260 boundary, "\"", NULL);
1261
1262 /* Load c_first_hf and c_last_hf. */
1263 transfer_noncontent_headers (first_alt, ct);
1264 add_header (ct, add (TYPE_FIELD, NULL), concat (name, "\n", NULL));
1265 free (name);
1266
1267 /* Load c_partno. */
1268 if (first_alt->c_partno) {
1269 ct->c_partno = add (first_alt->c_partno, NULL);
1270 free (first_alt->c_partno);
1271 first_alt->c_partno = concat (ct->c_partno, ".1", NULL);
1272 new_part->c_partno = concat (ct->c_partno, ".2", NULL);
1273 } else {
1274 first_alt->c_partno = add ("1", NULL);
1275 new_part->c_partno = add ("2", NULL);
1276 }
1277
1278 if (ctinit) {
1279 ct->c_ctinfo.ci_type = add (typename, NULL);
1280 ct->c_ctinfo.ci_subtype = add (subtypename, NULL);
1281 }
1282
1283 add_param(&ct->c_ctinfo.ci_first_pm, &ct->c_ctinfo.ci_last_pm,
1284 "boundary", boundary, 0);
1285
1286 p = (struct part *) mh_xmalloc (sizeof *p);
1287 p->mp_next = (struct part *) mh_xmalloc (sizeof *p->mp_next);
1288 p->mp_next->mp_next = NULL;
1289 p->mp_next->mp_part = first_alt;
1290
1291 if ((m = (struct multipart *) calloc (1, sizeof (struct multipart))) ==
1292 NULL)
1293 adios (NULL, "out of memory");
1294 m->mp_start = concat (boundary, "\n", NULL);
1295 m->mp_stop = concat (boundary, "--\n", NULL);
1296 m->mp_parts = p;
1297 ct->c_ctparams = (void *) m;
1298
1299 free (boundary);
1300
1301 return ct;
1302 }
1303
1304
1305 /* Check that the boundary does not appear in the content. */
1306 static int
1307 boundary_in_content (FILE **fp, char *file, const char *boundary) {
1308 char buffer[BUFSIZ];
1309 size_t bytes_read;
1310 int found_boundary = 0;
1311
1312 /* free_content() will close *fp if we fopen it here. */
1313 if (! *fp && (*fp = fopen (file, "r")) == NULL) {
1314 advise (file, "unable to open %s for reading", file);
1315 return NOTOK;
1316 }
1317
1318 fseeko (*fp, 0L, SEEK_SET);
1319 while ((bytes_read = fread (buffer, 1, sizeof buffer, *fp)) > 0) {
1320 if (find_str (buffer, bytes_read, boundary)) {
1321 found_boundary = 1;
1322 break;
1323 }
1324 }
1325
1326 return found_boundary;
1327 }
1328
1329
1330 /* Remove all non-Content headers. */
1331 static void
1332 transfer_noncontent_headers (CT old, CT new) {
1333 HF hp, hp_prev;
1334
1335 hp_prev = hp = old->c_first_hf;
1336 while (hp) {
1337 HF next = hp->next;
1338
1339 if (strncasecmp (XXX_FIELD_PRF, hp->name, strlen (XXX_FIELD_PRF))) {
1340 if (hp == old->c_last_hf) {
1341 if (hp == old->c_first_hf) {
1342 old->c_last_hf = old->c_first_hf = NULL;
1343 } else {
1344 hp_prev->next = NULL;
1345 old->c_last_hf = hp_prev;
1346 }
1347 } else {
1348 if (hp == old->c_first_hf) {
1349 old->c_first_hf = next;
1350 } else {
1351 hp_prev->next = next;
1352 }
1353 }
1354
1355 /* Put node hp in the new CT. */
1356 if (new->c_first_hf == NULL) {
1357 new->c_first_hf = hp;
1358 } else {
1359 new->c_last_hf->next = hp;
1360 }
1361 new->c_last_hf = hp;
1362 } else {
1363 /* A Content- header, leave in old. */
1364 hp_prev = hp;
1365 }
1366
1367 hp = next;
1368 }
1369 }
1370
1371
1372 static int
1373 set_ct_type (CT ct, int type, int subtype, int encoding) {
1374 char *typename = ct_type_str (type);
1375 char *subtypename = ct_subtype_str (type, subtype);
1376 /* E.g, " text/plain" */
1377 char *type_subtypename = concat (" ", typename, "/", subtypename, NULL);
1378 /* E.g, " text/plain\n" */
1379 char *name_plus_nl = concat (type_subtypename, "\n", NULL);
1380 int found_content_type = 0;
1381 HF hf;
1382 const char *cp = NULL;
1383 char *ctline;
1384 int status;
1385
1386 /* Update/add Content-Type header field. */
1387 for (hf = ct->c_first_hf; hf; hf = hf->next) {
1388 if (! strcasecmp (TYPE_FIELD, hf->name)) {
1389 found_content_type = 1;
1390 free (hf->value);
1391 hf->value = (cp = strchr (ct->c_ctline, ';'))
1392 ? concat (type_subtypename, cp, "\n", NULL)
1393 : add (name_plus_nl, NULL);
1394 }
1395 }
1396 if (! found_content_type) {
1397 add_header (ct, add (TYPE_FIELD, NULL),
1398 (cp = strchr (ct->c_ctline, ';'))
1399 ? concat (type_subtypename, cp, "\n", NULL)
1400 : add (name_plus_nl, NULL));
1401 }
1402
1403 /* Some of these might not be used, but set them anyway. */
1404 ctline = cp
1405 ? concat (type_subtypename, cp, NULL)
1406 : concat (type_subtypename, NULL);
1407 free (ct->c_ctline);
1408 ct->c_ctline = ctline;
1409 /* Leave other ctinfo members as they were. */
1410 free (ct->c_ctinfo.ci_type);
1411 ct->c_ctinfo.ci_type = add (typename, NULL);
1412 free (ct->c_ctinfo.ci_subtype);
1413 ct->c_ctinfo.ci_subtype = add (subtypename, NULL);
1414 ct->c_type = type;
1415 ct->c_subtype = subtype;
1416
1417 free (name_plus_nl);
1418 free (type_subtypename);
1419
1420 status = set_ce (ct, encoding);
1421
1422 return status;
1423 }
1424
1425
1426 static int
1427 decode_text_parts (CT ct, int encoding, int *message_mods) {
1428 int status = OK;
1429
1430 switch (ct->c_type) {
1431 case CT_TEXT:
1432 switch (ct->c_encoding) {
1433 case CE_BASE64:
1434 case CE_QUOTED: {
1435 int ct_encoding;
1436
1437 if (decode_part (ct) == OK && ct->c_cefile.ce_file) {
1438 const char *reason = NULL;
1439
1440 if ((ct_encoding = content_encoding (ct, &reason)) == CE_BINARY
1441 && encoding != CE_BINARY) {
1442 /* The decoding isn't acceptable so discard it.
1443 Leave status as OK to allow other transformations. */
1444 if (verbosw) {
1445 report (ct->c_partno, ct->c_file,
1446 "will not decode%s because it is binary (%s)",
1447 ct->c_partno ? ""
1448 : ct->c_ctline ? ct->c_ctline
1449 : "",
1450 reason);
1451 }
1452 (void) m_unlink (ct->c_cefile.ce_file);
1453 free (ct->c_cefile.ce_file);
1454 ct->c_cefile.ce_file = NULL;
1455 } else if (ct->c_encoding == CE_QUOTED &&
1456 ct_encoding == CE_8BIT && encoding == CE_7BIT) {
1457 /* The decoding isn't acceptable so discard it.
1458 Leave status as OK to allow other transformations. */
1459 if (verbosw) {
1460 report (ct->c_partno, ct->c_file,
1461 "will not decode%s because it is 8bit",
1462 ct->c_partno ? ""
1463 : ct->c_ctline ? ct->c_ctline
1464 : "");
1465 }
1466 (void) m_unlink (ct->c_cefile.ce_file);
1467 free (ct->c_cefile.ce_file);
1468 ct->c_cefile.ce_file = NULL;
1469 } else {
1470 int enc;
1471 if (ct_encoding == CE_BINARY)
1472 enc = CE_BINARY;
1473 else if (ct_encoding == CE_8BIT && encoding == CE_7BIT)
1474 enc = CE_QUOTED;
1475 else
1476 enc = charset_encoding (ct);
1477 if (set_ce (ct, enc) == OK) {
1478 ++*message_mods;
1479 if (verbosw) {
1480 report (ct->c_partno, ct->c_file, "decode%s",
1481 ct->c_ctline ? ct->c_ctline : "");
1482 }
1483 strip_crs (ct, message_mods);
1484 } else {
1485 status = NOTOK;
1486 }
1487 }
1488 } else {
1489 status = NOTOK;
1490 }
1491 break;
1492 }
1493 case CE_8BIT:
1494 case CE_7BIT:
1495 strip_crs (ct, message_mods);
1496 break;
1497 default:
1498 break;
1499 }
1500
1501 break;
1502
1503 case CT_MULTIPART: {
1504 struct multipart *m = (struct multipart *) ct->c_ctparams;
1505 struct part *part;
1506
1507 /* Should check to see if the body for this part is encoded?
1508 For now, it gets passed along as-is by InitMultiPart(). */
1509 for (part = m->mp_parts; status == OK && part; part = part->mp_next) {
1510 status = decode_text_parts (part->mp_part, encoding, message_mods);
1511 }
1512 break;
1513 }
1514
1515 case CT_MESSAGE:
1516 if (ct->c_subtype == MESSAGE_EXTERNAL) {
1517 struct exbody *e;
1518
1519 e = (struct exbody *) ct->c_ctparams;
1520 status = decode_text_parts (e->eb_content, encoding, message_mods);
1521 }
1522 break;
1523
1524 default:
1525 break;
1526 }
1527
1528 return status;
1529 }
1530
1531
1532 /* See if the decoded content is 7bit, 8bit, or binary. It's binary
1533 if it has any NUL characters, a CR not followed by a LF, or lines
1534 greater than 998 characters in length. If binary, reason is set
1535 to a string explaining why. */
1536 static int
1537 content_encoding (CT ct, const char **reason) {
1538 CE ce = &ct->c_cefile;
1539 int encoding = CE_7BIT;
1540
1541 if (ce->ce_file) {
1542 size_t line_len = 0;
1543 char buffer[BUFSIZ];
1544 size_t inbytes;
1545
1546 if (! ce->ce_fp && (ce->ce_fp = fopen (ce->ce_file, "r")) == NULL) {
1547 advise (ce->ce_file, "unable to open for reading");
1548 return CE_UNKNOWN;
1549 }
1550
1551 fseeko (ce->ce_fp, 0L, SEEK_SET);
1552 while (encoding != CE_BINARY &&
1553 (inbytes = fread (buffer, 1, sizeof buffer, ce->ce_fp)) > 0) {
1554 char *cp;
1555 size_t i;
1556 int last_char_was_cr = 0;
1557
1558 for (i = 0, cp = buffer; i < inbytes; ++i, ++cp) {
1559 if (*cp == '\0' || ++line_len > 998 ||
1560 (*cp != '\n' && last_char_was_cr)) {
1561 encoding = CE_BINARY;
1562 if (*cp == '\0') {
1563 *reason = "null character";
1564 } else if (line_len > 998) {
1565 *reason = "line length > 998";
1566 } else if (*cp != '\n' && last_char_was_cr) {
1567 *reason = "CR not followed by LF";
1568 } else {
1569 /* Should not reach this. */
1570 *reason = "";
1571 }
1572 break;
1573 } else if (*cp == '\n') {
1574 line_len = 0;
1575 } else if (! isascii ((unsigned char) *cp)) {
1576 encoding = CE_8BIT;
1577 }
1578
1579 last_char_was_cr = *cp == '\r' ? 1 : 0;
1580 }
1581 }
1582
1583 fclose (ce->ce_fp);
1584 ce->ce_fp = NULL;
1585 } /* else should never happen */
1586
1587 return encoding;
1588 }
1589
1590
1591 static int
1592 strip_crs (CT ct, int *message_mods) {
1593 /* norm_charmap() is case sensitive. */
1594 char *charset = upcase (content_charset (ct));
1595 int status = OK;
1596
1597 /* Only strip carriage returns if content is ASCII or another
1598 charset that has the same readily recognizable CR followed by a
1599 LF. We can include UTF-8 here because if the high-order bit of
1600 a UTF-8 byte is 0, then it must be a single-byte ASCII
1601 character. */
1602 if (! strcmp (norm_charmap (charset), "US-ASCII") ||
1603 ! strncmp (norm_charmap (charset), "ISO-8859-", 9) ||
1604 ! strncmp (norm_charmap (charset), "UTF-8", 5) ||
1605 ! strncmp (norm_charmap (charset), "WINDOWS-12", 10)) {
1606 char **file = NULL;
1607 FILE **fp = NULL;
1608 size_t begin;
1609 size_t end;
1610 int has_crs = 0;
1611 int opened_input_file = 0;
1612
1613 if (ct->c_cefile.ce_file) {
1614 file = &ct->c_cefile.ce_file;
1615 fp = &ct->c_cefile.ce_fp;
1616 begin = end = 0;
1617 } else if (ct->c_file) {
1618 file = &ct->c_file;
1619 fp = &ct->c_fp;
1620 begin = (size_t) ct->c_begin;
1621 end = (size_t) ct->c_end;
1622 } /* else don't know where the content is */
1623
1624 if (file && *file && fp) {
1625 if (! *fp) {
1626 if ((*fp = fopen (*file, "r")) == NULL) {
1627 advise (*file, "unable to open for reading");
1628 status = NOTOK;
1629 } else {
1630 opened_input_file = 1;
1631 }
1632 }
1633 }
1634
1635 if (fp && *fp) {
1636 char buffer[BUFSIZ];
1637 size_t bytes_read;
1638 size_t bytes_to_read =
1639 end > 0 && end > begin ? end - begin : sizeof buffer;
1640
1641 fseeko (*fp, begin, SEEK_SET);
1642 while ((bytes_read = fread (buffer, 1,
1643 min (bytes_to_read, sizeof buffer),
1644 *fp)) > 0) {
1645 /* Look for CR followed by a LF. This is supposed to
1646 be text so there should be LF's. If not, don't
1647 modify the content. */
1648 char *cp;
1649 size_t i;
1650 int last_char_was_cr = 0;
1651
1652 if (end > 0) bytes_to_read -= bytes_read;
1653
1654 for (i = 0, cp = buffer; i < bytes_read; ++i, ++cp) {
1655 if (*cp == '\n' && last_char_was_cr) {
1656 has_crs = 1;
1657 break;
1658 }
1659
1660 last_char_was_cr = *cp == '\r' ? 1 : 0;
1661 }
1662 }
1663
1664 if (has_crs) {
1665 int fd;
1666 char *stripped_content_file;
1667 char *tempfile = m_mktemp2 (NULL, invo_name, &fd, NULL);
1668
1669 if (tempfile == NULL) {
1670 adios (NULL, "unable to create temporary file in %s",
1671 get_temp_dir());
1672 }
1673 stripped_content_file = add (tempfile, NULL);
1674
1675 /* Strip each CR before a LF from the content. */
1676 fseeko (*fp, begin, SEEK_SET);
1677 while ((bytes_read = fread (buffer, 1, sizeof buffer, *fp)) >
1678 0) {
1679 char *cp;
1680 size_t i;
1681 int last_char_was_cr = 0;
1682
1683 for (i = 0, cp = buffer; i < bytes_read; ++i, ++cp) {
1684 if (*cp == '\r') {
1685 last_char_was_cr = 1;
1686 } else if (last_char_was_cr) {
1687 if (*cp != '\n') write (fd, "\r", 1);
1688 write (fd, cp, 1);
1689 last_char_was_cr = 0;
1690 } else {
1691 write (fd, cp, 1);
1692 last_char_was_cr = 0;
1693 }
1694 }
1695 }
1696
1697 if (close (fd)) {
1698 admonish (NULL, "unable to write temporary file %s",
1699 stripped_content_file);
1700 (void) m_unlink (stripped_content_file);
1701 status = NOTOK;
1702 } else {
1703 /* Replace the decoded file with the converted one. */
1704 if (ct->c_cefile.ce_file) {
1705 if (ct->c_cefile.ce_unlink) {
1706 (void) m_unlink (ct->c_cefile.ce_file);
1707 }
1708 free (ct->c_cefile.ce_file);
1709 }
1710 ct->c_cefile.ce_file = stripped_content_file;
1711 ct->c_cefile.ce_unlink = 1;
1712
1713 ++*message_mods;
1714 if (verbosw) {
1715 report (ct->c_partno,
1716 begin == 0 && end == 0 ? "" : *file,
1717 "stripped CRs");
1718 }
1719 }
1720 }
1721
1722 if (opened_input_file) {
1723 fclose (*fp);
1724 *fp = NULL;
1725 }
1726 }
1727 }
1728
1729 free (charset);
1730 return status;
1731 }
1732
1733
1734 static int
1735 convert_charsets (CT ct, char *dest_charset, int *message_mods) {
1736 int status = OK;
1737
1738 switch (ct->c_type) {
1739 case CT_TEXT:
1740 if (ct->c_subtype == TEXT_PLAIN) {
1741 status = convert_charset (ct, dest_charset, message_mods);
1742 if (verbosw && status == OK) {
1743 report (ct->c_partno, ct->c_file, "convert %s to %s",
1744 content_charset(ct), dest_charset);
1745 }
1746 }
1747 break;
1748
1749 case CT_MULTIPART: {
1750 struct multipart *m = (struct multipart *) ct->c_ctparams;
1751 struct part *part;
1752
1753 /* Should check to see if the body for this part is encoded?
1754 For now, it gets passed along as-is by InitMultiPart(). */
1755 for (part = m->mp_parts; status == OK && part; part = part->mp_next) {
1756 status =
1757 convert_charsets (part->mp_part, dest_charset, message_mods);
1758 }
1759 break;
1760 }
1761
1762 case CT_MESSAGE:
1763 if (ct->c_subtype == MESSAGE_EXTERNAL) {
1764 struct exbody *e;
1765
1766 e = (struct exbody *) ct->c_ctparams;
1767 status =
1768 convert_charsets (e->eb_content, dest_charset, message_mods);
1769 }
1770 break;
1771
1772 default:
1773 break;
1774 }
1775
1776 return status;
1777 }
1778
1779
1780 static int
1781 write_content (CT ct, char *input_filename, char *outfile, int modify_inplace,
1782 int message_mods) {
1783 int status = OK;
1784
1785 if (modify_inplace) {
1786 if (message_mods > 0) {
1787 if ((status = output_message (ct, outfile)) == OK) {
1788 char *infile = input_filename
1789 ? add (input_filename, NULL)
1790 : add (ct->c_file ? ct->c_file : "-", NULL);
1791
1792 if (remove_file (infile) == OK) {
1793 if (rename (outfile, infile)) {
1794 /* Rename didn't work, possibly because of an
1795 attempt to rename across filesystems. Try
1796 brute force copy. */
1797 int old = open (outfile, O_RDONLY);
1798 int new =
1799 open (infile, O_WRONLY | O_CREAT, m_gmprot ());
1800 int i = -1;
1801
1802 if (old != -1 && new != -1) {
1803 char buffer[BUFSIZ];
1804
1805 while ((i = read (old, buffer, sizeof buffer)) >
1806 0) {
1807 if (write (new, buffer, i) != i) {
1808 i = -1;
1809 break;
1810 }
1811 }
1812 }
1813 if (new != -1) close (new);
1814 if (old != -1) close (old);
1815 (void) m_unlink (outfile);
1816
1817 if (i < 0) {
1818 /* The -file argument processing used path() to
1819 expand filename to absolute path. */
1820 int file = ct->c_file && ct->c_file[0] == '/';
1821
1822 admonish (NULL, "unable to rename %s %s to %s",
1823 file ? "file" : "message", outfile,
1824 infile);
1825 status = NOTOK;
1826 }
1827 }
1828 } else {
1829 admonish (NULL, "unable to remove input file %s, "
1830 "not modifying it", infile);
1831 (void) m_unlink (outfile);
1832 status = NOTOK;
1833 }
1834
1835 free (infile);
1836 } else {
1837 status = NOTOK;
1838 }
1839 } else {
1840 /* No modifications and didn't need the tmp outfile. */
1841 (void) m_unlink (outfile);
1842 }
1843 } else {
1844 /* Output is going to some file. Produce it whether or not
1845 there were modifications. */
1846 status = output_message (ct, outfile);
1847 }
1848
1849 flush_errors ();
1850 return status;
1851 }
1852
1853
1854 /*
1855 * If "rmmproc" is defined, call that to remove the file. Otherwise,
1856 * use the standard MH backup file.
1857 */
1858 static int
1859 remove_file (char *file) {
1860 if (rmmproc) {
1861 char *rmm_command = concat (rmmproc, " ", file, NULL);
1862 int status = system (rmm_command);
1863
1864 free (rmm_command);
1865 return WIFEXITED (status) ? WEXITSTATUS (status) : NOTOK;
1866 } else {
1867 /* This is OK for a non-message file, it still uses the
1868 BACKUP_PREFIX form. The backup file will be in the same
1869 directory as file. */
1870 return rename (file, m_backup (file));
1871 }
1872 }
1873
1874
1875 static void
1876 report (char *partno, char *filename, char *message, ...) {
1877 va_list args;
1878 char *fmt;
1879
1880 if (verbosw) {
1881 va_start (args, message);
1882 fmt = concat (filename, partno ? " part " : ", ",
1883 partno ? partno : "", partno ? ", " : "", message, NULL);
1884
1885 advertise (NULL, NULL, fmt, args);
1886
1887 free (fmt);
1888 va_end (args);
1889 }
1890 }
1891
1892
/*
 * Signal handler: finishes via done (1).  For SIGQUIT, stdout is
 * flushed and a newline is written to stderr first.
 */
static void
pipeser (int i)
{
    const int got_quit = (i == SIGQUIT);

    if (got_quit) {
        fflush (stdout);
        fputs ("\n", stderr);
        fflush (stderr);
    }

    done (1);
    /* NOTREACHED */
}