]> diplodocus.org Git - nmh/blob - uip/mhfixmsg.c
Move things around so you can still initialize netsec with SASL
[nmh] / uip / mhfixmsg.c
1 /*
2 * mhfixmsg.c -- rewrite a message with various transformations
3 *
4 * This code is Copyright (c) 2002 and 2013, by the authors of nmh.
5 * See the COPYRIGHT file in the root directory of the nmh
6 * distribution for complete copyright information.
7 */
8
9 #include <h/mh.h>
10 #include <h/mime.h>
11 #include <h/mhparse.h>
12 #include <h/utils.h>
13 #include <h/signals.h>
14 #include <fcntl.h>
15
/*
 * Command-line switch table, written as an X-macro list.  Each X() entry
 * gives the switch text (with an argument description after the switch
 * name where one is expected), a minchars value (0 for every entry here),
 * and the identifier main() uses as the matching `case' label.
 *
 * The list must stay a single macro: every entry line ends in a
 * backslash, so do not place comments between the entries.
 */
16 #define MHFIXMSG_SWITCHES \
17 X("decodetext 8bit|7bit|binary", 0, DECODETEXTSW) \
18 X("nodecodetext", 0, NDECODETEXTSW) \
19 X("decodetypes", 0, DECODETYPESW) \
20 X("crlflinebreaks", 0, CRLFLINEBREAKSSW) \
21 X("nocrlflinebreaks", 0, NCRLFLINEBREAKSSW) \
22 X("textcharset", 0, TEXTCHARSETSW) \
23 X("notextcharset", 0, NTEXTCHARSETSW) \
24 X("reformat", 0, REFORMATSW) \
25 X("noreformat", 0, NREFORMATSW) \
26 X("replacetextplain", 0, REPLACETEXTPLAINSW) \
27 X("noreplacetextplain", 0, NREPLACETEXTPLAINSW) \
28 X("fixboundary", 0, FIXBOUNDARYSW) \
29 X("nofixboundary", 0, NFIXBOUNDARYSW) \
30 X("fixcte", 0, FIXCTESW) \
31 X("nofixcte", 0, NFIXCTESW) \
32 X("fixtype mimetype", 0, FIXTYPESW) \
33 X("file file", 0, FILESW) \
34 X("outfile file", 0, OUTFILESW) \
35 X("rmmproc program", 0, RPROCSW) \
36 X("normmproc", 0, NRPRCSW) \
37 X("changecur", 0, CHGSW) \
38 X("nochangecur", 0, NCHGSW) \
39 X("verbose", 0, VERBSW) \
40 X("noverbose", 0, NVERBSW) \
41 X("version", 0, VERSIONSW) \
42 X("help", 0, HELPSW) \
43
/* First expansion: X() yields just the identifier, one per switch. */
44 #define X(sw, minchars, id) id,
45 DEFINE_SWITCH_ENUM(MHFIXMSG);
46 #undef X
47
/* Second expansion: X() yields one initializer per switch for the
   `switches' table that main() hands to smatch(), ambigsw() and
   print_help(). */
48 #define X(sw, minchars, id) { sw, minchars, id },
49 DEFINE_SWITCH_ARRAY(MHFIXMSG, switches);
50 #undef X
51
52
/* Set by -verbose and cleared by -noverbose; when nonzero the fix
   routines call report() to describe each modification. */
53 int verbosw;
54 int debugsw; /* Needed by mhparse.c. */
55
/* main() installs quitser for SIGQUIT and pipeser for SIGPIPE; this alias
   makes both signals share the one handler. */
56 #define quitser pipeser
57
58 /* mhparse.c */
59 extern int skip_mp_cte_check; /* flag to InitMultiPart */
60 extern int suppress_bogus_mp_content_warning; /* flag to InitMultiPart */
61 extern int bogus_mp_content; /* flag from InitMultiPart */
62 /* flags to/from parse_header_attrs */
63 extern int suppress_extraneous_trailing_semicolon_warning;
64 extern int extraneous_trailing_semicolon;
65
66 /* mhoutsbr.c */
67 int output_message (CT, char *);
68
69 /* mhmisc.c */
70 void flush_errors (void);
71
72 /* mhfree.c */
/* NULL-terminated array of parsed messages; main() allocates it and
   installs freects_done as `done', which is relied on to clean it up. */
73 extern CT *cts;
74 void freects_done (int) NORETURN;
75
76 /*
77 * static prototypes
78 */
/*
 * The set of transformations requested on the command line.  main() fills
 * one of these in from the switches and passes it, read-only, to
 * mhfixmsgsbr() for every message.
 */
79 typedef struct fix_transformations {
80 int fixboundary; /* nonzero: run fix_boundary() (-fixboundary, default on) */
81 int fixcte; /* nonzero: run fix_multipart_cte() (-fixcte, default on) */
82 svector_t fixtypes; /* type/subtype strings from -fixtype; NULL if none given */
83 int reformat; /* nonzero: run ensure_text_plain() (-reformat, default on) */
84 int replacetextplain; /* handed to ensure_text_plain() (-replacetextplain) */
85 int decodetext; /* 0 (-nodecodetext) or CE_8BIT / CE_7BIT / CE_BINARY */
86 char *decodetypes; /* argument of -decodetypes; defaults to "text,application/ics" */
87 /* Nonzero selects LF-only line endings (-nocrlflinebreaks); zero, the
   default, keeps CRLF linebreaks, per RFC 2046 Sec. 4.1.1, par.1. */
88 int lf_line_endings;
89 char *textcharset; /* argument of -textcharset; NULL skips convert_charsets() */
90 } fix_transformations;
91
/* Applies the requested transformations to one parsed message and writes
   the result; the only function here declared without `static'. */
92 int mhfixmsgsbr (CT *, const fix_transformations *, char *);
/* File-local helpers.  Those taking an `int *' as their last pointer
   argument increment it (the message_mods counter) for each change made. */
93 static int fix_boundary (CT *, int *);
94 static int copy_input_to_output (const char *, const char *);
95 static int get_multipart_boundary (CT, char **);
96 static int replace_boundary (CT, char *, char *);
97 static int fix_types (CT, svector_t, int *);
98 static char *replace_substring (char **, const char *, const char *);
99 static char *remove_parameter (char *, const char *);
100 static int fix_multipart_cte (CT, int *);
101 static int set_ce (CT, int);
102 static int ensure_text_plain (CT *, CT, int *, int);
103 static int find_textplain_sibling (CT, int, int *);
104 static int insert_new_text_plain_part (CT, int, CT);
105 static CT build_text_plain_part (CT);
106 static int insert_into_new_mp_alt (CT *, int *);
107 static CT divide_part (CT);
108 static void copy_ctinfo (CI, CI);
109 static int decode_part (CT);
110 static int reformat_part (CT, char *, char *, char *, int);
111 static int charset_encoding (CT);
112 static CT build_multipart_alt (CT, CT, int, int);
113 static int boundary_in_content (FILE **, char *, const char *);
114 static void transfer_noncontent_headers (CT, CT);
115 static int set_ct_type (CT, int type, int subtype, int encoding);
116 static int decode_text_parts (CT, int, const char *, int *);
117 static int should_decode(const char *, const char *, const char *);
118 static int content_encoding (CT, const char **);
119 static int strip_crs (CT, int *);
120 static int convert_charsets (CT, char *, int *);
121 static int fix_always (CT, int *);
122 static int write_content (CT, const char *, char *, int, int);
123 static void set_text_ctparams(CT, char *, int);
124 static int remove_file (const char *);
125 static void report (char *, char *, char *, char *, ...);
126 static void pipeser (int);
127
128
129 int
130 main (int argc, char **argv) {
131 int msgnum;
132 char *cp, *file = NULL, *folder = NULL;
133 char *maildir, buf[100], *outfile = NULL;
134 char **argp, **arguments;
135 struct msgs_array msgs = { 0, 0, NULL };
136 struct msgs *mp = NULL;
137 CT *ctp;
138 FILE *fp;
139 int using_stdin = 0;
140 int chgflag = 1;
141 int status = OK;
142 fix_transformations fx;
143 fx.reformat = fx.fixcte = fx.fixboundary = 1;
144 fx.fixtypes = NULL;
145 fx.replacetextplain = 0;
146 fx.decodetext = CE_8BIT;
147 fx.decodetypes = "text,application/ics"; /* Default, per man page. */
148 fx.lf_line_endings = 0;
149 fx.textcharset = NULL;
150
151 if (nmh_init(argv[0], 1)) { return 1; }
152
153 done = freects_done;
154
155 arguments = getarguments (invo_name, argc, argv, 1);
156 argp = arguments;
157
158 /*
159 * Parse arguments
160 */
161 while ((cp = *argp++)) {
162 if (*cp == '-') {
163 switch (smatch (++cp, switches)) {
164 case AMBIGSW:
165 ambigsw (cp, switches);
166 done (1);
167 case UNKWNSW:
168 adios (NULL, "-%s unknown", cp);
169
170 case HELPSW:
171 snprintf (buf, sizeof buf, "%s [+folder] [msgs] [switches]",
172 invo_name);
173 print_help (buf, switches, 1);
174 done (0);
175 case VERSIONSW:
176 print_version(invo_name);
177 done (0);
178
179 case DECODETEXTSW:
180 if (! (cp = *argp++) || *cp == '-') {
181 adios (NULL, "missing argument to %s", argp[-2]);
182 }
183 if (! strcasecmp (cp, "8bit")) {
184 fx.decodetext = CE_8BIT;
185 } else if (! strcasecmp (cp, "7bit")) {
186 fx.decodetext = CE_7BIT;
187 } else if (! strcasecmp (cp, "binary")) {
188 fx.decodetext = CE_BINARY;
189 } else {
190 adios (NULL, "invalid argument to %s", argp[-2]);
191 }
192 continue;
193 case NDECODETEXTSW:
194 fx.decodetext = 0;
195 continue;
196 case DECODETYPESW:
197 if (! (cp = *argp++) || *cp == '-') {
198 adios (NULL, "missing argument to %s", argp[-2]);
199 }
200 fx.decodetypes = cp;
201 continue;
202 case CRLFLINEBREAKSSW:
203 fx.lf_line_endings = 0;
204 continue;
205 case NCRLFLINEBREAKSSW:
206 fx.lf_line_endings = 1;
207 continue;
208 case TEXTCHARSETSW:
209 if (! (cp = *argp++) || (*cp == '-' && cp[1])) {
210 adios (NULL, "missing argument to %s", argp[-2]);
211 }
212 fx.textcharset = cp;
213 continue;
214 case NTEXTCHARSETSW:
215 fx.textcharset = 0;
216 continue;
217 case FIXBOUNDARYSW:
218 fx.fixboundary = 1;
219 continue;
220 case NFIXBOUNDARYSW:
221 fx.fixboundary = 0;
222 continue;
223 case FIXCTESW:
224 fx.fixcte = 1;
225 continue;
226 case NFIXCTESW:
227 fx.fixcte = 0;
228 continue;
229 case FIXTYPESW:
230 if (! (cp = *argp++) || (*cp == '-' && cp[1])) {
231 adios (NULL, "missing argument to %s", argp[-2]);
232 }
233 if (! strncasecmp (cp, "multipart/", 10) ||
234 ! strncasecmp (cp, "message/", 8)) {
235 adios (NULL, "-fixtype %s not allowed", cp);
236 } else if (! strchr (cp, '/')) {
237 adios (NULL, "-fixtype requires type/subtype");
238 }
239 if (fx.fixtypes == NULL) { fx.fixtypes = svector_create (10); }
240 svector_push_back (fx.fixtypes, cp);
241 continue;
242 case REFORMATSW:
243 fx.reformat = 1;
244 continue;
245 case NREFORMATSW:
246 fx.reformat = 0;
247 continue;
248 case REPLACETEXTPLAINSW:
249 fx.replacetextplain = 1;
250 continue;
251 case NREPLACETEXTPLAINSW:
252 fx.replacetextplain = 0;
253 continue;
254 case FILESW:
255 if (! (cp = *argp++) || (*cp == '-' && cp[1])) {
256 adios (NULL, "missing argument to %s", argp[-2]);
257 }
258 file = *cp == '-' ? add (cp, NULL) : path (cp, TFILE);
259 continue;
260 case OUTFILESW:
261 if (! (cp = *argp++) || (*cp == '-' && cp[1])) {
262 adios (NULL, "missing argument to %s", argp[-2]);
263 }
264 outfile = *cp == '-' ? add (cp, NULL) : path (cp, TFILE);
265 continue;
266 case RPROCSW:
267 if (!(rmmproc = *argp++) || *rmmproc == '-') {
268 adios (NULL, "missing argument to %s", argp[-2]);
269 }
270 continue;
271 case NRPRCSW:
272 rmmproc = NULL;
273 continue;
274 case CHGSW:
275 chgflag = 1;
276 continue;
277 case NCHGSW:
278 chgflag = 0;
279 continue;
280 case VERBSW:
281 verbosw = 1;
282 continue;
283 case NVERBSW:
284 verbosw = 0;
285 continue;
286 }
287 }
288 if (*cp == '+' || *cp == '@') {
289 if (folder) {
290 adios (NULL, "only one folder at a time!");
291 } else {
292 folder = pluspath (cp);
293 }
294 } else {
295 if (*cp == '/') {
296 /* Interpret a full path as a filename, not a message. */
297 file = add (cp, NULL);
298 } else {
299 app_msgarg (&msgs, cp);
300 }
301 }
302 }
303
304 SIGNAL (SIGQUIT, quitser);
305 SIGNAL (SIGPIPE, pipeser);
306
307 /*
308 * Read the standard profile setup
309 */
310 if ((fp = fopen (cp = etcpath ("mhn.defaults"), "r"))) {
311 readconfig ((struct node **) 0, fp, cp, 0);
312 fclose (fp);
313 }
314
315 suppress_bogus_mp_content_warning = skip_mp_cte_check = 1;
316 suppress_extraneous_trailing_semicolon_warning = 1;
317
318 if (! context_find ("path")) {
319 free (path ("./", TFOLDER));
320 }
321
322 if (file && msgs.size) {
323 adios (NULL, "cannot specify msg and file at same time!");
324 }
325
326 /*
327 * check if message is coming from file
328 */
329 if (file) {
330 /* If file is stdin, create a tmp file name before parse_mime()
331 has a chance, because it might put in on a different
332 filesystem than the output file. Instead, put it in the
333 user's preferred tmp directory. */
334 CT ct;
335
336 if (! strcmp ("-", file)) {
337 int fd;
338 char *cp;
339
340 using_stdin = 1;
341
342 if ((cp = m_mktemp2 (NULL, invo_name, &fd, NULL)) == NULL) {
343 adios (NULL, "unable to create temporary file in %s",
344 get_temp_dir());
345 } else {
346 free (file);
347 file = add (cp, NULL);
348 cpydata (STDIN_FILENO, fd, "-", file);
349 }
350
351 if (close (fd)) {
352 (void) m_unlink (file);
353 adios (NULL, "failed to write temporary file");
354 }
355 }
356
357 if (! (cts = (CT *) mh_xcalloc ((size_t) 2, sizeof *cts))) {
358 adios (NULL, "out of memory");
359 }
360 ctp = cts;
361
362 if ((ct = parse_mime (file))) {
363 set_text_ctparams(ct, fx.decodetypes, fx.lf_line_endings);
364 *ctp++ = ct;
365 } else {
366 advise (NULL, "unable to parse message from file %s", file);
367 status = NOTOK;
368
369 /* If there's an outfile, pass the input message unchanged, so the message won't
370 get dropped from a pipeline. */
371 if (outfile) {
372 /* Something went wrong. Output might be expected, such as if this were run
373 as a filter. Just copy the input to the output. */
374 if (copy_input_to_output (file, outfile) != OK) {
375 advise (NULL, "unable to copy message to %s, it might be lost\n", outfile);
376 }
377 }
378 }
379 } else {
380 /*
381 * message(s) are coming from a folder
382 */
383 CT ct;
384
385 if (! msgs.size) {
386 app_msgarg(&msgs, "cur");
387 }
388 if (! folder) {
389 folder = getfolder (1);
390 }
391 maildir = m_maildir (folder);
392
393 if (chdir (maildir) == NOTOK) {
394 adios (maildir, "unable to change directory to");
395 }
396
397 /* read folder and create message structure */
398 if (! (mp = folder_read (folder, 1))) {
399 adios (NULL, "unable to read folder %s", folder);
400 }
401
402 /* check for empty folder */
403 if (mp->nummsg == 0) {
404 adios (NULL, "no messages in %s", folder);
405 }
406
407 /* parse all the message ranges/sequences and set SELECTED */
408 for (msgnum = 0; msgnum < msgs.size; msgnum++)
409 if (! m_convert (mp, msgs.msgs[msgnum])) {
410 done (1);
411 }
412 seq_setprev (mp); /* set the previous-sequence */
413
414 if (! (cts =
415 (CT *) mh_xcalloc ((size_t) (mp->numsel + 1), sizeof *cts))) {
416 adios (NULL, "out of memory");
417 }
418 ctp = cts;
419
420 for (msgnum = mp->lowsel; msgnum <= mp->hghsel; msgnum++) {
421 if (is_selected(mp, msgnum)) {
422 char *msgnam;
423
424 msgnam = m_name (msgnum);
425 if ((ct = parse_mime (msgnam))) {
426 set_text_ctparams(ct, fx.decodetypes, fx.lf_line_endings);
427 *ctp++ = ct;
428 } else {
429 advise (NULL, "unable to parse message %s", msgnam);
430 status = NOTOK;
431
432 /* If there's an outfile, pass the input message unchanged, so the message won't
433 get dropped from a pipeline. */
434 if (outfile) {
435 /* Something went wrong. Output might be expected, such as if this were run
436 as a filter. Just copy the input to the output. */
437 const char *input_filename = path (msgnam, TFILE);
438
439 if (copy_input_to_output (input_filename, outfile) != OK) {
440 advise (NULL, "unable to copy message to %s, it might be lost\n", outfile);
441 }
442 }
443 }
444 }
445 }
446
447 if (chgflag) {
448 seq_setcur (mp, mp->hghsel); /* update current message */
449 }
450 seq_save (mp); /* synchronize sequences */
451 context_replace (pfolder, folder);/* update current folder */
452 context_save (); /* save the context file */
453 }
454
455 if (*cts) {
456 for (ctp = cts; *ctp; ++ctp) {
457 status += mhfixmsgsbr (ctp, &fx, outfile);
458
459 if (using_stdin) {
460 (void) m_unlink (file);
461
462 if (! outfile) {
463 /* Just calling m_backup() unlinks the backup file. */
464 (void) m_backup (file);
465 }
466 }
467 }
468 } else {
469 status = 1;
470 }
471
472 if (fx.fixtypes != NULL) { svector_free (fx.fixtypes); }
473 free (outfile);
474 free (file);
475 free (folder);
476 free (arguments);
477
478 /* done is freects_done, which will clean up all of cts. */
479 done (status);
480 return NOTOK;
481 }
482
483
484 int
485 mhfixmsgsbr (CT *ctp, const fix_transformations *fx, char *outfile) {
486 /* Store input filename in case one of the transformations, i.e.,
487 fix_boundary(), rewrites to a tmp file. */
488 char *input_filename = add ((*ctp)->c_file, NULL);
489 int modify_inplace = 0;
490 int message_mods = 0;
491 int status = OK;
492
493 if (outfile == NULL) {
494 modify_inplace = 1;
495
496 if ((*ctp)->c_file) {
497 char *tempfile;
498 if ((tempfile = m_mktemp2 (NULL, invo_name, NULL, NULL)) == NULL) {
499 adios (NULL, "unable to create temporary file in %s",
500 get_temp_dir());
501 }
502 outfile = add (tempfile, NULL);
503 } else {
504 adios (NULL, "missing both input and output filenames\n");
505 }
506 }
507
508 reverse_alternative_parts (*ctp);
509 status = fix_always (*ctp, &message_mods);
510 if (status == OK && fx->fixboundary) {
511 status = fix_boundary (ctp, &message_mods);
512 }
513 if (status == OK && fx->fixtypes != NULL) {
514 status = fix_types (*ctp, fx->fixtypes, &message_mods);
515 }
516 if (status == OK && fx->fixcte) {
517 status = fix_multipart_cte (*ctp, &message_mods);
518 }
519 if (status == OK && fx->reformat) {
520 status =
521 ensure_text_plain (ctp, NULL, &message_mods, fx->replacetextplain);
522 }
523 if (status == OK && fx->decodetext) {
524 status = decode_text_parts (*ctp, fx->decodetext, fx->decodetypes, &message_mods);
525 }
526 if (status == OK && fx->textcharset != NULL) {
527 status = convert_charsets (*ctp, fx->textcharset, &message_mods);
528 }
529
530 if (status == OK && ! (*ctp)->c_umask) {
531 /* Set the umask for the contents file. This currently
532 isn't used but just in case it is in the future. */
533 struct stat st;
534
535 if (stat ((*ctp)->c_file, &st) != NOTOK) {
536 (*ctp)->c_umask = ~(st.st_mode & 0777);
537 } else {
538 (*ctp)->c_umask = ~m_gmprot();
539 }
540 }
541
542 /*
543 * Write the content to a file
544 */
545 if (status == OK) {
546 status = write_content (*ctp, input_filename, outfile, modify_inplace,
547 message_mods);
548 } else if (! modify_inplace) {
549 /* Something went wrong. Output might be expected, such
550 as if this were run as a filter. Just copy the input
551 to the output. */
552 if (copy_input_to_output (input_filename, outfile) != OK) {
553 advise (NULL, "unable to copy message to %s, it might be lost\n", outfile);
554 }
555 }
556
557 if (modify_inplace) {
558 if (status != OK) { (void) m_unlink (outfile); }
559 free (outfile);
560 outfile = NULL;
561 }
562
563 free (input_filename);
564
565 return status;
566 }
567
568
569 /* Copy input message to output. Assumes not modifying in place, so this
570 might be running as part of a pipeline. */
571 static int
572 copy_input_to_output (const char *input_filename, const char *output_filename) {
573 int in = open (input_filename, O_RDONLY);
574 int out = strcmp (output_filename, "-")
575 ? open (output_filename, O_WRONLY | O_CREAT, m_gmprot ())
576 : STDOUT_FILENO;
577 int status = OK;
578
579 if (in != -1 && out != -1) {
580 cpydata (in, out, input_filename, output_filename);
581 } else {
582 status = NOTOK;
583 }
584
585 close (out);
586 close (in);
587
588 return status;
589 }
590
591
592 static int
593 fix_boundary (CT *ct, int *message_mods) {
594 struct multipart *mp;
595 int status = OK;
596
597 if (ct && (*ct)->c_type == CT_MULTIPART && bogus_mp_content) {
598 mp = (struct multipart *) (*ct)->c_ctparams;
599
600 /*
601 * 1) Get boundary at end of part.
602 * 2) Get boundary at beginning of part and compare to the end-of-part
603 * boundary.
604 * 3) Write out contents of ct to tmp file, replacing boundary in
605 * header with boundary from part. Set c_unlink to 1.
606 * 4) Free ct.
607 * 5) Call parse_mime() on the tmp file, replacing ct.
608 */
609
610 if (mp && mp->mp_start) {
611 char *part_boundary;
612
613 if (get_multipart_boundary (*ct, &part_boundary) == OK) {
614 char *fixed;
615
616 if ((fixed = m_mktemp2 (NULL, invo_name, NULL, &(*ct)->c_fp))) {
617 if (replace_boundary (*ct, fixed, part_boundary) == OK) {
618 char *filename = add ((*ct)->c_file, NULL);
619 CT fixed_ct;
620
621 free_content (*ct);
622 if ((fixed_ct = parse_mime (fixed))) {
623 *ct = fixed_ct;
624 (*ct)->c_unlink = 1;
625
626 ++*message_mods;
627 if (verbosw) {
628 report (NULL, NULL, filename,
629 "fix multipart boundary");
630 }
631 } else {
632 *ct = NULL;
633 advise (NULL, "unable to parse fixed part");
634 status = NOTOK;
635 }
636 free (filename);
637 } else {
638 advise (NULL, "unable to replace broken boundary");
639 status = NOTOK;
640 }
641 } else {
642 advise (NULL, "unable to create temporary file in %s",
643 get_temp_dir());
644 status = NOTOK;
645 }
646
647 free (part_boundary);
648 } else {
649 /* Couldn't fix the boundary. Report failure so that mhfixmsg
650 doesn't modify the message. */
651 status = NOTOK;
652 }
653 } else {
654 /* No multipart struct, even though the content type is
655 CT_MULTIPART. Report failure so that mhfixmsg doesn't modify
656 the message. */
657 status = NOTOK;
658 }
659 }
660
661 return status;
662 }
663
664
665 static int
666 get_multipart_boundary (CT ct, char **part_boundary) {
667 char buffer[BUFSIZ];
668 char *end_boundary = NULL;
669 off_t begin = (off_t) ct->c_end > (off_t) (ct->c_begin + sizeof buffer)
670 ? (off_t) (ct->c_end - sizeof buffer)
671 : (off_t) ct->c_begin;
672 size_t bytes_read;
673 int status = OK;
674
675 /* This will fail if the boundary spans fread() calls. BUFSIZ should
676 be big enough, even if it's just 1024, to make that unlikely. */
677
678 /* free_content() will close ct->c_fp. */
679 if (! ct->c_fp && (ct->c_fp = fopen (ct->c_file, "r")) == NULL) {
680 advise (ct->c_file, "unable to open for reading");
681 return NOTOK;
682 }
683
684 /* Get boundary at end of multipart. */
685 while (begin >= (off_t) ct->c_begin) {
686 fseeko (ct->c_fp, begin, SEEK_SET);
687 while ((bytes_read = fread (buffer, 1, sizeof buffer, ct->c_fp)) > 0) {
688 char *cp = rfind_str (buffer, bytes_read, "--");
689
690 if (cp) {
691 char *end;
692
693 /* Trim off trailing "--" and anything beyond. */
694 *cp-- = '\0';
695 if ((end = rfind_str (buffer, cp - buffer, "\n"))) {
696 if (strlen (end) > 3 && *end++ == '\n' &&
697 *end++ == '-' && *end++ == '-') {
698 end_boundary = add (end, NULL);
699 break;
700 }
701 }
702 }
703 }
704
705 if (! end_boundary && begin > (off_t) (ct->c_begin + sizeof buffer)) {
706 begin -= sizeof buffer;
707 } else {
708 break;
709 }
710 }
711
712 /* Get boundary at beginning of multipart. */
713 if (end_boundary) {
714 fseeko (ct->c_fp, ct->c_begin, SEEK_SET);
715 while ((bytes_read = fread (buffer, 1, sizeof buffer, ct->c_fp)) > 0) {
716 if (bytes_read >= strlen (end_boundary)) {
717 char *cp = find_str (buffer, bytes_read, end_boundary);
718
719 if (cp && cp - buffer >= 2 && *--cp == '-' &&
720 *--cp == '-' && (cp > buffer && *--cp == '\n')) {
721 status = OK;
722 break;
723 }
724 } else {
725 /* The start and end boundaries didn't match, or the
726 start boundary doesn't begin with "\n--" (or "--"
727 if at the beginning of buffer). Keep trying. */
728 status = NOTOK;
729 }
730 }
731 } else {
732 status = NOTOK;
733 }
734
735 if (status == OK) {
736 *part_boundary = end_boundary;
737 } else {
738 *part_boundary = NULL;
739 free (end_boundary);
740 }
741
742 return status;
743 }
744
745
746 /* Open and copy ct->c_file to file, replacing the multipart boundary. */
747 static int
748 replace_boundary (CT ct, char *file, char *boundary) {
749 FILE *fpin, *fpout;
750 int compnum, state;
751 char buf[BUFSIZ], name[NAMESZ];
752 char *np, *vp;
753 m_getfld_state_t gstate = 0;
754 int status = OK;
755
756 if (ct->c_file == NULL) {
757 advise (NULL, "missing input filename");
758 return NOTOK;
759 }
760
761 if ((fpin = fopen (ct->c_file, "r")) == NULL) {
762 advise (ct->c_file, "unable to open for reading");
763 return NOTOK;
764 }
765
766 if ((fpout = fopen (file, "w")) == NULL) {
767 fclose (fpin);
768 advise (file, "unable to open for writing");
769 return NOTOK;
770 }
771
772 for (compnum = 1;;) {
773 int bufsz = (int) sizeof buf;
774
775 switch (state = m_getfld (&gstate, name, buf, &bufsz, fpin)) {
776 case FLD:
777 case FLDPLUS:
778 compnum++;
779
780 /* get copies of the buffers */
781 np = add (name, NULL);
782 vp = add (buf, NULL);
783
784 /* if necessary, get rest of field */
785 while (state == FLDPLUS) {
786 bufsz = sizeof buf;
787 state = m_getfld (&gstate, name, buf, &bufsz, fpin);
788 vp = add (buf, vp); /* add to previous value */
789 }
790
791 if (strcasecmp (TYPE_FIELD, np)) {
792 fprintf (fpout, "%s:%s", np, vp);
793 } else {
794 char *new_ctline, *new_params;
795
796 replace_param(&ct->c_ctinfo.ci_first_pm,
797 &ct->c_ctinfo.ci_last_pm, "boundary",
798 boundary, 0);
799
800 new_ctline = concat(" ", ct->c_ctinfo.ci_type, "/",
801 ct->c_ctinfo.ci_subtype, NULL);
802 new_params = output_params(strlen(TYPE_FIELD) +
803 strlen(new_ctline) + 1,
804 ct->c_ctinfo.ci_first_pm, NULL, 0);
805 fprintf (fpout, "%s:%s%s\n", np, new_ctline,
806 new_params ? new_params : "");
807 free(new_ctline);
808 if (new_params) {
809 free(new_params);
810 }
811 }
812
813 free (vp);
814 free (np);
815
816 continue;
817
818 case BODY:
819 fputs ("\n", fpout);
820 /* buf will have a terminating NULL, skip it. */
821 if ((int) fwrite (buf, 1, bufsz-1, fpout) < bufsz-1) {
822 advise (file, "fwrite");
823 }
824 continue;
825
826 case FILEEOF:
827 break;
828
829 case LENERR:
830 case FMTERR:
831 advise (NULL, "message format error in component #%d", compnum);
832 status = NOTOK;
833 break;
834
835 default:
836 advise (NULL, "getfld() returned %d", state);
837 status = NOTOK;
838 break;
839 }
840
841 break;
842 }
843
844 m_getfld_state_destroy (&gstate);
845 fclose (fpout);
846 fclose (fpin);
847
848 return status;
849 }
850
851
852 static int
853 fix_types (CT ct, svector_t fixtypes, int *message_mods) {
854 int status = OK;
855
856 switch (ct->c_type) {
857 case CT_MULTIPART: {
858 struct multipart *m = (struct multipart *) ct->c_ctparams;
859 struct part *part;
860
861 for (part = m->mp_parts; status == OK && part; part = part->mp_next) {
862 status = fix_types (part->mp_part, fixtypes, message_mods);
863 }
864 break;
865 }
866
867 case CT_MESSAGE:
868 if (ct->c_subtype == MESSAGE_EXTERNAL) {
869 struct exbody *e = (struct exbody *) ct->c_ctparams;
870
871 status = fix_types (e->eb_content, fixtypes, message_mods);
872 }
873 break;
874
875 default: {
876 char **typep, *type;
877
878 if (ct->c_ctinfo.ci_type && ct->c_ctinfo.ci_subtype) {
879 for (typep = svector_strs (fixtypes);
880 typep && (type = *typep);
881 ++typep) {
882 char *type_subtype =
883 concat (ct->c_ctinfo.ci_type, "/", ct->c_ctinfo.ci_subtype,
884 NULL);
885
886 if (! strcasecmp (type, type_subtype) &&
887 decode_part (ct) == OK &&
888 ct->c_cefile.ce_file != NULL) {
889 char *ct_type_subtype = mime_type (ct->c_cefile.ce_file);
890 char *cp;
891
892 if ((cp = strchr (ct_type_subtype, ';'))) {
893 /* Truncate to remove any parameter list from
894 mime_type () result. */
895 *cp = '\0';
896 }
897
898 if (strcasecmp (type, ct_type_subtype)) {
899 char *ct_type, *ct_subtype;
900 HF hf;
901
902 /* The Content-Type header does not match the
903 content, so update these struct Content
904 fields to match:
905 * c_type, c_subtype
906 * c_ctinfo.ci_type, c_ctinfo.ci_subtype
907 * c_ctline
908 */
909 /* Extract type and subtype from type/subtype. */
910 ct_type = getcpy (ct_type_subtype);
911 if ((cp = strchr (ct_type, '/'))) {
912 *cp = '\0';
913 ct_subtype = getcpy (++cp);
914 } else {
915 advise (NULL, "missing / in MIME type of %s %s",
916 ct->c_file, ct->c_partno);
917 free (ct_type);
918 return NOTOK;
919 }
920
921 ct->c_type = ct_str_type (ct_type);
922 ct->c_subtype = ct_str_subtype (ct->c_type, ct_subtype);
923
924 free (ct->c_ctinfo.ci_type);
925 ct->c_ctinfo.ci_type = ct_type;
926 free (ct->c_ctinfo.ci_subtype);
927 ct->c_ctinfo.ci_subtype = ct_subtype;
928 if (! replace_substring (&ct->c_ctline, type,
929 ct_type_subtype)) {
930 advise (NULL, "did not find %s in %s",
931 type, ct->c_ctline);
932 }
933
934 /* Update Content-Type header field. */
935 for (hf = ct->c_first_hf; hf; hf = hf->next) {
936 if (! strcasecmp (TYPE_FIELD, hf->name)) {
937 if (replace_substring (&hf->value, type,
938 ct_type_subtype)) {
939 ++*message_mods;
940 if (verbosw) {
941 report (NULL, ct->c_partno, ct->c_file,
942 "change Content-Type in header "
943 "from %s to %s",
944 type, ct_type_subtype);
945 }
946 break;
947 } else {
948 advise (NULL, "did not find %s in %s",
949 type, hf->value);
950 }
951 }
952 }
953 }
954 free (ct_type_subtype);
955 }
956 free (type_subtype);
957 }
958 }
959 }}
960
961 return status;
962 }
963
/*
 * Replace the first occurrence of `old' in *str with `new'.
 *
 * On a match, *str is freed and replaced by a newly allocated string,
 * which is also returned.  If `old' does not occur in *str, *str is left
 * untouched and NULL is returned.
 */
char *
replace_substring (char **str, const char *old, const char *new) {
    char *hit = strstr (*str, old);
    char *tail, *result;
    size_t lead;

    if (hit == NULL) {
        return NULL;
    }

    tail = hit + strlen (old);      /* text following the match */
    lead = (size_t) (hit - *str);   /* length of text preceding it */

    if (lead == 0) {
        result = concat (new, tail, NULL);
    } else {
        /* Copy the whole string, then cut the copy off at the match to
           get the leading text. */
        char *head = getcpy (*str);

        head[lead] = '\0';
        result = concat (head, new, tail, NULL);
        free (head);
    }

    free (*str);
    *str = result;

    return result;
}
988
/*
 * Remove a name=value parameter, given just its name, from a header value.
 *
 * The parameter is removed in place, together with the whitespace and the
 * semicolon that precede it.  Only the first occurrence is removed, and
 * the name is matched case-sensitively.  Returns str.
 *
 * The name has to start a parameter: it must be at the beginning of str
 * or follow a semicolon or whitespace, so that "name" does not match
 * inside "filename".  An unquoted value ends at whitespace or at the
 * semicolon that introduces the next parameter, and that semicolon is
 * kept.  A quoted value ends at its closing quote, or at the end of the
 * string if the quote is never closed.
 */
char *
remove_parameter (char *str, const char *name) {
    /* It looks to me, based on the BNF in RFC 2045, that there can't
       be whitespace between the parameter name and the "=", or
       between the "=" and the parameter value. */
    const size_t name_len = strlen (name);
    char *match = NULL;
    char *cp;

    for (cp = strstr (str, name); cp  &&  *cp; cp = strstr (cp + 1, name)) {
        if (cp[name_len] == '='  &&
            (cp == str  ||  cp[-1] == ';'  ||
             isspace ((unsigned char) cp[-1]))) {
            match = cp;
            break;
        }
    }

    if (match) {
        char *start = match;
        char *end = match + name_len + 1;   /* first character of value */

        /* Remove any leading spaces, before the parameter name. */
        while (start > str  &&  isspace ((unsigned char) start[-1])) {
            --start;
        }
        /* Remove a leading semicolon. */
        if (start > str  &&  start[-1] == ';') { --start; }

        if (*end == '"') {
            /* Skip past the quoted value, and then the final quote if
               there is one. */
            for (++end; *end  &&  *end != '"'; ++end) { continue; }
            if (*end == '"') { ++end; }
        } else {
            /* Skip past the value. */
            while (*end  &&  *end != ';'  &&
                   ! isspace ((unsigned char) *end)) {
                ++end;
            }
        }

        /* Close the gap, moving the terminating null along as well. */
        (void) memmove (start, end, strlen (end) + 1);
    }

    return str;
}
1034
1035 static int
1036 fix_multipart_cte (CT ct, int *message_mods) {
1037 int status = OK;
1038
1039 if (ct->c_type == CT_MULTIPART) {
1040 struct multipart *m;
1041 struct part *part;
1042
1043 if (ct->c_encoding != CE_7BIT && ct->c_encoding != CE_8BIT &&
1044 ct->c_encoding != CE_BINARY) {
1045 HF hf;
1046
1047 for (hf = ct->c_first_hf; hf; hf = hf->next) {
1048 char *name = hf->name;
1049 for (; *name && isspace ((unsigned char) *name); ++name) {
1050 continue;
1051 }
1052
1053 if (! strncasecmp (name, ENCODING_FIELD,
1054 strlen (ENCODING_FIELD))) {
1055 char *prefix = "Nmh-REPLACED-INVALID-";
1056 HF h = mh_xmalloc (sizeof *h);
1057
1058 h->name = add (hf->name, NULL);
1059 h->hf_encoding = hf->hf_encoding;
1060 h->next = hf->next;
1061 hf->next = h;
1062
1063 /* Retain old header but prefix its name. */
1064 free (hf->name);
1065 hf->name = concat (prefix, h->name, NULL);
1066
1067 ++*message_mods;
1068 if (verbosw) {
1069 char *encoding = cpytrim (hf->value);
1070 report (NULL, ct->c_partno, ct->c_file,
1071 "replace Content-Transfer-Encoding of %s "
1072 "with 8 bit", encoding);
1073 free (encoding);
1074 }
1075
1076 h->value = add (" 8bit\n", NULL);
1077
1078 /* Don't need to warn for multiple C-T-E header
1079 fields, parse_mime() already does that. But
1080 if there are any, fix them all as necessary. */
1081 hf = h;
1082 }
1083 }
1084
1085 set_ce (ct, CE_8BIT);
1086 }
1087
1088 m = (struct multipart *) ct->c_ctparams;
1089 for (part = m->mp_parts; part; part = part->mp_next) {
1090 if (fix_multipart_cte (part->mp_part, message_mods) != OK) {
1091 status = NOTOK;
1092 break;
1093 }
1094 }
1095 }
1096
1097 return status;
1098 }
1099
1100
1101 static int
1102 set_ce (CT ct, int encoding) {
1103 const char *ce = ce_str (encoding);
1104 const struct str2init *ctinit = get_ce_method (ce);
1105
1106 if (ctinit) {
1107 char *cte = concat (" ", ce, "\n", NULL);
1108 int found_cte = 0;
1109 HF hf;
1110 /* Decoded contents might be in ct->c_cefile.ce_file, if the
1111 caller is decode_text_parts (). Save because we'll
1112 overwrite below. */
1113 struct cefile decoded_content_info = ct->c_cefile;
1114
1115 ct->c_encoding = encoding;
1116
1117 ct->c_ctinitfnx = ctinit->si_init;
1118 /* This will assign ct->c_cefile with an all-0 struct, which
1119 is what we want. */
1120 (*ctinit->si_init) (ct);
1121 /* After returning, the caller should set
1122 ct->c_cefile.ce_file to the name of the file containing
1123 the contents. */
1124
1125 /* Restore the cefile. */
1126 ct->c_cefile = decoded_content_info;
1127
1128 /* Update/add Content-Transfer-Encoding header field. */
1129 for (hf = ct->c_first_hf; hf; hf = hf->next) {
1130 if (! strcasecmp (ENCODING_FIELD, hf->name)) {
1131 found_cte = 1;
1132 free (hf->value);
1133 hf->value = cte;
1134 }
1135 }
1136 if (! found_cte) {
1137 add_header (ct, add (ENCODING_FIELD, NULL), cte);
1138 }
1139
1140 /* Update c_celine. It's used only by mhlist -debug. */
1141 free (ct->c_celine);
1142 ct->c_celine = add (cte, NULL);
1143
1144 return OK;
1145 } else {
1146 return NOTOK;
1147 }
1148 }
1149
1150
1151 /* Make sure each text part has a corresponding text/plain part. */
1152 static int
1153 ensure_text_plain (CT *ct, CT parent, int *message_mods, int replacetextplain) {
1154 int status = OK;
1155
1156 switch ((*ct)->c_type) {
1157 case CT_TEXT: {
1158 /* Nothing to do for text/plain. */
1159 if ((*ct)->c_subtype == TEXT_PLAIN) { return OK; }
1160
1161 if (parent && parent->c_type == CT_MULTIPART &&
1162 parent->c_subtype == MULTI_ALTERNATE) {
1163 int new_subpart_number = 1;
1164 int has_text_plain =
1165 find_textplain_sibling (parent, replacetextplain,
1166 &new_subpart_number);
1167
1168 if (! has_text_plain) {
1169 /* Parent is a multipart/alternative. Insert a new
1170 text/plain subpart. */
1171 const int inserted =
1172 insert_new_text_plain_part (*ct, new_subpart_number,
1173 parent);
1174 if (inserted) {
1175 ++*message_mods;
1176 if (verbosw) {
1177 report (NULL, parent->c_partno, parent->c_file,
1178 "insert text/plain part");
1179 }
1180 } else {
1181 status = NOTOK;
1182 }
1183 }
1184 } else if (parent && parent->c_type == CT_MULTIPART &&
1185 parent->c_subtype == MULTI_RELATED) {
1186 char *type_subtype =
1187 concat ((*ct)->c_ctinfo.ci_type, "/",
1188 (*ct)->c_ctinfo.ci_subtype, NULL);
1189 const char *parent_type =
1190 get_param (parent->c_ctinfo.ci_first_pm, "type", '?', 1);
1191 int new_subpart_number = 1;
1192 int has_text_plain = 0;
1193
1194 /* Have to do string comparison on the subtype because we
1195 don't enumerate all of them in c_subtype values.
1196 parent_type will be NULL if the multipart/related part
1197 doesn't have a type parameter. The type parameter must
1198 be specified according to RFC 2387 Sec. 3.1 but not all
1199 messages comply. */
1200 if (parent_type && strcasecmp (type_subtype, parent_type) == 0) {
1201 /* The type of this part matches the root type of the
1202 parent multipart/related. Look to see if there's
1203 text/plain sibling. */
1204 has_text_plain =
1205 find_textplain_sibling (parent, replacetextplain,
1206 &new_subpart_number);
1207 }
1208
1209 free (type_subtype);
1210
1211 if (! has_text_plain) {
1212 struct multipart *mp = (struct multipart *) parent->c_ctparams;
1213 struct part *part;
1214 int siblings = 0;
1215
1216 for (part = mp->mp_parts; part; part = part->mp_next) {
1217 if (*ct != part->mp_part) {
1218 ++siblings;
1219 }
1220 }
1221
1222 if (siblings) {
1223 /* Parent is a multipart/related. Insert a new
1224 text/plain subpart in a new multipart/alternative. */
1225 if (insert_into_new_mp_alt (ct, message_mods)) {
1226 /* Not an error if text/plain couldn't be added. */
1227 }
1228 } else {
1229 /* There are no siblings, so insert a new text/plain
1230 subpart, and change the parent type from
1231 multipart/related to multipart/alternative. */
1232 const int inserted =
1233 insert_new_text_plain_part (*ct, new_subpart_number,
1234 parent);
1235
1236 if (inserted) {
1237 HF hf;
1238
1239 parent->c_subtype = MULTI_ALTERNATE;
1240 parent->c_ctinfo.ci_subtype = getcpy ("alternative");
1241 if (! replace_substring (&parent->c_ctline, "/related",
1242 "/alternative")) {
1243 advise (NULL,
1244 "did not find multipart/related in %s",
1245 parent->c_ctline);
1246 }
1247
1248 /* Update Content-Type header field. */
1249 for (hf = parent->c_first_hf; hf; hf = hf->next) {
1250 if (! strcasecmp (TYPE_FIELD, hf->name)) {
1251 if (replace_substring (&hf->value, "/related",
1252 "/alternative")) {
1253 ++*message_mods;
1254 if (verbosw) {
1255 report (NULL, parent->c_partno,
1256 parent->c_file,
1257 "insert text/plain part");
1258 }
1259
1260 /* Remove, e.g., type="text/html" from
1261 multipart/alternative. */
1262 remove_parameter (hf->value, "type");
1263 break;
1264 } else {
1265 advise (NULL, "did not find multipart/"
1266 "related in header %s",
1267 hf->value);
1268 }
1269 }
1270 }
1271 } else {
1272 /* Not an error if text/plain couldn't be inserted. */
1273 }
1274 }
1275 }
1276 } else {
1277 if (insert_into_new_mp_alt (ct, message_mods)) {
1278 status = NOTOK;
1279 }
1280 }
1281 break;
1282 }
1283
1284 case CT_MULTIPART: {
1285 struct multipart *mp = (struct multipart *) (*ct)->c_ctparams;
1286 struct part *part;
1287
1288 for (part = mp->mp_parts; status == OK && part; part = part->mp_next) {
1289 if ((*ct)->c_type == CT_MULTIPART) {
1290 status = ensure_text_plain (&part->mp_part, *ct, message_mods,
1291 replacetextplain);
1292 }
1293 }
1294 break;
1295 }
1296
1297 case CT_MESSAGE:
1298 if ((*ct)->c_subtype == MESSAGE_EXTERNAL) {
1299 struct exbody *e = (struct exbody *) (*ct)->c_ctparams;
1300
1301 status = ensure_text_plain (&e->eb_content, *ct, message_mods,
1302 replacetextplain);
1303 }
1304 break;
1305 }
1306
1307 return status;
1308 }
1309
1310
1311 /* See if there is a sibling text/plain. */
1312 static int
1313 find_textplain_sibling (CT parent, int replacetextplain,
1314 int *new_subpart_number) {
1315 struct multipart *mp = (struct multipart *) parent->c_ctparams;
1316 struct part *part, *prev;
1317 int has_text_plain = 0;
1318
1319 for (prev = part = mp->mp_parts; part; part = part->mp_next) {
1320 ++*new_subpart_number;
1321 if (part->mp_part->c_type == CT_TEXT &&
1322 part->mp_part->c_subtype == TEXT_PLAIN) {
1323 if (replacetextplain) {
1324 struct part *old_part;
1325 if (part == mp->mp_parts) {
1326 old_part = mp->mp_parts;
1327 mp->mp_parts = part->mp_next;
1328 } else {
1329 old_part = prev->mp_next;
1330 prev->mp_next = part->mp_next;
1331 }
1332 if (verbosw) {
1333 report (NULL, parent->c_partno, parent->c_file,
1334 "remove text/plain part %s",
1335 old_part->mp_part->c_partno);
1336 }
1337 free_content (old_part->mp_part);
1338 free (old_part);
1339 } else {
1340 has_text_plain = 1;
1341 }
1342 break;
1343 }
1344 prev = part;
1345 }
1346
1347 return has_text_plain;
1348 }
1349
1350
1351 static int
1352 insert_new_text_plain_part (CT ct, int new_subpart_number, CT parent) {
1353 struct multipart *mp = (struct multipart *) parent->c_ctparams;
1354 struct part *new_part = mh_xmalloc (sizeof *new_part);
1355
1356 if ((new_part->mp_part = build_text_plain_part (ct))) {
1357 char buffer[16];
1358 snprintf (buffer, sizeof buffer, "%d", new_subpart_number);
1359
1360 new_part->mp_next = mp->mp_parts;
1361 mp->mp_parts = new_part;
1362 new_part->mp_part->c_partno =
1363 concat (parent->c_partno ? parent->c_partno : "1", ".",
1364 buffer, NULL);
1365
1366 return 1;
1367 } else {
1368 free_content (new_part->mp_part);
1369 free (new_part);
1370
1371 return 0;
1372 }
1373 }
1374
1375
1376 static CT
1377 build_text_plain_part (CT encoded_part) {
1378 CT tp_part = divide_part (encoded_part);
1379 char *tmp_plain_file = NULL;
1380
1381 if (decode_part (tp_part) == OK) {
1382 /* Now, tp_part->c_cefile.ce_file is the name of the tmp file that
1383 contains the decoded contents. And the decoding function, such
1384 as openQuoted, will have set ...->ce_unlink to 1 so that it will
1385 be unlinked by free_content (). */
1386 char *tempfile;
1387
1388 if ((tempfile = m_mktemp2 (NULL, invo_name, NULL, NULL)) == NULL) {
1389 advise (NULL, "unable to create temporary file in %s",
1390 get_temp_dir());
1391 } else {
1392 tmp_plain_file = add (tempfile, NULL);
1393 if (reformat_part (tp_part, tmp_plain_file,
1394 tp_part->c_ctinfo.ci_type,
1395 tp_part->c_ctinfo.ci_subtype,
1396 tp_part->c_type) == OK) {
1397 return tp_part;
1398 }
1399 }
1400 }
1401
1402 free_content (tp_part);
1403 if (tmp_plain_file) { (void) m_unlink (tmp_plain_file); }
1404 free (tmp_plain_file);
1405
1406 return NULL;
1407 }
1408
1409
1410 /* Slip new text/plain part into a new multipart/alternative. */
1411 static int
1412 insert_into_new_mp_alt (CT *ct, int *message_mods) {
1413 CT tp_part = build_text_plain_part (*ct);
1414 int status = OK;
1415
1416 if (tp_part) {
1417 CT mp_alt = build_multipart_alt (*ct, tp_part, CT_MULTIPART,
1418 MULTI_ALTERNATE);
1419 if (mp_alt) {
1420 struct multipart *mp = (struct multipart *) mp_alt->c_ctparams;
1421
1422 if (mp && mp->mp_parts) {
1423 mp->mp_parts->mp_part = tp_part;
1424 /* Make the new multipart/alternative the parent. */
1425 *ct = mp_alt;
1426
1427 ++*message_mods;
1428 if (verbosw) {
1429 report (NULL, (*ct)->c_partno, (*ct)->c_file,
1430 "insert text/plain part");
1431 }
1432 } else {
1433 free_content (tp_part);
1434 free_content (mp_alt);
1435 status = NOTOK;
1436 }
1437 } else {
1438 status = NOTOK;
1439 }
1440 } else {
1441 /* Not an error if text/plain couldn't be built. */
1442 }
1443
1444 return status;
1445 }
1446
1447 static CT
1448 divide_part (CT ct) {
1449 CT new_part;
1450
1451 if ((new_part = (CT) mh_xcalloc (1, sizeof *new_part)) == NULL)
1452 adios (NULL, "out of memory");
1453
1454 /* Just copy over what is needed for decoding. c_vrsn and
1455 c_celine aren't necessary. */
1456 new_part->c_file = add (ct->c_file, NULL);
1457 new_part->c_begin = ct->c_begin;
1458 new_part->c_end = ct->c_end;
1459 copy_ctinfo (&new_part->c_ctinfo, &ct->c_ctinfo);
1460 new_part->c_type = ct->c_type;
1461 new_part->c_cefile = ct->c_cefile;
1462 new_part->c_encoding = ct->c_encoding;
1463 new_part->c_ctinitfnx = ct->c_ctinitfnx;
1464 new_part->c_ceopenfnx = ct->c_ceopenfnx;
1465 new_part->c_ceclosefnx = ct->c_ceclosefnx;
1466 new_part->c_cesizefnx = ct->c_cesizefnx;
1467
1468 /* c_ctline is used by reformat__part(), so it can preserve
1469 anything after the type/subtype. */
1470 new_part->c_ctline = add (ct->c_ctline, NULL);
1471
1472 return new_part;
1473 }
1474
1475
1476 static void
1477 copy_ctinfo (CI dest, CI src) {
1478 PM s_pm, d_pm;
1479
1480 dest->ci_type = src->ci_type ? add (src->ci_type, NULL) : NULL;
1481 dest->ci_subtype = src->ci_subtype ? add (src->ci_subtype, NULL) : NULL;
1482
1483 for (s_pm = src->ci_first_pm; s_pm; s_pm = s_pm->pm_next) {
1484 d_pm = add_param(&dest->ci_first_pm, &dest->ci_last_pm, s_pm->pm_name,
1485 s_pm->pm_value, 0);
1486 if (s_pm->pm_charset)
1487 d_pm->pm_charset = getcpy(s_pm->pm_charset);
1488 if (s_pm->pm_lang)
1489 d_pm->pm_lang = getcpy(s_pm->pm_lang);
1490 }
1491
1492 dest->ci_comment = src->ci_comment ? add (src->ci_comment, NULL) : NULL;
1493 dest->ci_magic = src->ci_magic ? add (src->ci_magic, NULL) : NULL;
1494 }
1495
1496
1497 static int
1498 decode_part (CT ct) {
1499 char *tmp_decoded;
1500 int status;
1501 char *tempfile;
1502
1503 if ((tempfile = m_mktemp2 (NULL, invo_name, NULL, NULL)) == NULL) {
1504 adios (NULL, "unable to create temporary file in %s", get_temp_dir());
1505 }
1506 tmp_decoded = add (tempfile, NULL);
1507 /* The following call will load ct->c_cefile.ce_file with the tmp
1508 filename of the decoded content. tmp_decoded will contain the
1509 encoded output, get rid of that. */
1510 status = output_message (ct, tmp_decoded);
1511 (void) m_unlink (tmp_decoded);
1512 free (tmp_decoded);
1513
1514 return status;
1515 }
1516
1517
1518 /* Some of the arguments aren't really needed now, but maybe will
1519 be in the future for other than text types. */
1520 static int
1521 reformat_part (CT ct, char *file, char *type, char *subtype, int c_type) {
1522 int output_subtype, output_encoding;
1523 char *cp, *cf;
1524 int status;
1525
1526 /* Hacky: this redirects the output from whatever command is used
1527 to show the part to a file. So, the user can't have any output
1528 redirection in that command.
1529 Could show_multi() in mhshowsbr.c avoid this? */
1530
1531 /* Check for invo_name-format-type/subtype. */
1532 if ((cf = context_find_by_type ("format", type, subtype)) == NULL) {
1533 if (verbosw) {
1534 advise (NULL, "Don't know how to convert %s, there is no "
1535 "%s-format-%s/%s profile entry",
1536 ct->c_file, invo_name, type, subtype);
1537 }
1538 return NOTOK;
1539 } else {
1540 if (strchr (cf, '>')) {
1541 advise (NULL, "'>' prohibited in \"%s\",\nplease fix your "
1542 "%s-format-%s/%s profile entry", cf, invo_name, type,
1543 subtype ? subtype : "");
1544
1545 return NOTOK;
1546 }
1547 }
1548
1549 cp = concat (cf, " >", file, NULL);
1550 status = show_content_aux (ct, 0, cp, NULL, NULL);
1551 free (cp);
1552
1553 /* Unlink decoded content tmp file and free its filename to avoid
1554 leaks. The file stream should already have been closed. */
1555 if (ct->c_cefile.ce_unlink) {
1556 (void) m_unlink (ct->c_cefile.ce_file);
1557 free (ct->c_cefile.ce_file);
1558 ct->c_cefile.ce_file = NULL;
1559 ct->c_cefile.ce_unlink = 0;
1560 }
1561
1562 if (c_type == CT_TEXT) {
1563 output_subtype = TEXT_PLAIN;
1564 } else {
1565 /* Set subtype to 0, which is always an UNKNOWN subtype. */
1566 output_subtype = 0;
1567 }
1568 output_encoding = charset_encoding (ct);
1569
1570 if (set_ct_type (ct, c_type, output_subtype, output_encoding) == OK) {
1571 ct->c_cefile.ce_file = file;
1572 ct->c_cefile.ce_unlink = 1;
1573 } else {
1574 ct->c_cefile.ce_unlink = 0;
1575 status = NOTOK;
1576 }
1577
1578 return status;
1579 }
1580
1581
1582 /* Identifies 7bit or 8bit content based on charset. */
1583 static int
1584 charset_encoding (CT ct) {
1585 char *ct_charset = content_charset (ct);
1586 int encoding = strcasecmp (ct_charset, "US-ASCII") ? CE_8BIT : CE_7BIT;
1587
1588 free (ct_charset);
1589
1590 return encoding;
1591 }
1592
1593
1594 static CT
1595 build_multipart_alt (CT first_alt, CT new_part, int type, int subtype) {
1596 char *boundary_prefix = "----=_nmh-multipart";
1597 char *boundary = concat (boundary_prefix, first_alt->c_partno, NULL);
1598 char *boundary_indicator = "; boundary=";
1599 char *typename, *subtypename, *name;
1600 CT ct;
1601 struct part *p;
1602 struct multipart *m;
1603 const struct str2init *ctinit;
1604
1605 if ((ct = (CT) mh_xcalloc (1, sizeof *ct)) == NULL)
1606 adios (NULL, "out of memory");
1607
1608 /* Set up the multipart/alternative part. These fields of *ct were
1609 initialized to 0 by mh_xcalloc():
1610 c_fp, c_unlink, c_begin, c_end,
1611 c_vrsn, c_ctline, c_celine,
1612 c_id, c_descr, c_dispo, c_partno,
1613 c_ctinfo.ci_comment, c_ctinfo.ci_magic,
1614 c_cefile, c_encoding,
1615 c_digested, c_digest[16], c_ctexbody,
1616 c_ctinitfnx, c_ceopenfnx, c_ceclosefnx, c_cesizefnx,
1617 c_umask, c_rfc934,
1618 c_showproc, c_termproc, c_storeproc, c_storage, c_folder
1619 */
1620
1621 ct->c_file = add (first_alt->c_file, NULL);
1622 ct->c_type = type;
1623 ct->c_subtype = subtype;
1624
1625 ctinit = get_ct_init (ct->c_type);
1626
1627 typename = ct_type_str (type);
1628 subtypename = ct_subtype_str (type, subtype);
1629
1630 {
1631 int serial = 0;
1632 int found_boundary = 1;
1633
1634 while (found_boundary && serial < 1000000) {
1635 found_boundary = 0;
1636
1637 /* Ensure that the boundary doesn't appear in the decoded
1638 content. */
1639 if (new_part->c_cefile.ce_file) {
1640 if ((found_boundary =
1641 boundary_in_content (&new_part->c_cefile.ce_fp,
1642 new_part->c_cefile.ce_file,
1643 boundary)) == -1) {
1644 free (ct);
1645 return NULL;
1646 }
1647 }
1648
1649 /* Ensure that the boundary doesn't appear in the encoded
1650 content. */
1651 if (! found_boundary && new_part->c_file) {
1652 if ((found_boundary = boundary_in_content (&new_part->c_fp,
1653 new_part->c_file,
1654 boundary)) == -1) {
1655 free (ct);
1656 return NULL;
1657 }
1658 }
1659
1660 if (found_boundary) {
1661 /* Try a slightly different boundary. */
1662 char buffer2[16];
1663
1664 free (boundary);
1665 ++serial;
1666 snprintf (buffer2, sizeof buffer2, "%d", serial);
1667 boundary =
1668 concat (boundary_prefix,
1669 first_alt->c_partno ? first_alt->c_partno : "",
1670 "-", buffer2, NULL);
1671 }
1672 }
1673
1674 if (found_boundary) {
1675 advise (NULL, "giving up trying to find a unique boundary");
1676 free (ct);
1677 return NULL;
1678 }
1679 }
1680
1681 name = concat (" ", typename, "/", subtypename, boundary_indicator, "\"",
1682 boundary, "\"", NULL);
1683
1684 /* Load c_first_hf and c_last_hf. */
1685 transfer_noncontent_headers (first_alt, ct);
1686 add_header (ct, add (TYPE_FIELD, NULL), concat (name, "\n", NULL));
1687 free (name);
1688
1689 /* Load c_partno. */
1690 if (first_alt->c_partno) {
1691 ct->c_partno = add (first_alt->c_partno, NULL);
1692 free (first_alt->c_partno);
1693 first_alt->c_partno = concat (ct->c_partno, ".1", NULL);
1694 new_part->c_partno = concat (ct->c_partno, ".2", NULL);
1695 } else {
1696 first_alt->c_partno = add ("1", NULL);
1697 new_part->c_partno = add ("2", NULL);
1698 }
1699
1700 if (ctinit) {
1701 ct->c_ctinfo.ci_type = add (typename, NULL);
1702 ct->c_ctinfo.ci_subtype = add (subtypename, NULL);
1703 }
1704
1705 add_param(&ct->c_ctinfo.ci_first_pm, &ct->c_ctinfo.ci_last_pm,
1706 "boundary", boundary, 0);
1707
1708 p = (struct part *) mh_xmalloc (sizeof *p);
1709 p->mp_next = (struct part *) mh_xmalloc (sizeof *p->mp_next);
1710 p->mp_next->mp_next = NULL;
1711 p->mp_next->mp_part = first_alt;
1712
1713 if ((m = (struct multipart *) mh_xcalloc (1, sizeof (struct multipart))) ==
1714 NULL)
1715 adios (NULL, "out of memory");
1716 m->mp_start = concat (boundary, "\n", NULL);
1717 m->mp_stop = concat (boundary, "--\n", NULL);
1718 m->mp_parts = p;
1719 ct->c_ctparams = m;
1720
1721 free (boundary);
1722
1723 return ct;
1724 }
1725
1726
1727 /* Check that the boundary does not appear in the content. */
1728 static int
1729 boundary_in_content (FILE **fp, char *file, const char *boundary) {
1730 char buffer[BUFSIZ];
1731 size_t bytes_read;
1732 int found_boundary = 0;
1733
1734 /* free_content() will close *fp if we fopen it here. */
1735 if (! *fp && (*fp = fopen (file, "r")) == NULL) {
1736 advise (file, "unable to open %s for reading", file);
1737 return NOTOK;
1738 }
1739
1740 fseeko (*fp, 0L, SEEK_SET);
1741 while ((bytes_read = fread (buffer, 1, sizeof buffer, *fp)) > 0) {
1742 if (find_str (buffer, bytes_read, boundary)) {
1743 found_boundary = 1;
1744 break;
1745 }
1746 }
1747
1748 return found_boundary;
1749 }
1750
1751
1752 /* Remove all non-Content headers. */
1753 static void
1754 transfer_noncontent_headers (CT old, CT new) {
1755 HF hp, hp_prev;
1756
1757 hp_prev = hp = old->c_first_hf;
1758 while (hp) {
1759 HF next = hp->next;
1760
1761 if (strncasecmp (XXX_FIELD_PRF, hp->name, strlen (XXX_FIELD_PRF))) {
1762 if (hp == old->c_last_hf) {
1763 if (hp == old->c_first_hf) {
1764 old->c_last_hf = old->c_first_hf = NULL;
1765 } else {
1766 hp_prev->next = NULL;
1767 old->c_last_hf = hp_prev;
1768 }
1769 } else {
1770 if (hp == old->c_first_hf) {
1771 old->c_first_hf = next;
1772 } else {
1773 hp_prev->next = next;
1774 }
1775 }
1776
1777 /* Put node hp in the new CT. */
1778 if (new->c_first_hf == NULL) {
1779 new->c_first_hf = hp;
1780 } else {
1781 new->c_last_hf->next = hp;
1782 }
1783 new->c_last_hf = hp;
1784 } else {
1785 /* A Content- header, leave in old. */
1786 hp_prev = hp;
1787 }
1788
1789 hp = next;
1790 }
1791 }
1792
1793
1794 static int
1795 set_ct_type (CT ct, int type, int subtype, int encoding) {
1796 char *typename = ct_type_str (type);
1797 char *subtypename = ct_subtype_str (type, subtype);
1798 /* E.g, " text/plain" */
1799 char *type_subtypename = concat (" ", typename, "/", subtypename, NULL);
1800 /* E.g, " text/plain\n" */
1801 char *name_plus_nl = concat (type_subtypename, "\n", NULL);
1802 int found_content_type = 0;
1803 HF hf;
1804 const char *cp = NULL;
1805 char *ctline;
1806 int status;
1807
1808 /* Update/add Content-Type header field. */
1809 for (hf = ct->c_first_hf; hf; hf = hf->next) {
1810 if (! strcasecmp (TYPE_FIELD, hf->name)) {
1811 found_content_type = 1;
1812 free (hf->value);
1813 hf->value = (cp = strchr (ct->c_ctline, ';'))
1814 ? concat (type_subtypename, cp, "\n", NULL)
1815 : add (name_plus_nl, NULL);
1816 }
1817 }
1818 if (! found_content_type) {
1819 add_header (ct, add (TYPE_FIELD, NULL),
1820 (cp = strchr (ct->c_ctline, ';'))
1821 ? concat (type_subtypename, cp, "\n", NULL)
1822 : add (name_plus_nl, NULL));
1823 }
1824
1825 /* Some of these might not be used, but set them anyway. */
1826 ctline = cp
1827 ? concat (type_subtypename, cp, NULL)
1828 : concat (type_subtypename, NULL);
1829 free (ct->c_ctline);
1830 ct->c_ctline = ctline;
1831 /* Leave other ctinfo members as they were. */
1832 free (ct->c_ctinfo.ci_type);
1833 ct->c_ctinfo.ci_type = add (typename, NULL);
1834 free (ct->c_ctinfo.ci_subtype);
1835 ct->c_ctinfo.ci_subtype = add (subtypename, NULL);
1836 ct->c_type = type;
1837 ct->c_subtype = subtype;
1838
1839 free (name_plus_nl);
1840 free (type_subtypename);
1841
1842 status = set_ce (ct, encoding);
1843
1844 return status;
1845 }
1846
1847
1848 /*
1849 * It's not necessary to update the charset parameter of a Content-Type
1850 * header for a text part. According to RFC 2045 Sec. 6.4, the body
1851 * (content) was originally in the specified charset, "and will be in
1852 * that character set again after decoding."
1853 */
1854 static int
1855 decode_text_parts (CT ct, int encoding, const char *decodetypes, int *message_mods) {
1856 int status = OK;
1857 int lf_line_endings = 0;
1858
1859 switch (ct->c_type) {
1860 case CT_MULTIPART: {
1861 struct multipart *m = (struct multipart *) ct->c_ctparams;
1862 struct part *part;
1863
1864 /* Should check to see if the body for this part is encoded?
1865 For now, it gets passed along as-is by InitMultiPart(). */
1866 for (part = m->mp_parts; status == OK && part; part = part->mp_next) {
1867 status = decode_text_parts (part->mp_part, encoding, decodetypes, message_mods);
1868 }
1869 break;
1870 }
1871
1872 case CT_MESSAGE:
1873 if (ct->c_subtype == MESSAGE_EXTERNAL) {
1874 struct exbody *e = (struct exbody *) ct->c_ctparams;
1875
1876 status = decode_text_parts (e->eb_content, encoding, decodetypes, message_mods);
1877 }
1878 break;
1879
1880 default:
1881 if (! should_decode(decodetypes, ct->c_ctinfo.ci_type, ct->c_ctinfo.ci_subtype)) {
1882 break;
1883 }
1884
1885 lf_line_endings =
1886 ct->c_ctparams && ((struct text *) ct->c_ctparams)->lf_line_endings;
1887
1888 switch (ct->c_encoding) {
1889 case CE_BASE64:
1890 case CE_QUOTED: {
1891 int ct_encoding;
1892
1893 if (decode_part (ct) == OK && ct->c_cefile.ce_file) {
1894 const char *reason = NULL;
1895
1896 if ((ct_encoding = content_encoding (ct, &reason)) == CE_BINARY
1897 && encoding != CE_BINARY) {
1898 /* The decoding isn't acceptable so discard it.
1899 Leave status as OK to allow other transformations. */
1900 if (verbosw) {
1901 report (NULL, ct->c_partno, ct->c_file,
1902 "will not decode%s because it is binary (%s)",
1903 ct->c_partno ? ""
1904 : ct->c_ctline ? ct->c_ctline
1905 : "",
1906 reason);
1907 }
1908 (void) m_unlink (ct->c_cefile.ce_file);
1909 free (ct->c_cefile.ce_file);
1910 ct->c_cefile.ce_file = NULL;
1911 } else if (ct->c_encoding == CE_QUOTED &&
1912 ct_encoding == CE_8BIT && encoding == CE_7BIT) {
1913 /* The decoding isn't acceptable so discard it.
1914 Leave status as OK to allow other transformations. */
1915 if (verbosw) {
1916 report (NULL, ct->c_partno, ct->c_file,
1917 "will not decode%s because it is 8bit",
1918 ct->c_partno ? ""
1919 : ct->c_ctline ? ct->c_ctline
1920 : "");
1921 }
1922 (void) m_unlink (ct->c_cefile.ce_file);
1923 free (ct->c_cefile.ce_file);
1924 ct->c_cefile.ce_file = NULL;
1925 } else {
1926 int enc;
1927 if (ct_encoding == CE_BINARY) {
1928 enc = CE_BINARY;
1929 } else if (ct_encoding == CE_8BIT && encoding == CE_7BIT) {
1930 enc = CE_QUOTED;
1931 } else {
1932 enc = charset_encoding (ct);
1933 }
1934 if (set_ce (ct, enc) == OK) {
1935 ++*message_mods;
1936 if (verbosw) {
1937 report (NULL, ct->c_partno, ct->c_file, "decode%s",
1938 ct->c_ctline ? ct->c_ctline : "");
1939 }
1940 if (lf_line_endings) {
1941 strip_crs (ct, message_mods);
1942 }
1943 } else {
1944 status = NOTOK;
1945 }
1946 }
1947 } else {
1948 status = NOTOK;
1949 }
1950 break;
1951 }
1952 case CE_8BIT:
1953 case CE_7BIT:
1954 if (lf_line_endings) {
1955 strip_crs (ct, message_mods);
1956 }
1957 break;
1958 default:
1959 break;
1960 }
1961
1962 break;
1963 }
1964
1965 return status;
1966 }
1967
1968
/*
 * Determine if the part with type[/subtype] should be decoded, according to
 * decodetypes (which came from the -decodetypes switch).
 *
 * The search is done on strings:  decodetypes is bracketed with commas,
 * and then ",type," and, if there is a subtype, ",type/subtype," are
 * looked for in it, without regard to case.  Returns 1 on a match, 0
 * otherwise.
 */
static int
should_decode(const char *decodetypes, const char *type, const char *subtype) {
    char *haystack = concat(",", decodetypes, ",", NULL);
    char *needle = concat(",", type, ",", NULL);
    int found_match = 0;

    if (nmh_strcasestr(haystack, needle)) {
        found_match = 1;
    }
    free(needle);

    /* The type alone didn't match, so try with the subtype. */
    if (! found_match  &&  subtype != NULL) {
        needle = concat(",", type, "/", subtype, ",", NULL);
        if (nmh_strcasestr(haystack, needle)) {
            found_match = 1;
        }
        free(needle);
    }

    free(haystack);

    return found_match;
}
1998
1999
2000 /* See if the decoded content is 7bit, 8bit, or binary. It's binary
2001 if it has any NUL characters, a CR not followed by a LF, or lines
2002 greater than 998 characters in length. If binary, reason is set
2003 to a string explaining why. */
2004 static int
2005 content_encoding (CT ct, const char **reason) {
2006 CE ce = &ct->c_cefile;
2007 int encoding = CE_7BIT;
2008
2009 if (ce->ce_file) {
2010 size_t line_len = 0;
2011 char buffer[BUFSIZ];
2012 size_t inbytes;
2013
2014 if (! ce->ce_fp && (ce->ce_fp = fopen (ce->ce_file, "r")) == NULL) {
2015 advise (ce->ce_file, "unable to open for reading");
2016 return CE_UNKNOWN;
2017 }
2018
2019 fseeko (ce->ce_fp, 0L, SEEK_SET);
2020 while (encoding != CE_BINARY &&
2021 (inbytes = fread (buffer, 1, sizeof buffer, ce->ce_fp)) > 0) {
2022 char *cp;
2023 size_t i;
2024 int last_char_was_cr = 0;
2025
2026 for (i = 0, cp = buffer; i < inbytes; ++i, ++cp) {
2027 if (*cp == '\0' || ++line_len > 998 ||
2028 (*cp != '\n' && last_char_was_cr)) {
2029 encoding = CE_BINARY;
2030 if (*cp == '\0') {
2031 *reason = "null character";
2032 } else if (line_len > 998) {
2033 *reason = "line length > 998";
2034 } else if (*cp != '\n' && last_char_was_cr) {
2035 *reason = "CR not followed by LF";
2036 } else {
2037 /* Should not reach this. */
2038 *reason = "";
2039 }
2040 break;
2041 } else if (*cp == '\n') {
2042 line_len = 0;
2043 } else if (! isascii ((unsigned char) *cp)) {
2044 encoding = CE_8BIT;
2045 }
2046
2047 last_char_was_cr = *cp == '\r' ? 1 : 0;
2048 }
2049 }
2050
2051 fclose (ce->ce_fp);
2052 ce->ce_fp = NULL;
2053 } /* else should never happen */
2054
2055 return encoding;
2056 }
2057
2058
2059 static int
2060 strip_crs (CT ct, int *message_mods) {
2061 char *charset = content_charset (ct);
2062 int status = OK;
2063
2064 /* Only strip carriage returns if content is ASCII or another
2065 charset that has the same readily recognizable CR followed by a
2066 LF. We can include UTF-8 here because if the high-order bit of
2067 a UTF-8 byte is 0, then it must be a single-byte ASCII
2068 character. */
2069 if (! strcasecmp (charset, "US-ASCII") ||
2070 ! strcasecmp (charset, "UTF-8") ||
2071 ! strncasecmp (charset, "ISO-8859-", 9) ||
2072 ! strncasecmp (charset, "WINDOWS-12", 10)) {
2073 char **file = NULL;
2074 FILE **fp = NULL;
2075 size_t begin;
2076 size_t end;
2077 int has_crs = 0;
2078 int opened_input_file = 0;
2079
2080 if (ct->c_cefile.ce_file) {
2081 file = &ct->c_cefile.ce_file;
2082 fp = &ct->c_cefile.ce_fp;
2083 begin = end = 0;
2084 } else if (ct->c_file) {
2085 file = &ct->c_file;
2086 fp = &ct->c_fp;
2087 begin = (size_t) ct->c_begin;
2088 end = (size_t) ct->c_end;
2089 } /* else don't know where the content is */
2090
2091 if (file && *file && fp) {
2092 if (! *fp) {
2093 if ((*fp = fopen (*file, "r")) == NULL) {
2094 advise (*file, "unable to open for reading");
2095 status = NOTOK;
2096 } else {
2097 opened_input_file = 1;
2098 }
2099 }
2100 }
2101
2102 if (fp && *fp) {
2103 char buffer[BUFSIZ];
2104 size_t bytes_read;
2105 size_t bytes_to_read =
2106 end > 0 && end > begin ? end - begin : sizeof buffer;
2107
2108 fseeko (*fp, begin, SEEK_SET);
2109 while ((bytes_read = fread (buffer, 1,
2110 min (bytes_to_read, sizeof buffer),
2111 *fp)) > 0) {
2112 /* Look for CR followed by a LF. This is supposed to
2113 be text so there should be LF's. If not, don't
2114 modify the content. */
2115 char *cp;
2116 size_t i;
2117 int last_char_was_cr = 0;
2118
2119 if (end > 0) { bytes_to_read -= bytes_read; }
2120
2121 for (i = 0, cp = buffer; i < bytes_read; ++i, ++cp) {
2122 if (*cp == '\n' && last_char_was_cr) {
2123 has_crs = 1;
2124 break;
2125 }
2126
2127 last_char_was_cr = *cp == '\r' ? 1 : 0;
2128 }
2129 }
2130
2131 if (has_crs) {
2132 int fd;
2133 char *stripped_content_file;
2134 char *tempfile = m_mktemp2 (NULL, invo_name, &fd, NULL);
2135
2136 if (tempfile == NULL) {
2137 adios (NULL, "unable to create temporary file in %s",
2138 get_temp_dir());
2139 }
2140 stripped_content_file = add (tempfile, NULL);
2141
2142 /* Strip each CR before a LF from the content. */
2143 fseeko (*fp, begin, SEEK_SET);
2144 while ((bytes_read = fread (buffer, 1, sizeof buffer, *fp)) >
2145 0) {
2146 char *cp;
2147 size_t i;
2148 int last_char_was_cr = 0;
2149
2150 for (i = 0, cp = buffer; i < bytes_read; ++i, ++cp) {
2151 if (*cp == '\r') {
2152 last_char_was_cr = 1;
2153 } else if (last_char_was_cr) {
2154 if (*cp != '\n') {
2155 if (write (fd, "\r", 1) < 0) {
2156 advise (tempfile, "CR write");
2157 }
2158 }
2159 if (write (fd, cp, 1) < 0) {
2160 advise (tempfile, "write");
2161 }
2162 last_char_was_cr = 0;
2163 } else {
2164 if (write (fd, cp, 1) < 0) {
2165 advise (tempfile, "write");
2166 }
2167 last_char_was_cr = 0;
2168 }
2169 }
2170 }
2171
2172 if (close (fd)) {
2173 admonish (NULL, "unable to write temporary file %s",
2174 stripped_content_file);
2175 (void) m_unlink (stripped_content_file);
2176 status = NOTOK;
2177 } else {
2178 /* Replace the decoded file with the converted one. */
2179 if (ct->c_cefile.ce_file) {
2180 if (ct->c_cefile.ce_unlink) {
2181 (void) m_unlink (ct->c_cefile.ce_file);
2182 }
2183 free (ct->c_cefile.ce_file);
2184 }
2185 ct->c_cefile.ce_file = stripped_content_file;
2186 ct->c_cefile.ce_unlink = 1;
2187
2188 ++*message_mods;
2189 if (verbosw) {
2190 report (NULL, ct->c_partno,
2191 begin == 0 && end == 0 ? "" : *file,
2192 "stripped CRs");
2193 }
2194 }
2195 }
2196
2197 if (opened_input_file) {
2198 fclose (*fp);
2199 *fp = NULL;
2200 }
2201 }
2202 }
2203
2204 free (charset);
2205
2206 return status;
2207 }
2208
2209
2210 static int
2211 convert_charsets (CT ct, char *dest_charset, int *message_mods) {
2212 int status = OK;
2213
2214 switch (ct->c_type) {
2215 case CT_TEXT:
2216 if (ct->c_subtype == TEXT_PLAIN) {
2217 status = convert_charset (ct, dest_charset, message_mods);
2218 if (status == OK) {
2219 if (verbosw) {
2220 char *ct_charset = content_charset (ct);
2221
2222 report (NULL, ct->c_partno, ct->c_file,
2223 "convert %s to %s", ct_charset, dest_charset);
2224 free (ct_charset);
2225 }
2226 } else {
2227 char *ct_charset = content_charset (ct);
2228
2229 report ("iconv", ct->c_partno, ct->c_file,
2230 "failed to convert %s to %s", ct_charset, dest_charset);
2231 free (ct_charset);
2232 }
2233 }
2234 break;
2235
2236 case CT_MULTIPART: {
2237 struct multipart *m = (struct multipart *) ct->c_ctparams;
2238 struct part *part;
2239
2240 /* Should check to see if the body for this part is encoded?
2241 For now, it gets passed along as-is by InitMultiPart(). */
2242 for (part = m->mp_parts; status == OK && part; part = part->mp_next) {
2243 status =
2244 convert_charsets (part->mp_part, dest_charset, message_mods);
2245 }
2246 break;
2247 }
2248
2249 case CT_MESSAGE:
2250 if (ct->c_subtype == MESSAGE_EXTERNAL) {
2251 struct exbody *e = (struct exbody *) ct->c_ctparams;
2252
2253 status =
2254 convert_charsets (e->eb_content, dest_charset, message_mods);
2255 }
2256 break;
2257
2258 default:
2259 break;
2260 }
2261
2262 return status;
2263 }
2264
2265
2266 /*
2267 * Fix various problems that aren't handled elsewhere. These
2268 * are fixed unconditionally: there are no switches to disable
2269 * them. (Currently, "problems" is just one: an extraneous
2270 * semicolon at the end of a header parameter list.)
2271 */
2272 static int
2273 fix_always (CT ct, int *message_mods) {
2274 int status = OK;
2275
2276 switch (ct->c_type) {
2277 case CT_MULTIPART: {
2278 struct multipart *m = (struct multipart *) ct->c_ctparams;
2279 struct part *part;
2280
2281 for (part = m->mp_parts; status == OK && part; part = part->mp_next) {
2282 status = fix_always (part->mp_part, message_mods);
2283 }
2284 break;
2285 }
2286
2287 case CT_MESSAGE:
2288 if (ct->c_subtype == MESSAGE_EXTERNAL) {
2289 struct exbody *e = (struct exbody *) ct->c_ctparams;
2290
2291 status = fix_always (e->eb_content, message_mods);
2292 }
2293 break;
2294
2295 default: {
2296 HF hf;
2297
2298 for (hf = ct->c_first_hf; hf; hf = hf->next) {
2299 size_t len = strlen (hf->value);
2300
2301 if (strcasecmp (hf->name, TYPE_FIELD) != 0 &&
2302 strcasecmp (hf->name, DISPO_FIELD) != 0) {
2303 /* Only do this for Content-Type and
2304 Content-Disposition fields because those are the
2305 only headers that parse_mime() warns about. */
2306 continue;
2307 }
2308
2309 /* whitespace following a trailing ';' will be nuked as well */
2310 if (hf->value[len - 1] == '\n') {
2311 while (isspace((unsigned char)(hf->value[len - 2]))) {
2312 if (len-- == 0) { break; }
2313 }
2314 }
2315
2316 if (hf->value[len - 2] == ';') {
2317 /* Remove trailing ';' from parameter value. */
2318 hf->value[len - 2] = '\n';
2319 hf->value[len - 1] = '\0';
2320
2321 /* Also, if Content-Type parameter, remove trailing ';'
2322 from ct->c_ctline. This probably isn't necessary
2323 but can't hurt. */
2324 if (strcasecmp(hf->name, TYPE_FIELD) == 0 && ct->c_ctline) {
2325 size_t l = strlen(ct->c_ctline) - 1;
2326 while (isspace((unsigned char)(ct->c_ctline[l])) ||
2327 ct->c_ctline[l] == ';') {
2328 ct->c_ctline[l--] = '\0';
2329 if (l == 0) { break; }
2330 }
2331 }
2332
2333 ++*message_mods;
2334 if (verbosw) {
2335 report (NULL, ct->c_partno, ct->c_file,
2336 "remove trailing ; from %s parameter value",
2337 hf->name);
2338 }
2339 }
2340 }
2341 }}
2342
2343 return status;
2344 }
2345
2346
2347 static int
2348 write_content (CT ct, const char *input_filename, char *outfile, int modify_inplace,
2349 int message_mods) {
2350 int status = OK;
2351
2352 if (modify_inplace) {
2353 if (message_mods > 0) {
2354 if ((status = output_message (ct, outfile)) == OK) {
2355 char *infile = input_filename
2356 ? add (input_filename, NULL)
2357 : add (ct->c_file ? ct->c_file : "-", NULL);
2358
2359 if (remove_file (infile) == OK) {
2360 if (rename (outfile, infile)) {
2361 /* Rename didn't work, possibly because of an
2362 attempt to rename across filesystems. Try
2363 brute force copy. */
2364 int old = open (outfile, O_RDONLY);
2365 int new =
2366 open (infile, O_WRONLY | O_CREAT, m_gmprot ());
2367 int i = -1;
2368
2369 if (old != -1 && new != -1) {
2370 char buffer[BUFSIZ];
2371
2372 while ((i = read (old, buffer, sizeof buffer)) >
2373 0) {
2374 if (write (new, buffer, i) != i) {
2375 i = -1;
2376 break;
2377 }
2378 }
2379 }
2380 if (new != -1) { close (new); }
2381 if (old != -1) { close (old); }
2382 (void) m_unlink (outfile);
2383
2384 if (i < 0) {
2385 /* The -file argument processing used path() to
2386 expand filename to absolute path. */
2387 int file = ct->c_file && ct->c_file[0] == '/';
2388
2389 admonish (NULL, "unable to rename %s %s to %s",
2390 file ? "file" : "message", outfile,
2391 infile);
2392 status = NOTOK;
2393 }
2394 }
2395 } else {
2396 admonish (NULL, "unable to remove input file %s, "
2397 "not modifying it", infile);
2398 (void) m_unlink (outfile);
2399 status = NOTOK;
2400 }
2401
2402 free (infile);
2403 } else {
2404 status = NOTOK;
2405 }
2406 } else {
2407 /* No modifications and didn't need the tmp outfile. */
2408 (void) m_unlink (outfile);
2409 }
2410 } else {
2411 /* Output is going to some file. Produce it whether or not
2412 there were modifications. */
2413 status = output_message (ct, outfile);
2414 }
2415
2416 flush_errors ();
2417 return status;
2418 }
2419
2420
2421 /*
2422 * parse_mime() does not set lf_line_endings in struct text, so use this function to do it.
2423 * It touches the parts the decodetypes identifies.
2424 */
2425 static void
2426 set_text_ctparams(CT ct, char *decodetypes, int lf_line_endings) {
2427 switch (ct->c_type) {
2428 case CT_MULTIPART: {
2429 struct multipart *m = (struct multipart *) ct->c_ctparams;
2430 struct part *part;
2431
2432 for (part = m->mp_parts; part; part = part->mp_next) {
2433 set_text_ctparams(part->mp_part, decodetypes, lf_line_endings);
2434 }
2435 break;
2436 }
2437
2438 case CT_MESSAGE:
2439 if (ct->c_subtype == MESSAGE_EXTERNAL) {
2440 struct exbody *e = (struct exbody *) ct->c_ctparams;
2441
2442 set_text_ctparams(e->eb_content, decodetypes, lf_line_endings);
2443 }
2444 break;
2445
2446 default:
2447 if (should_decode(decodetypes, ct->c_ctinfo.ci_type, ct->c_ctinfo.ci_subtype)) {
2448 if (ct->c_ctparams == NULL) {
2449 if ((ct->c_ctparams = (struct text *) mh_xcalloc (1, sizeof (struct text))) == NULL) {
2450 adios (NULL, "out of memory");
2451 }
2452 }
2453 ((struct text *) ct->c_ctparams)->lf_line_endings = lf_line_endings;
2454 }
2455 }
2456 }
2457
2458
2459 /*
2460 * If "rmmproc" is defined, call that to remove the file. Otherwise,
2461 * use the standard MH backup file.
2462 */
2463 static int
2464 remove_file (const char *file) {
2465 if (rmmproc) {
2466 char *rmm_command = concat (rmmproc, " ", file, NULL);
2467 int status = system (rmm_command);
2468
2469 free (rmm_command);
2470 return WIFEXITED (status) ? WEXITSTATUS (status) : NOTOK;
2471 } else {
2472 /* This is OK for a non-message file, it still uses the
2473 BACKUP_PREFIX form. The backup file will be in the same
2474 directory as file. */
2475 return rename (file, m_backup (file));
2476 }
2477 }
2478
2479
2480 static void
2481 report (char *what, char *partno, char *filename, char *message, ...) {
2482 va_list args;
2483 char *fmt;
2484
2485 if (verbosw) {
2486 va_start (args, message);
2487 fmt = concat (filename, partno ? " part " : ", ",
2488 partno ? partno : "", partno ? ", " : "", message, NULL);
2489
2490 advertise (what, NULL, fmt, args);
2491
2492 free (fmt);
2493 va_end (args);
2494 }
2495 }
2496
2497
/*
 * Terminate the program through done (1).  When called with SIGQUIT,
 * first flush stdout, then write a newline to stderr and flush it.
 * (Presumably installed as a signal handler elsewhere in the file;
 * quitser is defined as an alias for this function.)
 */
static void
pipeser (int i)
{
    switch (i) {
    case SIGQUIT:
        /* Finish pending output and leave the terminal on a fresh line. */
        fflush (stdout);
        fputs ("\n", stderr);
        fflush (stderr);
        break;

    default:
        break;
    }

    done (1);
    /* NOTREACHED */
}