]> diplodocus.org Git - nmh/blob - uip/mhfixmsg.c
Put parameter names in h/utils.h memory function prototypes.
[nmh] / uip / mhfixmsg.c
1 /*
2 * mhfixmsg.c -- rewrite a message with various transformations
3 *
4 * This code is Copyright (c) 2002 and 2013, by the authors of nmh.
5 * See the COPYRIGHT file in the root directory of the nmh
6 * distribution for complete copyright information.
7 */
8
9 #include <h/mh.h>
10 #include <h/mime.h>
11 #include <h/mhparse.h>
12 #include <h/utils.h>
13 #include <h/signals.h>
14 #include <fcntl.h>
15
/*
 * Command line switches, kept in a single X-macro table.  Each entry is
 * X(switch text, minchars, identifier).  The table is expanded twice
 * below: once to produce the switch identifiers used by the argument
 * parser in main(), and once to produce the `switches' array that is
 * handed to smatch(), ambigsw() and print_help().
 */
#define MHFIXMSG_SWITCHES \
    X("decodetext 8bit|7bit|binary", 0, DECODETEXTSW) \
    X("nodecodetext",                0, NDECODETEXTSW) \
    X("decodetypes",                 0, DECODETYPESW) \
    X("crlflinebreaks",              0, CRLFLINEBREAKSSW) \
    X("nocrlflinebreaks",            0, NCRLFLINEBREAKSSW) \
    X("textcharset",                 0, TEXTCHARSETSW) \
    X("notextcharset",               0, NTEXTCHARSETSW) \
    X("reformat",                    0, REFORMATSW) \
    X("noreformat",                  0, NREFORMATSW) \
    X("replacetextplain",            0, REPLACETEXTPLAINSW) \
    X("noreplacetextplain",          0, NREPLACETEXTPLAINSW) \
    X("fixboundary",                 0, FIXBOUNDARYSW) \
    X("nofixboundary",               0, NFIXBOUNDARYSW) \
    X("fixcte",                      0, FIXCOMPOSITECTESW) \
    X("nofixcte",                    0, NFIXCOMPOSITECTESW) \
    X("fixtype mimetype",            0, FIXTYPESW) \
    X("file file",                   0, FILESW) \
    X("outfile file",                0, OUTFILESW) \
    X("rmmproc program",             0, RPROCSW) \
    X("normmproc",                   0, NRPRCSW) \
    X("changecur",                   0, CHGSW) \
    X("nochangecur",                 0, NCHGSW) \
    X("verbose",                     0, VERBSW) \
    X("noverbose",                   0, NVERBSW) \
    X("version",                     0, VERSIONSW) \
    X("help",                        0, HELPSW) \

/* First expansion: each entry contributes just its identifier. */
#define X(sw, minchars, id) id,
DEFINE_SWITCH_ENUM(MHFIXMSG);
#undef X

/* Second expansion: each entry contributes an initializer for `switches'. */
#define X(sw, minchars, id) { sw, minchars, id },
DEFINE_SWITCH_ARRAY(MHFIXMSG, switches);
#undef X
51
52
53 int verbosw;
54 int debugsw; /* Needed by mhparse.c. */
55
56 #define quitser pipeser
57
58 /* mhparse.c */
59 extern int skip_mp_cte_check; /* flag to InitMultiPart */
60 extern int suppress_bogus_mp_content_warning; /* flag to InitMultiPart */
61 extern int bogus_mp_content; /* flag from InitMultiPart */
62 /* flags to/from parse_header_attrs */
63 extern int suppress_extraneous_trailing_semicolon_warning;
64 extern int extraneous_trailing_semicolon;
65
66 /* mhoutsbr.c */
67 int output_message (CT, char *);
68
69 /* mhmisc.c */
70 void flush_errors (void);
71
72 /* mhfree.c */
73 extern CT *cts;
74 void freects_done (int) NORETURN;
75
76 /*
77 * static prototypes
78 */
79 typedef struct fix_transformations {
80 int fixboundary;
81 int fixcompositecte;
82 svector_t fixtypes;
83 int reformat;
84 int replacetextplain;
85 int decodetext;
86 char *decodetypes;
87 /* Whether to use CRLF linebreaks, per RFC 2046 Sec. 4.1.1, par.1. */
88 int lf_line_endings;
89 char *textcharset;
90 } fix_transformations;
91
92 int mhfixmsgsbr (CT *, const fix_transformations *, char *);
93 static int fix_boundary (CT *, int *);
94 static int copy_input_to_output (const char *, const char *);
95 static int get_multipart_boundary (CT, char **);
96 static int replace_boundary (CT, char *, char *);
97 static int fix_types (CT, svector_t, int *);
98 static char *replace_substring (char **, const char *, const char *);
99 static char *remove_parameter (char *, const char *);
100 static int fix_composite_cte (CT, int *);
101 static int set_ce (CT, int);
102 static int ensure_text_plain (CT *, CT, int *, int);
103 static int find_textplain_sibling (CT, int, int *);
104 static int insert_new_text_plain_part (CT, int, CT);
105 static CT build_text_plain_part (CT);
106 static int insert_into_new_mp_alt (CT *, int *);
107 static CT divide_part (CT);
108 static void copy_ctinfo (CI, CI);
109 static int decode_part (CT);
110 static int reformat_part (CT, char *, char *, char *, int);
111 static CT build_multipart_alt (CT, CT, int, int);
112 static int boundary_in_content (FILE **, char *, const char *);
113 static void transfer_noncontent_headers (CT, CT);
114 static int set_ct_type (CT, int type, int subtype, int encoding);
115 static int decode_text_parts (CT, int, const char *, int *);
116 static int should_decode(const char *, const char *, const char *);
117 static int content_encoding (CT, const char **);
118 static int strip_crs (CT, int *);
119 static void update_cte (CT);
120 static int least_restrictive_encoding (CT);
121 static int less_restrictive (int, int);
122 static int convert_charsets (CT, char *, int *);
123 static int fix_always (CT, int *);
124 static int fix_filename_param (char *, char *, PM *, PM *);
125 static int fix_filename_encoding (CT);
126 static int write_content (CT, const char *, char *, int, int);
127 static void set_text_ctparams(CT, char *, int);
128 static int remove_file (const char *);
129 static void report (char *, char *, char *, char *, ...);
130 static void pipeser (int);
131
132
133 int
134 main (int argc, char **argv) {
135 int msgnum;
136 char *cp, *file = NULL, *folder = NULL;
137 char *maildir, buf[100], *outfile = NULL;
138 char **argp, **arguments;
139 struct msgs_array msgs = { 0, 0, NULL };
140 struct msgs *mp = NULL;
141 CT *ctp;
142 FILE *fp;
143 int using_stdin = 0;
144 int chgflag = 1;
145 int status = OK;
146 fix_transformations fx;
147 fx.reformat = fx.fixcompositecte = fx.fixboundary = 1;
148 fx.fixtypes = NULL;
149 fx.replacetextplain = 0;
150 fx.decodetext = CE_8BIT;
151 fx.decodetypes = "text,application/ics"; /* Default, per man page. */
152 fx.lf_line_endings = 0;
153 fx.textcharset = NULL;
154
155 if (nmh_init(argv[0], 2)) { return 1; }
156
157 done = freects_done;
158
159 arguments = getarguments (invo_name, argc, argv, 1);
160 argp = arguments;
161
162 /*
163 * Parse arguments
164 */
165 while ((cp = *argp++)) {
166 if (*cp == '-') {
167 switch (smatch (++cp, switches)) {
168 case AMBIGSW:
169 ambigsw (cp, switches);
170 done (1);
171 case UNKWNSW:
172 adios (NULL, "-%s unknown", cp);
173
174 case HELPSW:
175 snprintf (buf, sizeof buf, "%s [+folder] [msgs] [switches]",
176 invo_name);
177 print_help (buf, switches, 1);
178 done (0);
179 case VERSIONSW:
180 print_version(invo_name);
181 done (0);
182
183 case DECODETEXTSW:
184 if (! (cp = *argp++) || *cp == '-') {
185 adios (NULL, "missing argument to %s", argp[-2]);
186 }
187 if (! strcasecmp (cp, "8bit")) {
188 fx.decodetext = CE_8BIT;
189 } else if (! strcasecmp (cp, "7bit")) {
190 fx.decodetext = CE_7BIT;
191 } else if (! strcasecmp (cp, "binary")) {
192 fx.decodetext = CE_BINARY;
193 } else {
194 adios (NULL, "invalid argument to %s", argp[-2]);
195 }
196 continue;
197 case NDECODETEXTSW:
198 fx.decodetext = 0;
199 continue;
200 case DECODETYPESW:
201 if (! (cp = *argp++) || *cp == '-') {
202 adios (NULL, "missing argument to %s", argp[-2]);
203 }
204 fx.decodetypes = cp;
205 continue;
206 case CRLFLINEBREAKSSW:
207 fx.lf_line_endings = 0;
208 continue;
209 case NCRLFLINEBREAKSSW:
210 fx.lf_line_endings = 1;
211 continue;
212 case TEXTCHARSETSW:
213 if (! (cp = *argp++) || (*cp == '-' && cp[1])) {
214 adios (NULL, "missing argument to %s", argp[-2]);
215 }
216 fx.textcharset = cp;
217 continue;
218 case NTEXTCHARSETSW:
219 fx.textcharset = 0;
220 continue;
221 case FIXBOUNDARYSW:
222 fx.fixboundary = 1;
223 continue;
224 case NFIXBOUNDARYSW:
225 fx.fixboundary = 0;
226 continue;
227 case FIXCOMPOSITECTESW:
228 fx.fixcompositecte = 1;
229 continue;
230 case NFIXCOMPOSITECTESW:
231 fx.fixcompositecte = 0;
232 continue;
233 case FIXTYPESW:
234 if (! (cp = *argp++) || (*cp == '-' && cp[1])) {
235 adios (NULL, "missing argument to %s", argp[-2]);
236 }
237 if (! strncasecmp (cp, "multipart/", 10) ||
238 ! strncasecmp (cp, "message/", 8)) {
239 adios (NULL, "-fixtype %s not allowed", cp);
240 } else if (! strchr (cp, '/')) {
241 adios (NULL, "-fixtype requires type/subtype");
242 }
243 if (fx.fixtypes == NULL) { fx.fixtypes = svector_create (10); }
244 svector_push_back (fx.fixtypes, cp);
245 continue;
246 case REFORMATSW:
247 fx.reformat = 1;
248 continue;
249 case NREFORMATSW:
250 fx.reformat = 0;
251 continue;
252 case REPLACETEXTPLAINSW:
253 fx.replacetextplain = 1;
254 continue;
255 case NREPLACETEXTPLAINSW:
256 fx.replacetextplain = 0;
257 continue;
258 case FILESW:
259 if (! (cp = *argp++) || (*cp == '-' && cp[1])) {
260 adios (NULL, "missing argument to %s", argp[-2]);
261 }
262 file = *cp == '-' ? add (cp, NULL) : path (cp, TFILE);
263 continue;
264 case OUTFILESW:
265 if (! (cp = *argp++) || (*cp == '-' && cp[1])) {
266 adios (NULL, "missing argument to %s", argp[-2]);
267 }
268 outfile = *cp == '-' ? add (cp, NULL) : path (cp, TFILE);
269 continue;
270 case RPROCSW:
271 if (!(rmmproc = *argp++) || *rmmproc == '-') {
272 adios (NULL, "missing argument to %s", argp[-2]);
273 }
274 continue;
275 case NRPRCSW:
276 rmmproc = NULL;
277 continue;
278 case CHGSW:
279 chgflag = 1;
280 continue;
281 case NCHGSW:
282 chgflag = 0;
283 continue;
284 case VERBSW:
285 verbosw = 1;
286 continue;
287 case NVERBSW:
288 verbosw = 0;
289 continue;
290 }
291 }
292 if (*cp == '+' || *cp == '@') {
293 if (folder) {
294 adios (NULL, "only one folder at a time!");
295 } else {
296 folder = pluspath (cp);
297 }
298 } else {
299 if (*cp == '/') {
300 /* Interpret a full path as a filename, not a message. */
301 file = add (cp, NULL);
302 } else {
303 app_msgarg (&msgs, cp);
304 }
305 }
306 }
307
308 SIGNAL (SIGQUIT, quitser);
309 SIGNAL (SIGPIPE, pipeser);
310
311 /*
312 * Read the standard profile setup
313 */
314 if ((fp = fopen (cp = etcpath ("mhn.defaults"), "r"))) {
315 readconfig ((struct node **) 0, fp, cp, 0);
316 fclose (fp);
317 }
318
319 suppress_bogus_mp_content_warning = skip_mp_cte_check = 1;
320 suppress_extraneous_trailing_semicolon_warning = 1;
321
322 if (! context_find ("path")) {
323 free (path ("./", TFOLDER));
324 }
325
326 if (file && msgs.size) {
327 adios (NULL, "cannot specify msg and file at same time!");
328 }
329
330 /*
331 * check if message is coming from file
332 */
333 if (file) {
334 /* If file is stdin, create a tmp file name before parse_mime()
335 has a chance, because it might put in on a different
336 filesystem than the output file. Instead, put it in the
337 user's preferred tmp directory. */
338 CT ct;
339
340 if (! strcmp ("-", file)) {
341 int fd;
342 char *cp;
343
344 using_stdin = 1;
345
346 if ((cp = m_mktemp2 (NULL, invo_name, &fd, NULL)) == NULL) {
347 adios (NULL, "unable to create temporary file in %s",
348 get_temp_dir());
349 } else {
350 free (file);
351 file = add (cp, NULL);
352 cpydata (STDIN_FILENO, fd, "-", file);
353 }
354
355 if (close (fd)) {
356 (void) m_unlink (file);
357 adios (NULL, "failed to write temporary file");
358 }
359 }
360
361 cts = mh_xcalloc(2, sizeof *cts);
362 ctp = cts;
363
364 if ((ct = parse_mime (file))) {
365 set_text_ctparams(ct, fx.decodetypes, fx.lf_line_endings);
366 *ctp++ = ct;
367 } else {
368 advise (NULL, "unable to parse message from file %s", file);
369 status = NOTOK;
370
371 /* If there's an outfile, pass the input message unchanged, so the message won't
372 get dropped from a pipeline. */
373 if (outfile) {
374 /* Something went wrong. Output might be expected, such as if this were run
375 as a filter. Just copy the input to the output. */
376 if (copy_input_to_output (file, outfile) != OK) {
377 advise (NULL, "unable to copy message to %s, it might be lost\n", outfile);
378 }
379 }
380 }
381 } else {
382 /*
383 * message(s) are coming from a folder
384 */
385 CT ct;
386
387 if (! msgs.size) {
388 app_msgarg(&msgs, "cur");
389 }
390 if (! folder) {
391 folder = getfolder (1);
392 }
393 maildir = m_maildir (folder);
394
395 if (chdir (maildir) == NOTOK) {
396 adios (maildir, "unable to change directory to");
397 }
398
399 /* read folder and create message structure */
400 if (! (mp = folder_read (folder, 1))) {
401 adios (NULL, "unable to read folder %s", folder);
402 }
403
404 /* check for empty folder */
405 if (mp->nummsg == 0) {
406 adios (NULL, "no messages in %s", folder);
407 }
408
409 /* parse all the message ranges/sequences and set SELECTED */
410 for (msgnum = 0; msgnum < msgs.size; msgnum++)
411 if (! m_convert (mp, msgs.msgs[msgnum])) {
412 done (1);
413 }
414 seq_setprev (mp); /* set the previous-sequence */
415
416 cts = mh_xcalloc(mp->numsel + 1, sizeof *cts);
417 ctp = cts;
418
419 for (msgnum = mp->lowsel; msgnum <= mp->hghsel; msgnum++) {
420 if (is_selected(mp, msgnum)) {
421 char *msgnam;
422
423 msgnam = m_name (msgnum);
424 if ((ct = parse_mime (msgnam))) {
425 set_text_ctparams(ct, fx.decodetypes, fx.lf_line_endings);
426 *ctp++ = ct;
427 } else {
428 advise (NULL, "unable to parse message %s", msgnam);
429 status = NOTOK;
430
431 /* If there's an outfile, pass the input message unchanged, so the message won't
432 get dropped from a pipeline. */
433 if (outfile) {
434 /* Something went wrong. Output might be expected, such as if this were run
435 as a filter. Just copy the input to the output. */
436 const char *input_filename = path (msgnam, TFILE);
437
438 if (copy_input_to_output (input_filename, outfile) != OK) {
439 advise (NULL, "unable to copy message to %s, it might be lost\n", outfile);
440 }
441 }
442 }
443 }
444 }
445
446 if (chgflag) {
447 seq_setcur (mp, mp->hghsel); /* update current message */
448 }
449 seq_save (mp); /* synchronize sequences */
450 context_replace (pfolder, folder);/* update current folder */
451 context_save (); /* save the context file */
452 }
453
454 if (*cts) {
455 for (ctp = cts; *ctp; ++ctp) {
456 status += mhfixmsgsbr (ctp, &fx, outfile);
457
458 if (using_stdin) {
459 (void) m_unlink (file);
460
461 if (! outfile) {
462 /* Just calling m_backup() unlinks the backup file. */
463 (void) m_backup (file);
464 }
465 }
466 }
467 } else {
468 status = 1;
469 }
470
471 if (fx.fixtypes != NULL) { svector_free (fx.fixtypes); }
472 free (outfile);
473 free (file);
474 free (folder);
475 free (arguments);
476
477 /* done is freects_done, which will clean up all of cts. */
478 done (status);
479 return NOTOK;
480 }
481
482
483 /*
484 * Apply transformations to one message.
485 */
486 int
487 mhfixmsgsbr (CT *ctp, const fix_transformations *fx, char *outfile) {
488 /* Store input filename in case one of the transformations, i.e.,
489 fix_boundary(), rewrites to a tmp file. */
490 char *input_filename = add ((*ctp)->c_file, NULL);
491 int modify_inplace = 0;
492 int message_mods = 0;
493 int status = OK;
494
495 if (outfile == NULL) {
496 modify_inplace = 1;
497
498 if ((*ctp)->c_file) {
499 char *tempfile;
500 if ((tempfile = m_mktemp2 (NULL, invo_name, NULL, NULL)) == NULL) {
501 adios (NULL, "unable to create temporary file in %s",
502 get_temp_dir());
503 }
504 outfile = add (tempfile, NULL);
505 } else {
506 adios (NULL, "missing both input and output filenames\n");
507 }
508 }
509
510 reverse_alternative_parts (*ctp);
511 status = fix_always (*ctp, &message_mods);
512 if (status == OK && fx->fixboundary) {
513 status = fix_boundary (ctp, &message_mods);
514 }
515 if (status == OK && fx->fixtypes != NULL) {
516 status = fix_types (*ctp, fx->fixtypes, &message_mods);
517 }
518 if (status == OK && fx->fixcompositecte) {
519 status = fix_composite_cte (*ctp, &message_mods);
520 }
521 if (status == OK && fx->reformat) {
522 status =
523 ensure_text_plain (ctp, NULL, &message_mods, fx->replacetextplain);
524 }
525 if (status == OK && fx->decodetext) {
526 status = decode_text_parts (*ctp, fx->decodetext, fx->decodetypes,
527 &message_mods);
528 update_cte (*ctp);
529 }
530 if (status == OK && fx->textcharset != NULL) {
531 status = convert_charsets (*ctp, fx->textcharset, &message_mods);
532 }
533
534 if (status == OK && ! (*ctp)->c_umask) {
535 /* Set the umask for the contents file. This currently
536 isn't used but just in case it is in the future. */
537 struct stat st;
538
539 if (stat ((*ctp)->c_file, &st) != NOTOK) {
540 (*ctp)->c_umask = ~(st.st_mode & 0777);
541 } else {
542 (*ctp)->c_umask = ~m_gmprot();
543 }
544 }
545
546 /*
547 * Write the content to a file
548 */
549 if (status == OK) {
550 status = write_content (*ctp, input_filename, outfile, modify_inplace,
551 message_mods);
552 } else if (! modify_inplace) {
553 /* Something went wrong. Output might be expected, such
554 as if this were run as a filter. Just copy the input
555 to the output. */
556 if (copy_input_to_output (input_filename, outfile) != OK) {
557 advise (NULL, "unable to copy message to %s, it might be lost\n", outfile);
558 }
559 }
560
561 if (modify_inplace) {
562 if (status != OK) { (void) m_unlink (outfile); }
563 free (outfile);
564 outfile = NULL;
565 }
566
567 free (input_filename);
568
569 return status;
570 }
571
572
573 /*
574 * Copy input message to output. Assumes not modifying in place, so this
575 * might be running as part of a pipeline.
576 */
577 static int
578 copy_input_to_output (const char *input_filename, const char *output_filename) {
579 int in = open (input_filename, O_RDONLY);
580 int out = strcmp (output_filename, "-")
581 ? open (output_filename, O_WRONLY | O_CREAT, m_gmprot ())
582 : STDOUT_FILENO;
583 int status = OK;
584
585 if (in != -1 && out != -1) {
586 cpydata (in, out, input_filename, output_filename);
587 } else {
588 status = NOTOK;
589 }
590
591 close (out);
592 close (in);
593
594 return status;
595 }
596
597
598 /*
599 * Fix mismatched outer level boundary.
600 */
601 static int
602 fix_boundary (CT *ct, int *message_mods) {
603 struct multipart *mp;
604 int status = OK;
605
606 if (ct && (*ct)->c_type == CT_MULTIPART && bogus_mp_content) {
607 mp = (struct multipart *) (*ct)->c_ctparams;
608
609 /*
610 * 1) Get boundary at end of part.
611 * 2) Get boundary at beginning of part and compare to the end-of-part
612 * boundary.
613 * 3) Write out contents of ct to tmp file, replacing boundary in
614 * header with boundary from part. Set c_unlink to 1.
615 * 4) Free ct.
616 * 5) Call parse_mime() on the tmp file, replacing ct.
617 */
618
619 if (mp && mp->mp_start) {
620 char *part_boundary;
621
622 if (get_multipart_boundary (*ct, &part_boundary) == OK) {
623 char *fixed;
624
625 if ((fixed = m_mktemp2 (NULL, invo_name, NULL, &(*ct)->c_fp))) {
626 if (replace_boundary (*ct, fixed, part_boundary) == OK) {
627 char *filename = add ((*ct)->c_file, NULL);
628 CT fixed_ct;
629
630 free_content (*ct);
631 if ((fixed_ct = parse_mime (fixed))) {
632 *ct = fixed_ct;
633 (*ct)->c_unlink = 1;
634
635 ++*message_mods;
636 if (verbosw) {
637 report (NULL, NULL, filename,
638 "fix multipart boundary");
639 }
640 } else {
641 *ct = NULL;
642 advise (NULL, "unable to parse fixed part");
643 status = NOTOK;
644 }
645 free (filename);
646 } else {
647 advise (NULL, "unable to replace broken boundary");
648 status = NOTOK;
649 }
650 } else {
651 advise (NULL, "unable to create temporary file in %s",
652 get_temp_dir());
653 status = NOTOK;
654 }
655
656 free (part_boundary);
657 } else {
658 /* Couldn't fix the boundary. Report failure so that mhfixmsg
659 doesn't modify the message. */
660 status = NOTOK;
661 }
662 } else {
663 /* No multipart struct, even though the content type is
664 CT_MULTIPART. Report failure so that mhfixmsg doesn't modify
665 the message. */
666 status = NOTOK;
667 }
668 }
669
670 return status;
671 }
672
673
674 /*
675 * Find boundary at end of multipart.
676 */
677 static int
678 get_multipart_boundary (CT ct, char **part_boundary) {
679 char buffer[BUFSIZ];
680 char *end_boundary = NULL;
681 off_t begin = (off_t) ct->c_end > (off_t) (ct->c_begin + sizeof buffer)
682 ? (off_t) (ct->c_end - sizeof buffer)
683 : (off_t) ct->c_begin;
684 size_t bytes_read;
685 int status = OK;
686
687 /* This will fail if the boundary spans fread() calls. BUFSIZ should
688 be big enough, even if it's just 1024, to make that unlikely. */
689
690 /* free_content() will close ct->c_fp. */
691 if (! ct->c_fp && (ct->c_fp = fopen (ct->c_file, "r")) == NULL) {
692 advise (ct->c_file, "unable to open for reading");
693 return NOTOK;
694 }
695
696 /* Get boundary at end of multipart. */
697 while (begin >= (off_t) ct->c_begin) {
698 fseeko (ct->c_fp, begin, SEEK_SET);
699 while ((bytes_read = fread (buffer, 1, sizeof buffer, ct->c_fp)) > 0) {
700 char *cp = rfind_str (buffer, bytes_read, "--");
701
702 if (cp) {
703 char *end;
704
705 /* Trim off trailing "--" and anything beyond. */
706 *cp-- = '\0';
707 if ((end = rfind_str (buffer, cp - buffer, "\n"))) {
708 if (strlen (end) > 3 && *end++ == '\n' &&
709 *end++ == '-' && *end++ == '-') {
710 end_boundary = add (end, NULL);
711 break;
712 }
713 }
714 }
715 }
716
717 if (! end_boundary && begin > (off_t) (ct->c_begin + sizeof buffer)) {
718 begin -= sizeof buffer;
719 } else {
720 break;
721 }
722 }
723
724 /* Get boundary at beginning of multipart. */
725 if (end_boundary) {
726 fseeko (ct->c_fp, ct->c_begin, SEEK_SET);
727 while ((bytes_read = fread (buffer, 1, sizeof buffer, ct->c_fp)) > 0) {
728 if (bytes_read >= strlen (end_boundary)) {
729 char *cp = find_str (buffer, bytes_read, end_boundary);
730
731 if (cp && cp - buffer >= 2 && *--cp == '-' &&
732 *--cp == '-' && (cp > buffer && *--cp == '\n')) {
733 status = OK;
734 break;
735 }
736 } else {
737 /* The start and end boundaries didn't match, or the
738 start boundary doesn't begin with "\n--" (or "--"
739 if at the beginning of buffer). Keep trying. */
740 status = NOTOK;
741 }
742 }
743 } else {
744 status = NOTOK;
745 }
746
747 if (status == OK) {
748 *part_boundary = end_boundary;
749 } else {
750 *part_boundary = NULL;
751 free (end_boundary);
752 }
753
754 return status;
755 }
756
757
758 /*
759 * Open and copy ct->c_file to file, replacing the multipart boundary.
760 */
761 static int
762 replace_boundary (CT ct, char *file, char *boundary) {
763 FILE *fpin, *fpout;
764 int compnum, state;
765 char buf[BUFSIZ], name[NAMESZ];
766 char *np, *vp;
767 m_getfld_state_t gstate = 0;
768 int status = OK;
769
770 if (ct->c_file == NULL) {
771 advise (NULL, "missing input filename");
772 return NOTOK;
773 }
774
775 if ((fpin = fopen (ct->c_file, "r")) == NULL) {
776 advise (ct->c_file, "unable to open for reading");
777 return NOTOK;
778 }
779
780 if ((fpout = fopen (file, "w")) == NULL) {
781 fclose (fpin);
782 advise (file, "unable to open for writing");
783 return NOTOK;
784 }
785
786 for (compnum = 1;;) {
787 int bufsz = (int) sizeof buf;
788
789 switch (state = m_getfld (&gstate, name, buf, &bufsz, fpin)) {
790 case FLD:
791 case FLDPLUS:
792 compnum++;
793
794 /* get copies of the buffers */
795 np = add (name, NULL);
796 vp = add (buf, NULL);
797
798 /* if necessary, get rest of field */
799 while (state == FLDPLUS) {
800 bufsz = sizeof buf;
801 state = m_getfld (&gstate, name, buf, &bufsz, fpin);
802 vp = add (buf, vp); /* add to previous value */
803 }
804
805 if (strcasecmp (TYPE_FIELD, np)) {
806 fprintf (fpout, "%s:%s", np, vp);
807 } else {
808 char *new_ctline, *new_params;
809
810 replace_param(&ct->c_ctinfo.ci_first_pm,
811 &ct->c_ctinfo.ci_last_pm, "boundary",
812 boundary, 0);
813
814 new_ctline = concat(" ", ct->c_ctinfo.ci_type, "/",
815 ct->c_ctinfo.ci_subtype, NULL);
816 new_params = output_params(strlen(TYPE_FIELD) +
817 strlen(new_ctline) + 1,
818 ct->c_ctinfo.ci_first_pm, NULL, 0);
819 fprintf (fpout, "%s:%s%s\n", np, new_ctline,
820 new_params ? new_params : "");
821 free(new_ctline);
822 if (new_params) {
823 free(new_params);
824 }
825 }
826
827 free (vp);
828 free (np);
829
830 continue;
831
832 case BODY:
833 fputs ("\n", fpout);
834 /* buf will have a terminating NULL, skip it. */
835 if ((int) fwrite (buf, 1, bufsz-1, fpout) < bufsz-1) {
836 advise (file, "fwrite");
837 }
838 continue;
839
840 case FILEEOF:
841 break;
842
843 case LENERR:
844 case FMTERR:
845 advise (NULL, "message format error in component #%d", compnum);
846 status = NOTOK;
847 break;
848
849 default:
850 advise (NULL, "getfld() returned %d", state);
851 status = NOTOK;
852 break;
853 }
854
855 break;
856 }
857
858 m_getfld_state_destroy (&gstate);
859 fclose (fpout);
860 fclose (fpin);
861
862 return status;
863 }
864
865
866 /*
867 * Fix Content-Type header to reflect the content of its part.
868 */
869 static int
870 fix_types (CT ct, svector_t fixtypes, int *message_mods) {
871 int status = OK;
872
873 switch (ct->c_type) {
874 case CT_MULTIPART: {
875 struct multipart *m = (struct multipart *) ct->c_ctparams;
876 struct part *part;
877
878 for (part = m->mp_parts; status == OK && part; part = part->mp_next) {
879 status = fix_types (part->mp_part, fixtypes, message_mods);
880 }
881 break;
882 }
883
884 case CT_MESSAGE:
885 if (ct->c_subtype == MESSAGE_EXTERNAL) {
886 struct exbody *e = (struct exbody *) ct->c_ctparams;
887
888 status = fix_types (e->eb_content, fixtypes, message_mods);
889 }
890 break;
891
892 default: {
893 char **typep, *type;
894
895 if (ct->c_ctinfo.ci_type && ct->c_ctinfo.ci_subtype) {
896 for (typep = svector_strs (fixtypes);
897 typep && (type = *typep);
898 ++typep) {
899 char *type_subtype =
900 concat (ct->c_ctinfo.ci_type, "/", ct->c_ctinfo.ci_subtype,
901 NULL);
902
903 if (! strcasecmp (type, type_subtype) &&
904 decode_part (ct) == OK &&
905 ct->c_cefile.ce_file != NULL) {
906 char *ct_type_subtype = mime_type (ct->c_cefile.ce_file);
907 char *cp;
908
909 if ((cp = strchr (ct_type_subtype, ';'))) {
910 /* Truncate to remove any parameter list from
911 mime_type () result. */
912 *cp = '\0';
913 }
914
915 if (strcasecmp (type, ct_type_subtype)) {
916 char *ct_type, *ct_subtype;
917 HF hf;
918
919 /* The Content-Type header does not match the
920 content, so update these struct Content
921 fields to match:
922 * c_type, c_subtype
923 * c_ctinfo.ci_type, c_ctinfo.ci_subtype
924 * c_ctline
925 */
926 /* Extract type and subtype from type/subtype. */
927 ct_type = getcpy (ct_type_subtype);
928 if ((cp = strchr (ct_type, '/'))) {
929 *cp = '\0';
930 ct_subtype = getcpy (++cp);
931 } else {
932 advise (NULL, "missing / in MIME type of %s %s",
933 ct->c_file, ct->c_partno);
934 free (ct_type);
935 return NOTOK;
936 }
937
938 ct->c_type = ct_str_type (ct_type);
939 ct->c_subtype = ct_str_subtype (ct->c_type, ct_subtype);
940
941 free (ct->c_ctinfo.ci_type);
942 ct->c_ctinfo.ci_type = ct_type;
943 free (ct->c_ctinfo.ci_subtype);
944 ct->c_ctinfo.ci_subtype = ct_subtype;
945 if (! replace_substring (&ct->c_ctline, type,
946 ct_type_subtype)) {
947 advise (NULL, "did not find %s in %s",
948 type, ct->c_ctline);
949 }
950
951 /* Update Content-Type header field. */
952 for (hf = ct->c_first_hf; hf; hf = hf->next) {
953 if (! strcasecmp (TYPE_FIELD, hf->name)) {
954 if (replace_substring (&hf->value, type,
955 ct_type_subtype)) {
956 ++*message_mods;
957 if (verbosw) {
958 report (NULL, ct->c_partno, ct->c_file,
959 "change Content-Type in header "
960 "from %s to %s",
961 type, ct_type_subtype);
962 }
963 break;
964 } else {
965 advise (NULL, "did not find %s in %s",
966 type, hf->value);
967 }
968 }
969 }
970 }
971 free (ct_type_subtype);
972 }
973 free (type_subtype);
974 }
975 }
976 }}
977
978 return status;
979 }
980
981
/*
 * Replace the first occurrence of `old' in *str with `new'.
 *
 * The result is built in newly allocated space; the previous *str is
 * freed and *str is pointed at the result, which is also returned.  If
 * `old' does not occur in *str, nothing is changed and NULL is returned.
 */
char *
replace_substring (char **str, const char *old, const char *new) {
    char *match = strstr (*str, old);
    char *tail, *result;

    if (! match) {
        return NULL;
    }

    tail = match + strlen (old);

    if (match == *str) {
        /* Nothing in front of the match. */
        result = concat (new, tail, NULL);
    } else {
        /* Copy the whole string, then cut it off where the match starts. */
        char *head = getcpy (*str);

        head[match - *str] = '\0';
        result = concat (head, new, tail, NULL);
        free (head);
    }

    free (*str);
    *str = result;

    return result;
}
1009
1010
/*
 * Remove a name=value parameter, given just its name, from a header value.
 *
 * The removal is done in place and `str' is returned.  The whitespace
 * and the semicolon in front of the parameter go with it; whatever
 * follows the value, including the semicolon that introduces the next
 * parameter, is kept.  `str' is returned unchanged if the parameter is
 * not present or `name' is empty.
 *
 * The name is matched exactly as spelled (case matters), and only where
 * it starts a parameter, so removing "name" leaves "filename=..." alone.
 */
static char *
remove_parameter (char *str, const char *name) {
    /* It looks to me, based on the BNF in RFC 2045, that there can't
       be whitespace between the parameter name and the "=", or
       between the "=" and the parameter value. */
    const size_t name_len = strlen (name);
    char *cp = str;

    if (name_len == 0) {
        return str;
    }

    /* Find the name where it begins a parameter and is followed by "=". */
    while ((cp = strstr (cp, name)) != NULL) {
        const int at_start =
            cp == str || *(cp-1) == ';' || isspace ((unsigned char) *(cp-1));

        if (at_start && cp[name_len] == '=') {
            break;
        }
        ++cp;
    }

    if (cp != NULL) {
        char *start, *end;

        /* Remove any leading spaces, before the parameter name. */
        for (start = cp;
             start > str && isspace ((unsigned char) *(start-1));
             --start) {
            continue;
        }
        /* Remove a leading semicolon. */
        if (start > str && *(start-1) == ';') { --start; }

        /* Step over the name and the "=". */
        end = cp + name_len + 1;
        if (*end == '"') {
            /* Skip past the quoted value, honoring backslash escapes. */
            for (++end; *end && *end != '"'; ++end) {
                if (*end == '\\' && end[1]) { ++end; }
            }
            /* Then the final quote, unless the value was never closed. */
            if (*end == '"') { ++end; }
        } else {
            /* Skip past the value.  It ends at whitespace or at the
               semicolon that introduces the next parameter. */
            for (; *end && *end != ';' && ! isspace ((unsigned char) *end);
                 ++end) {
                continue;
            }
        }

        /* Close the gap, bringing the trailing null along. */
        (void) memmove (start, end, strlen (end) + 1);
    }

    return str;
}
1056
1057
1058 /*
1059 * Fix Content-Transfer-Encoding of composite,, e.g., message or multipart, part.
1060 * According to RFC 2045 Sec. 6.4, it must be 7bit, 8bit, or binary. Set it to
1061 * 8 bit.
1062 */
1063 static int
1064 fix_composite_cte (CT ct, int *message_mods) {
1065 int status = OK;
1066
1067 if (ct->c_type == CT_MESSAGE || ct->c_type == CT_MULTIPART) {
1068 if (ct->c_encoding != CE_7BIT && ct->c_encoding != CE_8BIT &&
1069 ct->c_encoding != CE_BINARY) {
1070 HF hf;
1071
1072 for (hf = ct->c_first_hf; hf; hf = hf->next) {
1073 char *name = hf->name;
1074 for (; *name && isspace ((unsigned char) *name); ++name) {
1075 continue;
1076 }
1077
1078 if (! strncasecmp (name, ENCODING_FIELD,
1079 strlen (ENCODING_FIELD))) {
1080 char *prefix = "Nmh-REPLACED-INVALID-";
1081 HF h = mh_xmalloc (sizeof *h);
1082
1083 h->name = add (hf->name, NULL);
1084 h->hf_encoding = hf->hf_encoding;
1085 h->next = hf->next;
1086 hf->next = h;
1087
1088 /* Retain old header but prefix its name. */
1089 free (hf->name);
1090 hf->name = concat (prefix, h->name, NULL);
1091
1092 ++*message_mods;
1093 if (verbosw) {
1094 char *encoding = cpytrim (hf->value);
1095 report (NULL, ct->c_partno, ct->c_file,
1096 "replace Content-Transfer-Encoding of %s "
1097 "with 8 bit", encoding);
1098 free (encoding);
1099 }
1100
1101 h->value = add (" 8bit\n", NULL);
1102
1103 /* Don't need to warn for multiple C-T-E header
1104 fields, parse_mime() already does that. But
1105 if there are any, fix them all as necessary. */
1106 hf = h;
1107 }
1108 }
1109
1110 set_ce (ct, CE_8BIT);
1111 }
1112
1113 if (ct->c_type == CT_MULTIPART) {
1114 struct multipart *m;
1115 struct part *part;
1116
1117 m = (struct multipart *) ct->c_ctparams;
1118 for (part = m->mp_parts; part; part = part->mp_next) {
1119 if (fix_composite_cte (part->mp_part, message_mods) != OK) {
1120 status = NOTOK;
1121 break;
1122 }
1123 }
1124 }
1125 }
1126
1127 return status;
1128 }
1129
1130
1131 /*
1132 * Set content encoding.
1133 */
1134 static int
1135 set_ce (CT ct, int encoding) {
1136 const char *ce = ce_str (encoding);
1137 const struct str2init *ctinit = get_ce_method (ce);
1138
1139 if (ctinit) {
1140 char *cte = concat (" ", ce, "\n", NULL);
1141 int found_cte = 0;
1142 HF hf;
1143 /* Decoded contents might be in ct->c_cefile.ce_file, if the
1144 caller is decode_text_parts (). Save because we'll
1145 overwrite below. */
1146 struct cefile decoded_content_info = ct->c_cefile;
1147
1148 ct->c_encoding = encoding;
1149
1150 ct->c_ctinitfnx = ctinit->si_init;
1151 /* This will assign ct->c_cefile with an all-0 struct, which
1152 is what we want. */
1153 (*ctinit->si_init) (ct);
1154 /* After returning, the caller should set
1155 ct->c_cefile.ce_file to the name of the file containing
1156 the contents. */
1157
1158 if (ct->c_ceclosefnx) {
1159 (*ct->c_ceclosefnx) (ct);
1160 }
1161
1162 /* Restore the cefile. */
1163 ct->c_cefile = decoded_content_info;
1164
1165 /* Update/add Content-Transfer-Encoding header field. */
1166 for (hf = ct->c_first_hf; hf; hf = hf->next) {
1167 if (! strcasecmp (ENCODING_FIELD, hf->name)) {
1168 found_cte = 1;
1169 free (hf->value);
1170 hf->value = cte;
1171 }
1172 }
1173 if (! found_cte) {
1174 add_header (ct, add (ENCODING_FIELD, NULL), cte);
1175 }
1176
1177 /* Update c_celine. It's used only by mhlist -debug. */
1178 free (ct->c_celine);
1179 ct->c_celine = add (cte, NULL);
1180
1181 return OK;
1182 } else {
1183 return NOTOK;
1184 }
1185 }
1186
1187
1188 /*
1189 * Make sure each text part has a corresponding text/plain part.
1190 */
1191 static int
1192 ensure_text_plain (CT *ct, CT parent, int *message_mods, int replacetextplain) {
1193 int status = OK;
1194
1195 switch ((*ct)->c_type) {
1196 case CT_TEXT: {
1197 /* Nothing to do for text/plain. */
1198 if ((*ct)->c_subtype == TEXT_PLAIN) { return OK; }
1199
1200 if (parent && parent->c_type == CT_MULTIPART &&
1201 parent->c_subtype == MULTI_ALTERNATE) {
1202 int new_subpart_number = 1;
1203 int has_text_plain =
1204 find_textplain_sibling (parent, replacetextplain,
1205 &new_subpart_number);
1206
1207 if (! has_text_plain) {
1208 /* Parent is a multipart/alternative. Insert a new
1209 text/plain subpart. */
1210 const int inserted =
1211 insert_new_text_plain_part (*ct, new_subpart_number,
1212 parent);
1213 if (inserted) {
1214 ++*message_mods;
1215 if (verbosw) {
1216 report (NULL, parent->c_partno, parent->c_file,
1217 "insert text/plain part");
1218 }
1219 } else {
1220 status = NOTOK;
1221 }
1222 }
1223 } else if (parent && parent->c_type == CT_MULTIPART &&
1224 parent->c_subtype == MULTI_RELATED) {
1225 char *type_subtype =
1226 concat ((*ct)->c_ctinfo.ci_type, "/",
1227 (*ct)->c_ctinfo.ci_subtype, NULL);
1228 const char *parent_type =
1229 get_param (parent->c_ctinfo.ci_first_pm, "type", '?', 1);
1230 int new_subpart_number = 1;
1231 int has_text_plain = 0;
1232
1233 /* Have to do string comparison on the subtype because we
1234 don't enumerate all of them in c_subtype values.
1235 parent_type will be NULL if the multipart/related part
1236 doesn't have a type parameter. The type parameter must
1237 be specified according to RFC 2387 Sec. 3.1 but not all
1238 messages comply. */
1239 if (parent_type && strcasecmp (type_subtype, parent_type) == 0) {
1240 /* The type of this part matches the root type of the
1241 parent multipart/related. Look to see if there's
1242 text/plain sibling. */
1243 has_text_plain =
1244 find_textplain_sibling (parent, replacetextplain,
1245 &new_subpart_number);
1246 }
1247
1248 free (type_subtype);
1249
1250 if (! has_text_plain) {
1251 struct multipart *mp = (struct multipart *) parent->c_ctparams;
1252 struct part *part;
1253 int siblings = 0;
1254
1255 for (part = mp->mp_parts; part; part = part->mp_next) {
1256 if (*ct != part->mp_part) {
1257 ++siblings;
1258 }
1259 }
1260
1261 if (siblings) {
1262 /* Parent is a multipart/related. Insert a new
1263 text/plain subpart in a new multipart/alternative. */
1264 if (insert_into_new_mp_alt (ct, message_mods)) {
1265 /* Not an error if text/plain couldn't be added. */
1266 }
1267 } else {
1268 /* There are no siblings, so insert a new text/plain
1269 subpart, and change the parent type from
1270 multipart/related to multipart/alternative. */
1271 const int inserted =
1272 insert_new_text_plain_part (*ct, new_subpart_number,
1273 parent);
1274
1275 if (inserted) {
1276 HF hf;
1277
1278 parent->c_subtype = MULTI_ALTERNATE;
1279 parent->c_ctinfo.ci_subtype = getcpy ("alternative");
1280 if (! replace_substring (&parent->c_ctline, "/related",
1281 "/alternative")) {
1282 advise (NULL,
1283 "did not find multipart/related in %s",
1284 parent->c_ctline);
1285 }
1286
1287 /* Update Content-Type header field. */
1288 for (hf = parent->c_first_hf; hf; hf = hf->next) {
1289 if (! strcasecmp (TYPE_FIELD, hf->name)) {
1290 if (replace_substring (&hf->value, "/related",
1291 "/alternative")) {
1292 ++*message_mods;
1293 if (verbosw) {
1294 report (NULL, parent->c_partno,
1295 parent->c_file,
1296 "insert text/plain part");
1297 }
1298
1299 /* Remove, e.g., type="text/html" from
1300 multipart/alternative. */
1301 remove_parameter (hf->value, "type");
1302 break;
1303 } else {
1304 advise (NULL, "did not find multipart/"
1305 "related in header %s",
1306 hf->value);
1307 }
1308 }
1309 }
1310 } else {
1311 /* Not an error if text/plain couldn't be inserted. */
1312 }
1313 }
1314 }
1315 } else {
1316 if (insert_into_new_mp_alt (ct, message_mods)) {
1317 status = NOTOK;
1318 }
1319 }
1320 break;
1321 }
1322
1323 case CT_MULTIPART: {
1324 struct multipart *mp = (struct multipart *) (*ct)->c_ctparams;
1325 struct part *part;
1326
1327 for (part = mp->mp_parts; status == OK && part; part = part->mp_next) {
1328 if ((*ct)->c_type == CT_MULTIPART) {
1329 status = ensure_text_plain (&part->mp_part, *ct, message_mods,
1330 replacetextplain);
1331 }
1332 }
1333 break;
1334 }
1335
1336 case CT_MESSAGE:
1337 if ((*ct)->c_subtype == MESSAGE_EXTERNAL) {
1338 struct exbody *e = (struct exbody *) (*ct)->c_ctparams;
1339
1340 status = ensure_text_plain (&e->eb_content, *ct, message_mods,
1341 replacetextplain);
1342 }
1343 break;
1344 }
1345
1346 return status;
1347 }
1348
1349
1350 /*
1351 * See if there is a sibling text/plain, and return its subpart number.
1352 */
1353 static int
1354 find_textplain_sibling (CT parent, int replacetextplain,
1355 int *new_subpart_number) {
1356 struct multipart *mp = (struct multipart *) parent->c_ctparams;
1357 struct part *part, *prev;
1358 int has_text_plain = 0;
1359
1360 for (prev = part = mp->mp_parts; part; part = part->mp_next) {
1361 ++*new_subpart_number;
1362 if (part->mp_part->c_type == CT_TEXT &&
1363 part->mp_part->c_subtype == TEXT_PLAIN) {
1364 if (replacetextplain) {
1365 struct part *old_part;
1366 if (part == mp->mp_parts) {
1367 old_part = mp->mp_parts;
1368 mp->mp_parts = part->mp_next;
1369 } else {
1370 old_part = prev->mp_next;
1371 prev->mp_next = part->mp_next;
1372 }
1373 if (verbosw) {
1374 report (NULL, parent->c_partno, parent->c_file,
1375 "remove text/plain part %s",
1376 old_part->mp_part->c_partno);
1377 }
1378 free_content (old_part->mp_part);
1379 free (old_part);
1380 } else {
1381 has_text_plain = 1;
1382 }
1383 break;
1384 }
1385 prev = part;
1386 }
1387
1388 return has_text_plain;
1389 }
1390
1391
1392 /*
1393 * Insert a new text/plain part.
1394 */
1395 static int
1396 insert_new_text_plain_part (CT ct, int new_subpart_number, CT parent) {
1397 struct multipart *mp = (struct multipart *) parent->c_ctparams;
1398 struct part *new_part = mh_xmalloc (sizeof *new_part);
1399
1400 if ((new_part->mp_part = build_text_plain_part (ct))) {
1401 char buffer[16];
1402 snprintf (buffer, sizeof buffer, "%d", new_subpart_number);
1403
1404 new_part->mp_next = mp->mp_parts;
1405 mp->mp_parts = new_part;
1406 new_part->mp_part->c_partno =
1407 concat (parent->c_partno ? parent->c_partno : "1", ".",
1408 buffer, NULL);
1409
1410 return 1;
1411 } else {
1412 free_content (new_part->mp_part);
1413 free (new_part);
1414
1415 return 0;
1416 }
1417 }
1418
1419
1420 /*
1421 * Create a text/plain part to go along with non-plain sibling part.
1422 */
1423 static CT
1424 build_text_plain_part (CT encoded_part) {
1425 CT tp_part = divide_part (encoded_part);
1426 char *tmp_plain_file = NULL;
1427
1428 if (decode_part (tp_part) == OK) {
1429 /* Now, tp_part->c_cefile.ce_file is the name of the tmp file that
1430 contains the decoded contents. And the decoding function, such
1431 as openQuoted, will have set ...->ce_unlink to 1 so that it will
1432 be unlinked by free_content (). */
1433 char *tempfile;
1434
1435 if ((tempfile = m_mktemp2 (NULL, invo_name, NULL, NULL)) == NULL) {
1436 advise (NULL, "unable to create temporary file in %s",
1437 get_temp_dir());
1438 } else {
1439 tmp_plain_file = add (tempfile, NULL);
1440 if (reformat_part (tp_part, tmp_plain_file,
1441 tp_part->c_ctinfo.ci_type,
1442 tp_part->c_ctinfo.ci_subtype,
1443 tp_part->c_type) == OK) {
1444 return tp_part;
1445 }
1446 }
1447 }
1448
1449 free_content (tp_part);
1450 if (tmp_plain_file) { (void) m_unlink (tmp_plain_file); }
1451 free (tmp_plain_file);
1452
1453 return NULL;
1454 }
1455
1456
1457 /*
1458 * Slip new text/plain part into a new multipart/alternative.
1459 */
1460 static int
1461 insert_into_new_mp_alt (CT *ct, int *message_mods) {
1462 CT tp_part = build_text_plain_part (*ct);
1463 int status = OK;
1464
1465 if (tp_part) {
1466 CT mp_alt = build_multipart_alt (*ct, tp_part, CT_MULTIPART,
1467 MULTI_ALTERNATE);
1468 if (mp_alt) {
1469 struct multipart *mp = (struct multipart *) mp_alt->c_ctparams;
1470
1471 if (mp && mp->mp_parts) {
1472 mp->mp_parts->mp_part = tp_part;
1473 /* Make the new multipart/alternative the parent. */
1474 *ct = mp_alt;
1475
1476 ++*message_mods;
1477 if (verbosw) {
1478 report (NULL, (*ct)->c_partno, (*ct)->c_file,
1479 "insert text/plain part");
1480 }
1481 } else {
1482 free_content (tp_part);
1483 free_content (mp_alt);
1484 status = NOTOK;
1485 }
1486 } else {
1487 status = NOTOK;
1488 }
1489 } else {
1490 /* Not an error if text/plain couldn't be built. */
1491 }
1492
1493 return status;
1494 }
1495
1496
1497 /*
1498 * Clone a MIME part.
1499 */
1500 static CT
1501 divide_part (CT ct) {
1502 CT new_part;
1503
1504 new_part = mh_xcalloc(1, sizeof *new_part);
1505 /* Just copy over what is needed for decoding. c_vrsn and
1506 c_celine aren't necessary. */
1507 new_part->c_file = add (ct->c_file, NULL);
1508 new_part->c_begin = ct->c_begin;
1509 new_part->c_end = ct->c_end;
1510 copy_ctinfo (&new_part->c_ctinfo, &ct->c_ctinfo);
1511 new_part->c_type = ct->c_type;
1512 new_part->c_cefile = ct->c_cefile;
1513 new_part->c_encoding = ct->c_encoding;
1514 new_part->c_ctinitfnx = ct->c_ctinitfnx;
1515 new_part->c_ceopenfnx = ct->c_ceopenfnx;
1516 new_part->c_ceclosefnx = ct->c_ceclosefnx;
1517 new_part->c_cesizefnx = ct->c_cesizefnx;
1518
1519 /* c_ctline is used by reformat__part(), so it can preserve
1520 anything after the type/subtype. */
1521 new_part->c_ctline = add (ct->c_ctline, NULL);
1522
1523 return new_part;
1524 }
1525
1526
1527 /*
1528 * Copy the content info from one part to another.
1529 */
1530 static void
1531 copy_ctinfo (CI dest, CI src) {
1532 PM s_pm, d_pm;
1533
1534 dest->ci_type = src->ci_type ? add (src->ci_type, NULL) : NULL;
1535 dest->ci_subtype = src->ci_subtype ? add (src->ci_subtype, NULL) : NULL;
1536
1537 for (s_pm = src->ci_first_pm; s_pm; s_pm = s_pm->pm_next) {
1538 d_pm = add_param(&dest->ci_first_pm, &dest->ci_last_pm, s_pm->pm_name,
1539 s_pm->pm_value, 0);
1540 if (s_pm->pm_charset) {
1541 d_pm->pm_charset = getcpy(s_pm->pm_charset);
1542 }
1543 if (s_pm->pm_lang) {
1544 d_pm->pm_lang = getcpy(s_pm->pm_lang);
1545 }
1546 }
1547
1548 dest->ci_comment = src->ci_comment ? add (src->ci_comment, NULL) : NULL;
1549 dest->ci_magic = src->ci_magic ? add (src->ci_magic, NULL) : NULL;
1550 }
1551
1552
1553 /*
1554 * Decode content.
1555 */
1556 static int
1557 decode_part (CT ct) {
1558 char *tmp_decoded;
1559 int status;
1560 char *tempfile;
1561
1562 if ((tempfile = m_mktemp2 (NULL, invo_name, NULL, NULL)) == NULL) {
1563 adios (NULL, "unable to create temporary file in %s", get_temp_dir());
1564 }
1565 tmp_decoded = add (tempfile, NULL);
1566 /* The following call will load ct->c_cefile.ce_file with the tmp
1567 filename of the decoded content. tmp_decoded will contain the
1568 encoded output, get rid of that. */
1569 status = output_message (ct, tmp_decoded);
1570 (void) m_unlink (tmp_decoded);
1571 free (tmp_decoded);
1572
1573 return status;
1574 }
1575
1576
1577 /*
1578 * Reformat content as plain text.
1579 * Some of the arguments aren't really needed now, but maybe will
1580 * be in the future for other than text types.
1581 */
1582 static int
1583 reformat_part (CT ct, char *file, char *type, char *subtype, int c_type) {
1584 int output_subtype, output_encoding;
1585 const char *reason = NULL;
1586 char *cp, *cf;
1587 int status;
1588
1589 /* Hacky: this redirects the output from whatever command is used
1590 to show the part to a file. So, the user can't have any output
1591 redirection in that command.
1592 Could show_multi() in mhshowsbr.c avoid this? */
1593
1594 /* Check for invo_name-format-type/subtype. */
1595 if ((cf = context_find_by_type ("format", type, subtype)) == NULL) {
1596 if (verbosw) {
1597 advise (NULL, "Don't know how to convert %s, there is no "
1598 "%s-format-%s/%s profile entry",
1599 ct->c_file, invo_name, type, subtype);
1600 }
1601 return NOTOK;
1602 } else {
1603 if (strchr (cf, '>')) {
1604 advise (NULL, "'>' prohibited in \"%s\",\nplease fix your "
1605 "%s-format-%s/%s profile entry", cf, invo_name, type,
1606 subtype ? subtype : "");
1607
1608 return NOTOK;
1609 }
1610 }
1611
1612 cp = concat (cf, " >", file, NULL);
1613 status = show_content_aux (ct, 0, cp, NULL, NULL);
1614 free (cp);
1615
1616 /* Unlink decoded content tmp file and free its filename to avoid
1617 leaks. The file stream should already have been closed. */
1618 if (ct->c_cefile.ce_unlink) {
1619 (void) m_unlink (ct->c_cefile.ce_file);
1620 free (ct->c_cefile.ce_file);
1621 ct->c_cefile.ce_file = NULL;
1622 ct->c_cefile.ce_unlink = 0;
1623 }
1624
1625 if (c_type == CT_TEXT) {
1626 output_subtype = TEXT_PLAIN;
1627 } else {
1628 /* Set subtype to 0, which is always an UNKNOWN subtype. */
1629 output_subtype = 0;
1630 }
1631
1632 output_encoding = content_encoding (ct, &reason);
1633 if (set_ct_type (ct, c_type, output_subtype, output_encoding) == OK) {
1634 ct->c_cefile.ce_file = file;
1635 ct->c_cefile.ce_unlink = 1;
1636 } else {
1637 ct->c_cefile.ce_unlink = 0;
1638 status = NOTOK;
1639 }
1640
1641 return status;
1642 }
1643
1644
1645 /*
1646 * Fill in a multipart/alternative part.
1647 */
1648 static CT
1649 build_multipart_alt (CT first_alt, CT new_part, int type, int subtype) {
1650 char *boundary_prefix = "----=_nmh-multipart";
1651 char *boundary = concat (boundary_prefix, first_alt->c_partno, NULL);
1652 char *boundary_indicator = "; boundary=";
1653 char *typename, *subtypename, *name;
1654 CT ct;
1655 struct part *p;
1656 struct multipart *m;
1657 const struct str2init *ctinit;
1658
1659 ct = mh_xcalloc(1, sizeof *ct);
1660
1661 /* Set up the multipart/alternative part. These fields of *ct were
1662 initialized to 0 by mh_xcalloc():
1663 c_fp, c_unlink, c_begin, c_end,
1664 c_vrsn, c_ctline, c_celine,
1665 c_id, c_descr, c_dispo, c_partno,
1666 c_ctinfo.ci_comment, c_ctinfo.ci_magic,
1667 c_cefile, c_encoding,
1668 c_digested, c_digest[16], c_ctexbody,
1669 c_ctinitfnx, c_ceopenfnx, c_ceclosefnx, c_cesizefnx,
1670 c_umask, c_rfc934,
1671 c_showproc, c_termproc, c_storeproc, c_storage, c_folder
1672 */
1673
1674 ct->c_file = add (first_alt->c_file, NULL);
1675 ct->c_type = type;
1676 ct->c_subtype = subtype;
1677
1678 ctinit = get_ct_init (ct->c_type);
1679
1680 typename = ct_type_str (type);
1681 subtypename = ct_subtype_str (type, subtype);
1682
1683 {
1684 int serial = 0;
1685 int found_boundary = 1;
1686
1687 while (found_boundary && serial < 1000000) {
1688 found_boundary = 0;
1689
1690 /* Ensure that the boundary doesn't appear in the decoded
1691 content. */
1692 if (new_part->c_cefile.ce_file) {
1693 if ((found_boundary =
1694 boundary_in_content (&new_part->c_cefile.ce_fp,
1695 new_part->c_cefile.ce_file,
1696 boundary)) == -1) {
1697 free (ct);
1698 return NULL;
1699 }
1700 }
1701
1702 /* Ensure that the boundary doesn't appear in the encoded
1703 content. */
1704 if (! found_boundary && new_part->c_file) {
1705 if ((found_boundary = boundary_in_content (&new_part->c_fp,
1706 new_part->c_file,
1707 boundary)) == -1) {
1708 free (ct);
1709 return NULL;
1710 }
1711 }
1712
1713 if (found_boundary) {
1714 /* Try a slightly different boundary. */
1715 char buffer2[16];
1716
1717 free (boundary);
1718 ++serial;
1719 snprintf (buffer2, sizeof buffer2, "%d", serial);
1720 boundary =
1721 concat (boundary_prefix,
1722 first_alt->c_partno ? first_alt->c_partno : "",
1723 "-", buffer2, NULL);
1724 }
1725 }
1726
1727 if (found_boundary) {
1728 advise (NULL, "giving up trying to find a unique boundary");
1729 free (ct);
1730 return NULL;
1731 }
1732 }
1733
1734 name = concat (" ", typename, "/", subtypename, boundary_indicator, "\"",
1735 boundary, "\"", NULL);
1736
1737 /* Load c_first_hf and c_last_hf. */
1738 transfer_noncontent_headers (first_alt, ct);
1739 add_header (ct, add (TYPE_FIELD, NULL), concat (name, "\n", NULL));
1740 free (name);
1741
1742 /* Load c_partno. */
1743 if (first_alt->c_partno) {
1744 ct->c_partno = add (first_alt->c_partno, NULL);
1745 free (first_alt->c_partno);
1746 first_alt->c_partno = concat (ct->c_partno, ".1", NULL);
1747 new_part->c_partno = concat (ct->c_partno, ".2", NULL);
1748 } else {
1749 first_alt->c_partno = add ("1", NULL);
1750 new_part->c_partno = add ("2", NULL);
1751 }
1752
1753 if (ctinit) {
1754 ct->c_ctinfo.ci_type = add (typename, NULL);
1755 ct->c_ctinfo.ci_subtype = add (subtypename, NULL);
1756 }
1757
1758 add_param(&ct->c_ctinfo.ci_first_pm, &ct->c_ctinfo.ci_last_pm,
1759 "boundary", boundary, 0);
1760
1761 p = (struct part *) mh_xmalloc (sizeof *p);
1762 p->mp_next = (struct part *) mh_xmalloc (sizeof *p->mp_next);
1763 p->mp_next->mp_next = NULL;
1764 p->mp_next->mp_part = first_alt;
1765
1766 m = mh_xcalloc(1, sizeof *m);
1767 m->mp_start = concat (boundary, "\n", NULL);
1768 m->mp_stop = concat (boundary, "--\n", NULL);
1769 m->mp_parts = p;
1770 ct->c_ctparams = m;
1771
1772 free (boundary);
1773
1774 return ct;
1775 }
1776
1777
1778 /*
1779 * Check that the boundary does not appear in the content.
1780 */
1781 static int
1782 boundary_in_content (FILE **fp, char *file, const char *boundary) {
1783 char buffer[BUFSIZ];
1784 size_t bytes_read;
1785 int found_boundary = 0;
1786
1787 /* free_content() will close *fp if we fopen it here. */
1788 if (! *fp && (*fp = fopen (file, "r")) == NULL) {
1789 advise (file, "unable to open %s for reading", file);
1790 return NOTOK;
1791 }
1792
1793 fseeko (*fp, 0L, SEEK_SET);
1794 while ((bytes_read = fread (buffer, 1, sizeof buffer, *fp)) > 0) {
1795 if (find_str (buffer, bytes_read, boundary)) {
1796 found_boundary = 1;
1797 break;
1798 }
1799 }
1800
1801 return found_boundary;
1802 }
1803
1804
1805 /*
1806 * Remove all non-Content headers.
1807 */
1808 static void
1809 transfer_noncontent_headers (CT old, CT new) {
1810 HF hp, hp_prev;
1811
1812 hp_prev = hp = old->c_first_hf;
1813 while (hp) {
1814 HF next = hp->next;
1815
1816 if (strncasecmp (XXX_FIELD_PRF, hp->name, strlen (XXX_FIELD_PRF))) {
1817 if (hp == old->c_last_hf) {
1818 if (hp == old->c_first_hf) {
1819 old->c_last_hf = old->c_first_hf = NULL;
1820 } else {
1821 hp_prev->next = NULL;
1822 old->c_last_hf = hp_prev;
1823 }
1824 } else {
1825 if (hp == old->c_first_hf) {
1826 old->c_first_hf = next;
1827 } else {
1828 hp_prev->next = next;
1829 }
1830 }
1831
1832 /* Put node hp in the new CT. */
1833 if (new->c_first_hf == NULL) {
1834 new->c_first_hf = hp;
1835 } else {
1836 new->c_last_hf->next = hp;
1837 }
1838 new->c_last_hf = hp;
1839 } else {
1840 /* A Content- header, leave in old. */
1841 hp_prev = hp;
1842 }
1843
1844 hp = next;
1845 }
1846 }
1847
1848
1849 /*
1850 * Set content type.
1851 */
1852 static int
1853 set_ct_type (CT ct, int type, int subtype, int encoding) {
1854 char *typename = ct_type_str (type);
1855 char *subtypename = ct_subtype_str (type, subtype);
1856 /* E.g, " text/plain" */
1857 char *type_subtypename = concat (" ", typename, "/", subtypename, NULL);
1858 /* E.g, " text/plain\n" */
1859 char *name_plus_nl = concat (type_subtypename, "\n", NULL);
1860 int found_content_type = 0;
1861 HF hf;
1862 const char *cp = NULL;
1863 char *ctline;
1864 int status;
1865
1866 /* Update/add Content-Type header field. */
1867 for (hf = ct->c_first_hf; hf; hf = hf->next) {
1868 if (! strcasecmp (TYPE_FIELD, hf->name)) {
1869 found_content_type = 1;
1870 free (hf->value);
1871 hf->value = (cp = strchr (ct->c_ctline, ';'))
1872 ? concat (type_subtypename, cp, "\n", NULL)
1873 : add (name_plus_nl, NULL);
1874 }
1875 }
1876 if (! found_content_type) {
1877 add_header (ct, add (TYPE_FIELD, NULL),
1878 (cp = strchr (ct->c_ctline, ';'))
1879 ? concat (type_subtypename, cp, "\n", NULL)
1880 : add (name_plus_nl, NULL));
1881 }
1882
1883 /* Some of these might not be used, but set them anyway. */
1884 ctline = cp
1885 ? concat (type_subtypename, cp, NULL)
1886 : concat (type_subtypename, NULL);
1887 free (ct->c_ctline);
1888 ct->c_ctline = ctline;
1889 /* Leave other ctinfo members as they were. */
1890 free (ct->c_ctinfo.ci_type);
1891 ct->c_ctinfo.ci_type = add (typename, NULL);
1892 free (ct->c_ctinfo.ci_subtype);
1893 ct->c_ctinfo.ci_subtype = add (subtypename, NULL);
1894 ct->c_type = type;
1895 ct->c_subtype = subtype;
1896
1897 free (name_plus_nl);
1898 free (type_subtypename);
1899
1900 status = set_ce (ct, encoding);
1901
1902 return status;
1903 }
1904
1905
1906 /*
1907 * It's not necessary to update the charset parameter of a Content-Type
1908 * header for a text part. According to RFC 2045 Sec. 6.4, the body
1909 * (content) was originally in the specified charset, "and will be in
1910 * that character set again after decoding."
1911 */
1912 static int
1913 decode_text_parts (CT ct, int encoding, const char *decodetypes,
1914 int *message_mods) {
1915 int status = OK;
1916 int lf_line_endings = 0;
1917
1918 switch (ct->c_type) {
1919 case CT_MULTIPART: {
1920 struct multipart *m = (struct multipart *) ct->c_ctparams;
1921 struct part *part;
1922
1923 /* Should check to see if the body for this part is encoded?
1924 For now, it gets passed along as-is by InitMultiPart(). */
1925 for (part = m->mp_parts; status == OK && part; part = part->mp_next) {
1926 status = decode_text_parts (part->mp_part, encoding, decodetypes,
1927 message_mods);
1928 }
1929 break;
1930 }
1931
1932 case CT_MESSAGE:
1933 if (ct->c_subtype == MESSAGE_EXTERNAL) {
1934 struct exbody *e = (struct exbody *) ct->c_ctparams;
1935
1936 status = decode_text_parts (e->eb_content, encoding, decodetypes,
1937 message_mods);
1938 }
1939 break;
1940
1941 default:
1942 if (! should_decode(decodetypes, ct->c_ctinfo.ci_type, ct->c_ctinfo.ci_subtype)) {
1943 break;
1944 }
1945
1946 lf_line_endings =
1947 ct->c_ctparams && ((struct text *) ct->c_ctparams)->lf_line_endings;
1948
1949 switch (ct->c_encoding) {
1950 case CE_BASE64:
1951 case CE_QUOTED: {
1952 int ct_encoding;
1953
1954 if (decode_part (ct) == OK && ct->c_cefile.ce_file) {
1955 const char *reason = NULL;
1956
1957 if ((ct_encoding = content_encoding (ct, &reason)) == CE_BINARY
1958 && encoding != CE_BINARY) {
1959 /* The decoding isn't acceptable so discard it.
1960 Leave status as OK to allow other transformations. */
1961 if (verbosw) {
1962 report (NULL, ct->c_partno, ct->c_file,
1963 "will not decode%s because it is binary (%s)",
1964 ct->c_partno ? ""
1965 : ct->c_ctline ? ct->c_ctline
1966 : "",
1967 reason);
1968 }
1969 (void) m_unlink (ct->c_cefile.ce_file);
1970 free (ct->c_cefile.ce_file);
1971 ct->c_cefile.ce_file = NULL;
1972 } else if (ct->c_encoding == CE_QUOTED &&
1973 ct_encoding == CE_8BIT && encoding == CE_7BIT) {
1974 /* The decoding isn't acceptable so discard it.
1975 Leave status as OK to allow other transformations. */
1976 if (verbosw) {
1977 report (NULL, ct->c_partno, ct->c_file,
1978 "will not decode%s because it is 8bit",
1979 ct->c_partno ? ""
1980 : ct->c_ctline ? ct->c_ctline
1981 : "");
1982 }
1983 (void) m_unlink (ct->c_cefile.ce_file);
1984 free (ct->c_cefile.ce_file);
1985 ct->c_cefile.ce_file = NULL;
1986 } else {
1987 int enc;
1988
1989 if (ct_encoding == CE_BINARY) {
1990 enc = CE_BINARY;
1991 } else if (ct_encoding == CE_8BIT && encoding == CE_7BIT) {
1992 enc = CE_QUOTED;
1993 } else {
1994 enc = ct_encoding;
1995 }
1996 if (set_ce (ct, enc) == OK) {
1997 ++*message_mods;
1998 if (verbosw) {
1999 report (NULL, ct->c_partno, ct->c_file, "decode%s",
2000 ct->c_ctline ? ct->c_ctline : "");
2001 }
2002 if (lf_line_endings) {
2003 strip_crs (ct, message_mods);
2004 }
2005 } else {
2006 status = NOTOK;
2007 }
2008 }
2009 } else {
2010 status = NOTOK;
2011 }
2012 break;
2013 }
2014 case CE_8BIT:
2015 case CE_7BIT:
2016 if (lf_line_endings) {
2017 strip_crs (ct, message_mods);
2018 }
2019 break;
2020 default:
2021 break;
2022 }
2023
2024 break;
2025 }
2026
2027 return status;
2028 }
2029
2030
/*
 * Determine if the part with type[/subtype] should be decoded, according to
 * decodetypes (which came from the -decodetypes switch).
 *
 * decodetypes is a comma-separated list of type and type/subtype
 * entries; matching is case-insensitive.  Returns 1 if either type or
 * type/subtype is in the list, 0 otherwise.  A NULL decodetypes or type
 * never matches.
 */
static int
should_decode(const char *decodetypes, const char *type, const char *subtype) {
    /* Quick search for matching type[/subtype] in decodetypes:  bracket
       decodetypes with commas, then search for ,type, and ,type/subtype, in
       it. */

    int found_match = 0;
    char *delimited_decodetypes;
    char *delimited_type;

    /* concat () stops at its first NULL argument, so a NULL type would
       produce the search string ",", which would be found in any list. */
    if (decodetypes == NULL  ||  type == NULL) {
        return 0;
    }

    delimited_decodetypes = concat(",", decodetypes, ",", NULL);
    delimited_type = concat(",", type, ",", NULL);

    if (nmh_strcasestr(delimited_decodetypes, delimited_type)) {
        found_match = 1;
    } else if (subtype != NULL) {
        char *delimited_type_subtype =
            concat(",", type, "/", subtype, ",", NULL);

        if (nmh_strcasestr(delimited_decodetypes, delimited_type_subtype)) {
            found_match = 1;
        }
        free(delimited_type_subtype);
    }

    free(delimited_type);
    free(delimited_decodetypes);

    return found_match;
}
2062
2063
2064 /*
2065 * See if the decoded content is 7bit, 8bit, or binary. It's binary
2066 * if it has any NUL characters, a CR not followed by a LF, or lines
2067 * greater than 998 characters in length. If binary, reason is set
2068 * to a string explaining why.
2069 */
2070 static int
2071 content_encoding (CT ct, const char **reason) {
2072 CE ce = &ct->c_cefile;
2073 int encoding = CE_7BIT;
2074
2075 if (ce->ce_file) {
2076 size_t line_len = 0;
2077 char buffer[BUFSIZ];
2078 size_t inbytes;
2079
2080 if (! ce->ce_fp && (ce->ce_fp = fopen (ce->ce_file, "r")) == NULL) {
2081 advise (ce->ce_file, "unable to open for reading");
2082 return CE_UNKNOWN;
2083 }
2084
2085 fseeko (ce->ce_fp, 0L, SEEK_SET);
2086 while (encoding != CE_BINARY &&
2087 (inbytes = fread (buffer, 1, sizeof buffer, ce->ce_fp)) > 0) {
2088 char *cp;
2089 size_t i;
2090 int last_char_was_cr = 0;
2091
2092 for (i = 0, cp = buffer; i < inbytes; ++i, ++cp) {
2093 if (*cp == '\0' || ++line_len > 998 ||
2094 (*cp != '\n' && last_char_was_cr)) {
2095 encoding = CE_BINARY;
2096 if (*cp == '\0') {
2097 *reason = "null character";
2098 } else if (line_len > 998) {
2099 *reason = "line length > 998";
2100 } else if (*cp != '\n' && last_char_was_cr) {
2101 *reason = "CR not followed by LF";
2102 } else {
2103 /* Should not reach this. */
2104 *reason = "";
2105 }
2106 break;
2107 } else if (*cp == '\n') {
2108 line_len = 0;
2109 } else if (! isascii ((unsigned char) *cp)) {
2110 encoding = CE_8BIT;
2111 }
2112
2113 last_char_was_cr = *cp == '\r' ? 1 : 0;
2114 }
2115 }
2116
2117 fclose (ce->ce_fp);
2118 ce->ce_fp = NULL;
2119 } /* else should never happen */
2120
2121 return encoding;
2122 }
2123
2124
2125 /*
2126 * Strip carriage returns from content.
2127 */
2128 static int
2129 strip_crs (CT ct, int *message_mods) {
2130 char *charset = content_charset (ct);
2131 int status = OK;
2132
2133 /* Only strip carriage returns if content is ASCII or another
2134 charset that has the same readily recognizable CR followed by a
2135 LF. We can include UTF-8 here because if the high-order bit of
2136 a UTF-8 byte is 0, then it must be a single-byte ASCII
2137 character. */
2138 if (! strcasecmp (charset, "US-ASCII") ||
2139 ! strcasecmp (charset, "UTF-8") ||
2140 ! strncasecmp (charset, "ISO-8859-", 9) ||
2141 ! strncasecmp (charset, "WINDOWS-12", 10)) {
2142 char **file = NULL;
2143 FILE **fp = NULL;
2144 size_t begin;
2145 size_t end;
2146 int has_crs = 0;
2147 int opened_input_file = 0;
2148
2149 if (ct->c_cefile.ce_file) {
2150 file = &ct->c_cefile.ce_file;
2151 fp = &ct->c_cefile.ce_fp;
2152 begin = end = 0;
2153 } else if (ct->c_file) {
2154 file = &ct->c_file;
2155 fp = &ct->c_fp;
2156 begin = (size_t) ct->c_begin;
2157 end = (size_t) ct->c_end;
2158 } /* else don't know where the content is */
2159
2160 if (file && *file && fp) {
2161 if (! *fp) {
2162 if ((*fp = fopen (*file, "r")) == NULL) {
2163 advise (*file, "unable to open for reading");
2164 status = NOTOK;
2165 } else {
2166 opened_input_file = 1;
2167 }
2168 }
2169 }
2170
2171 if (fp && *fp) {
2172 char buffer[BUFSIZ];
2173 size_t bytes_read;
2174 size_t bytes_to_read =
2175 end > 0 && end > begin ? end - begin : sizeof buffer;
2176
2177 fseeko (*fp, begin, SEEK_SET);
2178 while ((bytes_read = fread (buffer, 1,
2179 min (bytes_to_read, sizeof buffer),
2180 *fp)) > 0) {
2181 /* Look for CR followed by a LF. This is supposed to
2182 be text so there should be LF's. If not, don't
2183 modify the content. */
2184 char *cp;
2185 size_t i;
2186 int last_char_was_cr = 0;
2187
2188 if (end > 0) { bytes_to_read -= bytes_read; }
2189
2190 for (i = 0, cp = buffer; i < bytes_read; ++i, ++cp) {
2191 if (*cp == '\n' && last_char_was_cr) {
2192 has_crs = 1;
2193 break;
2194 }
2195
2196 last_char_was_cr = *cp == '\r' ? 1 : 0;
2197 }
2198 }
2199
2200 if (has_crs) {
2201 int fd;
2202 char *stripped_content_file;
2203 char *tempfile = m_mktemp2 (NULL, invo_name, &fd, NULL);
2204
2205 if (tempfile == NULL) {
2206 adios (NULL, "unable to create temporary file in %s",
2207 get_temp_dir());
2208 }
2209 stripped_content_file = add (tempfile, NULL);
2210
2211 /* Strip each CR before a LF from the content. */
2212 fseeko (*fp, begin, SEEK_SET);
2213 while ((bytes_read = fread (buffer, 1, sizeof buffer, *fp)) >
2214 0) {
2215 char *cp;
2216 size_t i;
2217 int last_char_was_cr = 0;
2218
2219 for (i = 0, cp = buffer; i < bytes_read; ++i, ++cp) {
2220 if (*cp == '\r') {
2221 last_char_was_cr = 1;
2222 } else if (last_char_was_cr) {
2223 if (*cp != '\n') {
2224 if (write (fd, "\r", 1) < 0) {
2225 advise (tempfile, "CR write");
2226 }
2227 }
2228 if (write (fd, cp, 1) < 0) {
2229 advise (tempfile, "write");
2230 }
2231 last_char_was_cr = 0;
2232 } else {
2233 if (write (fd, cp, 1) < 0) {
2234 advise (tempfile, "write");
2235 }
2236 last_char_was_cr = 0;
2237 }
2238 }
2239 }
2240
2241 if (close (fd)) {
2242 admonish (NULL, "unable to write temporary file %s",
2243 stripped_content_file);
2244 (void) m_unlink (stripped_content_file);
2245 status = NOTOK;
2246 } else {
2247 /* Replace the decoded file with the converted one. */
2248 if (ct->c_cefile.ce_file) {
2249 if (ct->c_cefile.ce_unlink) {
2250 (void) m_unlink (ct->c_cefile.ce_file);
2251 }
2252 free (ct->c_cefile.ce_file);
2253 }
2254 ct->c_cefile.ce_file = stripped_content_file;
2255 ct->c_cefile.ce_unlink = 1;
2256
2257 ++*message_mods;
2258 if (verbosw) {
2259 report (NULL, ct->c_partno,
2260 begin == 0 && end == 0 ? "" : *file,
2261 "stripped CRs");
2262 }
2263 }
2264 }
2265
2266 if (opened_input_file) {
2267 fclose (*fp);
2268 *fp = NULL;
2269 }
2270 }
2271 }
2272
2273 free (charset);
2274
2275 return status;
2276 }
2277
2278
2279 /*
2280 * Add/update, if necessary, the message C-T-E, based on the least restrictive
2281 * of the part C-T-E's.
2282 */
2283 static void
2284 update_cte (CT ct) {
2285 const int least_restrictive_enc = least_restrictive_encoding (ct);
2286
2287 if (least_restrictive_enc != CE_UNKNOWN &&
2288 least_restrictive_enc != CE_7BIT) {
2289 char *cte = concat (" ", ce_str (least_restrictive_enc), "\n", NULL);
2290 HF hf;
2291 int found_cte = 0;
2292
2293 /* Update/add Content-Transfer-Encoding header field. */
2294 for (hf = ct->c_first_hf; hf; hf = hf->next) {
2295 if (! strcasecmp (ENCODING_FIELD, hf->name)) {
2296 found_cte = 1;
2297 free (hf->value);
2298 hf->value = cte;
2299 }
2300 }
2301 if (! found_cte) {
2302 add_header (ct, add (ENCODING_FIELD, NULL), cte);
2303 }
2304 }
2305 }
2306
2307
2308 /*
2309 * Find the least restrictive encoding (7bit, 8bit, binary) of the parts
2310 * within a message.
2311 */
2312 static int
2313 least_restrictive_encoding (CT ct) {
2314 int encoding = CE_UNKNOWN;
2315
2316 switch (ct->c_type) {
2317 case CT_MULTIPART: {
2318 struct multipart *m = (struct multipart *) ct->c_ctparams;
2319 struct part *part;
2320
2321 for (part = m->mp_parts; part; part = part->mp_next) {
2322 const int part_encoding =
2323 least_restrictive_encoding (part->mp_part);
2324
2325 if (less_restrictive (encoding, part_encoding)) {
2326 encoding = part_encoding;
2327 }
2328 }
2329 break;
2330 }
2331
2332 case CT_MESSAGE:
2333 if (ct->c_subtype == MESSAGE_EXTERNAL) {
2334 struct exbody *e = (struct exbody *) ct->c_ctparams;
2335 const int part_encoding =
2336 least_restrictive_encoding (e->eb_content);
2337
2338 if (less_restrictive (encoding, part_encoding)) {
2339 encoding = part_encoding;
2340 }
2341 }
2342 break;
2343
2344 default: {
2345 if (less_restrictive (encoding, ct->c_encoding)) {
2346 encoding = ct->c_encoding;
2347 }
2348 }}
2349
2350 return encoding;
2351 }
2352
2353
2354 /*
2355 * Return whether the second encoding is less restrictive than the first, where
2356 * "less restrictive" is in the sense used by RFC 2045 Secs. 6.1 and 6.4. So,
2357 * CE_BINARY is less restrictive than CE_8BIT and
2358 * CE_8BIT is less restrictive than CE_7BIT.
2359 */
2360 static int
2361 less_restrictive (int encoding, int second_encoding) {
2362 switch (second_encoding) {
2363 case CE_BINARY:
2364 return encoding != CE_BINARY;
2365 case CE_8BIT:
2366 return encoding != CE_BINARY && encoding != CE_8BIT;
2367 case CE_7BIT:
2368 return encoding != CE_BINARY && encoding != CE_8BIT &&
2369 encoding != CE_7BIT;
2370 default :
2371 return 0;
2372 }
2373 }
2374
2375
2376 /*
2377 * Convert character set of each part.
2378 */
2379 static int
2380 convert_charsets (CT ct, char *dest_charset, int *message_mods) {
2381 int status = OK;
2382
2383 switch (ct->c_type) {
2384 case CT_TEXT:
2385 if (ct->c_subtype == TEXT_PLAIN) {
2386 status = convert_charset (ct, dest_charset, message_mods);
2387 if (status == OK) {
2388 if (verbosw) {
2389 char *ct_charset = content_charset (ct);
2390
2391 report (NULL, ct->c_partno, ct->c_file,
2392 "convert %s to %s", ct_charset, dest_charset);
2393 free (ct_charset);
2394 }
2395 } else {
2396 char *ct_charset = content_charset (ct);
2397
2398 report ("iconv", ct->c_partno, ct->c_file,
2399 "failed to convert %s to %s", ct_charset, dest_charset);
2400 free (ct_charset);
2401 }
2402 }
2403 break;
2404
2405 case CT_MULTIPART: {
2406 struct multipart *m = (struct multipart *) ct->c_ctparams;
2407 struct part *part;
2408
2409 /* Should check to see if the body for this part is encoded?
2410 For now, it gets passed along as-is by InitMultiPart(). */
2411 for (part = m->mp_parts; status == OK && part; part = part->mp_next) {
2412 status =
2413 convert_charsets (part->mp_part, dest_charset, message_mods);
2414 }
2415 break;
2416 }
2417
2418 case CT_MESSAGE:
2419 if (ct->c_subtype == MESSAGE_EXTERNAL) {
2420 struct exbody *e = (struct exbody *) ct->c_ctparams;
2421
2422 status =
2423 convert_charsets (e->eb_content, dest_charset, message_mods);
2424 }
2425 break;
2426
2427 default:
2428 break;
2429 }
2430
2431 return status;
2432 }
2433
2434
2435 /*
2436 * Fix various problems that aren't handled elsewhere. These
2437 * are fixed unconditionally: there are no switches to disable
2438 * them. Currently, "problems" are these:
2439 * 1) remove extraneous semicolon at the end of a header parameter list
2440 * 2) replace RFC 2047 encoding with RFC 2231 encoding of name and
2441 * filename parameters in Content-Type and Content-Disposition
2442 * headers, respectively.
2443 */
2444 static int
2445 fix_always (CT ct, int *message_mods) {
2446 int status = OK;
2447
2448 switch (ct->c_type) {
2449 case CT_MULTIPART: {
2450 struct multipart *m = (struct multipart *) ct->c_ctparams;
2451 struct part *part;
2452
2453 for (part = m->mp_parts; status == OK && part; part = part->mp_next) {
2454 status = fix_always (part->mp_part, message_mods);
2455 }
2456 break;
2457 }
2458
2459 case CT_MESSAGE:
2460 if (ct->c_subtype == MESSAGE_EXTERNAL) {
2461 struct exbody *e = (struct exbody *) ct->c_ctparams;
2462
2463 status = fix_always (e->eb_content, message_mods);
2464 }
2465 break;
2466
2467 default: {
2468 HF hf;
2469
2470 if (ct->c_first_hf) {
2471 fix_filename_encoding (ct);
2472 }
2473
2474 for (hf = ct->c_first_hf; hf; hf = hf->next) {
2475 size_t len = strlen (hf->value);
2476
2477 if (strcasecmp (hf->name, TYPE_FIELD) != 0 &&
2478 strcasecmp (hf->name, DISPO_FIELD) != 0) {
2479 /* Only do this for Content-Type and
2480 Content-Disposition fields because those are the
2481 only headers that parse_mime() warns about. */
2482 continue;
2483 }
2484
2485 /* whitespace following a trailing ';' will be nuked as well */
2486 if (hf->value[len - 1] == '\n') {
2487 while (isspace((unsigned char)(hf->value[len - 2]))) {
2488 if (len-- == 0) { break; }
2489 }
2490 }
2491
2492 if (hf->value[len - 2] == ';') {
2493 /* Remove trailing ';' from parameter value. */
2494 hf->value[len - 2] = '\n';
2495 hf->value[len - 1] = '\0';
2496
2497 /* Also, if Content-Type parameter, remove trailing ';'
2498 from ct->c_ctline. This probably isn't necessary
2499 but can't hurt. */
2500 if (strcasecmp(hf->name, TYPE_FIELD) == 0 && ct->c_ctline) {
2501 size_t l = strlen(ct->c_ctline) - 1;
2502 while (isspace((unsigned char)(ct->c_ctline[l])) ||
2503 ct->c_ctline[l] == ';') {
2504 ct->c_ctline[l--] = '\0';
2505 if (l == 0) { break; }
2506 }
2507 }
2508
2509 ++*message_mods;
2510 if (verbosw) {
2511 report (NULL, ct->c_partno, ct->c_file,
2512 "remove trailing ; from %s parameter value",
2513 hf->name);
2514 }
2515 }
2516 }
2517 }}
2518
2519 return status;
2520 }
2521
2522
2523 /*
2524 * Factor out common code for loops in fix_filename_encoding().
2525 */
2526 static int
2527 fix_filename_param (char *name, char *value, PM *first_pm, PM *last_pm) {
2528 size_t value_len;
2529 int fixed = 0;
2530
2531 if (((value_len = strlen (value)) > 0) &&
2532 strncmp (value, "=?", 2) == 0 &&
2533 strncmp (&value[value_len - 2], "?=", 2) == 0) {
2534 /* Looks like an RFC 2047 encoded parameter. */
2535 char decoded[PATH_MAX + 1];
2536
2537 if (decode_rfc2047 (value, decoded, sizeof decoded)) {
2538 /* Encode using RFC 2231. */
2539 replace_param (first_pm, last_pm, name, decoded, 0);
2540 fixed = 1;
2541 } else {
2542 advise (NULL, "failed to decode %s parameter %s", name, value);
2543 }
2544 }
2545
2546 return fixed;
2547 }
2548
2549
2550 /*
2551 * Replace RFC 2047 encoding with RFC 2231 encoding of name and
2552 * filename parameters in Content-Type and Content-Disposition
2553 * headers, respectively.
2554 */
2555 static int
2556 fix_filename_encoding (CT ct) {
2557 PM pm;
2558 HF hf;
2559 int fixed = 0;
2560
2561 for (pm = ct->c_ctinfo.ci_first_pm; pm; pm = pm->pm_next) {
2562 if (pm->pm_name && pm->pm_value &&
2563 strcasecmp (pm->pm_name, "name") == 0) {
2564 fixed = fix_filename_param (pm->pm_name, pm->pm_value,
2565 &ct->c_ctinfo.ci_first_pm,
2566 &ct->c_ctinfo.ci_last_pm);
2567 }
2568 }
2569
2570 for (pm = ct->c_dispo_first; pm; pm = pm->pm_next) {
2571 if (pm->pm_name && pm->pm_value &&
2572 strcasecmp (pm->pm_name, "filename") == 0) {
2573 fixed = fix_filename_param (pm->pm_name, pm->pm_value,
2574 &ct->c_dispo_first,
2575 &ct->c_dispo_last);
2576 }
2577 }
2578
2579 /* Fix hf values to correspond. */
2580 for (hf = ct->c_first_hf; fixed && hf; hf = hf->next) {
2581 enum { OTHER, TYPE_HEADER, DISPO_HEADER } field = OTHER;
2582
2583 if (strcasecmp (hf->name, TYPE_FIELD) == 0) {
2584 field = TYPE_HEADER;
2585 } else if (strcasecmp (hf->name, DISPO_FIELD) == 0) {
2586 field = DISPO_HEADER;
2587 }
2588
2589 if (field != OTHER) {
2590 const char *const semicolon_loc = strchr (hf->value, ';');
2591
2592 if (semicolon_loc) {
2593 const size_t len =
2594 strlen (hf->name) + 1 + semicolon_loc - hf->value;
2595 const char *const params =
2596 output_params (len,
2597 field == TYPE_HEADER
2598 ? ct->c_ctinfo.ci_first_pm
2599 : ct->c_dispo_first,
2600 NULL, 0);
2601 const char *const new_params = concat (params, "\n", NULL);
2602
2603 replace_substring (&hf->value, semicolon_loc, new_params);
2604 free ((char *) new_params);
2605 free ((char *) params);
2606 } else {
2607 advise (NULL, "did not find semicolon in %s:%s\n",
2608 hf->name, hf->value);
2609 }
2610 }
2611 }
2612
2613 return OK;
2614 }
2615
2616
2617 /*
2618 * Output content in input file to output file.
2619 */
2620 static int
2621 write_content (CT ct, const char *input_filename, char *outfile, int modify_inplace,
2622 int message_mods) {
2623 int status = OK;
2624
2625 if (modify_inplace) {
2626 if (message_mods > 0) {
2627 if ((status = output_message (ct, outfile)) == OK) {
2628 char *infile = input_filename
2629 ? add (input_filename, NULL)
2630 : add (ct->c_file ? ct->c_file : "-", NULL);
2631
2632 if (remove_file (infile) == OK) {
2633 if (rename (outfile, infile)) {
2634 /* Rename didn't work, possibly because of an
2635 attempt to rename across filesystems. Try
2636 brute force copy. */
2637 int old = open (outfile, O_RDONLY);
2638 int new =
2639 open (infile, O_WRONLY | O_CREAT, m_gmprot ());
2640 int i = -1;
2641
2642 if (old != -1 && new != -1) {
2643 char buffer[BUFSIZ];
2644
2645 while ((i = read (old, buffer, sizeof buffer)) >
2646 0) {
2647 if (write (new, buffer, i) != i) {
2648 i = -1;
2649 break;
2650 }
2651 }
2652 }
2653 if (new != -1) { close (new); }
2654 if (old != -1) { close (old); }
2655 (void) m_unlink (outfile);
2656
2657 if (i < 0) {
2658 /* The -file argument processing used path() to
2659 expand filename to absolute path. */
2660 int file = ct->c_file && ct->c_file[0] == '/';
2661
2662 admonish (NULL, "unable to rename %s %s to %s",
2663 file ? "file" : "message", outfile,
2664 infile);
2665 status = NOTOK;
2666 }
2667 }
2668 } else {
2669 admonish (NULL, "unable to remove input file %s, "
2670 "not modifying it", infile);
2671 (void) m_unlink (outfile);
2672 status = NOTOK;
2673 }
2674
2675 free (infile);
2676 } else {
2677 status = NOTOK;
2678 }
2679 } else {
2680 /* No modifications and didn't need the tmp outfile. */
2681 (void) m_unlink (outfile);
2682 }
2683 } else {
2684 /* Output is going to some file. Produce it whether or not
2685 there were modifications. */
2686 status = output_message (ct, outfile);
2687 }
2688
2689 flush_errors ();
2690 return status;
2691 }
2692
2693
2694 /*
2695 * parse_mime() does not set lf_line_endings in struct text, so use this
2696 * function to do it. It touches the parts the decodetypes identifies.
2697 */
2698 static void
2699 set_text_ctparams(CT ct, char *decodetypes, int lf_line_endings) {
2700 switch (ct->c_type) {
2701 case CT_MULTIPART: {
2702 struct multipart *m = (struct multipart *) ct->c_ctparams;
2703 struct part *part;
2704
2705 for (part = m->mp_parts; part; part = part->mp_next) {
2706 set_text_ctparams(part->mp_part, decodetypes, lf_line_endings);
2707 }
2708 break;
2709 }
2710
2711 case CT_MESSAGE:
2712 if (ct->c_subtype == MESSAGE_EXTERNAL) {
2713 struct exbody *e = (struct exbody *) ct->c_ctparams;
2714
2715 set_text_ctparams(e->eb_content, decodetypes, lf_line_endings);
2716 }
2717 break;
2718
2719 default:
2720 if (should_decode(decodetypes, ct->c_ctinfo.ci_type, ct->c_ctinfo.ci_subtype)) {
2721 if (ct->c_ctparams == NULL) {
2722 ct->c_ctparams = mh_xcalloc(1, sizeof (struct text));
2723 }
2724 ((struct text *) ct->c_ctparams)->lf_line_endings = lf_line_endings;
2725 }
2726 }
2727 }
2728
2729
2730 /*
2731 * If "rmmproc" is defined, call that to remove the file. Otherwise,
2732 * use the standard MH backup file.
2733 */
2734 static int
2735 remove_file (const char *file) {
2736 if (rmmproc) {
2737 char *rmm_command = concat (rmmproc, " ", file, NULL);
2738 int status = system (rmm_command);
2739
2740 free (rmm_command);
2741 return WIFEXITED (status) ? WEXITSTATUS (status) : NOTOK;
2742 } else {
2743 /* This is OK for a non-message file, it still uses the
2744 BACKUP_PREFIX form. The backup file will be in the same
2745 directory as file. */
2746 return rename (file, m_backup (file));
2747 }
2748 }
2749
2750
2751 /*
2752 * Output formatted message to user.
2753 */
2754 static void
2755 report (char *what, char *partno, char *filename, char *message, ...) {
2756 va_list args;
2757 char *fmt;
2758
2759 if (verbosw) {
2760 va_start (args, message);
2761 fmt = concat (filename, partno ? " part " : ", ",
2762 partno ? partno : "", partno ? ", " : "", message, NULL);
2763
2764 advertise (what, NULL, fmt, args);
2765
2766 free (fmt);
2767 va_end (args);
2768 }
2769 }
2770
2771
/*
 * Terminate via done (1).  If i is SIGQUIT, stdout is flushed first and a
 * newline is written to stderr.
 * NOTE(review): presumably installed as a signal handler, with i being
 * the signal number -- confirm against the caller.
 */
static void
pipeser (int i)
{
    switch (i) {
    case SIGQUIT:
        fflush (stdout);
        fputs ("\n", stderr);
        fflush (stderr);
        break;

    default:
        break;
    }

    done (1);
    /* NOTREACHED */
}