]> diplodocus.org Git - nmh/blob - uip/mhfixmsg.c
Fixed test negations in test-version-check to work with make distcheck.
[nmh] / uip / mhfixmsg.c
1 /*
2 * mhfixmsg.c -- rewrite a message with various transformations
3 *
4 * This code is Copyright (c) 2002 and 2013, by the authors of nmh.
5 * See the COPYRIGHT file in the root directory of the nmh
6 * distribution for complete copyright information.
7 */
8
9 #include <h/mh.h>
10 #include <h/mime.h>
11 #include <h/mhparse.h>
12 #include <h/utils.h>
13 #include <h/signals.h>
14 #include <fcntl.h>
15
/*
 * Command-line switch table.  Each entry is X(switch string, minchars, id).
 * Most switches that consume an argument in main() carry a placeholder for
 * it after a space in the switch string (e.g. "outfile file"); -decodetypes
 * and -textcharset also consume one but have no placeholder.
 */
#define MHFIXMSG_SWITCHES \
    X("decodetext 8bit|7bit|binary", 0, DECODETEXTSW) \
    X("nodecodetext",                0, NDECODETEXTSW) \
    X("decodetypes",                 0, DECODETYPESW) \
    X("crlflinebreaks",              0, CRLFLINEBREAKSSW) \
    X("nocrlflinebreaks",            0, NCRLFLINEBREAKSSW) \
    X("textcharset",                 0, TEXTCHARSETSW) \
    X("notextcharset",               0, NTEXTCHARSETSW) \
    X("reformat",                    0, REFORMATSW) \
    X("noreformat",                  0, NREFORMATSW) \
    X("replacetextplain",            0, REPLACETEXTPLAINSW) \
    X("noreplacetextplain",          0, NREPLACETEXTPLAINSW) \
    X("fixboundary",                 0, FIXBOUNDARYSW) \
    X("nofixboundary",               0, NFIXBOUNDARYSW) \
    X("fixcte",                      0, FIXCOMPOSITECTESW) \
    X("nofixcte",                    0, NFIXCOMPOSITECTESW) \
    X("fixtype mimetype",            0, FIXTYPESW) \
    X("file file",                   0, FILESW) \
    X("outfile file",                0, OUTFILESW) \
    X("rmmproc program",             0, RPROCSW) \
    X("normmproc",                   0, NRPRCSW) \
    X("changecur",                   0, CHGSW) \
    X("nochangecur",                 0, NCHGSW) \
    X("verbose",                     0, VERBSW) \
    X("noverbose",                   0, NVERBSW) \
    X("version",                     0, VERSIONSW) \
    X("help",                        0, HELPSW) \

/* First expansion of the table: the switch identifiers. */
#define X(sw, minchars, id) id,
DEFINE_SWITCH_ENUM(MHFIXMSG);
#undef X

/* Second expansion of the table: the `switches' array used by smatch(). */
#define X(sw, minchars, id) { sw, minchars, id },
DEFINE_SWITCH_ARRAY(MHFIXMSG, switches);
#undef X
51
52
53 int verbosw;
54 int debugsw; /* Needed by mhparse.c. */
55
56 #define quitser pipeser
57
58 /* mhparse.c */
59 extern int skip_mp_cte_check; /* flag to InitMultiPart */
60 extern int suppress_bogus_mp_content_warning; /* flag to InitMultiPart */
61 extern int bogus_mp_content; /* flag from InitMultiPart */
62 /* flags to/from parse_header_attrs */
63 extern int suppress_extraneous_trailing_semicolon_warning;
64 extern int extraneous_trailing_semicolon;
65
66 /* mhoutsbr.c */
67 int output_message (CT, char *);
68
69 /* mhmisc.c */
70 void flush_errors (void);
71
72 /* mhfree.c */
73 extern CT *cts;
74 void freects_done (int) NORETURN;
75
76 /*
77 * static prototypes
78 */
79 typedef struct fix_transformations {
80 int fixboundary;
81 int fixcompositecte;
82 svector_t fixtypes;
83 int reformat;
84 int replacetextplain;
85 int decodetext;
86 char *decodetypes;
87 /* Whether to use CRLF linebreaks, per RFC 2046 Sec. 4.1.1, par.1. */
88 int lf_line_endings;
89 char *textcharset;
90 } fix_transformations;
91
92 int mhfixmsgsbr (CT *, const fix_transformations *, char *);
93 static int fix_boundary (CT *, int *);
94 static int copy_input_to_output (const char *, const char *);
95 static int get_multipart_boundary (CT, char **);
96 static int replace_boundary (CT, char *, char *);
97 static int fix_types (CT, svector_t, int *);
98 static char *replace_substring (char **, const char *, const char *);
99 static char *remove_parameter (char *, const char *);
100 static int fix_composite_cte (CT, int *);
101 static int set_ce (CT, int);
102 static int ensure_text_plain (CT *, CT, int *, int);
103 static int find_textplain_sibling (CT, int, int *);
104 static int insert_new_text_plain_part (CT, int, CT);
105 static CT build_text_plain_part (CT);
106 static int insert_into_new_mp_alt (CT *, int *);
107 static CT divide_part (CT);
108 static void copy_ctinfo (CI, CI);
109 static int decode_part (CT);
110 static int reformat_part (CT, char *, char *, char *, int);
111 static CT build_multipart_alt (CT, CT, int, int);
112 static int boundary_in_content (FILE **, char *, const char *);
113 static void transfer_noncontent_headers (CT, CT);
114 static int set_ct_type (CT, int type, int subtype, int encoding);
115 static int decode_text_parts (CT, int, const char *, int *);
116 static int should_decode(const char *, const char *, const char *);
117 static int content_encoding (CT, const char **);
118 static int strip_crs (CT, int *);
119 static void update_cte (CT);
120 static int least_restrictive_encoding (CT);
121 static int less_restrictive (int, int);
122 static int convert_charsets (CT, char *, int *);
123 static int fix_always (CT, int *);
124 static int fix_filename_param (char *, char *, PM *, PM *);
125 static int fix_filename_encoding (CT);
126 static int write_content (CT, const char *, char *, int, int);
127 static void set_text_ctparams(CT, char *, int);
128 static int remove_file (const char *);
129 static void report (char *, char *, char *, char *, ...);
130 static void pipeser (int);
131
132
133 int
134 main (int argc, char **argv) {
135 int msgnum;
136 char *cp, *file = NULL, *folder = NULL;
137 char *maildir, buf[100], *outfile = NULL;
138 char **argp, **arguments;
139 struct msgs_array msgs = { 0, 0, NULL };
140 struct msgs *mp = NULL;
141 CT *ctp;
142 FILE *fp;
143 int using_stdin = 0;
144 int chgflag = 1;
145 int status = OK;
146 fix_transformations fx;
147 fx.reformat = fx.fixcompositecte = fx.fixboundary = 1;
148 fx.fixtypes = NULL;
149 fx.replacetextplain = 0;
150 fx.decodetext = CE_8BIT;
151 fx.decodetypes = "text,application/ics"; /* Default, per man page. */
152 fx.lf_line_endings = 0;
153 fx.textcharset = NULL;
154
155 if (nmh_init(argv[0], 2)) { return 1; }
156
157 done = freects_done;
158
159 arguments = getarguments (invo_name, argc, argv, 1);
160 argp = arguments;
161
162 /*
163 * Parse arguments
164 */
165 while ((cp = *argp++)) {
166 if (*cp == '-') {
167 switch (smatch (++cp, switches)) {
168 case AMBIGSW:
169 ambigsw (cp, switches);
170 done (1);
171 case UNKWNSW:
172 adios (NULL, "-%s unknown", cp);
173
174 case HELPSW:
175 snprintf (buf, sizeof buf, "%s [+folder] [msgs] [switches]",
176 invo_name);
177 print_help (buf, switches, 1);
178 done (0);
179 case VERSIONSW:
180 print_version(invo_name);
181 done (0);
182
183 case DECODETEXTSW:
184 if (! (cp = *argp++) || *cp == '-') {
185 adios (NULL, "missing argument to %s", argp[-2]);
186 }
187 if (! strcasecmp (cp, "8bit")) {
188 fx.decodetext = CE_8BIT;
189 } else if (! strcasecmp (cp, "7bit")) {
190 fx.decodetext = CE_7BIT;
191 } else if (! strcasecmp (cp, "binary")) {
192 fx.decodetext = CE_BINARY;
193 } else {
194 adios (NULL, "invalid argument to %s", argp[-2]);
195 }
196 continue;
197 case NDECODETEXTSW:
198 fx.decodetext = 0;
199 continue;
200 case DECODETYPESW:
201 if (! (cp = *argp++) || *cp == '-') {
202 adios (NULL, "missing argument to %s", argp[-2]);
203 }
204 fx.decodetypes = cp;
205 continue;
206 case CRLFLINEBREAKSSW:
207 fx.lf_line_endings = 0;
208 continue;
209 case NCRLFLINEBREAKSSW:
210 fx.lf_line_endings = 1;
211 continue;
212 case TEXTCHARSETSW:
213 if (! (cp = *argp++) || (*cp == '-' && cp[1])) {
214 adios (NULL, "missing argument to %s", argp[-2]);
215 }
216 fx.textcharset = cp;
217 continue;
218 case NTEXTCHARSETSW:
219 fx.textcharset = 0;
220 continue;
221 case FIXBOUNDARYSW:
222 fx.fixboundary = 1;
223 continue;
224 case NFIXBOUNDARYSW:
225 fx.fixboundary = 0;
226 continue;
227 case FIXCOMPOSITECTESW:
228 fx.fixcompositecte = 1;
229 continue;
230 case NFIXCOMPOSITECTESW:
231 fx.fixcompositecte = 0;
232 continue;
233 case FIXTYPESW:
234 if (! (cp = *argp++) || (*cp == '-' && cp[1])) {
235 adios (NULL, "missing argument to %s", argp[-2]);
236 }
237 if (! strncasecmp (cp, "multipart/", 10) ||
238 ! strncasecmp (cp, "message/", 8)) {
239 adios (NULL, "-fixtype %s not allowed", cp);
240 } else if (! strchr (cp, '/')) {
241 adios (NULL, "-fixtype requires type/subtype");
242 }
243 if (fx.fixtypes == NULL) { fx.fixtypes = svector_create (10); }
244 svector_push_back (fx.fixtypes, cp);
245 continue;
246 case REFORMATSW:
247 fx.reformat = 1;
248 continue;
249 case NREFORMATSW:
250 fx.reformat = 0;
251 continue;
252 case REPLACETEXTPLAINSW:
253 fx.replacetextplain = 1;
254 continue;
255 case NREPLACETEXTPLAINSW:
256 fx.replacetextplain = 0;
257 continue;
258 case FILESW:
259 if (! (cp = *argp++) || (*cp == '-' && cp[1])) {
260 adios (NULL, "missing argument to %s", argp[-2]);
261 }
262 file = *cp == '-' ? add (cp, NULL) : path (cp, TFILE);
263 continue;
264 case OUTFILESW:
265 if (! (cp = *argp++) || (*cp == '-' && cp[1])) {
266 adios (NULL, "missing argument to %s", argp[-2]);
267 }
268 outfile = *cp == '-' ? add (cp, NULL) : path (cp, TFILE);
269 continue;
270 case RPROCSW:
271 if (!(rmmproc = *argp++) || *rmmproc == '-') {
272 adios (NULL, "missing argument to %s", argp[-2]);
273 }
274 continue;
275 case NRPRCSW:
276 rmmproc = NULL;
277 continue;
278 case CHGSW:
279 chgflag = 1;
280 continue;
281 case NCHGSW:
282 chgflag = 0;
283 continue;
284 case VERBSW:
285 verbosw = 1;
286 continue;
287 case NVERBSW:
288 verbosw = 0;
289 continue;
290 }
291 }
292 if (*cp == '+' || *cp == '@') {
293 if (folder) {
294 adios (NULL, "only one folder at a time!");
295 } else {
296 folder = pluspath (cp);
297 }
298 } else {
299 if (*cp == '/') {
300 /* Interpret a full path as a filename, not a message. */
301 file = add (cp, NULL);
302 } else {
303 app_msgarg (&msgs, cp);
304 }
305 }
306 }
307
308 SIGNAL (SIGQUIT, quitser);
309 SIGNAL (SIGPIPE, pipeser);
310
311 /*
312 * Read the standard profile setup
313 */
314 if ((fp = fopen (cp = etcpath ("mhn.defaults"), "r"))) {
315 readconfig ((struct node **) 0, fp, cp, 0);
316 fclose (fp);
317 }
318
319 suppress_bogus_mp_content_warning = skip_mp_cte_check = 1;
320 suppress_extraneous_trailing_semicolon_warning = 1;
321
322 if (! context_find ("path")) {
323 free (path ("./", TFOLDER));
324 }
325
326 if (file && msgs.size) {
327 adios (NULL, "cannot specify msg and file at same time!");
328 }
329
330 /*
331 * check if message is coming from file
332 */
333 if (file) {
334 /* If file is stdin, create a tmp file name before parse_mime()
335 has a chance, because it might put in on a different
336 filesystem than the output file. Instead, put it in the
337 user's preferred tmp directory. */
338 CT ct;
339
340 if (! strcmp ("-", file)) {
341 int fd;
342 char *cp;
343
344 using_stdin = 1;
345
346 if ((cp = m_mktemp2 (NULL, invo_name, &fd, NULL)) == NULL) {
347 adios (NULL, "unable to create temporary file in %s",
348 get_temp_dir());
349 } else {
350 free (file);
351 file = add (cp, NULL);
352 cpydata (STDIN_FILENO, fd, "-", file);
353 }
354
355 if (close (fd)) {
356 (void) m_unlink (file);
357 adios (NULL, "failed to write temporary file");
358 }
359 }
360
361 if (! (cts = (CT *) mh_xcalloc ((size_t) 2, sizeof *cts))) {
362 adios (NULL, "out of memory");
363 }
364 ctp = cts;
365
366 if ((ct = parse_mime (file))) {
367 set_text_ctparams(ct, fx.decodetypes, fx.lf_line_endings);
368 *ctp++ = ct;
369 } else {
370 advise (NULL, "unable to parse message from file %s", file);
371 status = NOTOK;
372
373 /* If there's an outfile, pass the input message unchanged, so the message won't
374 get dropped from a pipeline. */
375 if (outfile) {
376 /* Something went wrong. Output might be expected, such as if this were run
377 as a filter. Just copy the input to the output. */
378 if (copy_input_to_output (file, outfile) != OK) {
379 advise (NULL, "unable to copy message to %s, it might be lost\n", outfile);
380 }
381 }
382 }
383 } else {
384 /*
385 * message(s) are coming from a folder
386 */
387 CT ct;
388
389 if (! msgs.size) {
390 app_msgarg(&msgs, "cur");
391 }
392 if (! folder) {
393 folder = getfolder (1);
394 }
395 maildir = m_maildir (folder);
396
397 if (chdir (maildir) == NOTOK) {
398 adios (maildir, "unable to change directory to");
399 }
400
401 /* read folder and create message structure */
402 if (! (mp = folder_read (folder, 1))) {
403 adios (NULL, "unable to read folder %s", folder);
404 }
405
406 /* check for empty folder */
407 if (mp->nummsg == 0) {
408 adios (NULL, "no messages in %s", folder);
409 }
410
411 /* parse all the message ranges/sequences and set SELECTED */
412 for (msgnum = 0; msgnum < msgs.size; msgnum++)
413 if (! m_convert (mp, msgs.msgs[msgnum])) {
414 done (1);
415 }
416 seq_setprev (mp); /* set the previous-sequence */
417
418 if (! (cts =
419 (CT *) mh_xcalloc ((size_t) (mp->numsel + 1), sizeof *cts))) {
420 adios (NULL, "out of memory");
421 }
422 ctp = cts;
423
424 for (msgnum = mp->lowsel; msgnum <= mp->hghsel; msgnum++) {
425 if (is_selected(mp, msgnum)) {
426 char *msgnam;
427
428 msgnam = m_name (msgnum);
429 if ((ct = parse_mime (msgnam))) {
430 set_text_ctparams(ct, fx.decodetypes, fx.lf_line_endings);
431 *ctp++ = ct;
432 } else {
433 advise (NULL, "unable to parse message %s", msgnam);
434 status = NOTOK;
435
436 /* If there's an outfile, pass the input message unchanged, so the message won't
437 get dropped from a pipeline. */
438 if (outfile) {
439 /* Something went wrong. Output might be expected, such as if this were run
440 as a filter. Just copy the input to the output. */
441 const char *input_filename = path (msgnam, TFILE);
442
443 if (copy_input_to_output (input_filename, outfile) != OK) {
444 advise (NULL, "unable to copy message to %s, it might be lost\n", outfile);
445 }
446 }
447 }
448 }
449 }
450
451 if (chgflag) {
452 seq_setcur (mp, mp->hghsel); /* update current message */
453 }
454 seq_save (mp); /* synchronize sequences */
455 context_replace (pfolder, folder);/* update current folder */
456 context_save (); /* save the context file */
457 }
458
459 if (*cts) {
460 for (ctp = cts; *ctp; ++ctp) {
461 status += mhfixmsgsbr (ctp, &fx, outfile);
462
463 if (using_stdin) {
464 (void) m_unlink (file);
465
466 if (! outfile) {
467 /* Just calling m_backup() unlinks the backup file. */
468 (void) m_backup (file);
469 }
470 }
471 }
472 } else {
473 status = 1;
474 }
475
476 if (fx.fixtypes != NULL) { svector_free (fx.fixtypes); }
477 free (outfile);
478 free (file);
479 free (folder);
480 free (arguments);
481
482 /* done is freects_done, which will clean up all of cts. */
483 done (status);
484 return NOTOK;
485 }
486
487
488 /*
489 * Apply transformations to one message.
490 */
491 int
492 mhfixmsgsbr (CT *ctp, const fix_transformations *fx, char *outfile) {
493 /* Store input filename in case one of the transformations, i.e.,
494 fix_boundary(), rewrites to a tmp file. */
495 char *input_filename = add ((*ctp)->c_file, NULL);
496 int modify_inplace = 0;
497 int message_mods = 0;
498 int status = OK;
499
500 if (outfile == NULL) {
501 modify_inplace = 1;
502
503 if ((*ctp)->c_file) {
504 char *tempfile;
505 if ((tempfile = m_mktemp2 (NULL, invo_name, NULL, NULL)) == NULL) {
506 adios (NULL, "unable to create temporary file in %s",
507 get_temp_dir());
508 }
509 outfile = add (tempfile, NULL);
510 } else {
511 adios (NULL, "missing both input and output filenames\n");
512 }
513 }
514
515 reverse_alternative_parts (*ctp);
516 status = fix_always (*ctp, &message_mods);
517 if (status == OK && fx->fixboundary) {
518 status = fix_boundary (ctp, &message_mods);
519 }
520 if (status == OK && fx->fixtypes != NULL) {
521 status = fix_types (*ctp, fx->fixtypes, &message_mods);
522 }
523 if (status == OK && fx->fixcompositecte) {
524 status = fix_composite_cte (*ctp, &message_mods);
525 }
526 if (status == OK && fx->reformat) {
527 status =
528 ensure_text_plain (ctp, NULL, &message_mods, fx->replacetextplain);
529 }
530 if (status == OK && fx->decodetext) {
531 status = decode_text_parts (*ctp, fx->decodetext, fx->decodetypes,
532 &message_mods);
533 update_cte (*ctp);
534 }
535 if (status == OK && fx->textcharset != NULL) {
536 status = convert_charsets (*ctp, fx->textcharset, &message_mods);
537 }
538
539 if (status == OK && ! (*ctp)->c_umask) {
540 /* Set the umask for the contents file. This currently
541 isn't used but just in case it is in the future. */
542 struct stat st;
543
544 if (stat ((*ctp)->c_file, &st) != NOTOK) {
545 (*ctp)->c_umask = ~(st.st_mode & 0777);
546 } else {
547 (*ctp)->c_umask = ~m_gmprot();
548 }
549 }
550
551 /*
552 * Write the content to a file
553 */
554 if (status == OK) {
555 status = write_content (*ctp, input_filename, outfile, modify_inplace,
556 message_mods);
557 } else if (! modify_inplace) {
558 /* Something went wrong. Output might be expected, such
559 as if this were run as a filter. Just copy the input
560 to the output. */
561 if (copy_input_to_output (input_filename, outfile) != OK) {
562 advise (NULL, "unable to copy message to %s, it might be lost\n", outfile);
563 }
564 }
565
566 if (modify_inplace) {
567 if (status != OK) { (void) m_unlink (outfile); }
568 free (outfile);
569 outfile = NULL;
570 }
571
572 free (input_filename);
573
574 return status;
575 }
576
577
578 /*
579 * Copy input message to output. Assumes not modifying in place, so this
580 * might be running as part of a pipeline.
581 */
582 static int
583 copy_input_to_output (const char *input_filename, const char *output_filename) {
584 int in = open (input_filename, O_RDONLY);
585 int out = strcmp (output_filename, "-")
586 ? open (output_filename, O_WRONLY | O_CREAT, m_gmprot ())
587 : STDOUT_FILENO;
588 int status = OK;
589
590 if (in != -1 && out != -1) {
591 cpydata (in, out, input_filename, output_filename);
592 } else {
593 status = NOTOK;
594 }
595
596 close (out);
597 close (in);
598
599 return status;
600 }
601
602
603 /*
604 * Fix mismatched outer level boundary.
605 */
606 static int
607 fix_boundary (CT *ct, int *message_mods) {
608 struct multipart *mp;
609 int status = OK;
610
611 if (ct && (*ct)->c_type == CT_MULTIPART && bogus_mp_content) {
612 mp = (struct multipart *) (*ct)->c_ctparams;
613
614 /*
615 * 1) Get boundary at end of part.
616 * 2) Get boundary at beginning of part and compare to the end-of-part
617 * boundary.
618 * 3) Write out contents of ct to tmp file, replacing boundary in
619 * header with boundary from part. Set c_unlink to 1.
620 * 4) Free ct.
621 * 5) Call parse_mime() on the tmp file, replacing ct.
622 */
623
624 if (mp && mp->mp_start) {
625 char *part_boundary;
626
627 if (get_multipart_boundary (*ct, &part_boundary) == OK) {
628 char *fixed;
629
630 if ((fixed = m_mktemp2 (NULL, invo_name, NULL, &(*ct)->c_fp))) {
631 if (replace_boundary (*ct, fixed, part_boundary) == OK) {
632 char *filename = add ((*ct)->c_file, NULL);
633 CT fixed_ct;
634
635 free_content (*ct);
636 if ((fixed_ct = parse_mime (fixed))) {
637 *ct = fixed_ct;
638 (*ct)->c_unlink = 1;
639
640 ++*message_mods;
641 if (verbosw) {
642 report (NULL, NULL, filename,
643 "fix multipart boundary");
644 }
645 } else {
646 *ct = NULL;
647 advise (NULL, "unable to parse fixed part");
648 status = NOTOK;
649 }
650 free (filename);
651 } else {
652 advise (NULL, "unable to replace broken boundary");
653 status = NOTOK;
654 }
655 } else {
656 advise (NULL, "unable to create temporary file in %s",
657 get_temp_dir());
658 status = NOTOK;
659 }
660
661 free (part_boundary);
662 } else {
663 /* Couldn't fix the boundary. Report failure so that mhfixmsg
664 doesn't modify the message. */
665 status = NOTOK;
666 }
667 } else {
668 /* No multipart struct, even though the content type is
669 CT_MULTIPART. Report failure so that mhfixmsg doesn't modify
670 the message. */
671 status = NOTOK;
672 }
673 }
674
675 return status;
676 }
677
678
679 /*
680 * Find boundary at end of multipart.
681 */
682 static int
683 get_multipart_boundary (CT ct, char **part_boundary) {
684 char buffer[BUFSIZ];
685 char *end_boundary = NULL;
686 off_t begin = (off_t) ct->c_end > (off_t) (ct->c_begin + sizeof buffer)
687 ? (off_t) (ct->c_end - sizeof buffer)
688 : (off_t) ct->c_begin;
689 size_t bytes_read;
690 int status = OK;
691
692 /* This will fail if the boundary spans fread() calls. BUFSIZ should
693 be big enough, even if it's just 1024, to make that unlikely. */
694
695 /* free_content() will close ct->c_fp. */
696 if (! ct->c_fp && (ct->c_fp = fopen (ct->c_file, "r")) == NULL) {
697 advise (ct->c_file, "unable to open for reading");
698 return NOTOK;
699 }
700
701 /* Get boundary at end of multipart. */
702 while (begin >= (off_t) ct->c_begin) {
703 fseeko (ct->c_fp, begin, SEEK_SET);
704 while ((bytes_read = fread (buffer, 1, sizeof buffer, ct->c_fp)) > 0) {
705 char *cp = rfind_str (buffer, bytes_read, "--");
706
707 if (cp) {
708 char *end;
709
710 /* Trim off trailing "--" and anything beyond. */
711 *cp-- = '\0';
712 if ((end = rfind_str (buffer, cp - buffer, "\n"))) {
713 if (strlen (end) > 3 && *end++ == '\n' &&
714 *end++ == '-' && *end++ == '-') {
715 end_boundary = add (end, NULL);
716 break;
717 }
718 }
719 }
720 }
721
722 if (! end_boundary && begin > (off_t) (ct->c_begin + sizeof buffer)) {
723 begin -= sizeof buffer;
724 } else {
725 break;
726 }
727 }
728
729 /* Get boundary at beginning of multipart. */
730 if (end_boundary) {
731 fseeko (ct->c_fp, ct->c_begin, SEEK_SET);
732 while ((bytes_read = fread (buffer, 1, sizeof buffer, ct->c_fp)) > 0) {
733 if (bytes_read >= strlen (end_boundary)) {
734 char *cp = find_str (buffer, bytes_read, end_boundary);
735
736 if (cp && cp - buffer >= 2 && *--cp == '-' &&
737 *--cp == '-' && (cp > buffer && *--cp == '\n')) {
738 status = OK;
739 break;
740 }
741 } else {
742 /* The start and end boundaries didn't match, or the
743 start boundary doesn't begin with "\n--" (or "--"
744 if at the beginning of buffer). Keep trying. */
745 status = NOTOK;
746 }
747 }
748 } else {
749 status = NOTOK;
750 }
751
752 if (status == OK) {
753 *part_boundary = end_boundary;
754 } else {
755 *part_boundary = NULL;
756 free (end_boundary);
757 }
758
759 return status;
760 }
761
762
763 /*
764 * Open and copy ct->c_file to file, replacing the multipart boundary.
765 */
766 static int
767 replace_boundary (CT ct, char *file, char *boundary) {
768 FILE *fpin, *fpout;
769 int compnum, state;
770 char buf[BUFSIZ], name[NAMESZ];
771 char *np, *vp;
772 m_getfld_state_t gstate = 0;
773 int status = OK;
774
775 if (ct->c_file == NULL) {
776 advise (NULL, "missing input filename");
777 return NOTOK;
778 }
779
780 if ((fpin = fopen (ct->c_file, "r")) == NULL) {
781 advise (ct->c_file, "unable to open for reading");
782 return NOTOK;
783 }
784
785 if ((fpout = fopen (file, "w")) == NULL) {
786 fclose (fpin);
787 advise (file, "unable to open for writing");
788 return NOTOK;
789 }
790
791 for (compnum = 1;;) {
792 int bufsz = (int) sizeof buf;
793
794 switch (state = m_getfld (&gstate, name, buf, &bufsz, fpin)) {
795 case FLD:
796 case FLDPLUS:
797 compnum++;
798
799 /* get copies of the buffers */
800 np = add (name, NULL);
801 vp = add (buf, NULL);
802
803 /* if necessary, get rest of field */
804 while (state == FLDPLUS) {
805 bufsz = sizeof buf;
806 state = m_getfld (&gstate, name, buf, &bufsz, fpin);
807 vp = add (buf, vp); /* add to previous value */
808 }
809
810 if (strcasecmp (TYPE_FIELD, np)) {
811 fprintf (fpout, "%s:%s", np, vp);
812 } else {
813 char *new_ctline, *new_params;
814
815 replace_param(&ct->c_ctinfo.ci_first_pm,
816 &ct->c_ctinfo.ci_last_pm, "boundary",
817 boundary, 0);
818
819 new_ctline = concat(" ", ct->c_ctinfo.ci_type, "/",
820 ct->c_ctinfo.ci_subtype, NULL);
821 new_params = output_params(strlen(TYPE_FIELD) +
822 strlen(new_ctline) + 1,
823 ct->c_ctinfo.ci_first_pm, NULL, 0);
824 fprintf (fpout, "%s:%s%s\n", np, new_ctline,
825 new_params ? new_params : "");
826 free(new_ctline);
827 if (new_params) {
828 free(new_params);
829 }
830 }
831
832 free (vp);
833 free (np);
834
835 continue;
836
837 case BODY:
838 fputs ("\n", fpout);
839 /* buf will have a terminating NULL, skip it. */
840 if ((int) fwrite (buf, 1, bufsz-1, fpout) < bufsz-1) {
841 advise (file, "fwrite");
842 }
843 continue;
844
845 case FILEEOF:
846 break;
847
848 case LENERR:
849 case FMTERR:
850 advise (NULL, "message format error in component #%d", compnum);
851 status = NOTOK;
852 break;
853
854 default:
855 advise (NULL, "getfld() returned %d", state);
856 status = NOTOK;
857 break;
858 }
859
860 break;
861 }
862
863 m_getfld_state_destroy (&gstate);
864 fclose (fpout);
865 fclose (fpin);
866
867 return status;
868 }
869
870
871 /*
872 * Fix Content-Type header to reflect the content of its part.
873 */
874 static int
875 fix_types (CT ct, svector_t fixtypes, int *message_mods) {
876 int status = OK;
877
878 switch (ct->c_type) {
879 case CT_MULTIPART: {
880 struct multipart *m = (struct multipart *) ct->c_ctparams;
881 struct part *part;
882
883 for (part = m->mp_parts; status == OK && part; part = part->mp_next) {
884 status = fix_types (part->mp_part, fixtypes, message_mods);
885 }
886 break;
887 }
888
889 case CT_MESSAGE:
890 if (ct->c_subtype == MESSAGE_EXTERNAL) {
891 struct exbody *e = (struct exbody *) ct->c_ctparams;
892
893 status = fix_types (e->eb_content, fixtypes, message_mods);
894 }
895 break;
896
897 default: {
898 char **typep, *type;
899
900 if (ct->c_ctinfo.ci_type && ct->c_ctinfo.ci_subtype) {
901 for (typep = svector_strs (fixtypes);
902 typep && (type = *typep);
903 ++typep) {
904 char *type_subtype =
905 concat (ct->c_ctinfo.ci_type, "/", ct->c_ctinfo.ci_subtype,
906 NULL);
907
908 if (! strcasecmp (type, type_subtype) &&
909 decode_part (ct) == OK &&
910 ct->c_cefile.ce_file != NULL) {
911 char *ct_type_subtype = mime_type (ct->c_cefile.ce_file);
912 char *cp;
913
914 if ((cp = strchr (ct_type_subtype, ';'))) {
915 /* Truncate to remove any parameter list from
916 mime_type () result. */
917 *cp = '\0';
918 }
919
920 if (strcasecmp (type, ct_type_subtype)) {
921 char *ct_type, *ct_subtype;
922 HF hf;
923
924 /* The Content-Type header does not match the
925 content, so update these struct Content
926 fields to match:
927 * c_type, c_subtype
928 * c_ctinfo.ci_type, c_ctinfo.ci_subtype
929 * c_ctline
930 */
931 /* Extract type and subtype from type/subtype. */
932 ct_type = getcpy (ct_type_subtype);
933 if ((cp = strchr (ct_type, '/'))) {
934 *cp = '\0';
935 ct_subtype = getcpy (++cp);
936 } else {
937 advise (NULL, "missing / in MIME type of %s %s",
938 ct->c_file, ct->c_partno);
939 free (ct_type);
940 return NOTOK;
941 }
942
943 ct->c_type = ct_str_type (ct_type);
944 ct->c_subtype = ct_str_subtype (ct->c_type, ct_subtype);
945
946 free (ct->c_ctinfo.ci_type);
947 ct->c_ctinfo.ci_type = ct_type;
948 free (ct->c_ctinfo.ci_subtype);
949 ct->c_ctinfo.ci_subtype = ct_subtype;
950 if (! replace_substring (&ct->c_ctline, type,
951 ct_type_subtype)) {
952 advise (NULL, "did not find %s in %s",
953 type, ct->c_ctline);
954 }
955
956 /* Update Content-Type header field. */
957 for (hf = ct->c_first_hf; hf; hf = hf->next) {
958 if (! strcasecmp (TYPE_FIELD, hf->name)) {
959 if (replace_substring (&hf->value, type,
960 ct_type_subtype)) {
961 ++*message_mods;
962 if (verbosw) {
963 report (NULL, ct->c_partno, ct->c_file,
964 "change Content-Type in header "
965 "from %s to %s",
966 type, ct_type_subtype);
967 }
968 break;
969 } else {
970 advise (NULL, "did not find %s in %s",
971 type, hf->value);
972 }
973 }
974 }
975 }
976 free (ct_type_subtype);
977 }
978 free (type_subtype);
979 }
980 }
981 }}
982
983 return status;
984 }
985
986
/*
 * Replace the first occurrence of a substring, allocating space to hold
 * the new string.
 *
 * str  in/out; on a match, *str is freed and set to the new string.
 * old  the substring to look for.
 * new  what to put in its place.
 *
 * Returns the new string, or NULL, leaving *str untouched, if old does not
 * occur in *str.
 */
static char *
replace_substring (char **str, const char *old, const char *new) {
    char *match = strstr (*str, old);
    char *remainder;
    char *new_str;

    if (match == NULL) {
        return NULL;
    }

    remainder = match + strlen (old);

    if (match == *str) {
        /* Nothing precedes the match. */
        new_str = concat (new, remainder, NULL);
    } else {
        /* Cut a copy of the original off right where the match begins. */
        char *prefix = getcpy (*str);

        prefix[match - *str] = '\0';
        new_str = concat (prefix, new, remainder, NULL);
        free (prefix);
    }

    free (*str);
    *str = new_str;

    return new_str;
}
1014
1015
/*
 * Remove a name=value parameter, given just its name, from a header value.
 *
 * The removal is done in place and includes the whitespace and the
 * semicolon that precede the parameter.  The value ends at its closing
 * quote if it is quoted, and otherwise at the next semicolon or whitespace
 * character, so a parameter that follows is left intact.
 *
 * str   the header value, modified in place.
 * name  the parameter name, without the "=".  It has to match a complete
 *       name, i.e., "name" does not match "filename".  The comparison is
 *       case sensitive.  An empty name removes nothing.
 *
 * Returns str.
 */
static char *
remove_parameter (char *str, const char *name) {
    /* It looks to me, based on the BNF in RFC 2045, that there can't
       be whitespace between the parameter name and the "=", or
       between the "=" and the parameter value. */
    const size_t name_len = strlen (name);
    char *cp = str;

    if (name_len == 0) {
        return str;
    }

    /* Find name, immediately followed by "=", at the beginning of the
       string or after a semicolon or whitespace. */
    while ((cp = strstr (cp, name)) != NULL) {
        const int at_start_of_name =
            cp == str  ||  *(cp-1) == ';'  ||
            isspace ((unsigned char) *(cp-1));

        if (at_start_of_name  &&  cp[name_len] == '=') {
            break;
        }
        ++cp;
    }

    if (cp != NULL) {
        char *start = cp;
        char *end = cp + name_len + 1;

        /* Remove any leading spaces, before the parameter name. */
        while (start > str  &&  isspace ((unsigned char) *(start-1))) {
            --start;
        }
        /* Remove a leading semicolon. */
        if (start > str  &&  *(start-1) == ';') { --start; }

        if (*end == '"') {
            /* Skip past the quoted value, and then the final quote. */
            for (++end; *end  &&  *end != '"'; ++end) {
                /* A backslash quotes the character that follows it. */
                if (*end == '\\'  &&  end[1]) { ++end; }
            }
            /* Stay on the terminating null if the quote is unbalanced. */
            if (*end == '"') { ++end; }
        } else {
            /* Skip past the value. */
            while (*end  &&  *end != ';'  &&
                   ! isspace ((unsigned char) *end)) {
                ++end;
            }
        }

        /* Close the gap, moving the trailing null along. */
        (void) memmove (start, end, strlen (end) + 1);
    }

    return str;
}
1061
1062
1063 /*
1064 * Fix Content-Transfer-Encoding of composite,, e.g., message or multipart, part.
1065 * According to RFC 2045 Sec. 6.4, it must be 7bit, 8bit, or binary. Set it to
1066 * 8 bit.
1067 */
1068 static int
1069 fix_composite_cte (CT ct, int *message_mods) {
1070 int status = OK;
1071
1072 if (ct->c_type == CT_MESSAGE || ct->c_type == CT_MULTIPART) {
1073 if (ct->c_encoding != CE_7BIT && ct->c_encoding != CE_8BIT &&
1074 ct->c_encoding != CE_BINARY) {
1075 HF hf;
1076
1077 for (hf = ct->c_first_hf; hf; hf = hf->next) {
1078 char *name = hf->name;
1079 for (; *name && isspace ((unsigned char) *name); ++name) {
1080 continue;
1081 }
1082
1083 if (! strncasecmp (name, ENCODING_FIELD,
1084 strlen (ENCODING_FIELD))) {
1085 char *prefix = "Nmh-REPLACED-INVALID-";
1086 HF h = mh_xmalloc (sizeof *h);
1087
1088 h->name = add (hf->name, NULL);
1089 h->hf_encoding = hf->hf_encoding;
1090 h->next = hf->next;
1091 hf->next = h;
1092
1093 /* Retain old header but prefix its name. */
1094 free (hf->name);
1095 hf->name = concat (prefix, h->name, NULL);
1096
1097 ++*message_mods;
1098 if (verbosw) {
1099 char *encoding = cpytrim (hf->value);
1100 report (NULL, ct->c_partno, ct->c_file,
1101 "replace Content-Transfer-Encoding of %s "
1102 "with 8 bit", encoding);
1103 free (encoding);
1104 }
1105
1106 h->value = add (" 8bit\n", NULL);
1107
1108 /* Don't need to warn for multiple C-T-E header
1109 fields, parse_mime() already does that. But
1110 if there are any, fix them all as necessary. */
1111 hf = h;
1112 }
1113 }
1114
1115 set_ce (ct, CE_8BIT);
1116 }
1117
1118 if (ct->c_type == CT_MULTIPART) {
1119 struct multipart *m;
1120 struct part *part;
1121
1122 m = (struct multipart *) ct->c_ctparams;
1123 for (part = m->mp_parts; part; part = part->mp_next) {
1124 if (fix_composite_cte (part->mp_part, message_mods) != OK) {
1125 status = NOTOK;
1126 break;
1127 }
1128 }
1129 }
1130 }
1131
1132 return status;
1133 }
1134
1135
1136 /*
1137 * Set content encoding.
1138 */
1139 static int
1140 set_ce (CT ct, int encoding) {
1141 const char *ce = ce_str (encoding);
1142 const struct str2init *ctinit = get_ce_method (ce);
1143
1144 if (ctinit) {
1145 char *cte = concat (" ", ce, "\n", NULL);
1146 int found_cte = 0;
1147 HF hf;
1148 /* Decoded contents might be in ct->c_cefile.ce_file, if the
1149 caller is decode_text_parts (). Save because we'll
1150 overwrite below. */
1151 struct cefile decoded_content_info = ct->c_cefile;
1152
1153 ct->c_encoding = encoding;
1154
1155 ct->c_ctinitfnx = ctinit->si_init;
1156 /* This will assign ct->c_cefile with an all-0 struct, which
1157 is what we want. */
1158 (*ctinit->si_init) (ct);
1159 /* After returning, the caller should set
1160 ct->c_cefile.ce_file to the name of the file containing
1161 the contents. */
1162
1163 if (ct->c_ceclosefnx) {
1164 (*ct->c_ceclosefnx) (ct);
1165 }
1166
1167 /* Restore the cefile. */
1168 ct->c_cefile = decoded_content_info;
1169
1170 /* Update/add Content-Transfer-Encoding header field. */
1171 for (hf = ct->c_first_hf; hf; hf = hf->next) {
1172 if (! strcasecmp (ENCODING_FIELD, hf->name)) {
1173 found_cte = 1;
1174 free (hf->value);
1175 hf->value = cte;
1176 }
1177 }
1178 if (! found_cte) {
1179 add_header (ct, add (ENCODING_FIELD, NULL), cte);
1180 }
1181
1182 /* Update c_celine. It's used only by mhlist -debug. */
1183 free (ct->c_celine);
1184 ct->c_celine = add (cte, NULL);
1185
1186 return OK;
1187 } else {
1188 return NOTOK;
1189 }
1190 }
1191
1192
1193 /*
1194 * Make sure each text part has a corresponding text/plain part.
1195 */
1196 static int
1197 ensure_text_plain (CT *ct, CT parent, int *message_mods, int replacetextplain) {
1198 int status = OK;
1199
1200 switch ((*ct)->c_type) {
1201 case CT_TEXT: {
1202 /* Nothing to do for text/plain. */
1203 if ((*ct)->c_subtype == TEXT_PLAIN) { return OK; }
1204
1205 if (parent && parent->c_type == CT_MULTIPART &&
1206 parent->c_subtype == MULTI_ALTERNATE) {
1207 int new_subpart_number = 1;
1208 int has_text_plain =
1209 find_textplain_sibling (parent, replacetextplain,
1210 &new_subpart_number);
1211
1212 if (! has_text_plain) {
1213 /* Parent is a multipart/alternative. Insert a new
1214 text/plain subpart. */
1215 const int inserted =
1216 insert_new_text_plain_part (*ct, new_subpart_number,
1217 parent);
1218 if (inserted) {
1219 ++*message_mods;
1220 if (verbosw) {
1221 report (NULL, parent->c_partno, parent->c_file,
1222 "insert text/plain part");
1223 }
1224 } else {
1225 status = NOTOK;
1226 }
1227 }
1228 } else if (parent && parent->c_type == CT_MULTIPART &&
1229 parent->c_subtype == MULTI_RELATED) {
1230 char *type_subtype =
1231 concat ((*ct)->c_ctinfo.ci_type, "/",
1232 (*ct)->c_ctinfo.ci_subtype, NULL);
1233 const char *parent_type =
1234 get_param (parent->c_ctinfo.ci_first_pm, "type", '?', 1);
1235 int new_subpart_number = 1;
1236 int has_text_plain = 0;
1237
1238 /* Have to do string comparison on the subtype because we
1239 don't enumerate all of them in c_subtype values.
1240 parent_type will be NULL if the multipart/related part
1241 doesn't have a type parameter. The type parameter must
1242 be specified according to RFC 2387 Sec. 3.1 but not all
1243 messages comply. */
1244 if (parent_type && strcasecmp (type_subtype, parent_type) == 0) {
1245 /* The type of this part matches the root type of the
1246 parent multipart/related. Look to see if there's
1247 text/plain sibling. */
1248 has_text_plain =
1249 find_textplain_sibling (parent, replacetextplain,
1250 &new_subpart_number);
1251 }
1252
1253 free (type_subtype);
1254
1255 if (! has_text_plain) {
1256 struct multipart *mp = (struct multipart *) parent->c_ctparams;
1257 struct part *part;
1258 int siblings = 0;
1259
1260 for (part = mp->mp_parts; part; part = part->mp_next) {
1261 if (*ct != part->mp_part) {
1262 ++siblings;
1263 }
1264 }
1265
1266 if (siblings) {
1267 /* Parent is a multipart/related. Insert a new
1268 text/plain subpart in a new multipart/alternative. */
1269 if (insert_into_new_mp_alt (ct, message_mods)) {
1270 /* Not an error if text/plain couldn't be added. */
1271 }
1272 } else {
1273 /* There are no siblings, so insert a new text/plain
1274 subpart, and change the parent type from
1275 multipart/related to multipart/alternative. */
1276 const int inserted =
1277 insert_new_text_plain_part (*ct, new_subpart_number,
1278 parent);
1279
1280 if (inserted) {
1281 HF hf;
1282
1283 parent->c_subtype = MULTI_ALTERNATE;
1284 parent->c_ctinfo.ci_subtype = getcpy ("alternative");
1285 if (! replace_substring (&parent->c_ctline, "/related",
1286 "/alternative")) {
1287 advise (NULL,
1288 "did not find multipart/related in %s",
1289 parent->c_ctline);
1290 }
1291
1292 /* Update Content-Type header field. */
1293 for (hf = parent->c_first_hf; hf; hf = hf->next) {
1294 if (! strcasecmp (TYPE_FIELD, hf->name)) {
1295 if (replace_substring (&hf->value, "/related",
1296 "/alternative")) {
1297 ++*message_mods;
1298 if (verbosw) {
1299 report (NULL, parent->c_partno,
1300 parent->c_file,
1301 "insert text/plain part");
1302 }
1303
1304 /* Remove, e.g., type="text/html" from
1305 multipart/alternative. */
1306 remove_parameter (hf->value, "type");
1307 break;
1308 } else {
1309 advise (NULL, "did not find multipart/"
1310 "related in header %s",
1311 hf->value);
1312 }
1313 }
1314 }
1315 } else {
1316 /* Not an error if text/plain couldn't be inserted. */
1317 }
1318 }
1319 }
1320 } else {
1321 if (insert_into_new_mp_alt (ct, message_mods)) {
1322 status = NOTOK;
1323 }
1324 }
1325 break;
1326 }
1327
1328 case CT_MULTIPART: {
1329 struct multipart *mp = (struct multipart *) (*ct)->c_ctparams;
1330 struct part *part;
1331
1332 for (part = mp->mp_parts; status == OK && part; part = part->mp_next) {
1333 if ((*ct)->c_type == CT_MULTIPART) {
1334 status = ensure_text_plain (&part->mp_part, *ct, message_mods,
1335 replacetextplain);
1336 }
1337 }
1338 break;
1339 }
1340
1341 case CT_MESSAGE:
1342 if ((*ct)->c_subtype == MESSAGE_EXTERNAL) {
1343 struct exbody *e = (struct exbody *) (*ct)->c_ctparams;
1344
1345 status = ensure_text_plain (&e->eb_content, *ct, message_mods,
1346 replacetextplain);
1347 }
1348 break;
1349 }
1350
1351 return status;
1352 }
1353
1354
1355 /*
1356 * See if there is a sibling text/plain, and return its subpart number.
1357 */
1358 static int
1359 find_textplain_sibling (CT parent, int replacetextplain,
1360 int *new_subpart_number) {
1361 struct multipart *mp = (struct multipart *) parent->c_ctparams;
1362 struct part *part, *prev;
1363 int has_text_plain = 0;
1364
1365 for (prev = part = mp->mp_parts; part; part = part->mp_next) {
1366 ++*new_subpart_number;
1367 if (part->mp_part->c_type == CT_TEXT &&
1368 part->mp_part->c_subtype == TEXT_PLAIN) {
1369 if (replacetextplain) {
1370 struct part *old_part;
1371 if (part == mp->mp_parts) {
1372 old_part = mp->mp_parts;
1373 mp->mp_parts = part->mp_next;
1374 } else {
1375 old_part = prev->mp_next;
1376 prev->mp_next = part->mp_next;
1377 }
1378 if (verbosw) {
1379 report (NULL, parent->c_partno, parent->c_file,
1380 "remove text/plain part %s",
1381 old_part->mp_part->c_partno);
1382 }
1383 free_content (old_part->mp_part);
1384 free (old_part);
1385 } else {
1386 has_text_plain = 1;
1387 }
1388 break;
1389 }
1390 prev = part;
1391 }
1392
1393 return has_text_plain;
1394 }
1395
1396
1397 /*
1398 * Insert a new text/plain part.
1399 */
1400 static int
1401 insert_new_text_plain_part (CT ct, int new_subpart_number, CT parent) {
1402 struct multipart *mp = (struct multipart *) parent->c_ctparams;
1403 struct part *new_part = mh_xmalloc (sizeof *new_part);
1404
1405 if ((new_part->mp_part = build_text_plain_part (ct))) {
1406 char buffer[16];
1407 snprintf (buffer, sizeof buffer, "%d", new_subpart_number);
1408
1409 new_part->mp_next = mp->mp_parts;
1410 mp->mp_parts = new_part;
1411 new_part->mp_part->c_partno =
1412 concat (parent->c_partno ? parent->c_partno : "1", ".",
1413 buffer, NULL);
1414
1415 return 1;
1416 } else {
1417 free_content (new_part->mp_part);
1418 free (new_part);
1419
1420 return 0;
1421 }
1422 }
1423
1424
1425 /*
1426 * Create a text/plain part to go along with non-plain sibling part.
1427 */
1428 static CT
1429 build_text_plain_part (CT encoded_part) {
1430 CT tp_part = divide_part (encoded_part);
1431 char *tmp_plain_file = NULL;
1432
1433 if (decode_part (tp_part) == OK) {
1434 /* Now, tp_part->c_cefile.ce_file is the name of the tmp file that
1435 contains the decoded contents. And the decoding function, such
1436 as openQuoted, will have set ...->ce_unlink to 1 so that it will
1437 be unlinked by free_content (). */
1438 char *tempfile;
1439
1440 if ((tempfile = m_mktemp2 (NULL, invo_name, NULL, NULL)) == NULL) {
1441 advise (NULL, "unable to create temporary file in %s",
1442 get_temp_dir());
1443 } else {
1444 tmp_plain_file = add (tempfile, NULL);
1445 if (reformat_part (tp_part, tmp_plain_file,
1446 tp_part->c_ctinfo.ci_type,
1447 tp_part->c_ctinfo.ci_subtype,
1448 tp_part->c_type) == OK) {
1449 return tp_part;
1450 }
1451 }
1452 }
1453
1454 free_content (tp_part);
1455 if (tmp_plain_file) { (void) m_unlink (tmp_plain_file); }
1456 free (tmp_plain_file);
1457
1458 return NULL;
1459 }
1460
1461
1462 /*
1463 * Slip new text/plain part into a new multipart/alternative.
1464 */
1465 static int
1466 insert_into_new_mp_alt (CT *ct, int *message_mods) {
1467 CT tp_part = build_text_plain_part (*ct);
1468 int status = OK;
1469
1470 if (tp_part) {
1471 CT mp_alt = build_multipart_alt (*ct, tp_part, CT_MULTIPART,
1472 MULTI_ALTERNATE);
1473 if (mp_alt) {
1474 struct multipart *mp = (struct multipart *) mp_alt->c_ctparams;
1475
1476 if (mp && mp->mp_parts) {
1477 mp->mp_parts->mp_part = tp_part;
1478 /* Make the new multipart/alternative the parent. */
1479 *ct = mp_alt;
1480
1481 ++*message_mods;
1482 if (verbosw) {
1483 report (NULL, (*ct)->c_partno, (*ct)->c_file,
1484 "insert text/plain part");
1485 }
1486 } else {
1487 free_content (tp_part);
1488 free_content (mp_alt);
1489 status = NOTOK;
1490 }
1491 } else {
1492 status = NOTOK;
1493 }
1494 } else {
1495 /* Not an error if text/plain couldn't be built. */
1496 }
1497
1498 return status;
1499 }
1500
1501
1502 /*
1503 * Clone a MIME part.
1504 */
1505 static CT
1506 divide_part (CT ct) {
1507 CT new_part;
1508
1509 if ((new_part = (CT) mh_xcalloc (1, sizeof *new_part)) == NULL)
1510 adios (NULL, "out of memory");
1511
1512 /* Just copy over what is needed for decoding. c_vrsn and
1513 c_celine aren't necessary. */
1514 new_part->c_file = add (ct->c_file, NULL);
1515 new_part->c_begin = ct->c_begin;
1516 new_part->c_end = ct->c_end;
1517 copy_ctinfo (&new_part->c_ctinfo, &ct->c_ctinfo);
1518 new_part->c_type = ct->c_type;
1519 new_part->c_cefile = ct->c_cefile;
1520 new_part->c_encoding = ct->c_encoding;
1521 new_part->c_ctinitfnx = ct->c_ctinitfnx;
1522 new_part->c_ceopenfnx = ct->c_ceopenfnx;
1523 new_part->c_ceclosefnx = ct->c_ceclosefnx;
1524 new_part->c_cesizefnx = ct->c_cesizefnx;
1525
1526 /* c_ctline is used by reformat__part(), so it can preserve
1527 anything after the type/subtype. */
1528 new_part->c_ctline = add (ct->c_ctline, NULL);
1529
1530 return new_part;
1531 }
1532
1533
1534 /*
1535 * Copy the content info from one part to another.
1536 */
1537 static void
1538 copy_ctinfo (CI dest, CI src) {
1539 PM s_pm, d_pm;
1540
1541 dest->ci_type = src->ci_type ? add (src->ci_type, NULL) : NULL;
1542 dest->ci_subtype = src->ci_subtype ? add (src->ci_subtype, NULL) : NULL;
1543
1544 for (s_pm = src->ci_first_pm; s_pm; s_pm = s_pm->pm_next) {
1545 d_pm = add_param(&dest->ci_first_pm, &dest->ci_last_pm, s_pm->pm_name,
1546 s_pm->pm_value, 0);
1547 if (s_pm->pm_charset) {
1548 d_pm->pm_charset = getcpy(s_pm->pm_charset);
1549 }
1550 if (s_pm->pm_lang) {
1551 d_pm->pm_lang = getcpy(s_pm->pm_lang);
1552 }
1553 }
1554
1555 dest->ci_comment = src->ci_comment ? add (src->ci_comment, NULL) : NULL;
1556 dest->ci_magic = src->ci_magic ? add (src->ci_magic, NULL) : NULL;
1557 }
1558
1559
1560 /*
1561 * Decode content.
1562 */
1563 static int
1564 decode_part (CT ct) {
1565 char *tmp_decoded;
1566 int status;
1567 char *tempfile;
1568
1569 if ((tempfile = m_mktemp2 (NULL, invo_name, NULL, NULL)) == NULL) {
1570 adios (NULL, "unable to create temporary file in %s", get_temp_dir());
1571 }
1572 tmp_decoded = add (tempfile, NULL);
1573 /* The following call will load ct->c_cefile.ce_file with the tmp
1574 filename of the decoded content. tmp_decoded will contain the
1575 encoded output, get rid of that. */
1576 status = output_message (ct, tmp_decoded);
1577 (void) m_unlink (tmp_decoded);
1578 free (tmp_decoded);
1579
1580 return status;
1581 }
1582
1583
1584 /*
1585 * Reformat content as plain text.
1586 * Some of the arguments aren't really needed now, but maybe will
1587 * be in the future for other than text types.
1588 */
1589 static int
1590 reformat_part (CT ct, char *file, char *type, char *subtype, int c_type) {
1591 int output_subtype, output_encoding;
1592 const char *reason = NULL;
1593 char *cp, *cf;
1594 int status;
1595
1596 /* Hacky: this redirects the output from whatever command is used
1597 to show the part to a file. So, the user can't have any output
1598 redirection in that command.
1599 Could show_multi() in mhshowsbr.c avoid this? */
1600
1601 /* Check for invo_name-format-type/subtype. */
1602 if ((cf = context_find_by_type ("format", type, subtype)) == NULL) {
1603 if (verbosw) {
1604 advise (NULL, "Don't know how to convert %s, there is no "
1605 "%s-format-%s/%s profile entry",
1606 ct->c_file, invo_name, type, subtype);
1607 }
1608 return NOTOK;
1609 } else {
1610 if (strchr (cf, '>')) {
1611 advise (NULL, "'>' prohibited in \"%s\",\nplease fix your "
1612 "%s-format-%s/%s profile entry", cf, invo_name, type,
1613 subtype ? subtype : "");
1614
1615 return NOTOK;
1616 }
1617 }
1618
1619 cp = concat (cf, " >", file, NULL);
1620 status = show_content_aux (ct, 0, cp, NULL, NULL);
1621 free (cp);
1622
1623 /* Unlink decoded content tmp file and free its filename to avoid
1624 leaks. The file stream should already have been closed. */
1625 if (ct->c_cefile.ce_unlink) {
1626 (void) m_unlink (ct->c_cefile.ce_file);
1627 free (ct->c_cefile.ce_file);
1628 ct->c_cefile.ce_file = NULL;
1629 ct->c_cefile.ce_unlink = 0;
1630 }
1631
1632 if (c_type == CT_TEXT) {
1633 output_subtype = TEXT_PLAIN;
1634 } else {
1635 /* Set subtype to 0, which is always an UNKNOWN subtype. */
1636 output_subtype = 0;
1637 }
1638
1639 output_encoding = content_encoding (ct, &reason);
1640 if (set_ct_type (ct, c_type, output_subtype, output_encoding) == OK) {
1641 ct->c_cefile.ce_file = file;
1642 ct->c_cefile.ce_unlink = 1;
1643 } else {
1644 ct->c_cefile.ce_unlink = 0;
1645 status = NOTOK;
1646 }
1647
1648 return status;
1649 }
1650
1651
1652 /*
1653 * Fill in a multipart/alternative part.
1654 */
1655 static CT
1656 build_multipart_alt (CT first_alt, CT new_part, int type, int subtype) {
1657 char *boundary_prefix = "----=_nmh-multipart";
1658 char *boundary = concat (boundary_prefix, first_alt->c_partno, NULL);
1659 char *boundary_indicator = "; boundary=";
1660 char *typename, *subtypename, *name;
1661 CT ct;
1662 struct part *p;
1663 struct multipart *m;
1664 const struct str2init *ctinit;
1665
1666 if ((ct = (CT) mh_xcalloc (1, sizeof *ct)) == NULL)
1667 adios (NULL, "out of memory");
1668
1669 /* Set up the multipart/alternative part. These fields of *ct were
1670 initialized to 0 by mh_xcalloc():
1671 c_fp, c_unlink, c_begin, c_end,
1672 c_vrsn, c_ctline, c_celine,
1673 c_id, c_descr, c_dispo, c_partno,
1674 c_ctinfo.ci_comment, c_ctinfo.ci_magic,
1675 c_cefile, c_encoding,
1676 c_digested, c_digest[16], c_ctexbody,
1677 c_ctinitfnx, c_ceopenfnx, c_ceclosefnx, c_cesizefnx,
1678 c_umask, c_rfc934,
1679 c_showproc, c_termproc, c_storeproc, c_storage, c_folder
1680 */
1681
1682 ct->c_file = add (first_alt->c_file, NULL);
1683 ct->c_type = type;
1684 ct->c_subtype = subtype;
1685
1686 ctinit = get_ct_init (ct->c_type);
1687
1688 typename = ct_type_str (type);
1689 subtypename = ct_subtype_str (type, subtype);
1690
1691 {
1692 int serial = 0;
1693 int found_boundary = 1;
1694
1695 while (found_boundary && serial < 1000000) {
1696 found_boundary = 0;
1697
1698 /* Ensure that the boundary doesn't appear in the decoded
1699 content. */
1700 if (new_part->c_cefile.ce_file) {
1701 if ((found_boundary =
1702 boundary_in_content (&new_part->c_cefile.ce_fp,
1703 new_part->c_cefile.ce_file,
1704 boundary)) == -1) {
1705 free (ct);
1706 return NULL;
1707 }
1708 }
1709
1710 /* Ensure that the boundary doesn't appear in the encoded
1711 content. */
1712 if (! found_boundary && new_part->c_file) {
1713 if ((found_boundary = boundary_in_content (&new_part->c_fp,
1714 new_part->c_file,
1715 boundary)) == -1) {
1716 free (ct);
1717 return NULL;
1718 }
1719 }
1720
1721 if (found_boundary) {
1722 /* Try a slightly different boundary. */
1723 char buffer2[16];
1724
1725 free (boundary);
1726 ++serial;
1727 snprintf (buffer2, sizeof buffer2, "%d", serial);
1728 boundary =
1729 concat (boundary_prefix,
1730 first_alt->c_partno ? first_alt->c_partno : "",
1731 "-", buffer2, NULL);
1732 }
1733 }
1734
1735 if (found_boundary) {
1736 advise (NULL, "giving up trying to find a unique boundary");
1737 free (ct);
1738 return NULL;
1739 }
1740 }
1741
1742 name = concat (" ", typename, "/", subtypename, boundary_indicator, "\"",
1743 boundary, "\"", NULL);
1744
1745 /* Load c_first_hf and c_last_hf. */
1746 transfer_noncontent_headers (first_alt, ct);
1747 add_header (ct, add (TYPE_FIELD, NULL), concat (name, "\n", NULL));
1748 free (name);
1749
1750 /* Load c_partno. */
1751 if (first_alt->c_partno) {
1752 ct->c_partno = add (first_alt->c_partno, NULL);
1753 free (first_alt->c_partno);
1754 first_alt->c_partno = concat (ct->c_partno, ".1", NULL);
1755 new_part->c_partno = concat (ct->c_partno, ".2", NULL);
1756 } else {
1757 first_alt->c_partno = add ("1", NULL);
1758 new_part->c_partno = add ("2", NULL);
1759 }
1760
1761 if (ctinit) {
1762 ct->c_ctinfo.ci_type = add (typename, NULL);
1763 ct->c_ctinfo.ci_subtype = add (subtypename, NULL);
1764 }
1765
1766 add_param(&ct->c_ctinfo.ci_first_pm, &ct->c_ctinfo.ci_last_pm,
1767 "boundary", boundary, 0);
1768
1769 p = (struct part *) mh_xmalloc (sizeof *p);
1770 p->mp_next = (struct part *) mh_xmalloc (sizeof *p->mp_next);
1771 p->mp_next->mp_next = NULL;
1772 p->mp_next->mp_part = first_alt;
1773
1774 if ((m = (struct multipart *) mh_xcalloc (1, sizeof (struct multipart))) ==
1775 NULL)
1776 adios (NULL, "out of memory");
1777 m->mp_start = concat (boundary, "\n", NULL);
1778 m->mp_stop = concat (boundary, "--\n", NULL);
1779 m->mp_parts = p;
1780 ct->c_ctparams = m;
1781
1782 free (boundary);
1783
1784 return ct;
1785 }
1786
1787
1788 /*
1789 * Check that the boundary does not appear in the content.
1790 */
1791 static int
1792 boundary_in_content (FILE **fp, char *file, const char *boundary) {
1793 char buffer[BUFSIZ];
1794 size_t bytes_read;
1795 int found_boundary = 0;
1796
1797 /* free_content() will close *fp if we fopen it here. */
1798 if (! *fp && (*fp = fopen (file, "r")) == NULL) {
1799 advise (file, "unable to open %s for reading", file);
1800 return NOTOK;
1801 }
1802
1803 fseeko (*fp, 0L, SEEK_SET);
1804 while ((bytes_read = fread (buffer, 1, sizeof buffer, *fp)) > 0) {
1805 if (find_str (buffer, bytes_read, boundary)) {
1806 found_boundary = 1;
1807 break;
1808 }
1809 }
1810
1811 return found_boundary;
1812 }
1813
1814
1815 /*
1816 * Remove all non-Content headers.
1817 */
1818 static void
1819 transfer_noncontent_headers (CT old, CT new) {
1820 HF hp, hp_prev;
1821
1822 hp_prev = hp = old->c_first_hf;
1823 while (hp) {
1824 HF next = hp->next;
1825
1826 if (strncasecmp (XXX_FIELD_PRF, hp->name, strlen (XXX_FIELD_PRF))) {
1827 if (hp == old->c_last_hf) {
1828 if (hp == old->c_first_hf) {
1829 old->c_last_hf = old->c_first_hf = NULL;
1830 } else {
1831 hp_prev->next = NULL;
1832 old->c_last_hf = hp_prev;
1833 }
1834 } else {
1835 if (hp == old->c_first_hf) {
1836 old->c_first_hf = next;
1837 } else {
1838 hp_prev->next = next;
1839 }
1840 }
1841
1842 /* Put node hp in the new CT. */
1843 if (new->c_first_hf == NULL) {
1844 new->c_first_hf = hp;
1845 } else {
1846 new->c_last_hf->next = hp;
1847 }
1848 new->c_last_hf = hp;
1849 } else {
1850 /* A Content- header, leave in old. */
1851 hp_prev = hp;
1852 }
1853
1854 hp = next;
1855 }
1856 }
1857
1858
1859 /*
1860 * Set content type.
1861 */
1862 static int
1863 set_ct_type (CT ct, int type, int subtype, int encoding) {
1864 char *typename = ct_type_str (type);
1865 char *subtypename = ct_subtype_str (type, subtype);
1866 /* E.g, " text/plain" */
1867 char *type_subtypename = concat (" ", typename, "/", subtypename, NULL);
1868 /* E.g, " text/plain\n" */
1869 char *name_plus_nl = concat (type_subtypename, "\n", NULL);
1870 int found_content_type = 0;
1871 HF hf;
1872 const char *cp = NULL;
1873 char *ctline;
1874 int status;
1875
1876 /* Update/add Content-Type header field. */
1877 for (hf = ct->c_first_hf; hf; hf = hf->next) {
1878 if (! strcasecmp (TYPE_FIELD, hf->name)) {
1879 found_content_type = 1;
1880 free (hf->value);
1881 hf->value = (cp = strchr (ct->c_ctline, ';'))
1882 ? concat (type_subtypename, cp, "\n", NULL)
1883 : add (name_plus_nl, NULL);
1884 }
1885 }
1886 if (! found_content_type) {
1887 add_header (ct, add (TYPE_FIELD, NULL),
1888 (cp = strchr (ct->c_ctline, ';'))
1889 ? concat (type_subtypename, cp, "\n", NULL)
1890 : add (name_plus_nl, NULL));
1891 }
1892
1893 /* Some of these might not be used, but set them anyway. */
1894 ctline = cp
1895 ? concat (type_subtypename, cp, NULL)
1896 : concat (type_subtypename, NULL);
1897 free (ct->c_ctline);
1898 ct->c_ctline = ctline;
1899 /* Leave other ctinfo members as they were. */
1900 free (ct->c_ctinfo.ci_type);
1901 ct->c_ctinfo.ci_type = add (typename, NULL);
1902 free (ct->c_ctinfo.ci_subtype);
1903 ct->c_ctinfo.ci_subtype = add (subtypename, NULL);
1904 ct->c_type = type;
1905 ct->c_subtype = subtype;
1906
1907 free (name_plus_nl);
1908 free (type_subtypename);
1909
1910 status = set_ce (ct, encoding);
1911
1912 return status;
1913 }
1914
1915
1916 /*
1917 * It's not necessary to update the charset parameter of a Content-Type
1918 * header for a text part. According to RFC 2045 Sec. 6.4, the body
1919 * (content) was originally in the specified charset, "and will be in
1920 * that character set again after decoding."
1921 */
1922 static int
1923 decode_text_parts (CT ct, int encoding, const char *decodetypes,
1924 int *message_mods) {
1925 int status = OK;
1926 int lf_line_endings = 0;
1927
1928 switch (ct->c_type) {
1929 case CT_MULTIPART: {
1930 struct multipart *m = (struct multipart *) ct->c_ctparams;
1931 struct part *part;
1932
1933 /* Should check to see if the body for this part is encoded?
1934 For now, it gets passed along as-is by InitMultiPart(). */
1935 for (part = m->mp_parts; status == OK && part; part = part->mp_next) {
1936 status = decode_text_parts (part->mp_part, encoding, decodetypes,
1937 message_mods);
1938 }
1939 break;
1940 }
1941
1942 case CT_MESSAGE:
1943 if (ct->c_subtype == MESSAGE_EXTERNAL) {
1944 struct exbody *e = (struct exbody *) ct->c_ctparams;
1945
1946 status = decode_text_parts (e->eb_content, encoding, decodetypes,
1947 message_mods);
1948 }
1949 break;
1950
1951 default:
1952 if (! should_decode(decodetypes, ct->c_ctinfo.ci_type, ct->c_ctinfo.ci_subtype)) {
1953 break;
1954 }
1955
1956 lf_line_endings =
1957 ct->c_ctparams && ((struct text *) ct->c_ctparams)->lf_line_endings;
1958
1959 switch (ct->c_encoding) {
1960 case CE_BASE64:
1961 case CE_QUOTED: {
1962 int ct_encoding;
1963
1964 if (decode_part (ct) == OK && ct->c_cefile.ce_file) {
1965 const char *reason = NULL;
1966
1967 if ((ct_encoding = content_encoding (ct, &reason)) == CE_BINARY
1968 && encoding != CE_BINARY) {
1969 /* The decoding isn't acceptable so discard it.
1970 Leave status as OK to allow other transformations. */
1971 if (verbosw) {
1972 report (NULL, ct->c_partno, ct->c_file,
1973 "will not decode%s because it is binary (%s)",
1974 ct->c_partno ? ""
1975 : ct->c_ctline ? ct->c_ctline
1976 : "",
1977 reason);
1978 }
1979 (void) m_unlink (ct->c_cefile.ce_file);
1980 free (ct->c_cefile.ce_file);
1981 ct->c_cefile.ce_file = NULL;
1982 } else if (ct->c_encoding == CE_QUOTED &&
1983 ct_encoding == CE_8BIT && encoding == CE_7BIT) {
1984 /* The decoding isn't acceptable so discard it.
1985 Leave status as OK to allow other transformations. */
1986 if (verbosw) {
1987 report (NULL, ct->c_partno, ct->c_file,
1988 "will not decode%s because it is 8bit",
1989 ct->c_partno ? ""
1990 : ct->c_ctline ? ct->c_ctline
1991 : "");
1992 }
1993 (void) m_unlink (ct->c_cefile.ce_file);
1994 free (ct->c_cefile.ce_file);
1995 ct->c_cefile.ce_file = NULL;
1996 } else {
1997 int enc;
1998
1999 if (ct_encoding == CE_BINARY) {
2000 enc = CE_BINARY;
2001 } else if (ct_encoding == CE_8BIT && encoding == CE_7BIT) {
2002 enc = CE_QUOTED;
2003 } else {
2004 enc = ct_encoding;
2005 }
2006 if (set_ce (ct, enc) == OK) {
2007 ++*message_mods;
2008 if (verbosw) {
2009 report (NULL, ct->c_partno, ct->c_file, "decode%s",
2010 ct->c_ctline ? ct->c_ctline : "");
2011 }
2012 if (lf_line_endings) {
2013 strip_crs (ct, message_mods);
2014 }
2015 } else {
2016 status = NOTOK;
2017 }
2018 }
2019 } else {
2020 status = NOTOK;
2021 }
2022 break;
2023 }
2024 case CE_8BIT:
2025 case CE_7BIT:
2026 if (lf_line_endings) {
2027 strip_crs (ct, message_mods);
2028 }
2029 break;
2030 default:
2031 break;
2032 }
2033
2034 break;
2035 }
2036
2037 return status;
2038 }
2039
2040
/*
 * Determine if the part with type[/subtype] should be decoded, according to
 * decodetypes (which came from the -decodetypes switch).
 *
 * Returns 1 if decodetypes lists either type or type/subtype, compared
 * without regard to case, and 0 otherwise.
 */
static int
should_decode(const char *decodetypes, const char *type, const char *subtype) {
    /* Bracket decodetypes with commas so that every entry in it,
       including the first and last, has a comma on each side.  A
       search for ,type, or ,type/subtype, then matches only whole
       entries. */
    char *haystack = concat(",", decodetypes, ",", NULL);
    char *needle = concat(",", type, ",", NULL);
    int found_match = nmh_strcasestr(haystack, needle) != NULL;

    free(needle);

    if (! found_match  &&  subtype != NULL) {
        needle = concat(",", type, "/", subtype, ",", NULL);
        found_match = nmh_strcasestr(haystack, needle) != NULL;
        free(needle);
    }

    free(haystack);

    return found_match;
}
2072
2073
2074 /*
2075 * See if the decoded content is 7bit, 8bit, or binary. It's binary
2076 * if it has any NUL characters, a CR not followed by a LF, or lines
2077 * greater than 998 characters in length. If binary, reason is set
2078 * to a string explaining why.
2079 */
2080 static int
2081 content_encoding (CT ct, const char **reason) {
2082 CE ce = &ct->c_cefile;
2083 int encoding = CE_7BIT;
2084
2085 if (ce->ce_file) {
2086 size_t line_len = 0;
2087 char buffer[BUFSIZ];
2088 size_t inbytes;
2089
2090 if (! ce->ce_fp && (ce->ce_fp = fopen (ce->ce_file, "r")) == NULL) {
2091 advise (ce->ce_file, "unable to open for reading");
2092 return CE_UNKNOWN;
2093 }
2094
2095 fseeko (ce->ce_fp, 0L, SEEK_SET);
2096 while (encoding != CE_BINARY &&
2097 (inbytes = fread (buffer, 1, sizeof buffer, ce->ce_fp)) > 0) {
2098 char *cp;
2099 size_t i;
2100 int last_char_was_cr = 0;
2101
2102 for (i = 0, cp = buffer; i < inbytes; ++i, ++cp) {
2103 if (*cp == '\0' || ++line_len > 998 ||
2104 (*cp != '\n' && last_char_was_cr)) {
2105 encoding = CE_BINARY;
2106 if (*cp == '\0') {
2107 *reason = "null character";
2108 } else if (line_len > 998) {
2109 *reason = "line length > 998";
2110 } else if (*cp != '\n' && last_char_was_cr) {
2111 *reason = "CR not followed by LF";
2112 } else {
2113 /* Should not reach this. */
2114 *reason = "";
2115 }
2116 break;
2117 } else if (*cp == '\n') {
2118 line_len = 0;
2119 } else if (! isascii ((unsigned char) *cp)) {
2120 encoding = CE_8BIT;
2121 }
2122
2123 last_char_was_cr = *cp == '\r' ? 1 : 0;
2124 }
2125 }
2126
2127 fclose (ce->ce_fp);
2128 ce->ce_fp = NULL;
2129 } /* else should never happen */
2130
2131 return encoding;
2132 }
2133
2134
2135 /*
2136 * Strip carriage returns from content.
2137 */
2138 static int
2139 strip_crs (CT ct, int *message_mods) {
2140 char *charset = content_charset (ct);
2141 int status = OK;
2142
2143 /* Only strip carriage returns if content is ASCII or another
2144 charset that has the same readily recognizable CR followed by a
2145 LF. We can include UTF-8 here because if the high-order bit of
2146 a UTF-8 byte is 0, then it must be a single-byte ASCII
2147 character. */
2148 if (! strcasecmp (charset, "US-ASCII") ||
2149 ! strcasecmp (charset, "UTF-8") ||
2150 ! strncasecmp (charset, "ISO-8859-", 9) ||
2151 ! strncasecmp (charset, "WINDOWS-12", 10)) {
2152 char **file = NULL;
2153 FILE **fp = NULL;
2154 size_t begin;
2155 size_t end;
2156 int has_crs = 0;
2157 int opened_input_file = 0;
2158
2159 if (ct->c_cefile.ce_file) {
2160 file = &ct->c_cefile.ce_file;
2161 fp = &ct->c_cefile.ce_fp;
2162 begin = end = 0;
2163 } else if (ct->c_file) {
2164 file = &ct->c_file;
2165 fp = &ct->c_fp;
2166 begin = (size_t) ct->c_begin;
2167 end = (size_t) ct->c_end;
2168 } /* else don't know where the content is */
2169
2170 if (file && *file && fp) {
2171 if (! *fp) {
2172 if ((*fp = fopen (*file, "r")) == NULL) {
2173 advise (*file, "unable to open for reading");
2174 status = NOTOK;
2175 } else {
2176 opened_input_file = 1;
2177 }
2178 }
2179 }
2180
2181 if (fp && *fp) {
2182 char buffer[BUFSIZ];
2183 size_t bytes_read;
2184 size_t bytes_to_read =
2185 end > 0 && end > begin ? end - begin : sizeof buffer;
2186
2187 fseeko (*fp, begin, SEEK_SET);
2188 while ((bytes_read = fread (buffer, 1,
2189 min (bytes_to_read, sizeof buffer),
2190 *fp)) > 0) {
2191 /* Look for CR followed by a LF. This is supposed to
2192 be text so there should be LF's. If not, don't
2193 modify the content. */
2194 char *cp;
2195 size_t i;
2196 int last_char_was_cr = 0;
2197
2198 if (end > 0) { bytes_to_read -= bytes_read; }
2199
2200 for (i = 0, cp = buffer; i < bytes_read; ++i, ++cp) {
2201 if (*cp == '\n' && last_char_was_cr) {
2202 has_crs = 1;
2203 break;
2204 }
2205
2206 last_char_was_cr = *cp == '\r' ? 1 : 0;
2207 }
2208 }
2209
2210 if (has_crs) {
2211 int fd;
2212 char *stripped_content_file;
2213 char *tempfile = m_mktemp2 (NULL, invo_name, &fd, NULL);
2214
2215 if (tempfile == NULL) {
2216 adios (NULL, "unable to create temporary file in %s",
2217 get_temp_dir());
2218 }
2219 stripped_content_file = add (tempfile, NULL);
2220
2221 /* Strip each CR before a LF from the content. */
2222 fseeko (*fp, begin, SEEK_SET);
2223 while ((bytes_read = fread (buffer, 1, sizeof buffer, *fp)) >
2224 0) {
2225 char *cp;
2226 size_t i;
2227 int last_char_was_cr = 0;
2228
2229 for (i = 0, cp = buffer; i < bytes_read; ++i, ++cp) {
2230 if (*cp == '\r') {
2231 last_char_was_cr = 1;
2232 } else if (last_char_was_cr) {
2233 if (*cp != '\n') {
2234 if (write (fd, "\r", 1) < 0) {
2235 advise (tempfile, "CR write");
2236 }
2237 }
2238 if (write (fd, cp, 1) < 0) {
2239 advise (tempfile, "write");
2240 }
2241 last_char_was_cr = 0;
2242 } else {
2243 if (write (fd, cp, 1) < 0) {
2244 advise (tempfile, "write");
2245 }
2246 last_char_was_cr = 0;
2247 }
2248 }
2249 }
2250
2251 if (close (fd)) {
2252 admonish (NULL, "unable to write temporary file %s",
2253 stripped_content_file);
2254 (void) m_unlink (stripped_content_file);
2255 status = NOTOK;
2256 } else {
2257 /* Replace the decoded file with the converted one. */
2258 if (ct->c_cefile.ce_file) {
2259 if (ct->c_cefile.ce_unlink) {
2260 (void) m_unlink (ct->c_cefile.ce_file);
2261 }
2262 free (ct->c_cefile.ce_file);
2263 }
2264 ct->c_cefile.ce_file = stripped_content_file;
2265 ct->c_cefile.ce_unlink = 1;
2266
2267 ++*message_mods;
2268 if (verbosw) {
2269 report (NULL, ct->c_partno,
2270 begin == 0 && end == 0 ? "" : *file,
2271 "stripped CRs");
2272 }
2273 }
2274 }
2275
2276 if (opened_input_file) {
2277 fclose (*fp);
2278 *fp = NULL;
2279 }
2280 }
2281 }
2282
2283 free (charset);
2284
2285 return status;
2286 }
2287
2288
2289 /*
2290 * Add/update, if necessary, the message C-T-E, based on the least restrictive
2291 * of the part C-T-E's.
2292 */
2293 static void
2294 update_cte (CT ct) {
2295 const int least_restrictive_enc = least_restrictive_encoding (ct);
2296
2297 if (least_restrictive_enc != CE_UNKNOWN &&
2298 least_restrictive_enc != CE_7BIT) {
2299 char *cte = concat (" ", ce_str (least_restrictive_enc), "\n", NULL);
2300 HF hf;
2301 int found_cte = 0;
2302
2303 /* Update/add Content-Transfer-Encoding header field. */
2304 for (hf = ct->c_first_hf; hf; hf = hf->next) {
2305 if (! strcasecmp (ENCODING_FIELD, hf->name)) {
2306 found_cte = 1;
2307 free (hf->value);
2308 hf->value = cte;
2309 }
2310 }
2311 if (! found_cte) {
2312 add_header (ct, add (ENCODING_FIELD, NULL), cte);
2313 }
2314 }
2315 }
2316
2317
2318 /*
2319 * Find the least restrictive encoding (7bit, 8bit, binary) of the parts
2320 * within a message.
2321 */
2322 static int
2323 least_restrictive_encoding (CT ct) {
2324 int encoding = CE_UNKNOWN;
2325
2326 switch (ct->c_type) {
2327 case CT_MULTIPART: {
2328 struct multipart *m = (struct multipart *) ct->c_ctparams;
2329 struct part *part;
2330
2331 for (part = m->mp_parts; part; part = part->mp_next) {
2332 const int part_encoding =
2333 least_restrictive_encoding (part->mp_part);
2334
2335 if (less_restrictive (encoding, part_encoding)) {
2336 encoding = part_encoding;
2337 }
2338 }
2339 break;
2340 }
2341
2342 case CT_MESSAGE:
2343 if (ct->c_subtype == MESSAGE_EXTERNAL) {
2344 struct exbody *e = (struct exbody *) ct->c_ctparams;
2345 const int part_encoding =
2346 least_restrictive_encoding (e->eb_content);
2347
2348 if (less_restrictive (encoding, part_encoding)) {
2349 encoding = part_encoding;
2350 }
2351 }
2352 break;
2353
2354 default: {
2355 if (less_restrictive (encoding, ct->c_encoding)) {
2356 encoding = ct->c_encoding;
2357 }
2358 }}
2359
2360 return encoding;
2361 }
2362
2363
2364 /*
2365 * Return whether the second encoding is less restrictive than the first, where
2366 * "less restrictive" is in the sense used by RFC 2045 Secs. 6.1 and 6.4. So,
2367 * CE_BINARY is less restrictive than CE_8BIT and
2368 * CE_8BIT is less restrictive than CE_7BIT.
2369 */
2370 static int
2371 less_restrictive (int encoding, int second_encoding) {
2372 switch (second_encoding) {
2373 case CE_BINARY:
2374 return encoding != CE_BINARY;
2375 case CE_8BIT:
2376 return encoding != CE_BINARY && encoding != CE_8BIT;
2377 case CE_7BIT:
2378 return encoding != CE_BINARY && encoding != CE_8BIT &&
2379 encoding != CE_7BIT;
2380 default :
2381 return 0;
2382 }
2383 }
2384
2385
2386 /*
2387 * Convert character set of each part.
2388 */
2389 static int
2390 convert_charsets (CT ct, char *dest_charset, int *message_mods) {
2391 int status = OK;
2392
2393 switch (ct->c_type) {
2394 case CT_TEXT:
2395 if (ct->c_subtype == TEXT_PLAIN) {
2396 status = convert_charset (ct, dest_charset, message_mods);
2397 if (status == OK) {
2398 if (verbosw) {
2399 char *ct_charset = content_charset (ct);
2400
2401 report (NULL, ct->c_partno, ct->c_file,
2402 "convert %s to %s", ct_charset, dest_charset);
2403 free (ct_charset);
2404 }
2405 } else {
2406 char *ct_charset = content_charset (ct);
2407
2408 report ("iconv", ct->c_partno, ct->c_file,
2409 "failed to convert %s to %s", ct_charset, dest_charset);
2410 free (ct_charset);
2411 }
2412 }
2413 break;
2414
2415 case CT_MULTIPART: {
2416 struct multipart *m = (struct multipart *) ct->c_ctparams;
2417 struct part *part;
2418
2419 /* Should check to see if the body for this part is encoded?
2420 For now, it gets passed along as-is by InitMultiPart(). */
2421 for (part = m->mp_parts; status == OK && part; part = part->mp_next) {
2422 status =
2423 convert_charsets (part->mp_part, dest_charset, message_mods);
2424 }
2425 break;
2426 }
2427
2428 case CT_MESSAGE:
2429 if (ct->c_subtype == MESSAGE_EXTERNAL) {
2430 struct exbody *e = (struct exbody *) ct->c_ctparams;
2431
2432 status =
2433 convert_charsets (e->eb_content, dest_charset, message_mods);
2434 }
2435 break;
2436
2437 default:
2438 break;
2439 }
2440
2441 return status;
2442 }
2443
2444
2445 /*
2446 * Fix various problems that aren't handled elsewhere. These
2447 * are fixed unconditionally: there are no switches to disable
2448 * them. Currently, "problems" are these:
2449 * 1) remove extraneous semicolon at the end of a header parameter list
2450 * 2) replace RFC 2047 encoding with RFC 2231 encoding of name and
2451 * filename parameters in Content-Type and Content-Disposition
2452 * headers, respectively.
2453 */
2454 static int
2455 fix_always (CT ct, int *message_mods) {
2456 int status = OK;
2457
2458 switch (ct->c_type) {
2459 case CT_MULTIPART: {
2460 struct multipart *m = (struct multipart *) ct->c_ctparams;
2461 struct part *part;
2462
2463 for (part = m->mp_parts; status == OK && part; part = part->mp_next) {
2464 status = fix_always (part->mp_part, message_mods);
2465 }
2466 break;
2467 }
2468
2469 case CT_MESSAGE:
2470 if (ct->c_subtype == MESSAGE_EXTERNAL) {
2471 struct exbody *e = (struct exbody *) ct->c_ctparams;
2472
2473 status = fix_always (e->eb_content, message_mods);
2474 }
2475 break;
2476
2477 default: {
2478 HF hf;
2479
2480 if (ct->c_first_hf) {
2481 fix_filename_encoding (ct);
2482 }
2483
2484 for (hf = ct->c_first_hf; hf; hf = hf->next) {
2485 size_t len = strlen (hf->value);
2486
2487 if (strcasecmp (hf->name, TYPE_FIELD) != 0 &&
2488 strcasecmp (hf->name, DISPO_FIELD) != 0) {
2489 /* Only do this for Content-Type and
2490 Content-Disposition fields because those are the
2491 only headers that parse_mime() warns about. */
2492 continue;
2493 }
2494
2495 /* whitespace following a trailing ';' will be nuked as well */
2496 if (hf->value[len - 1] == '\n') {
2497 while (isspace((unsigned char)(hf->value[len - 2]))) {
2498 if (len-- == 0) { break; }
2499 }
2500 }
2501
2502 if (hf->value[len - 2] == ';') {
2503 /* Remove trailing ';' from parameter value. */
2504 hf->value[len - 2] = '\n';
2505 hf->value[len - 1] = '\0';
2506
2507 /* Also, if Content-Type parameter, remove trailing ';'
2508 from ct->c_ctline. This probably isn't necessary
2509 but can't hurt. */
2510 if (strcasecmp(hf->name, TYPE_FIELD) == 0 && ct->c_ctline) {
2511 size_t l = strlen(ct->c_ctline) - 1;
2512 while (isspace((unsigned char)(ct->c_ctline[l])) ||
2513 ct->c_ctline[l] == ';') {
2514 ct->c_ctline[l--] = '\0';
2515 if (l == 0) { break; }
2516 }
2517 }
2518
2519 ++*message_mods;
2520 if (verbosw) {
2521 report (NULL, ct->c_partno, ct->c_file,
2522 "remove trailing ; from %s parameter value",
2523 hf->name);
2524 }
2525 }
2526 }
2527 }}
2528
2529 return status;
2530 }
2531
2532
2533 /*
2534 * Factor out common code for loops in fix_filename_encoding().
2535 */
2536 static int
2537 fix_filename_param (char *name, char *value, PM *first_pm, PM *last_pm) {
2538 size_t value_len;
2539 int fixed = 0;
2540
2541 if (((value_len = strlen (value)) > 0) &&
2542 strncmp (value, "=?", 2) == 0 &&
2543 strncmp (&value[value_len - 2], "?=", 2) == 0) {
2544 /* Looks like an RFC 2047 encoded parameter. */
2545 char decoded[PATH_MAX + 1];
2546
2547 if (decode_rfc2047 (value, decoded, sizeof decoded)) {
2548 /* Encode using RFC 2231. */
2549 replace_param (first_pm, last_pm, name, decoded, 0);
2550 fixed = 1;
2551 } else {
2552 advise (NULL, "failed to decode %s parameter %s", name, value);
2553 }
2554 }
2555
2556 return fixed;
2557 }
2558
2559
2560 /*
2561 * Replace RFC 2047 encoding with RFC 2231 encoding of name and
2562 * filename parameters in Content-Type and Content-Disposition
2563 * headers, respectively.
2564 */
2565 static int
2566 fix_filename_encoding (CT ct) {
2567 PM pm;
2568 HF hf;
2569 int fixed = 0;
2570
2571 for (pm = ct->c_ctinfo.ci_first_pm; pm; pm = pm->pm_next) {
2572 if (pm->pm_name && pm->pm_value &&
2573 strcasecmp (pm->pm_name, "name") == 0) {
2574 fixed = fix_filename_param (pm->pm_name, pm->pm_value,
2575 &ct->c_ctinfo.ci_first_pm,
2576 &ct->c_ctinfo.ci_last_pm);
2577 }
2578 }
2579
2580 for (pm = ct->c_dispo_first; pm; pm = pm->pm_next) {
2581 if (pm->pm_name && pm->pm_value &&
2582 strcasecmp (pm->pm_name, "filename") == 0) {
2583 fixed = fix_filename_param (pm->pm_name, pm->pm_value,
2584 &ct->c_dispo_first,
2585 &ct->c_dispo_last);
2586 }
2587 }
2588
2589 /* Fix hf values to correspond. */
2590 for (hf = ct->c_first_hf; fixed && hf; hf = hf->next) {
2591 enum { OTHER, TYPE_HEADER, DISPO_HEADER } field = OTHER;
2592
2593 if (strcasecmp (hf->name, TYPE_FIELD) == 0) {
2594 field = TYPE_HEADER;
2595 } else if (strcasecmp (hf->name, DISPO_FIELD) == 0) {
2596 field = DISPO_HEADER;
2597 }
2598
2599 if (field != OTHER) {
2600 const char *const semicolon_loc = strchr (hf->value, ';');
2601
2602 if (semicolon_loc) {
2603 const size_t len =
2604 strlen (hf->name) + 1 + semicolon_loc - hf->value;
2605 const char *const params =
2606 output_params (len,
2607 field == TYPE_HEADER
2608 ? ct->c_ctinfo.ci_first_pm
2609 : ct->c_dispo_first,
2610 NULL, 0);
2611 const char *const new_params = concat (params, "\n", NULL);
2612
2613 replace_substring (&hf->value, semicolon_loc, new_params);
2614 free ((char *) new_params);
2615 free ((char *) params);
2616 } else {
2617 advise (NULL, "did not find semicolon in %s:%s\n",
2618 hf->name, hf->value);
2619 }
2620 }
2621 }
2622
2623 return OK;
2624 }
2625
2626
2627 /*
2628 * Output content in input file to output file.
2629 */
2630 static int
2631 write_content (CT ct, const char *input_filename, char *outfile, int modify_inplace,
2632 int message_mods) {
2633 int status = OK;
2634
2635 if (modify_inplace) {
2636 if (message_mods > 0) {
2637 if ((status = output_message (ct, outfile)) == OK) {
2638 char *infile = input_filename
2639 ? add (input_filename, NULL)
2640 : add (ct->c_file ? ct->c_file : "-", NULL);
2641
2642 if (remove_file (infile) == OK) {
2643 if (rename (outfile, infile)) {
2644 /* Rename didn't work, possibly because of an
2645 attempt to rename across filesystems. Try
2646 brute force copy. */
2647 int old = open (outfile, O_RDONLY);
2648 int new =
2649 open (infile, O_WRONLY | O_CREAT, m_gmprot ());
2650 int i = -1;
2651
2652 if (old != -1 && new != -1) {
2653 char buffer[BUFSIZ];
2654
2655 while ((i = read (old, buffer, sizeof buffer)) >
2656 0) {
2657 if (write (new, buffer, i) != i) {
2658 i = -1;
2659 break;
2660 }
2661 }
2662 }
2663 if (new != -1) { close (new); }
2664 if (old != -1) { close (old); }
2665 (void) m_unlink (outfile);
2666
2667 if (i < 0) {
2668 /* The -file argument processing used path() to
2669 expand filename to absolute path. */
2670 int file = ct->c_file && ct->c_file[0] == '/';
2671
2672 admonish (NULL, "unable to rename %s %s to %s",
2673 file ? "file" : "message", outfile,
2674 infile);
2675 status = NOTOK;
2676 }
2677 }
2678 } else {
2679 admonish (NULL, "unable to remove input file %s, "
2680 "not modifying it", infile);
2681 (void) m_unlink (outfile);
2682 status = NOTOK;
2683 }
2684
2685 free (infile);
2686 } else {
2687 status = NOTOK;
2688 }
2689 } else {
2690 /* No modifications and didn't need the tmp outfile. */
2691 (void) m_unlink (outfile);
2692 }
2693 } else {
2694 /* Output is going to some file. Produce it whether or not
2695 there were modifications. */
2696 status = output_message (ct, outfile);
2697 }
2698
2699 flush_errors ();
2700 return status;
2701 }
2702
2703
2704 /*
2705 * parse_mime() does not set lf_line_endings in struct text, so use this
2706 * function to do it. It touches the parts the decodetypes identifies.
2707 */
2708 static void
2709 set_text_ctparams(CT ct, char *decodetypes, int lf_line_endings) {
2710 switch (ct->c_type) {
2711 case CT_MULTIPART: {
2712 struct multipart *m = (struct multipart *) ct->c_ctparams;
2713 struct part *part;
2714
2715 for (part = m->mp_parts; part; part = part->mp_next) {
2716 set_text_ctparams(part->mp_part, decodetypes, lf_line_endings);
2717 }
2718 break;
2719 }
2720
2721 case CT_MESSAGE:
2722 if (ct->c_subtype == MESSAGE_EXTERNAL) {
2723 struct exbody *e = (struct exbody *) ct->c_ctparams;
2724
2725 set_text_ctparams(e->eb_content, decodetypes, lf_line_endings);
2726 }
2727 break;
2728
2729 default:
2730 if (should_decode(decodetypes, ct->c_ctinfo.ci_type, ct->c_ctinfo.ci_subtype)) {
2731 if (ct->c_ctparams == NULL) {
2732 if ((ct->c_ctparams = (struct text *) mh_xcalloc (1, sizeof (struct text))) == NULL) {
2733 adios (NULL, "out of memory");
2734 }
2735 }
2736 ((struct text *) ct->c_ctparams)->lf_line_endings = lf_line_endings;
2737 }
2738 }
2739 }
2740
2741
2742 /*
2743 * If "rmmproc" is defined, call that to remove the file. Otherwise,
2744 * use the standard MH backup file.
2745 */
2746 static int
2747 remove_file (const char *file) {
2748 if (rmmproc) {
2749 char *rmm_command = concat (rmmproc, " ", file, NULL);
2750 int status = system (rmm_command);
2751
2752 free (rmm_command);
2753 return WIFEXITED (status) ? WEXITSTATUS (status) : NOTOK;
2754 } else {
2755 /* This is OK for a non-message file, it still uses the
2756 BACKUP_PREFIX form. The backup file will be in the same
2757 directory as file. */
2758 return rename (file, m_backup (file));
2759 }
2760 }
2761
2762
2763 /*
2764 * Output formatted message to user.
2765 */
2766 static void
2767 report (char *what, char *partno, char *filename, char *message, ...) {
2768 va_list args;
2769 char *fmt;
2770
2771 if (verbosw) {
2772 va_start (args, message);
2773 fmt = concat (filename, partno ? " part " : ", ",
2774 partno ? partno : "", partno ? ", " : "", message, NULL);
2775
2776 advertise (what, NULL, fmt, args);
2777
2778 free (fmt);
2779 va_end (args);
2780 }
2781 }
2782
2783
/*
 * Signal handler (also used under the name quitser):  on SIGQUIT, flush
 * stdout and emit a newline on stderr; then, for any signal, finish
 * via done (1).
 */
static void
pipeser (int i)
{
    const int is_quit = (i == SIGQUIT);

    if (is_quit) {
        fflush (stdout);
        fputc ('\n', stderr);
        fflush (stderr);
    }

    done (1);
    /* NOTREACHED */
}