1 /* mhfixmsg.c -- rewrite a message with various transformations
3 * This code is Copyright (c) 2002 and 2013, by the authors of nmh.
4 * See the COPYRIGHT file in the root directory of the nmh
5 * distribution for complete copyright information.
9 #include "sbr/m_name.h"
10 #include "sbr/m_gmprot.h"
11 #include "sbr/m_getfld.h"
12 #include "sbr/getarguments.h"
13 #include "sbr/concat.h"
14 #include "sbr/seq_setprev.h"
15 #include "sbr/seq_setcur.h"
16 #include "sbr/seq_save.h"
17 #include "sbr/smatch.h"
18 #include "sbr/fmt_rfc2047.h"
19 #include "sbr/cpydata.h"
20 #include "sbr/trimcpy.h"
21 #include "sbr/m_convert.h"
22 #include "sbr/m_backup.h"
23 #include "sbr/getfolder.h"
24 #include "sbr/folder_read.h"
25 #include "sbr/context_save.h"
26 #include "sbr/context_replace.h"
27 #include "sbr/context_find.h"
28 #include "sbr/readconfig.h"
29 #include "sbr/ambigsw.h"
31 #include "sbr/print_version.h"
32 #include "sbr/print_help.h"
33 #include "sbr/error.h"
34 #include "h/fmt_scan.h"
36 #include "h/mhparse.h"
39 #include "h/signals.h"
40 #include "sbr/m_maildir.h"
41 #include "sbr/m_mktemp.h"
42 #include "sbr/mime_type.h"
46 #include "mhshowsbr.h"
49 #define MHFIXMSG_SWITCHES \
50 X("decodetext 8bit|7bit|binary", 0, DECODETEXTSW) \
51 X("nodecodetext", 0, NDECODETEXTSW) \
52 X("decodetypes", 0, DECODETYPESW) \
53 X("decodeheaderfieldbodies utf-8", 0, DECODEHEADERFIELDBODIESSW) \
54 X("nodecodeheaderfieldbodies", 0, NDECODEHEADERFIELDBODIESSW) \
55 X("crlflinebreaks", 0, CRLFLINEBREAKSSW) \
56 X("nocrlflinebreaks", 0, NCRLFLINEBREAKSSW) \
57 X("textcharset", 0, TEXTCHARSETSW) \
58 X("notextcharset", 0, NTEXTCHARSETSW) \
59 X("reformat", 0, REFORMATSW) \
60 X("noreformat", 0, NREFORMATSW) \
61 X("replacetextplain", 0, REPLACETEXTPLAINSW) \
62 X("noreplacetextplain", 0, NREPLACETEXTPLAINSW) \
63 X("fixboundary", 0, FIXBOUNDARYSW) \
64 X("nofixboundary", 0, NFIXBOUNDARYSW) \
65 X("fixcte", 0, FIXCOMPOSITECTESW) \
66 X("nofixcte", 0, NFIXCOMPOSITECTESW) \
67 X("fixtype mimetype", 0, FIXTYPESW) \
68 X("file file", 0, FILESW) \
69 X("outfile file", 0, OUTFILESW) \
70 X("rmmproc program", 0, RPROCSW) \
71 X("normmproc", 0, NRPRCSW) \
72 X("changecur", 0, CHGSW) \
73 X("nochangecur", 0, NCHGSW) \
74 X("verbose", 0, VERBSW) \
75 X("noverbose", 0, NVERBSW) \
76 X("version", 0, VERSIONSW) \
77 X("help", 0, HELPSW) \
79 #define X(sw, minchars, id) id,
80 DEFINE_SWITCH_ENUM(MHFIXMSG
);
83 #define X(sw, minchars, id) { sw, minchars, id },
84 DEFINE_SWITCH_ARRAY(MHFIXMSG
, switches
);
89 int debugsw
; /* Needed by mhparse.c. */
91 #define quitser pipeser
96 typedef struct fix_transformations
{
101 int replacetextplain
;
104 char *decodeheaderfieldbodies
; /* Either NULL or "utf-8". */
105 /* Whether to use CRLF linebreaks, per RFC 2046 Sec. 4.1.1, par.1. */
108 } fix_transformations
;
110 static int mhfixmsgsbr (CT
*, char *, const fix_transformations
*,
111 FILE **, char *, FILE **);
112 static int fix_boundary (CT
*, int *);
113 static int copy_input_to_output (const char *, FILE *, const char *, FILE *);
114 static int get_multipart_boundary (CT
, char **);
115 static int replace_boundary (CT
, char *, char *);
116 static int fix_types (CT
, svector_t
, int *);
117 static char *replace_substring (char **, const char *, const char *);
118 static char *remove_parameter (char *, const char *);
119 static int fix_composite_cte (CT
, int *);
120 static int set_ce (CT
, int);
121 static int ensure_text_plain (CT
*, CT
, int *, int);
122 static int find_textplain_sibling (CT
, int, int *);
123 static int insert_new_text_plain_part (CT
, int, CT
);
124 static CT
build_text_plain_part (CT
);
125 static int insert_into_new_mp_alt (CT
*, int *);
126 static CT
divide_part (CT
);
127 static void copy_ctinfo (CI
, CI
);
128 static int decode_part (CT
);
129 static int reformat_part (CT
, char *, char *, char *, int);
130 static CT
build_multipart_alt (CT
, CT
, int, int);
131 static int boundary_in_content (FILE **, char *, const char *);
132 static void transfer_noncontent_headers (CT
, CT
);
133 static int set_ct_type (CT
, int type
, int subtype
, int encoding
);
134 static int decode_text_parts (CT
, int, const char *, int *);
135 static int should_decode(const char *, const char *, const char *);
136 static int content_encoding (CT
, const char **);
137 static int strip_crs (CT
, int *);
138 static void update_cte (CT
);
139 static int least_restrictive_encoding (CT
) PURE
;
140 static int less_restrictive (int, int);
141 static int convert_charsets (CT
, char *, int *);
142 static int fix_always (CT
, int *);
143 static int decode_header_field_bodies (CT
, int *);
144 static int fix_filename_param (char *, char *, PM
*, PM
*);
145 static int fix_filename_encoding (CT
);
146 static int write_content (CT
, const char *, char *, FILE *, int, int);
147 static void set_text_ctparams(CT
, char *, int);
148 static int remove_file (const char *);
149 static void report (char *, char *, char *, char *, ...)
151 static void pipeser (int);
155 main (int argc
, char **argv
)
158 char *cp
, *file
= NULL
, *folder
= NULL
;
159 char *maildir
= NULL
, buf
[100], *outfile
= NULL
;
160 char **argp
, **arguments
;
161 struct msgs_array msgs
= { 0, 0, NULL
};
162 struct msgs
*mp
= NULL
;
164 FILE *fp
, *infp
= NULL
, *outfp
= NULL
;
165 bool using_stdin
= false;
168 fix_transformations fx
;
169 fx
.reformat
= fx
.fixcompositecte
= fx
.fixboundary
= 1;
171 fx
.replacetextplain
= 0;
172 fx
.decodetext
= CE_8BIT
;
173 fx
.decodetypes
= "text,application/ics"; /* Default, per man page. */
174 fx
.decodeheaderfieldbodies
= NULL
;
175 fx
.lf_line_endings
= 0;
176 fx
.textcharset
= NULL
;
178 if (nmh_init(argv
[0], true, false)) { return 1; }
180 arguments
= getarguments (invo_name
, argc
, argv
, 1);
186 while ((cp
= *argp
++)) {
188 switch (smatch (++cp
, switches
)) {
190 ambigsw (cp
, switches
);
193 die("-%s unknown", cp
);
196 snprintf (buf
, sizeof buf
, "%s [+folder] [msgs] [switches]",
198 print_help (buf
, switches
, 1);
201 print_version(invo_name
);
205 if (! (cp
= *argp
++) || *cp
== '-') {
206 die("missing argument to %s", argp
[-2]);
208 if (! strcasecmp (cp
, "8bit")) {
209 fx
.decodetext
= CE_8BIT
;
210 } else if (! strcasecmp (cp
, "7bit")) {
211 fx
.decodetext
= CE_7BIT
;
212 } else if (! strcasecmp (cp
, "binary")) {
213 fx
.decodetext
= CE_BINARY
;
215 die("invalid argument to %s", argp
[-2]);
222 if (! (cp
= *argp
++) || *cp
== '-') {
223 die("missing argument to %s", argp
[-2]);
227 case DECODEHEADERFIELDBODIESSW
:
228 if (! (cp
= *argp
++) || *cp
== '-') {
229 die("missing argument to %s", argp
[-2]);
231 fx
.decodeheaderfieldbodies
= cp
;
232 if (strcasecmp (cp
, "utf-8") && strcasecmp (cp
, "utf8")) {
233 /* Because UTF-8 strings can't have embedded nulls. Other
234 encodings support that, too, but we won't bother to
236 die("-decodeheaderfieldbodies only supports utf-8");
239 case NDECODEHEADERFIELDBODIESSW
:
240 fx
.decodeheaderfieldbodies
= NULL
;
242 case CRLFLINEBREAKSSW
:
243 fx
.lf_line_endings
= 0;
245 case NCRLFLINEBREAKSSW
:
246 fx
.lf_line_endings
= 1;
249 if (! (cp
= *argp
++) || (*cp
== '-' && cp
[1])) {
250 die("missing argument to %s", argp
[-2]);
263 case FIXCOMPOSITECTESW
:
264 fx
.fixcompositecte
= 1;
266 case NFIXCOMPOSITECTESW
:
267 fx
.fixcompositecte
= 0;
270 if (! (cp
= *argp
++) || (*cp
== '-' && cp
[1])) {
271 die("missing argument to %s", argp
[-2]);
273 if (! strncasecmp (cp
, "multipart/", 10) ||
274 ! strncasecmp (cp
, "message/", 8))
275 die("-fixtype %s not allowed", cp
);
276 if (! strchr (cp
, '/'))
277 die("-fixtype requires type/subtype");
278 if (fx
.fixtypes
== NULL
) { fx
.fixtypes
= svector_create (10); }
279 svector_push_back (fx
.fixtypes
, cp
);
287 case REPLACETEXTPLAINSW
:
288 fx
.replacetextplain
= 1;
290 case NREPLACETEXTPLAINSW
:
291 fx
.replacetextplain
= 0;
294 if (! (cp
= *argp
++) || (*cp
== '-' && cp
[1])) {
295 die("missing argument to %s", argp
[-2]);
297 file
= *cp
== '-' ? mh_xstrdup (cp
) : path (cp
, TFILE
);
300 if (! (cp
= *argp
++) || (*cp
== '-' && cp
[1])) {
301 die("missing argument to %s", argp
[-2]);
303 outfile
= *cp
== '-' ? mh_xstrdup (cp
) : path (cp
, TFILE
);
306 if (!(rmmproc
= *argp
++) || *rmmproc
== '-') {
307 die("missing argument to %s", argp
[-2]);
327 if (*cp
== '+' || *cp
== '@') {
329 die("only one folder at a time!");
330 folder
= pluspath (cp
);
333 /* Interpret a full path as a filename, not a message. */
334 file
= mh_xstrdup (cp
);
336 app_msgarg (&msgs
, cp
);
341 SIGNAL (SIGQUIT
, quitser
);
342 SIGNAL (SIGPIPE
, pipeser
);
345 * Read the standard profile setup
347 if ((fp
= fopen (cp
= etcpath ("mhn.defaults"), "r"))) {
348 readconfig(NULL
, fp
, cp
, 0);
352 suppress_bogus_mp_content_warning
= skip_mp_cte_check
= true;
353 suppress_extraneous_trailing_semicolon_warning
= true;
355 if (! context_find ("path")) {
356 free (path ("./", TFOLDER
));
359 if (file
&& msgs
.size
) {
360 die("cannot specify msg and file at same time!");
364 /* Open the outfile now, so we don't have to risk opening it
365 after running out of fds. */
366 if (strcmp (outfile
, "-") == 0) {
368 } else if ((outfp
= fopen (outfile
, "w")) == NULL
) {
369 adios (outfile
, "unable to open for writing");
374 * check if message is coming from file
377 /* If file is stdin, create a tmp file name before parse_mime()
378 has a chance, because it might put it on a different
379 filesystem than the output file. Instead, put it in the
380 user's preferred tmp directory. */
383 if (! strcmp ("-", file
)) {
389 if ((cp
= m_mktemp2 (NULL
, invo_name
, &fd
, NULL
)) == NULL
) {
390 die("unable to create temporary file in %s",
394 file
= mh_xstrdup (cp
);
395 cpydata (STDIN_FILENO
, fd
, "-", file
);
399 (void) m_unlink (file
);
400 die("failed to write temporary file");
404 cts
= mh_xcalloc(2, sizeof *cts
);
407 if ((ct
= parse_mime (file
))) {
408 set_text_ctparams(ct
, fx
.decodetypes
, fx
.lf_line_endings
);
411 inform("unable to parse message from file %s", file
);
414 /* If there's an outfile, pass the input message unchanged, so the
415 message won't get dropped from a pipeline. */
417 /* Something went wrong. Output might be expected, such as if
418 this were run as a filter. Just copy the input to the
420 if ((infp
= fopen (file
, "r")) == NULL
) {
421 adios (file
, "unable to open for reading");
424 if (copy_input_to_output (file
, infp
, outfile
, outfp
) != OK
) {
425 inform("unable to copy message to %s, "
426 "it might be lost\n", outfile
);
435 * message(s) are coming from a folder
440 app_msgarg(&msgs
, "cur");
443 folder
= getfolder (1);
445 maildir
= mh_xstrdup(m_maildir (folder
));
447 /* chdir so that error messages, esp. from MIME parser, just
448 refer to the message and not its path. */
449 if (chdir (maildir
) == NOTOK
) {
450 adios (maildir
, "unable to change directory to");
453 /* read folder and create message structure */
454 if (! (mp
= folder_read (folder
, 1))) {
455 die("unable to read folder %s", folder
);
458 /* check for empty folder */
459 if (mp
->nummsg
== 0) {
460 die("no messages in %s", folder
);
463 /* parse all the message ranges/sequences and set SELECTED */
464 for (msgnum
= 0; msgnum
< msgs
.size
; msgnum
++)
465 if (! m_convert (mp
, msgs
.msgs
[msgnum
])) {
468 seq_setprev (mp
); /* set the previous-sequence */
470 cts
= mh_xcalloc(mp
->numsel
+ 1, sizeof *cts
);
473 for (msgnum
= mp
->lowsel
; msgnum
<= mp
->hghsel
; msgnum
++) {
474 if (is_selected(mp
, msgnum
)) {
475 char *msgnam
= m_name (msgnum
);
477 if ((ct
= parse_mime (msgnam
))) {
478 set_text_ctparams(ct
, fx
.decodetypes
, fx
.lf_line_endings
);
481 inform("unable to parse message %s", msgnam
);
484 /* If there's an outfile, pass the input message
485 unchanged, so the message won't get dropped from a
488 /* Something went wrong. Output might be expected,
489 such as if this were run as a filter. Just copy
490 the input to the output. */
491 /* Can't use path() here because 1) it might have been
492 called before and it caches the pwd, and 2) we call
493 chdir() after that. */
494 char *input_filename
=
495 concat (maildir
, "/", msgnam
, NULL
);
497 if ((infp
= fopen (input_filename
, "r")) == NULL
) {
498 adios (input_filename
,
499 "unable to open for reading");
502 if (copy_input_to_output (input_filename
, infp
,
503 outfile
, outfp
) != OK
) {
504 inform("unable to copy message to %s, "
505 "it might be lost\n", outfile
);
510 free (input_filename
);
517 seq_setcur (mp
, mp
->hghsel
); /* update current message */
519 seq_save (mp
); /* synchronize sequences */
520 context_replace (pfolder
, folder
);/* update current folder */
521 context_save (); /* save the context file */
525 for (ctp
= cts
; *ctp
; ++ctp
) {
527 mhfixmsgsbr (ctp
, maildir
, &fx
, &infp
, outfile
, &outfp
) == OK
533 (void) m_unlink (file
);
536 /* Just calling m_backup() unlinks the backup file. */
537 (void) m_backup (file
);
548 if (fx
.fixtypes
!= NULL
) { svector_free (fx
.fixtypes
); }
549 if (infp
) { fclose (infp
); } /* even if stdin */
550 if (outfp
) { fclose (outfp
); } /* even if stdout */
556 done (status
== OK
? 0 : 1);
562 * Apply transformations to one message.
565 mhfixmsgsbr (CT
*ctp
, char *maildir
, const fix_transformations
*fx
,
566 FILE **infp
, char *outfile
, FILE **outfp
)
568 /* Store input filename in case one of the transformations, i.e.,
569 fix_boundary(), rewrites to a tmp file. */
570 char *input_filename
= maildir
571 ? concat (maildir
, "/", (*ctp
)->c_file
, NULL
)
572 : mh_xstrdup ((*ctp
)->c_file
);
573 bool modify_inplace
= false;
574 int message_mods
= 0;
577 /* Though the input file won't need to be opened if everything goes
578 well, do it here just in case there's a failure, and that failure is
579 running out of file descriptors. */
580 if ((*infp
= fopen (input_filename
, "r")) == NULL
) {
581 adios (input_filename
, "unable to open for reading");
584 if (outfile
== NULL
) {
585 modify_inplace
= true;
587 if ((*ctp
)->c_file
) {
589 /* outfp will be closed by the caller */
590 if ((tempfile
= m_mktemp2 (NULL
, invo_name
, NULL
, outfp
)) ==
592 die("unable to create temporary file in %s",
595 outfile
= mh_xstrdup (tempfile
);
597 die("missing both input and output filenames\n");
599 } /* else *outfp was defined by caller */
601 reverse_alternative_parts (*ctp
);
602 status
= fix_always (*ctp
, &message_mods
);
603 if (status
== OK
&& fx
->fixboundary
) {
604 status
= fix_boundary (ctp
, &message_mods
);
606 if (status
== OK
&& fx
->fixtypes
!= NULL
) {
607 status
= fix_types (*ctp
, fx
->fixtypes
, &message_mods
);
609 if (status
== OK
&& fx
->fixcompositecte
) {
610 status
= fix_composite_cte (*ctp
, &message_mods
);
612 if (status
== OK
&& fx
->reformat
) {
614 ensure_text_plain (ctp
, NULL
, &message_mods
, fx
->replacetextplain
);
616 if (status
== OK
&& fx
->decodetext
) {
617 status
= decode_text_parts (*ctp
, fx
->decodetext
, fx
->decodetypes
,
621 if (status
== OK
&& fx
->decodeheaderfieldbodies
) {
622 status
= decode_header_field_bodies(*ctp
, &message_mods
);
624 if (status
== OK
&& fx
->textcharset
!= NULL
) {
625 status
= convert_charsets (*ctp
, fx
->textcharset
, &message_mods
);
628 if (status
== OK
&& ! (*ctp
)->c_umask
) {
629 /* Set the umask for the contents file. This currently
630 isn't used but just in case it is in the future. */
633 if (stat ((*ctp
)->c_file
, &st
) != NOTOK
) {
634 (*ctp
)->c_umask
= ~(st
.st_mode
& 0777);
636 (*ctp
)->c_umask
= ~m_gmprot();
641 * Write the content to a file
644 status
= write_content (*ctp
, input_filename
, outfile
, *outfp
,
645 modify_inplace
, message_mods
);
646 } else if (! modify_inplace
) {
647 /* Something went wrong. Output might be expected, such
648 as if this were run as a filter. Just copy the input
650 if (copy_input_to_output (input_filename
, *infp
, outfile
,
652 inform("unable to copy message to %s, it might be lost\n",
657 if (modify_inplace
) {
658 if (status
!= OK
) { (void) m_unlink (outfile
); }
665 free (input_filename
);
672 * Copy input message to output. Assumes not modifying in place, so this
673 * might be running as part of a pipeline.
676 copy_input_to_output (const char *input_filename
, FILE *infp
,
677 const char *output_filename
, FILE *outfp
)
679 int in
= fileno (infp
);
680 int out
= fileno (outfp
);
683 if (in
!= -1 && out
!= -1) {
684 cpydata (in
, out
, input_filename
, output_filename
);
694 * Fix mismatched outer level boundary.
697 fix_boundary (CT
*ct
, int *message_mods
)
699 struct multipart
*mp
;
702 if (ct
&& (*ct
)->c_type
== CT_MULTIPART
&& bogus_mp_content
) {
703 mp
= (struct multipart
*) (*ct
)->c_ctparams
;
706 * 1) Get boundary at end of part.
707 * 2) Get boundary at beginning of part and compare to the end-of-part
709 * 3) Write out contents of ct to tmp file, replacing boundary in
710 * header with boundary from part. Set c_unlink to 1.
712 * 5) Call parse_mime() on the tmp file, replacing ct.
715 if (mp
&& mp
->mp_start
) {
718 if (get_multipart_boundary (*ct
, &part_boundary
) == OK
) {
721 if ((fixed
= m_mktemp2 (NULL
, invo_name
, NULL
, &(*ct
)->c_fp
))) {
722 if (replace_boundary (*ct
, fixed
, part_boundary
) == OK
) {
723 char *filename
= mh_xstrdup ((*ct
)->c_file
);
727 if ((fixed_ct
= parse_mime (fixed
))) {
733 report (NULL
, NULL
, filename
,
734 "fix multipart boundary");
738 inform("unable to parse fixed part");
743 inform("unable to replace broken boundary");
747 inform("unable to create temporary file in %s",
752 free (part_boundary
);
754 /* Couldn't fix the boundary. Report failure so that mhfixmsg
755 doesn't modify the message. */
759 /* No multipart struct, even though the content type is
760 CT_MULTIPART. Report failure so that mhfixmsg doesn't modify
771 * Find boundary at end of multipart.
774 get_multipart_boundary (CT ct
, char **part_boundary
)
776 char buffer
[NMH_BUFSIZ
];
777 char *end_boundary
= NULL
;
778 off_t begin
= (off_t
) ct
->c_end
> (off_t
) (ct
->c_begin
+ sizeof buffer
)
779 ? (off_t
) (ct
->c_end
- sizeof buffer
)
780 : (off_t
) ct
->c_begin
;
784 /* This will fail if the boundary spans fread() calls. NMH_BUFSIZ should
785 be big enough, even if it's just 1024, to make that unlikely. */
787 /* free_content() will close ct->c_fp if bogus MP boundary is fixed. */
788 if (! ct
->c_fp
&& (ct
->c_fp
= fopen (ct
->c_file
, "r")) == NULL
) {
789 advise (ct
->c_file
, "unable to open for reading");
793 /* Get boundary at end of multipart. */
794 while (begin
>= (off_t
) ct
->c_begin
) {
795 fseeko (ct
->c_fp
, begin
, SEEK_SET
);
796 while ((bytes_read
= fread (buffer
, 1, sizeof buffer
, ct
->c_fp
)) > 0) {
797 char *cp
= rfind_str (buffer
, bytes_read
, "--");
802 /* Trim off trailing "--" and anything beyond. */
804 if ((end
= rfind_str (buffer
, cp
- buffer
, "\n"))) {
805 if (strlen (end
) > 3 && *end
++ == '\n' &&
806 *end
++ == '-' && *end
++ == '-') {
807 end_boundary
= mh_xstrdup (end
);
814 if (end_boundary
|| begin
<= (off_t
) (ct
->c_begin
+ sizeof buffer
))
816 begin
-= sizeof buffer
;
819 /* Get boundary at beginning of multipart. */
821 fseeko (ct
->c_fp
, ct
->c_begin
, SEEK_SET
);
822 while ((bytes_read
= fread (buffer
, 1, sizeof buffer
, ct
->c_fp
)) > 0) {
823 if (bytes_read
>= strlen (end_boundary
)) {
824 char *cp
= find_str (buffer
, bytes_read
, end_boundary
);
826 if (cp
&& cp
- buffer
>= 2 && *--cp
== '-' &&
827 *--cp
== '-' && (cp
> buffer
&& *--cp
== '\n')) {
832 /* The start and end boundaries didn't match, or the
833 start boundary doesn't begin with "\n--" (or "--"
834 if at the beginning of buffer). Keep trying. */
848 *part_boundary
= end_boundary
;
850 *part_boundary
= NULL
;
859 * Open and copy ct->c_file to file, replacing the multipart boundary.
862 replace_boundary (CT ct
, char *file
, char *boundary
)
866 char buf
[NMH_BUFSIZ
], name
[NAMESZ
];
868 m_getfld_state_t gstate
;
871 if (ct
->c_file
== NULL
) {
872 inform("missing input filename");
876 if ((fpin
= fopen (ct
->c_file
, "r")) == NULL
) {
877 advise (ct
->c_file
, "unable to open for reading");
881 if ((fpout
= fopen (file
, "w")) == NULL
) {
883 advise (file
, "unable to open for writing");
887 gstate
= m_getfld_state_init(fpin
);
888 for (compnum
= 1;;) {
889 int bufsz
= (int) sizeof buf
;
891 switch (state
= m_getfld2(&gstate
, name
, buf
, &bufsz
)) {
896 /* get copies of the buffers */
897 np
= mh_xstrdup (name
);
898 vp
= mh_xstrdup (buf
);
900 /* if necessary, get rest of field */
901 while (state
== FLDPLUS
) {
903 state
= m_getfld2(&gstate
, name
, buf
, &bufsz
);
904 vp
= add (buf
, vp
); /* add to previous value */
907 if (strcasecmp (TYPE_FIELD
, np
)) {
908 fprintf (fpout
, "%s:%s", np
, vp
);
910 char *new_ctline
, *new_params
;
912 replace_param(&ct
->c_ctinfo
.ci_first_pm
,
913 &ct
->c_ctinfo
.ci_last_pm
, "boundary",
916 new_ctline
= concat(" ", ct
->c_ctinfo
.ci_type
, "/",
917 ct
->c_ctinfo
.ci_subtype
, NULL
);
918 new_params
= output_params(LEN(TYPE_FIELD
) +
919 strlen(new_ctline
) + 1,
920 ct
->c_ctinfo
.ci_first_pm
, NULL
, 0);
921 fprintf (fpout
, "%s:%s%s\n", np
, new_ctline
,
922 FENDNULL(new_params
));
934 /* buf will have a terminating NULL, skip it. */
935 if ((int) fwrite (buf
, 1, bufsz
-1, fpout
) < bufsz
-1) {
936 advise (file
, "fwrite");
945 inform("message format error in component #%d", compnum
);
950 inform("getfld() returned %d", state
);
958 m_getfld_state_destroy (&gstate
);
967 * Fix Content-Type header to reflect the content of its part.
970 fix_types (CT ct
, svector_t fixtypes
, int *message_mods
)
974 switch (ct
->c_type
) {
976 struct multipart
*m
= (struct multipart
*) ct
->c_ctparams
;
979 for (part
= m
->mp_parts
; status
== OK
&& part
; part
= part
->mp_next
) {
980 status
= fix_types (part
->mp_part
, fixtypes
, message_mods
);
986 if (ct
->c_subtype
== MESSAGE_EXTERNAL
) {
987 struct exbody
*e
= (struct exbody
*) ct
->c_ctparams
;
989 status
= fix_types (e
->eb_content
, fixtypes
, message_mods
);
996 if (ct
->c_ctinfo
.ci_type
&& ct
->c_ctinfo
.ci_subtype
) {
997 for (typep
= svector_strs (fixtypes
);
998 typep
&& (type
= *typep
);
1000 char *type_subtype
=
1001 concat (ct
->c_ctinfo
.ci_type
, "/", ct
->c_ctinfo
.ci_subtype
,
1004 if (! strcasecmp (type
, type_subtype
) &&
1005 decode_part (ct
) == OK
&&
1006 ct
->c_cefile
.ce_file
!= NULL
) {
1007 char *ct_type_subtype
= mime_type (ct
->c_cefile
.ce_file
);
1010 if ((cp
= strchr (ct_type_subtype
, ';'))) {
1011 /* Truncate to remove any parameter list from
1012 mime_type () result. */
1016 if (strcasecmp (type
, ct_type_subtype
)) {
1017 char *ct_type
, *ct_subtype
;
1020 /* The Content-Type header does not match the
1021 content, so update these struct Content
1024 * c_ctinfo.ci_type, c_ctinfo.ci_subtype
1027 /* Extract type and subtype from type/subtype. */
1028 ct_type
= mh_xstrdup(ct_type_subtype
);
1029 if ((cp
= strchr (ct_type
, '/'))) {
1031 ct_subtype
= mh_xstrdup(++cp
);
1033 inform("missing / in MIME type of %s %s",
1034 ct
->c_file
, ct
->c_partno
);
1039 ct
->c_type
= ct_str_type (ct_type
);
1040 ct
->c_subtype
= ct_str_subtype (ct
->c_type
, ct_subtype
);
1042 free (ct
->c_ctinfo
.ci_type
);
1043 ct
->c_ctinfo
.ci_type
= ct_type
;
1044 free (ct
->c_ctinfo
.ci_subtype
);
1045 ct
->c_ctinfo
.ci_subtype
= ct_subtype
;
1046 if (! replace_substring (&ct
->c_ctline
, type
,
1048 inform("did not find %s in %s",
1049 type
, ct
->c_ctline
);
1052 /* Update Content-Type header field. */
1053 for (hf
= ct
->c_first_hf
; hf
; hf
= hf
->next
) {
1054 if (! strcasecmp (TYPE_FIELD
, hf
->name
)) {
1055 if (replace_substring (&hf
->value
, type
,
1059 report (NULL
, ct
->c_partno
, ct
->c_file
,
1060 "change Content-Type in header "
1062 type
, ct_type_subtype
);
1066 inform("did not find %s in %s", type
, hf
->value
);
1070 free (ct_type_subtype
);
1072 free (type_subtype
);
/*
 * Replace a substring, allocating space to hold the new one.
 *
 * Looks for the first occurrence of old in *str.  If found, a new string
 * is built from the text before the match, new, and the text after the
 * match; the previous *str is freed, *str is pointed at the new string,
 * and that new string is returned.  If old does not occur in *str, *str
 * is left alone and NULL is returned.
 */
static char *
replace_substring (char **str, const char *old, const char *new)
{
    char *cp;

    if ((cp = strstr (*str, old))) {
        char *remainder = cp + strlen (old);
        char *prefix, *new_str;

        if (cp != *str) {
            /* Copy the whole string, then cut it off where the match
               begins, to get just the leading text. */
            prefix = mh_xstrdup (*str);
            *(prefix + (cp - *str)) = '\0';
            new_str = concat (prefix, new, remainder, NULL);
            free (prefix);
        } else {
            /* The match is at the very start, so there is no prefix. */
            new_str = concat (new, remainder, NULL);
        }

        /* remainder points into *str, so release it only after concat. */
        free (*str);

        return *str = new_str;
    }

    return NULL;
}
1112 * Remove a name=value parameter, given just its name, from a header value.
1115 remove_parameter (char *str
, const char *name
)
1117 /* It looks to me, based on the BNF in RFC 2045, that there can't
1118 be whitespace between the parameter name and the "=", or
1119 between the "=" and the parameter value. */
1120 char *param_name
= concat (name
, "=", NULL
);
1123 if ((cp
= strstr (str
, param_name
))) {
1127 /* Remove any leading spaces, before the parameter name. */
1129 start
> str
&& isspace ((unsigned char) *(start
-1));
1133 /* Remove a leading semicolon. */
1134 if (start
> str
&& *(start
-1) == ';') { --start
; }
1136 end
= cp
+ strlen (name
) + 1;
1138 /* Skip past the quoted value, and then the final quote. */
1139 for (++end
; *end
&& *end
!= '"'; ++end
) { continue; }
1142 /* Skip past the value. */
1143 for (++end
; *end
&& ! isspace ((unsigned char) *end
); ++end
) {}
1146 /* Count how many characters need to be moved. Include
1147 trailing null, which is accounted for by the
1148 initialization of count to 1. */
1149 for (cp
= end
; *cp
; ++cp
) { ++count
; }
1150 (void) memmove (start
, end
, count
);
1160 * Fix Content-Transfer-Encoding of a composite, e.g., message or multipart, part.
1161 * According to RFC 2045 Sec. 6.4, it must be 7bit, 8bit, or binary. Set it to
1165 fix_composite_cte (CT ct
, int *message_mods
)
1169 if (ct
->c_type
== CT_MESSAGE
|| ct
->c_type
== CT_MULTIPART
) {
1170 if (ct
->c_encoding
!= CE_7BIT
&& ct
->c_encoding
!= CE_8BIT
&&
1171 ct
->c_encoding
!= CE_BINARY
) {
1174 for (hf
= ct
->c_first_hf
; hf
; hf
= hf
->next
) {
1175 char *name
= hf
->name
;
1176 for (; isspace((unsigned char)*name
); ++name
) {
1180 if (! strncasecmp (name
, ENCODING_FIELD
,
1181 LEN(ENCODING_FIELD
))) {
1182 char *prefix
= "Nmh-REPLACED-INVALID-";
1186 h
->name
= mh_xstrdup (hf
->name
);
1187 h
->hf_encoding
= hf
->hf_encoding
;
1191 /* Retain old header but prefix its name. */
1193 hf
->name
= concat (prefix
, h
->name
, NULL
);
1197 char *encoding
= cpytrim (hf
->value
);
1198 report (NULL
, ct
->c_partno
, ct
->c_file
,
1199 "replace Content-Transfer-Encoding of %s "
1200 "with 8 bit", encoding
);
1204 h
->value
= mh_xstrdup (" 8bit\n");
1206 /* Don't need to warn for multiple C-T-E header
1207 fields, parse_mime() already does that. But
1208 if there are any, fix them all as necessary. */
1213 set_ce (ct
, CE_8BIT
);
1216 if (ct
->c_type
== CT_MULTIPART
) {
1217 struct multipart
*m
;
1220 m
= (struct multipart
*) ct
->c_ctparams
;
1221 for (part
= m
->mp_parts
; part
; part
= part
->mp_next
) {
1222 if (fix_composite_cte (part
->mp_part
, message_mods
) != OK
) {
1235 * Set content encoding.
1238 set_ce (CT ct
, int encoding
)
1240 const char *ce
= ce_str (encoding
);
1241 const struct str2init
*ctinit
= get_ce_method (ce
);
1244 char *cte
= concat (" ", ce
, "\n", NULL
);
1245 bool found_cte
= false;
1247 /* Decoded contents might be in ct->c_cefile.ce_file, if the
1248 caller is decode_text_parts (). Save because we'll
1250 struct cefile decoded_content_info
= ct
->c_cefile
;
1252 ct
->c_encoding
= encoding
;
1254 ct
->c_ctinitfnx
= ctinit
->si_init
;
1255 /* This will assign ct->c_cefile with an all-0 struct, which
1257 (*ctinit
->si_init
) (ct
);
1258 /* After returning, the caller should set
1259 ct->c_cefile.ce_file to the name of the file containing
1262 if (ct
->c_ceclosefnx
) {
1263 (*ct
->c_ceclosefnx
) (ct
);
1266 /* Restore the cefile. */
1267 ct
->c_cefile
= decoded_content_info
;
1269 /* Update/add Content-Transfer-Encoding header field. */
1270 for (hf
= ct
->c_first_hf
; hf
; hf
= hf
->next
) {
1271 if (! strcasecmp (ENCODING_FIELD
, hf
->name
)) {
1278 add_header (ct
, mh_xstrdup (ENCODING_FIELD
), cte
);
1281 /* Update c_celine. It's used only by mhlist -debug. */
1282 free (ct
->c_celine
);
1283 ct
->c_celine
= mh_xstrdup (cte
);
1293 * Make sure each text part has a corresponding text/plain part.
1296 ensure_text_plain (CT
*ct
, CT parent
, int *message_mods
, int replacetextplain
)
1300 switch ((*ct
)->c_type
) {
1302 /* Nothing to do for text/plain. */
1303 if ((*ct
)->c_subtype
== TEXT_PLAIN
) { return OK
; }
1305 if (parent
&& parent
->c_type
== CT_MULTIPART
&&
1306 parent
->c_subtype
== MULTI_ALTERNATE
) {
1307 int new_subpart_number
= 1;
1308 int has_text_plain
=
1309 find_textplain_sibling (parent
, replacetextplain
,
1310 &new_subpart_number
);
1312 if (! has_text_plain
) {
1313 /* Parent is a multipart/alternative. Insert a new
1314 text/plain subpart. */
1315 const int inserted
=
1316 insert_new_text_plain_part (*ct
, new_subpart_number
,
1321 report (NULL
, parent
->c_partno
, parent
->c_file
,
1322 "insert text/plain part");
1328 } else if (parent
&& parent
->c_type
== CT_MULTIPART
&&
1329 parent
->c_subtype
== MULTI_RELATED
) {
1330 char *type_subtype
=
1331 concat ((*ct
)->c_ctinfo
.ci_type
, "/",
1332 (*ct
)->c_ctinfo
.ci_subtype
, NULL
);
1333 const char *parent_type
=
1334 get_param (parent
->c_ctinfo
.ci_first_pm
, "type", '?', 1);
1335 int new_subpart_number
= 1;
1336 int has_text_plain
= 0;
1338 /* Have to do string comparison on the subtype because we
1339 don't enumerate all of them in c_subtype values.
1340 parent_type will be NULL if the multipart/related part
1341 doesn't have a type parameter. The type parameter must
1342 be specified according to RFC 2387 Sec. 3.1 but not all
1344 if (parent_type
&& strcasecmp (type_subtype
, parent_type
) == 0) {
1345 /* The type of this part matches the root type of the
1346 parent multipart/related. Look to see if there's
1347 text/plain sibling. */
1349 find_textplain_sibling (parent
, replacetextplain
,
1350 &new_subpart_number
);
1353 free (type_subtype
);
1355 if (! has_text_plain
) {
1356 struct multipart
*mp
= (struct multipart
*) parent
->c_ctparams
;
1360 for (part
= mp
->mp_parts
; part
; part
= part
->mp_next
) {
1361 if (*ct
!= part
->mp_part
) {
1367 /* Parent is a multipart/related. Insert a new
1368 text/plain subpart in a new multipart/alternative. */
1369 if (insert_into_new_mp_alt (ct
, message_mods
)) {
1370 /* Not an error if text/plain couldn't be added. */
1373 /* There are no siblings, so insert a new text/plain
1374 subpart, and change the parent type from
1375 multipart/related to multipart/alternative. */
1376 const int inserted
=
1377 insert_new_text_plain_part (*ct
, new_subpart_number
,
1383 parent
->c_subtype
= MULTI_ALTERNATE
;
1384 free (parent
->c_ctinfo
.ci_subtype
);
1385 parent
->c_ctinfo
.ci_subtype
= mh_xstrdup("alternative");
1386 if (! replace_substring (&parent
->c_ctline
, "/related",
1388 inform("did not find multipart/related in %s",
1392 /* Update Content-Type header field. */
1393 for (hf
= parent
->c_first_hf
; hf
; hf
= hf
->next
) {
1394 if (! strcasecmp (TYPE_FIELD
, hf
->name
)) {
1395 if (replace_substring (&hf
->value
, "/related",
1399 report (NULL
, parent
->c_partno
,
1401 "insert text/plain part");
1404 /* Remove, e.g., type="text/html" from
1405 multipart/alternative. */
1406 remove_parameter (hf
->value
, "type");
1409 inform("did not find multipart/"
1410 "related in header %s", hf
->value
);
1414 /* Not an error if text/plain couldn't be inserted. */
1419 if (insert_into_new_mp_alt (ct
, message_mods
)) {
1426 case CT_MULTIPART
: {
1427 struct multipart
*mp
= (struct multipart
*) (*ct
)->c_ctparams
;
1430 for (part
= mp
->mp_parts
; status
== OK
&& part
; part
= part
->mp_next
) {
1431 if ((*ct
)->c_type
== CT_MULTIPART
) {
1432 status
= ensure_text_plain (&part
->mp_part
, *ct
, message_mods
,
1440 if ((*ct
)->c_subtype
== MESSAGE_EXTERNAL
) {
1441 struct exbody
*e
= (struct exbody
*) (*ct
)->c_ctparams
;
1443 status
= ensure_text_plain (&e
->eb_content
, *ct
, message_mods
,
1454 * See if there is a sibling text/plain, and return its subpart number.
1457 find_textplain_sibling (CT parent
, int replacetextplain
,
1458 int *new_subpart_number
)
1460 struct multipart
*mp
= (struct multipart
*) parent
->c_ctparams
;
1461 struct part
*part
, *prev
;
1462 bool has_text_plain
= false;
1464 for (prev
= part
= mp
->mp_parts
; part
; part
= part
->mp_next
) {
1465 ++*new_subpart_number
;
1466 if (part
->mp_part
->c_type
== CT_TEXT
&&
1467 part
->mp_part
->c_subtype
== TEXT_PLAIN
) {
1468 if (replacetextplain
) {
1469 struct part
*old_part
;
1470 if (part
== mp
->mp_parts
) {
1471 old_part
= mp
->mp_parts
;
1472 mp
->mp_parts
= part
->mp_next
;
1474 old_part
= prev
->mp_next
;
1475 prev
->mp_next
= part
->mp_next
;
1478 report (NULL
, parent
->c_partno
, parent
->c_file
,
1479 "remove text/plain part %s",
1480 old_part
->mp_part
->c_partno
);
1482 free_content (old_part
->mp_part
);
1485 has_text_plain
= true;
1492 return has_text_plain
;
1497 * Insert a new text/plain part.
1500 insert_new_text_plain_part (CT ct
, int new_subpart_number
, CT parent
)
1502 struct multipart
*mp
= (struct multipart
*) parent
->c_ctparams
;
1503 struct part
*new_part
;
1506 if ((new_part
->mp_part
= build_text_plain_part (ct
))) {
1508 snprintf (buffer
, sizeof buffer
, "%d", new_subpart_number
);
1510 new_part
->mp_next
= mp
->mp_parts
;
1511 mp
->mp_parts
= new_part
;
1512 new_part
->mp_part
->c_partno
=
1513 concat (parent
->c_partno
? parent
->c_partno
: "1", ".",
1519 free_content (new_part
->mp_part
);
1527 * Create a text/plain part to go along with non-plain sibling part.
1530 build_text_plain_part (CT encoded_part
)
1532 CT tp_part
= divide_part (encoded_part
);
1533 char *tmp_plain_file
= NULL
;
1535 if (decode_part (tp_part
) == OK
) {
1536 /* Now, tp_part->c_cefile.ce_file is the name of the tmp file that
1537 contains the decoded contents. And the decoding function, such
1538 as openQuoted, will have set ...->ce_unlink to 1 so that it will
1539 be unlinked by free_content (). */
1542 /* This m_mktemp2() call closes the temp file. */
1543 if ((tempfile
= m_mktemp2 (NULL
, invo_name
, NULL
, NULL
)) == NULL
) {
1544 inform("unable to create temporary file in %s",
1547 tmp_plain_file
= mh_xstrdup (tempfile
);
1548 if (reformat_part (tp_part
, tmp_plain_file
,
1549 tp_part
->c_ctinfo
.ci_type
,
1550 tp_part
->c_ctinfo
.ci_subtype
,
1551 tp_part
->c_type
) == OK
) {
1557 free_content (tp_part
);
1558 if (tmp_plain_file
) { (void) m_unlink (tmp_plain_file
); }
1559 free (tmp_plain_file
);
1566 * Slip new text/plain part into a new multipart/alternative.
1569 insert_into_new_mp_alt (CT
*ct
, int *message_mods
)
1571 CT tp_part
= build_text_plain_part (*ct
);
1575 CT mp_alt
= build_multipart_alt (*ct
, tp_part
, CT_MULTIPART
,
1578 struct multipart
*mp
= (struct multipart
*) mp_alt
->c_ctparams
;
1580 if (mp
&& mp
->mp_parts
) {
1581 mp
->mp_parts
->mp_part
= tp_part
;
1582 /* Make the new multipart/alternative the parent. */
1587 report (NULL
, (*ct
)->c_partno
, (*ct
)->c_file
,
1588 "insert text/plain part");
1591 free_content (tp_part
);
1592 free_content (mp_alt
);
1599 /* Not an error if text/plain couldn't be built. */
1607 * Clone a MIME part.
1615 /* Just copy over what is needed for decoding. c_vrsn and
1616 c_celine aren't necessary. */
1617 new_part
->c_file
= mh_xstrdup (ct
->c_file
);
1618 new_part
->c_begin
= ct
->c_begin
;
1619 new_part
->c_end
= ct
->c_end
;
1620 copy_ctinfo (&new_part
->c_ctinfo
, &ct
->c_ctinfo
);
1621 new_part
->c_type
= ct
->c_type
;
1622 new_part
->c_cefile
= ct
->c_cefile
;
1623 new_part
->c_encoding
= ct
->c_encoding
;
1624 new_part
->c_ctinitfnx
= ct
->c_ctinitfnx
;
1625 new_part
->c_ceopenfnx
= ct
->c_ceopenfnx
;
1626 new_part
->c_ceclosefnx
= ct
->c_ceclosefnx
;
1627 new_part
->c_cesizefnx
= ct
->c_cesizefnx
;
1629 /* c_ctline is used by reformat_part(), so it can preserve
1630 anything after the type/subtype. */
1631 new_part
->c_ctline
= mh_xstrdup (ct
->c_ctline
);
1638 * Copy the content info from one part to another.
1641 copy_ctinfo (CI dest
, CI src
)
1645 dest
->ci_type
= src
->ci_type
? mh_xstrdup (src
->ci_type
) : NULL
;
1646 dest
->ci_subtype
= src
->ci_subtype
? mh_xstrdup (src
->ci_subtype
) : NULL
;
1648 for (s_pm
= src
->ci_first_pm
; s_pm
; s_pm
= s_pm
->pm_next
) {
1649 d_pm
= add_param(&dest
->ci_first_pm
, &dest
->ci_last_pm
, s_pm
->pm_name
,
1651 if (s_pm
->pm_charset
) {
1652 d_pm
->pm_charset
= mh_xstrdup(s_pm
->pm_charset
);
1654 if (s_pm
->pm_lang
) {
1655 d_pm
->pm_lang
= mh_xstrdup(s_pm
->pm_lang
);
1659 dest
->ci_comment
= src
->ci_comment
? mh_xstrdup (src
->ci_comment
) : NULL
;
1660 dest
->ci_magic
= src
->ci_magic
? mh_xstrdup (src
->ci_magic
) : NULL
;
1675 if ((tempfile
= m_mktemp2 (NULL
, invo_name
, NULL
, &file
)) == NULL
) {
1676 die("unable to create temporary file in %s", get_temp_dir());
1678 tmp_decoded
= mh_xstrdup (tempfile
);
1679 /* The following call will load ct->c_cefile.ce_file with the tmp
1680 filename of the decoded content. tmp_decoded will contain the
1681 encoded output, get rid of that. */
1682 status
= output_message_fp (ct
, file
, tmp_decoded
);
1683 (void) m_unlink (tmp_decoded
);
1685 if (fclose (file
)) {
1686 inform("unable to close temporary file %s, continuing...", tempfile
);
1694 * Reformat content as plain text.
1695 * Some of the arguments aren't really needed now, but maybe will
1696 * be in the future for other than text types.
1699 reformat_part (CT ct
, char *file
, char *type
, char *subtype
, int c_type
)
1701 int output_subtype
, output_encoding
;
1702 const char *reason
= NULL
;
1706 /* Hacky: this redirects the output from whatever command is used
1707 to show the part to a file. So, the user can't have any output
1708 redirection in that command.
1709 Could show_multi() in mhshowsbr.c avoid this? */
1711 /* Check for invo_name-format-type/subtype. */
1712 if ((cf
= context_find_by_type ("format", type
, subtype
)) == NULL
) {
1714 inform("Don't know how to convert %s, there is no "
1715 "%s-format-%s/%s profile entry",
1716 ct
->c_file
, invo_name
, type
, subtype
);
1720 if (strchr (cf
, '>')) {
1721 inform("'>' prohibited in \"%s\",\nplease fix your "
1722 "%s-format-%s/%s profile entry", cf
, invo_name
, type
,
1728 cp
= concat (cf
, " >", file
, NULL
);
1729 status
= show_content_aux (ct
, 0, cp
, NULL
, NULL
);
1732 /* Unlink decoded content tmp file and free its filename to avoid
1733 leaks. The file stream should already have been closed. */
1734 if (ct
->c_cefile
.ce_unlink
) {
1735 (void) m_unlink (ct
->c_cefile
.ce_file
);
1736 free (ct
->c_cefile
.ce_file
);
1737 ct
->c_cefile
.ce_file
= NULL
;
1738 ct
->c_cefile
.ce_unlink
= 0;
1741 if (c_type
== CT_TEXT
) {
1742 output_subtype
= TEXT_PLAIN
;
1744 /* Set subtype to 0, which is always an UNKNOWN subtype. */
1748 output_encoding
= content_encoding (ct
, &reason
);
1750 set_ct_type (ct
, c_type
, output_subtype
, output_encoding
) == OK
) {
1751 ct
->c_cefile
.ce_file
= file
;
1752 ct
->c_cefile
.ce_unlink
= 1;
1754 ct
->c_cefile
.ce_unlink
= 0;
1763 * Fill in a multipart/alternative part.
1766 build_multipart_alt (CT first_alt
, CT new_part
, int type
, int subtype
)
1768 char *boundary_prefix
= "----=_nmh-multipart";
1769 char *boundary
= concat (boundary_prefix
, first_alt
->c_partno
, NULL
);
1770 char *boundary_indicator
= "; boundary=";
1771 char *typename
, *subtypename
, *name
;
1774 struct multipart
*m
;
1775 const struct str2init
*ctinit
;
1779 /* Set up the multipart/alternative part. These fields of *ct were
1780 initialized to 0 by mh_xcalloc():
1781 c_fp, c_unlink, c_begin, c_end,
1782 c_vrsn, c_ctline, c_celine,
1783 c_id, c_descr, c_dispo, c_partno,
1784 c_ctinfo.ci_comment, c_ctinfo.ci_magic,
1785 c_cefile, c_encoding,
1786 c_digested, c_digest[16], c_ctexbody,
1787 c_ctinitfnx, c_ceopenfnx, c_ceclosefnx, c_cesizefnx,
1789 c_showproc, c_termproc, c_storeproc, c_storage, c_folder
1792 ct
->c_file
= mh_xstrdup (first_alt
->c_file
);
1794 ct
->c_subtype
= subtype
;
1796 ctinit
= get_ct_init (ct
->c_type
);
1798 typename
= ct_type_str (type
);
1799 subtypename
= ct_subtype_str (type
, subtype
);
1803 int found_boundary
= 1;
1805 while (found_boundary
&& serial
< 1000000) {
1808 /* Ensure that the boundary doesn't appear in the decoded
1810 if (new_part
->c_cefile
.ce_file
) {
1811 if ((found_boundary
=
1812 boundary_in_content (&new_part
->c_cefile
.ce_fp
,
1813 new_part
->c_cefile
.ce_file
,
1814 boundary
)) == NOTOK
) {
1819 /* Ensure that the boundary doesn't appear in the encoded
1821 if (! found_boundary
&& new_part
->c_file
) {
1822 if ((found_boundary
=
1823 boundary_in_content (&new_part
->c_fp
,
1825 boundary
)) == NOTOK
) {
1830 if (found_boundary
) {
1831 /* Try a slightly different boundary. */
1836 snprintf (buffer2
, sizeof buffer2
, "%d", serial
);
1838 concat (boundary_prefix
,
1839 FENDNULL(first_alt
->c_partno
),
1840 "-", buffer2
, NULL
);
1844 if (found_boundary
) {
1845 inform("giving up trying to find a unique boundary");
1850 name
= concat (" ", typename
, "/", subtypename
, boundary_indicator
, "\"",
1851 boundary
, "\"", NULL
);
1853 /* Load c_first_hf and c_last_hf. */
1854 transfer_noncontent_headers (first_alt
, ct
);
1855 add_header (ct
, mh_xstrdup (TYPE_FIELD
), concat (name
, "\n", NULL
));
1858 /* Load c_partno. */
1859 if (first_alt
->c_partno
) {
1860 ct
->c_partno
= mh_xstrdup (first_alt
->c_partno
);
1861 free (first_alt
->c_partno
);
1862 first_alt
->c_partno
= concat (ct
->c_partno
, ".1", NULL
);
1863 new_part
->c_partno
= concat (ct
->c_partno
, ".2", NULL
);
1865 first_alt
->c_partno
= mh_xstrdup ("1");
1866 new_part
->c_partno
= mh_xstrdup ("2");
1870 ct
->c_ctinfo
.ci_type
= mh_xstrdup (typename
);
1871 ct
->c_ctinfo
.ci_subtype
= mh_xstrdup (subtypename
);
1874 add_param(&ct
->c_ctinfo
.ci_first_pm
, &ct
->c_ctinfo
.ci_last_pm
,
1875 "boundary", boundary
, 0);
1879 p
->mp_next
->mp_next
= NULL
;
1880 p
->mp_next
->mp_part
= first_alt
;
1883 m
->mp_start
= concat (boundary
, "\n", NULL
);
1884 m
->mp_stop
= concat (boundary
, "--\n", NULL
);
1900 * Check that the boundary does not appear in the content.
1903 boundary_in_content (FILE **fp
, char *file
, const char *boundary
)
1905 char buffer
[NMH_BUFSIZ
];
1907 bool found_boundary
= false;
1909 /* free_content() will close *fp if we fopen it here. */
1910 if (! *fp
&& (*fp
= fopen (file
, "r")) == NULL
) {
1911 advise (file
, "unable to open %s for reading", file
);
1915 fseeko (*fp
, 0L, SEEK_SET
);
1916 while ((bytes_read
= fread (buffer
, 1, sizeof buffer
, *fp
)) > 0) {
1917 if (find_str (buffer
, bytes_read
, boundary
)) {
1918 found_boundary
= true;
1923 return found_boundary
;
1928 * Remove all non-Content headers.
1931 transfer_noncontent_headers (CT old
, CT
new)
1935 hp_prev
= hp
= old
->c_first_hf
;
1939 if (strncasecmp (XXX_FIELD_PRF
, hp
->name
, LEN(XXX_FIELD_PRF
))) {
1940 if (hp
== old
->c_last_hf
) {
1941 if (hp
== old
->c_first_hf
) {
1942 old
->c_last_hf
= old
->c_first_hf
= NULL
;
1944 hp_prev
->next
= NULL
;
1945 old
->c_last_hf
= hp_prev
;
1948 if (hp
== old
->c_first_hf
) {
1949 old
->c_first_hf
= next
;
1951 hp_prev
->next
= next
;
1955 /* Put node hp in the new CT. */
1956 if (new->c_first_hf
== NULL
) {
1957 new->c_first_hf
= hp
;
1959 new->c_last_hf
->next
= hp
;
1961 new->c_last_hf
= hp
;
1963 /* A Content- header, leave in old. */
1976 set_ct_type (CT ct
, int type
, int subtype
, int encoding
)
1978 char *typename
= ct_type_str (type
);
1979 char *subtypename
= ct_subtype_str (type
, subtype
);
1980 /* E.g, " text/plain" */
1981 char *type_subtypename
= concat (" ", typename
, "/", subtypename
, NULL
);
1982 /* E.g, " text/plain\n" */
1983 char *name_plus_nl
= concat (type_subtypename
, "\n", NULL
);
1984 bool found_content_type
= false;
1986 const char *cp
= NULL
;
1990 /* Update/add Content-Type header field. */
1991 for (hf
= ct
->c_first_hf
; hf
; hf
= hf
->next
) {
1992 if (! strcasecmp (TYPE_FIELD
, hf
->name
)) {
1993 found_content_type
= true;
1995 hf
->value
= (cp
= strchr (ct
->c_ctline
, ';'))
1996 ? concat (type_subtypename
, cp
, "\n", NULL
)
1997 : mh_xstrdup (name_plus_nl
);
2000 if (! found_content_type
) {
2001 add_header (ct
, mh_xstrdup (TYPE_FIELD
),
2002 (cp
= strchr (ct
->c_ctline
, ';'))
2003 ? concat (type_subtypename
, cp
, "\n", NULL
)
2004 : mh_xstrdup (name_plus_nl
));
2007 /* Some of these might not be used, but set them anyway. */
2009 ? concat (type_subtypename
, cp
, NULL
)
2010 : concat (type_subtypename
, NULL
);
2011 free (ct
->c_ctline
);
2012 ct
->c_ctline
= ctline
;
2013 /* Leave other ctinfo members as they were. */
2014 free (ct
->c_ctinfo
.ci_type
);
2015 ct
->c_ctinfo
.ci_type
= mh_xstrdup (typename
);
2016 free (ct
->c_ctinfo
.ci_subtype
);
2017 ct
->c_ctinfo
.ci_subtype
= mh_xstrdup (subtypename
);
2019 ct
->c_subtype
= subtype
;
2021 free (name_plus_nl
);
2022 free (type_subtypename
);
2024 status
= set_ce (ct
, encoding
);
2031 * It's not necessary to update the charset parameter of a Content-Type
2032 * header for a text part. According to RFC 2045 Sec. 6.4, the body
2033 * (content) was originally in the specified charset, "and will be in
2034 * that character set again after decoding."
2037 decode_text_parts (CT ct
, int encoding
, const char *decodetypes
,
2041 int lf_line_endings
= 0;
2043 switch (ct
->c_type
) {
2044 case CT_MULTIPART
: {
2045 struct multipart
*m
= (struct multipart
*) ct
->c_ctparams
;
2048 /* Should check to see if the body for this part is encoded?
2049 For now, it gets passed along as-is by InitMultiPart(). */
2050 for (part
= m
->mp_parts
; status
== OK
&& part
; part
= part
->mp_next
) {
2051 status
= decode_text_parts (part
->mp_part
, encoding
, decodetypes
,
2058 if (ct
->c_subtype
== MESSAGE_EXTERNAL
) {
2059 struct exbody
*e
= (struct exbody
*) ct
->c_ctparams
;
2061 status
= decode_text_parts (e
->eb_content
, encoding
, decodetypes
,
2067 if (! should_decode(decodetypes
, ct
->c_ctinfo
.ci_type
, ct
->c_ctinfo
.ci_subtype
)) {
2072 ct
->c_ctparams
&& ((struct text
*) ct
->c_ctparams
)->lf_line_endings
;
2074 switch (ct
->c_encoding
) {
2079 if (decode_part (ct
) == OK
&& ct
->c_cefile
.ce_file
) {
2080 const char *reason
= NULL
;
2082 if ((ct_encoding
= content_encoding (ct
, &reason
)) == CE_BINARY
2083 && encoding
!= CE_BINARY
) {
2084 /* The decoding isn't acceptable so discard it.
2085 Leave status as OK to allow other transformations. */
2087 report (NULL
, ct
->c_partno
, ct
->c_file
,
2088 "will not decode%s because it is binary (%s)",
2090 : (FENDNULL(ct
->c_ctline
)),
2093 (void) m_unlink (ct
->c_cefile
.ce_file
);
2094 free (ct
->c_cefile
.ce_file
);
2095 ct
->c_cefile
.ce_file
= NULL
;
2096 } else if (ct
->c_encoding
== CE_QUOTED
&&
2097 ct_encoding
== CE_8BIT
&& encoding
== CE_7BIT
) {
2098 /* The decoding isn't acceptable so discard it.
2099 Leave status as OK to allow other transformations. */
2101 report (NULL
, ct
->c_partno
, ct
->c_file
,
2102 "will not decode%s because it is 8bit",
2104 : (FENDNULL(ct
->c_ctline
)));
2106 (void) m_unlink (ct
->c_cefile
.ce_file
);
2107 free (ct
->c_cefile
.ce_file
);
2108 ct
->c_cefile
.ce_file
= NULL
;
2112 if (ct_encoding
== CE_BINARY
) {
2114 } else if (ct_encoding
== CE_8BIT
&& encoding
== CE_7BIT
) {
2119 if (set_ce (ct
, enc
) == OK
) {
2122 report (NULL
, ct
->c_partno
, ct
->c_file
, "decode%s",
2123 FENDNULL(ct
->c_ctline
));
2125 if (lf_line_endings
) {
2126 strip_crs (ct
, message_mods
);
2139 if (lf_line_endings
) {
2140 strip_crs (ct
, message_mods
);
/*
 * Determine if the part with type[/subtype] should be decoded, according to
 * decodetypes (which came from the -decodetypes switch).
 *
 * decodetypes is treated as a comma-separated list.  The part matches if
 * the list has an entry equal to type or, when subtype is not NULL, to
 * type/subtype.  Comparison ignores case.  Returns non-zero on a match.
 */
static int
should_decode(const char *decodetypes, const char *type, const char *subtype)
{
    /* Bracket the list and each candidate with commas so that a plain
       substring search only matches whole entries. */
    char *haystack = concat(",", decodetypes, ",", NULL);
    char *needle = concat(",", type, ",", NULL);
    bool matched = nmh_strcasestr(haystack, needle) != NULL;

    free(needle);

    if (! matched  &&  subtype != NULL) {
        needle = concat(",", type, "/", subtype, ",", NULL);
        matched = nmh_strcasestr(haystack, needle) != NULL;
        free(needle);
    }

    free(haystack);

    return matched;
}
2189 * See if the decoded content is 7bit, 8bit, or binary. It's binary
2190 * if it has any NUL characters, a CR not followed by a LF, or lines
2191 * greater than 998 characters in length. If binary, reason is set
2192 * to a string explaining why.
2195 content_encoding (CT ct
, const char **reason
)
2197 CE ce
= &ct
->c_cefile
;
2198 int encoding
= CE_7BIT
;
2201 size_t line_len
= 0;
2202 char buffer
[NMH_BUFSIZ
];
2205 if (! ce
->ce_fp
&& (ce
->ce_fp
= fopen (ce
->ce_file
, "r")) == NULL
) {
2206 advise (ce
->ce_file
, "unable to open for reading");
2210 fseeko (ce
->ce_fp
, 0L, SEEK_SET
);
2211 while (encoding
!= CE_BINARY
&&
2212 (inbytes
= fread (buffer
, 1, sizeof buffer
, ce
->ce_fp
)) > 0) {
2215 int last_char_was_cr
= 0;
2217 for (i
= 0, cp
= buffer
; i
< inbytes
; ++i
, ++cp
) {
2218 if (*cp
== '\0' || ++line_len
> 998 ||
2219 (*cp
!= '\n' && last_char_was_cr
)) {
2220 encoding
= CE_BINARY
;
2222 *reason
= "null character";
2223 } else if (line_len
> 998) {
2224 *reason
= "line length > 998";
2225 } else if (*cp
!= '\n' && last_char_was_cr
) {
2226 *reason
= "CR not followed by LF";
2228 /* Should not reach this. */
2235 } else if (! isascii ((unsigned char) *cp
)) {
2239 last_char_was_cr
= *cp
== '\r';
2245 } /* else should never happen */
2252 * Strip carriage returns from content.
2255 strip_crs (CT ct
, int *message_mods
)
2257 char *charset
= content_charset (ct
);
2260 /* Only strip carriage returns if content is ASCII or another
2261 charset that has the same readily recognizable CR followed by a
2262 LF. We can include UTF-8 here because if the high-order bit of
2263 a UTF-8 byte is 0, then it must be a single-byte ASCII
2265 if (! strcasecmp (charset
, "US-ASCII") ||
2266 ! strcasecmp (charset
, "UTF-8") ||
2267 ! strncasecmp (charset
, "ISO-8859-", 9) ||
2268 ! strncasecmp (charset
, "WINDOWS-12", 10)) {
2273 bool has_crs
= false;
2274 bool opened_input_file
= false;
2276 if (ct
->c_cefile
.ce_file
) {
2277 file
= &ct
->c_cefile
.ce_file
;
2278 fp
= &ct
->c_cefile
.ce_fp
;
2280 } else if (ct
->c_file
) {
2283 begin
= (size_t) ct
->c_begin
;
2284 end
= (size_t) ct
->c_end
;
2285 } /* else don't know where the content is */
2287 if (file
&& *file
&& fp
) {
2289 if ((*fp
= fopen (*file
, "r")) == NULL
) {
2290 advise (*file
, "unable to open for reading");
2293 opened_input_file
= true;
2299 char buffer
[NMH_BUFSIZ
];
2301 size_t bytes_to_read
=
2302 end
> 0 && end
> begin
? end
- begin
: sizeof buffer
;
2304 fseeko (*fp
, begin
, SEEK_SET
);
2305 while ((bytes_read
= fread (buffer
, 1,
2306 min (bytes_to_read
, sizeof buffer
),
2308 /* Look for CR followed by a LF. This is supposed to
2309 be text so there should be LF's. If not, don't
2310 modify the content. */
2313 bool last_char_was_cr
= false;
2315 if (end
> 0) { bytes_to_read
-= bytes_read
; }
2317 for (i
= 0, cp
= buffer
; i
< bytes_read
; ++i
, ++cp
) {
2318 if (*cp
== '\n' && last_char_was_cr
) {
2323 last_char_was_cr
= *cp
== '\r';
2329 char *stripped_content_file
;
2330 char *tempfile
= m_mktemp2 (NULL
, invo_name
, &fd
, NULL
);
2332 if (tempfile
== NULL
) {
2333 die("unable to create temporary file in %s",
2336 stripped_content_file
= mh_xstrdup (tempfile
);
2338 /* Strip each CR before a LF from the content. */
2339 fseeko (*fp
, begin
, SEEK_SET
);
2340 while ((bytes_read
= fread (buffer
, 1, sizeof buffer
, *fp
)) >
2344 bool last_char_was_cr
= false;
2346 for (i
= 0, cp
= buffer
; i
< bytes_read
; ++i
, ++cp
) {
2348 last_char_was_cr
= true;
2349 } else if (last_char_was_cr
) {
2351 if (write (fd
, "\r", 1) < 0) {
2352 advise (tempfile
, "CR write");
2355 if (write (fd
, cp
, 1) < 0) {
2356 advise (tempfile
, "write");
2358 last_char_was_cr
= false;
2360 if (write (fd
, cp
, 1) < 0) {
2361 advise (tempfile
, "write");
2363 last_char_was_cr
= false;
2369 inform("unable to write temporary file %s, continuing...",
2370 stripped_content_file
);
2371 (void) m_unlink (stripped_content_file
);
2372 free(stripped_content_file
);
2375 /* Replace the decoded file with the converted one. */
2376 if (ct
->c_cefile
.ce_file
&& ct
->c_cefile
.ce_unlink
)
2377 (void) m_unlink (ct
->c_cefile
.ce_file
);
2379 free(ct
->c_cefile
.ce_file
);
2380 ct
->c_cefile
.ce_file
= stripped_content_file
;
2381 ct
->c_cefile
.ce_unlink
= 1;
2385 report (NULL
, ct
->c_partno
,
2386 begin
== 0 && end
== 0 ? "" : *file
,
2392 if (opened_input_file
) {
2406 * Add/update, if necessary, the message C-T-E, based on the least restrictive
2407 * of the part C-T-E's.
2412 const int least_restrictive_enc
= least_restrictive_encoding (ct
);
2414 if (least_restrictive_enc
!= CE_UNKNOWN
&&
2415 least_restrictive_enc
!= CE_7BIT
) {
2416 char *cte
= concat (" ", ce_str (least_restrictive_enc
), "\n", NULL
);
2418 bool found_cte
= false;
2420 /* Update/add Content-Transfer-Encoding header field. */
2421 for (hf
= ct
->c_first_hf
; hf
; hf
= hf
->next
) {
2422 if (! strcasecmp (ENCODING_FIELD
, hf
->name
)) {
2429 add_header (ct
, mh_xstrdup (ENCODING_FIELD
), cte
);
2436 * Find the least restrictive encoding (7bit, 8bit, binary) of the parts
2440 least_restrictive_encoding (CT ct
)
2442 int encoding
= CE_UNKNOWN
;
2444 switch (ct
->c_type
) {
2445 case CT_MULTIPART
: {
2446 struct multipart
*m
= (struct multipart
*) ct
->c_ctparams
;
2449 for (part
= m
->mp_parts
; part
; part
= part
->mp_next
) {
2450 const int part_encoding
=
2451 least_restrictive_encoding (part
->mp_part
);
2453 if (less_restrictive (encoding
, part_encoding
)) {
2454 encoding
= part_encoding
;
2461 if (ct
->c_subtype
== MESSAGE_EXTERNAL
) {
2462 struct exbody
*e
= (struct exbody
*) ct
->c_ctparams
;
2463 const int part_encoding
=
2464 least_restrictive_encoding (e
->eb_content
);
2466 if (less_restrictive (encoding
, part_encoding
)) {
2467 encoding
= part_encoding
;
2473 if (less_restrictive (encoding
, ct
->c_encoding
)) {
2474 encoding
= ct
->c_encoding
;
2483 * Return whether the second encoding is less restrictive than the first, where
2484 * "less restrictive" is in the sense used by RFC 2045 Secs. 6.1 and 6.4. So,
2485 * CE_BINARY is less restrictive than CE_8BIT and
2486 * CE_8BIT is less restrictive than CE_7BIT.
2489 less_restrictive (int encoding
, int second_encoding
)
2491 switch (second_encoding
) {
2493 return encoding
!= CE_BINARY
;
2495 return encoding
!= CE_BINARY
&& encoding
!= CE_8BIT
;
2497 return encoding
!= CE_BINARY
&& encoding
!= CE_8BIT
&&
2498 encoding
!= CE_7BIT
;
2506 * Convert character set of each part.
2509 convert_charsets (CT ct
, char *dest_charset
, int *message_mods
)
2513 switch (ct
->c_type
) {
2515 if (ct
->c_subtype
== TEXT_PLAIN
) {
2516 status
= convert_charset (ct
, dest_charset
, message_mods
);
2519 char *ct_charset
= content_charset (ct
);
2521 report (NULL
, ct
->c_partno
, ct
->c_file
,
2522 "convert %s to %s", ct_charset
, dest_charset
);
2526 char *ct_charset
= content_charset (ct
);
2528 report ("iconv", ct
->c_partno
, ct
->c_file
,
2529 "failed to convert %s to %s", ct_charset
, dest_charset
);
2535 case CT_MULTIPART
: {
2536 struct multipart
*m
= (struct multipart
*) ct
->c_ctparams
;
2539 /* Should check to see if the body for this part is encoded?
2540 For now, it gets passed along as-is by InitMultiPart(). */
2541 for (part
= m
->mp_parts
; status
== OK
&& part
; part
= part
->mp_next
) {
2543 convert_charsets (part
->mp_part
, dest_charset
, message_mods
);
2549 if (ct
->c_subtype
== MESSAGE_EXTERNAL
) {
2550 struct exbody
*e
= (struct exbody
*) ct
->c_ctparams
;
2553 convert_charsets (e
->eb_content
, dest_charset
, message_mods
);
2566 * Fix various problems that aren't handled elsewhere. These
2567 * are fixed unconditionally: there are no switches to disable
2568 * them. Currently, "problems" are these:
2569 * 1) remove extraneous semicolon at the end of a header parameter list
2570 * 2) replace RFC 2047 encoding with RFC 2231 encoding of name and
2571 * filename parameters in Content-Type and Content-Disposition
2572 * headers, respectively.
2575 fix_always (CT ct
, int *message_mods
)
2579 switch (ct
->c_type
) {
2580 case CT_MULTIPART
: {
2581 struct multipart
*m
= (struct multipart
*) ct
->c_ctparams
;
2584 for (part
= m
->mp_parts
; status
== OK
&& part
; part
= part
->mp_next
) {
2585 status
= fix_always (part
->mp_part
, message_mods
);
2591 if (ct
->c_subtype
== MESSAGE_EXTERNAL
) {
2592 struct exbody
*e
= (struct exbody
*) ct
->c_ctparams
;
2594 status
= fix_always (e
->eb_content
, message_mods
);
2601 if (ct
->c_first_hf
) {
2602 fix_filename_encoding (ct
);
2605 for (hf
= ct
->c_first_hf
; hf
; hf
= hf
->next
) {
2606 size_t len
= strlen (hf
->value
);
2608 if (strcasecmp (hf
->name
, TYPE_FIELD
) != 0 &&
2609 strcasecmp (hf
->name
, DISPO_FIELD
) != 0) {
2610 /* Only do this for Content-Type and
2611 Content-Disposition fields because those are the
2612 only headers that parse_mime() warns about. */
2616 /* whitespace following a trailing ';' will be nuked as well */
2617 if (hf
->value
[len
- 1] == '\n') {
2618 while (isspace((unsigned char)(hf
->value
[len
- 2]))) {
2619 if (len
-- == 0) { break; }
2623 if (hf
->value
[len
- 2] == ';') {
2624 /* Remove trailing ';' from parameter value. */
2625 hf
->value
[len
- 2] = '\n';
2626 hf
->value
[len
- 1] = '\0';
2628 /* Also, if Content-Type parameter, remove trailing ';'
2629 from ct->c_ctline. This probably isn't necessary
2631 if (strcasecmp(hf
->name
, TYPE_FIELD
) == 0 && ct
->c_ctline
) {
2632 size_t l
= strlen(ct
->c_ctline
) - 1;
2633 while (isspace((unsigned char)(ct
->c_ctline
[l
])) ||
2634 ct
->c_ctline
[l
] == ';') {
2635 ct
->c_ctline
[l
--] = '\0';
2636 if (l
== 0) { break; }
2642 report (NULL
, ct
->c_partno
, ct
->c_file
,
2643 "remove trailing ; from %s parameter value",
2655 * Decodes header field bodies that are RFC 2047 encoded-words in the
 * UTF-8 charset.  Similar to fix_filename_param(), but
2656 * does not modify any MIME parameter values.
2659 decode_header_field_bodies (CT ct
, int *message_mods
)
2663 switch (ct
->c_type
) {
2664 case CT_MULTIPART
: {
2665 struct multipart
*m
= (struct multipart
*) ct
->c_ctparams
;
2668 for (part
= m
->mp_parts
; status
== OK
&& part
; part
= part
->mp_next
) {
2669 status
= decode_header_field_bodies (part
->mp_part
, message_mods
);
2675 if (ct
->c_subtype
== MESSAGE_EXTERNAL
) {
2676 struct exbody
*e
= (struct exbody
*) ct
->c_ctparams
;
2678 status
= decode_header_field_bodies (e
->eb_content
, message_mods
);
2685 for (hf
= ct
->c_first_hf
; hf
; hf
= hf
->next
) {
2686 /* Only decode UTF-8 values. */
2687 if (hf
->value
&& has_suffix(hf
->value
, "?=\n") &&
2688 (! strncasecmp (hf
->value
, " =?utf8?", 8) ||
2689 ! strncasecmp (hf
->value
, " =?utf-8?", 9))) {
2690 /* Looks like an RFC 2047 encoded parameter. */
2691 char decoded
[PATH_MAX
+ 1];
2693 if (decode_rfc2047 (hf
->value
, decoded
, sizeof decoded
)) {
2694 const size_t len
= strlen(decoded
);
2696 /* decode_rfc2047() could truncate if the buffer fills up.
2697 Detect and discard if that happened. */
2698 if (len
< sizeof(decoded
) - 1 && strcmp(hf
->value
, decoded
)) {
2699 hf
->value
= mh_xrealloc (hf
->value
, len
+ 1);
2700 strncpy (hf
->value
, decoded
, len
+ 1);
2704 inform("failed to decode %s parameter %s", hf
->name
, hf
->value
);
2715 * Factor out common code for loops in fix_filename_encoding().
2718 fix_filename_param (char *name
, char *value
, PM
*first_pm
, PM
*last_pm
)
2722 if (has_prefix(value
, "=?") && has_suffix(value
, "?=")) {
2723 /* Looks like an RFC 2047 encoded parameter. */
2724 char decoded
[PATH_MAX
+ 1];
2726 if (decode_rfc2047 (value
, decoded
, sizeof decoded
)) {
2727 /* Encode using RFC 2231. */
2728 replace_param (first_pm
, last_pm
, name
, decoded
, 0);
2731 inform("failed to decode %s parameter %s", name
, value
);
2740 * Replace RFC 2047 encoding with RFC 2231 encoding of name and
2741 * filename parameters in Content-Type and Content-Disposition
2742 * headers, respectively.
2745 fix_filename_encoding (CT ct
)
2751 for (pm
= ct
->c_ctinfo
.ci_first_pm
; pm
; pm
= pm
->pm_next
) {
2752 if (pm
->pm_name
&& pm
->pm_value
&&
2753 strcasecmp (pm
->pm_name
, "name") == 0) {
2754 fixed
= fix_filename_param (pm
->pm_name
, pm
->pm_value
,
2755 &ct
->c_ctinfo
.ci_first_pm
,
2756 &ct
->c_ctinfo
.ci_last_pm
);
2760 for (pm
= ct
->c_dispo_first
; pm
; pm
= pm
->pm_next
) {
2761 if (pm
->pm_name
&& pm
->pm_value
&&
2762 strcasecmp (pm
->pm_name
, "filename") == 0) {
2763 fixed
= fix_filename_param (pm
->pm_name
, pm
->pm_value
,
2769 /* Fix hf values to correspond. */
2770 for (hf
= ct
->c_first_hf
; fixed
&& hf
; hf
= hf
->next
) {
2771 enum { OTHER
, TYPE_HEADER
, DISPO_HEADER
} field
= OTHER
;
2773 if (strcasecmp (hf
->name
, TYPE_FIELD
) == 0) {
2774 field
= TYPE_HEADER
;
2775 } else if (strcasecmp (hf
->name
, DISPO_FIELD
) == 0) {
2776 field
= DISPO_HEADER
;
2779 if (field
!= OTHER
) {
2780 const char *const semicolon_loc
= strchr (hf
->value
, ';');
2782 if (semicolon_loc
) {
2784 strlen (hf
->name
) + 1 + semicolon_loc
- hf
->value
;
2785 const char *const params
=
2787 field
== TYPE_HEADER
2788 ? ct
->c_ctinfo
.ci_first_pm
2789 : ct
->c_dispo_first
,
2791 const char *const new_params
= concat (params
, "\n", NULL
);
2793 replace_substring (&hf
->value
, semicolon_loc
, new_params
);
2794 free((void *)new_params
); /* Cast away const. Sigh. */
2795 free((void *)params
);
2797 inform("did not find semicolon in %s:%s\n",
2798 hf
->name
, hf
->value
);
2808 * Output content in input file to output file.
2811 write_content (CT ct
, const char *input_filename
, char *outfile
, FILE *outfp
,
2812 int modify_inplace
, int message_mods
)
2816 if (modify_inplace
) {
2817 if (message_mods
> 0) {
2818 if ((status
= output_message_fp (ct
, outfp
, outfile
)) == OK
) {
2819 char *infile
= input_filename
2820 ? mh_xstrdup (input_filename
)
2821 : mh_xstrdup (ct
->c_file
? ct
->c_file
: "-");
2823 if (remove_file (infile
) == OK
) {
2824 if (rename (outfile
, infile
)) {
2825 /* Rename didn't work, possibly because of an
2826 attempt to rename across filesystems. Try
2827 brute force copy. */
2828 int old
= open (outfile
, O_RDONLY
);
2830 open (infile
, O_WRONLY
| O_CREAT
, m_gmprot ());
2833 if (old
!= -1 && new != -1) {
2834 char buffer
[NMH_BUFSIZ
];
2836 while ((i
= read (old
, buffer
, sizeof buffer
)) >
2838 if (write (new, buffer
, i
) != i
) {
2844 if (new != -1) { close (new); }
2845 if (old
!= -1) { close (old
); }
2846 (void) m_unlink (outfile
);
2849 /* The -file argument processing used path() to
2850 expand filename to absolute path. */
2851 int file
= ct
->c_file
&& ct
->c_file
[0] == '/';
2853 inform("unable to rename %s %s to %s, continuing...",
2854 file
? "file" : "message", outfile
,
2860 inform("unable to remove input file %s, "
2861 "not modifying it, continuing...", infile
);
2862 (void) m_unlink (outfile
);
2871 /* No modifications and didn't need the tmp outfile. */
2872 (void) m_unlink (outfile
);
2875 /* Output is going to some file. Produce it whether or not
2876 there were modifications. */
2877 status
= output_message_fp (ct
, outfp
, outfile
);
2886 * parse_mime() does not set lf_line_endings in struct text, so use this
2887 * function to do it. It touches the parts the decodetypes identifies.
2890 set_text_ctparams(CT ct
, char *decodetypes
, int lf_line_endings
)
2892 switch (ct
->c_type
) {
2893 case CT_MULTIPART
: {
2894 struct multipart
*m
= (struct multipart
*) ct
->c_ctparams
;
2897 for (part
= m
->mp_parts
; part
; part
= part
->mp_next
) {
2898 set_text_ctparams(part
->mp_part
, decodetypes
, lf_line_endings
);
2904 if (ct
->c_subtype
== MESSAGE_EXTERNAL
) {
2905 struct exbody
*e
= (struct exbody
*) ct
->c_ctparams
;
2907 set_text_ctparams(e
->eb_content
, decodetypes
, lf_line_endings
);
2912 if (should_decode(decodetypes
, ct
->c_ctinfo
.ci_type
, ct
->c_ctinfo
.ci_subtype
)) {
2913 if (ct
->c_ctparams
== NULL
) {
2914 ct
->c_ctparams
= mh_xcalloc(1, sizeof (struct text
));
2916 ((struct text
*) ct
->c_ctparams
)->lf_line_endings
= lf_line_endings
;
2923 * If "rmmproc" is defined, call that to remove the file. Otherwise,
2924 * use the standard MH backup file.
2927 remove_file (const char *file
)
2930 char *rmm_command
= concat (rmmproc
, " ", file
, NULL
);
2931 int status
= system (rmm_command
);
2934 return WIFEXITED (status
) ? WEXITSTATUS (status
) : NOTOK
;
2936 /* This is OK for a non-message file, it still uses the
2937 BACKUP_PREFIX form. The backup file will be in the same
2938 directory as file. */
2939 return rename (file
, m_backup (file
));
2944 * Output formatted message to user.
2947 report (char *what
, char *partno
, char *filename
, char *message
, ...)
2953 va_start (args
, message
);
2954 fmt
= concat (filename
, partno
? " part " : ", ",
2955 FENDNULL(partno
), partno
? ", " : "", message
, NULL
);
2957 advertise (what
, NULL
, fmt
, args
);
2970 fprintf (stderr
, "\n");