2 * mhfixmsg.c -- rewrite a message with various transformations
4 * This code is Copyright (c) 2002 and 2013, by the authors of nmh.
5 * See the COPYRIGHT file in the root directory of the nmh
6 * distribution for complete copyright information.
11 #include <h/mhparse.h>
13 #include <h/signals.h>
16 #define MHFIXMSG_SWITCHES \
17 X("decodetext 8bit|7bit|binary", 0, DECODETEXTSW) \
18 X("nodecodetext", 0, NDECODETEXTSW) \
19 X("decodetypes", 0, DECODETYPESW) \
20 X("crlflinebreaks", 0, CRLFLINEBREAKSSW) \
21 X("nocrlflinebreaks", 0, NCRLFLINEBREAKSSW) \
22 X("textcharset", 0, TEXTCHARSETSW) \
23 X("notextcharset", 0, NTEXTCHARSETSW) \
24 X("reformat", 0, REFORMATSW) \
25 X("noreformat", 0, NREFORMATSW) \
26 X("replacetextplain", 0, REPLACETEXTPLAINSW) \
27 X("noreplacetextplain", 0, NREPLACETEXTPLAINSW) \
28 X("fixboundary", 0, FIXBOUNDARYSW) \
29 X("nofixboundary", 0, NFIXBOUNDARYSW) \
30 X("fixcte", 0, FIXCTESW) \
31 X("nofixcte", 0, NFIXCTESW) \
32 X("fixtype mimetype", 0, FIXTYPESW) \
33 X("file file", 0, FILESW) \
34 X("outfile file", 0, OUTFILESW) \
35 X("rmmproc program", 0, RPROCSW) \
36 X("normmproc", 0, NRPRCSW) \
37 X("changecur", 0, CHGSW) \
38 X("nochangecur", 0, NCHGSW) \
39 X("verbose", 0, VERBSW) \
40 X("noverbose", 0, NVERBSW) \
41 X("version", 0, VERSIONSW) \
42 X("help", 0, HELPSW) \
44 #define X(sw, minchars, id) id,
45 DEFINE_SWITCH_ENUM(MHFIXMSG
);
48 #define X(sw, minchars, id) { sw, minchars, id },
49 DEFINE_SWITCH_ARRAY(MHFIXMSG
, switches
);
54 int debugsw
; /* Needed by mhparse.c. */
56 #define quitser pipeser
59 extern int skip_mp_cte_check
; /* flag to InitMultiPart */
60 extern int suppress_bogus_mp_content_warning
; /* flag to InitMultiPart */
61 extern int bogus_mp_content
; /* flag from InitMultiPart */
62 /* flags to/from parse_header_attrs */
63 extern int suppress_extraneous_trailing_semicolon_warning
;
64 extern int extraneous_trailing_semicolon
;
67 int output_message (CT
, char *);
70 void flush_errors (void);
74 void freects_done (int) NORETURN
;
79 typedef struct fix_transformations
{
87 /* Whether to use CRLF linebreaks, per RFC 2046 Sec. 4.1.1, par.1. */
90 } fix_transformations
;
92 int mhfixmsgsbr (CT
*, const fix_transformations
*, char *);
93 static int fix_boundary (CT
*, int *);
94 static int copy_input_to_output (const char *, const char *);
95 static int get_multipart_boundary (CT
, char **);
96 static int replace_boundary (CT
, char *, char *);
97 static int fix_types (CT
, svector_t
, int *);
98 static char *replace_substring (char **, const char *, const char *);
99 static char *remove_parameter (char *, const char *);
100 static int fix_multipart_cte (CT
, int *);
101 static int set_ce (CT
, int);
102 static int ensure_text_plain (CT
*, CT
, int *, int);
103 static int find_textplain_sibling (CT
, int, int *);
104 static int insert_new_text_plain_part (CT
, int, CT
);
105 static CT
build_text_plain_part (CT
);
106 static int insert_into_new_mp_alt (CT
*, int *);
107 static CT
divide_part (CT
);
108 static void copy_ctinfo (CI
, CI
);
109 static int decode_part (CT
);
110 static int reformat_part (CT
, char *, char *, char *, int);
111 static int charset_encoding (CT
);
112 static CT
build_multipart_alt (CT
, CT
, int, int);
113 static int boundary_in_content (FILE **, char *, const char *);
114 static void transfer_noncontent_headers (CT
, CT
);
115 static int set_ct_type (CT
, int type
, int subtype
, int encoding
);
116 static int decode_text_parts (CT
, int, const char *, int *);
117 static int should_decode(const char *, const char *, const char *);
118 static int content_encoding (CT
, const char **);
119 static int strip_crs (CT
, int *);
120 static int convert_charsets (CT
, char *, int *);
121 static int fix_always (CT
, int *);
122 static int write_content (CT
, const char *, char *, int, int);
123 static void set_text_ctparams(CT
, char *, int);
124 static int remove_file (const char *);
125 static void report (char *, char *, char *, char *, ...);
126 static void pipeser (int);
130 main (int argc
, char **argv
) {
132 char *cp
, *file
= NULL
, *folder
= NULL
;
133 char *maildir
, buf
[100], *outfile
= NULL
;
134 char **argp
, **arguments
;
135 struct msgs_array msgs
= { 0, 0, NULL
};
136 struct msgs
*mp
= NULL
;
142 fix_transformations fx
;
143 fx
.reformat
= fx
.fixcte
= fx
.fixboundary
= 1;
145 fx
.replacetextplain
= 0;
146 fx
.decodetext
= CE_8BIT
;
147 fx
.decodetypes
= "text,application/ics"; /* Default, per man page. */
148 fx
.lf_line_endings
= 0;
149 fx
.textcharset
= NULL
;
151 if (nmh_init(argv
[0], 1)) { return 1; }
155 arguments
= getarguments (invo_name
, argc
, argv
, 1);
161 while ((cp
= *argp
++)) {
163 switch (smatch (++cp
, switches
)) {
165 ambigsw (cp
, switches
);
168 adios (NULL
, "-%s unknown", cp
);
171 snprintf (buf
, sizeof buf
, "%s [+folder] [msgs] [switches]",
173 print_help (buf
, switches
, 1);
176 print_version(invo_name
);
180 if (! (cp
= *argp
++) || *cp
== '-') {
181 adios (NULL
, "missing argument to %s", argp
[-2]);
183 if (! strcasecmp (cp
, "8bit")) {
184 fx
.decodetext
= CE_8BIT
;
185 } else if (! strcasecmp (cp
, "7bit")) {
186 fx
.decodetext
= CE_7BIT
;
187 } else if (! strcasecmp (cp
, "binary")) {
188 fx
.decodetext
= CE_BINARY
;
190 adios (NULL
, "invalid argument to %s", argp
[-2]);
197 if (! (cp
= *argp
++) || *cp
== '-') {
198 adios (NULL
, "missing argument to %s", argp
[-2]);
202 case CRLFLINEBREAKSSW
:
203 fx
.lf_line_endings
= 0;
205 case NCRLFLINEBREAKSSW
:
206 fx
.lf_line_endings
= 1;
209 if (! (cp
= *argp
++) || (*cp
== '-' && cp
[1])) {
210 adios (NULL
, "missing argument to %s", argp
[-2]);
230 if (! (cp
= *argp
++) || (*cp
== '-' && cp
[1])) {
231 adios (NULL
, "missing argument to %s", argp
[-2]);
233 if (! strncasecmp (cp
, "multipart/", 10) ||
234 ! strncasecmp (cp
, "message/", 8)) {
235 adios (NULL
, "-fixtype %s not allowed", cp
);
236 } else if (! strchr (cp
, '/')) {
237 adios (NULL
, "-fixtype requires type/subtype");
239 if (fx
.fixtypes
== NULL
) { fx
.fixtypes
= svector_create (10); }
240 svector_push_back (fx
.fixtypes
, cp
);
248 case REPLACETEXTPLAINSW
:
249 fx
.replacetextplain
= 1;
251 case NREPLACETEXTPLAINSW
:
252 fx
.replacetextplain
= 0;
255 if (! (cp
= *argp
++) || (*cp
== '-' && cp
[1])) {
256 adios (NULL
, "missing argument to %s", argp
[-2]);
258 file
= *cp
== '-' ? add (cp
, NULL
) : path (cp
, TFILE
);
261 if (! (cp
= *argp
++) || (*cp
== '-' && cp
[1])) {
262 adios (NULL
, "missing argument to %s", argp
[-2]);
264 outfile
= *cp
== '-' ? add (cp
, NULL
) : path (cp
, TFILE
);
267 if (!(rmmproc
= *argp
++) || *rmmproc
== '-') {
268 adios (NULL
, "missing argument to %s", argp
[-2]);
288 if (*cp
== '+' || *cp
== '@') {
290 adios (NULL
, "only one folder at a time!");
292 folder
= pluspath (cp
);
296 /* Interpret a full path as a filename, not a message. */
297 file
= add (cp
, NULL
);
299 app_msgarg (&msgs
, cp
);
304 SIGNAL (SIGQUIT
, quitser
);
305 SIGNAL (SIGPIPE
, pipeser
);
308 * Read the standard profile setup
310 if ((fp
= fopen (cp
= etcpath ("mhn.defaults"), "r"))) {
311 readconfig ((struct node
**) 0, fp
, cp
, 0);
315 suppress_bogus_mp_content_warning
= skip_mp_cte_check
= 1;
316 suppress_extraneous_trailing_semicolon_warning
= 1;
318 if (! context_find ("path")) {
319 free (path ("./", TFOLDER
));
322 if (file
&& msgs
.size
) {
323 adios (NULL
, "cannot specify msg and file at same time!");
327 * check if message is coming from file
330 /* If file is stdin, create a tmp file name before parse_mime()
331 has a chance, because it might put it on a different
332 filesystem than the output file. Instead, put it in the
333 user's preferred tmp directory. */
336 if (! strcmp ("-", file
)) {
342 if ((cp
= m_mktemp2 (NULL
, invo_name
, &fd
, NULL
)) == NULL
) {
343 adios (NULL
, "unable to create temporary file in %s",
347 file
= add (cp
, NULL
);
348 cpydata (STDIN_FILENO
, fd
, "-", file
);
352 (void) m_unlink (file
);
353 adios (NULL
, "failed to write temporary file");
357 if (! (cts
= (CT
*) mh_xcalloc ((size_t) 2, sizeof *cts
))) {
358 adios (NULL
, "out of memory");
362 if ((ct
= parse_mime (file
))) {
363 set_text_ctparams(ct
, fx
.decodetypes
, fx
.lf_line_endings
);
366 advise (NULL
, "unable to parse message from file %s", file
);
369 /* If there's an outfile, pass the input message unchanged, so the message won't
370 get dropped from a pipeline. */
372 /* Something went wrong. Output might be expected, such as if this were run
373 as a filter. Just copy the input to the output. */
374 if (copy_input_to_output (file
, outfile
) != OK
) {
375 advise (NULL
, "unable to copy message to %s, it might be lost\n", outfile
);
381 * message(s) are coming from a folder
386 app_msgarg(&msgs
, "cur");
389 folder
= getfolder (1);
391 maildir
= m_maildir (folder
);
393 if (chdir (maildir
) == NOTOK
) {
394 adios (maildir
, "unable to change directory to");
397 /* read folder and create message structure */
398 if (! (mp
= folder_read (folder
, 1))) {
399 adios (NULL
, "unable to read folder %s", folder
);
402 /* check for empty folder */
403 if (mp
->nummsg
== 0) {
404 adios (NULL
, "no messages in %s", folder
);
407 /* parse all the message ranges/sequences and set SELECTED */
408 for (msgnum
= 0; msgnum
< msgs
.size
; msgnum
++)
409 if (! m_convert (mp
, msgs
.msgs
[msgnum
])) {
412 seq_setprev (mp
); /* set the previous-sequence */
415 (CT
*) mh_xcalloc ((size_t) (mp
->numsel
+ 1), sizeof *cts
))) {
416 adios (NULL
, "out of memory");
420 for (msgnum
= mp
->lowsel
; msgnum
<= mp
->hghsel
; msgnum
++) {
421 if (is_selected(mp
, msgnum
)) {
424 msgnam
= m_name (msgnum
);
425 if ((ct
= parse_mime (msgnam
))) {
426 set_text_ctparams(ct
, fx
.decodetypes
, fx
.lf_line_endings
);
429 advise (NULL
, "unable to parse message %s", msgnam
);
432 /* If there's an outfile, pass the input message unchanged, so the message won't
433 get dropped from a pipeline. */
435 /* Something went wrong. Output might be expected, such as if this were run
436 as a filter. Just copy the input to the output. */
437 const char *input_filename
= path (msgnam
, TFILE
);
439 if (copy_input_to_output (input_filename
, outfile
) != OK
) {
440 advise (NULL
, "unable to copy message to %s, it might be lost\n", outfile
);
448 seq_setcur (mp
, mp
->hghsel
); /* update current message */
450 seq_save (mp
); /* synchronize sequences */
451 context_replace (pfolder
, folder
);/* update current folder */
452 context_save (); /* save the context file */
456 for (ctp
= cts
; *ctp
; ++ctp
) {
457 status
+= mhfixmsgsbr (ctp
, &fx
, outfile
);
460 (void) m_unlink (file
);
463 /* Just calling m_backup() unlinks the backup file. */
464 (void) m_backup (file
);
472 if (fx
.fixtypes
!= NULL
) { svector_free (fx
.fixtypes
); }
478 /* done is freects_done, which will clean up all of cts. */
485 mhfixmsgsbr (CT
*ctp
, const fix_transformations
*fx
, char *outfile
) {
486 /* Store input filename in case one of the transformations, i.e.,
487 fix_boundary(), rewrites to a tmp file. */
488 char *input_filename
= add ((*ctp
)->c_file
, NULL
);
489 int modify_inplace
= 0;
490 int message_mods
= 0;
493 if (outfile
== NULL
) {
496 if ((*ctp
)->c_file
) {
498 if ((tempfile
= m_mktemp2 (NULL
, invo_name
, NULL
, NULL
)) == NULL
) {
499 adios (NULL
, "unable to create temporary file in %s",
502 outfile
= add (tempfile
, NULL
);
504 adios (NULL
, "missing both input and output filenames\n");
508 reverse_alternative_parts (*ctp
);
509 status
= fix_always (*ctp
, &message_mods
);
510 if (status
== OK
&& fx
->fixboundary
) {
511 status
= fix_boundary (ctp
, &message_mods
);
513 if (status
== OK
&& fx
->fixtypes
!= NULL
) {
514 status
= fix_types (*ctp
, fx
->fixtypes
, &message_mods
);
516 if (status
== OK
&& fx
->fixcte
) {
517 status
= fix_multipart_cte (*ctp
, &message_mods
);
519 if (status
== OK
&& fx
->reformat
) {
521 ensure_text_plain (ctp
, NULL
, &message_mods
, fx
->replacetextplain
);
523 if (status
== OK
&& fx
->decodetext
) {
524 status
= decode_text_parts (*ctp
, fx
->decodetext
, fx
->decodetypes
, &message_mods
);
526 if (status
== OK
&& fx
->textcharset
!= NULL
) {
527 status
= convert_charsets (*ctp
, fx
->textcharset
, &message_mods
);
530 if (status
== OK
&& ! (*ctp
)->c_umask
) {
531 /* Set the umask for the contents file. This currently
532 isn't used but just in case it is in the future. */
535 if (stat ((*ctp
)->c_file
, &st
) != NOTOK
) {
536 (*ctp
)->c_umask
= ~(st
.st_mode
& 0777);
538 (*ctp
)->c_umask
= ~m_gmprot();
543 * Write the content to a file
546 status
= write_content (*ctp
, input_filename
, outfile
, modify_inplace
,
548 } else if (! modify_inplace
) {
549 /* Something went wrong. Output might be expected, such
550 as if this were run as a filter. Just copy the input
552 if (copy_input_to_output (input_filename
, outfile
) != OK
) {
553 advise (NULL
, "unable to copy message to %s, it might be lost\n", outfile
);
557 if (modify_inplace
) {
558 if (status
!= OK
) { (void) m_unlink (outfile
); }
563 free (input_filename
);
569 /* Copy input message to output. Assumes not modifying in place, so this
570 might be running as part of a pipeline. */
572 copy_input_to_output (const char *input_filename
, const char *output_filename
) {
573 int in
= open (input_filename
, O_RDONLY
);
574 int out
= strcmp (output_filename
, "-")
575 ? open (output_filename
, O_WRONLY
| O_CREAT
, m_gmprot ())
579 if (in
!= -1 && out
!= -1) {
580 cpydata (in
, out
, input_filename
, output_filename
);
593 fix_boundary (CT
*ct
, int *message_mods
) {
594 struct multipart
*mp
;
597 if (ct
&& (*ct
)->c_type
== CT_MULTIPART
&& bogus_mp_content
) {
598 mp
= (struct multipart
*) (*ct
)->c_ctparams
;
601 * 1) Get boundary at end of part.
602 * 2) Get boundary at beginning of part and compare to the end-of-part
604 * 3) Write out contents of ct to tmp file, replacing boundary in
605 * header with boundary from part. Set c_unlink to 1.
607 * 5) Call parse_mime() on the tmp file, replacing ct.
610 if (mp
&& mp
->mp_start
) {
613 if (get_multipart_boundary (*ct
, &part_boundary
) == OK
) {
616 if ((fixed
= m_mktemp2 (NULL
, invo_name
, NULL
, &(*ct
)->c_fp
))) {
617 if (replace_boundary (*ct
, fixed
, part_boundary
) == OK
) {
618 char *filename
= add ((*ct
)->c_file
, NULL
);
622 if ((fixed_ct
= parse_mime (fixed
))) {
628 report (NULL
, NULL
, filename
,
629 "fix multipart boundary");
633 advise (NULL
, "unable to parse fixed part");
638 advise (NULL
, "unable to replace broken boundary");
642 advise (NULL
, "unable to create temporary file in %s",
647 free (part_boundary
);
649 /* Couldn't fix the boundary. Report failure so that mhfixmsg
650 doesn't modify the message. */
654 /* No multipart struct, even though the content type is
655 CT_MULTIPART. Report failure so that mhfixmsg doesn't modify
666 get_multipart_boundary (CT ct
, char **part_boundary
) {
668 char *end_boundary
= NULL
;
669 off_t begin
= (off_t
) ct
->c_end
> (off_t
) (ct
->c_begin
+ sizeof buffer
)
670 ? (off_t
) (ct
->c_end
- sizeof buffer
)
671 : (off_t
) ct
->c_begin
;
675 /* This will fail if the boundary spans fread() calls. BUFSIZ should
676 be big enough, even if it's just 1024, to make that unlikely. */
678 /* free_content() will close ct->c_fp. */
679 if (! ct
->c_fp
&& (ct
->c_fp
= fopen (ct
->c_file
, "r")) == NULL
) {
680 advise (ct
->c_file
, "unable to open for reading");
684 /* Get boundary at end of multipart. */
685 while (begin
>= (off_t
) ct
->c_begin
) {
686 fseeko (ct
->c_fp
, begin
, SEEK_SET
);
687 while ((bytes_read
= fread (buffer
, 1, sizeof buffer
, ct
->c_fp
)) > 0) {
688 char *cp
= rfind_str (buffer
, bytes_read
, "--");
693 /* Trim off trailing "--" and anything beyond. */
695 if ((end
= rfind_str (buffer
, cp
- buffer
, "\n"))) {
696 if (strlen (end
) > 3 && *end
++ == '\n' &&
697 *end
++ == '-' && *end
++ == '-') {
698 end_boundary
= add (end
, NULL
);
705 if (! end_boundary
&& begin
> (off_t
) (ct
->c_begin
+ sizeof buffer
)) {
706 begin
-= sizeof buffer
;
712 /* Get boundary at beginning of multipart. */
714 fseeko (ct
->c_fp
, ct
->c_begin
, SEEK_SET
);
715 while ((bytes_read
= fread (buffer
, 1, sizeof buffer
, ct
->c_fp
)) > 0) {
716 if (bytes_read
>= strlen (end_boundary
)) {
717 char *cp
= find_str (buffer
, bytes_read
, end_boundary
);
719 if (cp
&& cp
- buffer
>= 2 && *--cp
== '-' &&
720 *--cp
== '-' && (cp
> buffer
&& *--cp
== '\n')) {
725 /* The start and end boundaries didn't match, or the
726 start boundary doesn't begin with "\n--" (or "--"
727 if at the beginning of buffer). Keep trying. */
736 *part_boundary
= end_boundary
;
738 *part_boundary
= NULL
;
746 /* Open and copy ct->c_file to file, replacing the multipart boundary. */
748 replace_boundary (CT ct
, char *file
, char *boundary
) {
751 char buf
[BUFSIZ
], name
[NAMESZ
];
753 m_getfld_state_t gstate
= 0;
756 if (ct
->c_file
== NULL
) {
757 advise (NULL
, "missing input filename");
761 if ((fpin
= fopen (ct
->c_file
, "r")) == NULL
) {
762 advise (ct
->c_file
, "unable to open for reading");
766 if ((fpout
= fopen (file
, "w")) == NULL
) {
768 advise (file
, "unable to open for writing");
772 for (compnum
= 1;;) {
773 int bufsz
= (int) sizeof buf
;
775 switch (state
= m_getfld (&gstate
, name
, buf
, &bufsz
, fpin
)) {
780 /* get copies of the buffers */
781 np
= add (name
, NULL
);
782 vp
= add (buf
, NULL
);
784 /* if necessary, get rest of field */
785 while (state
== FLDPLUS
) {
787 state
= m_getfld (&gstate
, name
, buf
, &bufsz
, fpin
);
788 vp
= add (buf
, vp
); /* add to previous value */
791 if (strcasecmp (TYPE_FIELD
, np
)) {
792 fprintf (fpout
, "%s:%s", np
, vp
);
794 char *new_ctline
, *new_params
;
796 replace_param(&ct
->c_ctinfo
.ci_first_pm
,
797 &ct
->c_ctinfo
.ci_last_pm
, "boundary",
800 new_ctline
= concat(" ", ct
->c_ctinfo
.ci_type
, "/",
801 ct
->c_ctinfo
.ci_subtype
, NULL
);
802 new_params
= output_params(strlen(TYPE_FIELD
) +
803 strlen(new_ctline
) + 1,
804 ct
->c_ctinfo
.ci_first_pm
, NULL
, 0);
805 fprintf (fpout
, "%s:%s%s\n", np
, new_ctline
,
806 new_params
? new_params
: "");
820 /* buf will have a terminating NUL, skip it. */
821 if ((int) fwrite (buf
, 1, bufsz
-1, fpout
) < bufsz
-1) {
822 advise (file
, "fwrite");
831 advise (NULL
, "message format error in component #%d", compnum
);
836 advise (NULL
, "getfld() returned %d", state
);
844 m_getfld_state_destroy (&gstate
);
853 fix_types (CT ct
, svector_t fixtypes
, int *message_mods
) {
856 switch (ct
->c_type
) {
858 struct multipart
*m
= (struct multipart
*) ct
->c_ctparams
;
861 for (part
= m
->mp_parts
; status
== OK
&& part
; part
= part
->mp_next
) {
862 status
= fix_types (part
->mp_part
, fixtypes
, message_mods
);
868 if (ct
->c_subtype
== MESSAGE_EXTERNAL
) {
869 struct exbody
*e
= (struct exbody
*) ct
->c_ctparams
;
871 status
= fix_types (e
->eb_content
, fixtypes
, message_mods
);
878 if (ct
->c_ctinfo
.ci_type
&& ct
->c_ctinfo
.ci_subtype
) {
879 for (typep
= svector_strs (fixtypes
);
880 typep
&& (type
= *typep
);
883 concat (ct
->c_ctinfo
.ci_type
, "/", ct
->c_ctinfo
.ci_subtype
,
886 if (! strcasecmp (type
, type_subtype
) &&
887 decode_part (ct
) == OK
&&
888 ct
->c_cefile
.ce_file
!= NULL
) {
889 char *ct_type_subtype
= mime_type (ct
->c_cefile
.ce_file
);
892 if ((cp
= strchr (ct_type_subtype
, ';'))) {
893 /* Truncate to remove any parameter list from
894 mime_type () result. */
898 if (strcasecmp (type
, ct_type_subtype
)) {
899 char *ct_type
, *ct_subtype
;
902 /* The Content-Type header does not match the
903 content, so update these struct Content
906 * c_ctinfo.ci_type, c_ctinfo.ci_subtype
909 /* Extract type and subtype from type/subtype. */
910 ct_type
= getcpy (ct_type_subtype
);
911 if ((cp
= strchr (ct_type
, '/'))) {
913 ct_subtype
= getcpy (++cp
);
915 advise (NULL
, "missing / in MIME type of %s %s",
916 ct
->c_file
, ct
->c_partno
);
921 ct
->c_type
= ct_str_type (ct_type
);
922 ct
->c_subtype
= ct_str_subtype (ct
->c_type
, ct_subtype
);
924 free (ct
->c_ctinfo
.ci_type
);
925 ct
->c_ctinfo
.ci_type
= ct_type
;
926 free (ct
->c_ctinfo
.ci_subtype
);
927 ct
->c_ctinfo
.ci_subtype
= ct_subtype
;
928 if (! replace_substring (&ct
->c_ctline
, type
,
930 advise (NULL
, "did not find %s in %s",
934 /* Update Content-Type header field. */
935 for (hf
= ct
->c_first_hf
; hf
; hf
= hf
->next
) {
936 if (! strcasecmp (TYPE_FIELD
, hf
->name
)) {
937 if (replace_substring (&hf
->value
, type
,
941 report (NULL
, ct
->c_partno
, ct
->c_file
,
942 "change Content-Type in header "
944 type
, ct_type_subtype
);
948 advise (NULL
, "did not find %s in %s",
954 free (ct_type_subtype
);
965 replace_substring (char **str
, const char *old
, const char *new) {
968 if ((cp
= strstr (*str
, old
))) {
969 char *remainder
= cp
+ strlen (old
);
970 char *prefix
, *new_str
;
973 prefix
= getcpy (*str
);
974 *(prefix
+ (cp
- *str
)) = '\0';
975 new_str
= concat (prefix
, new, remainder
, NULL
);
978 new_str
= concat (new, remainder
, NULL
);
983 return *str
= new_str
;
990 * Remove a name=value parameter, given just its name, from a header value.
993 remove_parameter (char *str
, const char *name
) {
994 /* It looks to me, based on the BNF in RFC 2045, that there can't
995 be whitespace between the parameter name and the "=", or
996 between the "=" and the parameter value. */
997 char *param_name
= concat (name
, "=", NULL
);
1000 if ((cp
= strstr (str
, param_name
))) {
1004 /* Remove any leading spaces, before the parameter name. */
1006 start
> str
&& isspace ((unsigned char) *(start
-1));
1010 /* Remove a leading semicolon. */
1011 if (start
> str
&& *(start
-1) == ';') { --start
; }
1013 end
= cp
+ strlen (name
) + 1;
1015 /* Skip past the quoted value, and then the final quote. */
1016 for (++end
; *end
&& *end
!= '"'; ++end
) { continue; }
1019 /* Skip past the value. */
1020 for (++end
; *end
&& ! isspace ((unsigned char) *end
); ++end
) {}
1023 /* Count how many characters need to be moved. Include
1024 trailing null, which is accounted for by the
1025 initialization of count to 1. */
1026 for (cp
= end
; *cp
; ++cp
) { ++count
; }
1027 (void) memmove (start
, end
, count
);
1036 fix_multipart_cte (CT ct
, int *message_mods
) {
1039 if (ct
->c_type
== CT_MULTIPART
) {
1040 struct multipart
*m
;
1043 if (ct
->c_encoding
!= CE_7BIT
&& ct
->c_encoding
!= CE_8BIT
&&
1044 ct
->c_encoding
!= CE_BINARY
) {
1047 for (hf
= ct
->c_first_hf
; hf
; hf
= hf
->next
) {
1048 char *name
= hf
->name
;
1049 for (; *name
&& isspace ((unsigned char) *name
); ++name
) {
1053 if (! strncasecmp (name
, ENCODING_FIELD
,
1054 strlen (ENCODING_FIELD
))) {
1055 char *prefix
= "Nmh-REPLACED-INVALID-";
1056 HF h
= mh_xmalloc (sizeof *h
);
1058 h
->name
= add (hf
->name
, NULL
);
1059 h
->hf_encoding
= hf
->hf_encoding
;
1063 /* Retain old header but prefix its name. */
1065 hf
->name
= concat (prefix
, h
->name
, NULL
);
1069 char *encoding
= cpytrim (hf
->value
);
1070 report (NULL
, ct
->c_partno
, ct
->c_file
,
1071 "replace Content-Transfer-Encoding of %s "
1072 "with 8 bit", encoding
);
1076 h
->value
= add (" 8bit\n", NULL
);
1078 /* Don't need to warn for multiple C-T-E header
1079 fields, parse_mime() already does that. But
1080 if there are any, fix them all as necessary. */
1085 set_ce (ct
, CE_8BIT
);
1088 m
= (struct multipart
*) ct
->c_ctparams
;
1089 for (part
= m
->mp_parts
; part
; part
= part
->mp_next
) {
1090 if (fix_multipart_cte (part
->mp_part
, message_mods
) != OK
) {
1102 set_ce (CT ct
, int encoding
) {
1103 const char *ce
= ce_str (encoding
);
1104 const struct str2init
*ctinit
= get_ce_method (ce
);
1107 char *cte
= concat (" ", ce
, "\n", NULL
);
1110 /* Decoded contents might be in ct->c_cefile.ce_file, if the
1111 caller is decode_text_parts (). Save because we'll
1113 struct cefile decoded_content_info
= ct
->c_cefile
;
1115 ct
->c_encoding
= encoding
;
1117 ct
->c_ctinitfnx
= ctinit
->si_init
;
1118 /* This will assign ct->c_cefile with an all-0 struct, which
1120 (*ctinit
->si_init
) (ct
);
1121 /* After returning, the caller should set
1122 ct->c_cefile.ce_file to the name of the file containing
1125 /* Restore the cefile. */
1126 ct
->c_cefile
= decoded_content_info
;
1128 /* Update/add Content-Transfer-Encoding header field. */
1129 for (hf
= ct
->c_first_hf
; hf
; hf
= hf
->next
) {
1130 if (! strcasecmp (ENCODING_FIELD
, hf
->name
)) {
1137 add_header (ct
, add (ENCODING_FIELD
, NULL
), cte
);
1140 /* Update c_celine. It's used only by mhlist -debug. */
1141 free (ct
->c_celine
);
1142 ct
->c_celine
= add (cte
, NULL
);
1151 /* Make sure each text part has a corresponding text/plain part. */
1153 ensure_text_plain (CT
*ct
, CT parent
, int *message_mods
, int replacetextplain
) {
1156 switch ((*ct
)->c_type
) {
1158 /* Nothing to do for text/plain. */
1159 if ((*ct
)->c_subtype
== TEXT_PLAIN
) { return OK
; }
1161 if (parent
&& parent
->c_type
== CT_MULTIPART
&&
1162 parent
->c_subtype
== MULTI_ALTERNATE
) {
1163 int new_subpart_number
= 1;
1164 int has_text_plain
=
1165 find_textplain_sibling (parent
, replacetextplain
,
1166 &new_subpart_number
);
1168 if (! has_text_plain
) {
1169 /* Parent is a multipart/alternative. Insert a new
1170 text/plain subpart. */
1171 const int inserted
=
1172 insert_new_text_plain_part (*ct
, new_subpart_number
,
1177 report (NULL
, parent
->c_partno
, parent
->c_file
,
1178 "insert text/plain part");
1184 } else if (parent
&& parent
->c_type
== CT_MULTIPART
&&
1185 parent
->c_subtype
== MULTI_RELATED
) {
1186 char *type_subtype
=
1187 concat ((*ct
)->c_ctinfo
.ci_type
, "/",
1188 (*ct
)->c_ctinfo
.ci_subtype
, NULL
);
1189 const char *parent_type
=
1190 get_param (parent
->c_ctinfo
.ci_first_pm
, "type", '?', 1);
1191 int new_subpart_number
= 1;
1192 int has_text_plain
= 0;
1194 /* Have to do string comparison on the subtype because we
1195 don't enumerate all of them in c_subtype values.
1196 parent_type will be NULL if the multipart/related part
1197 doesn't have a type parameter. The type parameter must
1198 be specified according to RFC 2387 Sec. 3.1 but not all
1200 if (parent_type
&& strcasecmp (type_subtype
, parent_type
) == 0) {
1201 /* The type of this part matches the root type of the
1202 parent multipart/related. Look to see if there's a
1203 text/plain sibling. */
1205 find_textplain_sibling (parent
, replacetextplain
,
1206 &new_subpart_number
);
1209 free (type_subtype
);
1211 if (! has_text_plain
) {
1212 struct multipart
*mp
= (struct multipart
*) parent
->c_ctparams
;
1216 for (part
= mp
->mp_parts
; part
; part
= part
->mp_next
) {
1217 if (*ct
!= part
->mp_part
) {
1223 /* Parent is a multipart/related. Insert a new
1224 text/plain subpart in a new multipart/alternative. */
1225 if (insert_into_new_mp_alt (ct
, message_mods
)) {
1226 /* Not an error if text/plain couldn't be added. */
1229 /* There are no siblings, so insert a new text/plain
1230 subpart, and change the parent type from
1231 multipart/related to multipart/alternative. */
1232 const int inserted
=
1233 insert_new_text_plain_part (*ct
, new_subpart_number
,
1239 parent
->c_subtype
= MULTI_ALTERNATE
;
1240 parent
->c_ctinfo
.ci_subtype
= getcpy ("alternative");
1241 if (! replace_substring (&parent
->c_ctline
, "/related",
1244 "did not find multipart/related in %s",
1248 /* Update Content-Type header field. */
1249 for (hf
= parent
->c_first_hf
; hf
; hf
= hf
->next
) {
1250 if (! strcasecmp (TYPE_FIELD
, hf
->name
)) {
1251 if (replace_substring (&hf
->value
, "/related",
1255 report (NULL
, parent
->c_partno
,
1257 "insert text/plain part");
1260 /* Remove, e.g., type="text/html" from
1261 multipart/alternative. */
1262 remove_parameter (hf
->value
, "type");
1265 advise (NULL
, "did not find multipart/"
1266 "related in header %s",
1272 /* Not an error if text/plain couldn't be inserted. */
1277 if (insert_into_new_mp_alt (ct
, message_mods
)) {
1284 case CT_MULTIPART
: {
1285 struct multipart
*mp
= (struct multipart
*) (*ct
)->c_ctparams
;
1288 for (part
= mp
->mp_parts
; status
== OK
&& part
; part
= part
->mp_next
) {
1289 if ((*ct
)->c_type
== CT_MULTIPART
) {
1290 status
= ensure_text_plain (&part
->mp_part
, *ct
, message_mods
,
1298 if ((*ct
)->c_subtype
== MESSAGE_EXTERNAL
) {
1299 struct exbody
*e
= (struct exbody
*) (*ct
)->c_ctparams
;
1301 status
= ensure_text_plain (&e
->eb_content
, *ct
, message_mods
,
1311 /* See if there is a sibling text/plain. */
1313 find_textplain_sibling (CT parent
, int replacetextplain
,
1314 int *new_subpart_number
) {
1315 struct multipart
*mp
= (struct multipart
*) parent
->c_ctparams
;
1316 struct part
*part
, *prev
;
1317 int has_text_plain
= 0;
1319 for (prev
= part
= mp
->mp_parts
; part
; part
= part
->mp_next
) {
1320 ++*new_subpart_number
;
1321 if (part
->mp_part
->c_type
== CT_TEXT
&&
1322 part
->mp_part
->c_subtype
== TEXT_PLAIN
) {
1323 if (replacetextplain
) {
1324 struct part
*old_part
;
1325 if (part
== mp
->mp_parts
) {
1326 old_part
= mp
->mp_parts
;
1327 mp
->mp_parts
= part
->mp_next
;
1329 old_part
= prev
->mp_next
;
1330 prev
->mp_next
= part
->mp_next
;
1333 report (NULL
, parent
->c_partno
, parent
->c_file
,
1334 "remove text/plain part %s",
1335 old_part
->mp_part
->c_partno
);
1337 free_content (old_part
->mp_part
);
1347 return has_text_plain
;
1352 insert_new_text_plain_part (CT ct
, int new_subpart_number
, CT parent
) {
1353 struct multipart
*mp
= (struct multipart
*) parent
->c_ctparams
;
1354 struct part
*new_part
= mh_xmalloc (sizeof *new_part
);
1356 if ((new_part
->mp_part
= build_text_plain_part (ct
))) {
1358 snprintf (buffer
, sizeof buffer
, "%d", new_subpart_number
);
1360 new_part
->mp_next
= mp
->mp_parts
;
1361 mp
->mp_parts
= new_part
;
1362 new_part
->mp_part
->c_partno
=
1363 concat (parent
->c_partno
? parent
->c_partno
: "1", ".",
1368 free_content (new_part
->mp_part
);
1377 build_text_plain_part (CT encoded_part
) {
1378 CT tp_part
= divide_part (encoded_part
);
1379 char *tmp_plain_file
= NULL
;
1381 if (decode_part (tp_part
) == OK
) {
1382 /* Now, tp_part->c_cefile.ce_file is the name of the tmp file that
1383 contains the decoded contents. And the decoding function, such
1384 as openQuoted, will have set ...->ce_unlink to 1 so that it will
1385 be unlinked by free_content (). */
1388 if ((tempfile
= m_mktemp2 (NULL
, invo_name
, NULL
, NULL
)) == NULL
) {
1389 advise (NULL
, "unable to create temporary file in %s",
1392 tmp_plain_file
= add (tempfile
, NULL
);
1393 if (reformat_part (tp_part
, tmp_plain_file
,
1394 tp_part
->c_ctinfo
.ci_type
,
1395 tp_part
->c_ctinfo
.ci_subtype
,
1396 tp_part
->c_type
) == OK
) {
1402 free_content (tp_part
);
1403 if (tmp_plain_file
) { (void) m_unlink (tmp_plain_file
); }
1404 free (tmp_plain_file
);
1410 /* Slip new text/plain part into a new multipart/alternative. */
1412 insert_into_new_mp_alt (CT
*ct
, int *message_mods
) {
1413 CT tp_part
= build_text_plain_part (*ct
);
1417 CT mp_alt
= build_multipart_alt (*ct
, tp_part
, CT_MULTIPART
,
1420 struct multipart
*mp
= (struct multipart
*) mp_alt
->c_ctparams
;
1422 if (mp
&& mp
->mp_parts
) {
1423 mp
->mp_parts
->mp_part
= tp_part
;
1424 /* Make the new multipart/alternative the parent. */
1429 report (NULL
, (*ct
)->c_partno
, (*ct
)->c_file
,
1430 "insert text/plain part");
1433 free_content (tp_part
);
1434 free_content (mp_alt
);
1441 /* Not an error if text/plain couldn't be built. */
1448 divide_part (CT ct
) {
1451 if ((new_part
= (CT
) mh_xcalloc (1, sizeof *new_part
)) == NULL
)
1452 adios (NULL
, "out of memory");
1454 /* Just copy over what is needed for decoding. c_vrsn and
1455 c_celine aren't necessary. */
1456 new_part
->c_file
= add (ct
->c_file
, NULL
);
1457 new_part
->c_begin
= ct
->c_begin
;
1458 new_part
->c_end
= ct
->c_end
;
1459 copy_ctinfo (&new_part
->c_ctinfo
, &ct
->c_ctinfo
);
1460 new_part
->c_type
= ct
->c_type
;
1461 new_part
->c_cefile
= ct
->c_cefile
;
1462 new_part
->c_encoding
= ct
->c_encoding
;
1463 new_part
->c_ctinitfnx
= ct
->c_ctinitfnx
;
1464 new_part
->c_ceopenfnx
= ct
->c_ceopenfnx
;
1465 new_part
->c_ceclosefnx
= ct
->c_ceclosefnx
;
1466 new_part
->c_cesizefnx
= ct
->c_cesizefnx
;
1468 /* c_ctline is used by reformat__part(), so it can preserve
1469 anything after the type/subtype. */
1470 new_part
->c_ctline
= add (ct
->c_ctline
, NULL
);
1477 copy_ctinfo (CI dest
, CI src
) {
1480 dest
->ci_type
= src
->ci_type
? add (src
->ci_type
, NULL
) : NULL
;
1481 dest
->ci_subtype
= src
->ci_subtype
? add (src
->ci_subtype
, NULL
) : NULL
;
1483 for (s_pm
= src
->ci_first_pm
; s_pm
; s_pm
= s_pm
->pm_next
) {
1484 d_pm
= add_param(&dest
->ci_first_pm
, &dest
->ci_last_pm
, s_pm
->pm_name
,
1486 if (s_pm
->pm_charset
)
1487 d_pm
->pm_charset
= getcpy(s_pm
->pm_charset
);
1489 d_pm
->pm_lang
= getcpy(s_pm
->pm_lang
);
1492 dest
->ci_comment
= src
->ci_comment
? add (src
->ci_comment
, NULL
) : NULL
;
1493 dest
->ci_magic
= src
->ci_magic
? add (src
->ci_magic
, NULL
) : NULL
;
1498 decode_part (CT ct
) {
1503 if ((tempfile
= m_mktemp2 (NULL
, invo_name
, NULL
, NULL
)) == NULL
) {
1504 adios (NULL
, "unable to create temporary file in %s", get_temp_dir());
1506 tmp_decoded
= add (tempfile
, NULL
);
1507 /* The following call will load ct->c_cefile.ce_file with the tmp
1508 filename of the decoded content. tmp_decoded will contain the
1509 encoded output, get rid of that. */
1510 status
= output_message (ct
, tmp_decoded
);
1511 (void) m_unlink (tmp_decoded
);
1518 /* Some of the arguments aren't really needed now, but maybe will
1519 be in the future for other than text types. */
1521 reformat_part (CT ct
, char *file
, char *type
, char *subtype
, int c_type
) {
1522 int output_subtype
, output_encoding
;
1526 /* Hacky: this redirects the output from whatever command is used
1527 to show the part to a file. So, the user can't have any output
1528 redirection in that command.
1529 Could show_multi() in mhshowsbr.c avoid this? */
1531 /* Check for invo_name-format-type/subtype. */
1532 if ((cf
= context_find_by_type ("format", type
, subtype
)) == NULL
) {
1534 advise (NULL
, "Don't know how to convert %s, there is no "
1535 "%s-format-%s/%s profile entry",
1536 ct
->c_file
, invo_name
, type
, subtype
);
1540 if (strchr (cf
, '>')) {
1541 advise (NULL
, "'>' prohibited in \"%s\",\nplease fix your "
1542 "%s-format-%s/%s profile entry", cf
, invo_name
, type
,
1543 subtype
? subtype
: "");
1549 cp
= concat (cf
, " >", file
, NULL
);
1550 status
= show_content_aux (ct
, 0, cp
, NULL
, NULL
);
1553 /* Unlink decoded content tmp file and free its filename to avoid
1554 leaks. The file stream should already have been closed. */
1555 if (ct
->c_cefile
.ce_unlink
) {
1556 (void) m_unlink (ct
->c_cefile
.ce_file
);
1557 free (ct
->c_cefile
.ce_file
);
1558 ct
->c_cefile
.ce_file
= NULL
;
1559 ct
->c_cefile
.ce_unlink
= 0;
1562 if (c_type
== CT_TEXT
) {
1563 output_subtype
= TEXT_PLAIN
;
1565 /* Set subtype to 0, which is always an UNKNOWN subtype. */
1568 output_encoding
= charset_encoding (ct
);
1570 if (set_ct_type (ct
, c_type
, output_subtype
, output_encoding
) == OK
) {
1571 ct
->c_cefile
.ce_file
= file
;
1572 ct
->c_cefile
.ce_unlink
= 1;
1574 ct
->c_cefile
.ce_unlink
= 0;
1582 /* Identifies 7bit or 8bit content based on charset. */
1584 charset_encoding (CT ct
) {
1585 char *ct_charset
= content_charset (ct
);
1586 int encoding
= strcasecmp (ct_charset
, "US-ASCII") ? CE_8BIT
: CE_7BIT
;
/* build_multipart_alt: create the CT that becomes the parent of
   first_alt and new_part, with the given type and subtype.

   Steps visible below:
   - the new CT is zero-initialized by mh_xcalloc();
   - a boundary is formed from the prefix "----=_nmh-multipart" and
     first_alt->c_partno.  While boundary_in_content() reports that
     the boundary occurs in new_part's decoded content
     (c_cefile.ce_file) or in its encoded content (c_file), a boundary
     with a "-<serial>" suffix is tried instead.  The search stops once
     serial reaches 1000000, with the advice "giving up trying to find
     a unique boundary";
   - non-Content headers are moved from first_alt to the new CT and a
     Content-Type header carrying the boundary parameter is added;
   - part numbers are assigned: ".1"/".2" below first_alt's old part
     number when it had one, otherwise "1"/"2";
   - c_ctinfo gets the type, subtype and boundary parameter;
   - a two-element part list and a struct multipart are allocated,
     with mp_start/mp_stop derived from the boundary.

   Only the second list element's mp_part is set here (to first_alt).
   NOTE(review): the first element's mp_part looks like it is left for
   the caller to fill in -- confirm against insert_into_new_mp_alt().
   NOTE(review): this listing omits short source lines, so some
   declarations, error returns and closing braces are not visible. */
1595 build_multipart_alt (CT first_alt
, CT new_part
, int type
, int subtype
) {
1596 char *boundary_prefix
= "----=_nmh-multipart";
1597 char *boundary
= concat (boundary_prefix
, first_alt
->c_partno
, NULL
);
1598 char *boundary_indicator
= "; boundary=";
1599 char *typename
, *subtypename
, *name
;
1602 struct multipart
*m
;
1603 const struct str2init
*ctinit
;
1605 if ((ct
= (CT
) mh_xcalloc (1, sizeof *ct
)) == NULL
)
1606 adios (NULL
, "out of memory");
1608 /* Set up the multipart/alternative part. These fields of *ct were
1609 initialized to 0 by mh_xcalloc():
1610 c_fp, c_unlink, c_begin, c_end,
1611 c_vrsn, c_ctline, c_celine,
1612 c_id, c_descr, c_dispo, c_partno,
1613 c_ctinfo.ci_comment, c_ctinfo.ci_magic,
1614 c_cefile, c_encoding,
1615 c_digested, c_digest[16], c_ctexbody,
1616 c_ctinitfnx, c_ceopenfnx, c_ceclosefnx, c_cesizefnx,
1618 c_showproc, c_termproc, c_storeproc, c_storage, c_folder
1621 ct
->c_file
= add (first_alt
->c_file
, NULL
);
1623 ct
->c_subtype
= subtype
;
1625 ctinit
= get_ct_init (ct
->c_type
);
1627 typename
= ct_type_str (type
);
1628 subtypename
= ct_subtype_str (type
, subtype
);
1632 int found_boundary
= 1;
1634 while (found_boundary
&& serial
< 1000000) {
1637 /* Ensure that the boundary doesn't appear in the decoded
1639 if (new_part
->c_cefile
.ce_file
) {
1640 if ((found_boundary
=
1641 boundary_in_content (&new_part
->c_cefile
.ce_fp
,
1642 new_part
->c_cefile
.ce_file
,
1649 /* Ensure that the boundary doesn't appear in the encoded
1651 if (! found_boundary
&& new_part
->c_file
) {
1652 if ((found_boundary
= boundary_in_content (&new_part
->c_fp
,
1660 if (found_boundary
) {
1661 /* Try a slightly different boundary. */
1666 snprintf (buffer2
, sizeof buffer2
, "%d", serial
);
1668 concat (boundary_prefix
,
1669 first_alt
->c_partno
? first_alt
->c_partno
: "",
1670 "-", buffer2
, NULL
);
1674 if (found_boundary
) {
1675 advise (NULL
, "giving up trying to find a unique boundary");
1681 name
= concat (" ", typename
, "/", subtypename
, boundary_indicator
, "\"",
1682 boundary
, "\"", NULL
);
1684 /* Load c_first_hf and c_last_hf. */
1685 transfer_noncontent_headers (first_alt
, ct
);
1686 add_header (ct
, add (TYPE_FIELD
, NULL
), concat (name
, "\n", NULL
));
1689 /* Load c_partno. */
1690 if (first_alt
->c_partno
) {
1691 ct
->c_partno
= add (first_alt
->c_partno
, NULL
);
1692 free (first_alt
->c_partno
);
1693 first_alt
->c_partno
= concat (ct
->c_partno
, ".1", NULL
);
1694 new_part
->c_partno
= concat (ct
->c_partno
, ".2", NULL
);
1696 first_alt
->c_partno
= add ("1", NULL
);
1697 new_part
->c_partno
= add ("2", NULL
);
1701 ct
->c_ctinfo
.ci_type
= add (typename
, NULL
);
1702 ct
->c_ctinfo
.ci_subtype
= add (subtypename
, NULL
);
1705 add_param(&ct
->c_ctinfo
.ci_first_pm
, &ct
->c_ctinfo
.ci_last_pm
,
1706 "boundary", boundary
, 0);
1708 p
= (struct part
*) mh_xmalloc (sizeof *p
);
1709 p
->mp_next
= (struct part
*) mh_xmalloc (sizeof *p
->mp_next
);
1710 p
->mp_next
->mp_next
= NULL
;
1711 p
->mp_next
->mp_part
= first_alt
;
1713 if ((m
= (struct multipart
*) mh_xcalloc (1, sizeof (struct multipart
))) ==
1715 adios (NULL
, "out of memory");
1716 m
->mp_start
= concat (boundary
, "\n", NULL
);
1717 m
->mp_stop
= concat (boundary
, "--\n", NULL
);
/* Check that the boundary does not appear in the content.
 *
 * fp    address of the stream for file; if *fp is NULL the file is
 *       opened here and left open (free_content() will close it).
 * file  name of the file to search.
 * boundary  string to look for.
 *
 * Returns 1 if boundary occurs anywhere in the file, 0 if it does not,
 * and -1 if the file could not be opened.
 */
static int
boundary_in_content (FILE **fp, char *file, const char *boundary) {
    char buffer[BUFSIZ];
    const size_t boundary_len = strlen (boundary);
    /* Number of bytes at the front of buffer kept from the previous
       read, so a boundary split across two reads is still seen. */
    size_t carried = 0;
    size_t bytes_read;
    int found_boundary = 0;

    /* free_content() will close *fp if we fopen it here. */
    if (! *fp  &&  (*fp = fopen (file, "r")) == NULL) {
        advise (file, "unable to open %s for reading", file);
        return -1;
    }

    fseeko (*fp, 0L, SEEK_SET);
    while ((bytes_read = fread (buffer + carried, 1, sizeof buffer - carried,
                                *fp)) > 0) {
        const size_t available = carried + bytes_read;

        if (find_str (buffer, available, boundary)) {
            found_boundary = 1;
            break;
        }

        /* A match that starts in this chunk and ends in the next one
           can overlap this chunk by at most boundary_len - 1 bytes;
           keep exactly that much. */
        carried = boundary_len > 0  ?  boundary_len - 1  :  0;
        if (carried > available) {
            carried = available;
        }
        if (carried >= sizeof buffer) {
            /* Always leave room to make progress. */
            carried = sizeof buffer - 1;
        }
        memmove (buffer, buffer + available - carried, carried);
    }

    return found_boundary;
}
1752 /* Remove all non-Content headers. */
1754 transfer_noncontent_headers (CT old
, CT
new) {
1757 hp_prev
= hp
= old
->c_first_hf
;
1761 if (strncasecmp (XXX_FIELD_PRF
, hp
->name
, strlen (XXX_FIELD_PRF
))) {
1762 if (hp
== old
->c_last_hf
) {
1763 if (hp
== old
->c_first_hf
) {
1764 old
->c_last_hf
= old
->c_first_hf
= NULL
;
1766 hp_prev
->next
= NULL
;
1767 old
->c_last_hf
= hp_prev
;
1770 if (hp
== old
->c_first_hf
) {
1771 old
->c_first_hf
= next
;
1773 hp_prev
->next
= next
;
1777 /* Put node hp in the new CT. */
1778 if (new->c_first_hf
== NULL
) {
1779 new->c_first_hf
= hp
;
1781 new->c_last_hf
->next
= hp
;
1783 new->c_last_hf
= hp
;
1785 /* A Content- header, leave in old. */
1795 set_ct_type (CT ct
, int type
, int subtype
, int encoding
) {
1796 char *typename
= ct_type_str (type
);
1797 char *subtypename
= ct_subtype_str (type
, subtype
);
1798 /* E.g, " text/plain" */
1799 char *type_subtypename
= concat (" ", typename
, "/", subtypename
, NULL
);
1800 /* E.g, " text/plain\n" */
1801 char *name_plus_nl
= concat (type_subtypename
, "\n", NULL
);
1802 int found_content_type
= 0;
1804 const char *cp
= NULL
;
1808 /* Update/add Content-Type header field. */
1809 for (hf
= ct
->c_first_hf
; hf
; hf
= hf
->next
) {
1810 if (! strcasecmp (TYPE_FIELD
, hf
->name
)) {
1811 found_content_type
= 1;
1813 hf
->value
= (cp
= strchr (ct
->c_ctline
, ';'))
1814 ? concat (type_subtypename
, cp
, "\n", NULL
)
1815 : add (name_plus_nl
, NULL
);
1818 if (! found_content_type
) {
1819 add_header (ct
, add (TYPE_FIELD
, NULL
),
1820 (cp
= strchr (ct
->c_ctline
, ';'))
1821 ? concat (type_subtypename
, cp
, "\n", NULL
)
1822 : add (name_plus_nl
, NULL
));
1825 /* Some of these might not be used, but set them anyway. */
1827 ? concat (type_subtypename
, cp
, NULL
)
1828 : concat (type_subtypename
, NULL
);
1829 free (ct
->c_ctline
);
1830 ct
->c_ctline
= ctline
;
1831 /* Leave other ctinfo members as they were. */
1832 free (ct
->c_ctinfo
.ci_type
);
1833 ct
->c_ctinfo
.ci_type
= add (typename
, NULL
);
1834 free (ct
->c_ctinfo
.ci_subtype
);
1835 ct
->c_ctinfo
.ci_subtype
= add (subtypename
, NULL
);
1837 ct
->c_subtype
= subtype
;
1839 free (name_plus_nl
);
1840 free (type_subtypename
);
1842 status
= set_ce (ct
, encoding
);
1849 * It's not necessary to update the charset parameter of a Content-Type
1850 * header for a text part. According to RFC 2045 Sec. 6.4, the body
1851 * (content) was originally in the specified charset, "and will be in
1852 * that character set again after decoding."
1855 decode_text_parts (CT ct
, int encoding
, const char *decodetypes
, int *message_mods
) {
1857 int lf_line_endings
= 0;
1859 switch (ct
->c_type
) {
1860 case CT_MULTIPART
: {
1861 struct multipart
*m
= (struct multipart
*) ct
->c_ctparams
;
1864 /* Should check to see if the body for this part is encoded?
1865 For now, it gets passed along as-is by InitMultiPart(). */
1866 for (part
= m
->mp_parts
; status
== OK
&& part
; part
= part
->mp_next
) {
1867 status
= decode_text_parts (part
->mp_part
, encoding
, decodetypes
, message_mods
);
1873 if (ct
->c_subtype
== MESSAGE_EXTERNAL
) {
1874 struct exbody
*e
= (struct exbody
*) ct
->c_ctparams
;
1876 status
= decode_text_parts (e
->eb_content
, encoding
, decodetypes
, message_mods
);
1881 if (! should_decode(decodetypes
, ct
->c_ctinfo
.ci_type
, ct
->c_ctinfo
.ci_subtype
)) {
1886 ct
->c_ctparams
&& ((struct text
*) ct
->c_ctparams
)->lf_line_endings
;
1888 switch (ct
->c_encoding
) {
1893 if (decode_part (ct
) == OK
&& ct
->c_cefile
.ce_file
) {
1894 const char *reason
= NULL
;
1896 if ((ct_encoding
= content_encoding (ct
, &reason
)) == CE_BINARY
1897 && encoding
!= CE_BINARY
) {
1898 /* The decoding isn't acceptable so discard it.
1899 Leave status as OK to allow other transformations. */
1901 report (NULL
, ct
->c_partno
, ct
->c_file
,
1902 "will not decode%s because it is binary (%s)",
1904 : ct
->c_ctline
? ct
->c_ctline
1908 (void) m_unlink (ct
->c_cefile
.ce_file
);
1909 free (ct
->c_cefile
.ce_file
);
1910 ct
->c_cefile
.ce_file
= NULL
;
1911 } else if (ct
->c_encoding
== CE_QUOTED
&&
1912 ct_encoding
== CE_8BIT
&& encoding
== CE_7BIT
) {
1913 /* The decoding isn't acceptable so discard it.
1914 Leave status as OK to allow other transformations. */
1916 report (NULL
, ct
->c_partno
, ct
->c_file
,
1917 "will not decode%s because it is 8bit",
1919 : ct
->c_ctline
? ct
->c_ctline
1922 (void) m_unlink (ct
->c_cefile
.ce_file
);
1923 free (ct
->c_cefile
.ce_file
);
1924 ct
->c_cefile
.ce_file
= NULL
;
1927 if (ct_encoding
== CE_BINARY
) {
1929 } else if (ct_encoding
== CE_8BIT
&& encoding
== CE_7BIT
) {
1932 enc
= charset_encoding (ct
);
1934 if (set_ce (ct
, enc
) == OK
) {
1937 report (NULL
, ct
->c_partno
, ct
->c_file
, "decode%s",
1938 ct
->c_ctline
? ct
->c_ctline
: "");
1940 if (lf_line_endings
) {
1941 strip_crs (ct
, message_mods
);
1954 if (lf_line_endings
) {
1955 strip_crs (ct
, message_mods
);
/* Determine if the part with type[/subtype] should be decoded, according to
   decodetypes (which came from the -decodetypes switch).

   The test is a case-insensitive substring search: decodetypes is
   bracketed with commas and then searched for ",type," and, when a
   subtype is given, for ",type/subtype,".

   Returns 1 on a match, 0 otherwise. */
static int
should_decode(const char *decodetypes, const char *type, const char *subtype) {
    char *haystack = concat(",", decodetypes, ",", NULL);
    char *type_needle = concat(",", type, ",", NULL);
    int matched = nmh_strcasestr(haystack, type_needle) != NULL;

    if (! matched  &&  subtype != NULL) {
        char *full_needle = concat(",", type, "/", subtype, ",", NULL);

        matched = nmh_strcasestr(haystack, full_needle) != NULL;
        free(full_needle);
    }

    free(type_needle);
    free(haystack);

    return matched;
}
2000 /* See if the decoded content is 7bit, 8bit, or binary. It's binary
2001 if it has any NUL characters, a CR not followed by a LF, or lines
2002 greater than 998 characters in length. If binary, reason is set
2003 to a string explaining why. */
2005 content_encoding (CT ct
, const char **reason
) {
2006 CE ce
= &ct
->c_cefile
;
2007 int encoding
= CE_7BIT
;
2010 size_t line_len
= 0;
2011 char buffer
[BUFSIZ
];
2014 if (! ce
->ce_fp
&& (ce
->ce_fp
= fopen (ce
->ce_file
, "r")) == NULL
) {
2015 advise (ce
->ce_file
, "unable to open for reading");
2019 fseeko (ce
->ce_fp
, 0L, SEEK_SET
);
2020 while (encoding
!= CE_BINARY
&&
2021 (inbytes
= fread (buffer
, 1, sizeof buffer
, ce
->ce_fp
)) > 0) {
2024 int last_char_was_cr
= 0;
2026 for (i
= 0, cp
= buffer
; i
< inbytes
; ++i
, ++cp
) {
2027 if (*cp
== '\0' || ++line_len
> 998 ||
2028 (*cp
!= '\n' && last_char_was_cr
)) {
2029 encoding
= CE_BINARY
;
2031 *reason
= "null character";
2032 } else if (line_len
> 998) {
2033 *reason
= "line length > 998";
2034 } else if (*cp
!= '\n' && last_char_was_cr
) {
2035 *reason
= "CR not followed by LF";
2037 /* Should not reach this. */
2041 } else if (*cp
== '\n') {
2043 } else if (! isascii ((unsigned char) *cp
)) {
2047 last_char_was_cr
= *cp
== '\r' ? 1 : 0;
2053 } /* else should never happen */
2060 strip_crs (CT ct
, int *message_mods
) {
2061 char *charset
= content_charset (ct
);
2064 /* Only strip carriage returns if content is ASCII or another
2065 charset that has the same readily recognizable CR followed by a
2066 LF. We can include UTF-8 here because if the high-order bit of
2067 a UTF-8 byte is 0, then it must be a single-byte ASCII
2069 if (! strcasecmp (charset
, "US-ASCII") ||
2070 ! strcasecmp (charset
, "UTF-8") ||
2071 ! strncasecmp (charset
, "ISO-8859-", 9) ||
2072 ! strncasecmp (charset
, "WINDOWS-12", 10)) {
2078 int opened_input_file
= 0;
2080 if (ct
->c_cefile
.ce_file
) {
2081 file
= &ct
->c_cefile
.ce_file
;
2082 fp
= &ct
->c_cefile
.ce_fp
;
2084 } else if (ct
->c_file
) {
2087 begin
= (size_t) ct
->c_begin
;
2088 end
= (size_t) ct
->c_end
;
2089 } /* else don't know where the content is */
2091 if (file
&& *file
&& fp
) {
2093 if ((*fp
= fopen (*file
, "r")) == NULL
) {
2094 advise (*file
, "unable to open for reading");
2097 opened_input_file
= 1;
2103 char buffer
[BUFSIZ
];
2105 size_t bytes_to_read
=
2106 end
> 0 && end
> begin
? end
- begin
: sizeof buffer
;
2108 fseeko (*fp
, begin
, SEEK_SET
);
2109 while ((bytes_read
= fread (buffer
, 1,
2110 min (bytes_to_read
, sizeof buffer
),
2112 /* Look for CR followed by a LF. This is supposed to
2113 be text so there should be LF's. If not, don't
2114 modify the content. */
2117 int last_char_was_cr
= 0;
2119 if (end
> 0) { bytes_to_read
-= bytes_read
; }
2121 for (i
= 0, cp
= buffer
; i
< bytes_read
; ++i
, ++cp
) {
2122 if (*cp
== '\n' && last_char_was_cr
) {
2127 last_char_was_cr
= *cp
== '\r' ? 1 : 0;
2133 char *stripped_content_file
;
2134 char *tempfile
= m_mktemp2 (NULL
, invo_name
, &fd
, NULL
);
2136 if (tempfile
== NULL
) {
2137 adios (NULL
, "unable to create temporary file in %s",
2140 stripped_content_file
= add (tempfile
, NULL
);
2142 /* Strip each CR before a LF from the content. */
2143 fseeko (*fp
, begin
, SEEK_SET
);
2144 while ((bytes_read
= fread (buffer
, 1, sizeof buffer
, *fp
)) >
2148 int last_char_was_cr
= 0;
2150 for (i
= 0, cp
= buffer
; i
< bytes_read
; ++i
, ++cp
) {
2152 last_char_was_cr
= 1;
2153 } else if (last_char_was_cr
) {
2155 if (write (fd
, "\r", 1) < 0) {
2156 advise (tempfile
, "CR write");
2159 if (write (fd
, cp
, 1) < 0) {
2160 advise (tempfile
, "write");
2162 last_char_was_cr
= 0;
2164 if (write (fd
, cp
, 1) < 0) {
2165 advise (tempfile
, "write");
2167 last_char_was_cr
= 0;
2173 admonish (NULL
, "unable to write temporary file %s",
2174 stripped_content_file
);
2175 (void) m_unlink (stripped_content_file
);
2178 /* Replace the decoded file with the converted one. */
2179 if (ct
->c_cefile
.ce_file
) {
2180 if (ct
->c_cefile
.ce_unlink
) {
2181 (void) m_unlink (ct
->c_cefile
.ce_file
);
2183 free (ct
->c_cefile
.ce_file
);
2185 ct
->c_cefile
.ce_file
= stripped_content_file
;
2186 ct
->c_cefile
.ce_unlink
= 1;
2190 report (NULL
, ct
->c_partno
,
2191 begin
== 0 && end
== 0 ? "" : *file
,
2197 if (opened_input_file
) {
2211 convert_charsets (CT ct
, char *dest_charset
, int *message_mods
) {
2214 switch (ct
->c_type
) {
2216 if (ct
->c_subtype
== TEXT_PLAIN
) {
2217 status
= convert_charset (ct
, dest_charset
, message_mods
);
2220 char *ct_charset
= content_charset (ct
);
2222 report (NULL
, ct
->c_partno
, ct
->c_file
,
2223 "convert %s to %s", ct_charset
, dest_charset
);
2227 char *ct_charset
= content_charset (ct
);
2229 report ("iconv", ct
->c_partno
, ct
->c_file
,
2230 "failed to convert %s to %s", ct_charset
, dest_charset
);
2236 case CT_MULTIPART
: {
2237 struct multipart
*m
= (struct multipart
*) ct
->c_ctparams
;
2240 /* Should check to see if the body for this part is encoded?
2241 For now, it gets passed along as-is by InitMultiPart(). */
2242 for (part
= m
->mp_parts
; status
== OK
&& part
; part
= part
->mp_next
) {
2244 convert_charsets (part
->mp_part
, dest_charset
, message_mods
);
2250 if (ct
->c_subtype
== MESSAGE_EXTERNAL
) {
2251 struct exbody
*e
= (struct exbody
*) ct
->c_ctparams
;
2254 convert_charsets (e
->eb_content
, dest_charset
, message_mods
);
2267 * Fix various problems that aren't handled elsewhere. These
2268 * are fixed unconditionally: there are no switches to disable
2269 * them. (Currently, "problems" is just one: an extraneous
2270 * semicolon at the end of a header parameter list.)
2273 fix_always (CT ct
, int *message_mods
) {
2276 switch (ct
->c_type
) {
2277 case CT_MULTIPART
: {
2278 struct multipart
*m
= (struct multipart
*) ct
->c_ctparams
;
2281 for (part
= m
->mp_parts
; status
== OK
&& part
; part
= part
->mp_next
) {
2282 status
= fix_always (part
->mp_part
, message_mods
);
2288 if (ct
->c_subtype
== MESSAGE_EXTERNAL
) {
2289 struct exbody
*e
= (struct exbody
*) ct
->c_ctparams
;
2291 status
= fix_always (e
->eb_content
, message_mods
);
2298 for (hf
= ct
->c_first_hf
; hf
; hf
= hf
->next
) {
2299 size_t len
= strlen (hf
->value
);
2301 if (strcasecmp (hf
->name
, TYPE_FIELD
) != 0 &&
2302 strcasecmp (hf
->name
, DISPO_FIELD
) != 0) {
2303 /* Only do this for Content-Type and
2304 Content-Disposition fields because those are the
2305 only headers that parse_mime() warns about. */
2309 /* whitespace following a trailing ';' will be nuked as well */
2310 if (hf
->value
[len
- 1] == '\n') {
2311 while (isspace((unsigned char)(hf
->value
[len
- 2]))) {
2312 if (len
-- == 0) { break; }
2316 if (hf
->value
[len
- 2] == ';') {
2317 /* Remove trailing ';' from parameter value. */
2318 hf
->value
[len
- 2] = '\n';
2319 hf
->value
[len
- 1] = '\0';
2321 /* Also, if Content-Type parameter, remove trailing ';'
2322 from ct->c_ctline. This probably isn't necessary
2324 if (strcasecmp(hf
->name
, TYPE_FIELD
) == 0 && ct
->c_ctline
) {
2325 size_t l
= strlen(ct
->c_ctline
) - 1;
2326 while (isspace((unsigned char)(ct
->c_ctline
[l
])) ||
2327 ct
->c_ctline
[l
] == ';') {
2328 ct
->c_ctline
[l
--] = '\0';
2329 if (l
== 0) { break; }
2335 report (NULL
, ct
->c_partno
, ct
->c_file
,
2336 "remove trailing ; from %s parameter value",
/* write_content: write the (possibly modified) message ct to outfile.

   When modify_inplace is set and message_mods is greater than zero,
   the message is written to outfile with output_message(), the input
   file (input_filename, or else ct->c_file, or "-") is removed with
   remove_file(), and outfile is renamed onto it.  If rename() fails --
   possibly because the two are on different filesystems -- the data
   is copied with read()/write() instead and outfile is unlinked.
   If the input file cannot be removed it is left unmodified and
   outfile is unlinked.  With modify_inplace set but no modifications,
   the unneeded temporary outfile is just unlinked.

   When modify_inplace is not set, the output is always produced,
   whether or not there were modifications.

   NOTE(review): this listing omits short source lines; the final
   parameter (presumably message_mods, judging by its use), some
   declarations, the status handling on the error paths and the
   closing braces are not visible here. */
2348 write_content (CT ct
, const char *input_filename
, char *outfile
, int modify_inplace
,
2352 if (modify_inplace
) {
2353 if (message_mods
> 0) {
2354 if ((status
= output_message (ct
, outfile
)) == OK
) {
2355 char *infile
= input_filename
2356 ? add (input_filename
, NULL
)
2357 : add (ct
->c_file
? ct
->c_file
: "-", NULL
);
2359 if (remove_file (infile
) == OK
) {
2360 if (rename (outfile
, infile
)) {
2361 /* Rename didn't work, possibly because of an
2362 attempt to rename across filesystems. Try
2363 brute force copy. */
2364 int old
= open (outfile
, O_RDONLY
);
2366 open (infile
, O_WRONLY
| O_CREAT
, m_gmprot ());
2369 if (old
!= -1 && new != -1) {
2370 char buffer
[BUFSIZ
];
2372 while ((i
= read (old
, buffer
, sizeof buffer
)) >
2374 if (write (new, buffer
, i
) != i
) {
2380 if (new != -1) { close (new); }
2381 if (old
!= -1) { close (old
); }
2382 (void) m_unlink (outfile
);
2385 /* The -file argument processing used path() to
2386 expand filename to absolute path. */
2387 int file
= ct
->c_file
&& ct
->c_file
[0] == '/';
2389 admonish (NULL
, "unable to rename %s %s to %s",
2390 file
? "file" : "message", outfile
,
2396 admonish (NULL
, "unable to remove input file %s, "
2397 "not modifying it", infile
);
2398 (void) m_unlink (outfile
);
2407 /* No modifications and didn't need the tmp outfile. */
2408 (void) m_unlink (outfile
);
2411 /* Output is going to some file. Produce it whether or not
2412 there were modifications. */
2413 status
= output_message (ct
, outfile
);
2422 * parse_mime() does not set lf_line_endings in struct text, so use this function to do it.
2423 * It touches the parts the decodetypes identifies.
2426 set_text_ctparams(CT ct
, char *decodetypes
, int lf_line_endings
) {
2427 switch (ct
->c_type
) {
2428 case CT_MULTIPART
: {
2429 struct multipart
*m
= (struct multipart
*) ct
->c_ctparams
;
2432 for (part
= m
->mp_parts
; part
; part
= part
->mp_next
) {
2433 set_text_ctparams(part
->mp_part
, decodetypes
, lf_line_endings
);
2439 if (ct
->c_subtype
== MESSAGE_EXTERNAL
) {
2440 struct exbody
*e
= (struct exbody
*) ct
->c_ctparams
;
2442 set_text_ctparams(e
->eb_content
, decodetypes
, lf_line_endings
);
2447 if (should_decode(decodetypes
, ct
->c_ctinfo
.ci_type
, ct
->c_ctinfo
.ci_subtype
)) {
2448 if (ct
->c_ctparams
== NULL
) {
2449 if ((ct
->c_ctparams
= (struct text
*) mh_xcalloc (1, sizeof (struct text
))) == NULL
) {
2450 adios (NULL
, "out of memory");
2453 ((struct text
*) ct
->c_ctparams
)->lf_line_endings
= lf_line_endings
;
2460 * If "rmmproc" is defined, call that to remove the file. Otherwise,
2461 * use the standard MH backup file.
2464 remove_file (const char *file
) {
2466 char *rmm_command
= concat (rmmproc
, " ", file
, NULL
);
2467 int status
= system (rmm_command
);
2470 return WIFEXITED (status
) ? WEXITSTATUS (status
) : NOTOK
;
2472 /* This is OK for a non-message file, it still uses the
2473 BACKUP_PREFIX form. The backup file will be in the same
2474 directory as file. */
2475 return rename (file
, m_backup (file
));
2481 report (char *what
, char *partno
, char *filename
, char *message
, ...) {
2486 va_start (args
, message
);
2487 fmt
= concat (filename
, partno
? " part " : ", ",
2488 partno
? partno
: "", partno
? ", " : "", message
, NULL
);
2490 advertise (what
, NULL
, fmt
, args
);
2503 fprintf (stderr
, "\n");