]> diplodocus.org Git - nmh/blob - uip/sortm.c
copyip.c: Move interface to own file.
[nmh] / uip / sortm.c
1 /* sortm.c -- sort messages in a folder by date/time
2 *
3 * This code is Copyright (c) 2002, by the authors of nmh. See the
4 * COPYRIGHT file in the root directory of the nmh distribution for
5 * complete copyright information.
6 */
7
8 #include "h/mh.h"
9 #include "sbr/uprf.h"
10 #include "sbr/m_convert.h"
11 #include "sbr/getfolder.h"
12 #include "sbr/ext_hook.h"
13 #include "sbr/folder_read.h"
14 #include "sbr/folder_free.h"
15 #include "sbr/context_save.h"
16 #include "sbr/context_replace.h"
17 #include "sbr/context_find.h"
18 #include "sbr/ambigsw.h"
19 #include "sbr/path.h"
20 #include "sbr/print_version.h"
21 #include "sbr/print_help.h"
22 #include "sbr/error.h"
23 #include "h/tws.h"
24 #include "h/done.h"
25 #include "h/utils.h"
26 #include "sbr/m_maildir.h"
27
/*
 * Command-line switch table.  Each row is X(switch text, minchars, id).
 * X is redefined twice below so that the project macros DEFINE_SWITCH_ENUM
 * and DEFINE_SWITCH_ARRAY can (presumably by expanding SORTM_SWITCHES)
 * produce the enum of switch ids and the "switches" table that main()
 * passes to smatch().
 */
#define SORTM_SWITCHES \
    X("datefield field", 0, DATESW) \
    X("textfield field", 0, TEXTSW) \
    X("notextfield", 0, NSUBJSW) \
    X("subject", -3, SUBJSW) /* backward-compatibility */ \
    X("limit days", 0, LIMSW) \
    X("nolimit", 0, NLIMSW) \
    X("verbose", 0, VERBSW) \
    X("noverbose", 0, NVERBSW) \
    X("all", 0, ALLMSGS) \
    X("noall", 0, NALLMSGS) \
    X("check", 0, CHECKSW) \
    X("nocheck", 0, NCHECKSW) \
    X("version", 0, VERSIONSW) \
    X("help", 0, HELPSW)

/* First expansion: only the id of each row is kept. */
#define X(sw, minchars, id) id,
DEFINE_SWITCH_ENUM(SORTM);
#undef X

/* Second expansion: each row becomes one initializer of "switches". */
#define X(sw, minchars, id) { sw, minchars, id },
DEFINE_SWITCH_ARRAY(SORTM, switches);
#undef X
51
/*
 * One message that is being sorted.
 */
struct smsg {
    int    s_msg;      /* message number; read_hdrs() stores 0 in the end marker */
    time_t s_clock;    /* sort time: parsed date field, or the file's mtime */
    char  *s_subj;     /* canonicalized text field; only set when subjsort != 0 */
};
57
/* Array of messages to sort, allocated and filled by read_hdrs(). */
static struct smsg *smsgs = NULL;
/* Number of valid entries in smsgs (the value returned by read_hdrs()). */
int nmsgs = 0;

char *subjsort = NULL;     /* sort on subject if != 0 */
time_t datelimit = 0;      /* -limit value converted to seconds; 0 = no limit */
bool submajor = false;     /* if true, sort on subject-major */
bool verbose = false;      /* -verbose: announce the sort and each rename */
int allmsgs = 1;           /* -all/-noall: default to "all" when no msgs given */
int check_failed = 0;      /* set when a header or date could not be parsed */
67
/*
 * Comparator type expected by qsort().  The sort routines in this file
 * take struct smsg ** arguments and are cast to this type at the call
 * sites to keep the compiler happy.
 */
typedef int (*qsort_comp) (const void *lhs, const void *rhs);
70
/*
 * Forward declarations of the file-local helpers.
 */
static int read_hdrs (struct msgs *mp, char *datesw);
static int get_fields (char *datesw, int msg, struct smsg *smsg);
static int dsort (struct smsg **a, struct smsg **b);
static int subsort (struct smsg **a, struct smsg **b);
static int txtsort (struct smsg **a, struct smsg **b);
static void rename_chain (struct msgs *mp, struct smsg **mlist, int msg, int endmsg);
static void rename_msgs (struct msgs *mp, struct smsg **mlist);

81
82
83 int
84 main (int argc, char **argv)
85 {
86 int i, msgnum;
87 char *cp, *maildir, *datesw = NULL;
88 char *folder = NULL, buf[BUFSIZ], **argp;
89 char **arguments;
90 struct msgs_array msgs = { 0, 0, NULL };
91 struct msgs *mp;
92 struct smsg **dlist;
93 bool checksw = false;
94
95 if (nmh_init(argv[0], true, true)) { return 1; }
96
97 arguments = getarguments (invo_name, argc, argv, 1);
98 argp = arguments;
99
100 /*
101 * Parse arguments
102 */
103 while ((cp = *argp++)) {
104 if (*cp == '-') {
105 switch (smatch (++cp, switches)) {
106 case AMBIGSW:
107 ambigsw (cp, switches);
108 done (1);
109 case UNKWNSW:
110 die("-%s unknown", cp);
111
112 case HELPSW:
113 snprintf(buf, sizeof(buf), "%s [+folder] [msgs] [switches]",
114 invo_name);
115 print_help (buf, switches, 1);
116 done (0);
117 case VERSIONSW:
118 print_version(invo_name);
119 done (0);
120
121 case DATESW:
122 if (datesw)
123 die("only one date field at a time");
124 if (!(datesw = *argp++) || *datesw == '-')
125 die("missing argument to %s", argp[-2]);
126 continue;
127
128 case TEXTSW:
129 if (subjsort)
130 die("only one text field at a time");
131 if (!(subjsort = *argp++) || *subjsort == '-')
132 die("missing argument to %s", argp[-2]);
133 continue;
134
135 case SUBJSW:
136 subjsort = "subject";
137 continue;
138 case NSUBJSW:
139 subjsort = NULL;
140 continue;
141
142 case LIMSW:
143 if (!(cp = *argp++) || *cp == '-')
144 die("missing argument to %s", argp[-2]);
145 while (*cp == '0')
146 cp++; /* skip any leading zeros */
147 if (!*cp) { /* hit end of string */
148 submajor = true; /* sort subject-major */
149 continue;
150 }
151 if (!isdigit((unsigned char) *cp) || !(datelimit = atoi(cp)))
152 die("impossible limit %s", cp);
153 datelimit *= 60*60*24;
154 continue;
155 case NLIMSW:
156 submajor = false; /* use date-major, but */
157 datelimit = 0; /* use no limit */
158 continue;
159
160 case VERBSW:
161 verbose = true;
162 continue;
163 case NVERBSW:
164 verbose = false;
165 continue;
166
167 case ALLMSGS:
168 allmsgs = 1;
169 continue;
170 case NALLMSGS:
171 allmsgs = 0;
172 continue;
173
174 case CHECKSW:
175 checksw = true;
176 continue;
177 case NCHECKSW:
178 checksw = false;
179 continue;
180 }
181 }
182 if (*cp == '+' || *cp == '@') {
183 if (folder)
184 die("only one folder at a time!");
185 folder = pluspath (cp);
186 } else
187 app_msgarg(&msgs, cp);
188 }
189
190 if (!context_find ("path"))
191 free (path ("./", TFOLDER));
192 if (!msgs.size) {
193 if (allmsgs) {
194 app_msgarg(&msgs, "all");
195 } else {
196 die("must specify messages to sort with -noall");
197 }
198 }
199 if (!datesw)
200 datesw = "date";
201 if (!folder)
202 folder = getfolder (1);
203 maildir = m_maildir (folder);
204
205 if (chdir (maildir) == NOTOK)
206 adios (maildir, "unable to change directory to");
207
208 /* read folder and create message structure */
209 if (!(mp = folder_read (folder, 1)))
210 die("unable to read folder %s", folder);
211
212 /* check for empty folder */
213 if (mp->nummsg == 0)
214 die("no messages in %s", folder);
215
216 /* parse all the message ranges/sequences and set SELECTED */
217 for (msgnum = 0; msgnum < msgs.size; msgnum++)
218 if (!m_convert (mp, msgs.msgs[msgnum]))
219 done (1);
220 seq_setprev (mp); /* set the previous sequence */
221
222 if ((nmsgs = read_hdrs (mp, datesw)) <= 0)
223 die("no messages to sort");
224
225 if (checksw && check_failed) {
226 die("errors found, no messages sorted");
227 }
228
229 /*
230 * sort a list of pointers to our "messages to be sorted".
231 */
232 dlist = mh_xmalloc ((nmsgs+1) * sizeof(*dlist));
233 for (i = 0; i < nmsgs; i++)
234 dlist[i] = &smsgs[i];
235 dlist[nmsgs] = 0;
236
237 if (verbose) { /* announce what we're doing */
238 if (subjsort)
239 if (submajor)
240 printf ("sorting by %s\n", subjsort);
241 else
242 printf ("sorting by %s-major %s-minor\n", subjsort, datesw);
243 else
244 printf ("sorting by datefield %s\n", datesw);
245 }
246
247 /* first sort by date, or by subject-major, date-minor */
248 qsort (dlist, nmsgs, sizeof(*dlist),
249 (qsort_comp) (submajor && subjsort ? txtsort : dsort));
250
251 /*
252 * if we're sorting on subject, we need another list
253 * in subject order, then a merge pass to collate the
254 * two sorts.
255 */
256 if (!submajor && subjsort) { /* already date sorted */
257 struct smsg **slist, **flist;
258 struct smsg ***il, **fp, **dp;
259
260 slist = mh_xmalloc ((nmsgs+1) * sizeof(*slist));
261 memcpy(slist, dlist, (nmsgs+1)*sizeof(*slist));
262 qsort(slist, nmsgs, sizeof(*slist), (qsort_comp) subsort);
263
264 /*
265 * make an inversion list so we can quickly find
266 * the collection of messages with the same subj
267 * given a message number.
268 */
269 il = mh_xcalloc(mp->hghsel + 1, sizeof *il);
270 for (i = 0; i < nmsgs; i++)
271 il[slist[i]->s_msg] = &slist[i];
272 /*
273 * make up the final list, chronological but with
274 * all the same subjects grouped together.
275 */
276 flist = mh_xmalloc ((nmsgs+1) * sizeof(*flist));
277 fp = flist;
278 for (dp = dlist; *dp;) {
279 struct smsg **s = il[(*dp++)->s_msg];
280
281 /* see if we already did this guy */
282 if (! s)
283 continue;
284
285 *fp++ = *s++;
286 /*
287 * take the next message(s) if there is one,
288 * its subject isn't null and its subject
289 * is the same as this one and it's not too
290 * far away in time.
291 */
292 while (*s && (*s)->s_subj[0] &&
293 strcmp((*s)->s_subj, s[-1]->s_subj) == 0 &&
294 (datelimit == 0 ||
295 (*s)->s_clock - s[-1]->s_clock <= datelimit)) {
296 il[(*s)->s_msg] = 0;
297 *fp++ = *s++;
298 }
299 }
300 *fp = 0;
301 free (il);
302 free (slist);
303 free (dlist);
304 dlist = flist;
305 }
306
307 /*
308 * At this point, dlist is a sorted array of pointers to smsg structures,
309 * each of which contains a message number.
310 */
311
312 rename_msgs (mp, dlist);
313
314 context_replace (pfolder, folder); /* update current folder */
315 seq_save (mp); /* synchronize message sequences */
316 context_save (); /* save the context file */
317 folder_free (mp); /* free folder/message structure */
318 done (0);
319 return 1;
320 }
321
322 static int
323 read_hdrs (struct msgs *mp, char *datesw)
324 {
325 int msgnum;
326 struct smsg *s;
327
328 smsgs = mh_xcalloc(mp->hghsel - mp->lowsel + 2, sizeof *smsgs);
329 s = smsgs;
330 for (msgnum = mp->lowsel; msgnum <= mp->hghsel; msgnum++) {
331 if (is_selected(mp, msgnum)) {
332 if (get_fields (datesw, msgnum, s)) {
333 s->s_msg = msgnum;
334 s++;
335 }
336 }
337 }
338 s->s_msg = 0;
339 return s - smsgs;
340 }
341
342
343 /*
344 * Parse the message and get the data or subject field,
345 * if needed.
346 */
347
348 static int
349 get_fields (char *datesw, int msg, struct smsg *smsg)
350 {
351 int state;
352 int compnum;
353 char *msgnam, buf[NMH_BUFSIZ], nam[NAMESZ];
354 struct tws *tw;
355 char *datecomp = NULL, *subjcomp = NULL;
356 FILE *in;
357 m_getfld_state_t gstate;
358
359 if ((in = fopen (msgnam = m_name (msg), "r")) == NULL) {
360 admonish (msgnam, "unable to read message");
361 return 0;
362 }
363 gstate = m_getfld_state_init(in);
364 for (compnum = 1;;) {
365 int bufsz = sizeof buf;
366 switch (state = m_getfld2(&gstate, nam, buf, &bufsz)) {
367 case FLD:
368 case FLDPLUS:
369 compnum++;
370 if (!strcasecmp (nam, datesw)) {
371 datecomp = add (buf, datecomp);
372 while (state == FLDPLUS) {
373 bufsz = sizeof buf;
374 state = m_getfld2(&gstate, nam, buf, &bufsz);
375 datecomp = add (buf, datecomp);
376 }
377 if (!subjsort || subjcomp)
378 break;
379 } else if (subjsort && !strcasecmp (nam, subjsort)) {
380 subjcomp = add (buf, subjcomp);
381 while (state == FLDPLUS) {
382 bufsz = sizeof buf;
383 state = m_getfld2(&gstate, nam, buf, &bufsz);
384 subjcomp = add (buf, subjcomp);
385 }
386 if (datecomp)
387 break;
388 } else {
389 /* just flush this guy */
390 while (state == FLDPLUS) {
391 bufsz = sizeof buf;
392 state = m_getfld2(&gstate, nam, buf, &bufsz);
393 }
394 }
395 continue;
396
397 case BODY:
398 case FILEEOF:
399 break;
400
401 case LENERR:
402 case FMTERR:
403 if (state == LENERR || state == FMTERR) {
404 inform("format error in message %d (header #%d), continuing...",
405 msg, compnum);
406 check_failed = 1;
407 }
408 free(datecomp);
409 free(subjcomp);
410 fclose (in);
411 return 0;
412
413 default:
414 die("internal error -- you lose");
415 }
416 break;
417 }
418 m_getfld_state_destroy (&gstate);
419
420 /*
421 * If no date component, then use the modification
422 * time of the file as its date
423 */
424 if (!datecomp || (tw = dparsetime (datecomp)) == NULL) {
425 struct stat st;
426
427 inform("can't parse %s field in message %d, "
428 "will use file modification time", datesw, msg);
429 fstat (fileno (in), &st);
430 smsg->s_clock = st.st_mtime;
431 check_failed = 1;
432 } else {
433 smsg->s_clock = dmktime (tw);
434 }
435
436 if (subjsort) {
437 if (subjcomp) {
438 /*
439 * try to make the subject "canonical": delete
440 * leading "re:", everything but letters & smash
441 * letters to lower case.
442 */
443 char *cp, *cp2, c;
444
445 cp = subjcomp;
446 cp2 = subjcomp;
447 if (strcmp (subjsort, "subject") == 0) {
448 while ((c = *cp)) {
449 if (! isspace((unsigned char) c)) {
450 if(!uprf(cp, "re:"))
451 break;
452 cp += 2;
453 }
454 cp++;
455 }
456 }
457
458 while ((c = *cp++)) {
459 if (isascii((unsigned char) c) && isalnum((unsigned char) c))
460 *cp2++ = tolower((unsigned char)c);
461 }
462
463 *cp2 = '\0';
464 }
465 else
466 subjcomp = "";
467
468 smsg->s_subj = subjcomp;
469 }
470 fclose (in);
471 free(datecomp);
472
473 return 1;
474 }
475
476 /*
477 * sort on dates.
478 */
479 static int
480 dsort (struct smsg **a, struct smsg **b)
481 {
482 if ((*a)->s_clock < (*b)->s_clock)
483 return -1;
484 if ((*a)->s_clock > (*b)->s_clock)
485 return 1;
486 if ((*a)->s_msg < (*b)->s_msg)
487 return -1;
488 return 1;
489 }
490
491 /*
492 * sort on subjects.
493 */
494 static int
495 subsort (struct smsg **a, struct smsg **b)
496 {
497 int i;
498
499 if ((i = strcmp ((*a)->s_subj, (*b)->s_subj)))
500 return i;
501
502 return dsort(a, b);
503 }
504
505 static int
506 txtsort (struct smsg **a, struct smsg **b)
507 {
508 int i;
509
510 if ((i = strcmp ((*a)->s_subj, (*b)->s_subj)))
511 return i;
512 if ((*a)->s_msg < (*b)->s_msg)
513 return -1;
514 return 1;
515 }
516
517 static void
518 rename_chain (struct msgs *mp, struct smsg **mlist, int msg, int endmsg)
519 {
520 int nxt, old, new;
521 char *newname, oldname[BUFSIZ];
522 char newbuf[PATH_MAX + 1];
523
524 for (;;) {
525 nxt = mlist[msg] - smsgs; /* mlist[msg] is a ptr into smsgs */
526 mlist[msg] = NULL;
527 old = smsgs[nxt].s_msg;
528 new = smsgs[msg].s_msg;
529 strncpy (oldname, m_name (old), sizeof(oldname));
530 newname = m_name (new);
531 if (verbose)
532 printf ("message %d becomes message %d\n", old, new);
533
534 (void)snprintf(oldname, sizeof (oldname), "%s/%d", mp->foldpath, old);
535 (void)snprintf(newbuf, sizeof (newbuf), "%s/%d", mp->foldpath, new);
536 ext_hook("ref-hook", oldname, newbuf);
537
538 if (rename (oldname, newname) == NOTOK)
539 adios (newname, "unable to rename %s to", oldname);
540
541 copy_msg_flags (mp, new, old);
542 if (mp->curmsg == old)
543 seq_setcur (mp, new);
544
545 if (nxt == endmsg)
546 break;
547
548 msg = nxt;
549 }
550 /* if (nxt != endmsg); */
551 /* rename_chain (mp, mlist, nxt, endmsg); */
552 }
553
554 static void
555 rename_msgs (struct msgs *mp, struct smsg **mlist)
556 {
557 int i, j, old, new;
558 bvector_t tmpset = bvector_create ();
559 char f1[BUFSIZ], tmpfil[BUFSIZ];
560 char newbuf[PATH_MAX + 1];
561 struct smsg *sp;
562
563 strncpy (tmpfil, m_name (mp->hghmsg + 1), sizeof(tmpfil));
564
565 for (i = 0; i < nmsgs; i++) {
566 if (! (sp = mlist[i]))
567 continue; /* did this one */
568
569 j = sp - smsgs;
570 if (j == i)
571 continue; /* this one doesn't move */
572
573 /*
574 * the guy that was msg j is about to become msg i.
575 * rename 'j' to make a hole, then recursively rename
576 * guys to fill up the hole.
577 */
578 old = smsgs[j].s_msg;
579 new = smsgs[i].s_msg;
580 strncpy (f1, m_name (old), sizeof(f1));
581
582 if (verbose)
583 printf ("renaming message chain from %d to %d\n", old, new);
584
585 /*
586 * Run the external hook to refile the old message as the
587 * temporary message number that is off of the end of the
588 * messages in the folder.
589 */
590
591 (void)snprintf(f1, sizeof (f1), "%s/%d", mp->foldpath, old);
592 (void)snprintf(newbuf, sizeof (newbuf), "%s/%d", mp->foldpath, mp->hghmsg + 1);
593 ext_hook("ref-hook", f1, newbuf);
594
595 if (rename (f1, tmpfil) == NOTOK)
596 adios (tmpfil, "unable to rename %s to ", f1);
597
598 get_msg_flags (mp, tmpset, old);
599
600 rename_chain (mp, mlist, j, i);
601
602 /*
603 * Run the external hook to refile the temporary message number
604 * to the real place.
605 */
606
607 (void)snprintf(f1, sizeof (f1), "%s/%d", mp->foldpath, new);
608 ext_hook("ref-hook", newbuf, f1);
609
610 if (rename (tmpfil, m_name(new)) == NOTOK)
611 adios (m_name(new), "unable to rename %s to", tmpfil);
612
613 set_msg_flags (mp, tmpset, new);
614 mp->msgflags |= SEQMOD;
615 }
616
617 bvector_free (tmpset);
618 }