]> diplodocus.org Git - nmh/blob - uip/sortm.c
vector.c: Move interface to own file.
[nmh] / uip / sortm.c
1 /* sortm.c -- sort messages in a folder by date/time
2 *
3 * This code is Copyright (c) 2002, by the authors of nmh. See the
4 * COPYRIGHT file in the root directory of the nmh distribution for
5 * complete copyright information.
6 */
7
#include "h/mh.h"
#include "sbr/path.h"
#include "sbr/print_version.h"
#include "sbr/print_help.h"
#include "sbr/error.h"
#include "h/tws.h"
#include "h/done.h"
#include "h/utils.h"
#include "sbr/m_maildir.h"
#include <limits.h>
#include <stdlib.h>
17
/*
 * Command-line switches accepted by sortm.  Each X() entry supplies the
 * switch text, a minchars value, and the identifier that main() matches
 * on after calling smatch().  The list is expanded twice below: once to
 * build the enum of identifiers and once to build the `switches' table.
 */
#define SORTM_SWITCHES \
    X("datefield field", 0, DATESW) \
    X("textfield field", 0, TEXTSW) \
    X("notextfield", 0, NSUBJSW) \
    X("subject", -3, SUBJSW) /* backward-compatibility */ \
    X("limit days", 0, LIMSW) \
    X("nolimit", 0, NLIMSW) \
    X("verbose", 0, VERBSW) \
    X("noverbose", 0, NVERBSW) \
    X("all", 0, ALLMSGS) \
    X("noall", 0, NALLMSGS) \
    X("check", 0, CHECKSW) \
    X("nocheck", 0, NCHECKSW) \
    X("version", 0, VERSIONSW) \
    X("help", 0, HELPSW) \

/* First expansion: the switch identifiers. */
#define X(sw, minchars, id) id,
DEFINE_SWITCH_ENUM(SORTM);
#undef X

/* Second expansion: the table handed to smatch(), ambigsw() and print_help(). */
#define X(sw, minchars, id) { sw, minchars, id },
DEFINE_SWITCH_ARRAY(SORTM, switches);
#undef X
41
/*
 * Per-message sort record.  read_hdrs() fills in one of these for each
 * selected message that get_fields() could read.
 */
struct smsg {
    int s_msg;          /* message number */
    time_t s_clock;     /* parsed date field, or the file's mtime as a fallback */
    char *s_subj;       /* canonicalized text field; set only when subjsort is non-NULL */
};
47
static struct smsg *smsgs;      /* record array allocated by read_hdrs() */
int nmsgs;                      /* number of records read_hdrs() filled in */

char *subjsort; /* sort on subject if != 0 */
time_t datelimit = 0;           /* -limit value in seconds; 0 means no limit */
bool submajor; /* if true, sort on subject-major */
bool verbose;                   /* -verbose: report what is being done */
int allmsgs = 1;                /* -all / -noall: default to "all" when no msgs are given */
int check_failed = 0;           /* set by get_fields() on a format error or unparsable date */
57
/*
 * This keeps compiler happy on calls to qsort: the comparators in this
 * file take struct smsg ** arguments and are cast to this type where
 * they are handed to qsort().
 */
typedef int (*qsort_comp) (const void *, const void *);
60
/*
 * static prototypes
 */
/* build the smsgs array from the selected messages */
static int read_hdrs (struct msgs *, char *);
/* read the date (and optional text) field of one message */
static int get_fields (char *, int, struct smsg *);
/* qsort comparators: by date, by text then date, by text then message number */
static int dsort (struct smsg **, struct smsg **);
static int subsort (struct smsg **, struct smsg **);
static int txtsort (struct smsg **, struct smsg **);
/* rename the message files so they follow the sorted order */
static void rename_chain (struct msgs *, struct smsg **, int, int);
static void rename_msgs (struct msgs *, struct smsg **);
71
72
73 int
74 main (int argc, char **argv)
75 {
76 int i, msgnum;
77 char *cp, *maildir, *datesw = NULL;
78 char *folder = NULL, buf[BUFSIZ], **argp;
79 char **arguments;
80 struct msgs_array msgs = { 0, 0, NULL };
81 struct msgs *mp;
82 struct smsg **dlist;
83 bool checksw = false;
84
85 if (nmh_init(argv[0], true, true)) { return 1; }
86
87 arguments = getarguments (invo_name, argc, argv, 1);
88 argp = arguments;
89
90 /*
91 * Parse arguments
92 */
93 while ((cp = *argp++)) {
94 if (*cp == '-') {
95 switch (smatch (++cp, switches)) {
96 case AMBIGSW:
97 ambigsw (cp, switches);
98 done (1);
99 case UNKWNSW:
100 die("-%s unknown", cp);
101
102 case HELPSW:
103 snprintf(buf, sizeof(buf), "%s [+folder] [msgs] [switches]",
104 invo_name);
105 print_help (buf, switches, 1);
106 done (0);
107 case VERSIONSW:
108 print_version(invo_name);
109 done (0);
110
111 case DATESW:
112 if (datesw)
113 die("only one date field at a time");
114 if (!(datesw = *argp++) || *datesw == '-')
115 die("missing argument to %s", argp[-2]);
116 continue;
117
118 case TEXTSW:
119 if (subjsort)
120 die("only one text field at a time");
121 if (!(subjsort = *argp++) || *subjsort == '-')
122 die("missing argument to %s", argp[-2]);
123 continue;
124
125 case SUBJSW:
126 subjsort = "subject";
127 continue;
128 case NSUBJSW:
129 subjsort = NULL;
130 continue;
131
132 case LIMSW:
133 if (!(cp = *argp++) || *cp == '-')
134 die("missing argument to %s", argp[-2]);
135 while (*cp == '0')
136 cp++; /* skip any leading zeros */
137 if (!*cp) { /* hit end of string */
138 submajor = true; /* sort subject-major */
139 continue;
140 }
141 if (!isdigit((unsigned char) *cp) || !(datelimit = atoi(cp)))
142 die("impossible limit %s", cp);
143 datelimit *= 60*60*24;
144 continue;
145 case NLIMSW:
146 submajor = false; /* use date-major, but */
147 datelimit = 0; /* use no limit */
148 continue;
149
150 case VERBSW:
151 verbose = true;
152 continue;
153 case NVERBSW:
154 verbose = false;
155 continue;
156
157 case ALLMSGS:
158 allmsgs = 1;
159 continue;
160 case NALLMSGS:
161 allmsgs = 0;
162 continue;
163
164 case CHECKSW:
165 checksw = true;
166 continue;
167 case NCHECKSW:
168 checksw = false;
169 continue;
170 }
171 }
172 if (*cp == '+' || *cp == '@') {
173 if (folder)
174 die("only one folder at a time!");
175 folder = pluspath (cp);
176 } else
177 app_msgarg(&msgs, cp);
178 }
179
180 if (!context_find ("path"))
181 free (path ("./", TFOLDER));
182 if (!msgs.size) {
183 if (allmsgs) {
184 app_msgarg(&msgs, "all");
185 } else {
186 die("must specify messages to sort with -noall");
187 }
188 }
189 if (!datesw)
190 datesw = "date";
191 if (!folder)
192 folder = getfolder (1);
193 maildir = m_maildir (folder);
194
195 if (chdir (maildir) == NOTOK)
196 adios (maildir, "unable to change directory to");
197
198 /* read folder and create message structure */
199 if (!(mp = folder_read (folder, 1)))
200 die("unable to read folder %s", folder);
201
202 /* check for empty folder */
203 if (mp->nummsg == 0)
204 die("no messages in %s", folder);
205
206 /* parse all the message ranges/sequences and set SELECTED */
207 for (msgnum = 0; msgnum < msgs.size; msgnum++)
208 if (!m_convert (mp, msgs.msgs[msgnum]))
209 done (1);
210 seq_setprev (mp); /* set the previous sequence */
211
212 if ((nmsgs = read_hdrs (mp, datesw)) <= 0)
213 die("no messages to sort");
214
215 if (checksw && check_failed) {
216 die("errors found, no messages sorted");
217 }
218
219 /*
220 * sort a list of pointers to our "messages to be sorted".
221 */
222 dlist = mh_xmalloc ((nmsgs+1) * sizeof(*dlist));
223 for (i = 0; i < nmsgs; i++)
224 dlist[i] = &smsgs[i];
225 dlist[nmsgs] = 0;
226
227 if (verbose) { /* announce what we're doing */
228 if (subjsort)
229 if (submajor)
230 printf ("sorting by %s\n", subjsort);
231 else
232 printf ("sorting by %s-major %s-minor\n", subjsort, datesw);
233 else
234 printf ("sorting by datefield %s\n", datesw);
235 }
236
237 /* first sort by date, or by subject-major, date-minor */
238 qsort (dlist, nmsgs, sizeof(*dlist),
239 (qsort_comp) (submajor && subjsort ? txtsort : dsort));
240
241 /*
242 * if we're sorting on subject, we need another list
243 * in subject order, then a merge pass to collate the
244 * two sorts.
245 */
246 if (!submajor && subjsort) { /* already date sorted */
247 struct smsg **slist, **flist;
248 struct smsg ***il, **fp, **dp;
249
250 slist = mh_xmalloc ((nmsgs+1) * sizeof(*slist));
251 memcpy(slist, dlist, (nmsgs+1)*sizeof(*slist));
252 qsort(slist, nmsgs, sizeof(*slist), (qsort_comp) subsort);
253
254 /*
255 * make an inversion list so we can quickly find
256 * the collection of messages with the same subj
257 * given a message number.
258 */
259 il = mh_xcalloc(mp->hghsel + 1, sizeof *il);
260 for (i = 0; i < nmsgs; i++)
261 il[slist[i]->s_msg] = &slist[i];
262 /*
263 * make up the final list, chronological but with
264 * all the same subjects grouped together.
265 */
266 flist = mh_xmalloc ((nmsgs+1) * sizeof(*flist));
267 fp = flist;
268 for (dp = dlist; *dp;) {
269 struct smsg **s = il[(*dp++)->s_msg];
270
271 /* see if we already did this guy */
272 if (! s)
273 continue;
274
275 *fp++ = *s++;
276 /*
277 * take the next message(s) if there is one,
278 * its subject isn't null and its subject
279 * is the same as this one and it's not too
280 * far away in time.
281 */
282 while (*s && (*s)->s_subj[0] &&
283 strcmp((*s)->s_subj, s[-1]->s_subj) == 0 &&
284 (datelimit == 0 ||
285 (*s)->s_clock - s[-1]->s_clock <= datelimit)) {
286 il[(*s)->s_msg] = 0;
287 *fp++ = *s++;
288 }
289 }
290 *fp = 0;
291 free (il);
292 free (slist);
293 free (dlist);
294 dlist = flist;
295 }
296
297 /*
298 * At this point, dlist is a sorted array of pointers to smsg structures,
299 * each of which contains a message number.
300 */
301
302 rename_msgs (mp, dlist);
303
304 context_replace (pfolder, folder); /* update current folder */
305 seq_save (mp); /* synchronize message sequences */
306 context_save (); /* save the context file */
307 folder_free (mp); /* free folder/message structure */
308 done (0);
309 return 1;
310 }
311
312 static int
313 read_hdrs (struct msgs *mp, char *datesw)
314 {
315 int msgnum;
316 struct smsg *s;
317
318 smsgs = mh_xcalloc(mp->hghsel - mp->lowsel + 2, sizeof *smsgs);
319 s = smsgs;
320 for (msgnum = mp->lowsel; msgnum <= mp->hghsel; msgnum++) {
321 if (is_selected(mp, msgnum)) {
322 if (get_fields (datesw, msgnum, s)) {
323 s->s_msg = msgnum;
324 s++;
325 }
326 }
327 }
328 s->s_msg = 0;
329 return s - smsgs;
330 }
331
332
333 /*
334 * Parse the message and get the data or subject field,
335 * if needed.
336 */
337
338 static int
339 get_fields (char *datesw, int msg, struct smsg *smsg)
340 {
341 int state;
342 int compnum;
343 char *msgnam, buf[NMH_BUFSIZ], nam[NAMESZ];
344 struct tws *tw;
345 char *datecomp = NULL, *subjcomp = NULL;
346 FILE *in;
347 m_getfld_state_t gstate;
348
349 if ((in = fopen (msgnam = m_name (msg), "r")) == NULL) {
350 admonish (msgnam, "unable to read message");
351 return 0;
352 }
353 gstate = m_getfld_state_init(in);
354 for (compnum = 1;;) {
355 int bufsz = sizeof buf;
356 switch (state = m_getfld2(&gstate, nam, buf, &bufsz)) {
357 case FLD:
358 case FLDPLUS:
359 compnum++;
360 if (!strcasecmp (nam, datesw)) {
361 datecomp = add (buf, datecomp);
362 while (state == FLDPLUS) {
363 bufsz = sizeof buf;
364 state = m_getfld2(&gstate, nam, buf, &bufsz);
365 datecomp = add (buf, datecomp);
366 }
367 if (!subjsort || subjcomp)
368 break;
369 } else if (subjsort && !strcasecmp (nam, subjsort)) {
370 subjcomp = add (buf, subjcomp);
371 while (state == FLDPLUS) {
372 bufsz = sizeof buf;
373 state = m_getfld2(&gstate, nam, buf, &bufsz);
374 subjcomp = add (buf, subjcomp);
375 }
376 if (datecomp)
377 break;
378 } else {
379 /* just flush this guy */
380 while (state == FLDPLUS) {
381 bufsz = sizeof buf;
382 state = m_getfld2(&gstate, nam, buf, &bufsz);
383 }
384 }
385 continue;
386
387 case BODY:
388 case FILEEOF:
389 break;
390
391 case LENERR:
392 case FMTERR:
393 if (state == LENERR || state == FMTERR) {
394 inform("format error in message %d (header #%d), continuing...",
395 msg, compnum);
396 check_failed = 1;
397 }
398 free(datecomp);
399 free(subjcomp);
400 fclose (in);
401 return 0;
402
403 default:
404 die("internal error -- you lose");
405 }
406 break;
407 }
408 m_getfld_state_destroy (&gstate);
409
410 /*
411 * If no date component, then use the modification
412 * time of the file as its date
413 */
414 if (!datecomp || (tw = dparsetime (datecomp)) == NULL) {
415 struct stat st;
416
417 inform("can't parse %s field in message %d, "
418 "will use file modification time", datesw, msg);
419 fstat (fileno (in), &st);
420 smsg->s_clock = st.st_mtime;
421 check_failed = 1;
422 } else {
423 smsg->s_clock = dmktime (tw);
424 }
425
426 if (subjsort) {
427 if (subjcomp) {
428 /*
429 * try to make the subject "canonical": delete
430 * leading "re:", everything but letters & smash
431 * letters to lower case.
432 */
433 char *cp, *cp2, c;
434
435 cp = subjcomp;
436 cp2 = subjcomp;
437 if (strcmp (subjsort, "subject") == 0) {
438 while ((c = *cp)) {
439 if (! isspace((unsigned char) c)) {
440 if(!uprf(cp, "re:"))
441 break;
442 cp += 2;
443 }
444 cp++;
445 }
446 }
447
448 while ((c = *cp++)) {
449 if (isascii((unsigned char) c) && isalnum((unsigned char) c))
450 *cp2++ = tolower((unsigned char)c);
451 }
452
453 *cp2 = '\0';
454 }
455 else
456 subjcomp = "";
457
458 smsg->s_subj = subjcomp;
459 }
460 fclose (in);
461 free(datecomp);
462
463 return 1;
464 }
465
466 /*
467 * sort on dates.
468 */
469 static int
470 dsort (struct smsg **a, struct smsg **b)
471 {
472 if ((*a)->s_clock < (*b)->s_clock)
473 return -1;
474 if ((*a)->s_clock > (*b)->s_clock)
475 return 1;
476 if ((*a)->s_msg < (*b)->s_msg)
477 return -1;
478 return 1;
479 }
480
481 /*
482 * sort on subjects.
483 */
484 static int
485 subsort (struct smsg **a, struct smsg **b)
486 {
487 int i;
488
489 if ((i = strcmp ((*a)->s_subj, (*b)->s_subj)))
490 return i;
491
492 return dsort(a, b);
493 }
494
495 static int
496 txtsort (struct smsg **a, struct smsg **b)
497 {
498 int i;
499
500 if ((i = strcmp ((*a)->s_subj, (*b)->s_subj)))
501 return i;
502 if ((*a)->s_msg < (*b)->s_msg)
503 return -1;
504 return 1;
505 }
506
507 static void
508 rename_chain (struct msgs *mp, struct smsg **mlist, int msg, int endmsg)
509 {
510 int nxt, old, new;
511 char *newname, oldname[BUFSIZ];
512 char newbuf[PATH_MAX + 1];
513
514 for (;;) {
515 nxt = mlist[msg] - smsgs; /* mlist[msg] is a ptr into smsgs */
516 mlist[msg] = NULL;
517 old = smsgs[nxt].s_msg;
518 new = smsgs[msg].s_msg;
519 strncpy (oldname, m_name (old), sizeof(oldname));
520 newname = m_name (new);
521 if (verbose)
522 printf ("message %d becomes message %d\n", old, new);
523
524 (void)snprintf(oldname, sizeof (oldname), "%s/%d", mp->foldpath, old);
525 (void)snprintf(newbuf, sizeof (newbuf), "%s/%d", mp->foldpath, new);
526 ext_hook("ref-hook", oldname, newbuf);
527
528 if (rename (oldname, newname) == NOTOK)
529 adios (newname, "unable to rename %s to", oldname);
530
531 copy_msg_flags (mp, new, old);
532 if (mp->curmsg == old)
533 seq_setcur (mp, new);
534
535 if (nxt == endmsg)
536 break;
537
538 msg = nxt;
539 }
540 /* if (nxt != endmsg); */
541 /* rename_chain (mp, mlist, nxt, endmsg); */
542 }
543
544 static void
545 rename_msgs (struct msgs *mp, struct smsg **mlist)
546 {
547 int i, j, old, new;
548 bvector_t tmpset = bvector_create ();
549 char f1[BUFSIZ], tmpfil[BUFSIZ];
550 char newbuf[PATH_MAX + 1];
551 struct smsg *sp;
552
553 strncpy (tmpfil, m_name (mp->hghmsg + 1), sizeof(tmpfil));
554
555 for (i = 0; i < nmsgs; i++) {
556 if (! (sp = mlist[i]))
557 continue; /* did this one */
558
559 j = sp - smsgs;
560 if (j == i)
561 continue; /* this one doesn't move */
562
563 /*
564 * the guy that was msg j is about to become msg i.
565 * rename 'j' to make a hole, then recursively rename
566 * guys to fill up the hole.
567 */
568 old = smsgs[j].s_msg;
569 new = smsgs[i].s_msg;
570 strncpy (f1, m_name (old), sizeof(f1));
571
572 if (verbose)
573 printf ("renaming message chain from %d to %d\n", old, new);
574
575 /*
576 * Run the external hook to refile the old message as the
577 * temporary message number that is off of the end of the
578 * messages in the folder.
579 */
580
581 (void)snprintf(f1, sizeof (f1), "%s/%d", mp->foldpath, old);
582 (void)snprintf(newbuf, sizeof (newbuf), "%s/%d", mp->foldpath, mp->hghmsg + 1);
583 ext_hook("ref-hook", f1, newbuf);
584
585 if (rename (f1, tmpfil) == NOTOK)
586 adios (tmpfil, "unable to rename %s to ", f1);
587
588 get_msg_flags (mp, tmpset, old);
589
590 rename_chain (mp, mlist, j, i);
591
592 /*
593 * Run the external hook to refile the temporary message number
594 * to the real place.
595 */
596
597 (void)snprintf(f1, sizeof (f1), "%s/%d", mp->foldpath, new);
598 ext_hook("ref-hook", newbuf, f1);
599
600 if (rename (tmpfil, m_name(new)) == NOTOK)
601 adios (m_name(new), "unable to rename %s to", tmpfil);
602
603 set_msg_flags (mp, tmpset, new);
604 mp->msgflags |= SEQMOD;
605 }
606
607 bvector_free (tmpset);
608 }