From: epg <> Date: Thu, 30 Mar 2006 02:28:59 +0000 (+0000) Subject: Add a sample processor. This talks to spamassassin spamd to find out X-Git-Url: https://diplodocus.org/git/mdeliver/commitdiff_plain/eefa3c3bdf7230652826a39c5f7586ad3b672016?ds=sidebyside Add a sample processor. This talks to spamassassin spamd to find out if a message is spam. If so, it moves it to maildir/spam, else it moves it to maildir/new. {libspamc,utils}.[ch] are icky, icky bits from spamassassin's spamc. The only substantial change is to make libspamc.c:_message_read_raw mmap the message instead of reading the entire thing into memory (wtf were they thinking?!). --- diff --git a/Makefile b/Makefile index 53aa1be..2d691b4 100644 --- a/Makefile +++ b/Makefile @@ -1,14 +1,8 @@ # $Id$ -PROG= mdeliver -NOMAN= #defined +all: mdeliver processor -CFLAGS= -Wall -LDFLAGS= -static +processor: processor.o libspamc.o utils.o -prefix?= /usr/local -exec_prefix?= ${prefix} -BINDIR?= ${exec_prefix}/bin -MANDIR?= ${prefix}/man - -.include +clean: + rm -f mdeliver processor *.o diff --git a/libspamc.c b/libspamc.c new file mode 100644 index 0000000..44ab02b --- /dev/null +++ b/libspamc.c @@ -0,0 +1,1700 @@ +/* <@LICENSE> + * Copyright 2004 Apache Software Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * + */ + +#include "libspamc.h" +#include "utils.h" + +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#define closesocket(x) close(x) + +#include +#include +#include +#include + +void _err(int, bool, char *, int, char *, ...); +#define libspamc_log(flags, level, ...) _err(-1, false, __FILE__, __LINE__, __VA_ARGS__) + +enum { LOG_ERR, LOG_DEBUG, LOG_NOTICE }; + +/* FIXME: Make this configurable */ +#define MAX_CONNECT_RETRIES 3 +#define CONNECT_RETRY_SLEEP 1 + +/* RedHat 5.2 doesn't define Shutdown 2nd Parameter Constants */ +/* KAM 12-4-01 */ +/* SJF 2003/04/25 - now test for macros directly */ +#ifndef SHUT_RD +# define SHUT_RD 0 /* no more receptions */ +#endif +#ifndef SHUT_WR +# define SHUT_WR 1 /* no more transmissions */ +#endif +#ifndef SHUT_RDWR +# define SHUT_RDWR 2 /* no more receptions or transmissions */ +#endif + +#ifndef HAVE_OPTARG +extern char *optarg; +#endif + +#ifndef HAVE_INADDR_NONE +#define INADDR_NONE ((in_addr_t) 0xffffffff) +#endif + +/* jm: turned off for now, it should not be necessary. */ +#undef USE_TCP_NODELAY + +#undef DO_CONNECT_DEBUG_SYSLOGS +/* or #define DO_CONNECT_DEBUG_SYSLOGS 1 */ + +static const int ESC_PASSTHROUGHRAW = EX__MAX + 666; + +/* set EXPANSION_ALLOWANCE to something more than might be + added to a message in X-headers and the report template */ +static const int EXPANSION_ALLOWANCE = 16384; + +/* set NUM_CHECK_BYTES to number of bytes that have to match at beginning and end + of the data streams before and after processing by spamd + Aug 7 2002 jm: no longer seems to be used + static const int NUM_CHECK_BYTES = 32; + */ + +/* Set the protocol version that this spamc speaks */ +static const char *PROTOCOL_VERSION = "SPAMC/1.3"; + +/* "private" part of struct message. + * we use this instead of the struct message directly, so that we + * can add new members without affecting the ABI. 
+ */ +struct libspamc_private_message +{ + int flags; /* copied from "flags" arg to message_read() */ +}; + +int libspamc_timeout = 0; + +/* + * translate_connect_errno() + * + * Given a UNIX error number obtained (probably) from "connect(2)", + * translate this to a failure code. This module is shared by both + * transport modules - UNIX and TCP. + * + * This should ONLY be called when there is an error. + */ +static int _translate_connect_errno(int err) +{ + switch (err) { + case EBADF: + case EFAULT: + case ENOTSOCK: + case EISCONN: + case EADDRINUSE: + case EINPROGRESS: + case EALREADY: + case EAFNOSUPPORT: + return EX_SOFTWARE; + + case ECONNREFUSED: + case ETIMEDOUT: + case ENETUNREACH: + return EX_UNAVAILABLE; + + case EACCES: + return EX_NOPERM; + + default: + return EX_SOFTWARE; + } +} + +/* + * opensocket() + * + * Given a socket type (PF_INET or PF_UNIX), try to create this socket + * and store the FD in the pointed-to place. If it's successful, do any + * other setup required to make the socket ready to use, such as setting + * TCP_NODELAY mode, and in any case we return EX_OK if all is well. + * + * Upon failure we return one of the other EX_??? error codes. + */ +static int _opensocket(int flags, int type, int *psock) +{ + const char *typename; + int proto = 0; + +#ifdef _WIN32 + int socktout; +#endif + + assert(psock != 0); + + /*---------------------------------------------------------------- + * Create a few induction variables that are implied by the socket + * type given by the user. The typename is strictly used for debug + * reporting. 
+ */ + if (type == PF_UNIX) { + typename = "PF_UNIX"; + } + else { + typename = "PF_INET"; + proto = IPPROTO_TCP; + } + +#ifdef DO_CONNECT_DEBUG_SYSLOGS + libspamc_log(flags, DEBUG_LEVEL, "dbg: create socket(%s)", typename); +#endif + + if ((*psock = socket(type, SOCK_STREAM, proto)) +#ifndef _WIN32 + < 0 +#else + == INVALID_SOCKET +#endif + ) { + int origerr; + + /*-------------------------------------------------------- + * At this point we had a failure creating the socket, and + * this is pretty much fatal. Translate the error reason + * into something the user can understand. + */ +#ifndef _WIN32 + origerr = errno; /* take a copy before syslog() */ + libspamc_log(flags, LOG_ERR, "socket(%s) to spamd failed: %s", typename, strerror(origerr)); +#else + origerr = WSAGetLastError(); + libspamc_log(flags, LOG_ERR, "socket(%s) to spamd failed: %d", typename, origerr); +#endif + + switch (origerr) { + case EPROTONOSUPPORT: + case EINVAL: + return EX_SOFTWARE; + + case EACCES: + return EX_NOPERM; + + case ENFILE: + case EMFILE: + case ENOBUFS: + case ENOMEM: + return EX_OSERR; + + default: + return EX_SOFTWARE; + } + } + +#ifdef _WIN32 + /* bug 4344: makes timeout functional on Win32 */ + socktout = libspamc_timeout * 1000; + if (type == PF_INET + && setsockopt(*psock, SOL_SOCKET, SO_RCVTIMEO, (char *)&socktout, sizeof(socktout)) != 0) + { + int origerrno; + + origerrno = WSAGetLastError(); + switch (origerrno) + { + case EBADF: + case ENOTSOCK: + case ENOPROTOOPT: + case EFAULT: + libspamc_log(flags, LOG_ERR, "setsockopt(SO_RCVTIMEO) failed: %d", origerrno); + closesocket(*psock); + return EX_SOFTWARE; + + default: + break; /* ignored */ + } + } +#endif + + /*---------------------------------------------------------------- + * Do a bit of setup on the TCP socket if required. 
Notes above + * suggest this is probably not set + */ +#ifdef USE_TCP_NODELAY + { + int one = 1; + + if (type == PF_INET + && setsockopt(*psock, 0, TCP_NODELAY, &one, sizeof one) != 0) { + int origerrno; +#ifndef _WIN32 + origerr = errno; +#else + origerrno = WSAGetLastError(); +#endif + switch (origerr) { + case EBADF: + case ENOTSOCK: + case ENOPROTOOPT: + case EFAULT: + libspamc_log(flags, LOG_ERR, +#ifndef _WIN32 + "setsockopt(TCP_NODELAY) failed: %s", strerror(origerr)); +#else + "setsockopt(TCP_NODELAY) failed: %d", origerr); +#endif + closesocket(*psock); + return EX_SOFTWARE; + + default: + break; /* ignored */ + } + } + } +#endif /* USE_TCP_NODELAY */ + + return EX_OK; /* all is well */ +} + +/* + * try_to_connect_unix() + * + * Given a transport handle that implies using a UNIX domain + * socket, try to make a connection to it and store the resulting + * file descriptor in *sockptr. Return is EX_OK if we did it, + * and some other error code otherwise. + */ +static int _try_to_connect_unix(struct transport *tp, int *sockptr) +{ +#ifndef _WIN32 + int mysock, status, origerr; + struct sockaddr_un addrbuf; + int ret; + + assert(tp != 0); + assert(sockptr != 0); + assert(tp->socketpath != 0); + + /*---------------------------------------------------------------- + * If the socket itself can't be created, this is a fatal error. 
+ */ + if ((ret = _opensocket(tp->flags, PF_UNIX, &mysock)) != EX_OK) + return ret; + + /* set up the UNIX domain socket */ + memset(&addrbuf, 0, sizeof addrbuf); + addrbuf.sun_family = AF_UNIX; + strncpy(addrbuf.sun_path, tp->socketpath, sizeof addrbuf.sun_path - 1); + addrbuf.sun_path[sizeof addrbuf.sun_path - 1] = '\0'; + +#ifdef DO_CONNECT_DEBUG_SYSLOGS + libspamc_log(tp->flags, DEBUG_LEVEL, "dbg: connect(AF_UNIX) to spamd at %s", + addrbuf.sun_path); +#endif + + status = connect(mysock, (struct sockaddr *) &addrbuf, sizeof(addrbuf)); + + origerr = errno; + + if (status >= 0) { +#ifdef DO_CONNECT_DEBUG_SYSLOGS + libspamc_log(tp->flags, DEBUG_LEVEL, "dbg: connect(AF_UNIX) ok"); +#endif + + *sockptr = mysock; + + return EX_OK; + } + + libspamc_log(tp->flags, LOG_ERR, "connect(AF_UNIX) to spamd %s failed: %s", + addrbuf.sun_path, strerror(origerr)); + closesocket(mysock); + + return _translate_connect_errno(origerr); +#else + (void) tp; /* not used. suppress compiler warning */ + (void) sockptr; /* not used. suppress compiler warning */ + return EX_OSERR; +#endif +} + +/* + * try_to_connect_tcp() + * + * Given a transport that implies a TCP connection, either to + * localhost or a list of IP addresses, attempt to connect. The + * list of IP addresses has already been randomized (if requested) + * and limited to just one if fallback has been enabled. 
+ */ +static int _try_to_connect_tcp(const struct transport *tp, int *sockptr) +{ + int numloops; + int origerr = 0; + int ret; + + assert(tp != 0); + assert(sockptr != 0); + assert(tp->nhosts > 0); + +#ifdef DO_CONNECT_DEBUG_SYSLOGS + for (numloops = 0; numloops < tp->nhosts; numloops++) { + libspamc_log(tp->flags, LOG_ERR, "dbg: %d/%d: %s", + numloops + 1, tp->nhosts, inet_ntoa(tp->hosts[numloops])); + } +#endif + + for (numloops = 0; numloops < MAX_CONNECT_RETRIES; numloops++) { + struct sockaddr_in addrbuf; + const int hostix = numloops % tp->nhosts; + int status, mysock; + const char *ipaddr; + + /*-------------------------------------------------------- + * We always start by creating the socket, as we get only + * one attempt to connect() on each one. If this fails, + * we're done. + */ + if ((ret = _opensocket(tp->flags, PF_INET, &mysock)) != EX_OK) + return ret; + + memset(&addrbuf, 0, sizeof(addrbuf)); + + addrbuf.sin_family = AF_INET; + addrbuf.sin_port = htons(tp->port); + addrbuf.sin_addr = tp->hosts[hostix]; + + ipaddr = inet_ntoa(addrbuf.sin_addr); + +#ifdef DO_CONNECT_DEBUG_SYSLOGS + libspamc_log(tp->flags, DEBUG_LEVEL, + "dbg: connect(AF_INET) to spamd at %s (try #%d of %d)", + ipaddr, numloops + 1, MAX_CONNECT_RETRIES); +#endif + + status = + connect(mysock, (struct sockaddr *) &addrbuf, sizeof(addrbuf)); + + if (status != 0) { +#ifndef _WIN32 + origerr = errno; + libspamc_log(tp->flags, LOG_ERR, + "connect(AF_INET) to spamd at %s failed, retrying (#%d of %d): %s", + ipaddr, numloops + 1, MAX_CONNECT_RETRIES, strerror(origerr)); +#else + origerr = WSAGetLastError(); + libspamc_log(tp->flags, LOG_ERR, + "connect(AF_INET) to spamd at %s failed, retrying (#%d of %d): %d", + ipaddr, numloops + 1, MAX_CONNECT_RETRIES, origerr); +#endif + closesocket(mysock); + + sleep(CONNECT_RETRY_SLEEP); + } + else { +#ifdef DO_CONNECT_DEBUG_SYSLOGS + libspamc_log(tp->flags, DEBUG_LEVEL, + "dbg: connect(AF_INET) to spamd at %s done", ipaddr); +#endif + *sockptr = 
mysock; + + return EX_OK; + } + } + + libspamc_log(tp->flags, LOG_ERR, "connection attempt to spamd aborted after %d retries", + MAX_CONNECT_RETRIES); + + return _translate_connect_errno(origerr); +} + +/* Aug 14, 2002 bj: Reworked things. Now we have message_read, message_write, + * message_dump, lookup_host, message_filter, and message_process, and a bunch + * of helper functions. + */ + +static void _clear_message(struct message *m) +{ + m->type = MESSAGE_NONE; + m->raw = NULL; + m->raw_len = 0; + m->pre = NULL; + m->pre_len = 0; + m->msg = NULL; + m->msg_len = 0; + m->post = NULL; + m->post_len = 0; + m->is_spam = EX_TOOBIG; + m->score = 0.0; + m->threshold = 0.0; + m->outbuf = NULL; + m->out = NULL; + m->out_len = 0; + m->content_length = -1; +} + +static void _use_msg_for_out(struct message *m) +{ + if (m->outbuf) + free(m->outbuf); + m->outbuf = NULL; + m->out = m->msg; + m->out_len = m->msg_len; +} + +static int _message_read_raw(int fd, struct message *m) +{ + struct stat st; + _clear_message(m); + + if (fstat(fd, &st) == -1) { + libspamc_log(0,0, "fstat"); + return EX_IOERR; + } + + m->raw = mmap(0, st.st_size, PROT_READ, MAP_PRIVATE, fd, 0); + if (m->raw == MAP_FAILED) { + libspamc_log(0,0, "mmap"); + return EX_IOERR; + } + m->raw_len = st.st_size; + + m->type = MESSAGE_RAW; + m->msg = m->raw; + m->msg_len = m->raw_len; + m->out = m->msg; + m->out_len = m->msg_len; + return EX_OK; +} + +static int _message_read_bsmtp(int fd, struct message *m) +{ + unsigned int i, j; + char prev; + + _clear_message(m); + if ((m->raw = malloc(m->max_len + 1)) == NULL) + return EX_OSERR; + + /* Find the DATA line */ + m->raw_len = full_read(fd, 1, m->raw, m->max_len + 1, m->max_len + 1); + if (m->raw_len <= 0) { + free(m->raw); + m->raw = NULL; + m->raw_len = 0; + return EX_IOERR; + } + m->type = MESSAGE_ERROR; + if (m->raw_len > m->max_len) + return EX_TOOBIG; + m->pre = m->raw; + for (i = 0; i < m->raw_len - 6; i++) { + if ((m->raw[i] == '\n') && + (m->raw[i + 1] == 'D' 
|| m->raw[i + 1] == 'd') && + (m->raw[i + 2] == 'A' || m->raw[i + 2] == 'a') && + (m->raw[i + 3] == 'T' || m->raw[i + 3] == 't') && + (m->raw[i + 4] == 'A' || m->raw[i + 4] == 'a') && + ((m->raw[i + 5] == '\r' && m->raw[i + 6] == '\n') + || m->raw[i + 5] == '\n')) { + /* Found it! */ + i += 6; + if (m->raw[i - 1] == '\r') + i++; + m->pre_len = i; + m->msg = m->raw + i; + m->msg_len = m->raw_len - i; + break; + } + } + if (m->msg == NULL) + return EX_DATAERR; + + /* Find the end-of-DATA line */ + prev = '\n'; + for (i = j = 0; i < m->msg_len; i++) { + if (prev == '\n' && m->msg[i] == '.') { + /* Dot at the beginning of a line */ + if ((m->msg[i + 1] == '\r' && m->msg[i + 2] == '\n') + || m->msg[i + 1] == '\n') { + /* Lone dot! That's all, folks */ + m->post = m->msg + i; + m->post_len = m->msg_len - i; + m->msg_len = j; + break; + } + else if (m->msg[i + 1] == '.') { + /* Escaping dot, eliminate. */ + prev = '.'; + continue; + } /* Else an ordinary dot, drop down to ordinary char handler */ + } + prev = m->msg[i]; + m->msg[j++] = m->msg[i]; + } + + m->type = MESSAGE_BSMTP; + m->out = m->msg; + m->out_len = m->msg_len; + return EX_OK; +} + +int message_read(int fd, int flags, struct message *m) +{ + assert(m != NULL); + + libspamc_timeout = 0; + + /* create the "private" part of the struct message */ + m->priv = malloc(sizeof(struct libspamc_private_message)); + if (m->priv == NULL) { + libspamc_log(flags, LOG_ERR, "message_read: malloc failed"); + return EX_OSERR; + } + m->priv->flags = flags; + + switch (flags & SPAMC_MODE_MASK) { + case SPAMC_RAW_MODE: + return _message_read_raw(fd, m); + + case SPAMC_BSMTP_MODE: + return _message_read_bsmtp(fd, m); + + default: + libspamc_log(flags, LOG_ERR, "message_read: Unknown mode %d", + flags & SPAMC_MODE_MASK); + return EX_USAGE; + } +} + +long message_write(int fd, struct message *m) +{ + long total = 0; + off_t i, j; + off_t jlimit; + char buffer[1024]; + + assert(m != NULL); + + if (m->priv->flags & SPAMC_CHECK_ONLY) { 
+ if (m->is_spam == EX_ISSPAM || m->is_spam == EX_NOTSPAM) { + return full_write(fd, 1, m->out, m->out_len); + + } + else { + libspamc_log(m->priv->flags, LOG_ERR, "oops! SPAMC_CHECK_ONLY is_spam: %d", + m->is_spam); + return -1; + } + } + + /* else we're not in CHECK_ONLY mode */ + switch (m->type) { + case MESSAGE_NONE: + libspamc_log(m->priv->flags, LOG_ERR, "Cannot write this message, it's MESSAGE_NONE!"); + return -1; + + case MESSAGE_ERROR: + return full_write(fd, 1, m->raw, m->raw_len); + + case MESSAGE_RAW: + return full_write(fd, 1, m->out, m->out_len); + + case MESSAGE_BSMTP: + total = full_write(fd, 1, m->pre, m->pre_len); + for (i = 0; i < m->out_len;) { + jlimit = (off_t) (sizeof(buffer) / sizeof(*buffer) - 4); + for (j = 0; i < (off_t) m->out_len && j < jlimit;) { + if (i + 1 < m->out_len && m->out[i] == '\n' + && m->out[i + 1] == '.') { + if (j > jlimit - 4) { + break; /* avoid overflow */ + } + buffer[j++] = m->out[i++]; + buffer[j++] = m->out[i++]; + buffer[j++] = '.'; + } + else { + buffer[j++] = m->out[i++]; + } + } + total += full_write(fd, 1, buffer, j); + } + return total + full_write(fd, 1, m->post, m->post_len); + + default: + libspamc_log(m->priv->flags, LOG_ERR, "Unknown message type %d", m->type); + return -1; + } +} + +void message_dump(int in_fd, int out_fd, struct message *m) +{ + char buf[8196]; + int bytes; + + if (m != NULL && m->type != MESSAGE_NONE) { + message_write(out_fd, m); + } + while ((bytes = full_read(in_fd, 1, buf, 8192, 8192)) > 0) { + if (bytes != full_write(out_fd, 1, buf, bytes)) { + libspamc_log(m->priv->flags, LOG_ERR, "oops! 
message_dump of %d returned different", + bytes); + } + } +} + +static int +_spamc_read_full_line(struct message *m, int flags, SSL * ssl, int sock, + char *buf, size_t *lenp, size_t bufsiz) +{ + int failureval; + int bytesread = 0; + size_t len; + + UNUSED_VARIABLE(m); + + *lenp = 0; + /* Now, read from spamd */ + for (len = 0; len < bufsiz - 1; len++) { + if (flags & SPAMC_USE_SSL) { + bytesread = ssl_timeout_read(ssl, buf + len, 1); + } + else { + bytesread = fd_timeout_read(sock, 0, buf + len, 1); + } + + if (bytesread <= 0) { + failureval = EX_IOERR; + goto failure; + } + + if (buf[len] == '\n') { + buf[len] = '\0'; + if (len > 0 && buf[len - 1] == '\r') { + len--; + buf[len] = '\0'; + } + *lenp = len; + return EX_OK; + } + } + + libspamc_log(flags, LOG_ERR, "spamd responded with line of %d bytes, dying", len); + failureval = EX_TOOBIG; + + failure: + return failureval; +} + +/* + * May 7 2003 jm: using %f is bad where LC_NUMERIC is "," in the locale. + * work around using our own locale-independent float-parser code. + */ +static float _locale_safe_string_to_float(char *buf, int siz) +{ + int is_neg; + char *cp, *dot; + int divider; + float ret, postdot; + + buf[siz - 1] = '\0'; /* ensure termination */ + + /* ok, let's illustrate using "100.033" as an example... */ + + is_neg = 0; + if (*buf == '-') { + is_neg = 1; + } + + ret = (float) (strtol(buf, &dot, 10)); + if (dot == NULL) { + return 0.0; + } + if (dot != NULL && *dot != '.') { + return ret; + } + + /* ex: ret == 100.0 */ + + cp = (dot + 1); + postdot = (float) (strtol(cp, NULL, 10)); + /* note: don't compare floats == 0.0, it's unsafe. 
use a range */ + if (postdot >= -0.00001 && postdot <= 0.00001) { + return ret; + } + + /* ex: postdot == 33.0, cp="033" */ + + /* now count the number of decimal places and figure out what power of 10 to use */ + divider = 1; + while (*cp != '\0') { + divider *= 10; + cp++; + } + + /* ex: + * cp="033", divider=1 + * cp="33", divider=10 + * cp="3", divider=100 + * cp="", divider=1000 + */ + + if (is_neg) { + ret -= (postdot / ((float) divider)); + } + else { + ret += (postdot / ((float) divider)); + } + /* ex: ret == 100.033, tada! ... hopefully */ + + return ret; +} + +static int +_handle_spamd_header(struct message *m, int flags, char *buf, int len, + unsigned int *didtellflags) +{ + char is_spam[6]; + char s_str[21], t_str[21]; + char didset_ret[15]; + char didremove_ret[15]; + + UNUSED_VARIABLE(len); + + /* Feb 12 2003 jm: actually, I think sccanf is working fine here ;) + * let's stick with it for this parser. + * May 7 2003 jm: using %f is bad where LC_NUMERIC is "," in the locale. + * work around using our own locale-independent float-parser code. + */ + if (sscanf(buf, "Spam: %5s ; %20s / %20s", is_spam, s_str, t_str) == 3) { + m->score = _locale_safe_string_to_float(s_str, 20); + m->threshold = _locale_safe_string_to_float(t_str, 20); + + /* set bounds on these to ensure no buffer overflow in the sprintf */ + if (m->score > 1e10) + m->score = 1e10; + else if (m->score < -1e10) + m->score = -1e10; + if (m->threshold > 1e10) + m->threshold = 1e10; + else if (m->threshold < -1e10) + m->threshold = -1e10; + + /* Format is "Spam: x; y / x" */ + m->is_spam = + strcasecmp("true", is_spam) == 0 ? 
EX_ISSPAM : EX_NOTSPAM; + + if (flags & SPAMC_CHECK_ONLY) { + m->out_len = sprintf(m->out, + "%.1f/%.1f\n", m->score, m->threshold); + } + else if ((flags & SPAMC_REPORT_IFSPAM && m->is_spam == EX_ISSPAM) + || (flags & SPAMC_REPORT)) { + m->out_len = sprintf(m->out, + "%.1f/%.1f\n", m->score, m->threshold); + } + return EX_OK; + + } + else if (sscanf(buf, "Content-length: %d", &m->content_length) == 1) { + if (m->content_length < 0) { + libspamc_log(flags, LOG_ERR, "spamd responded with bad Content-length '%s'", + buf); + return EX_PROTOCOL; + } + return EX_OK; + } + else if (sscanf(buf, "DidSet: %s", didset_ret) == 1) { + if (strstr(didset_ret, "local")) { + *didtellflags |= SPAMC_SET_LOCAL; + } + if (strstr(didset_ret, "remote")) { + *didtellflags |= SPAMC_SET_REMOTE; + } + } + else if (sscanf(buf, "DidRemove: %s", didremove_ret) == 1) { + if (strstr(didremove_ret, "local")) { + *didtellflags |= SPAMC_REMOVE_LOCAL; + } + if (strstr(didremove_ret, "remote")) { + *didtellflags |= SPAMC_REMOVE_REMOTE; + } + } + + return EX_OK; +} + +int message_filter(struct transport *tp, const char *username, + int flags, struct message *m) +{ + char buf[8192]; + size_t bufsiz = (sizeof(buf) / sizeof(*buf)) - 4; /* bit of breathing room */ + size_t len; + int sock = -1; + int rc; + char versbuf[20]; + float version; + int response; + int failureval; + unsigned int throwaway; + SSL_CTX *ctx = NULL; + SSL *ssl = NULL; + SSL_METHOD *meth; + + assert(tp != NULL); + assert(m != NULL); + + if (flags & SPAMC_USE_SSL) { +#ifdef SPAMC_SSL + SSLeay_add_ssl_algorithms(); + meth = SSLv2_client_method(); + SSL_load_error_strings(); + ctx = SSL_CTX_new(meth); +#else + UNUSED_VARIABLE(ssl); + UNUSED_VARIABLE(meth); + UNUSED_VARIABLE(ctx); + libspamc_log(flags, LOG_ERR, "spamc not built with SSL support"); + return EX_SOFTWARE; +#endif + } + + m->is_spam = EX_TOOBIG; + if ((m->outbuf = malloc(m->max_len + EXPANSION_ALLOWANCE + 1)) == NULL) { + failureval = EX_OSERR; + goto failure; + } + m->out = 
m->outbuf; + m->out_len = 0; + + /* Build spamd protocol header */ + if (flags & SPAMC_CHECK_ONLY) + strcpy(buf, "CHECK "); + else if (flags & SPAMC_REPORT_IFSPAM) + strcpy(buf, "REPORT_IFSPAM "); + else if (flags & SPAMC_REPORT) + strcpy(buf, "REPORT "); + else if (flags & SPAMC_SYMBOLS) + strcpy(buf, "SYMBOLS "); + else + strcpy(buf, "PROCESS "); + + len = strlen(buf); + if (len + strlen(PROTOCOL_VERSION) + 2 >= bufsiz) { + _use_msg_for_out(m); + return EX_OSERR; + } + + strcat(buf, PROTOCOL_VERSION); + strcat(buf, "\r\n"); + len = strlen(buf); + + if (username != NULL) { + if (strlen(username) + 8 >= (bufsiz - len)) { + _use_msg_for_out(m); + return EX_OSERR; + } + strcpy(buf + len, "User: "); + strcat(buf + len, username); + strcat(buf + len, "\r\n"); + len += strlen(buf + len); + } + if ((m->msg_len > 9999999) || ((len + 27) >= (bufsiz - len))) { + _use_msg_for_out(m); + return EX_OSERR; + } + len += sprintf(buf + len, "Content-length: %d\r\n\r\n", m->msg_len); + + libspamc_timeout = m->timeout; + + if (tp->socketpath) + rc = _try_to_connect_unix(tp, &sock); + else + rc = _try_to_connect_tcp(tp, &sock); + + if (rc != EX_OK) { + _use_msg_for_out(m); + return rc; /* use the error code try_to_connect_*() gave us. */ + } + + if (flags & SPAMC_USE_SSL) { +#ifdef SPAMC_SSL + ssl = SSL_new(ctx); + SSL_set_fd(ssl, sock); + SSL_connect(ssl); +#endif + } + + /* Send to spamd */ + if (flags & SPAMC_USE_SSL) { +#ifdef SPAMC_SSL + SSL_write(ssl, buf, len); + SSL_write(ssl, m->msg, m->msg_len); +#endif + } + else { + full_write(sock, 0, buf, len); + full_write(sock, 0, m->msg, m->msg_len); + shutdown(sock, SHUT_WR); + } + + /* ok, now read and parse it. SPAMD/1.2 line first... 
*/ + failureval = + _spamc_read_full_line(m, flags, ssl, sock, buf, &len, bufsiz); + if (failureval != EX_OK) { + goto failure; + } + + if (sscanf(buf, "SPAMD/%18s %d %*s", versbuf, &response) != 2) { + libspamc_log(flags, LOG_ERR, "spamd responded with bad string '%s'", buf); + failureval = EX_PROTOCOL; + goto failure; + } + + versbuf[19] = '\0'; + version = _locale_safe_string_to_float(versbuf, 20); + if (version < 1.0) { + libspamc_log(flags, LOG_ERR, "spamd responded with bad version string '%s'", + versbuf); + failureval = EX_PROTOCOL; + goto failure; + } + + m->score = 0; + m->threshold = 0; + m->is_spam = EX_TOOBIG; + while (1) { + failureval = + _spamc_read_full_line(m, flags, ssl, sock, buf, &len, bufsiz); + if (failureval != EX_OK) { + goto failure; + } + + if (len == 0 && buf[0] == '\0') { + break; /* end of headers */ + } + + if (_handle_spamd_header(m, flags, buf, len, &throwaway) < 0) { + failureval = EX_PROTOCOL; + goto failure; + } + } + + len = 0; /* overwrite those headers */ + + if (flags & SPAMC_CHECK_ONLY) { + closesocket(sock); + sock = -1; + if (m->is_spam == EX_TOOBIG) { + /* We should have gotten headers back... Damnit. */ + failureval = EX_PROTOCOL; + goto failure; + } + return EX_OK; + } + else { + if (m->content_length < 0) { + /* should have got a length too. */ + failureval = EX_PROTOCOL; + goto failure; + } + + /* have we already got something in the buffer (e.g. REPORT and + * REPORT_IFSPAM both create a line from the "Spam:" hdr)? If + * so, add the size of that so our sanity check passes. 
+ */ + if (m->out_len > 0) { + m->content_length += m->out_len; + } + + if (flags & SPAMC_USE_SSL) { + len = full_read_ssl(ssl, (unsigned char *) m->out + m->out_len, + m->max_len + EXPANSION_ALLOWANCE + 1 - + m->out_len, + m->max_len + EXPANSION_ALLOWANCE + 1 - + m->out_len); + } + else { + len = full_read(sock, 0, m->out + m->out_len, + m->max_len + EXPANSION_ALLOWANCE + 1 - m->out_len, + m->max_len + EXPANSION_ALLOWANCE + 1 - + m->out_len); + } + + + if (len + m->out_len > m->max_len + EXPANSION_ALLOWANCE) { + failureval = EX_TOOBIG; + goto failure; + } + m->out_len += len; + + shutdown(sock, SHUT_RD); + closesocket(sock); + sock = -1; + } + libspamc_timeout = 0; + + if (m->out_len != m->content_length) { + libspamc_log(flags, LOG_ERR, + "failed sanity check, %d bytes claimed, %d bytes seen", + m->content_length, m->out_len); + failureval = EX_PROTOCOL; + goto failure; + } + + return EX_OK; + + failure: + _use_msg_for_out(m); + if (sock != -1) { + closesocket(sock); + } + libspamc_timeout = 0; + + if (flags & SPAMC_USE_SSL) { +#ifdef SPAMC_SSL + SSL_free(ssl); + SSL_CTX_free(ctx); +#endif + } + return failureval; +} + + +int message_process(struct transport *trans, char *username, int max_size, + int in_fd, int out_fd, const int flags) +{ + int ret; + struct message m; + + assert(trans != NULL); + + m.type = MESSAGE_NONE; + + m.max_len = max_size; + ret = message_read(in_fd, flags, &m); + if (ret != EX_OK) + goto FAIL; + ret = message_filter(trans, username, flags, &m); + if (ret != EX_OK) + goto FAIL; + if (message_write(out_fd, &m) < 0) + goto FAIL; + if (m.is_spam != EX_TOOBIG) { + message_cleanup(&m); + return m.is_spam; + } + message_cleanup(&m); + return ret; + + FAIL: + if (flags & SPAMC_CHECK_ONLY) { + full_write(out_fd, 1, "0/0\n", 4); + message_cleanup(&m); + return EX_NOTSPAM; + } + else { + message_dump(in_fd, out_fd, &m); + message_cleanup(&m); + return ret; + } +} + +int message_tell(struct transport *tp, const char *username, int flags, + struct 
message *m, int msg_class, + unsigned int tellflags, unsigned int *didtellflags) +{ + char buf[8192]; + size_t bufsiz = (sizeof(buf) / sizeof(*buf)) - 4; /* bit of breathing room */ + size_t len; + int sock = -1; + int rc; + char versbuf[20]; + float version; + int response; + int failureval; + SSL_CTX *ctx = NULL; + SSL *ssl = NULL; + SSL_METHOD *meth; + + assert(tp != NULL); + assert(m != NULL); + + if (flags & SPAMC_USE_SSL) { +#ifdef SPAMC_SSL + SSLeay_add_ssl_algorithms(); + meth = SSLv2_client_method(); + SSL_load_error_strings(); + ctx = SSL_CTX_new(meth); +#else + UNUSED_VARIABLE(ssl); + UNUSED_VARIABLE(meth); + UNUSED_VARIABLE(ctx); + libspamc_log(flags, LOG_ERR, "spamc not built with SSL support"); + return EX_SOFTWARE; +#endif + } + + m->is_spam = EX_TOOBIG; + if ((m->outbuf = malloc(m->max_len + EXPANSION_ALLOWANCE + 1)) == NULL) { + failureval = EX_OSERR; + goto failure; + } + m->out = m->outbuf; + m->out_len = 0; + + /* Build spamd protocol header */ + strcpy(buf, "TELL "); + + len = strlen(buf); + if (len + strlen(PROTOCOL_VERSION) + 2 >= bufsiz) { + _use_msg_for_out(m); + return EX_OSERR; + } + + strcat(buf, PROTOCOL_VERSION); + strcat(buf, "\r\n"); + len = strlen(buf); + + if (msg_class != 0) { + strcpy(buf + len, "Message-class: "); + if (msg_class == SPAMC_MESSAGE_CLASS_SPAM) { + strcat(buf + len, "spam\r\n"); + } + else { + strcat(buf + len, "ham\r\n"); + } + len += strlen(buf + len); + } + + if ((tellflags & SPAMC_SET_LOCAL) || (tellflags & SPAMC_SET_REMOTE)) { + int needs_comma_p = 0; + strcat(buf + len, "Set: "); + if (tellflags & SPAMC_SET_LOCAL) { + strcat(buf + len, "local"); + needs_comma_p = 1; + } + if (tellflags & SPAMC_SET_REMOTE) { + if (needs_comma_p == 1) { + strcat(buf + len, ","); + } + strcat(buf + len, "remote"); + } + strcat(buf + len, "\r\n"); + len += strlen(buf + len); + } + + if ((tellflags & SPAMC_REMOVE_LOCAL) || (tellflags & SPAMC_REMOVE_REMOTE)) { + int needs_comma_p = 0; + strcat(buf + len, "Remove: "); + if 
(tellflags & SPAMC_REMOVE_LOCAL) { + strcat(buf + len, "local"); + needs_comma_p = 1; + } + if (tellflags & SPAMC_REMOVE_REMOTE) { + if (needs_comma_p == 1) { + strcat(buf + len, ","); + } + strcat(buf + len, "remote"); + } + strcat(buf + len, "\r\n"); + len += strlen(buf + len); + } + + if (username != NULL) { + if (strlen(username) + 8 >= (bufsiz - len)) { + _use_msg_for_out(m); + return EX_OSERR; + } + strcpy(buf + len, "User: "); + strcat(buf + len, username); + strcat(buf + len, "\r\n"); + len += strlen(buf + len); + } + if ((m->msg_len > 9999999) || ((len + 27) >= (bufsiz - len))) { + _use_msg_for_out(m); + return EX_OSERR; + } + len += sprintf(buf + len, "Content-length: %d\r\n\r\n", m->msg_len); + + libspamc_timeout = m->timeout; + + if (tp->socketpath) + rc = _try_to_connect_unix(tp, &sock); + else + rc = _try_to_connect_tcp(tp, &sock); + + if (rc != EX_OK) { + _use_msg_for_out(m); + return rc; /* use the error code try_to_connect_*() gave us. */ + } + + if (flags & SPAMC_USE_SSL) { +#ifdef SPAMC_SSL + ssl = SSL_new(ctx); + SSL_set_fd(ssl, sock); + SSL_connect(ssl); +#endif + } + + /* Send to spamd */ + if (flags & SPAMC_USE_SSL) { +#ifdef SPAMC_SSL + SSL_write(ssl, buf, len); + SSL_write(ssl, m->msg, m->msg_len); +#endif + } + else { + full_write(sock, 0, buf, len); + full_write(sock, 0, m->msg, m->msg_len); + shutdown(sock, SHUT_WR); + } + + /* ok, now read and parse it. SPAMD/1.2 line first... 
*/ + failureval = + _spamc_read_full_line(m, flags, ssl, sock, buf, &len, bufsiz); + if (failureval != EX_OK) { + goto failure; + } + + if (sscanf(buf, "SPAMD/%18s %d %*s", versbuf, &response) != 2) { + libspamc_log(flags, LOG_ERR, "spamd responded with bad string '%s'", buf); + failureval = EX_PROTOCOL; + goto failure; + } + + versbuf[19] = '\0'; + version = _locale_safe_string_to_float(versbuf, 20); + if (version < 1.0) { + libspamc_log(flags, LOG_ERR, "spamd responded with bad version string '%s'", + versbuf); + failureval = EX_PROTOCOL; + goto failure; + } + + m->score = 0; + m->threshold = 0; + m->is_spam = EX_TOOBIG; + while (1) { + failureval = + _spamc_read_full_line(m, flags, ssl, sock, buf, &len, bufsiz); + if (failureval != EX_OK) { + goto failure; + } + + if (len == 0 && buf[0] == '\0') { + break; /* end of headers */ + } + + if (_handle_spamd_header(m, flags, buf, len, didtellflags) < 0) { + failureval = EX_PROTOCOL; + goto failure; + } + } + + len = 0; /* overwrite those headers */ + + shutdown(sock, SHUT_RD); + closesocket(sock); + sock = -1; + + libspamc_timeout = 0; + + return EX_OK; + + failure: + _use_msg_for_out(m); + if (sock != -1) { + closesocket(sock); + } + libspamc_timeout = 0; + + if (flags & SPAMC_USE_SSL) { +#ifdef SPAMC_SSL + SSL_free(ssl); + SSL_CTX_free(ctx); +#endif + } + return failureval; +} + +void message_cleanup(struct message *m) +{ + assert(m != NULL); + if (m->outbuf != NULL) + free(m->outbuf); + if (m->raw != NULL) { + if (munmap(m->raw, m->raw_len) != 0) { + libspamc_log(0,0, "munmap"); + exit(EX_SOFTWARE); + } + } + if (m->priv != NULL) + free(m->priv); + _clear_message(m); +} + +/* Aug 14, 2002 bj: Obsolete! 
*/ +int process_message(struct transport *tp, char *username, int max_size, + int in_fd, int out_fd, const int my_check_only, + const int my_safe_fallback) +{ + int flags; + + flags = SPAMC_RAW_MODE; + if (my_check_only) + flags |= SPAMC_CHECK_ONLY; + if (my_safe_fallback) + flags |= SPAMC_SAFE_FALLBACK; + + return message_process(tp, username, max_size, in_fd, out_fd, flags); +} + +/* +* init_transport() +* +* Given a pointer to a transport structure, set it to "all empty". +* The default is a localhost connection. +*/ +void transport_init(struct transport *tp) +{ + assert(tp != 0); + + memset(tp, 0, sizeof *tp); + + tp->type = TRANSPORT_LOCALHOST; + tp->port = 783; + tp->flags = 0; +} + +/* +* randomize_hosts() +* +* Given the transport object that contains one or more IP addresses +* in this "hosts" list, rotate it by a random number of shifts to +* randomize them - this is a kind of load balancing. It's possible +* that the random number will be 0, which says not to touch. We don't +* do anything unless +*/ + +static void _randomize_hosts(struct transport *tp) +{ + int rnum; + + assert(tp != 0); + + if (tp->nhosts <= 1) + return; + + rnum = rand() % tp->nhosts; + + while (rnum-- > 0) { + struct in_addr tmp = tp->hosts[0]; + int i; + + for (i = 1; i < tp->nhosts; i++) + tp->hosts[i - 1] = tp->hosts[i]; + + tp->hosts[i - 1] = tmp; + } +} + +/* +* transport_setup() +* +* Given a "transport" object that says how we're to connect to the +* spam daemon, perform all the initial setup required to make the +* connection process a smooth one. The main work is to do the host +* name lookup and copy over all the IP addresses to make a local copy +* so they're not kept in the resolver's static state. +* +* Here we also manage quasi-load balancing and failover: if we're +* doing load balancing, we randomly "rotate" the list to put it in +* a different order, and then if we're not doing failover we limit +* the hosts to just one. 
This way *all* connections are done with +* the intention of failover - makes the code a bit more clear. +*/ +int transport_setup(struct transport *tp, int flags) +{ + struct hostent *hp; + char *hostlist, *hostname; + int errbits; + char **addrp; + +#ifdef _WIN32 + // Start Winsock up + WSADATA wsaData; + int nCode; + if ((nCode = WSAStartup(MAKEWORD(1, 1), &wsaData)) != 0) { + printf("WSAStartup() returned error code %d\n", nCode); + return EX_OSERR; + } + +#endif + + assert(tp != NULL); + tp->flags = flags; + + switch (tp->type) { +#ifndef _WIN32 + case TRANSPORT_UNIX: + assert(tp->socketpath != 0); + return EX_OK; +#endif + case TRANSPORT_LOCALHOST: + tp->hosts[0].s_addr = inet_addr("127.0.0.1"); + tp->nhosts = 1; + return EX_OK; + + case TRANSPORT_TCP: + if ((hostlist = strdup(tp->hostname)) == NULL) + return EX_OSERR; + + /* We want to return the least permanent error, in this bitmask we + * record the errors seen with: + * 0: no error + * 1: EX_TEMPFAIL + * 2: EX_NOHOST + * EX_OSERR will return immediately. + * Bits aren't reset so a check against nhosts is needed to determine + * if something went wrong. 
+ */ + errbits = 0; + tp->nhosts = 0; + /* Start with char offset in front of the string because we'll add + * one in the loop + */ + hostname = hostlist - 1; + do { + char *hostend; + + hostname += 1; + hostend = strchr(hostname, ','); + if (hostend != NULL) { + *hostend = '\0'; + } + + if ((hp = gethostbyname(hostname)) == NULL) { + int origerr = h_errno; /* take a copy before syslog() */ + libspamc_log(flags, LOG_DEBUG, "gethostbyname(%s) failed: h_errno=%d", + hostname, origerr); + switch (origerr) { + case TRY_AGAIN: + errbits |= 1; + break; + case HOST_NOT_FOUND: + case NO_ADDRESS: + case NO_RECOVERY: + errbits |= 2; + break; + default: + /* should not happen, all errors are checked above */ + free(hostlist); + return EX_OSERR; + } + goto nexthost; /* try next host in list */ + } + + /* If we have no hosts at all, or if they are some other + * kind of address family besides IPv4, then we really + * just have no hosts at all. TODO: IPv6 + */ + if (hp->h_addr_list[0] == NULL + || hp->h_length != sizeof tp->hosts[0] + || hp->h_addrtype != AF_INET) { + /* no hosts/bad size/wrong family */ + errbits |= 1; + goto nexthost; /* try next host in list */ + } + + /* Copy all the IP addresses into our private structure. + * This gets them out of the resolver's static area and + * means we won't ever walk all over the list with other + * calls. 
+ */ + for (addrp = hp->h_addr_list; *addrp; addrp++) { + if (tp->nhosts == TRANSPORT_MAX_HOSTS) { + libspamc_log(flags, LOG_NOTICE, "hit limit of %d hosts, ignoring remainder", + TRANSPORT_MAX_HOSTS); + break; + } + memcpy(&tp->hosts[tp->nhosts], *addrp, hp->h_length); + tp->nhosts++; + } + +nexthost: + hostname = hostend; + } while (hostname != NULL); + free(hostlist); + + if (tp->nhosts == 0) { + if (errbits & 1) { + libspamc_log(flags, LOG_ERR, "could not resolve any hosts (%s): a temporary error occurred", + tp->hostname); + return EX_TEMPFAIL; + } + else { + libspamc_log(flags, LOG_ERR, "could not resolve any hosts (%s): no such host", + tp->hostname); + return EX_NOHOST; + } + } + + /* QUASI-LOAD-BALANCING + * + * If the user wants to do quasi load balancing, "rotate" + * the list by a random amount based on the current time. + * This may later be truncated to a single item. This is + * meaningful only if we have more than one host. + */ + if ((flags & SPAMC_RANDOMIZE_HOSTS) && tp->nhosts > 1) { + _randomize_hosts(tp); + } + + /* If the user wants no fallback, simply truncate the host + * list to just one - this pretends that this is the extent + * of our connection list - then it's not a special case. + */ + if (!(flags & SPAMC_SAFE_FALLBACK) && tp->nhosts > 1) { + /* truncating list */ + tp->nhosts = 1; + } + + return EX_OK; + } + + /* oops, unknown transport type */ + return EX_OSERR; +} + +/* --------------------------------------------------------------------------- */ + +#define LOG_BUFSIZ 1023 + +#ifndef libspamc_log +void +libspamc_log (int flags, int level, char *msg, ...) 
+{ + va_list ap; + char buf[LOG_BUFSIZ+1]; + int len = 0; + + va_start(ap, msg); + + if ((flags & SPAMC_LOG_TO_STDERR) != 0) { + // create a log-line buffer + len = snprintf(buf, LOG_BUFSIZ, "spamc: "); + len += vsnprintf(buf+len, LOG_BUFSIZ-len, msg, ap); + + // avoid buffer overflow + if (len > (LOG_BUFSIZ-2)) { len = (LOG_BUFSIZ-3); } + + len += snprintf(buf+len, LOG_BUFSIZ-len, "\n"); + buf[LOG_BUFSIZ] = '\0'; /* ensure termination */ + (void) write (2, buf, len); + + } else { + vsnprintf(buf, LOG_BUFSIZ, msg, ap); + buf[LOG_BUFSIZ] = '\0'; /* ensure termination */ +#ifndef _WIN32 + syslog (level, "%s", buf); +#else + (void) level; /* not used. suppress compiler warning */ + fprintf (stderr, "%s\n", buf); +#endif + } + + va_end(ap); +} +#endif + +/* --------------------------------------------------------------------------- */ + +/* +* Unit tests. Must be built externally, e.g.: +* +* gcc -g -DLIBSPAMC_UNIT_TESTS spamd/spamc.c spamd/libspamc.c spamd/utils.c -o libspamctest +* ./libspamctest +* +*/ +#ifdef LIBSPAMC_UNIT_TESTS + +static void _test_locale_safe_string_to_float_val(float input) +{ + char inputstr[99], cmpbuf1[99], cmpbuf2[99]; + float output; + + /* sprintf instead of snprintf is safe here because it is only a controlled test */ + sprintf(inputstr, "%f", input); + output = _locale_safe_string_to_float(inputstr, 99); + if (input == output) { + return; + } + + /* could be a rounding error. 
print as string and compare those */ + sprintf(cmpbuf1, "%f", input); + sprintf(cmpbuf2, "%f", output); + if (!strcmp(cmpbuf1, cmpbuf2)) { + return; + } + + printf("FAIL: input=%f != output=%f\n", input, output); +} + +static void unit_test_locale_safe_string_to_float(void) +{ + float statictestset[] = { /* will try both +ve and -ve */ + 0.1, 0.01, 0.001, 0.0001, 0.00001, 0.000001, + 9.1, 9.91, 9.991, 9.9991, 9.99991, 9.999991, + 0.0 /* end of set constant */ + }; + float num; + int i; + + printf("starting unit_test_locale_safe_string_to_float\n"); + /* tests of precision */ + for (i = 0; statictestset[i] != 0.0; i++) { + _test_locale_safe_string_to_float_val(statictestset[i]); + _test_locale_safe_string_to_float_val(-statictestset[i]); + _test_locale_safe_string_to_float_val(1 - statictestset[i]); + _test_locale_safe_string_to_float_val(1 + statictestset[i]); + } + /* now exhaustive, in steps of 0.01 */ + for (num = -1000.0; num < 1000.0; num += 0.01) { + _test_locale_safe_string_to_float_val(num); + } + printf("finished unit_test_locale_safe_string_to_float\n"); +} + +void do_libspamc_unit_tests(void) +{ + unit_test_locale_safe_string_to_float(); + exit(0); +} + +#endif /* LIBSPAMC_UNIT_TESTS */ diff --git a/libspamc.h b/libspamc.h new file mode 100644 index 0000000..4dfc2e5 --- /dev/null +++ b/libspamc.h @@ -0,0 +1,259 @@ +/* <@LICENSE> + * Copyright 2004 Apache Software Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * + */ +#ifndef LIBSPAMC_H +#define LIBSPAMC_H 1 + +#include +#include +#include +#ifdef _WIN32 +#ifdef _MSC_VER +/* ignore MSVC++ warnings that are annoying and hard to remove: + 4115 named type definition in parentheses + 4127 conditional expression is constant + 4514 unreferenced inline function removed + */ +#pragma warning( disable : 4115 4127 4514 ) +#endif +#include +#else +#include +#include +#include +#endif + +#ifdef _WIN32 +/* FIXME: This stuff has to go somewhere else */ + +#define EX_OK 0 +#define EX_USAGE 64 +#define EX_DATAERR 65 +#define EX_NOINPUT 66 +#define EX_NOUSER 67 +#define EX_NOHOST 68 +#define EX_UNAVAILABLE 69 +#define EX_SOFTWARE 70 +#define EX_OSERR 71 +#define EX_OSFILE 72 +#define EX_CANTCREAT 73 +#define EX_IOERR 74 +#define EX_TEMPFAIL 75 +#define EX_PROTOCOL 76 +#define EX_NOPERM 77 +#define EX_CONFIG 78 + +#define STDIN_FILENO 0 +#define STDOUT_FILENO 1 + +/* FIXME: This doesn't belong here either */ +#define LOG_EMERG 0 /* system is unusable */ +#define LOG_ALERT 1 /* action must be taken immediately */ +#define LOG_CRIT 2 /* critical conditions */ +#define LOG_ERR 3 /* error conditions */ +#define LOG_WARNING 4 /* warning conditions */ +#define LOG_NOTICE 5 /* normal but significant condition */ +#define LOG_INFO 6 /* informational */ +#define LOG_DEBUG 7 /* debug-level messages */ + +#endif + +#define EX_NOTSPAM 0 +#define EX_ISSPAM 1 +#define EX_TOOBIG 866 + +/* Aug 14, 2002 bj: Bitflags instead of lots of bool parameters */ +#define SPAMC_MODE_MASK 1 +#define SPAMC_RAW_MODE 0 +#define SPAMC_BSMTP_MODE 1 + +#define SPAMC_USE_SSL (1<<27) +#define SPAMC_SAFE_FALLBACK (1<<28) +#define SPAMC_CHECK_ONLY (1<<29) + +/* Jan 30, 2003 ym: added reporting options */ +#define SPAMC_REPORT (1<<26) +#define SPAMC_REPORT_IFSPAM (1<<25) + +/* Feb 1 2003 jm: might as well fix bug 191 as well */ +#define SPAMC_SYMBOLS (1<<24) + +/* 2003/04/16 SJF: randomize hostname order (quasi load balancing) */ +#define SPAMC_RANDOMIZE_HOSTS (1<<23) + +/* 
log to stderr */ +#define SPAMC_LOG_TO_STDERR (1<<22) + +/* Nov 24, 2004 NP: added learning support */ +#define SPAMC_LEARN (1<<21) + +/* May 5, 2005 NP: added list reporting support */ +#define SPAMC_REPORT_MSG (1<<20) + + +#define SPAMC_MESSAGE_CLASS_SPAM 1 +#define SPAMC_MESSAGE_CLASS_HAM 2 + +#define SPAMC_SET_LOCAL 1 +#define SPAMC_SET_REMOTE 2 + +#define SPAMC_REMOVE_LOCAL 4 +#define SPAMC_REMOVE_REMOTE 8 + +/* Aug 14, 2002 bj: A struct for storing a message-in-progress */ +typedef enum +{ + MESSAGE_NONE, + MESSAGE_ERROR, + MESSAGE_RAW, + MESSAGE_BSMTP, + MAX_MESSAGE_TYPE +} message_type_t; + +struct libspamc_private_message; + +struct message +{ + /* Set before passing the struct on! */ + unsigned int max_len; /* messages larger than this will return EX_TOOBIG */ + int timeout; /* timeout for read() system calls */ + + /* Filled in by message_read */ + message_type_t type; + char *raw; + unsigned int raw_len; /* Raw message buffer */ + char *pre; + int pre_len; /* Pre-message data (e.g. SMTP commands) */ + char *msg; + unsigned int msg_len; /* The message */ + char *post; + int post_len; /* Post-message data (e.g. SMTP commands) */ + int content_length; + + /* Filled in by filter_message */ + int is_spam; /* EX_ISSPAM if the message is spam, EX_NOTSPAM + if not */ + float score, threshold; /* score and threshold */ + char *outbuf; /* Buffer for output from spamd */ + char *out; + int out_len; /* Output from spamd. Either the filtered + message, or the check-only response. Or else, + a pointer to msg above. */ + + /* these members added in SpamAssassin version 2.60: */ + struct libspamc_private_message *priv; +}; + +/*------------------------------------------------------------------------ + * TRANSPORT (2004/04/16 - SJF) + * + * The code to connect with the daemon has gotten more complicated: support + * for SSL, fallback to multiple hosts, and using UNIX domain sockets. The + * code has gotten ugly with way too many parameters being passed all around. 
+ * + * So we've created this object to hold all the info required to connect with + * the remote site, including a self-contained list of all the IP addresses + * in the event this is using TCP sockets. These multiple IPs can be obtained + * only from DNS returning more than one A record for a single name, and + * this allows for fallback. + * + * We also allow a kind of quasi-load balancing, where we take the list of + * A records from DNS and randomize them before starting out - this lets + * us spread the load out among multiple servers if desired. The idea for + * load balancing goes to Jeremy Zawodny. + * + * By putting all our data here, we remove "fallback" from being a special + * case. We may find ourselves with several IP addresses, but if the user + * disables fallback, we set the IP address count to one. Now the connect + * code just loops over that same address. + */ +#define TRANSPORT_LOCALHOST 0x01 /* TCP to localhost only */ +#define TRANSPORT_TCP 0x02 /* standard TCP socket */ +#define TRANSPORT_UNIX 0x03 /* UNIX domain socket */ + +#define TRANSPORT_MAX_HOSTS 256 /* max hosts we can failover between */ + +struct transport +{ + int type; + + const char *socketpath; /* for UNIX dommain socket */ + const char *hostname; /* for TCP sockets */ + + unsigned short port; /* for TCP sockets */ + + struct in_addr hosts[TRANSPORT_MAX_HOSTS]; + int nhosts; + int flags; +}; + +extern void transport_init(struct transport *tp); +extern int transport_setup(struct transport *tp, int flags); + +/* Aug 14, 2002 bj: New interface functions */ + +/* Read in a message from the fd, with the mode specified in the flags. + * Returns EX_OK on success, EX_otherwise on failure. On failure, m may be + * either MESSAGE_NONE or MESSAGE_ERROR. */ +int message_read(int in_fd, int flags, struct message *m); + +/* Write out a message to the fd, as specified by m->type. Note that + * MESSAGE_NONE messages have nothing to write. 
Also note that if you ran the + * message through message_filter with SPAMC_CHECK_ONLY, it will only output + * the "score/threshold" line. */ +long message_write(int out_fd, struct message *m); + +/* Process the message through the spamd filter, making as many connection + * attempts as are implied by the transport structure. To make this do + * failover, more than one host is defined, but if there is only one there, + * no failover is done. + */ +int message_filter(struct transport *tp, const char *username, + int flags, struct message *m); + +/* Process the message through the spamd tell command, making as many + * connection attempts as are implied by the transport structure. To make + * this do failover, more than one host is defined, but if there is only + * one there, no failover is done. + */ +int message_tell(struct transport *tp, const char *username, int flags, + struct message *m, int msg_class, + unsigned int tellflags, unsigned int *didtellflags); + +/* Dump the message. If there is any data in the message (typically, m->type + * will be MESSAGE_ERROR) it will be written with message_write. Then, in_fd will be piped + * to out_fd until EOF. This is particularly useful if you get back an + * EX_TOOBIG. */ +void message_dump(int in_fd, int out_fd, struct message *m); + +/* Do a message_read->message_filter->message_write sequence, handling errors + * appropriately with message_dump or appropriate CHECK_ONLY output. Returns + * EX_OK or EX_ISSPAM/EX_NOTSPAM on success, some error EX on error. */ +int message_process(struct transport *trans, char *username, int max_size, + int in_fd, int out_fd, const int flags); + +/* Cleanup the resources we allocated for storing the message. Call after + * you're done processing. */ +void message_cleanup(struct message *m); + +/* Aug 14, 2002 bj: This is now legacy, don't use it. 
*/ +int process_message(struct transport *tp, char *username, + int max_size, int in_fd, int out_fd, + const int check_only, const int safe_fallback); + +void libspamc_log(int flags, int level, char *msg, ...); + +#endif diff --git a/processor.c b/processor.c new file mode 100644 index 0000000..5f2753a --- /dev/null +++ b/processor.c @@ -0,0 +1,194 @@ +static char ident[] = "$Id$"; + +// Configuration +#define T_TYPE TRANSPORT_LOCALHOST +#define T_SOCK NULL /* for TRANSPORT_UNIX */ +#define T_HOST NULL /* for TRANSPORT_TCP */ +#define T_PORT 7783 /* for TRANSPORT_{LOCALHOST,TCP} */ +#define M_TIME 30 /* call it ham if no answer in 30 seconds */ + +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "libspamc.h" + +static char *errfn; + +static int flags = SPAMC_CHECK_ONLY | SPAMC_RAW_MODE; + +#define err(n, ...) _err(n, true, __FILE__, __LINE__, __VA_ARGS__) +#define errx(n, ...) _err(n, false, __FILE__, __LINE__, __VA_ARGS__) + +void +_err(int exitcode, bool want_strerror, char *fn, int lineno, char *format, ...) +{ + FILE *fp; + va_list ap; + + /* If anything fails in here, we're fucked, so just exit. If + * exitcode is greather than 0 (i.e. the caller is trying to exit + * with a failure code), use that, else use EX_IOERR. */ + + if ((fp = fopen(errfn, "a")) == NULL) { + exit(exitcode > 0 ? exitcode : EX_IOERR); + } + + if (fprintf(fp, ".mdeliver-processor:%s:%d:", fn, lineno) < 0) { + exit(exitcode > 0 ? exitcode : EX_IOERR); + } + + va_start(ap, format); + if (vfprintf(fp, format, ap) < 0) { + exit(exitcode > 0 ? exitcode : EX_IOERR); + } + va_end(ap); + + if (want_strerror) { + if (fputs(": ", fp) == EOF) { + exit(exitcode > 0 ? exitcode : EX_IOERR); + } + if (fputs(strerror(errno), fp) == EOF) { + exit(exitcode > 0 ? exitcode : EX_IOERR); + } + } + if (fputs("\n", fp) == EOF) { + exit(exitcode > 0 ? exitcode : EX_IOERR); + } + + if (fclose(fp) == EOF) { + exit(exitcode > 0 ? 
exitcode : EX_IOERR); + } + + if (exitcode >= 0) { + exit(exitcode); + } +} + +static char * +logname() +{ + char *logname; + struct passwd *p; + + if ((logname = getenv("LOGNAME")) != NULL) { + return logname; + } + + errno = 0; + if ((p = getpwuid(getuid())) == NULL) { + if (errno != 0) { + err(EX_OSERR, "getpwuid(getuid())"); + } + errx(EX_NOUSER, "Who are you? What do you want? Where are you going? Why are you here?"); + } + + return p->pw_name; +} + +static void +get_transport(struct transport *t) +{ + int status; + + t->type = T_TYPE; + t->socketpath = T_SOCK; + t->hostname = T_HOST; + t->port = T_PORT; + + if ((status = transport_setup(t, 0)) != EX_OK) { + errx(status, "transport_setup"); + } +} + +static void +get_message(char *fn, struct message *m) +{ + int fd; + int status; + + m->timeout = M_TIME; + /* message_filter stupidly allocates a buffer a little bigger than + * max_len for reading the processed message back from spamd even + * when told not to. It never uses the buffer. */ + m->max_len = 0; + + if ((fd = open(fn, O_RDONLY)) == -1) { + err(EX_IOERR, "open(%s)", fn); + } + + if ((status = message_read(fd, flags, m)) != EX_OK) { + errx(status, "message_read"); + } +} + +static bool +is_spam(char *message) +{ + struct transport t; + struct message m; + int status; + + get_transport(&t); + get_message(message, &m); + + if ((status = message_filter(&t, logname(), flags, &m)) != EX_OK) { + errx(status, "message_filter"); + } + + if (m.is_spam == EX_ISSPAM) { + return true; + } else if (m.is_spam == EX_NOTSPAM) { + return false; + } + + errx(m.is_spam, "message_filter"); + /*NOTREACHED*/ + return false; +} + +static int +process(char *tmp) +{ + size_t len = strlen(tmp); + errfn = malloc(len + 1); + /* +2 because this one has an extra char ("mdp" vs. "spam"). */ + char *new = malloc(len + 2); + + strcpy(errfn, "err"); + strcat(errfn, tmp + 3); + + strcpy(new, is_spam(tmp) ? 
"spam" : "new"); + strcat(new, tmp + 3); + + if (rename(tmp, new) == -1) { + err(EX_OSERR, "rename(%s, %s)", tmp, new); + } + + return 0; +} + + +int +main(int argc, char *argv[]) +{ + if (argc < 2) { + exit(EX_USAGE); + } + + if (argv[1][0] == '-' && argv[1][1] == 'v') { + puts(ident); + return 0; + } + + return process(argv[1]); +} diff --git a/utils.c b/utils.c new file mode 100644 index 0000000..f42c961 --- /dev/null +++ b/utils.c @@ -0,0 +1,241 @@ +/* <@LICENSE> + * Copyright 2004 Apache Software Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +#ifndef _WIN32 +#include +#include +#include +#include +#else + +#ifdef _MSC_VER +/* ignore MSVC++ warnings that are annoying and hard to remove: + 4115 named type definition in parentheses + 4127 conditional expression is constant + 4514 unreferenced inline function removed + */ +#pragma warning( disable : 4115 4127 4514 ) +#endif + +#include +#endif +#include +#include +#include +#include "utils.h" + +/* Dec 13 2001 jm: added safe full-read and full-write functions. These + * can cope with networks etc., where a write or read may not read all + * the data that's there, in one call. + */ +/* Aug 14, 2002 bj: EINTR and EAGAIN aren't fatal, are they? 
*/ +/* Aug 14, 2002 bj: moved these to utils.c */ +/* Jan 13, 2003 ym: added timeout functionality */ +/* Apr 24, 2003 sjf: made full_read and full_write void* params */ + +/* -------------------------------------------------------------------------- */ +#ifndef _WIN32 +typedef void sigfunc(int); /* for signal handlers */ + +sigfunc *sig_catch(int sig, void (*f) (int)) +{ + struct sigaction act, oact; + act.sa_handler = f; + act.sa_flags = 0; + sigemptyset(&act.sa_mask); + sigaction(sig, &act, &oact); + return oact.sa_handler; +} + +static void catch_alrm(int x) +{ + UNUSED_VARIABLE(x); +} +#endif + +int fd_timeout_read(int fd, char fdflag, void *buf, size_t nbytes) +{ + int nred; + int origerr; +#ifndef _WIN32 + sigfunc *sig; + + sig = sig_catch(SIGALRM, catch_alrm); + if (libspamc_timeout > 0) { + alarm(libspamc_timeout); + } +#endif + + do { + if (fdflag) { + nred = (int)read(fd, buf, nbytes); + origerr = errno; + } + else { + nred = (int)recv(fd, buf, nbytes, 0); +#ifndef _WIN32 + origerr = errno; +#else + origerr = WSAGetLastError(); +#endif + } + } while (nred < 0 && origerr == EWOULDBLOCK); + +#ifndef _WIN32 + if (nred < 0 && origerr == EINTR) + errno = ETIMEDOUT; + + if (libspamc_timeout > 0) { + alarm(0); + } + + /* restore old signal handler */ + sig_catch(SIGALRM, sig); +#endif + + return nred; +} + +int ssl_timeout_read(SSL * ssl, void *buf, int nbytes) +{ + int nred; + +#ifndef _WIN32 + sigfunc *sig; + + sig = sig_catch(SIGALRM, catch_alrm); + if (libspamc_timeout > 0) { + alarm(libspamc_timeout); + } +#endif + + do { + +#ifdef SPAMC_SSL + nred = SSL_read(ssl, buf, nbytes); +#else + UNUSED_VARIABLE(ssl); + UNUSED_VARIABLE(buf); + UNUSED_VARIABLE(nbytes); + nred = 0; /* never used */ +#endif + + } while (nred < 0 && errno == EWOULDBLOCK); + +#ifndef _WIN32 + if (nred < 0 && errno == EINTR) + errno = ETIMEDOUT; + + if (libspamc_timeout > 0) { + alarm(0); + } + + /* restore old signal handler */ + sig_catch(SIGALRM, sig); +#endif + + return nred; +} + +/* 
-------------------------------------------------------------------------- */ + +int full_read(int fd, char fdflag, void *vbuf, int min, int len) +{ + unsigned char *buf = (unsigned char *) vbuf; + int total; + int thistime; + + for (total = 0; total < min;) { + thistime = fd_timeout_read(fd, fdflag, buf + total, len - total); + + if (thistime < 0) { + if (total >= min) { + /* error, but we read *some*. return what we've read + * so far and next read (if there is one) will return -1. */ + return total; + } else { + return -1; + } + } + else if (thistime == 0) { + /* EOF, but we didn't read the minimum. return what we've read + * so far and next read (if there is one) will return 0. */ + return total; + } + + total += thistime; + } + return total; +} + +int full_read_ssl(SSL * ssl, unsigned char *buf, int min, int len) +{ + int total; + int thistime; + + for (total = 0; total < min;) { + thistime = ssl_timeout_read(ssl, buf + total, len - total); + + if (thistime < 0) { + if (total >= min) { + /* error, but we read *some*. return what we've read + * so far and next read (if there is one) will return -1. */ + return total; + } else { + return -1; + } + } + else if (thistime == 0) { + /* EOF, but we didn't read the minimum. return what we've read + * so far and next read (if there is one) will return 0. 
*/ + return total; + } + + total += thistime; + } + return total; +} + +int full_write(int fd, char fdflag, const void *vbuf, int len) +{ + const char *buf = (const char *) vbuf; + int total; + int thistime; + int origerr; + + for (total = 0; total < len;) { + if (fdflag) { + thistime = write(fd, buf + total, len - total); + origerr = errno; + } + else { + thistime = send(fd, buf + total, len - total, 0); +#ifndef _WIN32 + origerr = errno; +#else + origerr = WSAGetLastError(); +#endif + } + if (thistime < 0) { + if (EINTR == origerr || EWOULDBLOCK == origerr) + continue; + return thistime; /* always an error for writes */ + } + total += thistime; + } + return total; +} diff --git a/utils.h b/utils.h new file mode 100644 index 0000000..3a28754 --- /dev/null +++ b/utils.h @@ -0,0 +1,96 @@ +/* <@LICENSE> + * Copyright 2004 Apache Software Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * + */ + +#ifndef UTILS_H +#define UTILS_H + +#define UNUSED_VARIABLE(v) ((void)(v)) + +extern int libspamc_timeout; /* default timeout in seconds */ + +#ifdef SPAMC_SSL +#include +#include +#include +#include +#else +typedef int SSL; /* fake type to avoid conditional compilation */ +typedef int SSL_CTX; +typedef int SSL_METHOD; +#endif + +#ifdef _WIN32 +#include +// +// BSD-compatible socket error codes for Win32 +// + +#define EWOULDBLOCK WSAEWOULDBLOCK +#define EINPROGRESS WSAEINPROGRESS +#define EALREADY WSAEALREADY +#define ENOTSOCK WSAENOTSOCK +#define EDESTADDRREQ WSAEDESTADDRREQ +#define EMSGSIZE WSAEMSGSIZE +#define EPROTOTYPE WSAEPROTOTYPE +#define ENOPROTOOPT WSAENOPROTOOPT +#define EPROTONOSUPPORT WSAEPROTONOSUPPORT +#define ESOCKTNOSUPPORT WSAESOCKTNOSUPPORT +#define EOPNOTSUPP WSAEOPNOTSUPP +#define EPFNOSUPPORT WSAEPFNOSUPPORT +#define EAFNOSUPPORT WSAEAFNOSUPPORT +#define EADDRINUSE WSAEADDRINUSE +#define EADDRNOTAVAIL WSAEADDRNOTAVAIL +#define ENETDOWN WSAENETDOWN +#define ENETUNREACH WSAENETUNREACH +#define ENETRESET WSAENETRESET +#define ECONNABORTED WSAECONNABORTED +#define ECONNRESET WSAECONNRESET +#define ENOBUFS WSAENOBUFS +#define EISCONN WSAEISCONN +#define ENOTCONN WSAENOTCONN +#define ESHUTDOWN WSAESHUTDOWN +#define ETOOMANYREFS WSAETOOMANYREFS +#define ETIMEDOUT WSAETIMEDOUT +#define ECONNREFUSED WSAECONNREFUSED +#define ELOOP WSAELOOP +// #define ENAMETOOLONG WSAENAMETOOLONG +#define EHOSTDOWN WSAEHOSTDOWN +#define EHOSTUNREACH WSAEHOSTUNREACH +// #define ENOTEMPTY WSAENOTEMPTY +#define EPROCLIM WSAEPROCLIM +#define EUSERS WSAEUSERS +#define EDQUOT WSAEDQUOT +#define ESTALE WSAESTALE +#define EREMOTE WSAEREMOTE + +// NOTE: these are not errno constants in UNIX! 
+#define HOST_NOT_FOUND WSAHOST_NOT_FOUND +#define TRY_AGAIN WSATRY_AGAIN +#define NO_RECOVERY WSANO_RECOVERY +#define NO_DATA WSANO_DATA + +#endif + +int fd_timeout_read(int fd, char fdflag, void *, size_t); +int ssl_timeout_read(SSL * ssl, void *, int); + +/* these are fd-only, no SSL support */ +int full_read(int fd, char fdflag, void *buf, int min, int len); +int full_read_ssl(SSL * ssl, unsigned char *buf, int min, int len); +int full_write(int fd, char fdflag, const void *buf, int len); + +#endif