X-Git-Url: https://diplodocus.org/git/nmh/blobdiff_plain/5aaf4f1c6bea2bffff48524e76bf9d03dd3e2dc6..cdbb097c8f061dfea2e92f0beafc64fdf50a4eb7:/uip/mhparse.c?ds=inline diff --git a/uip/mhparse.c b/uip/mhparse.c index 1b83ae87..5bbaf449 100644 --- a/uip/mhparse.c +++ b/uip/mhparse.c @@ -135,7 +135,7 @@ static int readDigest (CT, char *); static int get_leftover_mp_content (CT, int); static int InitURL (CT); static int openURL (CT, char **); -static size_t param_len(PM, int, size_t, int *); +static size_t param_len(PM, int, size_t, int *, int *, size_t *); static size_t encode_param(PM, char *, size_t, size_t, size_t, int); static size_t normal_param(PM, char *, size_t, size_t, size_t); static int get_dispo (char *, CT, int); @@ -3437,8 +3437,8 @@ output_params(size_t initialwidth, PM params, int *offsetout) { char *paramout = NULL; char line[CPERLIN * 2], *q; - int curlen, index, eightbit, encode, i; - size_t valoff; + int curlen, index, cont, encode, i; + size_t valoff, numchars; while (params != NULL) { encode = 0; @@ -3453,7 +3453,7 @@ output_params(size_t initialwidth, PM params, int *offsetout) return NULL; } - curlen = param_len(params, index, valoff, &eightbit); + curlen = param_len(params, index, valoff, &encode, &cont, &numchars); /* * Loop until we get a parameter that fits within a line. We @@ -3461,22 +3461,11 @@ output_params(size_t initialwidth, PM params, int *offsetout) * on that. */ - while (curlen + 8 > CPERLIN - 1) { - int curvallen = strlen(params->pm_value + valoff) - - (initialwidth + curlen - (CPERLIN - 1)); - + while (cont) { *q++ = ';'; *q++ = '\n'; *q++ = '\t'; - /* - * curvallen holds how many characters we take from this - * current value. Make sure it's at least 1. - */ - - if (curvallen < 1) - curvallen = 1; - /* * At this point we're definitely continuing the line, so * be sure to include the parameter name and section index. @@ -3485,28 +3474,16 @@ output_params(size_t initialwidth, PM params, int *offsetout) q += snprintf(q, sizeof(line) - (q - line), "%s*%d", params->pm_name, index); - /* - * If eightbit was set and we're on index 0, we need to include - * the character set and encode the first section. Otherwise - * only encode if the section we're on contains an 8bit character. - */ - - if (eightbit && index == 0) - encode = 1; - else if (eightbit && contains8bit(params->pm_value + valoff, - params->pm_value + valoff + curvallen)) - encode = 1; - /* * Both of these functions do a NUL termination */ if (encode) i = encode_param(params, q, sizeof(line) - (q - line), - curvallen, valoff, index); + numchars, valoff, index); else i = normal_param(params, q, sizeof(line) - (q - line), - curvallen, valoff); + numchars, valoff); if (i == 0) { if (paramout) @@ -3514,9 +3491,10 @@ output_params(size_t initialwidth, PM params, int *offsetout) return NULL; } - valoff += curvallen; + valoff += numchars; index++; - curlen = param_len(params, index, valoff, &eightbit); + curlen = param_len(params, index, valoff, &encode, &cont, + &numchars); q = line; /* @@ -3564,7 +3542,7 @@ output_params(size_t initialwidth, PM params, int *offsetout) q += strlen(q); } - if (eightbit) + if (encode) i = encode_param(params, q, sizeof(line) - (q - line), strlen(params->pm_value + valoff), valoff, index); else @@ -3590,37 +3568,82 @@ output_params(size_t initialwidth, PM params, int *offsetout) } /* - * Calculate the size of a parameter. Include any necessary encoding. - * Start the length computation from where "offset" is marked. + * Calculate the size of a parameter. + * + * Arguments include + * + * pm - The parameter being output + * index - If continuing the parameter, the index of the section + * we're on. + * valueoff - The current offset into the parameter value that we're + * working on (previous sections have consumed valueoff bytes). + * encode - Set if we should perform encoding on this parameter section + * (given that we're consuming bytesfit bytes). + * cont - Set if the remaining data in value will not fit on a single + * line and will need to be continued. + * bytesfit - The number of bytes that we can consume from the parameter + * value and still fit on a completely new line. The + * calculation assumes the new line starts with a tab, + * includes the parameter name and any encoding, and fits + * within CPERLIN bytes. Will always be at least 1. */ static size_t -param_len(PM pm, int index, size_t valueoff, int *eightbit) +param_len(PM pm, int index, size_t valueoff, int *encode, int *cont, + size_t *bytesfit) { - char *start = pm->pm_value + valueoff, *p; - size_t len = 0; + char *start = pm->pm_value + valueoff, *p, indexchar[32]; + size_t len = 0, fit = 0; + int fitlimit = 0, eightbit, maxfit; + + *encode = 0; /* - * Add up the length. First, start with the parameter name, and include - * the equal sign. + * Add up the length. First, start with the parameter name. */ - len += strlen(pm->pm_name) + 1; + len = strlen(pm->pm_name); /* - * Scan the parameter value. If we find an 8-bit character, then - * we need to compute the locale name for the length. + * Scan the parameter value and see if we need to do encoding for this + * section. */ - *eightbit = contains8bit(start, NULL); + eightbit = contains8bit(start, NULL); /* - * If we've got 8-bit character, put the locale on the front (if we're - * doing part 0. Also compute the length of the string based on the - * encoding we need to do. + * Determine if we need to encode this section. Encoding is necessary if: + * + * - There are any 8-bit characters at all and we're on the first + * section. + * - There are 8-bit characters within N bytes of our section start. + * N is calculated based on the number of bytes it would take to + * reach CPERLIN. Specifically: + * 8 (starting tab) + + * strlen(param name) + + * 4 ('* for section marker, '=', opening/closing '"') + * strlen (index) + * is the number of bytes used by everything that isn't part of the + * value. So that gets subtracted from CPERLIN. */ - if (*eightbit) { + snprintf(indexchar, sizeof(indexchar), "%d", index); + maxfit = CPERLIN - (12 + len + strlen(indexchar)); + if ((eightbit && index == 0) || contains8bit(start, start + maxfit)) { + *encode = 1; + } + + len++; /* Add in equal sign */ + + if (*encode) { + /* + * We're using maxfit as a marker for how many characters we can + * fit into the line. Bump it by two because we're not using quotes + * when encoding. + */ + + maxfit += 2; + /* * If we don't have a charset or language tag in this parameter, * add them now. @@ -3631,24 +3654,35 @@ param_len(PM pm, int index, size_t valueoff, int *eightbit) if (! pm->pm_lang) pm->pm_lang = getcpy(NULL); /* Default to a blank lang tag */ - len++; /* For the encoding we need to do */ + len++; /* For the encoding marker */ + maxfit--; if (index == 0) { - len += strlen(pm->pm_charset) + strlen(pm->pm_lang) + 2; + int enclen = strlen(pm->pm_charset) + strlen(pm->pm_lang) + 2; + len += enclen; + maxfit-= enclen; } else { /* - * We know we definitely need to include an index. - * This will get the length wrong if we have more than 99 - * sections. I can live with that. + * We know we definitely need to include an index. maxfit already + * includes the section marker. */ - len += 2; /* * */ - if (index > 9) - len++; + len += strlen(indexchar); } for (p = start; *p != '\0'; p++) { - if (isparamencode(*p)) + if (isparamencode(*p)) { len += 3; - else + maxfit -= 3; + } else { len++; + maxfit--; + } + /* + * Just so there's no confusion: maxfit is counting OUTPUT + * characters (post-encoding). fit is counting INPUT characters. + */ + if (! fitlimit && maxfit >= 0) + fit++; + else if (! fitlimit) + fitlimit++; } } else { /* @@ -3661,15 +3695,27 @@ param_len(PM pm, int index, size_t valueoff, int *eightbit) case '"': case '\\': len++; + maxfit--; /* FALL THROUGH */ default: len++; + maxfit--; } + if (! fitlimit && maxfit >= 0) + fit++; + else if (! fitlimit) + fitlimit++; } len += 2; } + if (fit < 1) + fit = 1; + + *cont = fitlimit; + *bytesfit = fit; + return len; }