Arquivos
hhvm/hphp/runtime/base/zend/zend_string.cpp
T
2013-06-25 13:19:07 -07:00

3183 linhas
83 KiB
C++

/*
+----------------------------------------------------------------------+
| HipHop for PHP |
+----------------------------------------------------------------------+
| Copyright (c) 2010-2013 Facebook, Inc. (http://www.facebook.com) |
| Copyright (c) 1998-2010 Zend Technologies Ltd. (http://www.zend.com) |
+----------------------------------------------------------------------+
| This source file is subject to version 2.00 of the Zend license, |
| that is bundled with this package in the file LICENSE, and is |
| available through the world-wide-web at the following url: |
| http://www.zend.com/license/2_00.txt. |
| If you did not receive a copy of the Zend license and are unable to |
| obtain it through the world-wide-web, please send a note to |
| license@zend.com so we can mail you a copy immediately. |
+----------------------------------------------------------------------+
*/
#include "hphp/runtime/base/zend/zend_string.h"
#include "hphp/runtime/base/zend/zend_printf.h"
#include "hphp/runtime/base/zend/zend_math.h"
#include "hphp/util/lock.h"
#include <math.h>
#include <monetary.h>
#include "hphp/runtime/base/bstring.h"
#include "hphp/runtime/base/util/exceptions.h"
#include "hphp/runtime/base/complex_types.h"
#include "hphp/runtime/base/util/string_buffer.h"
#include "hphp/runtime/base/runtime_error.h"
#include "hphp/runtime/base/type_conversions.h"
#include "hphp/runtime/base/builtin_functions.h"
#ifdef __APPLE__
#ifndef isnan
#define isnan(x) \
( sizeof (x) == sizeof(float ) ? __inline_isnanf((float)(x)) \
: sizeof (x) == sizeof(double) ? __inline_isnand((double)(x)) \
: __inline_isnan ((long double)(x)))
#endif
#ifndef isinf
#define isinf(x) \
( sizeof (x) == sizeof(float ) ? __inline_isinff((float)(x)) \
: sizeof (x) == sizeof(double) ? __inline_isinfd((double)(x)) \
: __inline_isinf ((long double)(x)))
#endif
#endif
#define PHP_QPRINT_MAXL 75
namespace HPHP {
///////////////////////////////////////////////////////////////////////////////
// helpers
bool string_substr_check(int len, int &f, int &l, bool strict /* = true */) {
// if "from" position is negative, count start position from the end
if (f < 0) {
f += len;
if (f < 0) {
return false;
}
}
if (f > len || (f == len && strict)) {
return false;
}
// if "length" position is negative, set it to the length
// needed to stop that many chars from the end of the string
if (l < 0) {
l += len - f;
if (l < 0) {
return false;
}
}
if ((unsigned int)f + (unsigned int)l > (unsigned int)len) {
l = len - f;
}
return true;
}
void string_charmask(const char *sinput, int len, char *mask) {
const unsigned char *input = (unsigned char *)sinput;
const unsigned char *end;
unsigned char c;
memset(mask, 0, 256);
for (end = input+len; input < end; input++) {
c=*input;
if ((input+3 < end) && input[1] == '.' && input[2] == '.'
&& input[3] >= c) {
memset(mask+c, 1, input[3] - c + 1);
input+=3;
} else if ((input+1 < end) && input[0] == '.' && input[1] == '.') {
/* Error, try to be as helpful as possible:
(a range ending/starting with '.' won't be captured here) */
if (end-len >= input) { /* there was no 'left' char */
throw_invalid_argument
("charlist: Invalid '..'-range, missing left of '..'");
continue;
}
if (input+2 >= end) { /* there is no 'right' char */
throw_invalid_argument
("charlist: Invalid '..'-range, missing right of '..'");
continue;
}
if (input[-1] > input[2]) { /* wrong order */
throw_invalid_argument
("charlist: '..'-range needs to be incrementing");
continue;
}
/* FIXME: better error (a..b..c is the only left possibility?) */
throw_invalid_argument("charlist: Invalid '..'-range");
continue;
} else {
mask[c]=1;
}
}
}
int string_copy(char *dst, const char *src, int siz) {
register char *d = dst;
register const char *s = src;
register size_t n = siz;
/* Copy as many bytes as will fit */
if (n != 0 && --n != 0) {
do {
if ((*d++ = *s++) == 0)
break;
} while (--n != 0);
}
/* Not enough room in dst, add NUL and traverse rest of src */
if (n == 0) {
if (siz != 0)
*d = '\0'; /* NUL-terminate dst */
while (*s++)
;
}
return(s - src - 1); /* count does not include NUL */
}
///////////////////////////////////////////////////////////////////////////////
char *string_concat(const char *s1, int len1, const char *s2, int len2,
int &len) {
len = len1 + len2;
char *buf = (char *)malloc(len + 1);
if (buf == nullptr) {
throw FatalErrorException(0, "malloc failed: %d", len);
}
memcpy(buf, s1, len1);
memcpy(buf + len1, s2, len2);
buf[len] = 0;
return buf;
}
///////////////////////////////////////////////////////////////////////////////
// comparisons
int string_ncmp(const char *s1, const char *s2, int len) {
for (int i = 0; i < len; i++) {
char c1 = s1[i];
char c2 = s2[i];
if (c1 > c2) return 1;
if (c1 < c2) return -1;
}
return 0;
}
static int compare_right(char const **a, char const *aend,
char const **b, char const *bend) {
int bias = 0;
/* The longest run of digits wins. That aside, the greatest
value wins, but we can't know that it will until we've scanned
both numbers to know that they have the same magnitude, so we
remember it in BIAS. */
for(;; (*a)++, (*b)++) {
if ((*a == aend || !isdigit((int)(unsigned char)**a)) &&
(*b == bend || !isdigit((int)(unsigned char)**b)))
return bias;
else if (*a == aend || !isdigit((int)(unsigned char)**a))
return -1;
else if (*b == bend || !isdigit((int)(unsigned char)**b))
return +1;
else if (**a < **b) {
if (!bias)
bias = -1;
} else if (**a > **b) {
if (!bias)
bias = +1;
}
}
return 0;
}
static int compare_left(char const **a, char const *aend,
char const **b, char const *bend) {
/* Compare two left-aligned numbers: the first to have a
different value wins. */
for(;; (*a)++, (*b)++) {
if ((*a == aend || !isdigit((int)(unsigned char)**a)) &&
(*b == bend || !isdigit((int)(unsigned char)**b)))
return 0;
else if (*a == aend || !isdigit((int)(unsigned char)**a))
return -1;
else if (*b == bend || !isdigit((int)(unsigned char)**b))
return +1;
else if (**a < **b)
return -1;
else if (**a > **b)
return +1;
}
return 0;
}
int string_natural_cmp(char const *a, size_t a_len,
char const *b, size_t b_len, int fold_case) {
char ca, cb;
char const *ap, *bp;
char const *aend = a + a_len, *bend = b + b_len;
int fractional, result;
if (a_len == 0 || b_len == 0)
return a_len - b_len;
ap = a;
bp = b;
while (1) {
ca = *ap; cb = *bp;
/* skip over leading spaces or zeros */
while (isspace((int)(unsigned char)ca))
ca = *++ap;
while (isspace((int)(unsigned char)cb))
cb = *++bp;
/* process run of digits */
if (isdigit((int)(unsigned char)ca) && isdigit((int)(unsigned char)cb)) {
fractional = (ca == '0' || cb == '0');
if (fractional)
result = compare_left(&ap, aend, &bp, bend);
else
result = compare_right(&ap, aend, &bp, bend);
if (result != 0)
return result;
else if (ap == aend && bp == bend)
/* End of the strings. Let caller sort them out. */
return 0;
else {
/* Keep on comparing from the current point. */
ca = *ap; cb = *bp;
}
}
if (fold_case) {
ca = toupper((int)(unsigned char)ca);
cb = toupper((int)(unsigned char)cb);
}
if (ca < cb)
return -1;
else if (ca > cb)
return +1;
++ap; ++bp;
if (ap >= aend && bp >= bend)
/* The strings compare the same. Perhaps the caller
will want to call strcmp to break the tie. */
return 0;
else if (ap >= aend)
return -1;
else if (bp >= bend)
return 1;
}
}
///////////////////////////////////////////////////////////////////////////////
char *string_to_case(const char *s, int len, int (*tocase)(int)) {
assert(s);
assert(tocase);
char *ret = (char *)malloc(len + 1);
for (int i = 0; i < len; i++) {
ret[i] = tocase(s[i]);
}
ret[len] = '\0';
return ret;
}
char *string_to_case_first(const char *s, int len, int (*tocase)(int)) {
assert(s);
assert(tocase);
char *ret = string_duplicate(s, len);
if (*ret) {
*ret = tocase(*ret);
}
return ret;
}
char *string_to_case_words(const char *s, int len, int (*tocase)(int)) {
assert(s);
assert(tocase);
char *ret = string_duplicate(s, len);
if (*ret) {
*ret = tocase(*ret);
for (int i = 1; i < len; i++) {
if (isspace(ret[i-1])) {
ret[i] = tocase(ret[i]);
}
}
}
return ret;
}
///////////////////////////////////////////////////////////////////////////////
char *string_trim(const char *s, int &len,
const char *charlist, int charlistlen, int mode) {
assert(s);
char mask[256];
string_charmask(charlist, charlistlen, mask);
int trimmed = 0;
if (mode & 1) {
for (int i = 0; i < len; i++) {
if (mask[(unsigned char)s[i]]) {
trimmed++;
} else {
break;
}
}
len -= trimmed;
s += trimmed;
}
if (mode & 2) {
for (int i = len - 1; i >= 0; i--) {
if (mask[(unsigned char)s[i]]) {
len--;
trimmed++;
} else {
break;
}
}
}
if (trimmed == 0) {
return nullptr;
}
return string_duplicate(s, len);
}
#define STR_PAD_LEFT 0
#define STR_PAD_RIGHT 1
#define STR_PAD_BOTH 2
char *string_pad(const char *input, int &len, int pad_length,
const char *pad_string, int pad_str_len,
int pad_type) {
assert(input);
int num_pad_chars = pad_length - len;
/* If resulting string turns out to be shorter than input string,
we simply copy the input and return. */
if (pad_length < 0 || num_pad_chars < 0) {
return string_duplicate(input, len);
}
/* Setup the padding string values if specified. */
if (pad_str_len == 0) {
throw_invalid_argument("pad_string: (empty)");
return nullptr;
}
char *result = (char *)malloc(pad_length + 1);
/* We need to figure out the left/right padding lengths. */
int left_pad, right_pad;
switch (pad_type) {
case STR_PAD_RIGHT:
left_pad = 0;
right_pad = num_pad_chars;
break;
case STR_PAD_LEFT:
left_pad = num_pad_chars;
right_pad = 0;
break;
case STR_PAD_BOTH:
left_pad = num_pad_chars / 2;
right_pad = num_pad_chars - left_pad;
break;
default:
throw_invalid_argument("pad_type: %d", pad_type);
return nullptr;
}
/* First we pad on the left. */
int result_len = 0;
for (int i = 0; i < left_pad; i++) {
result[result_len++] = pad_string[i % pad_str_len];
}
/* Then we copy the input string. */
memcpy(result + result_len, input, len);
result_len += len;
/* Finally, we pad on the right. */
for (int i = 0; i < right_pad; i++) {
result[result_len++] = pad_string[i % pad_str_len];
}
result[result_len] = '\0';
len = result_len;
return result;
}
///////////////////////////////////////////////////////////////////////////////
char *string_substr(const char *s, int &len, int start, int length,
bool nullable) {
assert(s);
if (string_substr_check(len, start, length)) {
len = length;
return string_duplicate(s + start, length);
}
len = 0;
if (nullable) {
return nullptr;
}
return string_duplicate("", 0);
}
int string_find(const char *input, int len, char ch, int pos,
bool case_sensitive) {
assert(input);
if (pos < 0 || pos > len) {
return -1;
}
const void *ptr;
if (case_sensitive) {
ptr = memchr(input + pos, ch, len - pos);
} else {
ptr = bstrcasechr(input + pos, ch, len - pos);
}
if (ptr != nullptr) {
return (int)((const char *)ptr - input);
}
return -1;
}
int string_rfind(const char *input, int len, char ch, int pos,
bool case_sensitive) {
assert(input);
if (pos < -len || pos > len) {
return -1;
}
const void *ptr;
if (case_sensitive) {
if (pos >= 0) {
ptr = memrchr(input + pos, ch, len - pos);
} else {
ptr = memrchr(input, ch, len + pos + 1);
}
} else {
if (pos >= 0) {
ptr = bstrrcasechr(input + pos, ch, len - pos);
} else {
ptr = bstrrcasechr(input, ch, len + pos + 1);
}
}
if (ptr != nullptr) {
return (int)((const char *)ptr - input);
}
return -1;
}
int string_find(const char *input, int len, const char *s, int s_len,
int pos, bool case_sensitive) {
assert(input);
assert(s);
if (!s_len || pos < 0 || pos > len) {
return -1;
}
void *ptr;
if (case_sensitive) {
ptr = (void*)string_memnstr(input + pos, s, s_len, input + len);
} else {
ptr = bstrcasestr(input + pos, len - pos, s, s_len);
}
if (ptr != nullptr) {
return (int)((const char *)ptr - input);
}
return -1;
}
int string_rfind(const char *input, int len, const char *s, int s_len,
int pos, bool case_sensitive) {
assert(input);
assert(s);
if (!s_len || pos < -len || pos > len) {
return -1;
}
void *ptr;
if (case_sensitive) {
if (pos >= 0) {
ptr = bstrrstr(input + pos, len - pos, s, s_len);
} else {
ptr = bstrrstr(input, len + pos + 1, s, s_len);
}
} else {
if (pos >= 0) {
ptr = bstrrcasestr(input + pos, len - pos, s, s_len);
} else {
ptr = bstrrcasestr(input, len + pos + 1, s, s_len);
}
}
if (ptr != nullptr) {
return (int)((const char *)ptr - input);
}
return -1;
}
const char *string_memnstr(const char *haystack, const char *needle,
int needle_len, const char *end) {
const char *p = haystack;
char ne = needle[needle_len-1];
end -= needle_len;
while (p <= end) {
if ((p = (char *)memchr(p, *needle, (end-p+1))) && ne == p[needle_len-1]) {
if (!memcmp(needle, p, needle_len-1)) {
return p;
}
}
if (p == nullptr) {
return nullptr;
}
p++;
}
return nullptr;
}
char *string_replace(const char *s, int &len, int start, int length,
const char *replacement, int len_repl) {
assert(s);
assert(replacement);
if (!string_substr_check(len, start, length, false)) {
len = 0;
return string_duplicate("", 0);
}
char *ret = (char *)malloc(len + len_repl - length + 1);
int ret_len = 0;
if (start) {
memcpy(ret, s, start);
ret_len += start;
}
if (len_repl) {
memcpy(ret + ret_len, replacement, len_repl);
ret_len += len_repl;
}
len -= (start + length);
if (len) {
memcpy(ret + ret_len, s + start + length, len);
ret_len += len;
}
len = ret_len;
ret[ret_len] = '\0';
return ret;
}
char *string_replace(const char *input, int &len,
const char *search, int len_search,
const char *replacement, int len_replace,
int &count, bool case_sensitive) {
assert(input);
assert(search && len_search);
if (len == 0) {
return nullptr;
}
std::vector<int> founds;
founds.reserve(16);
if (len_search == 1) {
for (int pos = string_find(input, len, *search, 0, case_sensitive);
pos >= 0;
pos = string_find(input, len, *search, pos + len_search,
case_sensitive)) {
founds.push_back(pos);
}
} else {
for (int pos = string_find(input, len, search, len_search, 0,
case_sensitive);
pos >= 0;
pos = string_find(input, len, search, len_search,
pos + len_search, case_sensitive)) {
founds.push_back(pos);
}
}
count = founds.size();
if (count == 0) {
return nullptr; // not found
}
char *ret = (char *)malloc(len + (len_replace - len_search) * count + 1);
char *p = ret;
int pos = 0; // last position in input that hasn't been copied over yet
int n;
for (unsigned int i = 0; i < founds.size(); i++) {
n = founds[i];
if (n > pos) {
n -= pos;
memcpy(p, input, n);
p += n;
input += n;
pos += n;
}
if (len_replace) {
memcpy(p, replacement, len_replace);
p += len_replace;
}
input += len_search;
pos += len_search;
}
n = len;
if (n > pos) {
n -= pos;
memcpy(p, input, n);
p += n;
}
*p = '\0';
len = p - ret;
return ret;
}
///////////////////////////////////////////////////////////////////////////////
char *string_reverse(const char *s, int len) {
assert(s);
char *n = (char *)malloc(len + 1);
char *p = n;
const char *e = s + len;
while (--e >= s) {
*p++ = *e;
}
*p = '\0';
return n;
}
char *string_repeat(const char *s, int &len, int count) {
assert(s);
if (len == 0 || count <= 0) {
return nullptr;
}
char *ret = (char *)malloc(len * count + 1);
if (len == 1) {
memset(ret, *s, count);
len = count;
} else {
char *p = ret;
for (int i = 0; i < count; i++) {
memcpy(p, s, len);
p += len;
}
len = p - ret;
}
ret[len] = '\0';
return ret;
}
char *string_shuffle(const char *str, int len) {
assert(str);
if (len <= 1) {
return nullptr;
}
char *ret = string_duplicate(str, len);
int n_left = len;
while (--n_left) {
int rnd_idx = rand() % n_left;
char temp = ret[n_left];
ret[n_left] = ret[rnd_idx];
ret[rnd_idx] = temp;
}
return ret;
}
char *string_chunk_split(const char *src, int &srclen, const char *end,
int endlen, int chunklen) {
int chunks = srclen / chunklen; // complete chunks!
int restlen = srclen - chunks * chunklen; /* srclen % chunklen */
int out_len = (chunks + 1) * endlen + srclen + 1;
char *dest = (char *)malloc(out_len);
const char *p; char *q;
const char *pMax = src + srclen - chunklen + 1;
for (p = src, q = dest; p < pMax; ) {
memcpy(q, p, chunklen);
q += chunklen;
memcpy(q, end, endlen);
q += endlen;
p += chunklen;
}
if (restlen) {
memcpy(q, p, restlen);
q += restlen;
memcpy(q, end, endlen);
q += endlen;
}
*q = '\0';
srclen = q - dest;
return(dest);
}
///////////////////////////////////////////////////////////////////////////////
#define PHP_TAG_BUF_SIZE 1023
/**
* Check if tag is in a set of tags
*
* states:
*
* 0 start tag
* 1 first non-whitespace char seen
*/
static int string_tag_find(const char *tag, int len, char *set) {
char c, *n;
const char *t;
int state=0, done=0;
char *norm;
if (len <= 0) {
return 0;
}
norm = (char *)malloc(len+1);
n = norm;
t = tag;
c = tolower(*t);
/*
normalize the tag removing leading and trailing whitespace
and turn any <a whatever...> into just <a> and any </tag>
into <tag>
*/
while (!done) {
switch (c) {
case '<':
*(n++) = c;
break;
case '>':
done =1;
break;
default:
if (!isspace((int)c)) {
if (state == 0) {
state=1;
if (c != '/')
*(n++) = c;
} else {
*(n++) = c;
}
} else {
if (state == 1)
done=1;
}
break;
}
c = tolower(*(++t));
}
*(n++) = '>';
*n = '\0';
if (strstr(set, norm)) {
done=1;
} else {
done=0;
}
free(norm);
return done;
}
/**
* A simple little state-machine to strip out html and php tags
*
* State 0 is the output state, State 1 means we are inside a
* normal html tag and state 2 means we are inside a php tag.
*
* The state variable is passed in to allow a function like fgetss
* to maintain state across calls to the function.
*
* lc holds the last significant character read and br is a bracket
* counter.
*
* When an allow string is passed in we keep track of the string
* in state 1 and when the tag is closed check it against the
* allow string to see if we should allow it.
* swm: Added ability to strip <?xml tags without assuming it PHP
* code.
*/
static size_t strip_tags_impl(char *rbuf, int len, int *stateptr,
char *allow, int allow_len,
bool allow_tag_spaces) {
char *tbuf, *buf, *p, *tp, *rp, c, lc;
int br, i=0, depth=0, in_q = 0;
int state = 0;
if (stateptr)
state = *stateptr;
buf = string_duplicate(rbuf, len);
c = *buf;
lc = '\0';
p = buf;
rp = rbuf;
br = 0;
if (allow) {
for (char *tmp = allow; *tmp; tmp++) {
*tmp = tolower((int)*(unsigned char *)tmp);
}
tbuf = (char *)malloc(PHP_TAG_BUF_SIZE+1);
tp = tbuf;
} else {
tbuf = tp = nullptr;
}
while (i < len) {
switch (c) {
case '\0':
break;
case '<':
if (isspace(*(p + 1)) && !allow_tag_spaces) {
goto reg_char;
}
if (state == 0) {
lc = '<';
state = 1;
if (allow) {
tp = ((tp-tbuf) >= PHP_TAG_BUF_SIZE ? tbuf: tp);
*(tp++) = '<';
}
} else if (state == 1) {
depth++;
}
break;
case '(':
if (state == 2) {
if (lc != '"' && lc != '\'') {
lc = '(';
br++;
}
} else if (allow && state == 1) {
tp = ((tp-tbuf) >= PHP_TAG_BUF_SIZE ? tbuf: tp);
*(tp++) = c;
} else if (state == 0) {
*(rp++) = c;
}
break;
case ')':
if (state == 2) {
if (lc != '"' && lc != '\'') {
lc = ')';
br--;
}
} else if (allow && state == 1) {
tp = ((tp-tbuf) >= PHP_TAG_BUF_SIZE ? tbuf: tp);
*(tp++) = c;
} else if (state == 0) {
*(rp++) = c;
}
break;
case '>':
if (depth) {
depth--;
break;
}
if (in_q) {
break;
}
switch (state) {
case 1: /* HTML/XML */
lc = '>';
in_q = state = 0;
if (allow) {
tp = ((tp-tbuf) >= PHP_TAG_BUF_SIZE ? tbuf: tp);
*(tp++) = '>';
*tp='\0';
if (string_tag_find(tbuf, tp-tbuf, allow)) {
memcpy(rp, tbuf, tp-tbuf);
rp += tp-tbuf;
}
tp = tbuf;
}
break;
case 2: /* PHP */
if (!br && lc != '\"' && *(p-1) == '?') {
in_q = state = 0;
tp = tbuf;
}
break;
case 3:
in_q = state = 0;
tp = tbuf;
break;
case 4: /* JavaScript/CSS/etc... */
if (p >= buf + 2 && *(p-1) == '-' && *(p-2) == '-') {
in_q = state = 0;
tp = tbuf;
}
break;
default:
*(rp++) = c;
break;
}
break;
case '"':
case '\'':
if (state == 2 && *(p-1) != '\\') {
if (lc == c) {
lc = '\0';
} else if (lc != '\\') {
lc = c;
}
} else if (state == 0) {
*(rp++) = c;
} else if (allow && state == 1) {
tp = ((tp-tbuf) >= PHP_TAG_BUF_SIZE ? tbuf: tp);
*(tp++) = c;
}
if (state && p != buf && *(p-1) != '\\' && (!in_q || *p == in_q)) {
if (in_q) {
in_q = 0;
} else {
in_q = *p;
}
}
break;
case '!':
/* JavaScript & Other HTML scripting languages */
if (state == 1 && *(p-1) == '<') {
state = 3;
lc = c;
} else {
if (state == 0) {
*(rp++) = c;
} else if (allow && state == 1) {
tp = ((tp-tbuf) >= PHP_TAG_BUF_SIZE ? tbuf: tp);
*(tp++) = c;
}
}
break;
case '-':
if (state == 3 && p >= buf + 2 && *(p-1) == '-' && *(p-2) == '!') {
state = 4;
} else {
goto reg_char;
}
break;
case '?':
if (state == 1 && *(p-1) == '<') {
br=0;
state=2;
break;
}
case 'E':
case 'e':
/* !DOCTYPE exception */
if (state==3 && p > buf+6
&& tolower(*(p-1)) == 'p'
&& tolower(*(p-2)) == 'y'
&& tolower(*(p-3)) == 't'
&& tolower(*(p-4)) == 'c'
&& tolower(*(p-5)) == 'o'
&& tolower(*(p-6)) == 'd') {
state = 1;
break;
}
/* fall-through */
case 'l':
/* swm: If we encounter '<?xml' then we shouldn't be in
* state == 2 (PHP). Switch back to HTML.
*/
if (state == 2 && p > buf+2 && *(p-1) == 'm' && *(p-2) == 'x') {
state = 1;
break;
}
/* fall-through */
default:
reg_char:
if (state == 0) {
*(rp++) = c;
} else if (allow && state == 1) {
tp = ((tp-tbuf) >= PHP_TAG_BUF_SIZE ? tbuf: tp);
*(tp++) = c;
}
break;
}
c = *(++p);
i++;
}
if (rp < rbuf + len) {
*rp = '\0';
}
free(buf);
if (allow)
free(tbuf);
if (stateptr)
*stateptr = state;
return (size_t)(rp - rbuf);
}
char *string_strip_tags(const char *s, int &len, const char *allow,
int allow_len, bool allow_tag_spaces) {
assert(s);
assert(allow);
char *ret = string_duplicate(s, len);
char *sallow = string_duplicate(allow, allow_len);
len = strip_tags_impl(ret, len, nullptr, sallow, allow_len, allow_tag_spaces);
free(sallow);
return ret;
}
///////////////////////////////////////////////////////////////////////////////
char *string_wordwrap(const char *text, int &textlen, int linelength,
const char *breakchar, int breakcharlen, bool docut) {
assert(text);
assert(breakchar);
char *newtext;
int newtextlen, chk;
size_t alloced;
long current = 0, laststart = 0, lastspace = 0;
if (textlen == 0) {
return strdup("");
}
if (breakcharlen == 0) {
throw_invalid_argument("wordbreak: (empty)");
return nullptr;
}
if (linelength == 0 && docut) {
throw_invalid_argument("width", "can't force cut when width = 0");
return nullptr;
}
/* Special case for a single-character break as it needs no
additional storage space */
if (breakcharlen == 1 && !docut) {
newtext = string_duplicate(text, textlen);
laststart = lastspace = 0;
for (current = 0; current < textlen; current++) {
if (text[current] == breakchar[0]) {
laststart = lastspace = current;
} else if (text[current] == ' ') {
if (current - laststart >= linelength) {
newtext[current] = breakchar[0];
laststart = current + 1;
}
lastspace = current;
} else if (current - laststart >= linelength && laststart != lastspace) {
newtext[lastspace] = breakchar[0];
laststart = lastspace + 1;
}
}
return newtext;
}
/* Multiple character line break or forced cut */
if (linelength > 0) {
chk = (int)(textlen/linelength + 1);
alloced = textlen + chk * breakcharlen + 1;
} else {
chk = textlen;
alloced = textlen * (breakcharlen + 1) + 1;
}
newtext = (char *)malloc(alloced);
/* now keep track of the actual new text length */
newtextlen = 0;
laststart = lastspace = 0;
for (current = 0; current < textlen; current++) {
if (chk <= 0) {
alloced += (int) (((textlen - current + 1)/linelength + 1) *
breakcharlen) + 1;
newtext = (char *)realloc(newtext, alloced);
chk = (int) ((textlen - current)/linelength) + 1;
}
/* when we hit an existing break, copy to new buffer, and
* fix up laststart and lastspace */
if (text[current] == breakchar[0]
&& current + breakcharlen < textlen
&& !strncmp(text+current, breakchar, breakcharlen)) {
memcpy(newtext+newtextlen, text+laststart,
current-laststart+breakcharlen);
newtextlen += current-laststart+breakcharlen;
current += breakcharlen - 1;
laststart = lastspace = current + 1;
chk--;
}
/* if it is a space, check if it is at the line boundary,
* copy and insert a break, or just keep track of it */
else if (text[current] == ' ') {
if (current - laststart >= linelength) {
memcpy(newtext+newtextlen, text+laststart, current-laststart);
newtextlen += current - laststart;
memcpy(newtext+newtextlen, breakchar, breakcharlen);
newtextlen += breakcharlen;
laststart = current + 1;
chk--;
}
lastspace = current;
}
/* if we are cutting, and we've accumulated enough
* characters, and we haven't see a space for this line,
* copy and insert a break. */
else if (current - laststart >= linelength
&& docut && laststart >= lastspace) {
memcpy(newtext+newtextlen, text+laststart, current-laststart);
newtextlen += current - laststart;
memcpy(newtext+newtextlen, breakchar, breakcharlen);
newtextlen += breakcharlen;
laststart = lastspace = current;
chk--;
}
/* if the current word puts us over the linelength, copy
* back up until the last space, insert a break, and move
* up the laststart */
else if (current - laststart >= linelength
&& laststart < lastspace) {
memcpy(newtext+newtextlen, text+laststart, lastspace-laststart);
newtextlen += lastspace - laststart;
memcpy(newtext+newtextlen, breakchar, breakcharlen);
newtextlen += breakcharlen;
laststart = lastspace = lastspace + 1;
chk--;
}
}
/* copy over any stragglers */
if (laststart != current) {
memcpy(newtext+newtextlen, text+laststart, current-laststart);
newtextlen += current - laststart;
}
textlen = newtextlen;
newtext[newtextlen] = '\0';
return newtext;
}
///////////////////////////////////////////////////////////////////////////////
char *string_addcslashes(const char *str, int &length, const char *what,
int wlength) {
assert(str);
assert(what);
char flags[256];
string_charmask(what, wlength, flags);
char *new_str = (char *)malloc((length << 2) + 1);
const char *source;
const char *end;
char *target;
for (source = str, end = source + length, target = new_str; source < end;
source++) {
char c = *source;
if (flags[(unsigned char)c]) {
if ((unsigned char) c < 32 || (unsigned char) c > 126) {
*target++ = '\\';
switch (c) {
case '\n': *target++ = 'n'; break;
case '\t': *target++ = 't'; break;
case '\r': *target++ = 'r'; break;
case '\a': *target++ = 'a'; break;
case '\v': *target++ = 'v'; break;
case '\b': *target++ = 'b'; break;
case '\f': *target++ = 'f'; break;
default: target += sprintf(target, "%03o", (unsigned char) c);
}
continue;
}
*target++ = '\\';
}
*target++ = c;
}
*target = 0;
length = target - new_str;
return new_str;
}
char *string_stripcslashes(const char *input, int &nlen) {
assert(input);
if (nlen == 0) {
return nullptr;
}
char *str = string_duplicate(input, nlen);
char *source, *target, *end;
int i;
char numtmp[4];
for (source=str, end=str+nlen, target=str; source < end; source++) {
if (*source == '\\' && source+1 < end) {
source++;
switch (*source) {
case 'n': *target++='\n'; nlen--; break;
case 'r': *target++='\r'; nlen--; break;
case 'a': *target++='\a'; nlen--; break;
case 't': *target++='\t'; nlen--; break;
case 'v': *target++='\v'; nlen--; break;
case 'b': *target++='\b'; nlen--; break;
case 'f': *target++='\f'; nlen--; break;
case '\\': *target++='\\'; nlen--; break;
case 'x':
if (source+1 < end && isxdigit((int)(*(source+1)))) {
numtmp[0] = *++source;
if (source+1 < end && isxdigit((int)(*(source+1)))) {
numtmp[1] = *++source;
numtmp[2] = '\0';
nlen-=3;
} else {
numtmp[1] = '\0';
nlen-=2;
}
*target++=(char)strtol(numtmp, nullptr, 16);
break;
}
/* break is left intentionally */
default:
i=0;
while (source < end && *source >= '0' && *source <= '7' && i<3) {
numtmp[i++] = *source++;
}
if (i) {
numtmp[i]='\0';
*target++=(char)strtol(numtmp, nullptr, 8);
nlen-=i;
source--;
} else {
*target++=*source;
nlen--;
}
}
} else {
*target++=*source;
}
}
*target='\0';
nlen = target - str;
return str;
}
char *string_addslashes(const char *str, int &length) {
assert(str);
if (length == 0) {
return nullptr;
}
char *new_str = (char *)malloc((length << 1) + 1);
const char *source = str;
const char *end = source + length;
char *target = new_str;
while (source < end) {
switch (*source) {
case '\0':
*target++ = '\\';
*target++ = '0';
break;
case '\'':
case '\"':
case '\\':
*target++ = '\\';
/* break is missing *intentionally* */
default:
*target++ = *source;
break;
}
source++;
}
*target = 0;
length = target - new_str;
return new_str;
}
char *string_stripslashes(const char *input, int &l) {
assert(input);
if (!*input) {
return nullptr;
}
char *str = string_duplicate(input, l);
char *s, *t;
s = str;
t = str;
while (l > 0) {
if (*t == '\\') {
t++; /* skip the slash */
l--;
if (l > 0) {
if (*t == '0') {
*s++='\0';
t++;
} else {
*s++ = *t++; /* preserve the next character */
}
l--;
}
} else {
*s++ = *t++;
l--;
}
}
if (s != t) {
*s = '\0';
}
l = s - str;
return str;
}
char *string_quotemeta(const char *input, int &len) {
assert(input);
if (len == 0) {
return nullptr;
}
char *ret = (char *)malloc((len << 1) + 1);
char *q = ret;
for (const char *p = input; *p; p++) {
char c = *p;
switch (c) {
case '.':
case '\\':
case '+':
case '*':
case '?':
case '[':
case '^':
case ']':
case '$':
case '(':
case ')':
*q++ = '\\';
/* break is missing _intentionally_ */
default:
*q++ = c;
}
}
*q = 0;
len = q - ret;
return ret;
}
///////////////////////////////////////////////////////////////////////////////
static char string_hex2int(int c) {
if (isdigit(c)) {
return c - '0';
}
if (c >= 'A' && c <= 'F') {
return c - 'A' + 10;
}
if (c >= 'a' && c <= 'f') {
return c - 'a' + 10;
}
return -1;
}
char *string_quoted_printable_encode(const char *input, int &len) {
const char *hex = "0123456789ABCDEF";
unsigned char *ret =
(unsigned char *)malloc(3 * len + 3 * (((3 * len)/PHP_QPRINT_MAXL) + 1));
unsigned char *d = ret;
int length = len;
unsigned char c;
unsigned long lp = 0;
while (length--) {
if (((c = *input++) == '\015') && (*input == '\012') && length > 0) {
*d++ = '\015';
*d++ = *input++;
length--;
lp = 0;
} else {
if (iscntrl (c) || (c == 0x7f) || (c & 0x80) || (c == '=') ||
((c == ' ') && (*input == '\015'))) {
if ((lp += 3) > PHP_QPRINT_MAXL) {
*d++ = '=';
*d++ = '\015';
*d++ = '\012';
lp = 3;
}
*d++ = '=';
*d++ = hex[c >> 4];
*d++ = hex[c & 0xf];
} else {
if ((++lp) > PHP_QPRINT_MAXL) {
*d++ = '=';
*d++ = '\015';
*d++ = '\012';
lp = 1;
}
*d++ = c;
}
}
}
*d = '\0';
len = d - ret;
return (char*)ret;
}
char *string_quoted_printable_decode(const char *input, int &len, bool is_q) {
assert(input);
if (len == 0) {
return nullptr;
}
int i = 0, j = 0, k;
const char *str_in = input;
char *str_out = (char *)malloc(len + 1);
while (i < len && str_in[i]) {
switch (str_in[i]) {
case '=':
if (i + 2 < len && str_in[i + 1] && str_in[i + 2] &&
isxdigit((int) str_in[i + 1]) && isxdigit((int) str_in[i + 2]))
{
str_out[j++] = (string_hex2int((int) str_in[i + 1]) << 4)
+ string_hex2int((int) str_in[i + 2]);
i += 3;
} else /* check for soft line break according to RFC 2045*/ {
k = 1;
while (str_in[i + k] &&
((str_in[i + k] == 32) || (str_in[i + k] == 9))) {
/* Possibly, skip spaces/tabs at the end of line */
k++;
}
if (!str_in[i + k]) {
/* End of line reached */
i += k;
}
else if ((str_in[i + k] == 13) && (str_in[i + k + 1] == 10)) {
/* CRLF */
i += k + 2;
}
else if ((str_in[i + k] == 13) || (str_in[i + k] == 10)) {
/* CR or LF */
i += k + 1;
}
else {
str_out[j++] = str_in[i++];
}
}
break;
case '_':
if (is_q) {
str_out[j++] = ' ';
i++;
} else {
str_out[j++] = str_in[i++];
}
break;
default:
str_out[j++] = str_in[i++];
}
}
str_out[j] = '\0';
len = j;
return str_out;
}
char *string_hex2bin(const char *input, int &len) {
if (len % 2 != 0) {
throw InvalidArgumentException("hex2bin: odd length input");
}
len >>= 1;
char *str = (char *)malloc(len + 1);
int i, j;
for (i = j = 0; i < len; i++) {
char c = input[j++];
if (c >= '0' && c <= '9') {
str[i] = (c - '0') << 4;
} else if (c >= 'a' && c <= 'f') {
str[i] = (c - 'a' + 10) << 4;
} else if (c >= 'A' && c <= 'F') {
str[i] = (c - 'A' + 10) << 4;
} else {
free(str);
throw InvalidArgumentException("bad encoding at position", j);
}
c = input[j++];
if (c >= '0' && c <= '9') {
str[i] |= c - '0';
} else if (c >= 'a' && c <= 'f') {
str[i] |= c - 'a' + 10;
} else if (c >= 'A' && c <= 'F') {
str[i] |= c - 'A' + 10;
} else {
free(str);
throw InvalidArgumentException("bad encoding at position", j);
}
}
str[len] = '\0';
return str;
}
Variant string_base_to_numeric(const char *s, int len, int base) {
int64_t num = 0;
double fnum = 0;
int mode = 0;
int64_t cutoff;
int cutlim;
assert(string_validate_base(base));
cutoff = LONG_MAX / base;
cutlim = LONG_MAX % base;
for (int i = len; i > 0; i--) {
char c = *s++;
/* might not work for EBCDIC */
if (c >= '0' && c <= '9')
c -= '0';
else if (c >= 'A' && c <= 'Z')
c -= 'A' - 10;
else if (c >= 'a' && c <= 'z')
c -= 'a' - 10;
else
continue;
if (c >= base)
continue;
switch (mode) {
case 0: /* Integer */
if (num < cutoff || (num == cutoff && c <= cutlim)) {
num = num * base + c;
break;
} else {
fnum = num;
mode = 1;
}
/* fall-through */
case 1: /* Float */
fnum = fnum * base + c;
}
}
if (mode == 1) {
return fnum;
}
return num;
}
char *string_long_to_base(unsigned long value, int base) {
static char digits[] = "0123456789abcdefghijklmnopqrstuvwxyz";
char buf[(sizeof(unsigned long) << 3) + 1];
char *ptr, *end;
assert(string_validate_base(base));
end = ptr = buf + sizeof(buf) - 1;
*ptr = '\0';
do {
*--ptr = digits[value % base];
value /= base;
} while (ptr > buf && value);
return string_duplicate(ptr, end - ptr);
}
char *string_numeric_to_base(CVarRef value, int base) {
static char digits[] = "0123456789abcdefghijklmnopqrstuvwxyz";
assert(string_validate_base(base));
if ((!value.isInteger() && !value.isDouble())) {
return string_duplicate("", 0);
}
if (value.isDouble()) {
double fvalue = floor(value.toDouble()); /* floor it just in case */
char *ptr, *end;
char buf[(sizeof(double) << 3) + 1];
/* Don't try to convert +/- infinity */
if (fvalue == HUGE_VAL || fvalue == -HUGE_VAL) {
raise_warning("Number too large");
return string_duplicate("", 0);
}
end = ptr = buf + sizeof(buf) - 1;
*ptr = '\0';
do {
*--ptr = digits[(int) fmod(fvalue, base)];
fvalue /= base;
} while (ptr > buf && fabs(fvalue) >= 1);
return string_duplicate(ptr, end - ptr);
}
return string_long_to_base(value.toInt64(), base);
}
///////////////////////////////////////////////////////////////////////////////
// uuencode
#define PHP_UU_ENC(c) \
((c) ? ((c) & 077) + ' ' : '`')
#define PHP_UU_ENC_C2(c) \
PHP_UU_ENC(((*(c) << 4) & 060) | ((*((c) + 1) >> 4) & 017))
#define PHP_UU_ENC_C3(c) \
PHP_UU_ENC(((*(c + 1) << 2) & 074) | ((*((c) + 2) >> 6) & 03))
#define PHP_UU_DEC(c) \
(((c) - ' ') & 077)
char *string_uuencode(const char *src, int src_len, int &dest_len) {
assert(src);
assert(src_len);
int len = 45;
char *p;
const char *s, *e, *ee;
char *dest;
/* encoded length is ~ 38% greater then the original */
p = dest = (char *)malloc((int)ceil(src_len * 1.38) + 46);
s = src;
e = src + src_len;
while ((s + 3) < e) {
ee = s + len;
if (ee > e) {
ee = e;
len = ee - s;
if (len % 3) {
ee = s + (int) (floor(len / 3) * 3);
}
}
*p++ = PHP_UU_ENC(len);
while (s < ee) {
*p++ = PHP_UU_ENC(*s >> 2);
*p++ = PHP_UU_ENC_C2(s);
*p++ = PHP_UU_ENC_C3(s);
*p++ = PHP_UU_ENC(*(s + 2) & 077);
s += 3;
}
if (len == 45) {
*p++ = '\n';
}
}
if (s < e) {
if (len == 45) {
*p++ = PHP_UU_ENC(e - s);
len = 0;
}
*p++ = PHP_UU_ENC(*s >> 2);
*p++ = PHP_UU_ENC_C2(s);
*p++ = ((e - s) > 1) ? PHP_UU_ENC_C3(s) : PHP_UU_ENC('\0');
*p++ = ((e - s) > 2) ? PHP_UU_ENC(*(s + 2) & 077) : PHP_UU_ENC('\0');
}
if (len < 45) {
*p++ = '\n';
}
*p++ = PHP_UU_ENC('\0');
*p++ = '\n';
*p = '\0';
dest_len = p - dest;
return dest;
}
char *string_uudecode(const char *src, int src_len, int &total_len) {
total_len = 0;
int len;
const char *s, *e, *ee;
char *p, *dest;
p = dest = (char *)malloc((int)ceil(src_len * 0.75) + 1);
s = src;
e = src + src_len;
while (s < e) {
if ((len = PHP_UU_DEC(*s++)) <= 0) {
break;
}
/* sanity check */
if (len > src_len) {
goto err;
}
total_len += len;
ee = s + (len == 45 ? 60 : (int) floor(len * 1.33));
/* sanity check */
if (ee > e) {
goto err;
}
while (s < ee) {
*p++ = PHP_UU_DEC(*s) << 2 | PHP_UU_DEC(*(s + 1)) >> 4;
*p++ = PHP_UU_DEC(*(s + 1)) << 4 | PHP_UU_DEC(*(s + 2)) >> 2;
*p++ = PHP_UU_DEC(*(s + 2)) << 6 | PHP_UU_DEC(*(s + 3));
s += 4;
}
if (len < 45) {
break;
}
/* skip \n */
s++;
}
if ((len = total_len > (p - dest))) {
*p++ = PHP_UU_DEC(*s) << 2 | PHP_UU_DEC(*(s + 1)) >> 4;
if (len > 1) {
*p++ = PHP_UU_DEC(*(s + 1)) << 4 | PHP_UU_DEC(*(s + 2)) >> 2;
if (len > 2) {
*p++ = PHP_UU_DEC(*(s + 2)) << 6 | PHP_UU_DEC(*(s + 3));
}
}
}
*(dest + total_len) = '\0';
return dest;
err:
free(dest);
return nullptr;
}
///////////////////////////////////////////////////////////////////////////////
// base64
static const char base64_table[] = {
'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M',
'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z',
'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z',
'0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '+', '/', '\0'
};
static const char base64_pad = '=';
static const short base64_reverse_table[256] = {
-2, -2, -2, -2, -2, -2, -2, -2, -2, -1, -1, -2, -2, -1, -2, -2,
-2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
-1, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, 62, -2, -2, -2, 63,
52, 53, 54, 55, 56, 57, 58, 59, 60, 61, -2, -2, -2, -2, -2, -2,
-2, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -2, -2, -2, -2, -2,
-2, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40,
41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, -2, -2, -2, -2, -2,
-2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
-2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
-2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
-2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
-2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
-2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
-2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
-2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2
};
static unsigned char *php_base64_encode(const unsigned char *str, int length,
int *ret_length) {
const unsigned char *current = str;
unsigned char *p;
unsigned char *result;
if ((length + 2) < 0 || ((length + 2) / 3) >= (1 << (sizeof(int) * 8 - 2))) {
if (ret_length != nullptr) {
*ret_length = 0;
}
return nullptr;
}
result = (unsigned char *)malloc(((length + 2) / 3) * 4 + 1);
p = result;
while (length > 2) { /* keep going until we have less than 24 bits */
*p++ = base64_table[current[0] >> 2];
*p++ = base64_table[((current[0] & 0x03) << 4) + (current[1] >> 4)];
*p++ = base64_table[((current[1] & 0x0f) << 2) + (current[2] >> 6)];
*p++ = base64_table[current[2] & 0x3f];
current += 3;
length -= 3; /* we just handle 3 octets of data */
}
/* now deal with the tail end of things */
if (length != 0) {
*p++ = base64_table[current[0] >> 2];
if (length > 1) {
*p++ = base64_table[((current[0] & 0x03) << 4) + (current[1] >> 4)];
*p++ = base64_table[(current[1] & 0x0f) << 2];
*p++ = base64_pad;
} else {
*p++ = base64_table[(current[0] & 0x03) << 4];
*p++ = base64_pad;
*p++ = base64_pad;
}
}
if (ret_length != nullptr) {
*ret_length = (int)(p - result);
}
*p = '\0';
return result;
}
static unsigned char *php_base64_decode(const unsigned char *str,
int length, int *ret_length,
bool strict) {
const unsigned char *current = str;
int ch, i = 0, j = 0, k;
/* this sucks for threaded environments */
unsigned char *result;
result = (unsigned char *)malloc(length + 1);
/* run through the whole string, converting as we go */
while ((ch = *current++) != '\0' && length-- > 0) {
if (ch == base64_pad) {
if (*current != '=' && (i % 4) == 1) {
free(result);
return nullptr;
}
continue;
}
ch = base64_reverse_table[ch];
if ((!strict && ch < 0) || ch == -1) {
/* a space or some other separator character, we simply skip over */
continue;
} else if (ch == -2) {
free(result);
return nullptr;
}
switch(i % 4) {
case 0:
result[j] = ch << 2;
break;
case 1:
result[j++] |= ch >> 4;
result[j] = (ch & 0x0f) << 4;
break;
case 2:
result[j++] |= ch >>2;
result[j] = (ch & 0x03) << 6;
break;
case 3:
result[j++] |= ch;
break;
}
i++;
}
k = j;
/* mop things up if we ended on a boundary */
if (ch == base64_pad) {
switch(i % 4) {
case 1:
free(result);
return nullptr;
case 2:
k++;
case 3:
result[k] = 0;
}
}
if(ret_length) {
*ret_length = j;
}
result[j] = '\0';
return result;
}
char *string_base64_encode(const char *input, int &len) {
return (char *)php_base64_encode((unsigned char *)input, len, &len);
}
char *string_base64_decode(const char *input, int &len, bool strict) {
return (char *)php_base64_decode((unsigned char *)input, len, &len, strict);
}
///////////////////////////////////////////////////////////////////////////////
char *string_escape_shell_arg(const char *str) {
int x, y, l;
char *cmd;
y = 0;
l = strlen(str);
cmd = (char *)malloc((l << 2) + 3); /* worst case */
cmd[y++] = '\'';
for (x = 0; x < l; x++) {
switch (str[x]) {
case '\'':
cmd[y++] = '\'';
cmd[y++] = '\\';
cmd[y++] = '\'';
/* fall-through */
default:
cmd[y++] = str[x];
}
}
cmd[y++] = '\'';
cmd[y] = '\0';
return cmd;
}
char *string_escape_shell_cmd(const char *str) {
register int x, y, l;
char *cmd;
char *p = nullptr;
l = strlen(str);
cmd = (char *)malloc((l << 1) + 1);
for (x = 0, y = 0; x < l; x++) {
switch (str[x]) {
case '"':
case '\'':
if (!p && (p = (char *)memchr(str + x + 1, str[x], l - x - 1))) {
/* noop */
} else if (p && *p == str[x]) {
p = nullptr;
} else {
cmd[y++] = '\\';
}
cmd[y++] = str[x];
break;
case '#': /* This is character-set independent */
case '&':
case ';':
case '`':
case '|':
case '*':
case '?':
case '~':
case '<':
case '>':
case '^':
case '(':
case ')':
case '[':
case ']':
case '{':
case '}':
case '$':
case '\\':
case '\x0A': /* excluding these two */
case '\xFF':
cmd[y++] = '\\';
/* fall-through */
default:
cmd[y++] = str[x];
}
}
cmd[y] = '\0';
return cmd;
}
std::string string_cplus_escape(const char *s, int len)
{
std::string sb;
static const char digits[] = "01234567";
for (int i = 0; i < len; i++) {
unsigned char uc = *s++;
switch (uc) {
case '"': sb.append("\\\"", 2); break;
case '\\': sb.append("\\\\", 2); break;
case '\b': sb.append("\\b", 2); break;
case '\f': sb.append("\\f", 2); break;
case '\n': sb.append("\\n", 2); break;
case '\r': sb.append("\\r", 2); break;
case '\t': sb.append("\\t", 2); break;
case '?': sb.append("\\?", 2); break;
default:
if (uc >= ' ' && (uc & 127) == uc) {
sb += (char) uc;
} else {
sb += '\\';
sb += digits[(uc >> 6) & 7];
sb += digits[(uc >> 3) & 7];
sb += digits[(uc >> 0) & 7];
}
break;
}
}
return sb;
}
///////////////////////////////////////////////////////////////////////////////
static void string_similar_str(const char *txt1, int len1,
const char *txt2, int len2,
int *pos1, int *pos2, int *max) {
const char *p, *q;
const char *end1 = txt1 + len1;
const char *end2 = txt2 + len2;
int l;
*max = 0;
for (p = txt1; p < end1; p++) {
for (q = txt2; q < end2; q++) {
for (l = 0; (p + l < end1) && (q + l < end2) && (p[l] == q[l]); l++);
if (l > *max) {
*max = l;
*pos1 = p - txt1;
*pos2 = q - txt2;
}
}
}
}
static int string_similar_char(const char *txt1, int len1,
const char *txt2, int len2) {
int sum;
int pos1 = 0, pos2 = 0, max;
string_similar_str(txt1, len1, txt2, len2, &pos1, &pos2, &max);
if ((sum = max)) {
if (pos1 && pos2) {
sum += string_similar_char(txt1, pos1, txt2, pos2);
}
if ((pos1 + max < len1) && (pos2 + max < len2)) {
sum += string_similar_char(txt1 + pos1 + max, len1 - pos1 - max,
txt2 + pos2 + max, len2 - pos2 - max);
}
}
return sum;
}
int string_similar_text(const char *t1, int len1,
const char *t2, int len2, float *percent) {
if (len1 == 0 && len2 == 0) {
if (percent) *percent = 0.0;
return 0;
}
int sim = string_similar_char(t1, len1, t2, len2);
if (percent) *percent = sim * 200.0 / (len1 + len2);
return sim;
}
///////////////////////////////////////////////////////////////////////////////
#define LEVENSHTEIN_MAX_LENTH 255
// reference implementation, only optimized for memory usage, not speed
int string_levenshtein(const char *s1, int l1, const char *s2, int l2,
int cost_ins, int cost_rep, int cost_del ) {
int *p1, *p2, *tmp;
int i1, i2, c0, c1, c2;
if(l1==0) return l2*cost_ins;
if(l2==0) return l1*cost_del;
if((l1>LEVENSHTEIN_MAX_LENTH)||(l2>LEVENSHTEIN_MAX_LENTH)) {
raise_warning("levenshtein(): Argument string(s) too long");
return -1;
}
p1 = (int*)malloc((l2+1) * sizeof(int));
p2 = (int*)malloc((l2+1) * sizeof(int));
for(i2=0;i2<=l2;i2++) {
p1[i2] = i2*cost_ins;
}
for(i1=0;i1<l1;i1++) {
p2[0]=p1[0]+cost_del;
for(i2=0;i2<l2;i2++) {
c0=p1[i2]+((s1[i1]==s2[i2])?0:cost_rep);
c1=p1[i2+1]+cost_del; if(c1<c0) c0=c1;
c2=p2[i2]+cost_ins; if(c2<c0) c0=c2;
p2[i2+1]=c0;
}
tmp=p1; p1=p2; p2=tmp;
}
c0=p1[l2];
free(p1);
free(p2);
return c0;
}
///////////////////////////////////////////////////////////////////////////////
char *string_money_format(const char *format, double value) {
bool check = false;
const char *p = format;
while ((p = strchr(p, '%'))) {
if (*(p + 1) == '%') {
p += 2;
} else if (!check) {
check = true;
p++;
} else {
throw_invalid_argument
("format: Only a single %%i or %%n token can be used");
return nullptr;
}
}
int format_len = strlen(format);
int str_len = format_len + 1024;
char *str = (char *)malloc(str_len);
if ((str_len = strfmon(str, str_len, format, value)) < 0) {
free(str);
return nullptr;
}
str[str_len] = 0;
return str;
}
///////////////////////////////////////////////////////////////////////////////
char *string_number_format(double d, int dec, char dec_point,
char thousand_sep) {
char *tmpbuf = nullptr, *resbuf;
char *s, *t; /* source, target */
char *dp;
int integral;
int tmplen, reslen=0;
int count=0;
int is_negative=0;
if (d < 0) {
is_negative = 1;
d = -d;
}
if (dec < 0) dec = 0;
d = php_math_round(d, dec);
// departure from PHP: we got rid of dependencies on spprintf() here.
tmpbuf = (char *)malloc(64);
snprintf(tmpbuf, 64, "%.*F", dec, d);
tmplen = strlen(tmpbuf);
if (tmpbuf == nullptr || !isdigit((int)tmpbuf[0])) {
return tmpbuf;
}
/* find decimal point, if expected */
if (dec) {
dp = strpbrk(tmpbuf, ".,");
} else {
dp = nullptr;
}
/* calculate the length of the return buffer */
if (dp) {
integral = dp - tmpbuf;
} else {
/* no decimal point was found */
integral = tmplen;
}
/* allow for thousand separators */
if (thousand_sep) {
integral += (integral-1) / 3;
}
reslen = integral;
if (dec) {
reslen += dec;
if (dec_point) {
reslen++;
}
}
/* add a byte for minus sign */
if (is_negative) {
reslen++;
}
resbuf = (char *) malloc(reslen+1); /* +1 for NUL terminator */
s = tmpbuf+tmplen-1;
t = resbuf+reslen;
*t-- = '\0';
/* copy the decimal places.
* Take care, as the sprintf implementation may return less places than
* we requested due to internal buffer limitations */
if (dec) {
int declen = dp ? s - dp : 0;
int topad = dec > declen ? dec - declen : 0;
/* pad with '0's */
while (topad--) {
*t-- = '0';
}
if (dp) {
s -= declen + 1; /* +1 to skip the point */
t -= declen;
/* now copy the chars after the point */
memcpy(t + 1, dp + 1, declen);
}
/* add decimal point */
if (dec_point) {
*t-- = dec_point;
}
}
/* copy the numbers before the decimal point, adding thousand
* separator every three digits */
while(s >= tmpbuf) {
*t-- = *s--;
if (thousand_sep && (++count%3)==0 && s>=tmpbuf) {
*t-- = thousand_sep;
}
}
/* and a minus sign, if needed */
if (is_negative) {
*t-- = '-';
}
free(tmpbuf);
return resbuf;
}
///////////////////////////////////////////////////////////////////////////////
// soundex
/* Simple soundex algorithm as described by Knuth in TAOCP, vol 3 */
char *string_soundex(const char *str) {
assert(str);
int _small, code, last;
char soundex[4 + 1];
static char soundex_table[26] = {
0, /* A */
'1', /* B */
'2', /* C */
'3', /* D */
0, /* E */
'1', /* F */
'2', /* G */
0, /* H */
0, /* I */
'2', /* J */
'2', /* K */
'4', /* L */
'5', /* M */
'5', /* N */
0, /* O */
'1', /* P */
'2', /* Q */
'6', /* R */
'2', /* S */
'3', /* T */
0, /* U */
'1', /* V */
0, /* W */
'2', /* X */
0, /* Y */
'2' /* Z */
};
if (!*str) {
return nullptr;
}
/* build soundex string */
last = -1;
const char *p = str;
for (_small = 0; *p && _small < 4; p++) {
/* convert chars to upper case and strip non-letter chars */
/* BUG: should also map here accented letters used in non */
/* English words or names (also found in English text!): */
/* esstsett, thorn, n-tilde, c-cedilla, s-caron, ... */
code = toupper((int)(unsigned char)(*p));
if (code >= 'A' && code <= 'Z') {
if (_small == 0) {
/* remember first valid char */
soundex[_small++] = code;
last = soundex_table[code - 'A'];
} else {
/* ignore sequences of consonants with same soundex */
/* code in trail, and vowels unless they separate */
/* consonant letters */
code = soundex_table[code - 'A'];
if (code != last) {
if (code != 0) {
soundex[_small++] = code;
}
last = code;
}
}
}
}
/* pad with '0' and terminate with 0 ;-) */
while (_small < 4) {
soundex[_small++] = '0';
}
soundex[_small] = '\0';
return strdup(soundex);
}
///////////////////////////////////////////////////////////////////////////////
// metaphone
/**
* this is now the original code by Michael G Schwern:
* i've changed it just a slightly bit (use emalloc,
* get rid of includes etc)
* - thies - 13.09.1999
*/
/*----------------------------- */
/* this used to be "metaphone.h" */
/*----------------------------- */
/* Special encodings */
#define SH 'X'
#define TH '0'
/*----------------------------- */
/* end of "metaphone.h" */
/*----------------------------- */
/*----------------------------- */
/* this used to be "metachar.h" */
/*----------------------------- */
/* Metachar.h ... little bits about characters for metaphone */
/*-- Character encoding array & accessing macros --*/
/* Stolen directly out of the book... */
char _codes[26] = { 1,16,4,16,9,2,4,16,9,2,0,2,2,2,1,4,0,2,4,4,1,0,0,0,8,0};
#define ENCODE(c) (isalpha(c) ? _codes[((toupper(c)) - 'A')] : 0)
#define isvowel(c) (ENCODE(c) & 1) /* AEIOU */
/* These letters are passed through unchanged */
#define NOCHANGE(c) (ENCODE(c) & 2) /* FJMNR */
/* These form dipthongs when preceding H */
#define AFFECTH(c) (ENCODE(c) & 4) /* CGPST */
/* These make C and G soft */
#define MAKESOFT(c) (ENCODE(c) & 8) /* EIY */
/* These prevent GH from becoming F */
#define NOGHTOF(c) (ENCODE(c) & 16) /* BDH */
/*----------------------------- */
/* end of "metachar.h" */
/*----------------------------- */
/* I suppose I could have been using a character pointer instead of
* accesssing the array directly... */
/* Look at the next letter in the word */
#define Next_Letter (toupper(word[w_idx+1]))
/* Look at the current letter in the word */
#define Curr_Letter (toupper(word[w_idx]))
/* Go N letters back. */
#define Look_Back_Letter(n) (w_idx >= n ? toupper(word[w_idx-n]) : '\0')
/* Previous letter. I dunno, should this return null on failure? */
#define Prev_Letter (Look_Back_Letter(1))
/* Look two letters down. It makes sure you don't walk off the string. */
#define After_Next_Letter (Next_Letter != '\0' ? toupper(word[w_idx+2]) \
: '\0')
#define Look_Ahead_Letter(n) (toupper(Lookahead(word+w_idx, n)))
/* Allows us to safely look ahead an arbitrary # of letters */
/* I probably could have just used strlen... */
static char Lookahead(unsigned char *word, int how_far) {
char letter_ahead = '\0'; /* null by default */
int idx;
for (idx = 0; word[idx] != '\0' && idx < how_far; idx++);
/* Edge forward in the string... */
letter_ahead = (char)word[idx]; /* idx will be either == to how_far or
* at the end of the string
*/
return letter_ahead;
}
/* phonize one letter
* We don't know the buffers size in advance. On way to solve this is to just
* re-allocate the buffer size. We're using an extra of 2 characters (this
* could be one though; or more too). */
#define Phonize(c) { \
if (p_idx >= max_buffer_len) { \
phoned_word = (char *)realloc(phoned_word, max_buffer_len + 2); \
max_buffer_len += 2; \
} \
phoned_word[p_idx++] = c; \
}
/* Slap a null character on the end of the phoned word */
#define End_Phoned_Word {phoned_word[p_idx] = '\0';}
/* How long is the phoned word? */
#define Phone_Len (p_idx)
/* Note is a letter is a 'break' in the word */
#define Isbreak(c) (!isalpha(c))
char *string_metaphone(const char *input, int word_len, long max_phonemes,
int traditional) {
unsigned char *word = (unsigned char *)input;
char *phoned_word;
int w_idx = 0; /* point in the phonization we're at. */
int p_idx = 0; /* end of the phoned phrase */
int max_buffer_len = 0; /* maximum length of the destination buffer */
/*-- Parameter checks --*/
/* Negative phoneme length is meaningless */
if (max_phonemes < 0)
return nullptr;
/* Empty/null string is meaningless */
/* Overly paranoid */
/* always_assert(word != NULL && word[0] != '\0'); */
if (word == nullptr)
return nullptr;
/*-- Allocate memory for our phoned_phrase --*/
if (max_phonemes == 0) { /* Assume largest possible */
max_buffer_len = word_len;
phoned_word = (char *)malloc(word_len + 1);
} else {
max_buffer_len = max_phonemes;
phoned_word = (char *)malloc(max_phonemes +1);
}
/*-- The first phoneme has to be processed specially. --*/
/* Find our first letter */
for (; !isalpha(Curr_Letter); w_idx++) {
/* On the off chance we were given nothing but crap... */
if (Curr_Letter == '\0') {
End_Phoned_Word
return phoned_word; /* For testing */
}
}
switch (Curr_Letter) {
/* AE becomes E */
case 'A':
if (Next_Letter == 'E') {
Phonize('E');
w_idx += 2;
}
/* Remember, preserve vowels at the beginning */
else {
Phonize('A');
w_idx++;
}
break;
/* [GKP]N becomes N */
case 'G':
case 'K':
case 'P':
if (Next_Letter == 'N') {
Phonize('N');
w_idx += 2;
}
break;
/* WH becomes H,
WR becomes R
W if followed by a vowel */
case 'W':
if (Next_Letter == 'H' ||
Next_Letter == 'R') {
Phonize(Next_Letter);
w_idx += 2;
} else if (isvowel(Next_Letter)) {
Phonize('W');
w_idx += 2;
}
/* else ignore */
break;
/* X becomes S */
case 'X':
Phonize('S');
w_idx++;
break;
/* Vowels are kept */
/* We did A already
case 'A':
case 'a':
*/
case 'E':
case 'I':
case 'O':
case 'U':
Phonize(Curr_Letter);
w_idx++;
break;
default:
/* do nothing */
break;
}
/* On to the metaphoning */
for (; Curr_Letter != '\0' &&
(max_phonemes == 0 || Phone_Len < max_phonemes);
w_idx++) {
/* How many letters to skip because an eariler encoding handled
* multiple letters */
unsigned short int skip_letter = 0;
/* THOUGHT: It would be nice if, rather than having things like...
* well, SCI. For SCI you encode the S, then have to remember
* to skip the C. So the phonome SCI invades both S and C. It would
* be better, IMHO, to skip the C from the S part of the encoding.
* Hell, I'm trying it.
*/
/* Ignore non-alphas */
if (!isalpha(Curr_Letter))
continue;
/* Drop duplicates, except CC */
if (Curr_Letter == Prev_Letter &&
Curr_Letter != 'C')
continue;
switch (Curr_Letter) {
/* B -> B unless in MB */
case 'B':
if (Prev_Letter != 'M')
Phonize('B');
break;
/* 'sh' if -CIA- or -CH, but not SCH, except SCHW.
* (SCHW is handled in S)
* S if -CI-, -CE- or -CY-
* dropped if -SCI-, SCE-, -SCY- (handed in S)
* else K
*/
case 'C':
if (MAKESOFT(Next_Letter)) { /* C[IEY] */
if (After_Next_Letter == 'A' &&
Next_Letter == 'I') { /* CIA */
Phonize(SH);
}
/* SC[IEY] */
else if (Prev_Letter == 'S') {
/* Dropped */
} else {
Phonize('S');
}
} else if (Next_Letter == 'H') {
if ((!traditional) && (After_Next_Letter == 'R' ||
Prev_Letter == 'S')) { /* Christ, School */
Phonize('K');
} else {
Phonize(SH);
}
skip_letter++;
} else {
Phonize('K');
}
break;
/* J if in -DGE-, -DGI- or -DGY-
* else T
*/
case 'D':
if (Next_Letter == 'G' && MAKESOFT(After_Next_Letter)) {
Phonize('J');
skip_letter++;
} else
Phonize('T');
break;
/* F if in -GH and not B--GH, D--GH, -H--GH, -H---GH
* else dropped if -GNED, -GN,
* else dropped if -DGE-, -DGI- or -DGY- (handled in D)
* else J if in -GE-, -GI, -GY and not GG
* else K
*/
case 'G':
if (Next_Letter == 'H') {
if (!(NOGHTOF(Look_Back_Letter(3)) || Look_Back_Letter(4) == 'H')) {
Phonize('F');
skip_letter++;
} else {
/* silent */
}
} else if (Next_Letter == 'N') {
if (Isbreak(After_Next_Letter) ||
(After_Next_Letter == 'E' && Look_Ahead_Letter(3) == 'D')) {
/* dropped */
} else
Phonize('K');
} else if (MAKESOFT(Next_Letter) && Prev_Letter != 'G') {
Phonize('J');
} else {
Phonize('K');
}
break;
/* H if before a vowel and not after C,G,P,S,T */
case 'H':
if (isvowel(Next_Letter) && !AFFECTH(Prev_Letter))
Phonize('H');
break;
/* dropped if after C
* else K
*/
case 'K':
if (Prev_Letter != 'C')
Phonize('K');
break;
/* F if before H
* else P
*/
case 'P':
if (Next_Letter == 'H') {
Phonize('F');
} else {
Phonize('P');
}
break;
/* K
*/
case 'Q':
Phonize('K');
break;
/* 'sh' in -SH-, -SIO- or -SIA- or -SCHW-
* else S
*/
case 'S':
if (Next_Letter == 'I' &&
(After_Next_Letter == 'O' || After_Next_Letter == 'A')) {
Phonize(SH);
} else if (Next_Letter == 'H') {
Phonize(SH);
skip_letter++;
} else if ((!traditional) &&
(Next_Letter == 'C' && Look_Ahead_Letter(2) == 'H' &&
Look_Ahead_Letter(3) == 'W')) {
Phonize(SH);
skip_letter += 2;
} else {
Phonize('S');
}
break;
/* 'sh' in -TIA- or -TIO-
* else 'th' before H
* else T
*/
case 'T':
if (Next_Letter == 'I' &&
(After_Next_Letter == 'O' || After_Next_Letter == 'A')) {
Phonize(SH);
} else if (Next_Letter == 'H') {
Phonize(TH);
skip_letter++;
} else {
Phonize('T');
}
break;
/* F */
case 'V':
Phonize('F');
break;
/* W before a vowel, else dropped */
case 'W':
if (isvowel(Next_Letter))
Phonize('W');
break;
/* KS */
case 'X':
Phonize('K');
Phonize('S');
break;
/* Y if followed by a vowel */
case 'Y':
if (isvowel(Next_Letter))
Phonize('Y');
break;
/* S */
case 'Z':
Phonize('S');
break;
/* No transformation */
case 'F':
case 'J':
case 'L':
case 'M':
case 'N':
case 'R':
Phonize(Curr_Letter);
break;
default:
/* nothing */
break;
} /* END SWITCH */
w_idx += skip_letter;
} /* END FOR */
End_Phoned_Word;
return phoned_word;
}
///////////////////////////////////////////////////////////////////////////////
// Cyrillic
/**
* This is codetables for different Cyrillic charsets (relative to koi8-r).
* Each table contains data for 128-255 symbols from ASCII table.
* First 256 symbols are for conversion from koi8-r to corresponding charset,
* second 256 symbols are for reverse conversion, from charset to koi8-r.
*
* Here we have the following tables:
* _cyr_win1251 - for windows-1251 charset
* _cyr_iso88595 - for iso8859-5 charset
* _cyr_cp866 - for x-cp866 charset
* _cyr_mac - for x-mac-cyrillic charset
*/
typedef unsigned char _cyr_charset_table[512];
static const _cyr_charset_table _cyr_win1251 = {
0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,
16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,
32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,
48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,
64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,
80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,
96,97,98,99,100,101,102,103,104,105,106,107,108,109,110,111,
112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127,
46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,
46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,
154,174,190,46,159,189,46,46,179,191,180,157,46,46,156,183,
46,46,182,166,173,46,46,158,163,152,164,155,46,46,46,167,
225,226,247,231,228,229,246,250,233,234,235,236,237,238,239,240,
242,243,244,245,230,232,227,254,251,253,255,249,248,252,224,241,
193,194,215,199,196,197,214,218,201,202,203,204,205,206,207,208,
210,211,212,213,198,200,195,222,219,221,223,217,216,220,192,209,
0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,
16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,
32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,
48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,
64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,
80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,
96,97,98,99,100,101,102,103,104,105,106,107,108,109,110,111,
112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127,
32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,
32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,
32,32,32,184,186,32,179,191,32,32,32,32,32,180,162,32,
32,32,32,168,170,32,178,175,32,32,32,32,32,165,161,169,
254,224,225,246,228,229,244,227,245,232,233,234,235,236,237,238,
239,255,240,241,242,243,230,226,252,251,231,248,253,249,247,250,
222,192,193,214,196,197,212,195,213,200,201,202,203,204,205,206,
207,223,208,209,210,211,198,194,220,219,199,216,221,217,215,218,
};
static const _cyr_charset_table _cyr_cp866 = {
0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,
16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,
32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,
48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,
64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,
80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,
96,97,98,99,100,101,102,103,104,105,106,107,108,109,110,111,
112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127,
225,226,247,231,228,229,246,250,233,234,235,236,237,238,239,240,
242,243,244,245,230,232,227,254,251,253,255,249,248,252,224,241,
193,194,215,199,196,197,214,218,201,202,203,204,205,206,207,208,
35,35,35,124,124,124,124,43,43,124,124,43,43,43,43,43,
43,45,45,124,45,43,124,124,43,43,45,45,124,45,43,45,
45,45,45,43,43,43,43,43,43,43,43,35,35,124,124,35,
210,211,212,213,198,200,195,222,219,221,223,217,216,220,192,209,
179,163,180,164,183,167,190,174,32,149,158,32,152,159,148,154,
0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,
16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,
32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,
48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,
64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,
80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,
96,97,98,99,100,101,102,103,104,105,106,107,108,109,110,111,
112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127,
32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,
32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,
205,186,213,241,243,201,32,245,187,212,211,200,190,32,247,198,
199,204,181,240,242,185,32,244,203,207,208,202,216,32,246,32,
238,160,161,230,164,165,228,163,229,168,169,170,171,172,173,174,
175,239,224,225,226,227,166,162,236,235,167,232,237,233,231,234,
158,128,129,150,132,133,148,131,149,136,137,138,139,140,141,142,
143,159,144,145,146,147,134,130,156,155,135,152,157,153,151,154,
};
static const _cyr_charset_table _cyr_iso88595 = {
0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,
16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,
32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,
48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,
64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,
80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,
96,97,98,99,100,101,102,103,104,105,106,107,108,109,110,111,
112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127,
32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,
32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,
32,179,32,32,32,32,32,32,32,32,32,32,32,32,32,32,
225,226,247,231,228,229,246,250,233,234,235,236,237,238,239,240,
242,243,244,245,230,232,227,254,251,253,255,249,248,252,224,241,
193,194,215,199,196,197,214,218,201,202,203,204,205,206,207,208,
210,211,212,213,198,200,195,222,219,221,223,217,216,220,192,209,
32,163,32,32,32,32,32,32,32,32,32,32,32,32,32,32,
0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,
16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,
32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,
48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,
64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,
80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,
96,97,98,99,100,101,102,103,104,105,106,107,108,109,110,111,
112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127,
32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,
32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,
32,32,32,241,32,32,32,32,32,32,32,32,32,32,32,32,
32,32,32,161,32,32,32,32,32,32,32,32,32,32,32,32,
238,208,209,230,212,213,228,211,229,216,217,218,219,220,221,222,
223,239,224,225,226,227,214,210,236,235,215,232,237,233,231,234,
206,176,177,198,180,181,196,179,197,184,185,186,187,188,189,190,
191,207,192,193,194,195,182,178,204,203,183,200,205,201,199,202,
};
static const _cyr_charset_table _cyr_mac = {
0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,
16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,
32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,
48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,
64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,
80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,
96,97,98,99,100,101,102,103,104,105,106,107,108,109,110,111,
112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127,
225,226,247,231,228,229,246,250,233,234,235,236,237,238,239,240,
242,243,244,245,230,232,227,254,251,253,255,249,248,252,224,241,
160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175,
176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191,
128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,
144,145,146,147,148,149,150,151,152,153,154,155,156,179,163,209,
193,194,215,199,196,197,214,218,201,202,203,204,205,206,207,208,
210,211,212,213,198,200,195,222,219,221,223,217,216,220,192,255,
0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,
16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,
32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,
48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,
64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,
80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,
96,97,98,99,100,101,102,103,104,105,106,107,108,109,110,111,
112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127,
192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,207,
208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,223,
160,161,162,222,164,165,166,167,168,169,170,171,172,173,174,175,
176,177,178,221,180,181,182,183,184,185,186,187,188,189,190,191,
254,224,225,246,228,229,244,227,245,232,233,234,235,236,237,238,
239,223,240,241,242,243,230,226,252,251,231,248,253,249,247,250,
158,128,129,150,132,133,148,131,149,136,137,138,139,140,141,142,
143,159,144,145,146,147,134,130,156,155,135,152,157,153,151,154,
};
/**
* This is the function that performs real in-place conversion of the string
* between charsets.
* Parameters:
* str - string to be converted
* from,to - one-symbol label of source and destination charset
* The following symbols are used as labels:
* k - koi8-r
* w - windows-1251
* i - iso8859-5
* a - x-cp866
* d - x-cp866
* m - x-mac-cyrillic
*/
char *string_convert_cyrillic_string(const char *input, int length,
char from, char to) {
assert(input);
const unsigned char *from_table, *to_table;
unsigned char tmp;
unsigned char *str = (unsigned char *)string_duplicate(input, length);
from_table = nullptr;
to_table = nullptr;
switch (toupper((int)(unsigned char)from)) {
case 'W': from_table = _cyr_win1251; break;
case 'A':
case 'D': from_table = _cyr_cp866; break;
case 'I': from_table = _cyr_iso88595; break;
case 'M': from_table = _cyr_mac; break;
case 'K':
break;
default:
throw_invalid_argument("Unknown source charset: %c", from);
break;
}
switch (toupper((int)(unsigned char)to)) {
case 'W': to_table = _cyr_win1251; break;
case 'A':
case 'D': to_table = _cyr_cp866; break;
case 'I': to_table = _cyr_iso88595; break;
case 'M': to_table = _cyr_mac; break;
case 'K':
break;
default:
throw_invalid_argument("Unknown destination charset: %c", to);
break;
}
if (!str) {
return (char *)str;
}
for (int i = 0; i<length; i++) {
tmp = (from_table == nullptr)? str[i] : from_table[ str[i] ];
str[i] = (to_table == nullptr) ? tmp : to_table[tmp + 256];
}
return (char *)str;
}
///////////////////////////////////////////////////////////////////////////////
// Hebrew
#define HEB_BLOCK_TYPE_ENG 1
#define HEB_BLOCK_TYPE_HEB 2
#define isheb(c) \
(((((unsigned char) c) >= 224) && (((unsigned char) c) <= 250)) ? 1 : 0)
#define _isblank(c) \
(((((unsigned char) c) == ' ' || ((unsigned char) c) == '\t')) ? 1 : 0)
#define _isnewline(c) \
(((((unsigned char) c) == '\n' || ((unsigned char) c) == '\r')) ? 1 : 0)
/**
* Converts Logical Hebrew text (Hebrew Windows style) to Visual text
* Cheers/complaints/flames - Zeev Suraski <zeev@php.net>
*/
char *string_convert_hebrew_string(const char *str, int &str_len,
int max_chars_per_line,
int convert_newlines) {
assert(str);
const char *tmp;
char *heb_str, *broken_str;
char *target;
int block_start, block_end, block_type, block_length, i;
long max_chars=0;
int begin, end, char_count, orig_begin;
if (str_len == 0) {
return nullptr;
}
tmp = str;
block_start=block_end=0;
heb_str = (char *) malloc(str_len + 1);
target = heb_str+str_len;
*target = 0;
target--;
block_length=0;
if (isheb(*tmp)) {
block_type = HEB_BLOCK_TYPE_HEB;
} else {
block_type = HEB_BLOCK_TYPE_ENG;
}
do {
if (block_type == HEB_BLOCK_TYPE_HEB) {
while ((isheb((int)*(tmp+1)) ||
_isblank((int)*(tmp+1)) ||
ispunct((int)*(tmp+1)) ||
(int)*(tmp+1)=='\n' ) && block_end<str_len-1) {
tmp++;
block_end++;
block_length++;
}
for (i = block_start; i<= block_end; i++) {
*target = str[i];
switch (*target) {
case '(': *target = ')'; break;
case ')': *target = '('; break;
case '[': *target = ']'; break;
case ']': *target = '['; break;
case '{': *target = '}'; break;
case '}': *target = '{'; break;
case '<': *target = '>'; break;
case '>': *target = '<'; break;
case '\\': *target = '/'; break;
case '/': *target = '\\'; break;
default:
break;
}
target--;
}
block_type = HEB_BLOCK_TYPE_ENG;
} else {
while (!isheb(*(tmp+1)) &&
(int)*(tmp+1)!='\n' && block_end < str_len-1) {
tmp++;
block_end++;
block_length++;
}
while ((_isblank((int)*tmp) ||
ispunct((int)*tmp)) && *tmp!='/' &&
*tmp!='-' && block_end > block_start) {
tmp--;
block_end--;
}
for (i = block_end; i >= block_start; i--) {
*target = str[i];
target--;
}
block_type = HEB_BLOCK_TYPE_HEB;
}
block_start=block_end+1;
} while (block_end < str_len-1);
broken_str = (char *) malloc(str_len+1);
begin=end=str_len-1;
target = broken_str;
while (1) {
char_count=0;
while ((!max_chars || char_count < max_chars) && begin > 0) {
char_count++;
begin--;
if (begin <= 0 || _isnewline(heb_str[begin])) {
while (begin > 0 && _isnewline(heb_str[begin-1])) {
begin--;
char_count++;
}
break;
}
}
if (char_count == max_chars) { /* try to avoid breaking words */
int new_char_count=char_count, new_begin=begin;
while (new_char_count > 0) {
if (_isblank(heb_str[new_begin]) || _isnewline(heb_str[new_begin])) {
break;
}
new_begin++;
new_char_count--;
}
if (new_char_count > 0) {
char_count=new_char_count;
begin=new_begin;
}
}
orig_begin=begin;
if (_isblank(heb_str[begin])) {
heb_str[begin]='\n';
}
while (begin <= end && _isnewline(heb_str[begin])) {
/* skip leading newlines */
begin++;
}
for (i = begin; i <= end; i++) { /* copy content */
*target = heb_str[i];
target++;
}
for (i = orig_begin; i <= end && _isnewline(heb_str[i]); i++) {
*target = heb_str[i];
target++;
}
begin=orig_begin;
if (begin <= 0) {
*target = 0;
break;
}
begin--;
end=begin;
}
free((void*)heb_str);
if (convert_newlines) {
int count;
char *ret = string_replace(broken_str, str_len, "\n", strlen("\n"),
"<br />\n", strlen("<br />\n"), count, true);
if (ret) {
free(broken_str);
return ret;
}
}
return broken_str;
}
#if defined(__APPLE__)
void *memrchr(const void *s, int c, size_t n) {
for (const char *p = (const char *)s + n - 1; p >= s; p--) {
if (*p == c) return (void *)p;
}
return nullptr;
}
#endif
///////////////////////////////////////////////////////////////////////////////
}