Get the rest of the ExtIcu-ish things to php
Esse commit está contido em:
@@ -35,9 +35,6 @@
|
||||
#include "hphp/facebook/extensions/tao/test_ext_tao.h"
|
||||
#include "hphp/facebook/extensions/urlextraction/test_ext_urlextraction.h"
|
||||
#include "hphp/test/ext/test_ext_curl.h"
|
||||
#include "hphp/test/ext/test_ext_icu_ucnv.h"
|
||||
#include "hphp/test/ext/test_ext_icu_ucsdet.h"
|
||||
#include "hphp/test/ext/test_ext_icu_uspoof.h"
|
||||
#include "hphp/test/ext/test_ext_imagesprite.h"
|
||||
#include "hphp/test/ext/test_ext_intl.h"
|
||||
#include "hphp/test/ext/test_ext_ipc.h"
|
||||
|
||||
@@ -1,36 +0,0 @@
|
||||
/*
|
||||
+----------------------------------------------------------------------+
|
||||
| HipHop for PHP |
|
||||
+----------------------------------------------------------------------+
|
||||
| Copyright (c) 2010-2013 Facebook, Inc. (http://www.facebook.com) |
|
||||
+----------------------------------------------------------------------+
|
||||
| This source file is subject to version 3.01 of the PHP license, |
|
||||
| that is bundled with this package in the file LICENSE, and is |
|
||||
| available through the world-wide-web at the following url: |
|
||||
| http://www.php.net/license/3_01.txt |
|
||||
| If you did not receive a copy of the PHP license and are unable to |
|
||||
| obtain it through the world-wide-web, please send a note to |
|
||||
| license@php.net so we can mail you a copy immediately. |
|
||||
+----------------------------------------------------------------------+
|
||||
*/
|
||||
|
||||
#include "hphp/test/ext/test_ext_icu_ucnv.h"
|
||||
#include "hphp/runtime/ext/ext_icu_ucnv.h"
|
||||
|
||||
IMPLEMENT_SEP_EXTENSION_TEST(Icu_ucnv);
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// Entry point for the Icu_ucnv test suite.
//
// `which` and `ret` are not touched directly here; presumably the RUN_TEST
// macro (defined outside this file) reads `which` to select tests and folds
// each result into `ret` -- confirm against the macro definition.
bool TestExtIcu_ucnv::RunTests(const std::string &which) {
  bool ret = true;

  RUN_TEST(test_UConverter);

  return ret;
}
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
bool TestExtIcu_ucnv::test_UConverter() {
|
||||
// Handled in TestCodeRun
|
||||
return Count(true);
|
||||
}
|
||||
@@ -1,35 +0,0 @@
|
||||
/*
|
||||
+----------------------------------------------------------------------+
|
||||
| HipHop for PHP |
|
||||
+----------------------------------------------------------------------+
|
||||
| Copyright (c) 2010-2013 Facebook, Inc. (http://www.facebook.com) |
|
||||
+----------------------------------------------------------------------+
|
||||
| This source file is subject to version 3.01 of the PHP license, |
|
||||
| that is bundled with this package in the file LICENSE, and is |
|
||||
| available through the world-wide-web at the following url: |
|
||||
| http://www.php.net/license/3_01.txt |
|
||||
| If you did not receive a copy of the PHP license and are unable to |
|
||||
| obtain it through the world-wide-web, please send a note to |
|
||||
| license@php.net so we can mail you a copy immediately. |
|
||||
+----------------------------------------------------------------------+
|
||||
*/
|
||||
|
||||
#ifndef incl_HPHP_TEST_EXT_ICU_UCNV_H_
|
||||
#define incl_HPHP_TEST_EXT_ICU_UCNV_H_
|
||||
|
||||
// >>>>>> Generated by idl.php. Do NOT modify. <<<<<<
|
||||
|
||||
#include "hphp/test/ext/test_cpp_ext.h"
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// Test fixture for the icu_ucnv extension; derives from TestCppExt.
class TestExtIcu_ucnv : public TestCppExt {
 public:
  // Suite entry point; `which` is forwarded by the test runner.
  virtual bool RunTests(const std::string &which);

  // Individual test cases.
  bool test_UConverter();
};
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
#endif // incl_HPHP_TEST_EXT_ICU_UCNV_H_
|
||||
@@ -1,382 +0,0 @@
|
||||
/*
|
||||
+----------------------------------------------------------------------+
|
||||
| HipHop for PHP |
|
||||
+----------------------------------------------------------------------+
|
||||
| Copyright (c) 2010-2013 Facebook, Inc. (http://www.facebook.com) |
|
||||
+----------------------------------------------------------------------+
|
||||
| This source file is subject to version 3.01 of the PHP license, |
|
||||
| that is bundled with this package in the file LICENSE, and is |
|
||||
| available through the world-wide-web at the following url: |
|
||||
| http://www.php.net/license/3_01.txt |
|
||||
| If you did not receive a copy of the PHP license and are unable to |
|
||||
| obtain it through the world-wide-web, please send a note to |
|
||||
| license@php.net so we can mail you a copy immediately. |
|
||||
+----------------------------------------------------------------------+
|
||||
*/
|
||||
|
||||
#include "hphp/test/ext/test_ext_icu_ucsdet.h"
|
||||
#include "hphp/runtime/ext/ext_icu_ucsdet.h"
|
||||
#include "hphp/runtime/ext/ext_array.h"
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
bool TestExtIcu_ucsdet::RunTests(const std::string &which) {
|
||||
bool ret = true;
|
||||
|
||||
RUN_TEST(test_basics);
|
||||
|
||||
// Special cases
|
||||
RUN_TEST(test_empty);
|
||||
RUN_TEST(test_cannot_detect);
|
||||
RUN_TEST(test_declared_encoding);
|
||||
|
||||
// English and Western European
|
||||
RUN_TEST(test_hello_world);
|
||||
RUN_TEST(test_windows_1252);
|
||||
|
||||
// Eastern European
|
||||
RUN_TEST(test_windows_1250);
|
||||
|
||||
// Arabic
|
||||
RUN_TEST(test_windows_1256);
|
||||
|
||||
// Japanese
|
||||
RUN_TEST(test_shift_jis);
|
||||
RUN_TEST(test_euc_jp);
|
||||
RUN_TEST(test_iso_2022_jp);
|
||||
|
||||
// Chinese
|
||||
RUN_TEST(test_gb2312);
|
||||
RUN_TEST(test_big5);
|
||||
|
||||
// Cyrillic
|
||||
RUN_TEST(test_koi8r);
|
||||
RUN_TEST(test_windows_1251);
|
||||
|
||||
// Universal
|
||||
RUN_TEST(test_utf8);
|
||||
RUN_TEST(test_utf16);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
// Helper shared by the per-encoding tests.
//
// Feeds `bytes` to a fresh EncodingDetector, runs detection, and returns true
// only when the resulting match reports itself valid AND its UTF-8 rendering
// equals `utf8`. An invalid match yields false without comparing text.
bool TestExtIcu_ucsdet::detect_and_convert_to_utf8(CStrRef bytes,
                                                   CStrRef utf8) {
  p_EncodingDetector detector(NEWOBJ(c_EncodingDetector)());
  detector->t_settext(bytes);

  Object matchObj = detector->t_detect();
  p_EncodingMatch match = matchObj.getTyped<c_EncodingMatch>();

  if (match->t_isvalid()) {
    return match->t_getutf8() == utf8;
  }
  return false;
}
|
||||
|
||||
bool TestExtIcu_ucsdet::test_uninitialized() {
|
||||
p_EncodingDetector detector(NEWOBJ(c_EncodingDetector)());
|
||||
|
||||
try {
|
||||
Object matchObj = detector->t_detect();
|
||||
} catch (Exception& e) {
|
||||
return Count(true);
|
||||
}
|
||||
|
||||
return Count(false);
|
||||
}
|
||||
|
||||
bool TestExtIcu_ucsdet::test_basics() {
|
||||
// This is as unmistakably UTF-8 as it gets.
|
||||
const char* utf8_snowman_with_bom = "\uFEFF\u2603";
|
||||
|
||||
p_EncodingDetector detector(NEWOBJ(c_EncodingDetector)());
|
||||
|
||||
detector->t_settext(utf8_snowman_with_bom);
|
||||
Object matchObj = detector->t_detect();
|
||||
p_EncodingMatch match = matchObj.getTyped<c_EncodingMatch>();
|
||||
VERIFY(match->t_isvalid() == true);
|
||||
VERIFY(match->t_getencoding() == "UTF-8");
|
||||
VERIFY(match->t_getconfidence() == 100);
|
||||
VERIFY(match->t_getutf8() == utf8_snowman_with_bom);
|
||||
return Count(true);
|
||||
}
|
||||
|
||||
bool TestExtIcu_ucsdet::test_empty() {
|
||||
VERIFY(detect_and_convert_to_utf8("", "") == true);
|
||||
return Count(true);
|
||||
}
|
||||
|
||||
bool TestExtIcu_ucsdet::test_cannot_detect() {
|
||||
p_EncodingDetector detector(NEWOBJ(c_EncodingDetector)());
|
||||
|
||||
// The detector has no idea what to do with this.
|
||||
detector->t_settext("\xc7\xe8\xec\xed\xe8\xe9 \xe2\xe5\xf7\xe5\xf0");
|
||||
Object matchObj = detector->t_detect();
|
||||
p_EncodingMatch match = matchObj.getTyped<c_EncodingMatch>();
|
||||
VERIFY(match->t_isvalid() == false);
|
||||
return Count(true);
|
||||
}
|
||||
|
||||
bool TestExtIcu_ucsdet::test_declared_encoding() {
|
||||
// Right now (ICU 4.6), this API doesn't actually do anything, but
|
||||
// let's at least verify it doesn't crash.
|
||||
p_EncodingDetector detector(NEWOBJ(c_EncodingDetector)());
|
||||
detector->t_settext("Yo!");
|
||||
detector->t_setdeclaredencoding("windows-1251");
|
||||
|
||||
Object matchObj = detector->t_detect();
|
||||
p_EncodingMatch match = matchObj.getTyped<c_EncodingMatch>();
|
||||
VERIFY(match->t_isvalid() == true);
|
||||
VERIFY(match->t_getutf8() == "Yo!");
|
||||
|
||||
return Count(true);
|
||||
}
|
||||
|
||||
bool TestExtIcu_ucsdet::test_hello_world() {
|
||||
VERIFY(detect_and_convert_to_utf8("Hello, world!", "Hello, world!") == true);
|
||||
return Count(true);
|
||||
}
|
||||
|
||||
bool TestExtIcu_ucsdet::test_windows_1252() {
|
||||
VERIFY(detect_and_convert_to_utf8(
|
||||
"Toda Europa ley\xf3 Don Quijote como una s\xe1tira.",
|
||||
"Toda Europa ley\u00f3 Don Quijote como una s\u00e1tira.") == true);
|
||||
|
||||
VERIFY(detect_and_convert_to_utf8(
|
||||
"Notre P\xe8re, qui \xeates aux cieux",
|
||||
"Notre P\u00e8re, qui \u00eates aux cieux") == true);
|
||||
|
||||
VERIFY(detect_and_convert_to_utf8(
|
||||
"Marta da Silva, als beste Spielerin und beste Torsch\xFCtzin der WM "
|
||||
"2007 sowie bisher f\xFCnf Mal als \x84Weltfu\xDF" "ballerin des "
|
||||
"Jahres\x93 ausgezeichnet, kommt zur Welt.",
|
||||
"Marta da Silva, als beste Spielerin und beste Torsch\u00fctzin der WM "
|
||||
"2007 sowie bisher f\u00FCnf Mal als \u201EWeltfu\u00DFballerin des "
|
||||
"Jahres\u201C ausgezeichnet, kommt zur Welt.") == true);
|
||||
|
||||
return Count(true);
|
||||
}
|
||||
|
||||
bool TestExtIcu_ucsdet::test_windows_1250() {
|
||||
VERIFY(detect_and_convert_to_utf8(
|
||||
"Do Wikipedie m\xf9\x9e" "e p\xf8isp\xedvat kdokoliv.",
|
||||
"Do Wikipedie m\u016f\u017ee p\u0159isp\u00edvat kdokoliv.") == true);
|
||||
|
||||
VERIFY(detect_and_convert_to_utf8(
|
||||
"O\xe8" "e na\x9a, koji jesi na nebesima, sveti se ime Tvoje.",
|
||||
"O\u010de na\u0161, koji jesi na nebesima, sveti se ime Tvoje.") == true);
|
||||
|
||||
VERIFY(detect_and_convert_to_utf8(
|
||||
"Prezentacj\xea pierwszego graficznego \x9crodowiska pracy z rodziny "
|
||||
"Windows firmy Microsoft przeprowadzono w listopadzie 1985.",
|
||||
"Prezentacj\u0119 pierwszego graficznego \u015brodowiska pracy z rodziny "
|
||||
"Windows firmy Microsoft przeprowadzono w listopadzie 1985.") == true);
|
||||
|
||||
return Count(true);
|
||||
}
|
||||
|
||||
bool TestExtIcu_ucsdet::test_windows_1256() {
|
||||
VERIFY(detect_and_convert_to_utf8(
|
||||
"\xe1\xc7 \xc3\xca\xdf\xe1\xe3 \xc7\xe1\xda\xd1\xc8\xed\xc9",
|
||||
"\u0644\u0627 \u0623\u062a\u0643\u0644\u0645 \u0627\u0644\u0639\u0631\u0628"
|
||||
"\u064a\u0629") == true);
|
||||
|
||||
VERIFY(detect_and_convert_to_utf8(
|
||||
"\xe6\xed\xdf\xed\xc8\xed\xcf\xed\xc7 \xe5\xed \xe3\xd4\xd1\xe6\xda \xe3"
|
||||
"\xe6\xd3\xe6\xda\xc9 \xe3\xca\xda\xcf\xcf\xc9 \xc7\xe1\xe1\xdb\xc7\xca"
|
||||
"\xa1 \xe3\xc8\xe4\xed\xc9 \xda\xe1\xec \xc7\xe1\xe6\xed\xc8\xa1 \xd0\xc7"
|
||||
"\xca \xe3\xcd\xca\xe6\xec \xcd\xd1\xa1 \xca\xd4\xdb\xe1\xe5\xc7 \xe3\xc4"
|
||||
"\xd3\xd3\xc9 \xe6\xed\xdf\xed\xe3\xed\xcf\xed\xc7\xa1 \xc7\xe1\xca"
|
||||
"\xed \xe5\xed \xe3\xe4\xd9\xe3\xc9 \xdb\xed\xd1 \xd1\xc8\xcd\xed\xc9.",
|
||||
"\u0648\u064a\u0643\u064a\u0628\u064a\u062f\u064a\u0627 \u0647\u064a \u0645"
|
||||
"\u0634\u0631\u0648\u0639 \u0645\u0648\u0633\u0648\u0639\u0629 \u0645\u062a"
|
||||
"\u0639\u062f\u062f\u0629 \u0627\u0644\u0644\u063a\u0627\u062a\u060c \u0645"
|
||||
"\u0628\u0646\u064a\u0629 \u0639\u0644\u0649 \u0627\u0644\u0648\u064a\u0628"
|
||||
"\u060c \u0630\u0627\u062a \u0645\u062d\u062a\u0648\u0649 \u062d\u0631"
|
||||
"\u060c \u062a\u0634\u063a\u0644\u0647\u0627 \u0645\u0624\u0633\u0633"
|
||||
"\u0629 \u0648\u064a\u0643\u064a\u0645\u064a\u062f\u064a\u0627\u060c \u0627"
|
||||
"\u0644\u062a\u064a \u0647\u064a \u0645\u0646\u0638\u0645\u0629 \u063a"
|
||||
"\u064a\u0631 \u0631\u0628\u062d\u064a\u0629.") == true);
|
||||
return Count(true);
|
||||
}
|
||||
|
||||
bool TestExtIcu_ucsdet::test_shift_jis() {
|
||||
VERIFY(detect_and_convert_to_utf8(
|
||||
"\x81w\x82\xc6\x82\xc8\x82\xe8\x82\xcc\x83g\x83g\x83\x8d\x81x\x82\xcd\x81"
|
||||
"A\x83X\x83^\x83W\x83I\x83W\x83u\x83\x8a\x90\xa7\x8d\xec\x82\xcc\x93\xfa"
|
||||
"\x96{\x82\xcc\x92\xb7\x95\xd2\x83" "A\x83j\x83\x81\x81[\x83V\x83\x87\x83"
|
||||
"\x93\x8d\xec\x95i\x81" "B",
|
||||
"\u300e\u3068\u306a\u308a\u306e\u30c8\u30c8\u30ed\u300f\u306f\u3001\u30b9"
|
||||
"\u30bf\u30b8\u30aa\u30b8\u30d6\u30ea\u5236\u4f5c\u306e\u65e5\u672c\u306e"
|
||||
"\u9577\u7de8\u30a2\u30cb\u30e1\u30fc\u30b7\u30e7\u30f3\u4f5c\u54c1\u3002")
|
||||
== true);
|
||||
|
||||
VERIFY(detect_and_convert_to_utf8(
|
||||
"\x83" "E\x83" "B\x83L\x83y\x83" "f\x83" "B\x83" "A (Wikipedia) \x82\xcd"
|
||||
"\x83" "E\x83" "B\x83L\x83\x81\x83" "f\x83" "B\x83" "A\x8d\xe0\x92"
|
||||
"c\x82\xaa\x89^\x89" "c\x82\xb7\x82\xe9\x83I\x83\x93\x83\x89\x83" "C\x83"
|
||||
"\x93\x95S\x89\xc8\x8e\x96\x93T\x81" "B",
|
||||
"\u30a6\u30a3\u30ad\u30da\u30c7\u30a3\u30a2 (Wikipedia) \u306f\u30a6"
|
||||
"\u30a3\u30ad\u30e1\u30c7\u30a3\u30a2\u8ca1\u56e3\u304c\u904b\u55b6\u3059"
|
||||
"\u308b\u30aa\u30f3\u30e9\u30a4\u30f3\u767e\u79d1\u4e8b\u5178\u3002")
|
||||
== true);
|
||||
|
||||
// Too short; detector thinks it's most likely Windows-1252.
|
||||
//
|
||||
// VERIFY(detect_and_convert_to_utf8(
|
||||
// "\x90\xa2\x8a" "E\x90l\x8c\xa0\x90\xe9\x8c\xbe",
|
||||
// "\u4e16\u754c\u4eba\u6a29\u5ba3\u8a00") == true);
|
||||
|
||||
return Count(true);
|
||||
}
|
||||
|
||||
bool TestExtIcu_ucsdet::test_euc_jp() {
|
||||
VERIFY(detect_and_convert_to_utf8(
|
||||
"\xb2\xbf\xbf\xcd\xa4\xe2\xa1\xa2\xa4\xdb\xa4\xb7\xa4\xa4\xa4\xde\xa4\xde"
|
||||
"\xa4\xcb\xc2\xe1\xca\xe1\xa1\xa2\xb9\xb4\xb6\xd8\xa1\xa2\xcb\xf4\xa4\xcf"
|
||||
"\xc4\xc9\xca\xfc\xa4\xb5\xa4\xec\xa4\xeb\xa4\xb3\xa4\xc8\xa4\xcf\xa4\xca"
|
||||
"\xa4\xa4\xa1\xa3",
|
||||
"\u4f55\u4eba\u3082\u3001\u307b\u3057\u3044\u307e\u307e\u306b\u902e\u6355"
|
||||
"\u3001\u62d8\u7981\u3001\u53c8\u306f\u8ffd\u653e\u3055\u308c\u308b\u3053"
|
||||
"\u3068\u306f\u306a\u3044\u3002") == true);
|
||||
VERIFY(detect_and_convert_to_utf8(
|
||||
"1920\xc7\xaf \xa5\xa6\xa5\xa3\xa5\xf3\xa5\xd6\xa5\xeb\xa5\xc9\xa5\xf3\xc1"
|
||||
"\xaa\xbc\xea\xb8\xa2\xa1\xcaThe Championships, Wimbledon 1920\xa1\xcb\xa4"
|
||||
"\xcb\xb4\xd8\xa4\xb9\xa4\xeb\xb5\xad\xbb\xf6\xa1\xa3",
|
||||
"1920\u5e74 \u30a6\u30a3\u30f3\u30d6\u30eb\u30c9\u30f3\u9078\u624b\u6a29"
|
||||
"\uff08The Championships, Wimbledon 1920\uff09\u306b\u95a2\u3059\u308b"
|
||||
"\u8a18\u4e8b\u3002") == true);
|
||||
|
||||
return Count(true);
|
||||
}
|
||||
|
||||
bool TestExtIcu_ucsdet::test_iso_2022_jp() {
|
||||
VERIFY(detect_and_convert_to_utf8(
|
||||
"\x1b$B%-%M%F%#%C%/%3%M%/%7%g%s!J\x1b(Bkinetic connection\x1b$B!K$O\x1b(B1"
|
||||
"986\x1b$BG/$K%=%K!<$,\x1b(BMSX2\x1b$B$GH/Gd$7$?%Q%:%k%2!<%`!#\x1b(B",
|
||||
"\u30ad\u30cd\u30c6\u30a3\u30c3\u30af\u30b3\u30cd\u30af\u30b7\u30e7\u30f3"
|
||||
"\uff08kinetic connection\uff09\u306f1986\u5e74\u306b\u30bd\u30cb\u30fc"
|
||||
"\u304cMSX2\u3067\u767a\u58f2\u3057\u305f\u30d1\u30ba\u30eb\u30b2\u30fc"
|
||||
"\u30e0\u3002") == true);
|
||||
VERIFY(detect_and_convert_to_utf8(
|
||||
"\x1b$B5f6K\x1b(B!!\x1b$BJQBV2>LL\x1b(B",
|
||||
"\u7a76\u6975!!\u5909\u614b\u4eee\u9762") == true);
|
||||
|
||||
return Count(true);
|
||||
}
|
||||
|
||||
bool TestExtIcu_ucsdet::test_gb2312() {
|
||||
VERIFY(detect_and_convert_to_utf8(
|
||||
"\xca\xc7\xd2\xbb\xb8\xf6\xd3\xef\xd1\xd4\xa1\xa2\xc4\xda\xc8\xdd\xbf\xaa"
|
||||
"\xb7\xc5\xb5\xc4\xcd\xf8\xc2\xe7\xb0\xd9\xbf\xc6\xc8\xab\xca\xe9\xbc\xc6"
|
||||
"\xbb\xae",
|
||||
"\u662f\u4e00\u4e2a\u8bed\u8a00\u3001\u5185\u5bb9\u5f00\u653e\u7684\u7f51"
|
||||
"\u7edc\u767e\u79d1\u5168\u4e66\u8ba1\u5212") == true);
|
||||
VERIFY(detect_and_convert_to_utf8(
|
||||
"\xa1\xb6\xd2\xbb\xc1\xa3\xd6\xd3\xd5\xe6\xc8\xcb\xcb\xd5\xa1\xb7\xa3\xa8"
|
||||
"\xd3\xa2\xce\xc4\xc3\xfb\xa3\xbaSo Real Time Cooking\xa3\xa9",
|
||||
"\u300a\u4e00\u7c92\u949f\u771f\u4eba\u82cf\u300b\uff08\u82f1\u6587\u540d"
|
||||
"\uff1aSo Real Time Cooking\uff09") == true);
|
||||
|
||||
// Too short; detector thinks it's most likely Shift-JIS.
|
||||
// VERIFY(detect_and_convert_to_utf8(
|
||||
// "\xce\xe2\xb8\xe7\xbf\xdf",
|
||||
// "\u5434\u54e5\u7a9f") == true);
|
||||
|
||||
return Count(true);
|
||||
}
|
||||
|
||||
bool TestExtIcu_ucsdet::test_big5() {
|
||||
VERIFY(detect_and_convert_to_utf8(
|
||||
"1\xa1]\xa4@\xa1^\xacO0\xbbP2\xa4\xa7\xb6\xa1\xaa\xba\xa6\xdb\xb5M\xbc\xc6"
|
||||
"\xa1" "A\xacO\xb3\xcc\xa4p\xaa\xba\xa5\xbf\xa9_\xbc\xc6\xa1" "C",
|
||||
"1\uff08\u4e00\uff09\u662f0\u82072\u4e4b\u9593\u7684\u81ea\u7136\u6578"
|
||||
"\uff0c\u662f\u6700\u5c0f\u7684\u6b63\u5947\u6578\u3002") == true);
|
||||
VERIFY(detect_and_convert_to_utf8(
|
||||
"\xbeG\xa5\xf2\xaf\xf4\xa1]\xad^\xa4\xe5\xa6W \xa1G Cheng Chung Yin\xa1^"
|
||||
"\xa1" "A\xacO\xa4@\xa6W\xa5x\xc6W\xa4k\xbat\xad\xfb",
|
||||
"\u912d\u4ef2\u8335\uff08\u82f1\u6587\u540d \uff1a Cheng Chung Yin\uff09"
|
||||
"\uff0c\u662f\u4e00\u540d\u53f0\u7063\u5973\u6f14\u54e1") == true);
|
||||
|
||||
// Too short; detector thinks it's most likely Shift-JIS.
|
||||
// VERIFY(detect_and_convert_to_utf8(
|
||||
// "\xa7" "d\xad\xf4\xb8]",
|
||||
// "\u5433\u54e5\u7a9f") == true);
|
||||
|
||||
return Count(true);
|
||||
}
|
||||
|
||||
bool TestExtIcu_ucsdet::test_koi8r() {
|
||||
VERIFY(detect_and_convert_to_utf8(
|
||||
"\xeb\xd7\xc5\xc2\xc5\xcb \xd0\xc5\xd2\xd7\xc1\xd1 \xd0\xcf \xd0\xcc\xcf"
|
||||
"\xdd\xc1\xc4\xc9 \xc9 \xd7\xd4\xcf\xd2\xc1\xd1 \xd0\xcf \xce\xc1\xd3\xc5"
|
||||
"\xcc\xc5\xce\xc9\xc0 \xd0\xd2\xcf\xd7\xc9\xce\xc3\xc9\xd1 \xeb\xc1\xce"
|
||||
"\xc1\xc4\xd9.",
|
||||
"\u041a\u0432\u0435\u0431\u0435\u043a \u043f\u0435\u0440\u0432\u0430"
|
||||
"\u044f \u043f\u043e \u043f\u043b\u043e\u0449\u0430\u0434\u0438 \u0438 "
|
||||
"\u0432\u0442\u043e\u0440\u0430\u044f \u043f\u043e \u043d\u0430\u0441"
|
||||
"\u0435\u043b\u0435\u043d\u0438\u044e \u043f\u0440\u043e\u0432\u0438"
|
||||
"\u043d\u0446\u0438\u044f \u041a\u0430\u043d\u0430\u0434\u044b.") == true);
|
||||
VERIFY(detect_and_convert_to_utf8(
|
||||
"\xe2\xdf\xd2\xc4\xc5\xce\xc9 \xc5 \xd3\xc5\xcc\xcf \xd7 \xf3\xc5\xd7\xc5"
|
||||
"\xd2\xce\xc1 \xe2\xdf\xcc\xc7\xc1\xd2\xc9\xd1",
|
||||
"\u0411\u044a\u0440\u0434\u0435\u043d\u0438 \u0435 \u0441\u0435\u043b"
|
||||
"\u043e \u0432 \u0421\u0435\u0432\u0435\u0440\u043d\u0430 \u0411\u044a"
|
||||
"\u043b\u0433\u0430\u0440\u0438\u044f") == true);
|
||||
VERIFY(detect_and_convert_to_utf8(
|
||||
"\xfe\xc5\xd2\xd7\xc5\xce\xc1 \xd0\xd2\xc5\xd7\xdf\xda\xc8\xcf\xc4\xce"
|
||||
"\xc1",
|
||||
"\u0427\u0435\u0440\u0432\u0435\u043d\u0430 \u043f\u0440\u0435\u0432"
|
||||
"\u044a\u0437\u0445\u043e\u0434\u043d\u0430") == true);
|
||||
|
||||
return Count(true);
|
||||
}
|
||||
|
||||
bool TestExtIcu_ucsdet::test_windows_1251() {
|
||||
VERIFY(detect_and_convert_to_utf8(
|
||||
"\xce\xf7\xe5 \xed\xe0\xf8, \xea\xee\xbc \xf1\xe8 \xed\xe0 \xed\xe5\xe1"
|
||||
"\xe5\xf1\xe0\xf2\xe0",
|
||||
"\u041e\u0447\u0435 \u043d\u0430\u0448, \u043a\u043e\u0458 \u0441\u0438 "
|
||||
"\u043d\u0430 \u043d\u0435\u0431\u0435\u0441\u0430\u0442\u0430") == true);
|
||||
VERIFY(detect_and_convert_to_utf8(
|
||||
"\xd1 \xe7\xee\xee\xeb\xee\xe3\xe8\xf7\xe5\xf1\xea\xee\xe9 \xf2\xee\xf7\xea"
|
||||
"\xe8 \xe7\xf0\xe5\xed\xe8\xff, \xe4\xee\xec\xe0\xf8\xed\xff\xff \xea\xee"
|
||||
"\xf8\xea\xe0 \x97 \xec\xeb\xe5\xea\xee\xef\xe8\xf2\xe0\xfe\xf9\xe5\xe5"
|
||||
" \xf1\xe5\xec\xe5\xe9\xf1\xf2\xe2\xe0 \xea\xee\xf8\xe0\xf7\xfc\xe8\xf5 "
|
||||
"\xee\xf2\xf0\xff\xe4\xe0 \xf5\xe8\xf9\xed\xfb\xf5.",
|
||||
"\u0421 \u0437\u043e\u043e\u043b\u043e\u0433\u0438\u0447\u0435\u0441\u043a"
|
||||
"\u043e\u0439 \u0442\u043e\u0447\u043a\u0438 \u0437\u0440\u0435\u043d\u0438"
|
||||
"\u044f, \u0434\u043e\u043c\u0430\u0448\u043d\u044f\u044f \u043a\u043e"
|
||||
"\u0448\u043a\u0430 \u2014 \u043c\u043b\u0435\u043a\u043e\u043f\u0438\u0442"
|
||||
"\u0430\u044e\u0449\u0435\u0435 \u0441\u0435\u043c\u0435\u0439\u0441\u0442"
|
||||
"\u0432\u0430 \u043a\u043e\u0448\u0430\u0447\u044c\u0438\u0445 \u043e\u0442"
|
||||
"\u0440\u044f\u0434\u0430 \u0445\u0438\u0449\u043d\u044b\u0445.") == true);
|
||||
return Count(true);
|
||||
}
|
||||
|
||||
bool TestExtIcu_ucsdet::test_utf8() {
|
||||
VERIFY(detect_and_convert_to_utf8(
|
||||
"\u10e8\u10d8\u10dc\u10d0\u10e3\u10e0\u10d8 \u10d9\u10d0\u10e2\u10d0",
|
||||
"\u10e8\u10d8\u10dc\u10d0\u10e3\u10e0\u10d8 \u10d9\u10d0\u10e2\u10d0")
|
||||
== true);
|
||||
VERIFY(detect_and_convert_to_utf8(
|
||||
"\u0e2b\u0e19\u0e49\u0e32\u0e2b\u0e25\u0e31\u0e01",
|
||||
"\u0e2b\u0e19\u0e49\u0e32\u0e2b\u0e25\u0e31\u0e01") == true);
|
||||
|
||||
return Count(true);
|
||||
}
|
||||
|
||||
bool TestExtIcu_ucsdet::test_utf16() {
|
||||
// The detector only handles UTF-16 if there's a BOM at the front.
|
||||
char utf16[] =
|
||||
"\xff\xfeH\x00" "e\x00l\x00l\x00o\x00,\x00 \x00w\x00o\x00r\x00l\x00"
|
||||
"d\x00!\x00";
|
||||
|
||||
// Take off 1 byte for the NUL at the end of the char[].
|
||||
String utf16Str(utf16, sizeof utf16 - 1, AttachLiteral);
|
||||
VERIFY(detect_and_convert_to_utf8(
|
||||
utf16Str,
|
||||
"\ufeffHello, world!") == true);
|
||||
|
||||
return Count(true);
|
||||
}
|
||||
@@ -1,70 +0,0 @@
|
||||
/*
|
||||
+----------------------------------------------------------------------+
|
||||
| HipHop for PHP |
|
||||
+----------------------------------------------------------------------+
|
||||
| Copyright (c) 2010-2013 Facebook, Inc. (http://www.facebook.com) |
|
||||
+----------------------------------------------------------------------+
|
||||
| This source file is subject to version 3.01 of the PHP license, |
|
||||
| that is bundled with this package in the file LICENSE, and is |
|
||||
| available through the world-wide-web at the following url: |
|
||||
| http://www.php.net/license/3_01.txt |
|
||||
| If you did not receive a copy of the PHP license and are unable to |
|
||||
| obtain it through the world-wide-web, please send a note to |
|
||||
| license@php.net so we can mail you a copy immediately. |
|
||||
+----------------------------------------------------------------------+
|
||||
*/
|
||||
|
||||
#ifndef incl_HPHP_TEST_EXT_ICU_UCSDET_H_
|
||||
#define incl_HPHP_TEST_EXT_ICU_UCSDET_H_
|
||||
|
||||
// >>>>>> Generated by idl.php. Do NOT modify. <<<<<<
|
||||
|
||||
#include "hphp/test/ext/test_cpp_ext.h"
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
class TestExtIcu_ucsdet : public TestCppExt {
|
||||
public:
|
||||
virtual bool RunTests(const std::string &which);
|
||||
|
||||
private:
|
||||
bool detect_and_convert_to_utf8(CStrRef bytes, CStrRef utf8);
|
||||
|
||||
bool test_basics();
|
||||
|
||||
bool test_uninitialized();
|
||||
bool test_empty();
|
||||
bool test_cannot_detect();
|
||||
bool test_declared_encoding();
|
||||
|
||||
// English and Western European
|
||||
bool test_hello_world();
|
||||
bool test_windows_1252();
|
||||
|
||||
// Eastern European
|
||||
bool test_windows_1250();
|
||||
|
||||
// Arabic
|
||||
bool test_windows_1256();
|
||||
|
||||
// Japanese
|
||||
bool test_shift_jis();
|
||||
bool test_euc_jp();
|
||||
bool test_iso_2022_jp();
|
||||
|
||||
// Chinese
|
||||
bool test_gb2312();
|
||||
bool test_big5();
|
||||
|
||||
// Cyrillic
|
||||
bool test_koi8r();
|
||||
bool test_windows_1251();
|
||||
|
||||
// Universal
|
||||
bool test_utf8();
|
||||
bool test_utf16();
|
||||
};
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
#endif // incl_HPHP_TEST_EXT_ICU_UCSDET_H_
|
||||
@@ -1,213 +0,0 @@
|
||||
/*
|
||||
+----------------------------------------------------------------------+
|
||||
| HipHop for PHP |
|
||||
+----------------------------------------------------------------------+
|
||||
| Copyright (c) 2010-2013 Facebook, Inc. (http://www.facebook.com) |
|
||||
+----------------------------------------------------------------------+
|
||||
| This source file is subject to version 3.01 of the PHP license, |
|
||||
| that is bundled with this package in the file LICENSE, and is |
|
||||
| available through the world-wide-web at the following url: |
|
||||
| http://www.php.net/license/3_01.txt |
|
||||
| If you did not receive a copy of the PHP license and are unable to |
|
||||
| obtain it through the world-wide-web, please send a note to |
|
||||
| license@php.net so we can mail you a copy immediately. |
|
||||
+----------------------------------------------------------------------+
|
||||
*/
|
||||
|
||||
#include "hphp/test/ext/test_ext_icu_uspoof.h"
|
||||
#include "hphp/runtime/ext/ext_icu_uspoof.h"
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// Entry point for the icu_uspoof (SpoofChecker) test suite.
//
// `which` and `ret` are presumably consumed by the RUN_TEST macro, which is
// defined outside this file -- confirm against the macro definition.
bool TestExtIcu_uspoof::RunTests(const std::string &which) {
  bool ret = true;

  RUN_TEST(test_SpoofChecker_issuspicious);
  RUN_TEST(test_SpoofChecker_areconfusable);
  RUN_TEST(test_SpoofChecker_issuesfound);
  RUN_TEST(test_SpoofChecker_setchecks);
  RUN_TEST(test_SpoofChecker_setallowedlocales);

  return ret;
}
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
bool TestExtIcu_uspoof::test_SpoofChecker_issuspicious() {
|
||||
p_SpoofChecker checker(NEWOBJ(c_SpoofChecker)());
|
||||
VS(checker->t_issuspicious("facebook"), false);
|
||||
|
||||
// facebook with Cyrillic spoof characters
|
||||
VS(checker->t_issuspicious("f\u0430\u0441\u0435b\u043e\u043ek"), true);
|
||||
|
||||
// "Russia" in Cyrillic with Latin spoof characters
|
||||
VS(checker->t_issuspicious("Pocc\u0438\u044f"), true);
|
||||
|
||||
// paypal with Cyrillic spoof characters
|
||||
VS(checker->t_issuspicious("http://www.payp\u0430l.com"), true);
|
||||
|
||||
// certain all-uppercase Latin sequences can be spoof of Greek
|
||||
VS(checker->t_issuspicious("NAPKIN PEZ"), true);
|
||||
VS(checker->t_issuspicious("napkin pez"), false);
|
||||
|
||||
// English with Japanese characters
|
||||
VS(checker->t_issuspicious("True fact: \u5fcd\u8005 are mammals"), false);
|
||||
|
||||
// Japanese name with mixed kanji and hiragana
|
||||
VS(checker->t_issuspicious("\u6a4b\u672c\u611b\u307f"), false);
|
||||
|
||||
try {
|
||||
checker->t_issuspicious("this is not UTF-8: \x87\xFB\xCA\x94\xDB");
|
||||
} catch (Exception& e) {
|
||||
return Count(true);
|
||||
}
|
||||
|
||||
return Count(false);
|
||||
}
|
||||
|
||||
bool TestExtIcu_uspoof::test_SpoofChecker_areconfusable() {
|
||||
p_SpoofChecker checker(NEWOBJ(c_SpoofChecker)());
|
||||
VS(checker->t_areconfusable("hello, world", "goodbye, world"), false);
|
||||
VS(checker->t_areconfusable("hello, world", "hello, world"), true);
|
||||
VS(checker->t_areconfusable("hello, world", "he11o, wor1d"), true);
|
||||
VS(checker->t_areconfusable("hell\u00f8", "hello\u0337"), true);
|
||||
|
||||
VS(checker->t_areconfusable("facebook", "f\u0430\u0441\u0435b\u043e\u043ek"),
|
||||
true);
|
||||
|
||||
VS(checker->t_areconfusable("facebook", "\U0001d41faceboo\u1d0b"), true);
|
||||
|
||||
VS(checker->t_areconfusable("facebook", "\u017facebook"), true);
|
||||
|
||||
VS(checker->t_areconfusable("paypal", "payp\u0430l"), true);
|
||||
VS(checker->t_areconfusable(
|
||||
"NAPKIN PEZ",
|
||||
"\u039d\u0391\u03a1\u039a\u0399\u039d \u03a1\u0395\u0396"),
|
||||
true);
|
||||
|
||||
VS(checker->t_areconfusable(
|
||||
"facebook",
|
||||
"ufiek-a\u048ba\u049d \u049da\u048b\u00f0a\u048b\u01e5a\u048b-\u049dota-"
|
||||
"\u00f0o\u00f0ol"),
|
||||
false);
|
||||
|
||||
try {
|
||||
checker->t_areconfusable(
|
||||
"this is not UTF-8: \x87\xFB\xCA\x94\xDB",
|
||||
"so there.");
|
||||
} catch (Exception& e) {
|
||||
return Count(true);
|
||||
}
|
||||
|
||||
return Count(false);
|
||||
}
|
||||
|
||||
bool TestExtIcu_uspoof::test_SpoofChecker_issuesfound() {
|
||||
p_SpoofChecker checker(NEWOBJ(c_SpoofChecker)());
|
||||
Variant ret;
|
||||
|
||||
VS(checker->t_issuspicious("NAPKIN PEZ", ref(ret)), true);
|
||||
VS(ret.getInt64(), q_SpoofChecker$$WHOLE_SCRIPT_CONFUSABLE);
|
||||
|
||||
VS(checker->t_issuspicious("f\u0430\u0441\u0435b\u043e\u043ek", ref(ret)),
|
||||
true);
|
||||
VS(ret.getInt64(), q_SpoofChecker$$MIXED_SCRIPT_CONFUSABLE);
|
||||
|
||||
VS(checker->t_areconfusable("hello, world", "he11o, wor1d", ref(ret)), true);
|
||||
VS(ret.getInt64(), q_SpoofChecker$$SINGLE_SCRIPT_CONFUSABLE);
|
||||
|
||||
return Count(true);
|
||||
}
|
||||
|
||||
bool TestExtIcu_uspoof::test_SpoofChecker_setchecks() {
|
||||
{
|
||||
p_SpoofChecker checker(NEWOBJ(c_SpoofChecker)());
|
||||
|
||||
// The checker should start in any-case mode.
|
||||
VS(checker->t_areconfusable("HELLO", "H\u0415LLO"), true);
|
||||
VS(checker->t_areconfusable("hello", "h\u0435llo"), true);
|
||||
|
||||
// Go to lower-case only mode (assumes all strings have been
|
||||
// case-folded).
|
||||
checker->t_setchecks(
|
||||
q_SpoofChecker$$MIXED_SCRIPT_CONFUSABLE |
|
||||
q_SpoofChecker$$WHOLE_SCRIPT_CONFUSABLE |
|
||||
q_SpoofChecker$$SINGLE_SCRIPT_CONFUSABLE
|
||||
);
|
||||
VS(checker->t_areconfusable("HELLO", "H\u0415LLO"), false);
|
||||
VS(checker->t_areconfusable("hello", "h\u0435llo"), true);
|
||||
}
|
||||
|
||||
{
|
||||
p_SpoofChecker checker(NEWOBJ(c_SpoofChecker)());
|
||||
VS(checker->t_issuspicious("True fact: \u5fcd\u8005 are mammals"), false);
|
||||
|
||||
// Only allow characters of a single script.
|
||||
checker->t_setchecks(q_SpoofChecker$$SINGLE_SCRIPT);
|
||||
VS(checker->t_issuspicious("True fact: \u5fcd\u8005 are mammals"), true);
|
||||
}
|
||||
|
||||
try {
|
||||
p_SpoofChecker checker(NEWOBJ(c_SpoofChecker)());
|
||||
checker->t_setchecks(0xDEADBEEF);
|
||||
} catch (Exception& e) {
|
||||
return Count(true);
|
||||
}
|
||||
|
||||
return Count(false);
|
||||
}
|
||||
|
||||
bool TestExtIcu_uspoof::test_SpoofChecker_setallowedlocales() {
|
||||
p_SpoofChecker checker(NEWOBJ(c_SpoofChecker)());
|
||||
|
||||
const char* common = "Rogers";
|
||||
const char* japanese_kanji_hiragana = "\u6a4b\u672c\u611b\u307f";
|
||||
const char* korean = "\ud55c\uad6d\ub9d0";
|
||||
const char* arabic = "\u0645\u0631\u062d\u0628\u064b\u0627";
|
||||
const char* russian_cyrillic =
|
||||
"\u0417\u0438\u0301\u043c\u043d\u0438\u0439 "
|
||||
"\u0432\u0435\u0301\u0447\u0435\u0440";
|
||||
const char* snowman = "\u2603";
|
||||
|
||||
checker->t_setallowedlocales("en_US");
|
||||
VS(checker->t_issuspicious(common), false);
|
||||
VS(checker->t_issuspicious(japanese_kanji_hiragana), true);
|
||||
VS(checker->t_issuspicious(russian_cyrillic), true);
|
||||
VS(checker->t_issuspicious(arabic), true);
|
||||
VS(checker->t_issuspicious(korean), true);
|
||||
VS(checker->t_issuspicious(snowman), false);
|
||||
|
||||
checker->t_setallowedlocales("en_US, ja_JP");
|
||||
VS(checker->t_issuspicious(common), false);
|
||||
VS(checker->t_issuspicious(japanese_kanji_hiragana), false);
|
||||
VS(checker->t_issuspicious(russian_cyrillic), true);
|
||||
VS(checker->t_issuspicious(arabic), true);
|
||||
VS(checker->t_issuspicious(korean), true);
|
||||
VS(checker->t_issuspicious(snowman), false);
|
||||
|
||||
checker->t_setallowedlocales("en_US, ko_KR");
|
||||
VS(checker->t_issuspicious(common), false);
|
||||
VS(checker->t_issuspicious(japanese_kanji_hiragana), true);
|
||||
VS(checker->t_issuspicious(russian_cyrillic), true);
|
||||
VS(checker->t_issuspicious(arabic), true);
|
||||
VS(checker->t_issuspicious(korean), false);
|
||||
VS(checker->t_issuspicious(snowman), false);
|
||||
|
||||
checker->t_setallowedlocales("en_US, ar_AR");
|
||||
VS(checker->t_issuspicious(common), false);
|
||||
VS(checker->t_issuspicious(japanese_kanji_hiragana), true);
|
||||
VS(checker->t_issuspicious(russian_cyrillic), true);
|
||||
VS(checker->t_issuspicious(arabic), false);
|
||||
VS(checker->t_issuspicious(korean), true);
|
||||
VS(checker->t_issuspicious(snowman), false);
|
||||
|
||||
checker->t_setallowedlocales("en_US, ru_RU");
|
||||
VS(checker->t_issuspicious(common), false);
|
||||
VS(checker->t_issuspicious(japanese_kanji_hiragana), true);
|
||||
VS(checker->t_issuspicious(russian_cyrillic), false);
|
||||
VS(checker->t_issuspicious(arabic), true);
|
||||
VS(checker->t_issuspicious(korean), true);
|
||||
VS(checker->t_issuspicious(snowman), false);
|
||||
|
||||
return Count(true);
|
||||
}
|
||||
@@ -1,39 +0,0 @@
|
||||
/*
|
||||
+----------------------------------------------------------------------+
|
||||
| HipHop for PHP |
|
||||
+----------------------------------------------------------------------+
|
||||
| Copyright (c) 2010-2013 Facebook, Inc. (http://www.facebook.com) |
|
||||
+----------------------------------------------------------------------+
|
||||
| This source file is subject to version 3.01 of the PHP license, |
|
||||
| that is bundled with this package in the file LICENSE, and is |
|
||||
| available through the world-wide-web at the following url: |
|
||||
| http://www.php.net/license/3_01.txt |
|
||||
| If you did not receive a copy of the PHP license and are unable to |
|
||||
| obtain it through the world-wide-web, please send a note to |
|
||||
| license@php.net so we can mail you a copy immediately. |
|
||||
+----------------------------------------------------------------------+
|
||||
*/
|
||||
|
||||
#ifndef TEST_EXT_ICU_USPOOF_H
|
||||
#define TEST_EXT_ICU_USPOOF_H
|
||||
|
||||
// >>>>>> Generated by idl.php. Do NOT modify. <<<<<<
|
||||
|
||||
#include "hphp/test/ext/test_cpp_ext.h"
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
/**
 * Test fixture for the ICU uspoof (SpoofChecker) extension tests.
 *
 * Declares the RunTests override point taken from TestCppExt together with
 * one test_* method per SpoofChecker API that is exercised.
 */
class TestExtIcu_uspoof : public TestCppExt {
public:
  // Receives the name of the test selection in `which`.
  virtual bool RunTests(const std::string& which);

  // Individual test cases, one per SpoofChecker method.
  bool test_SpoofChecker_issuspicious();
  bool test_SpoofChecker_areconfusable();
  bool test_SpoofChecker_issuesfound();
  bool test_SpoofChecker_setchecks();
  bool test_SpoofChecker_setallowedlocales();
};
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
#endif // TEST_EXT_ICU_USPOOF_H
|
||||
@@ -0,0 +1,322 @@
|
||||
<?php
|
||||
|
||||
/**
 * Strict-equality assertion helper.
 *
 * Always dumps the result of `$x === $y` (this is what the expected-output
 * file is matched against). On a mismatch it additionally prints the
 * expected value, the actual value and a backtrace.
 *
 * @param mixed $x actual value
 * @param mixed $y expected value
 */
function VS($x, $y) {
  var_dump($x === $y);
  if ($x !== $y) {
    // var_export keeps booleans and null readable; plain interpolation
    // would print false as an empty string and true as "1".
    echo "Failed: " . var_export($y, true) . "\n";
    echo "Got: " . var_export($x, true) . "\n";
    var_dump(debug_backtrace());
  }
}
|
||||
/** Asserts that $x is strictly `true` by delegating to VS(). */
function VERIFY($x) {
  VS($x, true);
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////
|
||||
|
||||
// PHP string literals have no \uXXXX escape, so the escapes are decoded by
// wrapping the text in double quotes and parsing it as a JSON string.
function getunicode($x) {
  $json_literal = "\"" . $x . "\"";
  return json_decode($json_literal);
}
|
||||
|
||||
/**
 * Runs the encoding detector over $bytes and checks the UTF-8 conversion of
 * the detected match against $utf8.
 *
 * @param string $bytes raw input in some legacy encoding
 * @param string $utf8  expected UTF-8 text
 * @return bool false when the match is not valid or the converted text
 *              differs from $utf8; true otherwise
 */
function detect_and_convert_to_utf8($bytes, $utf8) {
  $detector = new EncodingDetector();
  $detector->settext($bytes);
  $match = $detector->detect();
  if (!$match->isvalid()) {
    return false;
  }
  // echo "Got: " . $match->getutf8() . "\n";
  // echo "Want: " . $utf8 . "\n";

  // Strict comparison: with == a null expectation (e.g. a failed
  // json_decode) would equal "" and numeric-looking strings would be
  // compared as numbers, letting a wrong conversion pass.
  return $match->getutf8() === $utf8;
}
|
||||
|
||||
/** Detects a BOM-prefixed UTF-8 snowman and checks every match accessor. */
function test_basics() {
  // A BOM followed by U+2603 is about as unambiguous as UTF-8 input gets.
  $utf8_snowman_with_bom = getunicode("\\uFEFF\\u2603");

  $detector = new EncodingDetector();
  $detector->settext($utf8_snowman_with_bom);
  $best = $detector->detect();

  $valid = $best->isvalid() == true;
  VERIFY($valid);
  $is_utf8 = $best->getencoding() == "UTF-8";
  VERIFY($is_utf8);
  $fully_confident = $best->getconfidence() == 100;
  VERIFY($fully_confident);
  $round_trips = $best->getutf8() == $utf8_snowman_with_bom;
  VERIFY($round_trips);
}
|
||||
|
||||
/** Feeds the detector bytes it cannot classify and expects an invalid match. */
function test_cannot_detect() {
  // Input the detector is expected to give up on.
  $undetectable = "\xc7\xe8\xec\xed\xe8\xe9 \xe2\xe5\xf7\xe5\xf0";

  $detector = new EncodingDetector();
  $detector->settext($undetectable);
  $result = $detector->detect();
  VERIFY($result->isvalid() == false);
}
|
||||
|
||||
/**
 * Smoke test for setdeclaredencoding(). Per the original note, the API has
 * no effect as of ICU 4.6, so this only checks that calling it does not
 * crash and that detection still succeeds.
 */
function test_declared_encoding() {
  $text = "Yo!";

  $detector = new EncodingDetector();
  $detector->settext($text);
  $detector->setdeclaredencoding("windows-1251");

  $result = $detector->detect();
  VERIFY($result->isvalid() == true);
  VERIFY($result->getutf8() == "Yo!");
}
|
||||
|
||||
/** Plain ASCII must be detected and come back unchanged. */
function test_hello_world() {
  $ok = detect_and_convert_to_utf8("Hello, world!", "Hello, world!");
  VERIFY($ok == true);
}
|
||||
|
||||
/** Windows-1252 samples: Spanish, French and German text. */
function test_windows_1252() {
  // Spanish
  $bytes = "Toda Europa ley\xf3 Don Quijote como una s\xe1tira.";
  $expected =
    getunicode('Toda Europa ley\u00f3 Don Quijote como una s\u00e1tira.');
  VERIFY(detect_and_convert_to_utf8($bytes, $expected) == true);

  // French
  $bytes = "Notre P\xe8re, qui \xeates aux cieux";
  $expected = getunicode('Notre P\u00e8re, qui \u00eates aux cieux');
  VERIFY(detect_and_convert_to_utf8($bytes, $expected) == true);

  // German, including the Windows-1252-specific quotation marks
  $bytes =
    "Marta da Silva, als beste Spielerin und beste Torsch\xFCtzin der WM ".
    "2007 sowie bisher f\xFCnf Mal als \x84Weltfu\xDF". "ballerin des ".
    "Jahres\x93 ausgezeichnet, kommt zur Welt.";
  $expected = getunicode(
    'Marta da Silva, als beste Spielerin und beste Torsch\u00fctzin der WM '.
    '2007 sowie bisher f\u00FCnf Mal als \u201EWeltfu\u00DFballerin des '.
    'Jahres\u201C ausgezeichnet, kommt zur Welt.');
  VERIFY(detect_and_convert_to_utf8($bytes, $expected) == true);
}
|
||||
|
||||
/** Windows-1250 samples (Central European text). */
function test_windows_1250() {
  $bytes = "Do Wikipedie m\xf9\x9e" ."e p\xf8isp\xedvat kdokoliv.";
  $expected =
    getunicode('Do Wikipedie m\u016f\u017ee p\u0159isp\u00edvat kdokoliv.');
  VERIFY(detect_and_convert_to_utf8($bytes, $expected) == true);

  $bytes = "O\xe8". "e na\x9a, koji jesi na nebesima, sveti se ime Tvoje.";
  $expected =
    getunicode('O\u010de na\u0161, koji jesi na nebesima, sveti se ime Tvoje.');
  VERIFY(detect_and_convert_to_utf8($bytes, $expected) == true);

  $bytes =
    "Prezentacj\xea pierwszego graficznego \x9crodowiska pracy z rodziny ".
    "Windows firmy Microsoft przeprowadzono w listopadzie 1985.";
  $expected = getunicode(
    'Prezentacj\u0119 pierwszego graficznego \u015brodowiska pracy z rodziny '.
    'Windows firmy Microsoft przeprowadzono w listopadzie 1985.');
  VERIFY(detect_and_convert_to_utf8($bytes, $expected) == true);
}
|
||||
|
||||
/** Windows-1256 samples (Arabic text): one short and one long. */
function test_windows_1256() {
  $bytes = "\xe1\xc7 \xc3\xca\xdf\xe1\xe3 \xc7\xe1\xda\xd1\xc8\xed\xc9";
  $expected = getunicode(
    '\u0644\u0627 \u0623\u062a\u0643\u0644\u0645 \u0627\u0644\u0639\u0631\u0628'.
    '\u064a\u0629');
  VERIFY(detect_and_convert_to_utf8($bytes, $expected) == true);

  $bytes =
    "\xe6\xed\xdf\xed\xc8\xed\xcf\xed\xc7 \xe5\xed \xe3\xd4\xd1\xe6\xda \xe3".
    "\xe6\xd3\xe6\xda\xc9 \xe3\xca\xda\xcf\xcf\xc9 \xc7\xe1\xe1\xdb\xc7\xca".
    "\xa1 \xe3\xc8\xe4\xed\xc9 \xda\xe1\xec \xc7\xe1\xe6\xed\xc8\xa1 \xd0\xc7".
    "\xca \xe3\xcd\xca\xe6\xec \xcd\xd1\xa1 \xca\xd4\xdb\xe1\xe5\xc7 \xe3\xc4".
    "\xd3\xd3\xc9 \xe6\xed\xdf\xed\xe3\xed\xcf\xed\xc7\xa1 \xc7\xe1\xca".
    "\xed \xe5\xed \xe3\xe4\xd9\xe3\xc9 \xdb\xed\xd1 \xd1\xc8\xcd\xed\xc9.";
  $expected = getunicode(
    '\u0648\u064a\u0643\u064a\u0628\u064a\u062f\u064a\u0627 \u0647\u064a \u0645'.
    '\u0634\u0631\u0648\u0639 \u0645\u0648\u0633\u0648\u0639\u0629 \u0645\u062a'.
    '\u0639\u062f\u062f\u0629 \u0627\u0644\u0644\u063a\u0627\u062a\u060c \u0645'.
    '\u0628\u0646\u064a\u0629 \u0639\u0644\u0649 \u0627\u0644\u0648\u064a\u0628'.
    '\u060c \u0630\u0627\u062a \u0645\u062d\u062a\u0648\u0649 \u062d\u0631'.
    '\u060c \u062a\u0634\u063a\u0644\u0647\u0627 \u0645\u0624\u0633\u0633'.
    '\u0629 \u0648\u064a\u0643\u064a\u0645\u064a\u062f\u064a\u0627\u060c \u0627'.
    '\u0644\u062a\u064a \u0647\u064a \u0645\u0646\u0638\u0645\u0629 \u063a'.
    '\u064a\u0631 \u0631\u0628\u062d\u064a\u0629.');
  VERIFY(detect_and_convert_to_utf8($bytes, $expected) == true);
}
|
||||
|
||||
/** Shift-JIS samples (Japanese text). */
function test_shift_jis() {
  $bytes =
    "\x81w\x82\xc6\x82\xc8\x82\xe8\x82\xcc\x83g\x83g\x83\x8d\x81x\x82\xcd\x81".
    "A\x83X\x83^\x83W\x83I\x83W\x83u\x83\x8a\x90\xa7\x8d\xec\x82\xcc\x93\xfa".
    "\x96{\x82\xcc\x92\xb7\x95\xd2\x83". "A\x83j\x83\x81\x81[\x83V\x83\x87\x83".
    "\x93\x8d\xec\x95i\x81". "B";
  $expected = getunicode(
    '\u300e\u3068\u306a\u308a\u306e\u30c8\u30c8\u30ed\u300f\u306f\u3001\u30b9'.
    '\u30bf\u30b8\u30aa\u30b8\u30d6\u30ea\u5236\u4f5c\u306e\u65e5\u672c\u306e'.
    '\u9577\u7de8\u30a2\u30cb\u30e1\u30fc\u30b7\u30e7\u30f3\u4f5c\u54c1\u3002');
  VERIFY(detect_and_convert_to_utf8($bytes, $expected) == true);

  $bytes =
    "\x83". "E\x83". "B\x83L\x83y\x83". "f\x83". "B\x83". "A (Wikipedia) \x82\xcd".
    "\x83". "E\x83". "B\x83L\x83\x81\x83". "f\x83". "B\x83". "A\x8d\xe0\x92".
    "c\x82\xaa\x89^\x89". "c\x82\xb7\x82\xe9\x83I\x83\x93\x83\x89\x83". "C\x83".
    "\x93\x95S\x89\xc8\x8e\x96\x93T\x81". "B";
  $expected = getunicode(
    '\u30a6\u30a3\u30ad\u30da\u30c7\u30a3\u30a2 (Wikipedia) \u306f\u30a6'.
    '\u30a3\u30ad\u30e1\u30c7\u30a3\u30a2\u8ca1\u56e3\u304c\u904b\u55b6\u3059'.
    '\u308b\u30aa\u30f3\u30e9\u30a4\u30f3\u767e\u79d1\u4e8b\u5178\u3002');
  VERIFY(detect_and_convert_to_utf8($bytes, $expected) == true);

  // Disabled case: too short; detector thinks it's most likely Windows-1252.
  //
  // VERIFY(detect_and_convert_to_utf8(
  //   "\x90\xa2\x8a" "E\x90l\x8c\xa0\x90\xe9\x8c\xbe",
  //   "\u4e16\u754c\u4eba\u6a29\u5ba3\u8a00") == true);
}
|
||||
|
||||
/** EUC-JP samples (Japanese text, the second mixed with ASCII). */
function test_euc_jp() {
  $bytes =
    "\xb2\xbf\xbf\xcd\xa4\xe2\xa1\xa2\xa4\xdb\xa4\xb7\xa4\xa4\xa4\xde\xa4\xde".
    "\xa4\xcb\xc2\xe1\xca\xe1\xa1\xa2\xb9\xb4\xb6\xd8\xa1\xa2\xcb\xf4\xa4\xcf".
    "\xc4\xc9\xca\xfc\xa4\xb5\xa4\xec\xa4\xeb\xa4\xb3\xa4\xc8\xa4\xcf\xa4\xca".
    "\xa4\xa4\xa1\xa3";
  $expected = getunicode(
    '\u4f55\u4eba\u3082\u3001\u307b\u3057\u3044\u307e\u307e\u306b\u902e\u6355'.
    '\u3001\u62d8\u7981\u3001\u53c8\u306f\u8ffd\u653e\u3055\u308c\u308b\u3053'.
    '\u3068\u306f\u306a\u3044\u3002');
  VERIFY(detect_and_convert_to_utf8($bytes, $expected) == true);

  $bytes =
    "1920\xc7\xaf \xa5\xa6\xa5\xa3\xa5\xf3\xa5\xd6\xa5\xeb\xa5\xc9\xa5\xf3\xc1".
    "\xaa\xbc\xea\xb8\xa2\xa1\xcaThe Championships, Wimbledon 1920\xa1\xcb\xa4".
    "\xcb\xb4\xd8\xa4\xb9\xa4\xeb\xb5\xad\xbb\xf6\xa1\xa3";
  $expected = getunicode(
    '1920\u5e74 \u30a6\u30a3\u30f3\u30d6\u30eb\u30c9\u30f3\u9078\u624b\u6a29'.
    '\uff08The Championships, Wimbledon 1920\uff09\u306b\u95a2\u3059\u308b'.
    '\u8a18\u4e8b\u3002');
  VERIFY(detect_and_convert_to_utf8($bytes, $expected) == true);
}
|
||||
|
||||
/** ISO-2022-JP samples; the inputs switch character sets via ESC sequences. */
function test_iso_2022_jp() {
  $bytes =
    "\x1b\$B%-%M%F%#%C%/%3%M%/%7%g%s!J\x1b(Bkinetic connection\x1b\$B!K\$O\x1b(B1".
    "986\x1b\$BG/\$K%=%K!<\$,\x1b(BMSX2\x1b\$B\$GH/Gd\$7\$?%Q%:%k%2!<%`!#\x1b(B";
  $expected = getunicode(
    '\u30ad\u30cd\u30c6\u30a3\u30c3\u30af\u30b3\u30cd\u30af\u30b7\u30e7\u30f3'.
    '\uff08kinetic connection\uff09\u306f1986\u5e74\u306b\u30bd\u30cb\u30fc'.
    '\u304cMSX2\u3067\u767a\u58f2\u3057\u305f\u30d1\u30ba\u30eb\u30b2\u30fc'.
    '\u30e0\u3002');
  VERIFY(detect_and_convert_to_utf8($bytes, $expected) == true);

  $bytes = "\x1b\$B5f6K\x1b(B!!\x1b\$BJQBV2>LL\x1b(B";
  $expected = getunicode("\u7a76\u6975!!\u5909\u614b\u4eee\u9762");
  VERIFY(detect_and_convert_to_utf8($bytes, $expected) == true);
}
|
||||
|
||||
/** GB2312 samples (Simplified Chinese text). */
function test_gb2312() {
  $bytes =
    "\xca\xc7\xd2\xbb\xb8\xf6\xd3\xef\xd1\xd4\xa1\xa2\xc4\xda\xc8\xdd\xbf\xaa".
    "\xb7\xc5\xb5\xc4\xcd\xf8\xc2\xe7\xb0\xd9\xbf\xc6\xc8\xab\xca\xe9\xbc\xc6".
    "\xbb\xae";
  $expected = getunicode(
    '\u662f\u4e00\u4e2a\u8bed\u8a00\u3001\u5185\u5bb9\u5f00\u653e\u7684\u7f51'.
    '\u7edc\u767e\u79d1\u5168\u4e66\u8ba1\u5212');
  VERIFY(detect_and_convert_to_utf8($bytes, $expected) == true);

  $bytes =
    "\xa1\xb6\xd2\xbb\xc1\xa3\xd6\xd3\xd5\xe6\xc8\xcb\xcb\xd5\xa1\xb7\xa3\xa8".
    "\xd3\xa2\xce\xc4\xc3\xfb\xa3\xbaSo Real Time Cooking\xa3\xa9";
  $expected = getunicode(
    '\u300a\u4e00\u7c92\u949f\u771f\u4eba\u82cf\u300b\uff08\u82f1\u6587\u540d'.
    '\uff1aSo Real Time Cooking\uff09');
  VERIFY(detect_and_convert_to_utf8($bytes, $expected) == true);

  // Disabled case: too short; detector thinks it's most likely Shift-JIS.
  // VERIFY(detect_and_convert_to_utf8(
  //   "\xce\xe2\xb8\xe7\xbf\xdf",
  //   "\u5434\u54e5\u7a9f") == true);
}
|
||||
|
||||
/** Big5 samples (Traditional Chinese text). */
function test_big5() {
  $bytes =
    "1\xa1]\xa4@\xa1^\xacO0\xbbP2\xa4\xa7\xb6\xa1\xaa\xba\xa6\xdb\xb5M\xbc\xc6".
    "\xa1". "A\xacO\xb3\xcc\xa4p\xaa\xba\xa5\xbf\xa9_\xbc\xc6\xa1". "C";
  $expected = getunicode(
    '1\uff08\u4e00\uff09\u662f0\u82072\u4e4b\u9593\u7684\u81ea\u7136\u6578'.
    '\uff0c\u662f\u6700\u5c0f\u7684\u6b63\u5947\u6578\u3002');
  VERIFY(detect_and_convert_to_utf8($bytes, $expected) == true);

  $bytes =
    "\xbeG\xa5\xf2\xaf\xf4\xa1]\xad^\xa4\xe5\xa6W \xa1G Cheng Chung Yin\xa1^".
    "\xa1". "A\xacO\xa4@\xa6W\xa5x\xc6W\xa4k\xbat\xad\xfb";
  $expected = getunicode(
    '\u912d\u4ef2\u8335\uff08\u82f1\u6587\u540d \uff1a Cheng Chung Yin\uff09'.
    '\uff0c\u662f\u4e00\u540d\u53f0\u7063\u5973\u6f14\u54e1');
  VERIFY(detect_and_convert_to_utf8($bytes, $expected) == true);

  // Disabled case: too short; detector thinks it's most likely Shift-JIS.
  // VERIFY(detect_and_convert_to_utf8(
  //   "\xa7" "d\xad\xf4\xb8]",
  //   "\u5433\u54e5\u7a9f") == true);
}
|
||||
|
||||
/** KOI8-R samples (Cyrillic text). */
function test_koi8r() {
  $bytes =
    "\xeb\xd7\xc5\xc2\xc5\xcb \xd0\xc5\xd2\xd7\xc1\xd1 \xd0\xcf \xd0\xcc\xcf".
    "\xdd\xc1\xc4\xc9 \xc9 \xd7\xd4\xcf\xd2\xc1\xd1 \xd0\xcf \xce\xc1\xd3\xc5".
    "\xcc\xc5\xce\xc9\xc0 \xd0\xd2\xcf\xd7\xc9\xce\xc3\xc9\xd1 \xeb\xc1\xce".
    "\xc1\xc4\xd9.";
  $expected = getunicode(
    '\u041a\u0432\u0435\u0431\u0435\u043a \u043f\u0435\u0440\u0432\u0430'.
    '\u044f \u043f\u043e \u043f\u043b\u043e\u0449\u0430\u0434\u0438 \u0438 '.
    '\u0432\u0442\u043e\u0440\u0430\u044f \u043f\u043e \u043d\u0430\u0441'.
    '\u0435\u043b\u0435\u043d\u0438\u044e \u043f\u0440\u043e\u0432\u0438'.
    '\u043d\u0446\u0438\u044f \u041a\u0430\u043d\u0430\u0434\u044b.');
  VERIFY(detect_and_convert_to_utf8($bytes, $expected) == true);

  $bytes =
    "\xe2\xdf\xd2\xc4\xc5\xce\xc9 \xc5 \xd3\xc5\xcc\xcf \xd7 \xf3\xc5\xd7\xc5".
    "\xd2\xce\xc1 \xe2\xdf\xcc\xc7\xc1\xd2\xc9\xd1";
  $expected = getunicode(
    '\u0411\u044a\u0440\u0434\u0435\u043d\u0438 \u0435 \u0441\u0435\u043b'.
    '\u043e \u0432 \u0421\u0435\u0432\u0435\u0440\u043d\u0430 \u0411\u044a'.
    '\u043b\u0433\u0430\u0440\u0438\u044f');
  VERIFY(detect_and_convert_to_utf8($bytes, $expected) == true);

  $bytes =
    "\xfe\xc5\xd2\xd7\xc5\xce\xc1 \xd0\xd2\xc5\xd7\xdf\xda\xc8\xcf\xc4\xce".
    "\xc1";
  $expected = getunicode(
    '\u0427\u0435\u0440\u0432\u0435\u043d\u0430 \u043f\u0440\u0435\u0432'.
    '\u044a\u0437\u0445\u043e\u0434\u043d\u0430');
  VERIFY(detect_and_convert_to_utf8($bytes, $expected) == true);
}
|
||||
|
||||
/** Windows-1251 samples (Cyrillic text): one short and one long. */
function test_windows_1251() {
  $bytes =
    "\xce\xf7\xe5 \xed\xe0\xf8, \xea\xee\xbc \xf1\xe8 \xed\xe0 \xed\xe5\xe1".
    "\xe5\xf1\xe0\xf2\xe0";
  $expected = getunicode(
    '\u041e\u0447\u0435 \u043d\u0430\u0448, \u043a\u043e\u0458 \u0441\u0438 '.
    '\u043d\u0430 \u043d\u0435\u0431\u0435\u0441\u0430\u0442\u0430');
  VERIFY(detect_and_convert_to_utf8($bytes, $expected) == true);

  $bytes =
    "\xd1 \xe7\xee\xee\xeb\xee\xe3\xe8\xf7\xe5\xf1\xea\xee\xe9 \xf2\xee\xf7\xea".
    "\xe8 \xe7\xf0\xe5\xed\xe8\xff, \xe4\xee\xec\xe0\xf8\xed\xff\xff \xea\xee".
    "\xf8\xea\xe0 \x97 \xec\xeb\xe5\xea\xee\xef\xe8\xf2\xe0\xfe\xf9\xe5\xe5".
    " \xf1\xe5\xec\xe5\xe9\xf1\xf2\xe2\xe0 \xea\xee\xf8\xe0\xf7\xfc\xe8\xf5 ".
    "\xee\xf2\xf0\xff\xe4\xe0 \xf5\xe8\xf9\xed\xfb\xf5.";
  $expected = getunicode(
    '\u0421 \u0437\u043e\u043e\u043b\u043e\u0433\u0438\u0447\u0435\u0441\u043a'.
    '\u043e\u0439 \u0442\u043e\u0447\u043a\u0438 \u0437\u0440\u0435\u043d\u0438'.
    '\u044f, \u0434\u043e\u043c\u0430\u0448\u043d\u044f\u044f \u043a\u043e'.
    '\u0448\u043a\u0430 \u2014 \u043c\u043b\u0435\u043a\u043e\u043f\u0438\u0442'.
    '\u0430\u044e\u0449\u0435\u0435 \u0441\u0435\u043c\u0435\u0439\u0441\u0442'.
    '\u0432\u0430 \u043a\u043e\u0448\u0430\u0447\u044c\u0438\u0445 \u043e\u0442'.
    '\u0440\u044f\u0434\u0430 \u0445\u0438\u0449\u043d\u044b\u0445.');
  VERIFY(detect_and_convert_to_utf8($bytes, $expected) == true);
}
|
||||
|
||||
/** UTF-8 input must be detected and returned unchanged. */
function test_utf8() {
  $input = getunicode(
    "\u10e8\u10d8\u10dc\u10d0\u10e3\u10e0\u10d8 \u10d9\u10d0\u10e2\u10d0");
  $expected = getunicode(
    '\u10e8\u10d8\u10dc\u10d0\u10e3\u10e0\u10d8 \u10d9\u10d0\u10e2\u10d0');
  VERIFY(detect_and_convert_to_utf8($input, $expected) == true);

  $input = getunicode('\u0e2b\u0e19\u0e49\u0e32\u0e2b\u0e25\u0e31\u0e01');
  $expected = getunicode('\u0e2b\u0e19\u0e49\u0e32\u0e2b\u0e25\u0e31\u0e01');
  VERIFY(detect_and_convert_to_utf8($input, $expected) == true);
}
|
||||
|
||||
/** BOM-prefixed UTF-16 input converts to UTF-8 with the BOM preserved. */
function test_utf16() {
  // Per the original note, the detector only recognises UTF-16 when the
  // input starts with a BOM.
  $utf16 =
    "\xff\xfeH\x00". "e\x00l\x00l\x00o\x00,\x00 \x00w\x00o\x00r\x00l\x00".
    "d\x00!\x00";
  $expected = getunicode("\ufeffHello, world!");
  VERIFY(detect_and_convert_to_utf8($utf16, $expected) == true);
}
|
||||
|
||||
// Run every test case in order; the expected-output file depends on this
// exact sequence.
$encoding_detector_tests = array(
  'test_basics',
  'test_cannot_detect',
  'test_declared_encoding',
  'test_hello_world',
  'test_windows_1252',
  'test_windows_1250',
  'test_windows_1256',
  'test_shift_jis',
  'test_euc_jp',
  'test_iso_2022_jp',
  'test_gb2312',
  'test_big5',
  'test_koi8r',
  'test_windows_1251',
  'test_utf8',
  'test_utf16',
);
foreach ($encoding_detector_tests as $test_case) {
  $test_case();
}
|
||||
@@ -0,0 +1,34 @@
|
||||
bool(true)
|
||||
bool(true)
|
||||
bool(true)
|
||||
bool(true)
|
||||
bool(true)
|
||||
bool(true)
|
||||
bool(true)
|
||||
bool(true)
|
||||
bool(true)
|
||||
bool(true)
|
||||
bool(true)
|
||||
bool(true)
|
||||
bool(true)
|
||||
bool(true)
|
||||
bool(true)
|
||||
bool(true)
|
||||
bool(true)
|
||||
bool(true)
|
||||
bool(true)
|
||||
bool(true)
|
||||
bool(true)
|
||||
bool(true)
|
||||
bool(true)
|
||||
bool(true)
|
||||
bool(true)
|
||||
bool(true)
|
||||
bool(true)
|
||||
bool(true)
|
||||
bool(true)
|
||||
bool(true)
|
||||
bool(true)
|
||||
bool(true)
|
||||
bool(true)
|
||||
bool(true)
|
||||
@@ -0,0 +1,187 @@
|
||||
<?php
|
||||
|
||||
/**
 * Strict-equality assertion helper.
 *
 * Always dumps the result of `$x === $y` (this is what the expected-output
 * file is matched against). On a mismatch it additionally prints the
 * expected value, the actual value and a backtrace.
 *
 * @param mixed $x actual value
 * @param mixed $y expected value
 */
function VS($x, $y) {
  var_dump($x === $y);
  if ($x !== $y) {
    // var_export keeps booleans and null readable; plain interpolation
    // would print false as an empty string and true as "1".
    echo "Failed: " . var_export($y, true) . "\n";
    echo "Got: " . var_export($x, true) . "\n";
    var_dump(debug_backtrace());
  }
}
|
||||
/** Asserts that $x is strictly `true` by delegating to VS(). */
function VERIFY($x) {
  VS($x, true);
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////
|
||||
|
||||
// PHP string literals have no \uXXXX escape, so the escapes are decoded by
// wrapping the text in double quotes and parsing it as a JSON string.
function u($x) {
  $json_literal = "\"" . $x . "\"";
  return json_decode($json_literal);
}
|
||||
|
||||
/** Exercises SpoofChecker::issuspicious() with its default configuration. */
function test_SpoofChecker_issuspicious() {
  $checker = new SpoofChecker();

  $plain = "facebook";
  VS($checker->issuspicious($plain), false);

  // "facebook" with Cyrillic look-alike characters mixed in.
  $cyrillic_facebook = u('f\u0430\u0441\u0435b\u043e\u043ek');
  VS($checker->issuspicious($cyrillic_facebook), true);

  // "Russia" in Cyrillic with Latin look-alike characters mixed in.
  $latin_russia = u('Pocc\u0438\u044f');
  VS($checker->issuspicious($latin_russia), true);

  // "paypal" URL with a Cyrillic look-alike character.
  $cyrillic_paypal = u('http://www.payp\u0430l.com');
  VS($checker->issuspicious($cyrillic_paypal), true);

  // Certain all-uppercase Latin sequences can be a spoof of Greek.
  VS($checker->issuspicious('NAPKIN PEZ'), true);
  VS($checker->issuspicious('napkin pez'), false);

  // English text containing Japanese characters.
  $english_with_kanji = u('True fact: \u5fcd\u8005 are mammals');
  VS($checker->issuspicious($english_with_kanji), false);

  // Japanese name mixing kanji and hiragana.
  $japanese_name = u('\u6a4b\u672c\u611b\u307f');
  VS($checker->issuspicious($japanese_name), false);

  // Disabled check (kept from the original test):
  // try {
  //   $checker->issuspicious("this is not UTF-8: \x87\xFB\xCA\x94\xDB");
  // } catch (Exception $e) {
  //   VS(true, true);
  // }
}
|
||||
|
||||
/** Exercises SpoofChecker::areconfusable() with its default configuration. */
function test_SpoofChecker_areconfusable() {
  $checker = new SpoofChecker();

  // Plain ASCII comparisons.
  VS($checker->areconfusable("hello, world", "goodbye, world"), false);
  VS($checker->areconfusable("hello, world", "hello, world"), true);
  VS($checker->areconfusable("hello, world", "he11o, wor1d"), true);

  $o_with_stroke = u('hell\u00f8');
  $o_with_overlay = u('hello\u0337');
  VS($checker->areconfusable($o_with_stroke, $o_with_overlay), true);

  // Look-alikes of "facebook".
  $cyrillic_facebook = u('f\u0430\u0441\u0435b\u043e\u043ek');
  VS($checker->areconfusable("facebook", $cyrillic_facebook), true);

  $symbol_facebook = "\xf0\x9d\x90\x9faceboo".u('\u1d0b');
  VS($checker->areconfusable("facebook", $symbol_facebook), true);

  $long_s_facebook = u('\u017facebook');
  VS($checker->areconfusable("facebook", $long_s_facebook), true);

  // Look-alikes of other strings.
  $cyrillic_paypal = u('payp\u0430l');
  VS($checker->areconfusable("paypal", $cyrillic_paypal), true);

  $greek_napkin =
    u('\u039d\u0391\u03a1\u039a\u0399\u039d \u03a1\u0395\u0396');
  VS($checker->areconfusable("NAPKIN PEZ", $greek_napkin), true);

  // Unrelated text must not be reported as confusable.
  $unrelated =
    u('ufiek-a\u048ba\u049d \u049da\u048b\u00f0a\u048b\u01e5a\u048b-\u049dota-'.
      '\u00f0o\u00f0ol');
  VS($checker->areconfusable("facebook", $unrelated), false);

  // Disabled check (kept from the original test):
  // try {
  //   $checker->areconfusable(
  //     "this is not UTF-8: \x87\xFB\xCA\x94\xDB",
  //     "so there.");
  // } catch (Exception $e) {
  //   VS(true, true);
  // }
}
|
||||
|
||||
/**
 * Checks the by-reference "issues found" out-parameter of issuspicious()
 * and areconfusable(): each call is expected to report exactly one
 * specific SpoofChecker::*_CONFUSABLE flag in $ret.
 */
function test_SpoofChecker_issuesfound() {
  $checker = new SpoofChecker();

  VS($checker->issuspicious("NAPKIN PEZ", $ret), true);
  VS($ret, SpoofChecker::WHOLE_SCRIPT_CONFUSABLE);

  VS($checker->issuspicious(u('f\u0430\u0441\u0435b\u043e\u043ek'), $ret),
     true);
  VS($ret, SpoofChecker::MIXED_SCRIPT_CONFUSABLE);

  VS($checker->areconfusable("hello, world", "he11o, wor1d", $ret), true);
  VS($ret, SpoofChecker::SINGLE_SCRIPT_CONFUSABLE);

  // The former trailing `return Count(true);` was a leftover from the C++
  // version of this test. In PHP it resolves to count() on a boolean, which
  // is a TypeError on PHP 8 and a warning on PHP 7.2+, and the return value
  // was never used by the caller, so it has been dropped.
}
|
||||
|
||||
/** Exercises SpoofChecker::setchecks() and its effect on later checks. */
function test_SpoofChecker_setchecks() {
  $upper_spoof = u('H\u0415LLO');
  $lower_spoof = u('h\u0435llo');
  $english_with_kanji = u('True fact: \u5fcd\u8005 are mammals');

  $checker = new SpoofChecker();

  // The checker should start in any-case mode.
  VS($checker->areconfusable("HELLO", $upper_spoof), true);
  VS($checker->areconfusable("hello", $lower_spoof), true);

  // Go to lower-case only mode (assumes all strings have been
  // case-folded).
  $lower_case_only =
    SpoofChecker::MIXED_SCRIPT_CONFUSABLE |
    SpoofChecker::WHOLE_SCRIPT_CONFUSABLE |
    SpoofChecker::SINGLE_SCRIPT_CONFUSABLE;
  $checker->setchecks($lower_case_only);
  VS($checker->areconfusable("HELLO", $upper_spoof), false);
  VS($checker->areconfusable("hello", $lower_spoof), true);

  // Fresh checker: mixed-script text is accepted by default ...
  $checker = new SpoofChecker();
  VS($checker->issuspicious($english_with_kanji), false);

  // ... and flagged once only a single script is allowed.
  $checker->setchecks(SpoofChecker::SINGLE_SCRIPT);
  VS($checker->issuspicious($english_with_kanji), true);

  // Disabled check (kept from the original test):
  // try {
  //   $checker = new SpoofChecker();
  //   $checker->setchecks(0xDEADBEEF);
  // } catch (Exception $e) {
  //   VS(true, true);
  // }
}
|
||||
|
||||
/**
 * Exercises SpoofChecker::setallowedlocales(): for each allowed-locale list
 * the same six sample strings are checked, in the same order, against the
 * expected issuspicious() result.
 */
function test_SpoofChecker_setallowedlocales() {
  $checker = new SpoofChecker();

  // Samples, in the order they are checked for every locale list:
  // common, Japanese (kanji + hiragana), Russian (Cyrillic), Arabic,
  // Korean, snowman.
  $samples = array(
    "Rogers",
    u('\u6a4b\u672c\u611b\u307f'),
    u('\u0417\u0438\u0301\u043c\u043d\u0438\u0439 '.
      '\u0432\u0435\u0301\u0447\u0435\u0440'),
    u('\u0645\u0631\u062d\u0628\u064b\u0627'),
    u('\ud55c\uad6d\ub9d0'),
    u('\u2603'),
  );

  // Locale list => expected issuspicious() result per sample (same order).
  $expectations = array(
    "en_US"        => array(false, true,  true,  true,  true,  false),
    "en_US, ja_JP" => array(false, false, true,  true,  true,  false),
    "en_US, ko_KR" => array(false, true,  true,  true,  false, false),
    "en_US, ar_AR" => array(false, true,  true,  false, true,  false),
    "en_US, ru_RU" => array(false, true,  false, true,  true,  false),
  );

  foreach ($expectations as $locales => $expected) {
    $checker->setallowedlocales($locales);
    foreach ($samples as $idx => $text) {
      VS($checker->issuspicious($text), $expected[$idx]);
    }
  }
}
|
||||
|
||||
// Run every test case in order; the expected-output file depends on this
// exact sequence.
$spoof_checker_tests = array(
  'test_SpoofChecker_issuspicious',
  'test_SpoofChecker_areconfusable',
  'test_SpoofChecker_issuesfound',
  'test_SpoofChecker_setchecks',
  'test_SpoofChecker_setallowedlocales',
);
foreach ($spoof_checker_tests as $test_case) {
  $test_case();
}
|
||||
@@ -0,0 +1,60 @@
|
||||
bool(true)
|
||||
bool(true)
|
||||
bool(true)
|
||||
bool(true)
|
||||
bool(true)
|
||||
bool(true)
|
||||
bool(true)
|
||||
bool(true)
|
||||
bool(true)
|
||||
bool(true)
|
||||
bool(true)
|
||||
bool(true)
|
||||
bool(true)
|
||||
bool(true)
|
||||
bool(true)
|
||||
bool(true)
|
||||
bool(true)
|
||||
bool(true)
|
||||
bool(true)
|
||||
bool(true)
|
||||
bool(true)
|
||||
bool(true)
|
||||
bool(true)
|
||||
bool(true)
|
||||
bool(true)
|
||||
bool(true)
|
||||
bool(true)
|
||||
bool(true)
|
||||
bool(true)
|
||||
bool(true)
|
||||
bool(true)
|
||||
bool(true)
|
||||
bool(true)
|
||||
bool(true)
|
||||
bool(true)
|
||||
bool(true)
|
||||
bool(true)
|
||||
bool(true)
|
||||
bool(true)
|
||||
bool(true)
|
||||
bool(true)
|
||||
bool(true)
|
||||
bool(true)
|
||||
bool(true)
|
||||
bool(true)
|
||||
bool(true)
|
||||
bool(true)
|
||||
bool(true)
|
||||
bool(true)
|
||||
bool(true)
|
||||
bool(true)
|
||||
bool(true)
|
||||
bool(true)
|
||||
bool(true)
|
||||
bool(true)
|
||||
bool(true)
|
||||
bool(true)
|
||||
bool(true)
|
||||
bool(true)
|
||||
bool(true)
|
||||
Referência em uma Nova Issue
Bloquear um usuário