Fixed problems with some verbs and composed substantives, and added some more unit tests

Esse commit está contido em:
Felipe Nascimento de Moura
2011-02-25 11:23:43 -03:00
commit 4618c35c79
16 arquivos alterados com 300 adições e 45 exclusões
+2 -1
Ver Arquivo
@@ -13,5 +13,6 @@
require_once dirname(__FILE__) . '/../mind3rd/API/classes/MindProperty.php';
require_once dirname(__FILE__) . '/../mind3rd/API/cortex/analyst/Analyst.php';
require_once dirname(__FILE__) . '/../mind3rd/API/cortex/analyst/Normalizer.php';
require_once dirname(__FILE__) . '/../mind3rd/API/cortex/tokenizer/Token.php';
require_once dirname(__FILE__) . '/../mind3rd/API/cortex/tokenizer/Tokenizer.php';
require_once dirname(__FILE__) . '/../mind3rd/API/cortex/tokenizer/Tokenizer.php';
@@ -177,7 +177,5 @@ class AnalystPtTest extends PHPUnit_Framework_TestCase {
'rel'=>'student',
'focus'=>'teacher'));
}
}
?>
@@ -0,0 +1,217 @@
<?php
require_once dirname(__FILE__) . '/../../../../../Tests/config.php';
/**
* Test class for Syntaxer.
* Generated by PHPUnit on 2011-02-25 at 09:21:58.
*/
class SyntaxerTest extends PHPUnit_Framework_TestCase {
    /**
     * The Syntaxer instance under test; rebuilt by setIdiom().
     *
     * @var Syntaxer
     */
    protected $object;

    /**
     * Prepares the fixture before each test by loading the default idiom.
     */
    protected function setUp() {
        $this->setIdiom();
    }

    /**
     * Runs after each test; there is nothing to release.
     */
    protected function tearDown() {
    }

    /**
     * Configures the Mind environment for the given idiom and creates
     * fresh Tokenizer, Lexer and Syntaxer instances.
     *
     * @param string $idiom Name of the language directory to load (e.g. 'en').
     */
    public function setIdiom($idiom='en')
    {
        // Build the paths once; they are reused for the requires and the modifiers.
        $langPath  = dirname(__FILE__) . '/../../../../../mind3rd/API/languages/';
        $idiomPath = $langPath . $idiom . '/';

        Mind::$currentProject['idiom'] = $idiom;
        Mind::$langPath = $langPath;

        require_once $idiomPath . 'IgnoreForms.php';
        require_once $idiomPath . 'Verbalizer.php';

        Tokenizer::loadModifiers($idiomPath);
        Mind::$tokenizer = new Tokenizer;
        Mind::$lexer     = new Lexer;
        $this->object    = new Syntaxer;
    }

    /**
     * Feeds the given tokens to the tokenizer, runs the Syntaxer sweep and
     * checks the resulting word list.
     *
     * @param array $tokens   Raw tokens passed to Mind::$tokenizer->sweep().
     * @param array $expected Word list expected in Tokenizer::$words afterwards.
     */
    private function assertSweepProduces(array $tokens, array $expected)
    {
        Mind::$tokenizer->sweep($tokens);
        $this->object->sweep();
        // PHPUnit expects ($expected, $actual); this order keeps failure
        // messages pointing at the right side of the comparison.
        $this->assertEquals($expected, Tokenizer::$words);
    }

    /**
     * Substantives joined by "of" on both sides of the verb are merged
     * with an underscore.
     */
    public function testSweepWords1() {
        $this->assertSweepProduces(
            array('child', 'of', 'heart', 'has', 'parent', 'of', 'family'),
            array('child_heart', 'has', 'parent_family')
        );
    }

    /**
     * "and" between composed substantives comes out as ",", and a chain of
     * two "of" tokens collapses into a single word.
     */
    public function testSweepWords2() {
        $this->assertSweepProduces(
            array(
                'child', 'of', 'heart',
                'and',
                'friend', 'of', 'child', 'of', 'heart',
                'has',
                'parent', 'of', 'family'
            ),
            array(
                'child_heart',
                ',',
                'friend_child_heart',
                'has',
                'parent_family'
            )
        );
    }

    /**
     * Same shape as testSweepWords2, with an explicit "," separator.
     */
    public function testSweepWords3() {
        $this->assertSweepProduces(
            array(
                'child', 'of', 'heart',
                ',',
                'friend', 'of', 'kid', 'of', 'heart',
                'has',
                'parent', 'of', 'family'
            ),
            array(
                'child_heart',
                ',',
                'friend_kid_heart',
                'has',
                'parent_family'
            )
        );
    }

    /**
     * Composed substantives are merged on both sides of the verb, including
     * a comma-separated list after it.
     */
    public function testSweepWords4() {
        $this->assertSweepProduces(
            array(
                'child', 'of', 'heart',
                ',',
                'friend', 'of', 'kid', 'of', 'heart',
                'has',
                'parent', 'of', 'family',
                ',',
                'mother', 'of', 'parent'
            ),
            array(
                'child_heart',
                ',',
                'friend_kid_heart',
                'has',
                'parent_family',
                ',',
                'mother_parent'
            )
        );
    }

    /**
     * A sentence without any "of" token is left unchanged.
     */
    public function testSweepWords5() {
        $this->assertSweepProduces(
            array('child', 'has', 'parent'),
            array('child', 'has', 'parent')
        );
    }

    /**
     * Property tokens (name:type) are kept verbatim, even when the name
     * itself contains "of", while the substantives before the verb are merged.
     */
    public function testSweepWords6() {
        $this->assertSweepProduces(
            array(
                'child', 'of', 'heart',
                ',',
                'friend', 'of', 'kid', 'of', 'heart',
                'has',
                'name of origin:varchar(250)',
                ',',
                'age:int'
            ),
            array(
                'child_heart',
                ',',
                'friend_kid_heart',
                'has',
                'name of origin:varchar(250)',
                ',',
                'age:int'
            )
        );
    }

    /**
     * Plain substantives followed by property tokens are left unchanged.
     */
    public function testSweepWords7() {
        $this->assertSweepProduces(
            array(
                'child',
                ',',
                'friend',
                'has',
                'name of origin:varchar(250)',
                ',',
                'age:int'
            ),
            array(
                'child',
                ',',
                'friend',
                'has',
                'name of origin:varchar(250)',
                ',',
                'age:int'
            )
        );
    }

    /**
     * A single substantive followed by property tokens is left unchanged.
     */
    public function testSweepWords8() {
        $this->assertSweepProduces(
            array(
                'child',
                'has',
                'name of origin:varchar(250)',
                ',',
                'age:int'
            ),
            array(
                'child',
                'has',
                'name of origin:varchar(250)',
                ',',
                'age:int'
            )
        );
    }
}
@@ -5,7 +5,7 @@ require(dirname(__FILE__) . '/../../../../../Tests/config.php');
* Test class for Tokenizer.
* Generated by PHPUnit on 2011-02-21 at 11:46:35.
*/
class TokenizerPTTest extends PHPUnit_Framework_TestCase {
class TokenizerTest extends PHPUnit_Framework_TestCase {
/**
* @var Tokenizer
+3 -1
Ver Arquivo
@@ -5,4 +5,6 @@ which
whom
whose
well
too
too
another
also
+23 -18
Ver Arquivo
@@ -30,6 +30,11 @@ class VerbalizerTest extends PHPUnit_Framework_TestCase {
}
/* NOTE: we won't be testing any first-person verb inflection
* because such rules are commented out in the main class, as
* mind3rd won't treat such sentences
*/
public function testIsVerb() {
$this->assertTrue(pt\Verbalizer::isVerb('correr'));
}
@@ -49,37 +54,37 @@ class VerbalizerTest extends PHPUnit_Framework_TestCase {
$this->assertTrue(pt\Verbalizer::isVerb('sobrescreve'));
}
public function testIsVerb6() {
$this->assertTrue(pt\Verbalizer::isVerb('lemos'));
$this->assertTrue(pt\Verbalizer::isVerb('lerão'));
}
public function testIsVerb7() {
$this->assertTrue(pt\Verbalizer::isVerb('correremos'));
$this->assertTrue(pt\Verbalizer::isVerb('correm'));
}
public function testIsVerb8() {
$this->assertTrue(pt\Verbalizer::isVerb('ministrarão'));
}
public function testIsVerb9() {
$this->assertTrue(pt\Verbalizer::isVerb('tomamos'));
$this->assertTrue(pt\Verbalizer::isVerb('tomarão'));
}
public function testIsVerb10() {
$this->assertTrue(pt\Verbalizer::isVerb('comemos'));
$this->assertTrue(pt\Verbalizer::isVerb('come'));
}
public function testIsVerb11() {
$this->assertTrue(pt\Verbalizer::isVerb('beberemos'));
$this->assertTrue(pt\Verbalizer::isVerb('bebe'));
}
public function testIsVerb12() {
$this->assertTrue(pt\Verbalizer::isVerb('tomo'));
}
public function testIsVerb13() {
$this->assertTrue(pt\Verbalizer::isVerb('como'));
}
public function testIsVerb14() {
$this->assertTrue(pt\Verbalizer::isVerb('toma'));
}
public function testIsVerb13() {
$this->assertTrue(pt\Verbalizer::isVerb('come'));
}
public function testIsVerb14() {
$this->assertTrue(pt\Verbalizer::isVerb('tomará'));
}
public function testIsVerb15() {
$this->assertTrue(pt\Verbalizer::isVerb('coma'));
$this->assertTrue(pt\Verbalizer::isVerb('comerá'));
}
public function testIsVerb17() {
$this->assertTrue(pt\Verbalizer::isVerb('levantaremos'));
$this->assertTrue(pt\Verbalizer::isVerb('levantar'));
}
public function testIsVerb18() {
$this->assertFalse(pt\Verbalizer::isVerb('abajur'));
@@ -94,22 +99,22 @@ class VerbalizerTest extends PHPUnit_Framework_TestCase {
$this->assertTrue(pt\Verbalizer::isVerb('falhar'));
}
public function testIsVerb22() {
$this->assertTrue(pt\Verbalizer::isVerb('falhei'));
$this->assertTrue(pt\Verbalizer::isVerb('falharão'));
}
public function testIsVerb23() {
$this->assertTrue(pt\Verbalizer::isVerb('falharei'));
$this->assertTrue(pt\Verbalizer::isVerb('falhar'));
}
public function testIsVerb24() {
$this->assertTrue(pt\Verbalizer::isVerb('falhou'));
}
public function testIsVerb25() {
$this->assertTrue(pt\Verbalizer::isVerb('falhamos'));
$this->assertTrue(pt\Verbalizer::isVerb('comenta'));
}
public function testIsVerb26() {
$this->assertTrue(pt\Verbalizer::isVerb('falharei'));
$this->assertTrue(pt\Verbalizer::isVerb('comentará'));
}
public function testIsVerb27() {
$this->assertTrue(pt\Verbalizer::isVerb('falharemos'));
$this->assertTrue(pt\Verbalizer::isVerb('comentarão'));
}
public function testIsVerb28() {
$this->assertTrue(pt\Verbalizer::isVerb('correu'));
+10 -3
Ver Arquivo
@@ -1,4 +1,11 @@
de
a
o
as
os
aos
que
da
do
bem
também
tanto
todo
cada
+12 -1
Ver Arquivo
@@ -11,8 +11,19 @@
public static $tmpEntities= Array();
public static $tmpRelations= Array();
public static function normalize()
public static function fixOneByOneRel()
{
}
public static function fixNByNRel()
{
}
public static function normalize()
{
self::fixOneByOneRel();
self::fixNByNRel();
}
}
+8 -7
Ver Arquivo
@@ -9,7 +9,7 @@
* V=Verb
* 0,N=Quantifiers
* O=Or
* C=Complement(like "of", or "de")
* C=Complement/Composite(like "of", or "de")
* A=Addition(like "," or "and")
*
* @author felipe
@@ -37,11 +37,14 @@ class Syntaxer {
/**
* Fixes the composed substantives(defined by the use of
* the "of" tokens)
* the "of" tokens defined into the qualifiers.xml of the current idiom)
*/
public function fetchComposedSubstantives()
{
while(preg_match('/SCS/', Token::$string, $matches, PREG_OFFSET_CAPTURE))
while(preg_match(COMPOSED_SUBST,
Token::$string,
$matches,
PREG_OFFSET_CAPTURE))
{
$matches= $matches[0];
array_splice(Token::$spine, $matches[1], 3, Token::MT_SUBST);
@@ -49,9 +52,8 @@ class Syntaxer {
Token::$words[$matches[1]].
'_'.
Token::$words[$matches[1]+2]);
Token::$string= preg_replace('/SCS/', 'S', Token::$string, 1);
Token::$string= preg_replace(COMPOSED_SUBST, 'S', Token::$string, 1);
}
return true;
}
/**
@@ -84,7 +86,6 @@ class Syntaxer {
$matches= $matches[0];
Analyst::sweep($matches);
return $this;
return $matches;
}
}
+2 -1
Ver Arquivo
@@ -80,7 +80,8 @@ class Tokenizer extends Token{
self::$qualifiers['of'] = self::parseByComa($xml->of);
self::$qualifiers['be'] = self::parseByComa($xml->be);
self::$qualifiers['coma'] = self::parseByComa($xml->coma);
self::$qualifiers['unique'] = self::parseByComa($xml->unique);
self::$qualifiers['unique'] = self::parseByComa($xml->unique);
self::$qualifiers['brake'] = self::parseByComa($xml->brake);
return self::$qualifiers;
}
+2 -1
Ver Arquivo
@@ -5,4 +5,5 @@ which
whom
whose
well
too
too
also
+2
Ver Arquivo
@@ -15,4 +15,6 @@
<key>key,pk,index</key>
<coma>and</coma>
<unique>unique,single,only,alone</unique>
<!-- indicates a change of subject in the middle of a sentence -->
<brake>all,each,while,when,but</brake>
</root>
+9 -4
Ver Arquivo
@@ -38,16 +38,19 @@ class Verbalizer {
'/vo$/' => 'ver',
'/i$/' => 'er',
'/am$/' => 'ar',
'/o$/' => 'ar',
'/ndo$/' => 'r',
// the rules below are commented out in theWebMind because
// we won't treat first-person sentences
/*'/o$/' => 'ar',
'/(.+)o$/' => '$1er',
'/(.+)a$/' => '$1er',
'/em$/' => 'er',
'/remos$/' => 'r',
'/emos$/' => 'er',
'/mos$/' => 'r',
'/ei$/' => '',
'/.ei$/' => 'ar',
'/(.)ei$/' => '$1er',
'/(.)ei$/' => '$1er',*/
'/(.+)a$/' => '$1er',
'/em$/' => 'er',
'/ou$/' => 'ar',
'/eu$/' => 'er',
'/ás$/' => '',
@@ -67,6 +70,8 @@ class Verbalizer {
'dei' => 'dar',
'dou' => 'dar',
'deu' => 'dar',
'farão' => 'fazer',
'fará' => 'fazer',
'dão' => 'dar',
'dará' => 'dar',
'darão' => 'dar',
+1
Ver Arquivo
@@ -15,4 +15,5 @@
<key>chave,key,pk,indice,índice</key>
<coma>e,quanto</coma>
<unique>unico,único,isolado,isolada,isoladamente</unique>
<brake>todo,cada,qualquer,enquanto</brake>
</root>
+1
Ver Arquivo
@@ -18,6 +18,7 @@
define('NEW_LINE', "/\n/");
define('EXEC_STRING', "exec:"); // equal(=) is also acceptable
define('VALID_SUBST_SYNTAX', "S((( )?\,( )?S)?)+");
define('COMPOSED_SUBST', "/SCS/");
// addresses
define('PROJECTS_DIR', '/mind3rd/projects/');
+7 -5
Ver Arquivo
@@ -1,9 +1,10 @@
cada professor tem muitos alunos, sendo que todo aluno tem muitos professor.// e todo aluno pode ter um ou vários professores.
/*
comandante da tropa-militar tem armamentos.
armamento tem número de série:int(20, único).
armamento tem número de série:int(20, único), id:int(), peso do equipamento:int().
filhos da mãezinha do coração tem netos do \peito do véio:int.
/*cada professor deve ter um ou vários alunos.
cada professor deve ter um ou vários alunos.
sogra e sogro tem um genro e nora.
sogra, sogro, genro e nora tem nome:varchar(200, obrigatório), idade:int e
@@ -22,4 +23,5 @@ tanto mãe quanto pai tem nome:varchar, idade:int e sexo:char(1, {F=Feminino|M=M
professor também tem pai e mãe.
aluno pode ter divérsos irmãos.
Cada irmão pode ter filhos, e cada filho pode ter filho.
A grade de cadeiras terá várias disciplinas.*/
A grade de cadeiras terá várias disciplinas.
*/