179 linhas
3.9 KiB
PHP
Arquivo Executável
179 linhas
3.9 KiB
PHP
Arquivo Executável
<?php
|
|
/**
 * This file is part of TheWebMind 3rd generation.
 *
 * Token, within the Cortex/Tokenizer packages.<br/>
 * Note that these packages are used only for documentation purposes,
 * not to organize the classes.
 *
 * @author Felipe Nascimento de Moura <felipenmoura@gmail.com>
 * @license licenses/mind3rd.license
 */
|
|
/**
 * The Token itself.
 *
 * Classifies words one at a time and accumulates the result in three
 * static collections: the words as received ($words), one numeric MT_*
 * code per word ($spine) and a compact string of MS_* symbols ($string).
 *
 * @author Felipe Nascimento de Moura <felipenmoura@gmail.com>
 * @package Cortex
 * @subpackage Tokenizer
 */
class Token
{
    // Tokens to be used
    // MT stands for MindTokenizer (numeric code pushed onto $spine)
    // MS stands for MindSyntaxer  (symbol appended to $string)
    const MT_PERIOD  = -2;
    const MS_PERIOD  = '.';
    const MT_COMA    = -1;
    const MS_COMA    = ',';
    const MT_VOID    = 0;
    const MS_VOID    = '';
    const MT_VERB    = 1;
    const MS_VERB    = 'V';
    const MT_SUBST   = 2;
    const MS_SUBST   = 'S';
    // The three quantifiers below share the same syntaxer symbol ('N').
    const MT_NONE    = 4;
    const MS_NONE    = 'N';
    const MT_ONE     = 8;
    const MS_ONE     = 'N';
    const MT_OR      = 16;
    const MS_OR      = 'O';
    const MT_MANY    = 32;
    const MS_MANY    = 'N';
    // "must" and "may" share the same syntaxer symbol ('Q').
    const MT_QMUST   = 64;
    const MS_QMUST   = 'Q';
    const MT_QMAY    = 128;
    const MS_QMAY    = 'Q';
    // NOTE(review): 254 and 564 break the power-of-two pattern used by the
    // surrounding codes (256 and 512 would fit it). The values are kept
    // untouched because other files may depend on them - TODO confirm.
    // Neither of these two tokens has an MS_* counterpart.
    const MT_QNOTNULL = 254;
    const MT_QKEY     = 564;
    const MT_QOF      = 1024;
    const MS_QOF      = 'C';
    const MT_QBE      = 2048;
    const MS_QBE      = 'B';
    const MT_QBRAKE   = -4;
    const MS_QBRAKE   = 'b';
    const MT_ANY      = 4096;
    const MS_ANY      = '*';

    /**
     * One MT_* code per word passed to add(), in insertion order.
     * @var array
     */
    public static $spine = array();

    /**
     * Every word passed to add(), in insertion order. Words listed in
     * Tokenizer::$qualifiers['coma'] are stored as ','.
     * @var array
     */
    public static $words = array();

    /**
     * Concatenation of the MS_* symbols of the added words. The "notnull"
     * and "key" qualifiers append nothing, so this string may hold fewer
     * symbols than $spine holds codes.
     * @var string
     */
    public static $string = '';

    /**
     * Identifies the word type and adds it to the structure.
     * As the identified type, the word represents an assumed token.
     *
     * The checks run in a fixed order and the first match wins: ignorable
     * form, comma, period, quantifiers (none, one, many, or), qualifiers
     * (must, may, notnull, of, be, key, brake), verb. A word matching none
     * of them is treated as a substantive.
     *
     * The method only touches static state, so it is declared static;
     * calling it on an instance ($token->add($word)) still works.
     *
     * @param string $word
     * @return string|null the word itself when it was classified as a
     *                     substantive, null for every other token type
     */
    public static function add($word)
    {
        // Idiom-specific helper classes, resolved from the current project.
        $idiom       = Mind::$currentProject['idiom'];
        $ignoreForms = $idiom.'\IgnoreForms';
        $verbalizer  = $idiom.'\Verbalizer';

        // Words configured as comma equivalents are normalized to ','.
        if(in_array($word, Tokenizer::$qualifiers['coma'], true))
        {
            $word = ',';
        }

        self::$words[] = $word;

        if($ignoreForms::shouldBeIgnored($word))
        {
            self::push(self::MT_ANY, self::MS_ANY);
            return null;
        }

        // punctuation
        $punctuation = array(
            ',' => array(self::MT_COMA,   self::MS_COMA),
            '.' => array(self::MT_PERIOD, self::MS_PERIOD),
        );
        foreach($punctuation as $mark => $token)
        {
            if($word === $mark)
            {
                self::push($token[0], $token[1]);
                return null;
            }
        }

        // let's check for quantifiers (array order is the lookup order)
        $quantifiers = array(
            'none' => array(self::MT_NONE, self::MS_NONE),
            'one'  => array(self::MT_ONE,  self::MS_ONE),
            'many' => array(self::MT_MANY, self::MS_MANY),
            'or'   => array(self::MT_OR,   self::MS_OR),
        );
        foreach($quantifiers as $kind => $token)
        {
            if(Tokenizer::isQuantifier($kind, $word))
            {
                self::push($token[0], $token[1]);
                return null;
            }
        }

        // and here, the qualifiers; a null symbol means the qualifier
        // is recorded on the spine only
        $qualifiers = array(
            'must'    => array(self::MT_QMUST,    self::MS_QMUST),
            'may'     => array(self::MT_QMAY,     self::MS_QMAY),
            'notnull' => array(self::MT_QNOTNULL, null),
            'of'      => array(self::MT_QOF,      self::MS_QOF),
            'be'      => array(self::MT_QBE,      self::MS_QBE),
            'key'     => array(self::MT_QKEY,     null),
            'brake'   => array(self::MT_QBRAKE,   self::MS_QBRAKE),
        );
        foreach($qualifiers as $kind => $token)
        {
            if(Tokenizer::isQualifier($kind, $word))
            {
                self::push($token[0], $token[1]);
                return null;
            }
        }

        // we know these words are already in their canonical form,
        // so we can simply look for them on the list
        if($verbalizer::isInVerbList($word))
        {
            self::push(self::MT_VERB, self::MS_VERB);
            return null;
        }

        // anything else is assumed to be a substantive
        self::push(self::MT_SUBST, self::MS_SUBST);

        return $word;
    }

    /**
     * Records one classified token: its code goes onto $spine and, when a
     * symbol is given, the symbol is appended to $string.
     *
     * @param int         $code   one of the MT_* constants
     * @param string|null $symbol the matching MS_* constant, or null when
     *                            the token has no syntaxer symbol
     * @return void
     */
    private static function push($code, $symbol = null)
    {
        self::$spine[] = $code;
        if($symbol !== null)
        {
            self::$string .= $symbol;
        }
    }
}