Arquivos
wMind/mind3rd/API/cortex/syntaxer/Syntaxer.php
T
2011-03-15 12:36:46 -03:00

106 linhas
2.8 KiB
PHP
Arquivo Executável

<?php
/**
* This file is part of TheWebMind 3rd generation.
*
* Syntaxer, within the Cortex/Syntaxer packages.<br/>
* Notice that, these packages are being used only for documentation,
* not to organize the classes.
*
* @author Felipe Nascimento de Moura <felipenmoura@gmail.com>
* @license licenses/mind3rd.license
*/
/**
* Identifies and organizes the valid syntaxes on the content.
*
* This class will work with the tokenized strucure and
* the original code, due to identify patterns and apply
* any needed rules.
* <pre>
* Legend:
* Q=May have, Must have
* S=Substantive
* V=Verb
* 0,N=Quantifiers
* O=Or
* C=Complement/Composite(like "of", or "de")
* A=Addition(like "," or "and"
*</pre>
* @author Felipe Nascimento de Moura <felipenmoura@gmail.com>
* @package Cortex
* @subpackage Syntaxer
*/
class Syntaxer {
public static $sintatics= Array();
/**
* Loads the file which has the list of avaliable sintaxes.
*/
public static function loadSintaticList()
{
if(!file_exists('sintatics.list'))
$fR= fopen(Mind::$langPath.Mind::$curLang.'/sintatics.list', 'rb');
else
$fR= fopen('sintatics.list', 'rb');
while (!feof($fR)){
$sint= preg_replace('/\s/', '', fgets($fR, 4096));
self::$sintatics[]= $sint;
}
}
/**
* Fetches the identified composed substantives.
*
* Fixes the composed substantives(defined by the use of
* the "of" tokens defined into the qualifiers.xml of the current idiom)
*/
public function fetchComposedSubstantives()
{
while(preg_match(COMPOSED_SUBST,
Token::$string,
$matches,
PREG_OFFSET_CAPTURE))
{
$matches= $matches[0];
array_splice(Token::$spine, $matches[1], 3, Token::MT_SUBST);
array_splice(Token::$words, $matches[1], 3,
Token::$words[$matches[1]].
'_'.
Token::$words[$matches[1]+2]);
Token::$string= preg_replace(COMPOSED_SUBST, 'S', Token::$string, 1);
}
}
/**
* Sweeps the content to apply the rules and identify patterns.
*/
public function sweep()
{
// loading stintatic list: all the avaliable, allowed syntaxes
// formats, specified by the selected idiom
self::loadSintaticList();
// mounting the regular expression
$pattern= implode('|', self::$sintatics);
// let's find all the patterns that match
// that means that we'll find only expressions with valid syntax
// reminding that the pattern was dynamicaly created before
$pattern= str_replace('S', VALID_SUBST_SYNTAX, $pattern);
$this->fetchComposedSubstantives();
// finding the valid syntaxes
preg_match_all('/'.$pattern.'/',
Token::$string,
$matches,
PREG_OFFSET_CAPTURE);
// as we know it's only one block, we can use it straightly
$matches= $matches[0];
Analyst::sweep($matches);
return $matches;
}
}