added treatment for parentheses for attributes and added interpreter for dataTypes

2011-01-04 03:32:30 -02:00
commit 9b6f47f2f5
@@ -89,14 +89,19 @@ class Lexer

 		// now, it's time to start working with the data
 		$this->content= trim(str_replace("\n", ' ', $content));
+		while(strstr($this->content, '	')!==false) // ignoring tabs
+			$this->content= str_replace('	', ' ', $this->content);
+		while(strstr($this->content, '  ')!==false) // ignoring multiple spaces
+			$this->content= str_replace('  ', ' ', $this->content);
+
 		$this->originalContent= $this->content;
 		$this->content= $this->str_split_utf8($this->content);

 		// the fixed content;
 		$fixed= "";
 		// for each charactere on the content
-		// let's remove th invalid ones
-		for($i=0, $j=sizeof($this->content); $i<$j; $i++)
+		// let's remove the invalid ones
+		for($i=0, $j=sizeof($this->content); $i<$j; ++$i)
 		{
 			$letter= $this->content[$i];
 			if($this->isValidChar($letter))
@@ -109,12 +114,25 @@ class Lexer
 			$fixed= str_replace($char, $token, $fixed);
 		}

+		// but content between parentheses should be left with
+		// normal spaces, instead of the space token
+		//preg_match_all('/(\(.+?\))|(".+?")/', $fixed, $matches);
+
+		// TODO: fix: when a default value between double quotes contains a ")", that ")" ends the parenthesized expression too early
+		preg_match_all('/(\(.+?\))|(".+?")/', $fixed, $matches);
+		$i= 1;
+		$matches= $matches[0];
+		foreach($matches as $match)
+		{
+			$fixedMatch= str_replace($this->tokens[' '], ' ', $match);
+			$fixed= str_replace($match, $fixedMatch, $fixed, $i);
+		}
+
 		// let's deal with the \n and multiline comments
 		$fixed= preg_replace("/\n/", $this->tokens[' '], $fixed);
 		$fixed= preg_replace('/\/\*.+\*\//', '', $fixed);

 		$exploded= explode($this->tokens[' '], $fixed);
-		//$exploded= preg_split('//', $fixed);

 		$fixed= array_filter($exploded);

@@ -142,6 +160,7 @@ class Lexer
 		$this->lang= Mind::$l10n->name;
 		$xml= simplexml_load_file(Mind::$langPath.$this->lang.'/lexics.xml');
 		include(Mind::$langPath.$this->lang.'/Inflect.php');
+
 		$this->validChars= (string)$xml->validchars->lower;
 		$this->validChars.= (string)$xml->validchars->upper;
 		$this->validChars.= (string)$xml->validchars->special;
@@ -47,7 +47,7 @@ class Syntaxer {
 		$pattern= implode('|', self::$sintatics);

 		// let's find all the patterns that match
-		// that means that we'll find only expressions with valid expressions
+		// that means that we'll find only expressions with valid syntax
 		$pattern= str_replace('S', 'S((( )?\,( )?S)?)+', $pattern);

 		preg_match_all('/'.$pattern.'/',
@@ -12,7 +12,18 @@ class Tokenizer extends Token{
 	public static $quantifiers;
 	public static $qualifiers;
 	public static $spine= Array();
-	public static $string='';
+	public static $string= '';
+	public static $dataTypes= Array();
+
+	/**
+	 * Splits a comma-separated string into an array of trimmed entries
+	 * @param Mixed $str The value to split; it is cast to string first
+	 * @return Array The entries, with the whitespace around each one removed
+	 */
+	private static function parseByComa($str)
+	{
+		return array_map('trim', explode(',', (String)$str));
+	}

 	/**
 	 * This method builds the required structure from the
@@ -43,10 +54,10 @@ class Tokenizer extends Token{
 	public static function loadQuantifiers($xml)
 	{
 		self::$quantifiers= Array();
-		self::$quantifiers['none'] = explode(',', str_replace(', ', ',', (String)$xml->none));
-		self::$quantifiers['one']  = explode(',', str_replace(', ', ',', (String)$xml->one));
-		self::$quantifiers['many'] = explode(',', str_replace(', ', ',', (String)$xml->many));
-		self::$quantifiers['or']   = explode(',', str_replace(', ', ',', (String)$xml->or));
+		self::$quantifiers['none'] = self::parseByComa($xml->none);
+		self::$quantifiers['one']  = self::parseByComa($xml->one);
+		self::$quantifiers['many'] = self::parseByComa($xml->many);
+		self::$quantifiers['or']   = self::parseByComa($xml->or);
 		return self::$quantifiers;
 	}

@@ -62,16 +73,34 @@ class Tokenizer extends Token{
 	{

 		self::$qualifiers= Array();
-		self::$qualifiers['must']   = explode(',', str_replace(', ', ',', (String)$xml->must));
-		self::$qualifiers['may']   = explode(',', str_replace(', ', ',', (String)$xml->may));
-		self::$qualifiers['notnull']   = explode(',', str_replace(', ', ',', (String)$xml->notnull));
-		self::$qualifiers['key']   = explode(',', str_replace(', ', ',', (String)$xml->key));
-		self::$qualifiers['of']   = explode(',', str_replace(', ', ',', (String)$xml->of));
-		self::$qualifiers['be']   = explode(',', str_replace(', ', ',', (String)$xml->be));
-		self::$qualifiers['coma'] = explode(',', str_replace(', ', ',', (String)$xml->coma));
+		self::$qualifiers['must']   = self::parseByComa($xml->must);
+		self::$qualifiers['may']    = self::parseByComa($xml->may);
+		self::$qualifiers['notnull']= self::parseByComa($xml->notnull);
+		self::$qualifiers['key']    = self::parseByComa($xml->key);
+		self::$qualifiers['of']     = self::parseByComa($xml->of);
+		self::$qualifiers['be']     = self::parseByComa($xml->be);
+		self::$qualifiers['coma']   = self::parseByComa($xml->coma);
 		return self::$qualifiers;
 	}

+	/**
+	 * Fills self::$dataTypes with the accepted names of each data type
+	 * @param SimpleXML $xml Node read through one child per data type
+	 * @return Array The parsed names, indexed by data type
+	 */
+	public static function loadTypes($xml)
+	{
+		$names= Array(
+			'varchar', 'char', 'int', 'float',
+			'boolean', 'date', 'time', 'file'
+		);
+		// each child holds a comma-separated list, parsed by parseByComa
+		foreach($names as $name)
+			self::$dataTypes[$name]= self::parseByComa($xml->$name);
+
+		return self::$dataTypes;
+	}
+
 	/**
 	 * This method verifies whether the passed word is
 	 * a valid quantifier in the passed list of quantifiers
@@ -131,14 +160,20 @@ class Tokenizer extends Token{
 			$qlf= simplexml_load_file(Mind::$langPath.
 									  Mind::$l10n->name.
 									  '/qualifiers.xml');
+			$tps= simplexml_load_file(Mind::$langPath.
+									  Mind::$l10n->name.
+									  '/datatypes.xml');
 			self::loadQuantifiers($qnt);
 			self::loadQualifiers($qlf);
+			self::loadTypes($tps);
 		}else{
 				self::loadSintatics(fopen('sintatics.list', 'rb'));
 				$qnt= simplexml_load_file('quantifiers.xml');
 				$qlf= simplexml_load_file('qualifiers.xml');
+				$tps= simplexml_load_file('datatypes.xml');
 				self::loadQuantifiers($qnt);
 				self::loadQualifiers($qlf);
+				self::loadTypes($tps);
 			 }
 		self::$sintaticsList= Array();
 	}
@@ -151,7 +186,18 @@ class Tokenizer extends Token{
 	 */
 	public function sweep()
 	{
-		$cont= Mind::$content;
+		$cont= &Mind::$content;
+
+		// seek for data types
+		foreach(self::$dataTypes as $type=>$options)
+		{
+			$cont= preg_replace(
+				"/\:".implode('(\(| )|\:', $options)."(\(| )/",
+				':'.$type.'(',
+				$cont
+			);
+		}
+
 		foreach($cont as $word)
 		{
 			$word= strtolower($word);
@@ -163,6 +209,6 @@ class Tokenizer extends Token{
 	}

 	public function __construct(){
-		self::loadModifiers();;
+		self::loadModifiers();
 	}
 }
@@ -11,11 +11,11 @@
 -->
 <root>
 	<varchar>string,characteres,varchar,descrição,desc,obs</varchar>
-	<char>char,caracter,letra,digito</char>
+	<char>char,caracter,caractere,letra,digito</char>
 	<int>int,integer,inteiro,dígito,número</int>
 	<float>float,real,double,dinheiro,percentagem,perc,porcentagem,precisão</float>
 	<boolean>bool,boolean,boleano</boolean>
 	<date>date,data,dia</date>
 	<time>time,tempo,datahora,datetime,dthr,dt</time>
-	<file>blob,oid,lo,file,arquivo,binario,binary</file>
+	<file>blob,oid,lo,file,arquivo,binario,binary,imagem,image,foto</file>
 </root>
@@ -1,6 +1,7 @@
-aluno tem professor
+aluno tem      professor
 //professor tem piça
-professor tem aluno
+	professor tem aluno
+aluno tem foto:arquivo(xx)
 /*

 blablabla
@@ -9,4 +10,5 @@ xxx
 blabelaebamimi
 */
 aluno tem pai, mãe e papagaio.
-aluno tem nome:caractere(128, obrigatório, não nulo, "josé da conceição")
+aluno tem nome:caractere(128, obrigatório, não nulo, "josé's da conceição")
+pai tem \pilha.