package udmParser;
# GPL
# author: Rohan Baxter, Ultimode Inc. rohan@ultimode.com
# modifications: Heiko Stoermer, innominate AG, stoermer@innominate.de

use strict;
# use diagnostics;

# parse(QUERY_STRING)
#
# Translate a boolean search expression into SQL fragments.
#
# Input:  query string, e.g. (mysql | msql) & ~postgres
#         Operators:  |  or  ,   OR
#                     &          AND
#                     ~          NOT
#                     ( )        grouping
#         Words separated only by white space are ORed together.
#         An undefined argument is treated as the empty query.
#
# Output: the list (query_function, words, error)
#         query_function: expression a document needs to satisfy, e.g.
#           (sum(dict.word = 'mysql') ||sum(dict.word = 'msql')) && ! sum(dict.word = 'postgres')
#         words: comma separated, single-quoted words to go in the
#           SQL WHERE clause, e.g. 'mysql','msql','postgres'
#           (Using words is the big speed trick achieved by udmSearch)
#         error: "" on success, otherwise a short description of the
#           problem ("at <token>", "unmatched brackets ", "at end of query").
#           When an error is reported query_function and words hold only
#           what was built before the error and must not be used.
#
# Note: no prototype is declared, so the sub can be called both as
# parse($q) and as &parse($q).
sub parse {
    my $query_string = shift;
    # Only undef falls back to the empty query; "0" is a valid search word.
    $query_string = "" unless defined $query_string;

    # --- Normalise white space -------------------------------------------
    $query_string =~ s/^\s+//;                 # leading
    $query_string =~ s/\s+\z//;                # trailing (would become a dangling OR)
    $query_string =~ s/\s*([|&,])\s*/$1/g;     # around binary operators
    $query_string =~ s/([(~])\s+/$1/g;         # after "(" and "~"
    $query_string =~ s/\s+\)/)/g;              # before ")"
    # White space before "(" / "~" and after ")" is deliberately kept:
    # between two operands it means OR, exactly like white space
    # between two words.
    $query_string =~ s/\s+/,/g;                # remaining white space => OR

    # --- Convert to normal form and tokenise ------------------------------
    # "," and "|" become "||", "&" becomes "&&", "~" becomes "!"; every
    # operator and bracket is surrounded by blanks so split can isolate it.
    $query_string =~ s/,/|/g;
    $query_string =~ s/([()])/ $1 /g;
    $query_string =~ s/\|/ || /g;
    $query_string =~ s/&/ && /g;
    $query_string =~ s/~/ ! /g;
    my @tokens = split ' ', $query_string;

    # --- Two state parser -------------------------------------------------
    # $expect_operand true : next token must be a word, "(" or "!"
    # $expect_operand false: next token must be "||", "&&" or ")"
    my $expect_operand = 1;
    my $open_brackets  = 0;    # "(" seen and not yet closed
    my $error          = "";
    my $query_function = "";   # function a document needs to satisfy
    my @words;                 # quoted words that should appear in document

    for my $token (@tokens) {
        if ($expect_operand) {
            if ($token eq "||" || $token eq "&&" || $token eq ")") {
                $error = "at $token";
            }
            elsif ($token eq "!") {
                $query_function .= " ! ";
            }
            elsif ($token eq "(") {
                $open_brackets++;
                $query_function .= "(";
            }
            else {    # must be a word
                # Double quotes and backslashes so the word cannot
                # terminate the SQL string literal it is embedded in
                # (backslash is an escape character in MySQL literals).
                (my $word = $token) =~ s/(['\\])/$1$1/g;
                $query_function .= "sum(dict.word = '$word')";
                push @words, "'$word'";
                $expect_operand = 0;
            }
        }
        elsif ($token eq "||" || $token eq "&&") {
            $query_function .= " $token";
            $expect_operand = 1;
        }
        elsif ($token eq ")") {
            if ($open_brackets > 0) {
                $open_brackets--;
                $query_function .= ")";
            }
            else {
                # ")" without a preceding "(": report it right away, a
                # later "(" must not be able to balance the counter.
                $error = "unmatched brackets ";
            }
        }
        else {
            $error = " at '$token'";
        }
        last if $error ne "";
    }

    # --- Checks that need the whole input ---------------------------------
    if ($error eq "") {
        if ($open_brackets != 0) {
            $error = "unmatched brackets ";
        }
        elsif (@tokens && $expect_operand) {
            # Query ended in "||", "&&" or "!"; the expression is incomplete.
            $error = "at end of query";
        }
    }

    return ($query_function, join(",", @words), $error);
}


1;
