/*
Copyright (C) 2000-2010  Ministere de la culture et de la communication (France), AJLSM
See LICENCE file
*/
package fr.gouv.culture.sdx.thesaurus;

import java.io.IOException;
import java.io.StringReader;
import java.util.Enumeration;
import java.util.Hashtable;

import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.TermQuery;

import fr.gouv.culture.sdx.exception.SDXException;
import fr.gouv.culture.sdx.exception.SDXExceptionCode;
import fr.gouv.culture.sdx.search.lucene.Field;
import fr.gouv.culture.sdx.search.lucene.analysis.Analyzer;
import fr.gouv.culture.sdx.search.lucene.query.SearchLocations;
import fr.gouv.culture.sdx.utils.AbstractSdxObject;
import fr.gouv.culture.sdx.utils.Utilities;
import fr.gouv.culture.sdx.utils.lucene.LuceneTools;

/**
 * Expands a Lucene query with terms related, through a thesaurus, to the terms it searches.
 *
 * <p>Overall flow: take the SDX query, get its Lucene query, collect the terms of each
 * clause together with the field each term is searched in, look every term up in the
 * thesaurus, then add the related terms (tokenized when the field requires it) to the
 * query, inside the existing query structure.</p>
 *
 * <p>NOTE(review): {@link #expandQuery} stores the search locations of the query being
 * expanded in an instance field that {@link #rebuildQuery} reads afterwards, so one
 * instance presumably must not run several expansions concurrently - confirm with callers.</p>
 */
public class LuceneQueryExpander extends AbstractSdxObject {

    /** Suffix returned by {@link #getClassNameSuffix()}. */
    public static final String CLASS_NAME_SUFFIX = "QueryExpander";

    /** Thesaurus used to find matching and related concepts; set by {@link #setUp}. */
    private SDXThesaurus lThesaurus = null;

    /** Search locations of the query being expanded; set by {@link #expandQuery}. */
    private SearchLocations searchLocations = null;

    /**
     * Sets the thesaurus used for the expansion.
     *
     * @param lt the thesaurus to search for related concepts
     */
    public void setUp(SDXThesaurus lt) {
        this.lThesaurus = lt;
    }

    /**
     * Expands a query without restricting the expansion to a particular field.
     *
     * @param query     the SDX query to expand
     * @param relations the relation types passed to the thesaurus
     * @param depth     the depth passed to the thesaurus
     * @param langs     the languages used to filter the related concepts
     * @return the expanded Lucene query, or <code>null</code> if <code>query</code> is
     *         <code>null</code> or has no Lucene query
     * @throws SDXException if the terms of the query cannot be read
     */
    public org.apache.lucene.search.Query expandQuery(fr.gouv.culture.sdx.search.lucene.query.Query query, int[] relations, int depth, String[] langs) throws SDXException {
        return this.expandQuery(query, null, relations, depth, langs);
    }

    /**
     * Expands a query, optionally restricting the expansion to one field.
     *
     * @param query     the SDX query to expand
     * @param fieldName the only field whose terms are expanded; when it does not pass
     *                  <code>Utilities.checkString</code>, all non-internal fields are expanded
     * @param relations the relation types passed to the thesaurus
     * @param depth     the depth passed to the thesaurus
     * @param langs     the languages used to filter the related concepts
     * @return the expanded Lucene query, or <code>null</code> if <code>query</code> is
     *         <code>null</code> or has no Lucene query
     * @throws SDXException if the terms of the query cannot be read
     */
    public org.apache.lucene.search.Query expandQuery(fr.gouv.culture.sdx.search.lucene.query.Query query, String fieldName, int[] relations, int depth, String[] langs) throws SDXException {
        if (query == null) {
            return null;
        }
        // Remembered so that rebuildQuery can find the field definition of each term.
        this.searchLocations = query.getSearchLocations();
        org.apache.lucene.search.Query origLQ = query.getLuceneQuery();
        if (origLQ == null) {
            return null;
        }
        return rebuildQuery(origLQ, fieldName, relations, depth, langs);
    }

    /**
     * Rebuilds a Lucene query so that every expandable clause also matches the related
     * terms found in the thesaurus.
     *
     * <p>A query that is not a <code>BooleanQuery</code> is first wrapped in a new one as
     * an optional (SHOULD) clause. A <code>BooleanQuery</code> is NOT cloned: it is
     * modified in place, and nested boolean queries are expanded by a recursive call that
     * relies on this in-place modification. Each expanded clause keeps its original
     * occurrence flag; its query is replaced by a boolean query holding the related terms
     * and the original clause query, all as SHOULD clauses.</p>
     *
     * @param origQuery the Lucene query to expand
     * @param field     the only field to expand, or a value that does not pass
     *                  <code>Utilities.checkString</code> to expand every non-internal field
     * @param relations the relation types passed to the thesaurus
     * @param depth     the depth passed to the thesaurus
     * @param langs     the languages used to filter the related concepts
     * @return the single clause query when the resulting boolean query has exactly one
     *         clause, otherwise the boolean query itself
     * @throws SDXException if the terms of a query cannot be read
     */
    protected org.apache.lucene.search.Query rebuildQuery(org.apache.lucene.search.Query origQuery, String field, int[] relations, int depth, String[] langs) throws SDXException {

        BooleanQuery newLQ;
        if (origQuery instanceof BooleanQuery) {
            // Same object as the original: the expansion below modifies it in place.
            newLQ = (BooleanQuery) origQuery;
        } else {
            // The rebuilt query is always a BooleanQuery; the original becomes an optional clause.
            newLQ = Utilities.newBooleanQuery();
            newLQ.add(origQuery, BooleanClause.Occur.SHOULD);
        }

        BooleanClause[] queryClauses = newLQ.getClauses();
        // All the terms of the query, tracked so that they are not repeated in sub-queries.
        Hashtable newLQTerms = new Hashtable();

        for (int i = 0; i < queryClauses.length; i++) {
            // Refreshed on every pass because earlier passes may have added terms to newLQ.
            collectTerms(newLQ, newLQTerms);

            BooleanClause clause = queryClauses[i];
            org.apache.lucene.search.Query clauseQuery = clause.getQuery();
            if (clauseQuery instanceof BooleanQuery) {
                // Nested boolean query: expanded in place, so the returned value is not needed.
                rebuildQuery(clauseQuery, field, relations, depth, langs);
            } else {
                expandClause(clause, newLQTerms, field, relations, depth, langs);
            }
        }

        // A boolean query with a single clause is unwrapped (compensates for Lucene behavior).
        BooleanClause[] rebuiltClauses = newLQ.getClauses();
        if (rebuiltClauses.length == 1) {
            return rebuiltClauses[0].getQuery();
        }
        return newLQ;
    }

    /**
     * Collects the terms of a query into a table, converting I/O failures.
     *
     * @param query the query whose terms are read
     * @param terms the table receiving the terms
     * @throws SDXException with code ERROR_GET_QUERY_TERMS if the terms cannot be read
     */
    private void collectTerms(org.apache.lucene.search.Query query, Hashtable terms) throws SDXException {
        try {
            LuceneTools.getTerms(query, terms, false);
        } catch (IOException e) {
            throw new SDXException(super.getLog(), SDXExceptionCode.ERROR_GET_QUERY_TERMS, null, e);
        }
    }

    /**
     * Expands one non-boolean clause: when related terms are found, the clause query is
     * replaced by a boolean query made of those terms plus the original clause query.
     *
     * @param clause     the clause to expand; modified in place
     * @param newLQTerms the terms already present in the enclosing query
     * @param field      the only field to expand, if any
     * @param relations  the relation types passed to the thesaurus
     * @param depth      the depth passed to the thesaurus
     * @param langs      the languages used to filter the related concepts
     * @throws SDXException if the terms of a query cannot be read
     */
    private void expandClause(BooleanClause clause, Hashtable newLQTerms, String field, int[] relations, int depth, String[] langs) throws SDXException {
        org.apache.lucene.search.Query clauseQuery = clause.getQuery();

        // The terms searched by the current clause.
        Hashtable clauseTerms = new Hashtable();
        collectTerms(clauseQuery, clauseTerms);

        // The future expansion of the clause, and the terms already put into it.
        BooleanQuery expandedQuery = Utilities.newBooleanQuery();
        Hashtable expandedQueryTerms = new Hashtable();

        Enumeration keys = clauseTerms.keys();
        while (keys.hasMoreElements()) {
            collectTerms(expandedQuery, expandedQueryTerms);

            String key = (String) keys.nextElement();
            String searchTerm = (String) clauseTerms.get(key);
            String fieldName = LuceneTools.getFieldNameFromKey(key);

            Concept[] relatedConcepts = findRelatedConcepts(searchTerm, fieldName, field, relations, depth, langs);
            if (relatedConcepts == null) {
                continue;
            }

            for (int k = 0; k < relatedConcepts.length; k++) {
                if (relatedConcepts[k] == null) {
                    continue;
                }
                String relatedTerm = relatedConcepts[k].getValue();
                // A concept without a value cannot be searched; the searched term itself is not re-added.
                if (relatedTerm == null || searchTerm.equalsIgnoreCase(relatedTerm)) {
                    continue;
                }
                addRelatedTerm(expandedQuery, expandedQueryTerms, newLQTerms, clauseTerms, fieldName, relatedTerm);
            }
        }

        if (expandedQuery.getClauses().length > 0) {
            // Re-create the clause: related terms OR the original clause query,
            // under the occurrence flag of the original clause.
            expandedQuery.add(clauseQuery, BooleanClause.Occur.SHOULD);
            clause.setQuery(expandedQuery);
        }
    }

    /**
     * Finds the concepts related to a searched term, filtered by language.
     *
     * @param searchTerm the searched term
     * @param fieldName  the field the term is searched in
     * @param field      the only field to expand, if any
     * @param relations  the relation types passed to the thesaurus
     * @param depth      the depth passed to the thesaurus
     * @param langs      the languages used to filter the related concepts
     * @return the related concepts, or <code>null</code> when the field is not expanded
     *         or the thesaurus returns nothing
     * @throws SDXException if the thesaurus lookup fails
     */
    private Concept[] findRelatedConcepts(String searchTerm, String fieldName, String field, int[] relations, int depth, String[] langs) throws SDXException {
        // No expansion on internal fields (names starting with "sdx").
        if (fieldName == null || fieldName.startsWith("sdx")) {
            return null;
        }
        // When a field restriction is given, only that field is expanded.
        if (Utilities.checkString(field) && !fieldName.equals(field)) {
            return null;
        }

        Concept[] matches = lThesaurus.search(searchTerm);
        if (matches == null) {
            return null;
        }
        Concept[] related = lThesaurus.getRelations(searchTerm, matches, relations, depth);
        if (related == null) {
            // Nothing to filter; avoids handing null to the language filter.
            return null;
        }
        return lThesaurus.filterByLangs(related, langs);
    }

    /**
     * Adds one related term to the expansion of a clause. When the field of the searched
     * term is known and of type <code>Field.WORD</code>, the related term is tokenized
     * with the analyzer of that field and each new token is added; otherwise the related
     * term is added as is.
     *
     * @param expandedQuery      the expansion being built
     * @param expandedQueryTerms the terms already in the expansion
     * @param newLQTerms         the terms already in the enclosing query
     * @param clauseTerms        the terms of the clause being expanded
     * @param fieldName          the field of the searched term
     * @param relatedTerm        the related term to add; must not be <code>null</code>
     */
    private void addRelatedTerm(BooleanQuery expandedQuery, Hashtable expandedQueryTerms, Hashtable newLQTerms, Hashtable clauseTerms, String fieldName, String relatedTerm) {
        Field searchTermField = null;
        if (this.searchLocations != null) {
            searchTermField = this.searchLocations.getField(fieldName);
        }

        TokenStream tokenStream = null;
        if (searchTermField != null) {
            // The field type tells whether the related term must be tokenized for searching.
            int searchTermFieldType = searchTermField.getFieldType();
            Analyzer searchTermFieldAnalyzer = searchTermField.getAnalyzer();
            if (searchTermFieldType == Field.WORD) {
                tokenStream = searchTermFieldAnalyzer.tokenStream(fieldName, new StringReader(relatedTerm));
            }
        }

        if (tokenStream == null) {
            // No tokenization: the related term is added unchanged, once per expansion.
            String possibleTermKey = fieldName + "_" + relatedTerm;
            if (!expandedQueryTerms.containsKey(possibleTermKey)) {
                expandedQuery.add(new TermQuery(new Term(fieldName, relatedTerm)), BooleanClause.Occur.SHOULD);
                // Recorded immediately so that a later concept with the same value is not added again.
                expandedQueryTerms.put(possibleTermKey, relatedTerm);
            }
            return;
        }

        try {
            Token token;
            while ((token = tokenStream.next()) != null) {
                String tokenText = token.termText();
                String possibleTermKey = fieldName + "_" + tokenText;
                // TODO better handling of prohibited term expansion
                // A term already in the query, the clause or the expansion is not re-added.
                if (!newLQTerms.containsKey(possibleTermKey)
                        && !clauseTerms.containsKey(possibleTermKey)
                        && !expandedQueryTerms.containsKey(possibleTermKey)) {
                    expandedQuery.add(new TermQuery(new Term(fieldName, tokenText)), BooleanClause.Occur.SHOULD);
                    LuceneTools.getTerms(expandedQuery, expandedQueryTerms, false);
                }
            }
        } catch (IOException e) {
            // Deliberately not thrown: valuable expanded terms may already have been added.
            // NOTE(review): the exception is built with the logger, presumably so that it gets logged - confirm.
            new SDXException(super.getLog(), SDXExceptionCode.ERROR_CLOSE_STREAM, null, e);
        } finally {
            // Always release the stream, including when tokenizing failed.
            try {
                tokenStream.close();
            } catch (IOException e) {
                new SDXException(super.getLog(), SDXExceptionCode.ERROR_CLOSE_STREAM, null, e);
            }
        }
    }

    /**
     * Returns the class name suffix of this object.
     *
     * @return {@link #CLASS_NAME_SUFFIX}
     */
    protected String getClassNameSuffix() {
        return LuceneQueryExpander.CLASS_NAME_SUFFIX;
    }

    /* (non-Javadoc)
     * @see fr.gouv.culture.sdx.utils.AbstractSdxObject#initToSax()
     */
    protected boolean initToSax() {
        return true;
    }

    /**
     * Initializes the volatile objects (those that need to be refreshed each time a toSAX
     * is called) used to describe this object in XML. This class registers none.
     */
    protected void initVolatileObjectsToSax() {

    }

}
