/*
SDX: Documentary System in XML.
Copyright (C) 2000, 2001, 2002  Ministere de la culture et de la communication (France), AJLSM

Ministere de la culture et de la communication,
Mission de la recherche et de la technologie
3 rue de Valois, 75042 Paris Cedex 01 (France)
mrt@culture.fr, michel.bottin@culture.fr

AJLSM, 17, rue Vital Carles, 33000 Bordeaux (France)
sevigny@ajlsm.com

This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
See the GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program; if not, write to the
Free Software Foundation, Inc.
59 Temple Place - Suite 330, Boston, MA  02111-1307, USA
or connect to:
http://www.fsf.org/copyleft/gpl.html
 */
package fr.gouv.culture.sdx.documentbase;

// SDX imports

//import fr.gouv.culture.sdx.query.SDXResults;

import fr.gouv.culture.sdx.application.Application;
import fr.gouv.culture.sdx.document.Document;
import fr.gouv.culture.sdx.document.IndexableDocument;
import fr.gouv.culture.sdx.document.IndexableFieldProperty;
import fr.gouv.culture.sdx.exception.SDXException;
import fr.gouv.culture.sdx.exception.SDXExceptionCode;
import fr.gouv.culture.sdx.framework.FrameworkImpl;
import fr.gouv.culture.sdx.oai.LuceneDocumentBaseOAIHarvester;
import fr.gouv.culture.sdx.oai.LuceneDocumentBaseOAIRepository;
import fr.gouv.culture.sdx.repository.Repository;
import fr.gouv.culture.sdx.search.lucene.DateField;
import fr.gouv.culture.sdx.search.lucene.FieldsDefinition;
import fr.gouv.culture.sdx.search.lucene.analysis.MetaAnalyzer;
import fr.gouv.culture.sdx.search.lucene.query.Index;
import fr.gouv.culture.sdx.search.lucene.query.LuceneIndex;
import fr.gouv.culture.sdx.utils.Utilities;
import org.apache.avalon.framework.component.ComponentException;
import org.apache.avalon.framework.configuration.Configuration;
import org.apache.avalon.framework.configuration.ConfigurationException;
import org.apache.avalon.framework.context.ContextException;
import org.apache.cocoon.ProcessingException;
import org.apache.lucene.document.Field;
import org.xml.sax.ContentHandler;
import org.xml.sax.SAXException;

import java.io.File;
import java.util.Date;
import java.util.Enumeration;
import java.util.Hashtable;

/**
 *	A document base within an SDX application.
 *
 * <p>
 * A document base is a very important concept in SDX development. A document base
 * is where documents are searched and retrieved, thus added (indexed), deleted or
 * updated. A search cannot occur in a smaller unit than the document base. To exclude
 * some parts of a document base, one should use query constructions, possibly filters.
 * <p>
 * A document base has a structure ; this structure is basically a list of fields. An
 * application may have many document bases, and these document bases may have different
 * structures. As always, indexable documents (XML, HTML or the like) with different
 * structures can be indexed within a single document base.
 * <p>
 * Most applications will have only one document base, but in some cases it could
 * be interesting to have more than one, like when different kinds of documents are
 * never searched at the same time, in this case it would speed up the searching and
 * indexing process if they are separated in different document bases.
 * <p>
 * A document base uses an indexer to index documents. It uses repositories
 * to store the documents, either indexable ones or attached ones (i.e. non-indexable documents
 * that are logically dependent on the indexable documents, images or the like).
 * An application can get a searcher to perform searches within this document base,
 * possibly with other document bases.
 * <p>
 * In order to work properly, a document base must be instantiated given the following sequence :
 * 1) creation,
 * 2) setting the logger (optional, but suggested for errors messages),
 * 3) configuration,
 * 4) initialization.
 *
 * @see #enableLogging
 * @see #configure
 * @see #init
 *
 */
public class LuceneDocumentBase extends SDXDocumentBase {

    /************
     Class members
     ************/

    /** The configuration object of this document base; retained by {@link #configure(Configuration)} and later handed to the search index. */
    protected Configuration configuration = null;
    /** The index for this document base: a Lucene one, built in {@link #configureSearchIndex()}. */
    protected LuceneIndex luceneSearchIndex;
    /** The (Lucene) fields that are to be handled by the index. */
    protected FieldsDefinition fieldsDef;



    /**************************************************************
     String representation of the keys used by the properties object
     **************************************************************/


    /** Key under which the fields definition object is stored in the properties object. */
    public static final String FIELDS_DEFINITION = "fieldsDefinition";

    /***************************************
     Directory names to be provided to Lucene
     ***************************************/

    /** The directory name for the search index; it is appended to the document base directory path. */
    protected final String SEARCH_INDEX_DIRECTORY_NAME = "sdx-search-index";

    /**********************************************************************
     Attribute names for the configuration element in the configuration file
     **********************************************************************/

    /** The implied attribute stating whether this document base is to be exposed to remote access or not. */
    public static final String DBELEM_ATTRIBUTE_REMOTE_ACCESS = "remote-access";

    /*************************************************************************
     Child element names of the configuration element in the configuration file
     *************************************************************************/

    /** The child element that defines the list of fields used for indexation. */
    public static final String ELEMENT_NAME_FIELD_LIST = "fieldList";


    /************************************************************************
     Internal fields for documents (kind of metadata required/proposed by SDX)
     //TODO : move them to a Document/super-DB/application class ? -pb
     ************************************************************************/

    /** The element used to define system fields in sdx.xconf. */
    public static final String ELEMENT_NAME_LUCENE_SDX_INTERNAL_FIELDS = "luceneSDXInternalFields"; //TODO : as is, is it the right place for defining this ? Isn't it Framework dependant (as sdxConf is) or, in a better way, application dependant ? -pb

    // (disabled) Internal field name for identifying ordered relationships between documents.
    // private final String RELATION_PROPERTY_ORDER = "order"; //TODO : use value defined in a DB/Relationship  class  ? -pb

    /*
        TODO :
        Some of the values below could probably be moved next to the value sets as they are defined above.
        In such a case, their names, visibility and modifiers should be harmonized.
        Some other ones could also be moved to DB or Document related classes.
        I still need further investigation to dispatch them accurately ;-)
        - pb
    */


    /**
     * Builds a document base that is neither configured nor initialized.
     *
     * <p>Once created, the instance may be given a logger (optional, but recommended
     * so that error messages are reported); it must then be configured and, after
     * that, initialized before it can work properly.
     *
     * @see #enableLogging
     * @see #configure
     * @see #init
     */
    public LuceneDocumentBase() {
        super();
    }

    /**
     * Configures this document base from its configuration element.
     *
     * <p>The configuration object is checked, kept in {@link #configuration} (the search
     * index is configured from it later on) and then passed to the parent class.
     *
     * <p>Sample configuration entry:
     <pre>&lt;sdx:documentBase sdx:id = "myDocumentBaseName" sdx:type = "lucene">
     &nbsp;&nbsp;&lt;sdx:fieldList xml:lang = "fr-FR" sdx:variant = "" sdx:analyzerConf = "" sdx:analyzerClass = "">
     &lt;sdx:field code = "fieldName" type = "word" xml:lang = "fr-FR" sdx:analyzerClass = "" sdx:analyzerConf = ""/>
     &lt;sdx:field code = "fieldName2" type = "field" xml:lang = "fr-FR" brief = "true"/>
     &lt;sdx:field code = "fieldName3" type = "date" xml:lang = "fr-FR"/>
     &lt;sdx:field code = "fieldName4" type = "unindexed" xml:lang = "fr-FR"/>
     &lt;/sdx:fieldList>
     &lt;sdx:index>
     &lt;sdx:pipeline sdx:id = "sdxIndexationPipeline">
     &lt;sdx:transformation src = "path to stylesheet, can be absolute or relative to the directory containing this file" sdx:id = "step2" sdx:type = "xslt"/>
     &lt;sdx:transformation src = "path to stylesheet, can be absolute or relative to the directory containing this file" sdx:id = "step3" sdx:type = "xslt" keep = "true"/>
     &lt;/sdx:pipeline>
     &lt;/sdx:index>
     &lt;sdx:repositories>
     &lt;sdx:repository baseDirectory = "blah4" depth = "3" extent = "100" sdx:type = "FS" sdx:default = "true" sdx:id = "blah4"/>
     &lt;sdx:repository ref = "blah2"/>
     &lt;/sdx:repositories>
     &lt;/sdx:documentBase>
     </pre>
     * A fully documented sample is meant to live in documented_application.xconf; this
     * should become a link once better documentation capabilities are available.
     *
     * @param configuration      The configuration object from which to build a document base.
     * @throws ConfigurationException If the configuration check or the parent configuration fails.
     */
    public void configure(Configuration configuration) throws ConfigurationException {
        // step 1: sanity check of the object we were given
        Utilities.checkConfiguration(configuration);

        // step 2: keep a handle on it, configureSearchIndex() reads it back
        this.configuration = configuration;

        // step 3: hand over to the parent class
        super.configure(configuration);
    }

    /**
     * Runs the Lucene specific configuration steps of this document base: reads the
     * "keep original documents" attribute (<code>true</code> when absent), then
     * configures the field list and the search index, in that order.
     *
     * @param configuration The configuration element of this document base.
     * @throws ConfigurationException If one of the configuration steps fails.
     */
    protected void configureDocumentBase(Configuration configuration) throws ConfigurationException {
        final boolean keepOriginals = configuration.getAttributeAsBoolean(DBELEM_ATTRIBUTE_KEEP_ORIGINAL, true);
        this.keepOriginalDocuments = keepOriginals;

        // the search index needs the fields definition, so the field list comes first
        this.configureFieldList(configuration);
        this.configureSearchIndex();
    }


    //Lucene specific configuration
    /**
     * Builds the fields definition of this document base from its <code>fieldList</code>
     * child element.
     *
     * <p>When the element carries a reference attribute, the fields definition registered
     * under that reference in the application field lists is cloned, and the clone is then
     * configured with the document base configuration so that locally declared fields are
     * added to it. Without a reference, a new fields definition is built from the
     * configuration. In both cases the result is stored in {@link #fieldsDef} and put in
     * the properties under {@link #FIELDS_DEFINITION}.
     *
     * <p>An unknown reference is reported with an <code>SDXException</code> before the
     * referenced object is used; the lookup result is checked first so that a missing
     * field list can no longer surface as a <code>NullPointerException</code>.
     *
     * <p>Failures are deliberately not propagated, so that one faulty document base does
     * not make the whole configuration fail. In that case {@link #fieldsDef} is left as it
     * was and nothing is put in the properties.
     *
     * @param configuration The configuration element of this document base.
     * @throws ConfigurationException Declared for subclasses; this implementation logs configuration errors instead of throwing them.
     */
    protected void configureFieldList(Configuration configuration) throws ConfigurationException {
        try {
            //getting the list of fields for indexing; an empty element is returned when there is none
            Configuration fieldList = configuration.getChild(LuceneDocumentBase.ELEMENT_NAME_FIELD_LIST, true);
            String ref = fieldList.getAttribute(Repository.ATTRIBUTE_REF, null);
            if (Utilities.checkString(ref)) {
                //the field list refers to one declared at the application level: look it up
                FieldsDefinition referenced = null;
                Hashtable appFieldLists = (Hashtable) props.get(Application.APPLICATION_FIELD_LISTS);
                if (appFieldLists != null)
                    referenced = (FieldsDefinition) appFieldLists.get(ref);
                //checking BEFORE any use of the referenced object
                if (referenced == null) {
                    String[] args = new String[1];
                    args[0] = ref;
                    throw new SDXException(logger, SDXExceptionCode.ERROR_LOAD_REFERENCED_FIELD_LIST, args, null);
                }
                //working on a copy so that the application level definition is not modified
                this.fieldsDef = (FieldsDefinition) referenced.clone();
                this.fieldsDef.configure(configuration);//going to add any additional fields to this fieldList reference
            } else {
                //no reference: at this point, we should have <sdx:fieldList> containing a list of fields
                this.fieldsDef = Utilities.configureFieldList(logger, configuration, this.props);
            }

            props.put(FIELDS_DEFINITION, fieldsDef);

        } catch (SDXException e) {
            //we don't want all document base configurations to fail so we won't throw this farther out
            //the creation of the SDXException should log this message
        } catch (ConfigurationException e) {
            //we don't want all document base configurations to fail so we won't throw this farther out
            Utilities.logException(logger, e);
        }

    }

    //SDX search architecture/Lucene specific
    //TODORefactor: in the future this may be declared in the parent class and overridden here
    /**
     * Creates and configures the Lucene search index of this document base.
     *
     * <p>The index directory is the document base directory path taken from the
     * properties, followed by {@link #SEARCH_INDEX_DIRECTORY_NAME}. The index is given
     * the logger, the retained document base configuration and a meta analyzer set up
     * from {@link #fieldsDef}.
     *
     * @throws ConfigurationException If the index cannot be configured; an <code>SDXException</code> raised on the way is wrapped in it.
     */
    protected void configureSearchIndex() throws ConfigurationException {
        try {
            // remote access settings and application id, all read from the properties
            final String host = Utilities.getStringFromHashtable(FrameworkImpl.RMI_HOST, props);
            final Integer port = (Integer) props.get(FrameworkImpl.RMI_PORT);
            final String applicationId = Utilities.getStringFromHashtable(Application.APPLICATION_ID, props);

            // the path is always derived from the document base dir path held in the
            // properties, which guarantees it is the same each time
            final String docBaseDirPath = Utilities.getStringFromHashtable(DOCUMENTBASE_DIR_PATH, props);
            final String indexPath = docBaseDirPath + SEARCH_INDEX_DIRECTORY_NAME + File.separator;

            // making sure the directory is available and accessible
            final File indexDir = Utilities.checkDirectory(indexPath, logger);

            // the index itself
            luceneSearchIndex = new LuceneIndex(indexDir, host, port, applicationId, getId());
            luceneSearchIndex.enableLogging(this.logger);
            luceneSearchIndex.configure(this.configuration);

            // its meta analyzer: logger first, then the fields definition
            final MetaAnalyzer analyzer = new MetaAnalyzer();
            analyzer.enableLogging(logger);
            analyzer.setUp(fieldsDef);
            luceneSearchIndex.setMetaAnalyzer(analyzer);
        } catch (SDXException e) {
            throw new ConfigurationException(e.getMessage(), e);
        }
    }

    /**
     * Sets up the OAI repository of this document base when the configuration declares one.
     *
     * <p>Nothing happens if the OAI repository child element is absent. Otherwise a
     * {@link LuceneDocumentBaseOAIRepository} is created, given the logger, the component
     * manager, the context and the properties, stored in <code>oaiRepo</code> and finally
     * configured from the child element.
     *
     * @param configuration The configuration element of this document base.
     * @throws ConfigurationException If composing, contextualizing or configuring the repository fails.
     */
    protected void configureOAIRepository(Configuration configuration) throws ConfigurationException {
        final Configuration repositoryConf = configuration.getChild(ELEMENT_NAME_OAI_REPOSITORY, false);
        if (repositoryConf == null) {
            // no OAI repository declared for this document base
            return;
        }

        final LuceneDocumentBaseOAIRepository repository = new LuceneDocumentBaseOAIRepository(this);
        repository.enableLogging(this.logger);
        try {
            repository.compose(super._manager);
            repository.contextualize(super.context);
        } catch (ComponentException e) {
            throw new ConfigurationException(e.getMessage(), e);
        } catch (ContextException e) {
            throw new ConfigurationException(e.getMessage(), e);
        }
        repository.setProperties(this.props);

        this.oaiRepo = repository;
        this.oaiRepo.configure(repositoryConf);
    }

    /**
     * Sets up the OAI harvester of this document base when the configuration declares one.
     *
     * <p>Nothing happens if the OAI harvester child element is absent. Otherwise a
     * {@link LuceneDocumentBaseOAIHarvester} is created and stored in <code>oaiHarv</code>,
     * given the logger, the component manager and the properties, and finally configured
     * from the child element.
     *
     * @param configuration The configuration element of this document base.
     * @throws ConfigurationException If composing or configuring the harvester fails.
     */
    protected void configureOAIHarvester(Configuration configuration) throws ConfigurationException {
        final Configuration harvesterConf = configuration.getChild(ELEMENT_NAME_OAI_HARVESTER, false);
        if (harvesterConf == null) {
            // no OAI harvester declared for this document base
            return;
        }

        super.oaiHarv = new LuceneDocumentBaseOAIHarvester(this);
        super.oaiHarv.enableLogging(this.logger);

        // the Lucene specific type is needed for compose() and setProperties()
        final LuceneDocumentBaseOAIHarvester harvester = (LuceneDocumentBaseOAIHarvester) super.oaiHarv;
        try {
            harvester.compose(super._manager);
        } catch (ComponentException e) {
            throw new ConfigurationException(e.getMessage(), e);
        }
        harvester.setProperties(this.props);

        super.oaiHarv.configure(harvesterConf);
    }

    /**
     * Deletes documents from this document base, then optimizes the Lucene search index.
     *
     * <p>The parent method is overridden only to add the index maintenance. Whether the
     * parent deletion completes or throws, the search index is optimized and its last
     * modification timestamp file is written.
     *
     * @param docs      The documents to delete.
     * @param handler   The content handler handed to the parent implementation.
     * @throws SDXException
     * @throws SAXException
     * @throws ProcessingException
     */
    public synchronized void delete(Document[] docs, ContentHandler handler) throws SDXException, SAXException, ProcessingException {
        try {
            super.delete(docs, handler);
        } finally {
            // index maintenance, performed even when the deletion above failed
            luceneSearchIndex.optimize();
            luceneSearchIndex.writeLastModificationTimestampFile(true);
        }
    }

    /**
     * Applies Lucene specific index parameters, if any, then lets the parent class set
     * the default pipeline parameters and ensure the params have a pipeline.
     *
     * @param params The params object provided by the user at indexation time; may be <code>null</code>.
     * @return The parameters as returned by the parent implementation.
     */
    protected IndexParameters setBaseParameters(IndexParameters params) {
        // instanceof is false for null, which covers the "no parameters" case as well
        if (params instanceof LuceneIndexParameters) {
            final LuceneIndexParameters luceneParams = (LuceneIndexParameters) params;
            setSearchIndexParameters(luceneParams);
        }

        return super.setBaseParameters(params);
    }

    /**
     * Returns the index object used by this document base for indexing and searching.
     *
     * <p>The declared return type is the generic {@link Index}: this needs to stay generic
     * enough to support other types of search indexes, i.e. remote indices or any other
     * index which someone may design in the future -rbp
     *
     * @return The LuceneIndex object.
     */
    public Index getIndex() {
        //TODO : this method seems to be only used by sdx.xsl -pb
        //TODO : name this method getSearchIndex-) ? -pb
        return this.luceneSearchIndex;
    }

    //TODO : possibly move this method to the LuceneIndex class -pb
    /**
     * Hands the Lucene specific indexation parameters (performance tuning) over to the
     * search index.
     *
     * @param params The Lucene specific params to use.
     */
    protected void setSearchIndexParameters(LuceneIndexParameters params) {
        this.luceneSearchIndex.setParameters(params);
    }


    //TODO : possibly move this method to the LuceneIndex class -pb
    /**
     * Adds an indexation document to the search index.
     *
     * @param indexationDoc The document to add; it must be a Lucene <code>Document</code>, otherwise the cast fails.
     * @param batchIndex    Passed as is to the search index (presumably tells whether the write belongs to a batch - to be confirmed).
     * @throws SDXException
     */
    protected void addToSearchIndex(Object indexationDoc, boolean batchIndex) throws SDXException {
        // the generic object is the Lucene document built for indexation
        final org.apache.lucene.document.Document luceneDoc = (org.apache.lucene.document.Document) indexationDoc;
        this.luceneSearchIndex.writeDocument(luceneDoc, batchIndex);
    }

    //TODO : possibly move this method to the LuceneIndex class -pb
    //TODO : transfer a batch control here -pb
    /**
     * Removes a document from the search index.
     *
     * @param docId The id of the document to remove.
     * @throws SDXException
     */
    protected void deleteFromSearchIndex(String docId) throws SDXException {
        this.luceneSearchIndex.deleteDocument(docId);
    }


    //TODO : possibly move this method to the LuceneIndex class -pb
    /**
     * Compacts the search index by asking it to merge its batch.
     *
     * @throws SDXException
     */
    protected void compactSearchIndex() throws SDXException {
        this.luceneSearchIndex.mergeBatch();
    }


    //TODO : possibly move this method to the LuceneIndex class -pb
    //TODOLuceneSpecific: move to lucene index and let the document base add the few system fields
    /*TODO, this could probably be refactored to return a generic object like Parameters and moved to the parent class
    *the sub-class would then call this method and use the generic object to build it's particular object for indexation*/
    /**
     * Builds the Lucene document used to index an indexable document.
     *
     * <p>The Lucene document receives the boost of the indexable document, then one Lucene
     * field per value of each of its field properties:
     * <ul>
     * <li>when the field is declared in the fields definition, the Lucene field is built by
     * that definition and given the boost of the property;</li>
     * <li>otherwise the value is added as an unindexed field, provided it passes
     * <code>Utilities.checkString</code>. Values that fail the check are skipped (they used to be
     * added regardless because of a stray empty statement after the test).</li>
     * </ul>
     * The document is then checked with <code>Utilities.checkDocument</code> and the SDX system
     * fields are added as keywords: document id, "all" marker, document base id, application
     * id, document type, modification date, content length and repository id. Apart from
     * the "all" marker and the modification date, a system field is only added when its
     * value passes <code>Utilities.checkString</code>.
     *
     * <p>The modification date is the pipeline parameter <code>SDX_DATE_MILLISECONDS</code> when
     * available, the current date otherwise. A value of that parameter which is not a
     * number makes <code>Long.parseLong</code> throw a <code>NumberFormatException</code>.
     *
     * @param doc           The indexable document; must not be <code>null</code>.
     * @param storeDocId    The id under which the document is stored.
     * @param repoId        The id of the repository holding the document.
     * @param params        The indexation parameters; when <code>null</code>, or without pipeline parameters, the current date is used as modification date.
     * @return              The Lucene document, as an <code>Object</code>.
     * @throws SDXException If the document check fails.
     */
    protected Object getIndexationDocument(IndexableDocument doc, String storeDocId, String repoId, IndexParameters params) throws SDXException {

        org.apache.lucene.document.Document lDoc = new org.apache.lucene.document.Document();
        float docBoost = doc.getBoost();
        lDoc.setBoost(docBoost);

        //getting Lucene fields
        Enumeration fieldVals = doc.getFieldValues();
        while (fieldVals != null && fieldVals.hasMoreElements()) {
            IndexableFieldProperty prop = (IndexableFieldProperty) fieldVals.nextElement();
            // TODOLogging:log.warn or info? the unknown fields
            if (prop == null)
                continue;

            String fieldName = prop.getName();
            String[] fieldValues = prop.getValues();
            //a property without any value contributes nothing to the document
            if (fieldValues == null)
                continue;
            float fieldBoost = prop.getBoost();
            fr.gouv.culture.sdx.search.lucene.Field sdxF = fieldsDef.getField(fieldName);

            for (int i = 0; i < fieldValues.length; i++) {
                if (sdxF != null) {
                    //declared field: the fields definition knows how to build the Lucene field
                    Field f = sdxF.getLuceneField(fieldValues[i]);
                    if (f != null) {
                        f.setBoost(fieldBoost);
                        lDoc.add(f);
                    }
                } else if (Utilities.checkString(fieldValues[i])) {
                    //undeclared field: we add an unindexed field, but only for a usable value
                    lDoc.add(Field.UnIndexed(fieldName, fieldValues[i]));
                }
            }
        }

        //if we dont have a document id after the parse or for the attached document, we have a problem and need throw an error
        Utilities.checkDocument(logger, doc);

        //adding system metadata
        addKeywordIfSet(lDoc, INTERNAL_FIELD_NAME_SDXDOCID, storeDocId);

        lDoc.add(Field.Keyword(INTERNAL_FIELD_NAME_SDXALL, INTERNAL_SDXALL_FIELD_VALUE));

        addKeywordIfSet(lDoc, INTERNAL_FIELD_NAME_SDXDBID, this.getId());

        //Warning : what if this DB is shared between applications ?
        addKeywordIfSet(lDoc, INTERNAL_FIELD_NAME_SDXAPPID, Utilities.getStringFromHashtable(Application.APPLICATION_ID, props));

        addKeywordIfSet(lDoc, INTERNAL_FIELD_NAME_SDXDOCTYPE, doc.getDocType());

        //the current date is the default; the indexation date parameter, when provided, takes precedence
        long modDate = fr.gouv.culture.sdx.utils.Date.getUtcIso8601Date().getTime();
        if (params != null && params.getPipelineParams() != null)
            modDate = Long.parseLong(params.getPipelineParams().getParameter(SDX_DATE_MILLISECONDS, Long.toString(modDate)));
        lDoc.add(Field.Keyword(INTERNAL_FIELD_NAME_SDXMODDATE, DateField.timeToString(modDate)));

        addKeywordIfSet(lDoc, INTERNAL_FIELD_NAME_SDXCONTENTLENGTH, Integer.toString(doc.getLength()));

        addKeywordIfSet(lDoc, INTERNAL_FIELD_NAME_SDXREPOID, repoId);

        return lDoc;
    }

    /** Adds a keyword field to a Lucene document, but only when the value passes <code>Utilities.checkString</code>. */
    private void addKeywordIfSet(org.apache.lucene.document.Document lDoc, String name, String value) {
        if (Utilities.checkString(value))
            lDoc.add(Field.Keyword(name, value));
    }


    /**
     * Returns the last modification date of this document base, as reported by its
     * search index.
     *
     * @return The last modification date of the search index.
     */
    public Date lastModificationDate() {
        return this.luceneSearchIndex.getLastModificationDate();
    }

    /**
     * Returns the creation date of this document base, as reported by its search index.
     *
     * @return The creation date of the search index.
     */
    public Date creationDate() {
        return this.luceneSearchIndex.getCreationDate();
    }

    /**
     * Initializes this document base: the parent class first, then the search index.
     *
     * @throws SDXException If the parent or the search index initialization fails.
     */
    public void init() throws SDXException {
        super.init();

        // the search index must have been built by the configuration step
        luceneSearchIndex.init();
    }


}
