/*
SDX: Documentary System in XML.
Copyright (C) 2000, 2001, 2002  Ministere de la culture et de la communication (France), AJLSM

Ministere de la culture et de la communication,
Mission de la recherche et de la technologie
3 rue de Valois, 75042 Paris Cedex 01 (France)
mrt@culture.fr, michel.bottin@culture.fr

AJLSM, 17, rue Vital Carles, 33000 Bordeaux (France)
sevigny@ajlsm.com

This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
See the GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program; if not, write to the
Free Software Foundation, Inc.
59 Temple Place - Suite 330, Boston, MA  02111-1307, USA
or connect to:
http://www.fsf.org/copyleft/gpl.html
*/
package fr.gouv.culture.sdx.search.lucene.analysis.filter;

import org.apache.avalon.framework.logger.LogEnabled;
import org.apache.avalon.framework.logger.Logger;
import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;

/**
 * A filter that transforms accented characters in the ISO Latin 1 character set to their unaccented counterparts.
 *
 * <p>
 * For example, the letter '&eacute;' (e with an acute accent) will be converted to 'e'.
 * <p>
 * This filter doesn't change the character case. To lowercase letters as well, one should
 * also use another filter.
 */
public class ISOLatin1AccentFilter extends TokenFilter implements LogEnabled {

    /** Avalon logger to write information. */
    private org.apache.avalon.framework.logger.Logger logger;

    /**Builds a new filter*/
    public ISOLatin1AccentFilter() {
    }

    /**
     *	Builds a filter from a token stream.
     *
     *	@param	in	The input token stream.
     */
    public void setUp(TokenStream in) {
        // Just keep a reference to the token stream.
        this.input = in;
    }

    /**
     *	Transform ISOLatin1 accented characters to their unaccented counterparts.
     */
    public final Token next() throws java.io.IOException {
        // We will work on the following available token.
        Token t = input.next();
        if (t == null) return null;

        String tokenText = t.termText();
        StringBuffer chars = new StringBuffer();

        // Loop over the characters, replace those that need to be.
        for (int i = 0; i < tokenText.length(); i++) {
            switch (tokenText.charAt(i)) {
                case '':
                case '':
                case '':
                case '':
                case '':
                case '':
                    chars.append("A");
                    break;
                case '':
                    chars.append("AE");
                    break;
                case '':
                    chars.append("C");
                    break;
                case '':
                case '':
                case '':
                case '':
                    chars.append("E");
                    break;
                case '':
                case '':
                case '':
                case '':
                    chars.append("I");
                    break;
                case '':
                    chars.append("D");
                    break;
                case '':
                    chars.append("N");
                    break;
                case '':
                case '':
                case '':
                case '':
                case '':
                case '':
                    chars.append("O");
                    break;
                case '':
                    chars.append("OE");
                    break;
                case '':
                    chars.append("TH");
                    break;
                case '':
                case '':
                case '':
                case '':
                    chars.append("U");
                    break;
                case '':
                case '':
                    chars.append("Y");
                    break;
                case '':
                case '':
                case '':
                case '':
                case '':
                case '':
                    chars.append("a");
                    break;
                case '':
                    chars.append("ae");
                    break;
                case '':
                    chars.append("c");
                    break;
                case '':
                case '':
                case '':
                case '':
                    chars.append("e");
                    break;
                case '':
                case '':
                case '':
                case '':
                    chars.append("i");
                    break;
                case '':
                    chars.append("d");
                    break;
                case '':
                    chars.append("n");
                    break;
                case '':
                case '':
                case '':
                case '':
                case '':
                case '':
                    chars.append("o");
                    break;
                case '':
                    chars.append("oe");
                    break;
                case '':
                    chars.append("ss");
                    break;
                case '':
                    chars.append("th");
                    break;
                case '':
                case '':
                case '':
                case '':
                    chars.append("u");
                    break;
                case '':
                case '':
                    chars.append("y");
                    break;
                default:
                    chars.append(tokenText.charAt(i));
                    break;
            }
        }
        // Finally we return a new token with transformed characters.
        return new Token(chars.toString(), t.startOffset(), t.endOffset(), t.type());
    }

    /** Set's the logger
     *
     * @param logger    The logger to use.
     */
    public void enableLogging(Logger logger) {
        this.logger = logger;
    }

}
