package tecgraf.javautils.parsers.tsv;

import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.LineNumberReader;
import java.nio.charset.Charset;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.ListIterator;

import tecgraf.javautils.parsers.FiniteAutomaton;
import tecgraf.javautils.parsers.Parser;
import tecgraf.javautils.parsers.State;
import tecgraf.javautils.parsers.Token;
import tecgraf.javautils.parsers.actions.AppendAction;
import tecgraf.javautils.parsers.actions.GenerateTokenAction;
import tecgraf.javautils.parsers.exception.AutomatonException;
import tecgraf.javautils.parsers.exception.MalformedInputException;
import tecgraf.javautils.parsers.iterators.CharSymbolIterator;
import tecgraf.javautils.parsers.symbols.CharSymbol;
import tecgraf.javautils.parsers.symbols.Symbol;

/**
 * Parser for TSV (tab-separated values) input.
 * 
 * @author Tecgraf
 */
public final class TsvParser extends Parser {

  /**
   * Constructor. Configures the underlying {@link Parser} with the TSV
   * automaton declared in this class.
   */
  public TsvParser() {
    super(new TsvFiniteAutomaton());
  }

  /**
   * Reads the whole stream line by line and converts it into a matrix of
   * strings: one row per input line, one column per token produced by the
   * automaton for that line.
   * <p>
   * The reader created over {@code inStream} is always closed before this
   * method returns or throws, which also closes {@code inStream}.
   * 
   * @param inStream input stream
   * @param charset charset used to decode the stream
   * @return matrix of strings read from the stream
   * @throws IOException in case of I/O error
   * @throws MalformedInputException if a line is rejected by the automaton
   */
  public String[][] parse(InputStream inStream, Charset charset)
    throws IOException, MalformedInputException {
    final InputStreamReader reader = new InputStreamReader(inStream, charset);
    try {
      final List<List<String>> lineList = createTsvRegisterList(reader);
      return toTsvRegisterArray(lineList);
    }
    finally {
      // Close on every exit path so that a parse or I/O failure does not leak
      // the reader (and the stream it wraps).
      reader.close();
    }
  }

  /**
   * Builds the list of registers (one list of strings per input line).
   * 
   * @param reader reader already configured with the correct charset
   * @return list of lists of strings, in input order
   * @throws IOException in case of I/O error
   * @throws MalformedInputException if the automaton fails on a line; the
   *         exception carries the line number reported by the
   *         {@link LineNumberReader} at that point
   */
  private List<List<String>> createTsvRegisterList(InputStreamReader reader)
    throws IOException, MalformedInputException {
    final List<List<String>> lineList = new ArrayList<List<String>>();
    final LineNumberReader lineNumberReader = new LineNumberReader(reader);
    String line = lineNumberReader.readLine();
    while (line != null) {
      List<Token> tokens;
      try {
        tokens = this.parse(new CharSymbolIterator(line));
      }
      catch (AutomatonException e) {
        throw new MalformedInputException(lineNumberReader.getLineNumber(), e);
      }
      lineList.add(createStringList(tokens));
      line = lineNumberReader.readLine();
    }
    return lineList;
  }

  /**
   * Creates the list of strings from the list of tokens. Each token becomes
   * one string, formed by concatenating the objects of its symbols in order.
   * 
   * @param tokens list of tokens
   * @return list with one string per token
   */
  private List<String> createStringList(List<Token> tokens) {
    final List<String> stringList = new ArrayList<String>(tokens.size());
    for (Token token : tokens) {
      final StringBuilder builder = new StringBuilder();
      for (Symbol<?> symbol : token.getSymbolList()) {
        // The cast is kept on purpose: only CharSymbol instances are expected
        // here, and anything else must still fail with ClassCastException.
        final Object value = ((CharSymbol) symbol).getObject();
        builder.append(value);
      }
      stringList.add(builder.toString());
    }
    return stringList;
  }

  /**
   * Converts the list of registers into a matrix of strings. Rows may have
   * different lengths, since each row is sized by its own list.
   * 
   * @param lineList list of lists of strings
   * @return matrix of strings
   */
  private String[][] toTsvRegisterArray(List<List<String>> lineList) {
    final String[][] fieldMatrix = new String[lineList.size()][];
    int lineCounter = 0;
    for (List<String> fieldList : lineList) {
      fieldMatrix[lineCounter] =
        fieldList.toArray(new String[fieldList.size()]);
      lineCounter++;
    }
    return fieldMatrix;
  }

  /**
   * TSV automaton: a single state that loops on itself for every symbol.
   * 
   * @author Tecgraf
   */
  private static class TsvFiniteAutomaton extends FiniteAutomaton {

    /**
     * Field separator (tab character).
     */
    private static final CharSymbol SEPARATOR = new CharSymbol('\t');

    /**
     * Constructor. Registers two transitions on the initial state, both
     * leading back to the initial state itself: the default transition runs
     * the {@link AppendAction} and the transition on {@link #SEPARATOR} runs
     * the {@link GenerateTokenAction}.
     */
    private TsvFiniteAutomaton() {
      // NOTE(review): the 'true' flag presumably marks the state as
      // final/accepting - confirm against State.
      super(new State(true));
      State initial = this.getInitialState();
      // Judging by the action names, ordinary symbols are accumulated into
      // the current token and a separator emits it - confirm against the
      // action classes.
      final AppendAction appendAction = AppendAction.getInstance();
      initial.setDefaultTransition(appendAction, initial);
      final GenerateTokenAction genTokenAction =
        GenerateTokenAction.getInstance();
      initial.addTransition(SEPARATOR, genTokenAction, initial);
    }
  }
}
