package tecgraf.javautils.parsers.csv;

import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.LineNumberReader;
import java.io.Reader;
import java.nio.charset.Charset;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.ListIterator;

import tecgraf.javautils.parsers.FiniteAutomaton;
import tecgraf.javautils.parsers.Parser;
import tecgraf.javautils.parsers.State;
import tecgraf.javautils.parsers.Token;
import tecgraf.javautils.parsers.actions.AppendAction;
import tecgraf.javautils.parsers.actions.DiscardAction;
import tecgraf.javautils.parsers.actions.GenerateTokenAction;
import tecgraf.javautils.parsers.exception.AutomatonException;
import tecgraf.javautils.parsers.exception.MalformedInputException;
import tecgraf.javautils.parsers.iterators.CharSymbolIterator;
import tecgraf.javautils.parsers.symbols.CharSymbol;
import tecgraf.javautils.parsers.symbols.Symbol;

/**
 * CSV parser.
 * <p>
 * The input is read one line at a time and every line is handed separately to
 * a finite automaton, producing one row of fields per line. Fields are
 * delimited by {@code ','} or {@code ';'} and may be enclosed in double
 * quotes. Because the input is split into lines before the automaton sees it,
 * a quoted field cannot span more than one line.
 * 
 * @author Tecgraf/PUC-Rio
 */
public class CsvParser extends Parser {
  /**
   * Creates a parser driven by the CSV finite automaton.
   */
  public CsvParser() {
    super(new CsvFiniteAutomaton());
  }

  /**
   * Parses the given stream and closes it when done.
   * <p>
   * The stream is closed on every exit path, including the case in which the
   * character decoder cannot be created (for instance, a {@code null}
   * charset).
   * 
   * @param inputStream stream to read from.
   * @param charset charset used to decode the stream.
   * @return the matrix of parsed texts: one row per input line, one column
   *         per field.
   * @throws IOException on I/O failure.
   * @throws MalformedInputException if a line is rejected by the automaton.
   */
  public final String[][] parse(InputStream inputStream, Charset charset)
    throws IOException, MalformedInputException {
    // The stream is declared as a resource of its own so that it is still
    // closed when the InputStreamReader constructor throws. Closing it a
    // second time through the reader has no effect (Closeable contract).
    try (InputStream stream = inputStream;
      InputStreamReader reader = new InputStreamReader(stream, charset)) {
      return this.parse(reader);
    }
  }

  /**
   * Parses all the lines available from the reader.
   * 
   * @param reader character source; it is not closed by this method.
   * @return the matrix of parsed texts.
   * @throws IOException on I/O failure.
   * @throws MalformedInputException if a line is rejected by the automaton.
   */
  private String[][] parse(InputStreamReader reader) throws IOException,
    MalformedInputException {
    return toCsvRegisterArray(this.createCsvRegisterList(reader));
  }

  /**
   * Converts a list of rows into a matrix of texts.
   * 
   * @param lineList rows, each one being the list of its fields.
   * @return a jagged matrix in which row {@code i} holds the fields of
   *         {@code lineList.get(i)}.
   */
  private String[][] toCsvRegisterArray(List<List<String>> lineList) {
    String[][] fieldMatrix = new String[lineList.size()][];
    int row = 0;
    for (List<String> fieldList : lineList) {
      fieldMatrix[row++] = fieldList.toArray(new String[0]);
    }
    return fieldMatrix;
  }

  /**
   * Reads the input line by line and parses each line into a list of fields.
   * 
   * @param reader character source.
   * @return the list of rows, in input order.
   * @throws IOException on I/O failure.
   * @throws MalformedInputException if the automaton rejects a line; the
   *         exception carries the number of the offending line.
   */
  private List<List<String>> createCsvRegisterList(Reader reader)
    throws IOException, MalformedInputException {
    List<List<String>> lineList = new ArrayList<>();
    LineNumberReader lineNumberReader = new LineNumberReader(reader);
    String line;
    while ((line = lineNumberReader.readLine()) != null) {
      List<Token> tokens;
      try {
        final CharSymbolIterator symbolIterator = new CharSymbolIterator(line);
        tokens = this.parse(symbolIterator);
      }
      catch (AutomatonException e) {
        // After readLine() the counter already includes the line just read,
        // so this is the 1-based number of the offending line.
        throw new MalformedInputException(lineNumberReader.getLineNumber(), e);
      }
      lineList.add(this.createStringList(tokens));
    }
    return lineList;
  }

  /**
   * Converts each token into the string formed by its symbols.
   * 
   * @param tokens tokens produced for one line; every symbol is expected to
   *        be a {@link CharSymbol}.
   * @return one string per token, in the same order.
   */
  private List<String> createStringList(List<Token> tokens) {
    List<String> stringList = new ArrayList<>(tokens.size());
    for (Token token : tokens) {
      StringBuilder buffer = new StringBuilder();
      for (Symbol<?> symbol : token.getSymbolList()) {
        buffer.append(((CharSymbol) symbol).getObject());
      }
      stringList.add(buffer.toString());
    }
    return stringList;
  }

  /**
   * Finite automaton that recognizes the fields of a single CSV line.
   * 
   * @author Tecgraf/PUC-Rio
   */
  private static class CsvFiniteAutomaton extends FiniteAutomaton {
    /**
     * Symbol: ","
     */
    private static final CharSymbol COMMA = new CharSymbol(',');

    /**
     * Symbol: ";"
     */
    private static final CharSymbol SEMICOLON = new CharSymbol(';');

    /**
     * Symbol: double quote
     */
    private static final CharSymbol QUOTE = new CharSymbol('\"');

    /**
     * Symbols treated as white space.
     */
    private static final CharSymbol[] SPACES = { new CharSymbol(' '),
        new CharSymbol('\t'), new CharSymbol('\r'), new CharSymbol('\n') };

    /**
     * Builds the states and transitions of the automaton.
     * <p>
     * NOTE(review): the boolean passed to {@link State} presumably tells
     * whether the input may end in that state; only the "inside quotes" state
     * is created with {@code false}. Confirm against {@code State}. The exact
     * effect of each action is defined in the {@code actions} package.
     */
    private CsvFiniteAutomaton() {
      super(new State(true));
      State delimiter = this.getInitialState();
      State inWord = new State(true);
      State inQuoteWord = new State(false);
      State quoteInQuoteWord = new State(true);

      // Start of a field: white space is discarded, a quote opens a quoted
      // field, a separator generates a token right away and any other symbol
      // starts an unquoted field.
      delimiter.setDefaultTransition(AppendAction.getInstance(), inWord);
      delimiter.addTransition(COMMA, GenerateTokenAction.getInstance(),
        delimiter);
      delimiter.addTransition(SEMICOLON, GenerateTokenAction.getInstance(),
        delimiter);
      delimiter.addTransition(QUOTE, DiscardAction.getInstance(), inQuoteWord);
      delimiter.addTransition(SPACES, DiscardAction.getInstance(), delimiter);

      // Unquoted field: every symbol is appended (white space and quotes
      // included) until a separator is found.
      inWord.setDefaultTransition(AppendAction.getInstance(), inWord);
      inWord.addTransition(COMMA, GenerateTokenAction.getInstance(), delimiter);
      inWord.addTransition(SEMICOLON, GenerateTokenAction.getInstance(),
        delimiter);

      // Quoted field: every symbol is appended, separators included; a quote
      // is discarded and may be either the closing quote or the first half of
      // a doubled quote.
      inQuoteWord.setDefaultTransition(AppendAction.getInstance(), inQuoteWord);
      inQuoteWord.addTransition(QUOTE, DiscardAction.getInstance(),
        quoteInQuoteWord);

      // Right after a quote seen inside a quoted field: a second quote is
      // appended and the quoted field goes on; a separator ends the field;
      // any other symbol is appended and the field continues as unquoted.
      quoteInQuoteWord.setDefaultTransition(AppendAction.getInstance(), inWord);
      quoteInQuoteWord.addTransition(QUOTE, AppendAction.getInstance(),
        inQuoteWord);
      quoteInQuoteWord.addTransition(COMMA, GenerateTokenAction.getInstance(),
        delimiter);
      quoteInQuoteWord.addTransition(SEMICOLON, GenerateTokenAction
        .getInstance(), delimiter);
    }
  }
}
