package br.pucrio.tecgraf.soma.job.log.reader;

import br.pucrio.tecgraf.soma.job.log.monitor.event.FileChunk;
import com.ibm.icu.text.CharsetDetector;
import com.ibm.icu.text.CharsetMatch;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.*;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.nio.file.Path;
import java.util.Arrays;

import static java.lang.String.format;

/**
 * Reads a byte range of a file and decodes it with a given, configured or auto-detected charset.
 *
 * <p>When the charset in use is UTF-8, the requested byte range is widened so that a multi-byte
 * character cut by the range boundaries is returned whole instead of being split.
 */
public class FileReader {

  private static final Logger LOG = LoggerFactory.getLogger(FileReader.class);

  /**
   * Number of extra bytes read on each side of the requested range for UTF-8 content. A UTF-8
   * character has at most 4 bytes, so 3 bytes are enough to reach its first or its last byte.
   */
  private static final int UTF8_PADDING = 3;

  private final Charset defaultCharset;
  private final boolean enableCharsetDetection;
  private final Integer maxLengthSize;

  /**
   * Creates a reader.
   *
   * @param maxLengthSize upper bound for the number of bytes requested in a single read
   * @param defaultCharset charset used when none is requested and none is (or can be) detected
   * @param enableCharsetDetection whether the charset should be guessed from the file content
   */
  public FileReader(Integer maxLengthSize, Charset defaultCharset, boolean enableCharsetDetection) {
    this.maxLengthSize = maxLengthSize;
    this.defaultCharset = defaultCharset;
    this.enableCharsetDetection = enableCharsetDetection;
  }

  /**
   * Ensures that the given file exists, is a regular file and can be read.
   *
   * @param file the file to check
   * @throws IOException if the file does not exist, is not a regular file or is not readable
   */
  private static void checkFileExistsAndIsReadable(File file) throws IOException {
    if (!file.exists() || !file.isFile()) {
      throw new IOException(format("File %s does not exists", file.getAbsolutePath()));
    }
    if (!file.canRead()) {
      throw new IOException(format("File %s is not readable", file.getAbsolutePath()));
    }
  }

  /**
   * Reads from the file until the buffer is full or the end of the file is reached. A single
   * {@code read} call is allowed to return fewer bytes than requested, hence the loop.
   *
   * @param raf the file, already positioned at the first byte to read
   * @param buffer the destination buffer
   * @return the number of bytes stored in the buffer (0 when already at the end of the file)
   * @throws IOException if the underlying read fails
   */
  private static int readUntilFullOrEof(RandomAccessFile raf, byte[] buffer) throws IOException {
    int total = 0;
    while (total < buffer.length) {
      int read = raf.read(buffer, total, buffer.length - total);
      if (read < 0) {
        break;
      }
      total += read;
    }
    return total;
  }

  /**
   * Returns the configured upper bound for the number of bytes requested in a single read.
   *
   * @return the maximum length
   */
  public Integer getMaxLengthSize() {
    return maxLengthSize;
  }

  /**
   * Reads a chunk of the file starting at {@code offset}.
   *
   * <p>The requested length is capped by {@link #getMaxLengthSize()}. For UTF-8 content the
   * returned data may start before {@code offset} and may end after the requested range so that
   * no character is split; the chunk is built with the corrected offset and with the actual
   * number of bytes returned.
   *
   * @param filePath path of the file to read
   * @param offset position of the first requested byte
   * @param length number of requested bytes; when {@code null}, everything from {@code offset} to
   *     the end of the file is requested
   * @param charset charset to decode with; when {@code null}, the charset is resolved by {@link
   *     #getCharset(File, Charset)}
   * @return the chunk read, or {@code null} when {@code offset} is {@code null}, negative or
   *     greater than the file length
   * @throws IOException if the file does not exist, is not readable or cannot be read
   */
  public FileChunk readFile(String filePath, Long offset, Integer length, Charset charset)
      throws IOException {
    File file = new File(filePath);
    String fileName = file.getName();
    Long fileLength = file.length();

    if (offset == null || offset < 0 || offset > fileLength) {
      return null;
    }

    if (length == null) {
      // Everything up to the end of the file was requested. The value is clamped rather than
      // converted with Math.toIntExact, which would throw for files with more than
      // Integer.MAX_VALUE remaining bytes even though the length is capped right below.
      long remaining = Math.max(fileLength - offset, 0L);
      length = (int) Math.min(remaining, (long) Integer.MAX_VALUE);
    }

    try {
      checkFileExistsAndIsReadable(file);
    } catch (IOException e) {
      LOG.error(e.getMessage());
      throw e;
    }

    // Caps the size of the chunk at maxLengthSize
    int lengthToRead = Math.min(length, maxLengthSize);

    Charset charsetToUse = getCharset(file, charset);
    // equals() instead of ==: an equal charset is not guaranteed to be the same instance
    boolean utf8 = StandardCharsets.UTF_8.equals(charsetToUse);

    // Buffer padding used to deal with characters truncated by the range boundaries
    int paddingStart = 0;
    int paddingEnd = 0;
    if (utf8) {
      paddingStart = adjustStartPadding(offset);
      paddingEnd = UTF8_PADDING;
    }

    LOG.info(
        "Reading file [{}, {}, {}] with charset {}", fileName, offset, lengthToRead, charsetToUse);
    try (RandomAccessFile raf = new RandomAccessFile(file, "r")) {
      int paddingLength = lengthToRead + paddingStart + paddingEnd;
      byte[] buffer = new byte[paddingLength];

      // The start padding is only applied when the offset is inside the file
      long paddingOffset = offset;
      if (offset < fileLength) {
        paddingOffset = offset - paddingStart;
      }

      raf.seek(paddingOffset);
      int bytesRead = readUntilFullOrEof(raf, buffer);
      LOG.debug("Bytes read from file [{}]: {}", fileName, bytesRead);

      byte[] data;
      if (bytesRead <= 0) {
        LOG.debug("No data read from file [{}]", fileName);
        data = new byte[0];
      } else if (paddingLength > bytesRead) {
        LOG.debug("Data read from file [{}] is less than length {}", fileName, lengthToRead);
        data = Arrays.copyOf(buffer, bytesRead);
      } else {
        LOG.debug("Data read from file [{}] is equal to length {}", fileName, lengthToRead);
        data = buffer;
      }

      byte[] finalData = data;
      long finalOffset = offset;
      if (data.length > 0 && utf8) {
        // Index, inside data, of the last requested byte. It is clamped to 0 because a
        // zero-length request at offset 0 would otherwise produce -1 and an out-of-bounds access.
        int lastRequestedIndex =
            Math.max(Math.min(paddingStart + lengthToRead, data.length) - 1, 0);

        Tuple tuple = checkAndAdjustSplitUTF8Char(data, paddingStart, lastRequestedIndex);

        // Data and offset corrected so that no character is split
        finalData = tuple.data;
        finalOffset = offset - tuple.offset;
      }

      String encodedData = new String(finalData, charsetToUse);

      return new FileChunk(
          Path.of(filePath), encodedData, finalData.length, fileLength, finalOffset, charsetToUse);
    }
  }

  /**
   * Computes how many bytes before the offset must be read: as many as exist, up to {@link
   * #UTF8_PADDING}.
   *
   * @param adjustedOffset the requested offset
   * @return a value between 0 and 3
   */
  private int adjustStartPadding(Long adjustedOffset) {
    return (int) Math.max(0L, Math.min(adjustedOffset, UTF8_PADDING));
  }

  /**
   * Trims the padded buffer to whole UTF-8 characters.
   *
   * @param buffer the bytes read, including the padding
   * @param paddingStart index in the buffer of the first requested byte
   * @param paddingEnd index in the buffer of the last requested byte
   * @return the trimmed data and the number of bytes by which its start precedes {@code
   *     paddingStart}
   */
  private Tuple checkAndAdjustSplitUTF8Char(
      byte[] buffer, final int paddingStart, final int paddingEnd) {
    // First byte of the character that contains the first requested byte
    int dataStart = findUTF8CharStartAtPosition(buffer, paddingStart);

    // Last byte of the last complete character up to the last requested byte
    int dataEnd = findLastUTF8CharEndBeforePosition(buffer, paddingEnd);

    // The whole request fell inside a single character: return that character entirely
    if (dataStart > dataEnd) {
      dataEnd += getUTF8CharRemainingBytesNumber(buffer[dataStart]);
    }

    // Arrays.copyOfRange pads with zeros when the end is beyond the array, which would append
    // bytes that are not in the file (e.g. when the file ends in the middle of a character)
    dataEnd = Math.min(dataEnd, buffer.length - 1);

    // Copy up to dataEnd + 1 because the end of Arrays.copyOfRange is exclusive
    byte[] data = Arrays.copyOfRange(buffer, dataStart, dataEnd + 1);

    return new Tuple(paddingStart - dataStart, data);
  }

  /**
   * Returns the total number of bytes of the UTF-8 character introduced by the given byte.
   *
   * <p>Each width is tested with its own mask/value pair, the same way {@link
   * #findLastUTF8CharEndBeforePosition(byte[], int)} does.
   *
   * @param byteChar the first byte of a character
   * @return 2, 3 or 4 for the lead byte of a multi-byte character, 1 otherwise
   */
  private int getUTF8CharRemainingBytesNumber(byte byteChar) {
    if ((byteChar & UFT8Constants.TWO_BYTES_MASK) == UFT8Constants.TWO_BYTES_VALUE) {
      return 2;
    }
    if ((byteChar & UFT8Constants.THREE_BYTES_MASK) == UFT8Constants.THREE_BYTES_VALUE) {
      return 3;
    }
    if ((byteChar & UFT8Constants.FOUR_BYTES_MASK) == UFT8Constants.FOUR_BYTES_VALUE) {
      return 4;
    }
    return 1;
  }

  /**
   * Finds the start position of the UTF-8 character that contains the byte at the given position.
   *
   * @param buffer the byte buffer holding the UTF-8 characters
   * @param currentPosition the current position
   * @return the start position of the character; when no start byte is found the search stops at
   *     position 0
   */
  private int findUTF8CharStartAtPosition(byte[] buffer, int currentPosition) {
    int pos = currentPosition;
    // Walk backwards while the byte matches the common multi-byte pattern
    // (presumably a continuation byte; UFT8Constants is defined elsewhere)
    while (pos > 0
        && (buffer[pos] & UFT8Constants.COMMON_MULTIBYTE_MASK)
            == UFT8Constants.COMMON_MULTIBYTE_VALUE) {
      pos--;
    }
    return pos;
  }

  /**
   * Finds the end position of the last complete UTF-8 character in the buffer up to the given
   * position. The given position is part of the searched range.
   *
   * @param buffer the byte buffer holding the UTF-8 characters
   * @param currentPosition the current position
   * @return the position of the last byte of that character; it is one less than the start of the
   *     character containing {@code currentPosition} when that character does not end exactly at
   *     {@code currentPosition}
   */
  private int findLastUTF8CharEndBeforePosition(byte[] buffer, int currentPosition) {
    int charStart = findUTF8CharStartAtPosition(buffer, currentPosition);
    int charLength = getUTF8CharRemainingBytesNumber(buffer[charStart]);

    // Single-byte characters
    if (charLength == 1) {
      return charStart;
    }

    // Multi-byte characters: keep the character only if it ends exactly at the current position
    int bytesInRange = currentPosition - charStart + 1;
    return bytesInRange == charLength ? currentPosition : charStart - 1;
  }

  /**
   * Resolves the charset used to decode the file.
   *
   * <p>The charset requested by the client wins. Otherwise, when auto-detection is disabled, the
   * default charset is used. Otherwise the charset is guessed from the file content and accepted
   * only when it is UTF-8 or ISO-8859-1 with a confidence above 80; in every other case the
   * default charset is used.
   *
   * @param file the file to be read
   * @param forcedCharset the charset requested by the client, or {@code null}
   * @return the charset to use
   * @throws IOException if the file cannot be read for detection
   */
  protected Charset getCharset(File file, Charset forcedCharset) throws IOException {
    Charset charset = forcedCharset;
    if (forcedCharset != null) {
      LOG.debug(
          "Using charset {} requested by client for file {}.", charset, file.getAbsolutePath());
      return charset;
    }
    if (!this.enableCharsetDetection) {
      charset = defaultCharset;
      LOG.debug(
          "Using charset {} for file {}. Auto-detection is disabled.",
          charset,
          file.getAbsolutePath());
      return charset;
    }
    try (BufferedInputStream bs = new BufferedInputStream(new FileInputStream(file))) {
      CharsetDetector charsetDetector = new CharsetDetector();
      charsetDetector.setText(bs);
      charsetDetector.enableInputFilter(true);
      CharsetMatch cm = charsetDetector.detect();
      if (cm == null) {
        // The detector returns null when no charset matches the input
        LOG.debug(
            "Using default charset {} for file {} because no charset could be guessed",
            defaultCharset,
            file.getAbsolutePath());
        return defaultCharset;
      }
      if (cm.getConfidence() > 80) {
        final String UTF_8 = "UTF-8";
        final String ISO_8859_1 = "ISO-8859-1";
        switch (cm.getName()) {
          case UTF_8:
            charset = StandardCharsets.UTF_8;
            LOG.debug(
                "Guessed charset {} with confidence {} for file {}",
                charset,
                cm.getConfidence(),
                file.getAbsolutePath());
            break;
          case ISO_8859_1:
            charset = StandardCharsets.ISO_8859_1;
            LOG.debug(
                "Guessed charset {} with confidence {} for file {}",
                charset,
                cm.getConfidence(),
                file.getAbsolutePath());
            break;
          default:
            charset = defaultCharset;
            LOG.debug(
                "Using default charset {} for file {} because the guessed charset was not UTF-8 nor ISO-8859-1 [guessed charset {} with confidence {}]",
                charset,
                file.getAbsolutePath(),
                cm.getName(),
                cm.getConfidence());
            break;
        }
      } else {
        charset = defaultCharset;
        LOG.debug(
            "Using default charset {} for file {} because the confidence was lower than 80 [guessed charset {} with confidence {}]",
            charset,
            file.getAbsolutePath(),
            cm.getName(),
            cm.getConfidence());
      }

      return charset;
    }
  }

  /** Helper class that carries the offset correction and the data after split-char handling. */
  static class Tuple {
    /** Number of bytes by which the start of the data precedes the requested offset. */
    int offset;

    /** The bytes to return, aligned to whole characters. */
    byte[] data;

    public Tuple(int offset, byte[] data) {
      this.offset = offset;
      this.data = data;
    }
  }
}
