#' A tibble containing sample sequence frequencies and estimated probabilities
#'
#' This function returns a tibble containing the sample sequences, their frequencies
#' and the estimated transition probabilities.
#'
#' @details The parameters \code{S} and \code{j} determine which columns of \code{countsTab}
#' are retained in the output. Specifying a lag \code{j} is optional. All lags can
#' be specified via \code{S}, while leaving \code{j = NULL} (default). The output
#' remains the same as when specifying \code{S} and \code{j} separately. The
#' inclusion of \code{j} as a parameter improves clarity within the package's
#' algorithms. Note that \code{j} cannot be an element of \code{S}.
#'
#' @param S A numeric vector of positive integers or \code{NULL}. Represents a set
#'  of past lags that must be present within the columns of the \code{countsTab}
#'  argument and are to be considered while estimating the transition probabilities.
#'  Both \code{S} and \code{j} cannot be \code{NULL} at the same time.
#' @param j An integer or \code{NULL}. Typically represents a lag \code{j} in the
#'  complement of \code{S}. Both \code{S} and \code{j} cannot be \code{NULL}
#'  at the same time. See *Details* for further information.
#' @param A A vector with nonnegative integers. Must have at least two different entries.
#' \code{A} represents the state space.
#' @param countsTab A tibble or a data frame with all sequences of length d+1 that
#'  appear in the sample, and their absolute frequency. This tibble is typically
#'  generated by the function [countsTab()]. If using a custom data frame not
#'  generated by [countsTab()], make sure its format and column names match the
#'  expected structure; otherwise, errors may occur in [freqTab()].
#' @param complete Logical. If \code{TRUE} all sequences that did not appear in the
#' sample will be included in the output with frequency equal to 0.
#'
#' @return A tibble where each row represents a sequence of elements from \code{A}.
#' The initial columns display each sequence symbol separated into columns
#' corresponding to their time indexes. The remaining columns show the sample
#' frequencies of the sequences and the MLE (Maximum Likelihood Estimator)
#' of the transition probabilities.
#'
#' @export
#' @importFrom dplyr %>%
#' @importFrom methods is
#' @examples
#' # Reproducible simulated data
#' set.seed(1)
#' M <- MTDmodel(Lambda = c(1, 4), A = c(1, 2, 3), lam0 = 0.1)
#' X <- perfectSample(M, N = 400)
#' ct <- countsTab(X, d = 5)
#'
#' # Example with S non-empty and j specified
#' freqTab(S = c(1, 4), j = 2, A = c(1, 2, 3), countsTab = ct)
#'
#' # Equivalent to calling with S = c(1,2,4) and j = NULL
#' freqTab(S = c(1, 2, 4), j = NULL, A = c(1, 2, 3), countsTab = ct)
#'
freqTab <- function(S, j = NULL, A, countsTab, complete = TRUE){

  # Validate inputs (delegated to the package's input checker)
  check_freqTab_inputs(S, j, A, countsTab, complete)

  # Lags to keep, ordered from the most distant to the most recent, so the
  # output columns read x<largest lag>, ..., x<smallest lag>, a
  Sj <- sort(c(S, j), decreasing = TRUE)
  A <- sort(A)
  lenSj <- length(Sj)
  lenA <- length(A)
  past_cols <- paste0("x", Sj)
  filtrs <- c(past_cols, "a")

  # Collapse countsTab onto the selected columns, summing the counts Nxa of
  # all rows that agree on those columns
  freq <- countsTab %>%
    dplyr::group_by(dplyr::across(dplyr::all_of(filtrs))) %>%
    dplyr::summarise(Nxa_Sj = sum(Nxa), .groups = "drop")

  # If needed, complete the table with sequences that did not appear in the sample
  if ((nrow(freq) < lenA^(lenSj + 1)) && complete) {

    # Try to generate all possible sequences of length lenSj + 1 with elements
    # of A. expand.grid() varies its first column fastest, so the columns are
    # reversed to make the last symbol ("a") the fastest-varying one.
    all_seqs <- tryCatch(
      expand.grid(rep(list(A), lenSj + 1))[, rev(seq_len(lenSj + 1)), drop = FALSE],
      error = function(e) NULL
    )
    if (is.null(all_seqs)) {
      stop("The dataset is too large. Consider reducing the number of lags in S.")
    }
    names(all_seqs) <- filtrs

    # One string key per row. The separator is essential: without it,
    # multi-digit symbols collide (e.g. (1, 11) and (11, 1) would both map
    # to "111") and some unseen sequences would never be added.
    seq_key <- function(df) {
      do.call(paste, c(unname(as.list(df)), sep = "|"))
    }

    # Keep only the sequences that are absent from the sample
    is_unseen <- !(seq_key(all_seqs) %in% seq_key(freq[filtrs]))
    unseen <- all_seqs[is_unseen, , drop = FALSE]

    # Add the missing sequences with 0 frequency and restore the row order
    unseen$Nxa_Sj <- rep(0, nrow(unseen))
    freq <- rbind(freq, unseen)
    freq <- dplyr::arrange(freq, dplyr::across(dplyr::all_of(filtrs)))

  }

  # For each past x_Sj: Nx_Sj is the total count of that past, and qax_Sj is
  # the estimated transition probability Nxa_Sj / Nx_Sj. Pasts that were never
  # observed (Nx_Sj == 0) get the uniform probability 1 / |A|.
  freq <- freq %>%
    dplyr::group_by(dplyr::across(dplyr::all_of(past_cols))) %>%
    dplyr::mutate(Nx_Sj = sum(Nxa_Sj)) %>%
    dplyr::mutate(qax_Sj = dplyr::if_else(Nx_Sj > 0, Nxa_Sj/Nx_Sj, 1/lenA)) %>%
    dplyr::ungroup()

  dplyr::as_tibble(freq)
}

