% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/fd_test.R
\name{FDOTT}
\alias{FDOTT}
\title{Test linear relationships between probability vectors in factorial designs}
\usage{
FDOTT(
  samples,
  costm,
  H0 = "*",
  fac.names = NULL,
  method = c("plug-in", "bootstrap-deriv", "bootstrap-m", "permutation"),
  num.sim = 1000,
  null.mu = NULL,
  m.p = 0.5,
  is.metric = is_metric_cost_mat(costm, tol.ti = Inf),
  verbose = FALSE
)
}
\arguments{
\item{samples}{nested list of depth \eqn{D} (representing a \eqn{D}-way layout) containing count vectors.
A count vector is a vector of length \eqn{N} that contains the number of times a sample was observed at the respective points.
Can also be given as a matrix (row-wise), which is viewed as a one-way layout.}

\item{costm}{semi-metric cost matrix \eqn{c \in \mathbb{R}^{N \times N}}.}

\item{H0}{null hypothesis, see details.}

\item{fac.names}{names of the \eqn{D} factors. Used for printing. Default \code{NULL} corresponds to \code{"F1"} for factor 1, and so on.}

\item{method}{the method to use to simulate from the null distribution, see details.}

\item{num.sim}{number of samples to draw from the limiting null distribution.}

\item{null.mu}{probability vectors \eqn{\mu} underlying the null distribution, used only for \code{method = "plug-in"}.
Must be of the same structure as \code{samples}.}

\item{m.p}{exponent \eqn{p \in (0, 1)}, used only for \code{method = "bootstrap-m"}.}

\item{is.metric}{value indicating whether \eqn{c} is a metric cost matrix, see \code{\link{is_metric_cost_mat}}.}

\item{verbose}{logical value indicating whether additional information should be printed.}
}
\value{
A \code{FDOTT} object containing:
\tabular{ll}{
\code{fac.lvls}     \tab vector of levels of the factors \cr
\code{mu}           \tab matrix, empirical version \eqn{\hat{\mu}_n} of \eqn{\mu} that is based on \code{samples} \cr
\code{n}            \tab vector of sample sizes \eqn{n} \cr
\code{L}            \tab matrix \eqn{L} for the null hypothesis \eqn{H_0^L} \cr
\code{p.value}      \tab the \eqn{p}-value \cr
\code{statistic}    \tab the value of the test statistic \eqn{T^L(\hat{\mu}_n)} \cr
\code{null.samples} \tab samples drawn from the null distribution \cr
}
}
\description{
Perform FDOTT, an optimal transport (OT) based test in factorial designs,
to test linear relationships between probability vectors, based on samples from them.
}
\details{
Denote by \eqn{\mu} the matrix (row-wise) of the probability vectors (in lexicographical order of the factor combinations)
that underlie \code{samples}. FDOTT deals with null hypotheses of the form
\deqn{
H^L_0 : \; L\mu = 0\,,
}
where \eqn{L} is a suitable matrix with row sums all equal to \eqn{0}. The FDOTT statistic is defined as
\deqn{
 T^L(\hat{\mu}_n) := \frac{\sqrt{\rho_n}}{s} \sum_{m=1}^M \mathrm{OT}^{\pm}_c([L\hat{\mu}_n]_m, 0)\,,
}
where \eqn{\rho_n} and \eqn{s} are scaling factors, \eqn{[L\mu]_m} is the \eqn{m}-th row-vector of \eqn{L\mu}
and \eqn{\mathrm{OT}^{\pm}_c} the extended OT functional, see \code{\link{ot_cost_sgn}}.
The test is based on the asymptotic distribution of \eqn{T^L(\hat{\mu}_n)} under the null; for more details, see Groppe et al. (2025).

The form of \eqn{H_0^L} allows for testing hypotheses like interaction effects in classical ANOVA, obtained by formally substituting means by measures.
The following values are allowed for \code{H0}:
\itemize{
\item \code{H0 = "*"} (the default). Test all interactions (including main effects) of the factors. A specific interaction or main effect can be tested by
including the corresponding indices of the factors in a list, e.g., \code{H0 = list("*", c(1, 3))} corresponds to the interaction effect between factors 1 and 3.
Note that in a one-way layout, \code{H0 = "*"} reduces to \code{H0 = "="}.
\item \code{H0 = "|"}. Test all simple factor effects. A specific simple factor effect can be tested by including the
corresponding indices of the factors in a list, e.g., \code{H0 = list("|", c(1, 3))} corresponds to the simple factor effect of factors 1 and 3 within the
remaining factors.
\item \code{H0 = "="}. Test for treatment effect, i.e., whether all underlying probability vectors are the same. Note that each pairwise comparison can be tested
simultaneously via \code{\link{FDOTT_HSD}}.
\item \code{H0 = L}. Test \eqn{H_0^L} for the directly supplied \eqn{L} matrix. The name of the tested effect (useful for printing)
and the scaling \eqn{s} (by default \code{nrow(L)}) can be supplied by setting the \code{"effect"} and \code{"scaling"} attribute of \code{L}, respectively.
\item \code{H0 = list(...)}. Test a combined null hypothesis. Each element of the list represents a null hypothesis and can be given by one of the options above.
This is useful in combination with \code{\link{FDOTT_HSD}}, which allows testing all the given null hypotheses simultaneously.
}

To simulate from the limiting null distribution, there are four different methods:
\itemize{
\item \code{"plug-in"}: uses the limiting distribution where \eqn{\mu} is substituted by its empirical version (or \code{null.mu}, when specified).
\item \code{"bootstrap-deriv"}: uses the so-called derivative bootstrap.
\item \code{"bootstrap-m"}: uses \eqn{m}-out-of-\eqn{n} bootstrap with \eqn{m = \lfloor n^p \rfloor}.
\item \code{"permutation"}: uses a permutation approach, only works for \code{H0 = "="}.
}
These simulations can be done in parallel via \code{\link[future:plan]{future::plan}} and the progress can be shown with \code{\link[progressr:with_progress]{progressr::with_progress}}.
}
\examples{

# enable txt progressbar
progressr::handlers("txtprogressbar")
# enable parallel computation
if (requireNamespace("future")) {
    future::plan(future::multisession)
}

# use higher number to better approximate null distribution and get more accurate p-value
num.sim <- 10

### one-way layout

N <- 2
costm <- cost_matrix_lp(1:N)

K <- 3
n <- c(300, 360, 200)

# underlying probability vectors, all measures are equal
mu <- matrix(1 / N, K, N, TRUE)

set.seed(123)
samples <- tab_sample(n, mu)
# show progress
progressr::with_progress({
    # the default H0 = "*" reduces to H0 = "=" in a one-way layout
    res <- FDOTT(samples, costm, num.sim = num.sim)
})
print(res)

# measures are not equal
mu[2, ] <- c(0.1, 0.9)

set.seed(123)
samples <- tab_sample(n, mu)
res2 <- FDOTT(samples, costm, num.sim = num.sim)
print(res2)
# find out which measures are not equal via HSD
res3 <- FDOTT_HSD(res2)
print(res3)

### two-way layout

K1 <- K2 <- 2
N <- 3
costm <- cost_matrix_lp(1:N)

n <- list(list(300, 360), list(280, 200))

# underlying probability vectors (two-way layout)
# no interaction effect, only factor 2 has main effect
mu <- list(
    list(c(0, 0.5, 0.5), c(0.25, 0.25, 0.5)),
    list(c(0, 0.5, 0.5), c(0.25, 0.25, 0.5))
)

# test interaction effect and main effects, equivalent to H0 <- "*"
H0 <- list(list("*", 1:2), list("*", 1), list("*", 2))

set.seed(123)
samples <- tab_sample(n, mu)\donttest{
res <- FDOTT(samples, costm, H0 = H0, num.sim = num.sim)
print(res)

# find out exactly which effect gets rejected via HSD
res1 <- FDOTT_HSD(res)
print(res1)}

# now with interaction effect
mu[[1]][[1]] <- c(0.3, 0.3, 0.4)

# only test for interaction effect
H0 <- list("*", 1:2)

set.seed(123)
samples <- tab_sample(n, mu)\donttest{
res2 <- FDOTT(samples, costm, H0 = H0, num.sim = num.sim, method = "bootstrap-deriv")
print(res2)}

### custom effect

K <- 2
N <- 2
costm <- cost_matrix_lp(1:N)
num.sim <- 100

# null hypothesis H0: mu^1 - 0.5 * mu^2 - 0.5 * mu^3 = 0
L <- matrix(c(1, -0.5, -0.5), 1, 3)
# give custom name
attr(L, "effect") <- "mu^1 = 0.5 * (mu^2 + mu^3)"

# underlying probability vectors
mu <- matrix(c(0.4, 0.6, 0.6, 0.4, 0.2, 0.8), 3, 2, TRUE)
print(L \%*\% mu)

n <- c(250, 280, 230)

# test L, as well as mu^1 = mu^2 = mu^3
H0 <- list(L, "=")

set.seed(123)
samples <- tab_sample(n, mu)\donttest{
res <- FDOTT(samples, costm, H0 = H0, num.sim = num.sim)
print(res)
# find out which effect is responsible for rejection
res2 <- FDOTT_HSD(res)
print(res2)}

# L \%*\% mu = 0 not satisfied anymore
mu[2, ] <- c(1, 0)
print(L \%*\% mu)

# only test for L \%*\% mu = 0
H0 <- L

set.seed(123)
samples <- tab_sample(n, mu)\donttest{
res3 <- FDOTT(samples, costm, H0 = H0, num.sim = num.sim)
print(res3)}
\dontshow{
## R CMD check: make sure any open connections are closed afterward
if (requireNamespace("future") && !inherits(future::plan(), "sequential")) future::plan(future::sequential)
}
}
\references{
M. Groppe, L. Niemöller, S. Hundrieser, D. Ventzke, A. Blob, S. Köster and A. Munk (2025). Optimal Transport Based Testing in Factorial Design.
arXiv preprint. \doi{10.48550/arXiv.2509.13970}.
}
\seealso{
\code{\link{FDOTT_HSD}}
}
