% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/sampleOccurrences.R
\name{sampleOccurrences}
\alias{sampleOccurrences}
\title{Sample occurrences in a virtual species distribution}
\usage{
sampleOccurrences(
  x,
  n,
  type = "presence only",
  extract.probability = FALSE,
  sampling.area = NULL,
  detection.probability = 1,
  correct.by.suitability = FALSE,
  error.probability = 0,
  bias = "no.bias",
  bias.strength = 50,
  bias.area = NULL,
  weights = NULL,
  sample.prevalence = NULL,
  replacement = FALSE,
  plot = TRUE
)
}
\arguments{
\item{x}{a \code{SpatRaster} object or the output list from 
\code{generateSpFromFun}, \code{generateSpFromPCA}, \code{generateRandomSp},
\code{convertToPA} or \code{limitDistribution}.
The raster must contain values of 0 or 1 (or NA).}

\item{n}{an integer. The number of occurrence points / records to sample.}

\item{type}{\code{"presence only"} or \code{"presence-absence"}. The type of 
occurrence points to sample.}

\item{extract.probability}{\code{TRUE} or \code{FALSE}. If \code{TRUE}, then
the true probability at sampled locations will also be extracted.}

\item{sampling.area}{a character string, a \code{polygon} or an \code{extent}
object.
The area in which the sampling will take place. See details.}

\item{detection.probability}{a numeric value between 0 and 1, corresponding 
to the probability of detection of the species. See details.}

\item{correct.by.suitability}{\code{TRUE} or \code{FALSE}. If \code{TRUE}, 
then the probability of detection will be weighted by the suitability, such 
that cells with lower suitabilities will further decrease the chance that 
the species is detected when sampled. NOTE: this will NOT increase 
likelihood of samplings in areas of high suitability. In this case look for 
argument weights.}

\item{error.probability}{a numeric value between 0 and 1. Probability to 
attribute an erroneous presence (False Positive) in cells where the species 
is actually absent.}

\item{bias}{\code{"no.bias"},  \code{"country"},  \code{"region"},
\code{"continent"}, \code{"extent"},  \code{"polygon"} or \code{"manual"}.
The method used to generate a sampling bias: see details.}

\item{bias.strength}{a positive numeric value. The strength of the bias to be
applied in \code{bias.area} (as a multiplier). Above 1, \code{bias.area} will
be oversampled. Below 1, \code{bias.area} will be undersampled.}

\item{bias.area}{\code{NULL}, a character string, a \code{polygon} or an 
\code{extent} object. The area in which the sampling will be biased: see
details. If \code{NULL} and \code{bias = "extent"}, then you will be asked to
draw an extent on the map.}

\item{weights}{\code{NULL} or a raster layer. Only used if 
\code{bias = "manual"}. The raster of bias weights to be applied to the 
sampling of occurrences. Higher weights mean a higher probability of 
sampling. For example, species suitability raster can be entered here to
increase likelihood of sampling occurrences in areas with high suitability.}

\item{sample.prevalence}{\code{NULL} or a numeric value between 0 and 1. 
Only useful if \code{type = "presence-absence"}. Defines the sample
prevalence, i.e. the proportion of presences sampled. Note that the
probabilities of detection and error are applied AFTER this parameter,
so the final sample prevalence may differ from this value if you apply
probabilities of detection and/or error.}

\item{replacement}{\code{TRUE} or \code{FALSE}. If \code{TRUE}, multiple
samples can occur in the same cell. Can be useful to mimic real datasets 
where samplings can be duplicated or repeated in time.}

\item{plot}{\code{TRUE} or \code{FALSE}. If \code{TRUE}, the sampled 
occurrence points will be plotted.}
}
\value{
a \code{list} with 8 elements:
\itemize{
\item{\code{type}: type of occurrence sampled (presence-absences or 
presence-only)}
\item{\code{sample.points}: data.frame containing the coordinates of 
samples, true and sampled observations (i.e., 1, 0 or NA), and, if asked, the
true environmental suitability in sampled locations}
\item{\code{detection.probability}: the chosen probability of detection of
the virtual species}
\item{\code{error.probability}: the chosen probability to assign presence
in cells where the species is absent}
\item{\code{bias}: if a bias was chosen, then the type of bias and the
associated \code{area} will be included.}
\item{\code{replacement}: indicates whether multiple samples could occur
in the same cells}
\item{\code{original.distribution.raster}: the distribution raster from
which samples were drawn}
\item{\code{sample.plot}: a recorded plot showing the sampled points 
overlaying the original distribution.}
}
}
\description{
This function samples occurrences/records (presence only or presence-absence) 
within a species distribution, either randomly or with a sampling bias. 
The sampling bias can be defined manually or with a set of predefined 
biases.
}
\details{
\href{https://borisleroy.com/virtualspecies_tutorial/07-sampleoccurrences.html}{
Online tutorial for this function}




\bold{How the function works:}

The function randomly selects \code{n} cells in which samples occur. If a 
\code{bias} is chosen, then the selection of these cells will be biased 
according to the type and strength of bias chosen. If the sampling is of 
\code{type = "presence only"}, then only cells where the species is present 
will be chosen. If the sampling is of \code{type = "presence-absence"}, then 
all non-NA cells can be chosen.

The function then samples the species inside the chosen cells. In cells 
where the species is present the species will always be sampled unless 
the parameter \code{detection.probability} is lower than 1. In that case the
species will be sampled with the associated probability of detection.

In cells where the species is absent (in case of a \code{"presence-absence"}
sampling), the function will always assign absence unless 
\code{error.probability} is greater than 0. In that case, the species can be
found present with the associated probability of error. Note that this step 
happens AFTER the detection step. Hence, in cells where the species is
present but not detected, it can still be sampled due to a sampling error.

\bold{How to restrict the sampling area:}

Use the argument \code{sampling.area}:
\itemize{
\item{Provide the name(s) (or a combination of names) of country(ies), 
region(s) or continent(s).
Examples:
\itemize{
\item{\code{sampling.area = "Africa"}}
\item{\code{sampling.area = c("Africa", "North America", "France")}}
}}
\item{Provide a polygon (\code{SpatialPolygons} or 
\code{SpatialPolygonsDataFrame} of package \code{sp})}
\item{Provide an \code{extent} object}
}

\bold{How the sampling bias works:}

The argument \code{bias.strength} indicates the strength of the bias.
For example, a value of 50 will result in 50 times more samples within the
\code{bias.area} than outside.
Conversely, a value of 0.5 will result in half as many samples within the
\code{bias.area} as outside.

\bold{How to choose where the sampling is biased:}

You can choose to bias the sampling in:
\enumerate{
\item{a particular country, region or continent (assuming your raster has
the WGS84 projection): 

Set the argument
\code{bias} to \code{"country"}, \code{"region"} or
\code{"continent"}, and provide the name(s) of the associated countries,
regions or continents to \code{bias.area} (see examples). 

List of possible \code{bias.area} names:
\itemize{
\item{Countries: type 
\code{unique(rnaturalearth::ne_countries(returnclass ='sf')$sovereignt)} 
in the console}
\item{Regions: "Africa", "Antarctica", "Asia", "Oceania", "Europe", 
"Americas"}
\item{Continents: "Africa", "Antarctica", "Asia", "Europe", 
"North America", "Oceania", "South America"}}
}
\item{a polygon:

Set \code{bias} to \code{"polygon"}, and provide your
polygon to \code{bias.area}.
}
\item{an extent object:

Set \code{bias} to \code{"extent"}, and either provide your
extent object to \code{bias.area}, or leave it \code{NULL} to draw an extent 
on the map.}
} 

Otherwise you can enter a raster of sampling probability. It can be useful 
if you want to increase likelihood of samplings in areas of high 
suitability (simply enter the suitability raster in \code{weights}; see
examples below),
or if you want to define sampling biases manually, \emph{e.g.} to create
biases along roads. In that case you have to provide to \code{weights} a 
raster layer in which each cell contains the probability to be sampled.

The \code{\link{.Random.seed}} and \code{\link{RNGkind}} are stored as 
\code{\link{attributes}} when the function is called, and can be used to 
reproduce the results as shown in the examples (though
it is preferable to set the seed with \code{\link{set.seed}} before calling
\code{sampleOccurrences()} and to then use the same value in 
\code{\link{set.seed}} to reproduce results later). Note that 
reproducing the sampling will only work if the same original distribution map 
is used.
}
\note{
Setting \code{sample.prevalence} may at least partly
override \code{bias}, e.g. if \code{bias} is specified with \code{extent} to 
an area that contains no presences, but sample prevalence is set to > 0, 
then cells outside of the biased sampling extent will be sampled until 
the number of presences required by \code{sample.prevalence} is obtained, 
after which the sampling of absences will proceed according to the specified
bias.
}
\examples{
# Create an example stack with six environmental variables
a <- matrix(rep(dnorm(1:100, 50, sd = 25)), 
            nrow = 100, ncol = 100, byrow = TRUE)
env <- c(rast(a * dnorm(1:100, 50, sd = 25)),
         rast(a * 1:100),
         rast(a * logisticFun(1:100, alpha = 10, beta = 70)),
         rast(t(a)),
         rast(exp(a)),
         rast(log(a)))
names(env) <- paste("Var", 1:6, sep = "")   

# With six variables, a PCA approach will be used by default
sp <- generateRandomSp(env, niche.breadth = "wide")

# Sampling of 25 presences
sampleOccurrences(sp, n = 25)

# Sampling of 30 presences and absences
sampleOccurrences(sp, n = 30, type = "presence-absence")


# Reducing the probability of detection
sampleOccurrences(sp, n = 30, type = "presence-absence", 
                  detection.probability = 0.5)
                  
# Further reducing detection in relation to environmental suitability
sampleOccurrences(sp, n = 30, type = "presence-absence", 
                  detection.probability = 0.5,
                  correct.by.suitability = TRUE)
                  
                  
# Creating sampling errors (far too many)
sampleOccurrences(sp, n = 30, type = "presence-absence", 
                  error.probability = 0.5)
                  
# Introducing a sampling bias (oversampling)
biased.area <- ext(1, 50, 1, 50)
sampleOccurrences(sp, n = 50, type = "presence-absence", 
                  bias = "extent",
                  bias.area = biased.area)
# Showing the area in which the sampling is biased
plot(biased.area, add = TRUE)     

# Introducing a sampling bias (no sampling at all in the chosen area)
biased.area <- ext(1, 50, 1, 50)
sampleOccurrences(sp, n = 50, type = "presence-absence", 
                  bias = "extent",
                  bias.strength = 0,
                  bias.area = biased.area)
# Showing the area in which the sampling is biased
plot(biased.area, add = TRUE)    

# Create a sampling bias so that more presences are sampled in areas with 
# higher suitability: the suitability raster is passed as manual weights
samps <- sampleOccurrences(sp, n = 50, 
                           bias = "manual",
                           weights = sp$suitab.raster)
# Overlay the sampled points on the suitability raster
plot(sp$suitab.raster)
points(samps$sample.points[, c("x", "y")])

# Reproduce sampling based on the saved .Random.seed from a previous result
samps <- sampleOccurrences(sp, n = 100, 
                           type = "presence-absence", 
                           detection.probability = 0.7, 
                           bias = "extent", 
                           bias.strength = 50, 
                           bias.area = biased.area)
# Reset the random seed using the value saved in the attributes               
.Random.seed <- attr(samps, "seed") 
# Re-run the sampling with exactly the same arguments
reproduced_samps <- sampleOccurrences(sp, n = 100, 
                                      type = "presence-absence",
                                      detection.probability = 0.7,
                                      bias = "extent",
                                      bias.strength = 50,
                                      bias.area = biased.area)
# Check whether the two sets of sampled points are the same
identical(samps$sample.points, reproduced_samps$sample.points)          
}
\author{
Boris Leroy \email{leroy.boris@gmail.com}
Willson Gaul \email{wgaul@hotmail.com}

with help from C. N. Meynard, C. Bellard & F. Courchamp
}
