#'@rdname CST_Intlr
#'@title Downscaling using interpolation and linear regression.
#' 
#'@author J. Ramon, \email{jaumeramong@gmail.com}
#'@author E. Duzenli, \email{eren.duzenli@bsc.es}
#'
#'@description This function performs a downscaling using an interpolation and a linear 
#'regression. Different methodologies that employ linear regressions are available. See 
#'parameter 'lr_method' for more information. It is recommended that the observations 
#'are passed already in the target grid. Otherwise, the function will also perform an 
#'interpolation of the observed field into the target grid. The coarse scale and 
#'observation data can be either global or regional. In the latter case, the region is 
#'defined by the user. In principle, the coarse and observation data are intended to 
#'be of the same variable, although different variables can also be admitted. 
#'
#'@param exp an 's2dv object' containing the experimental field on the
#'coarse scale for which the downscaling is aimed. The object must have, at least,
#'the dimensions latitude, longitude, start date and member. The object is expected to be
#'already subset for the desired region. Data can be in one or two integrated regions, e.g.,
#'crossing the Greenwich meridian. To get the correct results in the latter case,
#'the borders of the region should be specified in the parameter 'region'. See parameter
#''region'.
#'@param obs an 's2dv object' containing the observational field. The object
#'must have, at least, the dimensions latitude, longitude and start date. The object is
#'expected to be already subset for the desired region.
#'@param exp_cor an optional 's2dv_cube' object with named dimensions containing the seasonal
#'forecast experiment data. If provided, the forecast will be downscaled using the hindcast 
#'and observations; if not, the hindcast will be downscaled instead. The default value is NULL. 
#'Since the Intlr function is built separately for each ensemble member, it is not recommended 
#'for forecast cases where the member_dim length of exp_cor differs from that of exp. 
#'In such situations, the use of other functions in the package is more appropriate.
#'@param lr_method a character vector indicating the linear regression method to be applied. 
#'Accepted methods are 'basic', 'large-scale' and '9nn'. The 'basic' method fits a 
#'linear regression using high resolution observations as predictands and the 
#'interpolated model data as predictor. Then, the regression equation is applied to the 
#'interpolated model data to correct the interpolated values. The 'large-scale' method 
#'fits a linear regression with large-scale predictors (e.g. teleconnection indices) as 
#'predictors and high-resolution observations as predictands.
#'Finally, the '9nn' method uses a linear regression 
#'with the nine nearest neighbours as predictors and high-resolution observations as 
#'predictands. Instead of constructing a regression model using all nine predictors, 
#'principal component analysis is applied to the data of neighboring grids to reduce the 
#'dimension of the predictors. The linear regression model is then built using the principal 
#'components that explain 95% of the variance. The '9nn' method does not require a 
#'pre-interpolation process. 
#'@param target_grid a character vector indicating the target grid to be passed to CDO.
#'It must be a grid recognised by CDO or a NetCDF file.
#'@param points a list of two elements containing the point latitudes and longitudes 
#'of the locations to downscale the model data. The list must contain the two elements 
#'named as indicated in the parameters 'lat_dim' and 'lon_dim'. If the downscaling is 
#'to a point location, only regular grids are allowed for exp and obs. Only needed if the 
#'downscaling is to a point location. 
#'@param int_method a character vector indicating the regridding method to be passed
#'to CDORemap. Accepted methods are "con", "bil", "bic", "nn", "con2". If "nn" method is
#'to be used, CDO_1.9.8 or newer version is required. For method "con2", 
#'CDO_2.2.2 or older version is required.
#'@param method_point_interp a character vector indicating the interpolation method to
#'interpolate model gridded data into the point locations. Accepted methods are "nearest",
#'"bilinear", "9point", "invdist4nn", "NE", "NW", "SE", "SW".
#'@param predictors an array with large-scale data to be used in the 'large-scale' method.
#'Only needed if the linear regression method is set to 'large-scale'. It must have, at 
#'least the dimension start date and another dimension whose name has to be specified in 
#'the parameter 'large_scale_predictor_dimname'. It should contain as many elements as the
#'number of large-scale predictors.  
#'@param lat_dim a character vector indicating the latitude dimension name in the element 'data' 
#'in exp and obs. Default set to "lat".
#'@param lon_dim a character vector indicating the longitude dimension name in the element 'data' 
#'in exp and obs. Default set to "lon".
#'@param sdate_dim a character vector indicating the start date dimension name in the element 
#''data' in exp and obs. Default set to "sdate".
#'@param time_dim a character vector indicating the time dimension name in the element
#''data' in exp and obs. Default set to "time". 
#'@param member_dim a character vector indicating the member dimension name in the element
#''data' in exp and obs. Default set to "member".
#'@param large_scale_predictor_dimname a character vector indicating the name of the 
#'dimension in 'predictors' that contain the predictor variables. See parameter 'predictors'.
#'@param loocv a logical indicating whether to apply leave-one-out cross-validation when
#'generating the linear regressions. In this procedure, all values from the corresponding 
#'year are excluded, so that when building the regression model for a given year, none of 
#'that year’s data are used. Default to TRUE.
#'@param region a numeric vector indicating the borders of the downscaling region.
#'It consists of four elements in this order: lonmin, lonmax, latmin, latmax. lonmin refers
#'to the left border, while lonmax refers to the right border. latmin indicates the lower
#'border, whereas latmax indicates the upper border. If set to NULL (default), the function
#'takes the first and last elements of the latitudes and longitudes in obs.
#'@param ncores an integer indicating the number of cores to use in parallel computation.
#'The default value is NULL.
#'@import multiApply 
#'@import plyr
#'@import s2dv 
#'@importFrom ClimProjDiags Subset
#'@importFrom CSTools SplitDim
#'
#'@return A list with two s2dv_cube objects, exp and obs, each with 
#'elements 'data' containing the downscaled field, 'coords' containing the 
#'coordinate information, 'dims' describing the dimension structure, and 'attrs' 
#'containing the associated attributes.
#'@examples 
#'\donttest{
#'exp <- rnorm(500) 
#'dim(exp) <- c(member = 5, lat = 4, lon = 5, sdate = 5) 
#'exp_lons <- 1:5 
#'exp_lats <- 1:4 
#'obs <- rnorm(900) 
#'dim(obs) <- c(lat = 12, lon = 15, sdate = 5) 
#'obs_lons <- seq(1,5, 4/14) 
#'obs_lats <- seq(1,4, 3/11) 
#'exp <- CSTools::s2dv_cube(data = exp, coords = list(lat = exp_lats, lon = exp_lons))
#'obs <- CSTools::s2dv_cube(data = obs, coords = list(lat = obs_lats, lon = obs_lons))
#'if (Sys.which("cdo") != "") {
#'res <- CST_Intlr(exp = exp, obs = obs, target_grid = 'r1280x640', 
#'                 lr_method = 'basic', int_method = 'conservative')
#'}
#'}
#'@export
CST_Intlr <- function(exp, obs, exp_cor = NULL, lr_method, target_grid = NULL, points = NULL, 
                      int_method = NULL, method_point_interp = NULL, predictors = NULL, 
                      lat_dim = "lat", lon_dim = "lon", sdate_dim = "sdate", time_dim = "time",
                      member_dim = "member", large_scale_predictor_dimname = 'vars', 
                      loocv = TRUE, region = NULL, ncores = NULL) {
  
  if (!inherits(exp, 's2dv_cube')) {
    stop("Parameter 'exp' must be of the class 's2dv_cube'")
  }
  
  if (!inherits(obs, 's2dv_cube')) {
    stop("Parameter 'obs' must be of the class 's2dv_cube'")
  }

  exp_cor_aux <- NULL

  if (!is.null(exp_cor)) {
    if (identical(lr_method, 'basic') | identical(lr_method, '9nn')) {
      if (!inherits(exp_cor, 's2dv_cube')) {
        stop("Parameter 'exp_cor' must be of the class 's2dv_cube'")
      }
      exp_cor_aux <- exp_cor$data
    # when large-scale is selected, the forecast object does not have to be an s2dv_cube object
    } else if (identical(lr_method, 'large-scale')) {
      if (!inherits(exp_cor, 's2dv_cube')) {
        exp_cor_aux <- exp_cor
      } else {
        exp_cor_aux <- exp_cor$data
      }
    }
  }
  res <- Intlr(exp = exp$data, obs = obs$data, exp_cor = exp_cor_aux, 
               exp_lats = exp$coords[[lat_dim]], exp_lons = exp$coords[[lon_dim]], 
               obs_lats = obs$coords[[lat_dim]], obs_lons = obs$coords[[lon_dim]], 
               points = points, source_file_exp = exp$attrs$source_files[1], 
               source_file_obs = obs$attrs$source_files[1], 
               target_grid = target_grid, lr_method = lr_method, int_method = int_method, 
               method_point_interp = method_point_interp, predictors = predictors, 
               lat_dim = lat_dim, lon_dim = lon_dim, sdate_dim = sdate_dim, time_dim = time_dim, 
               member_dim = member_dim, 
               large_scale_predictor_dimname = large_scale_predictor_dimname, 
               loocv = loocv, region = region, ncores = ncores) 
 
  # Modify data, lat and lon in the origina s2dv_cube, adding the downscaled data
  obs$data <- res$obs
  obs$dims <- dim(obs$data)
  obs$coords[[lon_dim]] <- res$lon
  obs$coords[[lat_dim]] <- res$lat

  if (is.null(exp_cor)) {
    exp$data <- res$data
    exp$dims <- dim(exp$data)
    exp$coords[[lon_dim]] <- res$lon
    exp$coords[[lat_dim]] <- res$lat

    res_s2dv <- list(exp = exp, obs = obs)
  } else {
    if (identical(lr_method, 'basic') | identical(lr_method, '9nn')) {
      exp_cor$data <- res$data
      exp_cor$dims <- dim(exp_cor$data)
      exp_cor$coords[[lon_dim]] <- res$lon
      exp_cor$coords[[lat_dim]] <- res$lat
    # when large-scale is selected, the forecast object does not have to be an s2dv_cube object
    } else if (identical(lr_method, 'large-scale')) {
      if (!inherits(exp_cor, 's2dv_cube')) {
        exp_cor <- suppressWarnings(s2dv_cube(res$data, lat = res$lat, lon = res$lon))
      } else {
        exp_cor$data <- res$data
        exp_cor$dims <- dim(exp_cor$data)
        exp_cor$coords[[lon_dim]] <- res$lon
        exp_cor$coords[[lat_dim]] <- res$lat
      }
    }

    res_s2dv <- list(exp = exp_cor, obs = obs)
  }

  return(res_s2dv)
}

#'@rdname Intlr
#'@title Downscaling using interpolation and linear regression.
#' 
#'@author J. Ramon, \email{jaumeramong@gmail.com}
#'@author E. Duzenli, \email{eren.duzenli@bsc.es}
#'
#'@description This function performs a downscaling using an interpolation and a linear 
#'regression. Different methodologies that employ linear regressions are available. See 
#'parameter 'lr_method' for more information. It is recommended that the observations 
#'are passed already in the target grid. Otherwise, the function will also perform an 
#'interpolation of the observed field into the target grid. The coarse scale and 
#'observation data can be either global or regional. In the latter case, the region is 
#'defined by the user. In principle, the coarse and observation data are intended to 
#'be of the same variable, although different variables can also be admitted. 
#'
#'@param exp an array with named dimensions containing the experimental field on the
#'coarse scale for which the downscaling is aimed. The object must have, at least,
#'the dimensions latitude, longitude and start date. The object is expected to be 
#'already subset for the desired region. Data can be in one or two integrated regions, e.g.,
#'crossing the Greenwich meridian. To get the correct results in the latter case,
#'the borders of the region should be specified in the parameter 'region'. See parameter
#''region'.
#'@param obs an array with named dimensions containing the observational field. The object 
#'must have, at least, the dimensions latitude, longitude and start date. The object is 
#'expected to be already subset for the desired region. 
#'@param exp_cor an optional 's2dv_cube' object with named dimensions containing the seasonal
#'forecast experiment data. If provided, the forecast will be downscaled using the hindcast 
#'and observations; if not, the hindcast will be downscaled instead. The default value is NULL. 
#'Since the Intlr function is built separately for each ensemble member, it is not recommended 
#'for forecast cases where the member_dim length of exp_cor differs from that of exp. 
#'In such situations, the use of other functions in the package is more appropriate.
#'@param exp_lats a numeric vector containing the latitude values in 'exp'. Latitudes must 
#'range from -90 to 90.
#'@param exp_lons a numeric vector containing the longitude values in 'exp'. Longitudes 
#'can range from -180 to 180 or from 0 to 360.
#'@param obs_lats a numeric vector containing the latitude values in 'obs'. Latitudes must
#'range from -90 to 90.
#'@param obs_lons a numeric vector containing the longitude values in 'obs'. Longitudes
#'can range from -180 to 180 or from 0 to 360.
#'@param lr_method a character vector indicating the linear regression method to be applied. 
#'Accepted methods are 'basic', 'large-scale' and '9nn'. The 'basic' method fits a 
#'linear regression using high resolution observations as predictands and the 
#'interpolated model data as predictor. Then, the regression equation is applied to the 
#'interpolated model data to correct the interpolated values. The 'large-scale' method 
#'fits a linear regression with large-scale predictors (e.g. teleconnection indices) as 
#'predictors and high-resolution observations as predictands. 
#'Finally, the '9nn' method uses a linear regression 
#'with the nine nearest neighbours as predictors and high-resolution observations as 
#'predictands. Instead of constructing a regression model using all nine predictors, 
#'principal component analysis is applied to the data of neighboring grids to reduce the 
#'dimension of the predictors. The linear regression model is then built using the principal 
#'components that explain 95% of the variance. The '9nn' method does not require a 
#'pre-interpolation process. 
#'@param target_grid a character vector indicating the target grid to be passed to CDO.
#'It must be a grid recognised by CDO or a NetCDF file.
#'@param points a list of two elements containing the point latitudes and longitudes 
#'of the locations to downscale the model data. The list must contain the two elements 
#'named as indicated in the parameters 'lat_dim' and 'lon_dim'. If the downscaling is 
#'to a point location, only regular grids are allowed for exp and obs. Only needed if the 
#'downscaling is to a point location. 
#'@param int_method a character vector indicating the regridding method to be passed
#'to CDORemap. Accepted methods are "con", "bil", "bic", "nn", "con2". If "nn" method is
#'to be used, CDO_1.9.8 or newer version is required. For method "con2", 
#'CDO_2.2.2 or older version is required.
#'@param method_point_interp a character vector indicating the interpolation method to
#'interpolate model gridded data into the point locations. Accepted methods are "nearest",
#'"bilinear", "9point", "invdist4nn", "NE", "NW", "SE", "SW".
#'@param source_file_exp a character vector with a path to an example file of the exp data.
#'Only needed if the downscaling is to a point location.
#'@param source_file_obs a character vector with a path to an example file of the obs data.
#'Only needed if the downscaling is to a point location.
#'@param predictors an array with large-scale data to be used in the 'large-scale' method.
#'Only needed if the linear regression method is set to 'large-scale'. It must have, at 
#'least the dimension start date and another dimension whose name has to be specified in 
#'the parameter 'large_scale_predictor_dimname'. It should contain as many elements as the
#'number of large-scale predictors.  
#'@param lat_dim a character vector indicating the latitude dimension name in the element 'data' 
#'in exp and obs. Default set to "lat".
#'@param lon_dim a character vector indicating the longitude dimension name in the element 'data' 
#'in exp and obs. Default set to "lon".
#'@param sdate_dim a character vector indicating the start date dimension name in the element 
#''data' in exp and obs. Default set to "sdate".
#'@param time_dim a character vector indicating the time dimension name in the element
#''data' in exp and obs. Default set to "time". 
#'@param member_dim a character vector indicating the member dimension name in the element
#''data' in exp and obs. Default set to "member". 
#'@param region a numeric vector indicating the borders of the downscaling region.
#'It consists of four elements in this order: lonmin, lonmax, latmin, latmax. lonmin refers
#'to the left border, while lonmax refers to the right border. latmin indicates the lower
#'border, whereas latmax indicates the upper border. If set to NULL (default), the function
#'takes the first and last elements of the latitudes and longitudes in obs.
#'@param large_scale_predictor_dimname a character vector indicating the name of the 
#'dimension in 'predictors' that contain the predictor variables. See parameter 'predictors'.
#'@param loocv a logical indicating whether to apply leave-one-out cross-validation when
#'generating the linear regressions. In this procedure, all values from the corresponding 
#'year are excluded, so that when building the regression model for a given year, none of 
#'that year’s data are used. Default to TRUE.
#'@param ncores an integer indicating the number of cores to use in parallel computation.
#'The default value is NULL.
#'@import multiApply 
#'@import plyr
#'@import s2dv 
#'@importFrom ClimProjDiags Subset
#'@importFrom CSTools SplitDim
#'@importFrom stats prcomp na.omit predict lm
#'
#'@return A list of three elements. 'data' contains the dowscaled field, 'lat' the
#'downscaled latitudes, and 'lon' the downscaled longitudes.
#'@examples 
#'\donttest{
#'exp <- rnorm(500) 
#'dim(exp) <- c(member = 5, lat = 4, lon = 5, sdate = 5) 
#'exp_lons <- 1:5 
#'exp_lats <- 1:4 
#'obs <- rnorm(900) 
#'dim(obs) <- c(lat = 12, lon = 15, sdate = 5) 
#'obs_lons <- seq(1,5, 4/14) 
#'obs_lats <- seq(1,4, 3/11)
#'if (Sys.which("cdo") != "") { 
#'res <- Intlr(exp = exp, obs = obs, exp_lats = exp_lats, exp_lons = exp_lons, 
#'             obs_lats = obs_lats, obs_lons = obs_lons, target_grid = 'r1280x640', 
#'             lr_method = 'basic', int_method = 'conservative')
#'}
#'}
#'@export
Intlr <- function(exp, obs, exp_cor = NULL, exp_lats, exp_lons, obs_lats, obs_lons, lr_method, 
                  target_grid = NULL, points = NULL, int_method = NULL, method_point_interp = NULL, 
                  source_file_exp = NULL, source_file_obs = NULL, predictors = NULL, 
                  lat_dim = "lat", lon_dim = "lon", sdate_dim = "sdate", time_dim = "time", 
                  member_dim = "member", region = NULL, large_scale_predictor_dimname = 'vars', 
                  loocv = TRUE, ncores = NULL) {
  
  #-----------------------------------
  # Checkings
  #-----------------------------------

  if (!inherits(lr_method, 'character')) {
    stop("Parameter 'lr_method' must be of the class 'character'")
  }
  
  if (!inherits(large_scale_predictor_dimname, 'character')) {
    stop("Parameter 'large_scale_predictor_dimname' must be of the class 'character'")
  }
  
  if (!inherits(loocv, 'logical')) {
    stop("Parameter 'loocv' must be set to TRUE or FALSE")
  }
  
  if (!inherits(lat_dim, 'character')) {
    stop("Parameter 'lat_dim' must be of the class 'character'")
  }
  
  if (!inherits(lon_dim, 'character')) {
    stop("Parameter 'lon_dim' must be of the class 'character'")
  }
  
  if (!inherits(sdate_dim, 'character')) {
    stop("Parameter 'sdate_dim' must be of the class 'character'")
  }
  
  if (!inherits(large_scale_predictor_dimname, 'character')) {
    stop("Parameter 'large_scale_predictor_dimname' must be of the class 'character'")
  }
  
  if (is.na(match(lon_dim, names(dim(exp))))) {
    stop("Missing longitude dimension in 'exp', or does not match the parameter ",
         "'lon_dim'")
  }
  
  if (is.na(match(lat_dim, names(dim(exp))))) {
    stop("Missing latitude dimension in 'exp', or does not match the parameter ",
         "'lat_dim'")
  }
  
  if (is.na(match(sdate_dim, names(dim(exp)))) | is.na(match(sdate_dim, names(dim(obs))))) {
    stop("Missing start date dimension in 'exp' and/or 'obs', or does not match the parameter ",
         "'sdate_dim'")
  }

  if (!is.null(exp_cor)) {

    if (is.na(match(sdate_dim, names(dim(exp_cor))))) {
      stop("Missing start date dimension in 'exp_cor', or does not match the parameter ",
           "'sdate_dim'")
    }

    if (is.na(match(member_dim, names(dim(exp_cor))))) {
      stop("Missing member dimension in 'exp_cor', or does not match the parameter 'member_dim'")
    }

    if (loocv) { # loocv equal to false to train with the whole hindcast and predict with the forecast
      loocv <- FALSE
      warning("Forecast data will be downscaled. 'loocv' is set to FALSE ", 
              "to train the model with the whole hindcast and predict with the forecast.")
    }

    if (is.null(predictors)) {    

      if (is.na(match(lon_dim, names(dim(exp_cor))))) {
        stop("Missing longitude dimension in 'exp_cor', or does not match the parameter ",
             "'lon_dim'")
      }

      if (is.na(match(lat_dim, names(dim(exp_cor))))) {
        stop("Missing latitude dimension in 'exp_cor', or does not match the parameter ",
             "'lat_dim'")
      }
    } 
  }

  if (lr_method == '9nn') {
    warning("9nn method skips the interpolation step since the method itself inherently downscale ",
            "the coarse resolution data to the reference data resolution without the need ",
            "for interpolation. Thus, target_grid feature is inactive. Data will directly be ",
            "downscaled to the reference domain.")
  }

  # When observations are pointwise
  if (!is.null(points) & !is.na(match("location", names(dim(obs))))) {
    point_obs <- T
    # dimension aux in obs is needed 
    if (is.na(match("aux", names(dim(obs))))) {
      obs <- InsertDim(obs, posdim = 1, lendim = 1, name = "aux")
    }
  } else {
    point_obs <- F
  }
  
  if (!is.null(points) & is.null(source_file_exp)) {
    stop("No source file found. Source file for exp must be provided in the parameter ",
         "'source_file_exp'.")
  }
  
  if (!is.null(points) & is.null(method_point_interp)) {
    stop("Please provide the interpolation method to interpolate gridded data to point locations ",
         "through the parameter 'method_point_interp'.")
  }
  
  # the code is not yet prepared to handle members in the observations
  restore_ens <- FALSE
  if (member_dim %in% names(dim(obs))) {       
    if (identical(as.numeric(dim(obs)[member_dim]), 1)) {         
      obs <- ClimProjDiags::Subset(x = obs, along = member_dim, indices = 1, drop = 'selected')
      restore_ens <- TRUE       
    } else {         
      stop("Not implemented for observations with members ('obs' can have 'member_dim', ",
           "but it should be of length = 1).")       
    }     
  }
  
  # checkings for the parametre 'predictors' 
  if (!is.null(predictors)) {
    if (!is.array(predictors)) {
      stop("Parameter 'predictors' must be of the class 'array'")
    } else {
      # ensure the predictor variable name matches the parametre large_scale_predictor_dimname
      stopifnot(large_scale_predictor_dimname %in% names(dim(predictors)))
      stopifnot(sdate_dim %in% names(dim(predictors)))
      stopifnot(dim(predictors)[sdate_dim] == dim(exp)[sdate_dim]) 
    }
    # forecasts for the large scale predictors 
    if (!is.null(exp_cor)) {
      if (is.na(match(large_scale_predictor_dimname, names(dim(exp_cor))))) {
          stop("Missing large scale predictor dimension in 'exp_cor', or does not match ",
               "the parameter 'large_scale_predictor_dimname'")
      }
      if (!identical(dim(exp_cor)[names(dim(exp_cor)) == large_scale_predictor_dimname],
          dim(predictors)[names(dim(predictors)) == large_scale_predictor_dimname])) {
          stop("Large scale predictor dimension in exp_cor and predictors must be identical.")
      }
    }
  }
  ## ncores
  if (!is.null(ncores)) {
    if (!is.numeric(ncores) | any(ncores %% 1 != 0) | any(ncores < 0) |
        length(ncores) > 1) {
      stop("Parameter 'ncores' must be a positive integer.")
    }
  }
  
  #-----------------------------------
  # Interpolation
  #-----------------------------------
  if (lr_method != '9nn') {

    if (is.null(points)) {
      if (is.null(int_method)) {
        stop("Parameter 'int_method' must be a character vector indicating the interpolation",
             " method. Accepted methods are con, bil, bic, nn, con2")
      }
    }

    if (is.null(region)) {
      warning("The borders of the downscaling region have not been provided. Assuming the ",
              "four borders of the downscaling region are defined by the first and last ",
              "elements of the parametres 'obs_lats' and 'obs_lons'.")      
      region <- c(obs_lons[1], obs_lons[length(obs_lons)], obs_lats[1], obs_lats[length(obs_lats)])
    }
   
    # Normally, the data to be downscaled is not expected to have the same spatial resolution 
    # as the observations. However, in the use of full cross-validation with SUNSET, 
    # interpolation is performed outside the loop using a separate "Interpolation" function 
    # to speed up the process and avoid repeating interpolation at each step. 
    # Therefore, in such cases, the observations and model data can share the same coordinates 
    # and the interpolation step is skipped through the following if condition to save time.

    if (.check_coords(lat1 = exp_lats, lat2 = obs_lats,
                      lon1 = exp_lons, lon2 = obs_lons)) {
      exp_interpolated <- NULL
      exp_interpolated$data <- exp
      exp_interpolated$lat <- exp_lats
      exp_interpolated$lon <- exp_lons
      if (!is.null(exp_cor)) {
        exp_cor_interpolated <- NULL
        exp_cor_interpolated$data <- exp_cor
      }
      obs_interpolated <- obs
      lats <- obs_lats
      lons <- obs_lons
    } else {
      exp_interpolated <- Interpolation(exp = exp, lats = exp_lats, lons = exp_lons, 
                                        target_grid = target_grid, points = points, 
                                        method_point_interp = method_point_interp, 
                                        source_file = source_file_exp, lat_dim = lat_dim, 
                                        lon_dim = lon_dim, method_remap = int_method, 
                                        region = region, ncores = ncores)

      if (!is.null(exp_cor) & is.null(predictors)) {
        exp_cor_interpolated <- Interpolation(exp = exp_cor, lats = exp_lats, lons = exp_lons, 
                                              target_grid = target_grid, points = points, 
                                              method_point_interp = method_point_interp,
                                              source_file = source_file_exp, lat_dim = lat_dim, 
                                              lon_dim = lon_dim, method_remap = int_method, 
                                              region = region, ncores = ncores)
      }
    
      # If the coordinates do not match after interpolating 'exp' data, the obs data is interpolated
      # to the same grid to force the matching
      if ((!suppressWarnings(.check_coords(lat1 = exp_interpolated$lat, lat2 = obs_lats, 
                                           lon1 = exp_interpolated$lon, lon2 = obs_lons))) | 
                                           !(point_obs)) {
        obs_interpolated <- Interpolation(exp = obs, lats = obs_lats, lons = obs_lons, 
                                          target_grid = target_grid, points = points, 
                                          method_point_interp = method_point_interp, 
                                          source_file = source_file_obs, lat_dim = lat_dim, 
                                          lon_dim = lon_dim, method_remap = int_method, 
                                          region = region, ncores = ncores)
      
        lats <- obs_interpolated$lat
        lons <- obs_interpolated$lon 
        obs_interpolated <- obs_interpolated$data
      } else {
        obs_interpolated <- obs
        lats <- obs_lats
        lons <- obs_lons
      }
    }
  }  
  #-----------------------------------
  # Linear regressions
  #-----------------------------------
  # Pointwise linear regression
  # Predictor: model data
  # Predictand: observations
  if (lr_method == 'basic') {
    predictor <- exp_interpolated$data
    predictand <- obs_interpolated
    
    target_dims_predictor <- sdate_dim
    target_dims_predictand <- sdate_dim

    if (!is.null(exp_cor)) {
      aux_dim <- NULL
      forecast <- exp_cor_interpolated$data
      if (dim(predictor)[[member_dim]] == dim(forecast)[[member_dim]]) {
        target_dims_forecast <- c(sdate_dim)
      } else {
        target_dims_predictor <- c(sdate_dim, member_dim)
        target_dims_forecast <- c(sdate_dim, member_dim)
      }
    } else {
      forecast <- NULL
      target_dims_forecast <- NULL
    }
  } 
  
  # (Multi) linear regression with large-scale predictors
  # Predictor: passed through the parameter 'predictors' by the user. Can be model or observations
  # Predictand: model data
  else if (lr_method == 'large-scale') {  
    if (is.null(predictors)) {
      stop("The large-scale predictors must be passed through the parametre 'predictors'")
    }  
    
    predictand <- obs_interpolated
    predictor <- predictors
    
    target_dims_predictor <- c(sdate_dim, large_scale_predictor_dimname)
    target_dims_predictand <- sdate_dim

    if (!is.null(exp_cor)) {
      aux_dim <- large_scale_predictor_dimname
      if (!inherits(exp_cor, 's2dv_cube')) {
        forecast <- exp_cor
      } else {
        forecast <- exp_cor$data
      }
      if (dim(predictor)[[member_dim]] == dim(forecast)[[member_dim]]) {
        target_dims_forecast <- c(sdate_dim, large_scale_predictor_dimname)
      } else {
        target_dims_predictor <- c(sdate_dim, large_scale_predictor_dimname, member_dim)
        target_dims_forecast <- c(sdate_dim, large_scale_predictor_dimname, member_dim)
      }     
    }
  } 
  
  # Multi-linear regression with the four nearest neighbours
  # Predictors: model data
  # Predictand: observations
  else if (lr_method == '9nn') {
    
    predictor <- .find_nn(coar = exp, lats_hres = obs_lats, lons_hres = obs_lons, 
                          lats_coar = exp_lats, lons_coar = exp_lons, lat_dim = lat_dim, 
                          lon_dim = lon_dim, sdate_dim = sdate_dim, member_dim = member_dim, 
                          nn = 9, ncores = ncores)
   
    if (!is.null(exp_cor)) {
      aux_dim <- 'nn'
      forecast <- .find_nn(coar = exp_cor, lats_hres = obs_lats, lons_hres = obs_lons, 
                           lats_coar = exp_lats, lons_coar = exp_lons, lat_dim = lat_dim, 
                           lon_dim = lon_dim, sdate_dim = sdate_dim, member_dim = member_dim, 
                           nn = 9, ncores = ncores)
    } 

    if (is.null(points) | ("location" %in% names(dim(obs)))) {
      if (!is.null(target_grid)) {
        warning("Interpolating to the 'obs' grid")
      }
      predictand <- obs
      
      lats <- obs_lats
      lons <- obs_lons
    } else {
    # If the downscaling is to point locations: Once the 9 nearest neighbours have been found, 
    # interpolate to point locations
    
      predictor <- Interpolation(exp = predictor, lats = obs_lats, lons = obs_lons, 
                                 lon_dim = lon_dim, lat_dim = lat_dim, target_grid = NULL, 
                                 points = points, method_point_interp = method_point_interp, 
                                 source_file = source_file_obs, method_remap = NULL, 
                                 region = region, ncores = ncores)
      
      predictand <- Interpolation(exp = obs, lats = obs_lats, lons = obs_lons, 
                                  lon_dim = lon_dim, lat_dim = lat_dim, target_grid = NULL,
                                  points = points, method_point_interp = method_point_interp, 
                                  source_file = source_file_obs, method_remap = NULL, 
                                  region = region, ncores = ncores)

      if (!is.null(exp_cor)) {      
        forecast <- Interpolation(exp = forecast, lats = obs_lats, lons = obs_lons,
                                  lon_dim = lon_dim, lat_dim = lat_dim, target_grid = NULL,
                                  points = points, method_point_interp = method_point_interp,
                                  source_file = source_file_obs, method_remap = NULL,
                                  region = region, ncores = ncores)
        forecast <- forecast$data
      }

      lats <- predictor$lat
      lons <- predictor$lon 
      predictor <- predictor$data
      predictand <- predictand$data
    }

    target_dims_predictand <- sdate_dim 
    target_dims_predictor <- c(sdate_dim,'nn')

    if (!is.null(exp_cor)) {
      if (dim(predictor)[[member_dim]] == dim(forecast)[[member_dim]]) {
        target_dims_forecast <- c(sdate_dim,'nn')   
      } else {
        target_dims_predictor <- c(sdate_dim,'nn', member_dim)
        target_dims_forecast <- c(sdate_dim,'nn', member_dim)
      }
    } 

  } else {
    stop(paste0(lr_method, " method is not implemented yet"))
  }

  k_out <- 1 ## ##  It is used in constructing the window for loocv = TRUE case. 
             ## k_out is also used to order the dimensions of pred.lm output for 
             ## both loocv = TRUE and loocv = FALSE cases. 
             ## In case the data is NOT daily (i.e., time_dim does not exist), k_out is 1.
  daily <- FALSE # time_dim does not exist
  output_dims <- c(time_dim, sdate_dim) # for hindcast dwn 
  if (  time_dim %in% names(dim(predictor)) ) {
    daily <- TRUE # time_dim exists
    k_out <- as.numeric (dim(predictor)[time_dim])  
    sdate_num <- as.numeric (dim(predictand)[sdate_dim]) ## sdate_num of hindcast
    predictor <- MergeDims (predictor, merge_dims = c(time_dim, sdate_dim), rename_dim = sdate_dim)
    predictand <- MergeDims (predictand, merge_dims = c(time_dim, sdate_dim), rename_dim = sdate_dim)
    if (!is.null(exp_cor)) {
      sdate_num_fr <- as.numeric (dim(forecast)[sdate_dim]) ## sdate_num of forecast
      forecast <- MergeDims (forecast, merge_dims = c(time_dim, sdate_dim), rename_dim = sdate_dim)
    }
  }

  # Apply the linear regressions
  ## case hindcast - forecast
  if (!is.null(exp_cor)) {
    res <- Apply(list(predictor, predictand, forecast),
                 target_dims = list(target_dims_predictor, target_dims_predictand, 
                                    target_dims_forecast),
                 fun = .intlr, loocv = loocv, aux_dim = aux_dim, ncores = ncores, 
                 sdate_dim = sdate_dim, member_dim = member_dim,
                 k_out = k_out)$output1

    if (daily) {
      res <- CSTools::SplitDim(data = res, split_dim = sdate_dim, new_dim_name = time_dim,
                               indices = rep (1:k_out, sdate_num_fr))
    }
  } 
  ## case hindcast only
  else {
    res <- Apply(list(predictor, predictand), target_dims = list(target_dims_predictor, 
                                                                 target_dims_predictand),
                 fun = .intlr, loocv = loocv, ncores = ncores, 
                 sdate_dim = sdate_dim, member_dim = member_dim, 
                 k_out = k_out,
                 output_dims = output_dims)$output1
    if (!daily) {
      res <- ClimProjDiags::Subset(x = res, along = time_dim, indices = 1, drop = 'selected')
    }
  }
  
  if (daily) {
    predictand <- CSTools::SplitDim(data = predictand, split_dim = sdate_dim, 
                                    new_dim_name = time_dim,
                                    indices = rep (1:k_out, sdate_num))
  }

  # restore ensemble dimension in observations if it existed originally
  if (restore_ens) {
    predictand <- s2dv::InsertDim(predictand, posdim = 1, lendim = 1, name = member_dim)
  }
  
  # Return a list of three elements
  res <- list(data = res, obs = predictand, lon = lons, lat = lats)
  
  # for testing 9nn
  #x <- predictor[10,10,1,1,1,,,]
  #x <- Reorder(x,c(2,3,1))
  #y <- predictand[1,1,1,10,,10]
  #f <- forecast[10,10,1,1,1,,,]
  #f <- Reorder(f,c(2,1))
  #f <- InsertDim(f,1,1,'sdate')

  # large-scale
  #x <- Reorder(predictor,c(1,3,2))
  #y <- predictand[1,1,1,10,,10]
  #f <- Reorder(forecast,c(1,3,2))  

  return(res)
}
#-----------------------------------
# Atomic function to generate and apply the linear regressions
#-----------------------------------
.intlr <- function(x, y, f = NULL, loocv, aux_dim = NULL, 
                   sdate_dim = "sdate", member_dim = "member", k_out = 1) {

  if (!is.null(f) & any(names(dim(f)) == member_dim)) {
    if (!is.null(aux_dim)) {
      tmp_df <- data.frame(x = adply(x,.margins = 3, .id = NULL, .fun = as.matrix), y = y)
    } else {
      tmp_df <- data.frame(x = as.vector(x), y = y)
    }
  } else {
    tmp_df <- data.frame(x = x, y = y)
  }

  colnames(tmp_df) <- c(paste0("x.",1:(ncol(tmp_df)-1)), "y")

  # if the data is all NA, force return return NA
  if (all(is.na(tmp_df)) | (sum(apply(tmp_df, 2, function(x) !all(is.na(x)))) == 1)) {
    if (is.null(f)) {  
      res <- array(rep(NA, nrow(tmp_df)), c(k_out, nrow(tmp_df)/k_out))
    } else {
      if (!is.null(aux_dim)) {
        res <- array(NA, dim(f)[names(dim(f)) != aux_dim])
      } else {
        res <- array(NA, dim(f))
      }
    }
  } else {

    # for now, PCA is TRUE only for nn. in future, it might be true for multiple var options and ls.   
    if (any(names(dim(x)) == "nn")) {
      pca <- TRUE
    } else {
      pca <- FALSE
    }

    # training
    lm1 <- .train_lm(df = tmp_df, loocv = loocv, k_out = k_out, pca = pca)  
 
    # prediction
    res <- .pred_lm(lm1 = lm1, df = tmp_df, f = f, loocv = loocv, aux_dim = aux_dim, 
                    k_out = k_out, pca = pca)
  }
  return(res)
}

#-----------------------------------
# Function to generate the linear regressions.
# Returns a list
#-----------------------------------
.train_lm <- function(df, loocv, k_out = 1, pca = FALSE) {

  # Remove predictor columns containing only NA's
  df_all <- df[ ,apply(as.matrix(df[,colnames(df) != 'y'],nrow(df),ncol(df)-1), 2,
                        function(x) !all(is.na(x)))]

  if (loocv) {
    lm1 <- lapply(1:(nrow(df_all)/k_out), function(j) {
      window <- ((j-1)*k_out+1):((j-1)*k_out + k_out) # omit the data of the year 
                                                      # including corresponding day  
      df <- df_all[-window,]
      if (all(is.na(df$y))) {
        return(list(lm = NA))
      } else {
        ## apply principle components if method is 9nn
        if (pca) {
          PCA <- .pca(dx = df[, -ncol(df)], exp.var = 0.95, center = TRUE, scale = FALSE)
          df <- data.frame(x = PCA$PR_mat, y = df[,ncol(df)])
          colnames(df) <- c(paste0("x.",1:(ncol(df)-1)), "y")
          PCA$rotation <- as.matrix(PCA$rotation)
          colnames(PCA$rotation) <- c(paste0("x.",1:(ncol(df)-1)))
        } else {
          PCA <- NULL
        }
        return(list(lm = lm(df, formula = y ~ .), N_predictors = PCA$N_predictors,
                    rotation = PCA$rotation))
      }
      })
  } else {
    if(all(is.na(df_all$y))) {
      lm1 <- NA
    } else {
      if (pca) {
        PCA <- .pca(dx = df_all[,-ncol(df_all)], exp.var = 0.95, center = TRUE, scale = FALSE)
        df_all <- data.frame(x = PCA$PR_mat, y = df_all[,ncol(df_all)])
        colnames(df_all) <- c(paste0("x.",1:(ncol(df_all)-1)), "y")
        PCA$rotation <- as.matrix(PCA$rotation)
        colnames(PCA$rotation) <- c(paste0("x.",1:(ncol(df_all)-1)))
      } else {
        PCA <- NULL
      }
      lm1<- list(lm = lm(data = df_all, formula = y ~ .), N_predictors = PCA$N_predictors,
                 rotation = PCA$rotation)
    }
  }
  return(lm1)
}

#-----------------------------------
# Function to apply the linear regressions.
#-----------------------------------
.pred_lm <- function(df, lm1, f, loocv, aux_dim, k_out = 1, pca = FALSE) {

  if (loocv) {
    pred_vals <- sapply(1:length(lm1), function(j) {
      if (all(is.na(lm1[[j]]$lm))) {
        return(array(rep(NA, k_out), c(k_out, nrow(df) / k_out)))
      } else {
        window <- ((j-1)*k_out+1):((j-1)*k_out + k_out) # test the daily data of 
                                                        # the corresponding year
        if (pca) {
          ## transfer test data to PCA space.
          pred_data <- as.matrix(df[window,-ncol(df)])%*%lm1[[j]]$rotation
          pred_data <- as.data.frame (pred_data)
        } else {
          pred_data <- df[window,]
        }
        return(predict(lm1[[j]]$lm, pred_data))
      }})
    pred_vals <- array (pred_vals, c(k_out, nrow(df) / k_out))
  } else {
    # if lm function exist, in other words not NA
    if (any(!is.na(lm1))) {
      # case to downscale hindcasts
      if (is.null(f)) {
        if (pca) {
          pred_data <- as.matrix(df[,-ncol(df)])%*%lm1$rotation
          pred_data <- as.data.frame(pred_data)
        } else {
          pred_data <- df
        }
        pred_vals_ls <- predict(lm1$lm, newdata = pred_data)    
        pred_vals <- array (pred_vals_ls, c(k_out, nrow(df) / k_out))
      # case to downscale forecasts
      } else {
        if (!is.null(aux_dim)) {
          # 9nn & large-scale
          if (length(dim(f)) == 3) { 
          # if ens member number is different in hcst and fcst, concatenate members
            fcst_df <- as.data.frame(matrix(aperm(f, c(1, 3, 2)), 
                                            nrow = dim(f)[1] * dim(f)[3], ncol = dim(f)[2]))
          } else {
            fcst_df <- as.data.frame(f)
          }
          if (pca) {
            ## transfer test data to PCA space.
            fcst_df <- as.matrix(fcst_df)%*%lm1$rotation
            fcst_df <- as.data.frame (fcst_df)
          } 
          colnames(fcst_df) <- paste0('x.',1:ncol(fcst_df))
          pred_vals <- predict(lm1$lm, newdata = fcst_df)
          dim(pred_vals) <- dim(f)[names(dim(f)) != aux_dim] 
        } else { 
        # basic
          pred_vals <- predict(lm1$lm, newdata = data.frame(x.1 = as.vector(f)))
          dim(pred_vals) <- dim(f)
        }
      }
    } else {
      if (is.null(f)) {
        pred_vals <- array(rep(NA, k_out), c(k_out, nrow(df) / k_out))
      } else {
        if (!is.null(aux_dim)) {
          pred_vals <- array(NA, dim(f)[names(dim(f)) != aux_dim])
        } else {
          pred_vals <- array(NA,  dim(f))
        }
      }
    }
  }
  return(pred_vals)
}

#-----------------------------------
# Function to ind N nearest neighbours.
# 'coar' is an array with named dimensions
#-----------------------------------
.find_nn <- function(coar, lats_hres, lons_hres, lats_coar, lons_coar, 
                     lat_dim, lon_dim, sdate_dim, member_dim, nn = 9, ncores = NULL) {

  # order lats and lons of the high res and coarse res data in 2-dimension 
  # in order to calculate the distances to each other.
  hres_mat <- expand.grid(as.numeric(lats_hres), as.numeric(lons_hres))
  coar_mat <- expand.grid(as.numeric(lats_coar), as.numeric(lons_coar))

  # calculate distances and obtain the closest 4 coarse res grid for each high res grid
  dist_id <- apply (proxy::dist(hres_mat, coar_mat), 1, order)[1:nn,] 

  names(dim(dist_id)) <- c("nn", lon_dim)

  idh <- CSTools::SplitDim(dist_id, split_dim = lon_dim,
                           indices = rep(1:(length(lats_hres)),length(lons_hres)),
                           new_dim_name = lat_dim)

  idh <- aperm (idh, c(1,3,2))  # dimension: nn, lat_dim, lon_dim

  #coar 
  idc_lat_grid <- match( coar_mat[,1], as.numeric(lats_coar))
  idc_lon_grid <- match( coar_mat[,2], as.numeric(lons_coar))
  cgrid <- cbind(idc_lat_grid,idc_lon_grid) # For the coarse res data, include the lat and lon 
                                            # order each grids ordered in 2-dimension.

  idh_dum <- array(NA, dim = c(nn, 2, length(lats_hres), length(lons_hres)))

  for (i in 1:length(lats_hres)) {
    for (j in 1:length(lons_hres)) {
      idh_dum[,,i,j] <- cgrid[idh[,i,j],]
    }
  }

  idh <- idh_dum
  rm(idh_dum)
  names(dim(idh)) <- c("nn", "grd", lat_dim, lon_dim) 

  # idh includes the lat and lon order of the nearest neighbours in the coarse 
  # res data specified for each lat lon of the high res data.

  nearest <- Apply(list(coar, idh),              
                   target_dims = list(c(sdate_dim, lat_dim, lon_dim, member_dim), 
                                      "grd" ),   
                   fun = function(x, y) {
                           a <- x[, y[1], y[2], , drop = FALSE]
                           a <- ClimProjDiags::Subset (a, along = c(lat_dim, lon_dim), 
                                        indices = list (1,1), 
                                        drop = "selected") # drop lat lon dim coming from coar data
                           return(a)
                         },
                   ncores = ncores)$output1

  return(nearest)
}

.pca <- function (dx, exp.var = 0.95, scale = FALSE, center = TRUE, ...) {
  PR <- suppressWarnings(prcomp (~., dx, center = center,
                                 na.action = na.omit,
                                 scale = scale, ...)) ## prcomp between predictors

  N_predictors <- which (summary(PR)$importance[ "Cumulative Proportion",] > exp.var)[1]
  PR_mat <- PR$x[, 1:N_predictors, drop = FALSE]
  if (!is.null(PR$na.action)) { ## if there are NAs, relocate them to the correct order
    dum <- array(NA, dim = c(nrow(dx), ncol(PR_mat) ))
    dum [which(!is.na(dx[, 1])), ] <- PR_mat
    PR_mat <- dum
  }
  return (list(PR_mat = PR_mat, N_predictors = N_predictors, 
               rotation = PR$rotation[,1:N_predictors]))
}
