###########Function.

logit <- function(x){
  1 / (1 + exp(-x))
}

invLogit <- function(x){
  log(x / (1 - x))
}

ExtractCovariatesFromText <- function(covariates_text) {
  
  covariates <- sapply(strsplit(covariates_text, split = ",")[[1]], function(x){
    if(grepl("-",x)){
      as.numeric(seq(strsplit(x, split = "-")[[1]][1], strsplit(x, split = "-")[[1]][2]))
    } else {
      as.numeric(x)
    }
  })
  covariates <- as.vector(unlist(covariates))
  if(covariates[1] == 0) covariates <- c()
  
  covariates
}

# true or false if the covaraites is categorical or numerical
ExtractClassCovariates <- function(ncov, fac_covariates, column_covariate){
  classCovariates <- rep(T, ncov)
  classCovariates[match(fac_covariates, column_covariate)] <- F
  
  classCovariates
}

Extract_IndexesCovariates <- function(X, column_covariate, ncov, classCovariates) {
  
  indexes_covariates <- c()
  indexes_covariates[1] <- 1
  if(any(column_covariate != 0)){
    
    indexes_covariates <- c()
    indexes_covariates[1] <- 1
    k <- 2
    for (i in 1:ncov) {
      if(classCovariates[i]){
        indexes_covariates[k] <- i + 1
        k <- k + 1
      } else {
        num_levels <- length(unique(X[,i]))
        indexes_covariates[k + 0:(num_levels-2)] <- i + 1
        k <- k + num_levels - 1
      }
    }
    
  }
  
  indexes_covariates
}

# compute subset of X consisting only of the columns (covariates) used
computeXgamma <- function(X, indexes_covariates, gamma){
  
  index_present <- indexes_covariates %in% which(gamma == 1)
  X_gamma <- X[,index_present,drop = F]
  
  X_gamma
}

# compute subset of the coefficient beta consisting only of the covariates used
compute_betagamma <- function(beta, indexes_covariates, gamma){
  
  index_present <- indexes_covariates %in% which(gamma == 1)
  beta_gamma <- beta[index_present]
  
  beta_gamma
}

sample_z <- function(k_s, w_sm, psi, theta11, theta10, p){
  
  S <- nrow(w_sm)
  M <- ncol(w_sm)
  
  # number of sampling occasion per site
  M_site <- apply(w_sm, 1, function(x){
    sum(!is.na(x))  
  })
  
  w_s <- apply(w_sm, 1, function(x) {
    sum(x, na.rm = T)
  })
  
  z <- rep(NA, S)
  
  for (s in 1:S) {
    if(k_s[s] == 1){
      z[s] = 1
    } else {
      
      p_zsequal1 <- ((1 - p) * psi[s] * theta11[s]^w_s[s] * (1 - theta11[s])^(M_site[s] - w_s[s])) / 
        ((1 - p) *psi[s] * theta11[s]^w_s[s] * (1 - theta11[s])^(M_site[s] - w_s[s]) + (1 - psi[s]) * theta10[s]^w_s[s] * (1 - theta10[s])^(M_site[s] - w_s[s]))
      
      z[s] <- rbinom(1, 1, p_zsequal1)
      
    }
  }
  
  z
}

sample_wsm <- function(theta11, z, theta10, p11, p10, y_sm, K){
  
  S <- nrow(y_sm)
  M <- ncol(y_sm)
  w_sm <- matrix(NA, nrow = S, ncol = M)
  
  for (s in 1:S) {
    
    for (m in 1:M) {
      
      if(!is.na(y_sm[s, m])){
        
        p_wsm <- (theta11[s]^z[s] * theta10[s]^(1 - z[s]) * p11[s]^y_sm[s,m] *(1 - p11[s])^(K - y_sm[s,m]) ) / 
          (theta11[s]^z[s] * theta10[s]^(1 - z[s]) * p11[s]^y_sm[s,m] *(1 - p11[s])^(K - y_sm[s,m]) +
             (1 - theta11[s])^z[s] * (1 - theta10[s])^(1 - z[s]) * p10[s]^y_sm[s,m] *(1 - p10[s])^(K - y_sm[s,m])  )
        
        w_sm[s,m] <- rbinom(1, 1, p_wsm)
        
      }
      
    }
    
  }
  
  w_sm
}

compute_pzwsm_old <- function(zi, w_sm_current, psi, p, theta11, theta10, k_s, p11, p10, y_sm, K, M_site, s){
  
  totalProb <- 1
  
  ws <- sum(w_sm_current)
  
  p_zsequal1 <- ((1 - p) * psi[s] * theta11[s]^ws * (1 - theta11[s])^(M_site[s] - ws)) / 
    ((1 - p) *psi[s] * theta11[s]^ws * (1 - theta11[s])^(M_site[s] - ws) + (1 - psi[s]) * theta10[s]^ws * (1 - theta10[s])^(M_site[s] - ws))
  
  if(zi == 1){ # this is actually 0
    
    zs <- 0
    
    totalProb <- totalProb * (1 - p_zsequal1)
    
  } else {
    
    zs <- 1
    
    totalProb <- totalProb * p_zsequal1
    
  }
  
  for (m in 1:M_site[s]) {
    
    p_wsm <- (theta11[s]^zs * theta10[s]^(1 - zs) * p11[s]^y_sm[s,m] * (1 - p11[s])^(K - y_sm[s,m])) / 
      (theta11[s]^zs * theta10[s]^(1 - zs) * p11[s]^y_sm[s,m] *(1 - p11[s])^(K - y_sm[s,m]) +
         (1 - theta11[s])^zs * (1 - theta10[s])^(1 - zs) * p10[s]^y_sm[s,m] *(1 - p10[s])^(K - y_sm[s,m]))
    
    if(w_sm_current[m] == 1){
      totalProb <- totalProb * p_wsm  
    } else {
      totalProb <- totalProb * (1 - p_wsm)
    }
    
  }
  
  totalProb
}

compute_pzwsm <- function(zi, w_sm_current, psi, p, theta11, theta10, k_s, p11, p10, y_sm, K, M_site, s){
  
  firstTerm <- (psi[s] * (1 - p))^zi
  
  prodFirstTerm <- 1
  
  for (m in 1:M_site[s]) {
    prodFirstTerm <- prodFirstTerm * (theta11[s] * dbinom(y_sm[s,m],K,p11[s]))^(w_sm_current[m]) *
      ((1 - theta11[s]) * dbinom(y_sm[s,m],K,p10[s]))^(1 - w_sm_current[m])
  }
  
  firstTerm <- firstTerm * (prodFirstTerm)^(zi)
  
  secondTerm <- (1 - psi[s])^(1 - zi)
  
  prodSecondTerm <- 1
  
  for (m in 1:M_site[s]) {
    prodSecondTerm <- prodSecondTerm * (theta10[s] * dbinom(y_sm[s,m],K,p11[s]))^(w_sm_current[m]) *
      ((1 - theta10[s]) * dbinom(y_sm[s,m],K,p10[s]))^(1 - w_sm_current[m])
  }
  
  secondTerm <- secondTerm * prodSecondTerm^(1 - zi)
  
  firstTerm * secondTerm
}

BinToDec <- function(x) {
  sum(2^(which(rev(unlist(strsplit(as.character(x), "")) == 1))-1)) 
}

DecToBin <- function(x, M){
  x <- rev(intToBits(x))
  x <- x[length(x) - 0:(M-1)]
  x <- paste(as.integer(x), collapse = "")  
  as.numeric(strsplit(x,"")[[1]]) 
}

sample_z_wsm <- function(psi, theta11, theta10, k_s, p11, p10, y_sm, K, p){
  
  S <- nrow(y_sm)
  M <- ncol(y_sm)
  
  M_site <- apply(y_sm, 1, function(x){
    sum(!is.na(x))  
  })
  
  z <- rep(NA, S)
  w_sm <- matrix(NA, nrow = S, ncol = M)
  
  for (s in 1:S) {
    
    if(k_s[s] == 1){
      
      z[s] = 1
      
      for (m in 1:M_site[s]) {
        
        if(!is.na(y_sm[s, m])){
          
          p_wsm <- (theta11[s]^z[s] * theta10[s]^(1 - z[s]) * p11[s]^y_sm[s,m] *(1 - p11[s])^(K - y_sm[s,m]) ) / 
            (theta11[s]^z[s] * theta10[s]^(1 - z[s]) * p11[s]^y_sm[s,m] *(1 - p11[s])^(K - y_sm[s,m]) +
               (1 - theta11[s])^z[s] * (1 - theta10[s])^(1 - z[s]) * p10[s]^y_sm[s,m] *(1 - p10[s])^(K - y_sm[s,m])  )
          
          w_sm[s,m] <- rbinom(1, 1, p_wsm)
          
        }
        
      }
      
    } else {
      
      # p_zwsm_old <- matrix(NA, nrow = 2, ncol = 2^M_site[s])
      # 
      # for (zi in 1:2) { # 1 is 0 and 2 is 1
      # 
      #   for (l in 1:(2^(M_site[s]))) {
      # 
      #     w_sm_current <- DecToBin(l - 1, M_site[s])
      # 
      #     p_zwsm_old[zi, l] <- compute_pzwsm_old(zi, w_sm_current, psi, p, theta11, theta10, k_s, p11, p10, y_sm, K, M_site, s)
      # 
      #   }
      # 
      # }
      
      p_zwsm <- matrix(NA, nrow = 2, ncol = 2^M_site[s])
      
      for (zi in 1:2) { # 1 is 0 and 2 is 1
        
        for (l in 1:(2^(M_site[s]))) {
          
          w_sm_current <- DecToBin(l - 1, M_site[s])
          
          p_zwsm[zi, l] <- compute_pzwsm(zi - 1, w_sm_current, psi, p, theta11, theta10, k_s, p11, p10, y_sm, K, M_site, s)
          
        }
        
      }
      
      p_zwsm <- p_zwsm / sum(p_zwsm)
      
      sampledCombination <- sample(nrow(p_zwsm) * ncol(p_zwsm), 1, prob = as.vector(p_zwsm))
      
      z[s] <- (1 - sampledCombination %% 2)
      
      index_wsm_combination <- floor((sampledCombination + 1) / 2) 
      w_sm[s,1:M_site[s]] <- DecToBin(index_wsm_combination - 1, M_site[s])
    }
    
  }
  
  list("z" = z,
       "w_sm" = w_sm)
}

update_psi <- function(beta, gamma, z, X, indexes_covariates, b_psi, B_psi, usingC, d_bar){
  
  S <- length(z)
  ncov <- ncol(gamma) - 1
  
  gamma_psi <- NULL
  
  if(usingC){ # if covariates are used
    
    k <- z - .5
    n <- rep(1, S)
    
    list_gammabeta <- sample_gamma_beta_cpp(gamma[1,], beta[1,],  X, b_psi, B_psi, 
                                            ncov, n, k, indexes_covariates, 1, d_bar)
    gamma_psi <- list_gammabeta$gamma
    beta_psi <- list_gammabeta$beta
    
    index_present <- indexes_covariates %in% which(gamma_psi == 1)
    beta_gamma <- beta_psi[index_present]
    
    psi <- as.vector(logit(X[,index_present,drop = F] %*% beta_gamma))
    
  } else { # if no covariates a used
    
    n <- rep(1, S)
    sumn <- sum(n)
    sumz <- sum(z)
    k <- sumz - sumn * (.5)
    
    X_single <- matrix(1)
    
    beta_psi <- sample_beta_nocov_cpp(beta[1,1], X_single, b_psi, B_psi, sumn, k)
    psi <- logit(beta_psi)
    
    psi <- rep(psi, S)
    
  }
  
  list("beta" = beta_psi, "psi" = psi, "gamma" = gamma_psi)
}

update_theta <- function(beta, gamma, w_sm, z, X, indexes_covariates, theta_1or0, b_theta, 
                         B_theta, usingC, d_bar){
  
  M <- ncol(w_sm)
  S <- nrow(w_sm)
  ncov <- ncol(gamma) - 1
  
  M_site <- apply(w_sm, 1, function(x){
    sum(!is.na(x))  
  })
  
  gamma_theta <- NULL
  
  if(usingC){
    
    if(theta_1or0){
      
      k <- as.vector(t(w_sm[z==1,])) - .5
      k <- k[!is.na(k)]
      
      n <- rep(1, length(k))
      
      cov_indexes <- rep(1:S, times = M_site * z) # select only the observation with w == 1
      X_cov <- X[cov_indexes,,drop = FALSE]
      
      list_gammabeta <- sample_gamma_beta_cpp(gamma[2,], beta[2,], 
                                              X_cov, b_theta, B_theta, ncov, n, k, indexes_covariates, 1, d_bar)
      gamma_theta <- list_gammabeta$gamma
      beta_theta <- list_gammabeta$beta
      
    } else {
      
      k <- as.vector(t(w_sm[z==0,])) - .5
      k <- k[!is.na(k)]
      
      n <- rep(1, length(k))
      
      cov_indexes <- rep(1:S, times = M_site * (1-z))
      X_cov <- X[cov_indexes,,drop = FALSE]
      
      list_gammabeta <- sample_gamma_beta_cpp(gamma[3,], beta[3,], 
                                              X_cov, b_theta, B_theta, ncov, n, k, indexes_covariates, 1, d_bar)
      gamma_theta <- list_gammabeta$gamma
      beta_theta <- list_gammabeta$beta
      
    }
    
    index_present <- indexes_covariates %in% which(gamma_theta == 1)
    beta_gamma <- beta_theta[index_present]
    
    theta <- as.vector(logit(X[,index_present,drop = F] %*% beta_gamma))
    
  } else {
    
    if(theta_1or0){
      
      sumn <- sum(z * M_site)
      sumz <- sum(w_sm[z==1,], na.rm = T)
      # sumn <- sum(z) * M
      # sumz <- sum(w_sm[z==1,])
      k <- sumz - sumn * (.5)
      
      X_single <- matrix(1)
      
      beta_theta <- sample_beta_nocov_cpp(beta[2,1], X_single, b_theta, B_theta, sumn, k)
      
    } else {
      
      sumn <- sum((1 - z) * M_site)
      sumz <- sum(w_sm[z==0,], na.rm = T)
      # sumn <- (S - sum(z)) * M
      # sumz <- sum(w_sm[z==0,])
      k <- sumz - sumn * (.5)
      
      X_single <- matrix(1)
      
      beta_theta <- sample_beta_nocov_cpp(beta[3,1], X_single, b_theta, B_theta, sumn, k)
      
    }
    
    theta <- logit(beta_theta)
    theta <- rep(theta, S)
    
  }
  
  list("theta" = theta, "beta" = beta_theta, "gamma" = gamma_theta)
}

update_p <- function(beta, gamma, y_sm, w_sm, z, X, indexes_covariates, p_1or0, K, b_p,
                     B_p, usingC, d_bar){
  
  M <- ncol(w_sm) 
  S <- nrow(w_sm)  
  ncov <- ncol(gamma) - 1
  
  M_site <- apply(w_sm, 1, function(x){
    sum(!is.na(x))  
  })
  
  if(usingC){
    
    if(p_1or0){ # if p11 is updated
      
      # take the subset of y_sm corresponding to the occasion where eDNA was present
      k <- as.vector(t(y_sm))[as.vector(t(w_sm)) == 1] - (K/2)
      k <- k[!is.na(k)]
      n <- rep(K, length(k))
      
      # take the row of the covariance matrix (potentially replicating some rows) corresponding
      # to the cells of y_sm where eDNA was present
      sum_wsm <- apply(w_sm, 1, function(x){
        sum(x, na.rm = T)  
      })
      cov_indexes <- rep(1:S, times = sum_wsm)
      X_cov <- X[cov_indexes,,drop = FALSE]
      
      list_gammabeta <- sample_gamma_beta_cpp(gamma[4,], beta[4,], 
                                              X_cov, b_p, B_p, ncov, n, k, indexes_covariates, 1, d_bar)
      
    } else { # if p10 is updated
      
      # take the subset of y_sm corresponding to the occasion where eDNA was not present
      k <- as.vector(t(y_sm))[as.vector(t(w_sm)) == 0] - (K/2)
      k <- k[!is.na(k)]
      n <- rep(K, length(k))
      
      # take the row of the covariance matrix (potentially replicating some rows) corresponding
      # to the cells of y_sm where eDNA was not present
      sum_wsm <- apply(w_sm, 1, function(x){
        sum(x, na.rm = T)  
      })
      cov_indexes <- rep(1:S, times = M_site - sum_wsm)
      X_cov <- X[cov_indexes,,drop = FALSE]
      
      list_gammabeta <- sample_gamma_beta_cpp(gamma[5,], beta[5,], 
                                              X_cov, b_p, B_p, ncov, n, k, indexes_covariates, 1, d_bar)
      
    }
    
    gamma_p <- list_gammabeta$gamma
    beta_p <- list_gammabeta$beta
    
    X_gamma <- computeXgamma(X, indexes_covariates, gamma_p)
    beta_gamma <- compute_betagamma(beta_p, indexes_covariates, gamma_p)
    
    p <-  as.vector(logit(X_gamma %*% as.matrix(beta_gamma)))
    
  } else {
    
    if(p_1or0){
      
      successes_y_sm_w_smequal1 <- sum(as.vector(y_sm)[as.vector(w_sm)==1], na.rm = T)
      trials_w_smequal1 <- K * sum(w_sm == 1, na.rm = T)
      
      sumn <- trials_w_smequal1
      sumz <-  successes_y_sm_w_smequal1
      k <- sumz - sumn * (.5)
      
      X_single <- matrix(1)
      
      beta_p <- sample_beta_nocov_cpp(beta[4,1], X_single, b_p, B_p, sumn, k)
      
    } else {
      
      successes_y_sm_w_smequal0 <- sum(as.vector(y_sm)[as.vector(w_sm)==0], na.rm = T)
      trials_w_smequal0 <- K * sum(w_sm == 0, na.rm = T)
      
      sumn <- trials_w_smequal0
      sumz <-  successes_y_sm_w_smequal0
      k <- sumz - sumn * (.5)
      
      X_single <- matrix(1)
      
      beta_p <- sample_beta_nocov_cpp(beta[5,1], X_single, b_p, B_p, sumn, k)
      
    }
    
    p <- logit(beta_p)
    p <- rep(p, S)
    gamma_p <- NULL
    
  }
  
  list("p" = p, "beta" = beta_p, "gamma" = gamma_p)
}

createSummaries <- function(data_output, beta_data_output, gamma_data_output, indexes_covariates,
                            usingC, S, niter, nchain, nameVariable, VariableText ){
  
  data_output2 <- matrix(NA, nrow = niter * nchain, ncol = S)
  for (chain in 1:nchain) {
    data_output2[(chain - 1)*niter + 1:niter,] <- data_output[chain,,]
  }
  
  data_output_long <- melt(data_output2)
  
  CI_data  <- sapply(1:ncol(data_output2), function(i){
    c(quantile(data_output2[,i], probs = c(0.025,0.975)),
      mean(data_output2[,i]))
  })
  
  if(usingC){
    plot1 <- ggplot() + 
      geom_errorbar(data = NULL, aes(x = 1:S, ymax=CI_data[2,], 
                                     ymin=CI_data[1,]),
                    width=0.2, size=1, color="black") + 
      geom_point(data = NULL, aes(x = 1:S, 
                                  y=CI_data[3,]), size=2, shape=21, fill="white") +
      # theme(panel.background = element_rect(fill = "white")) +
      theme_bw() + scale_y_continuous(name = nameVariable) +
      xlab("Sites") 
  } else {
    plot1 <- ggplot(data = NULL, aes(x = "Site", y = data_output2[,1])) + geom_boxplot() +
      theme_bw() + scale_y_continuous(name = nameVariable) +
      xlab("")
  }
  
  data_plot <- plot1
  
  # beta_psi - gamma_psi
  if(usingC){
    
    {
      beta_data_output2 <- matrix(NA, nrow = niter * nchain, ncol = dim(beta_data_output)[3])
      for (chain in 1:nchain) {
        beta_data_output2[(chain - 1)*niter + 1:niter,] <- beta_data_output[chain,,]
      }
      gamma_data_output2 <- matrix(NA, nrow = niter * nchain, ncol = dim(gamma_data_output)[3])
      for (chain in 1:nchain) {
        gamma_data_output2[(chain - 1)*niter + 1:niter,] <- gamma_data_output[chain,,]
      }
      
      beta_data_output2[,1] <- logit(beta_data_output2[,1])
      colnames(beta_data_output2) <- dimnames(beta_data_output)[[3]]
      
      beta0_data_plot <- ggplot(data = NULL, aes(x = beta_data_output2[,1], y = ..density..)) + 
        geom_histogram(fill = "cornsilk", color = "black") + ylab("") + xlab("Probability") + 
        theme(plot.title = element_text(hjust = 0.5, margin=margin(0,0,5,0), size = 14, face = "bold"),
              panel.background = element_rect(fill = "white"), 
              panel.border = element_rect(fill = NA, colour = "grey20"),
              panel.grid.major = element_line(colour = "grey92"), 
              panel.grid.minor = element_line(colour = "grey92", size = 0.25), 
              strip.background = element_rect(fill = "grey85", colour = "grey20"), 
              legend.key = element_rect(fill = "white", colour = NA)) + 
        ggtitle(VariableText)
      
      CICoefficients_data  <- sapply(1:dim(beta_data_output2)[2], function(i){
        if(i == 1){
          c(quantile(beta_data_output2[,1], probs = c(0.025,0.975)),
            mean(beta_data_output2[,1]))
        } else {
          # print(beta_data_output2[gamma_data_output2[,indexes_covariates[i]-1]!= 0,i])
          c(quantile(beta_data_output2[gamma_data_output2[,indexes_covariates[i]-1]!= 0,i], probs = c(0.025,0.975)),
            mean(beta_data_output2[gamma_data_output2[,indexes_covariates[i]-1]!= 0,i]))
        }
      })
      
      PIP_data <- data.frame(name = dimnames(gamma_data_output)[[3]],
                             prob = apply(gamma_data_output2, 2, mean))
      
      gamma_data_plot <- ggplot(PIP_data, aes(x=reorder(name, prob), y=prob)) +
        geom_point(size=3) + # Use a larger dot
        theme_bw() +
        ylab("PIP") + xlab("Variable") + 
        theme(plot.title = element_text(hjust = 0.5, margin=margin(0,0,5,0), size = 14, face = "bold"),
              panel.grid.major.x = element_blank(),
              panel.grid.minor.x = element_blank(),
              panel.grid.major.y = element_line(colour="grey60", linetype="dashed"),
              axis.text.x = element_text(angle = 90)) +
        ylim(c(0,1)) + geom_hline(aes(yintercept = .5), color = "red")
      
      beta_data_plot <- ggplot() +
        geom_errorbar(data = NULL, aes(x = reorder(factor(colnames(beta_data_output2)[-1], 
                                                          levels = colnames(beta_data_output2)[-1]), 
                                                   rep(PIP_data$prob, table(indexes_covariates[-1]))), ymax=CICoefficients_data[1,-1], 
                                       ymin=CICoefficients_data[2,-1]),
                      width=0.2, size=1, color="black") +
        geom_point(data = NULL, aes(x = reorder(factor(colnames(beta_data_output2)[-1], 
                                                       levels = colnames(beta_data_output2)[-1]), 
                                                rep(PIP_data$prob, table(indexes_covariates[-1]))), 
                                    y=CICoefficients_data[3,-1]), size=4, shape=21, fill="white") +
        theme(plot.title = element_text(hjust = 0.5, margin=margin(0,0,5,0), size = 14, face = "bold"),
              panel.background = element_rect(fill = "white"),
              panel.border = element_rect(fill = NA, colour = "grey20"),
              panel.grid.major = element_line(colour = "grey92"),
              panel.grid.minor = element_line(colour = "grey92", size = 0.25),
              strip.background = element_rect(fill = "grey85", colour = "grey20"),
              legend.key = element_rect(fill = "white", colour = NA),
              axis.text.x = element_text(angle = 90))  +
        xlab("Variable")+ ylab("Coefficient") #+ coord_flip()
      
    }
    
  } else {
    CICoefficients_data <- NULL
    PIP_data <- NULL
  }
  
  list("data_CI" = CI_data,
       "beta_CI" = CICoefficients_data,
       "PIP_data" = PIP_data)
}

fitModel <- function(data, K,
                     column_presence, num_covariates_text, fac_covariates_text, 
                     usingPsi, usingTheta11, usingTheta10, usingP11, usingP10,
                     prior_psi, prior_theta11, prior_theta10, prior_p11, prior_p10,
                     phi_mu, phi_beta, d_bar,
                     nchain, nburn, niter, nthin){
  
  # input data
  {
    df <- data
    
    num_covariates <- sapply(strsplit(num_covariates_text, split = ",")[[1]], function(x){
      if(grepl("-",x)){
        as.numeric(seq(strsplit(x, split = "-")[[1]][1], strsplit(x, split = "-")[[1]][2]))
      } else {
        as.numeric(x)
      }
    })
    num_covariates <- as.vector(unlist(num_covariates))
    
    fac_covariates <- sapply(strsplit(fac_covariates_text, split = ",")[[1]], function(x){
      if(grepl("-",x)){
        as.numeric(seq(strsplit(x, split = "-")[[1]][1], strsplit(x, split = "-")[[1]][2]))
      } else {
        as.numeric(x)
      }
    })
    fac_covariates <- as.vector(unlist(fac_covariates))
    
    column_covariate <- c(num_covariates,fac_covariates)
    
    indexesToRemove <- c()
    
    if(column_presence != 0){
      indexesToRemove <- c(indexesToRemove, column_presence)
    }
    
    if(any(num_covariates != 0) | any(fac_covariates != 0)){
      indexesToRemove <- c(indexesToRemove, column_covariate)
    }
    
    if(any(column_covariate != 0) | column_presence != 0){
      df <- df[,-indexesToRemove]  
    }
    
    y_sm <- as.matrix(df)
    
    S <- nrow(y_sm)
    M <- ncol(y_sm)
    
    if(column_presence != 0){
      k_s <- as.vector(df[,column_presence])
    } else {
      k_s <- rep(0, S)
    }
    
    usingCov <- rep(F, 5)
    if(usingPsi){
      usingCov[1] <- T
    }
    if(usingTheta11){
      usingCov[2] <- T
    }
    if(usingTheta10){
      usingCov[3] <- T
    }
    if(usingP11){
      usingCov[4] <- T
    }
    if(usingP10){
      usingCov[5] <- T
    }  
  }
  
  # clean data
  {
    num_covariates <- ExtractCovariatesFromText(num_covariates_text)
    fac_covariates <- ExtractCovariatesFromText(fac_covariates_text)
    
    column_covariate <- sort(c(num_covariates,fac_covariates))
    
    # create design matrix
    if(length(column_covariate) != 0){ # if there are covariates
      X <- data[,column_covariate,drop = F]
    } else {
      X <- as.matrix(rep(1, S))
    }
    
    nameVariables <- colnames(X)
    ncov <- ncol(X)
    
    if(length(column_covariate) != 0){ # if there are covariates available
      
      classCovariates <- ExtractClassCovariates(ncov, fac_covariates, column_covariate)
      
      # convert the categorical columns to factors
      for (i in 1:ncov) {
        if(!classCovariates[i]){
          X[,i] <- as.factor(X[,i])
        }
      }
      
    }
    
    # vector that assign to each dummy variable of the complete design matrix the 
    # index of its corresponding covariate (including intercept)
    indexes_covariates <- Extract_IndexesCovariates(X, column_covariate, ncov, classCovariates)
    
    originalX <- X # backup the original  matrix of covariates
    if(length(column_covariate) != 0){
      
      X <- model.matrix(~ ., X)
      
    } 
    
  }
  
  # rv$num_iter <- niter
  # rv$num_chain <- nchain
  
  # input priors
  {
    
    a_pi <- 1
    b_pi <- 1
    
    d_bar <- ifelse(d_bar <= ncov, d_bar, 2)
    
    epsilon <- pnorm(1)
    
    alpha0_theta <- 1
    alpha0_p <- 1
    
    delta <- qnorm(epsilon)
    
    mu_psi <- invLogit(prior_psi)
    mu_theta11 <- invLogit(prior_theta11)
    mu_theta10 <- invLogit(prior_theta10)
    mu_p11 <- invLogit(prior_p11)
    mu_p10 <- invLogit(prior_p10)
    
    if(any(num_covariates != 0) | any(fac_covariates != 0)){
      
      C <- matrix(0, nrow = ncol(X) - 1, ncol = ncol(X) - 1)
      l <- 0
      for (i in 1:ncov) {
        if(classCovariates[i]){
          C[l + 1, l + 1] <- 1
          l <- l + 1
        } else {
          num_levels <- length(unique(originalX[,i]))
          C[l + 1:(num_levels-1), l + 1:(num_levels-1)] <- .5 * (diag(1, nrow = (num_levels-1)) + 
                                                                   matrix(1, nrow = (num_levels-1), ncol = (num_levels-1)))
          l <- l + num_levels - 1
        }
      }
      
      Sigma <- cov(X[,-1, drop = F])
      sumCSigma <- sum(C * Sigma)
      
    }
    
    # compute C matrix
    if(length(column_covariate) != 0){
      
      C <- matrix(0, nrow = ncol(X) - 1, ncol = ncol(X) - 1)
      l <- 0 # l loop through the covariates 
      for (i in 1:ncov) {
        if(classCovariates[i]){ # if it's a numerical covariates
          C[l + 1, l + 1] <- 1
          l <- l + 1
        } else { # if it's a categorical covariates
          num_levels <- length(unique(originalX[,i]))
          C[l + 1:(num_levels-1), l + 1:(num_levels-1)] <- .5 * (diag(1, nrow = (num_levels-1)) + 
                                                                   matrix(1, nrow = (num_levels-1), ncol = (num_levels-1)))
          l <- l + num_levels - 1
        }
      }
      
      Sigma <- cov(X[,-1, drop = F])
      sumCSigma <- sum(C * Sigma)
      
    }
    
    # prior for psi
    
    if(usingCov[1]){ 
      
      {
        b_psi <- c(mu_psi, rep(0, ncol(X) - 1))
        sigma_beta_psi <- C * phi_beta  
        
        B_psi <- matrix(0, nrow = ncol(X), ncol = ncol(X))
        B_psi[1, 1] <- phi_mu
        B_psi[2:(ncol(X)), 2:(ncol(X))] <- sigma_beta_psi
      }
      
    } else {
      
      {
        b_psi <- mu_psi
        B_psi <- matrix(0, nrow = 1, ncol = 1)
        B_psi[1, 1] <- phi_mu  
      }
      
    }
    
    # prior for theta11
    
    if(usingCov[2]){
      
      delta_theta <- (mu_theta11 - mu_theta10)^2 / (2 * delta^2 * (alpha0_theta + sumCSigma))
      sigma_theta11 <- alpha0_theta * delta_theta
      sigma_beta_theta11 <- C * delta_theta
      
      b_theta11 <- c(mu_theta11, rep(0, ncol(X) - 1))
      B_theta11 <- matrix(0, nrow = ncol(X), ncol = ncol(X))
      B_theta11[1, 1] <- sigma_theta11
      B_theta11[2:(ncol(X)), 2:(ncol(X))] <- sigma_beta_theta11
      
    } else {
      
      delta_theta <- (mu_theta11 - mu_theta10)^2 / (2 * delta^2)
      
      b_theta11 <- mu_theta11
      B_theta11 <- matrix(0, nrow = 1, ncol = 1)
      B_theta11[1, 1] <- delta_theta
      
    }
    
    # prior for theta10
    
    if(usingCov[3]){
      
      delta_theta <- (mu_theta11 - mu_theta10)^2 / (2 * delta^2 * (alpha0_theta + sumCSigma))
      sigma_theta10 <- alpha0_theta * delta_theta
      sigma_beta_theta10 <- C * delta_theta
      
      b_theta10 <- c(mu_theta10, rep(0, ncol(X) - 1))
      B_theta10 <- matrix(0, nrow = ncol(X), ncol = ncol(X))
      B_theta10[1, 1] <- sigma_theta10
      B_theta10[2:(ncol(X)), 2:(ncol(X))] <- sigma_beta_theta10
      
    } else {
      
      delta_theta <- (mu_theta11 - mu_theta10)^2 / (2 * delta^2)
      
      b_theta10 <- mu_theta10
      B_theta10 <- matrix(0, nrow = 1, ncol = 1)
      B_theta10[1, 1] <- delta_theta
      
    }
    
    # prior for p11
    
    if(usingCov[4]){
      
      delta_p <- (mu_p11 - mu_p10)^2 / (2 * delta^2 * (alpha0_p + sumCSigma))
      sigma_p11 <- alpha0_p * delta_p
      sigma_beta_p11 <- C * delta_p
      
      b_p11 <- c(mu_p11, rep(0, ncol(X) - 1))
      B_p11 <- matrix(0, nrow = ncol(X), ncol = ncol(X))
      B_p11[1, 1] <- sigma_p11
      B_p11[2:(ncol(X)), 2:(ncol(X))] <- sigma_beta_p11
      
      
    } else {
      
      delta_p <- (mu_p11 - mu_p10)^2 / (2 * delta^2)
      
      b_p11 <- mu_p11
      B_p11 <- matrix(0, nrow = 1, ncol = 1)
      B_p11[1, 1] <- delta_p
      
    }
    
    # prior for p10
    
    if(usingCov[5]){
      
      delta_p <- (mu_p11 - mu_p10)^2 / (2 * delta^2 * (alpha0_p + sumCSigma))
      sigma_p10 <- alpha0_p * delta_p
      sigma_beta_p10 <- C * delta_p
      
      b_p10 <- c(mu_p10, rep(0, ncol(X) - 1))
      B_p10 <- matrix(0, nrow = ncol(X), ncol = ncol(X))
      B_p10[1, 1] <- sigma_p10
      B_p10[2:(ncol(X)), 2:(ncol(X))] <- sigma_beta_p10
      
    } else {
      
      delta_p <- (mu_p11 - mu_p10)^2 / (2 * delta^2)
      
      b_p10 <- mu_p10
      B_p10 <- matrix(0, nrow = 1, ncol = 1)
      B_p10[1, 1] <- delta_p
      
    }
    
  }
  
  # initialize output
  {
    z_output <- array(0, dim = c(nchain, niter, S))
    psi_output <- array(NA,  dim = c(nchain, niter, S))
    gamma_psi_output <- array(NA, dim = c(nchain , niter, ncov),
                              dimnames = list(c(), c(), nameVariables))
    beta_psi_output <- array(NA, dim = c(nchain , niter, length(indexes_covariates)),
                             dimnames = list(c(), c(), colnames(X)))
    theta11_output <- array(NA,  dim = c(nchain, niter, S))
    theta10_output <- array(NA,  dim = c(nchain, niter, S))
    beta_theta11_output <- array(NA, dim = c(nchain , niter, length(indexes_covariates)),
                                 dimnames = list(c(), c(), colnames(X)))
    beta_theta10_output <- array(NA, dim = c(nchain , niter, length(indexes_covariates)),
                                 dimnames = list(c(), c(), colnames(X)))
    gamma_theta11_output <- array(NA, dim = c(nchain , niter, ncov),
                                  dimnames = list(c(), c(), nameVariables))
    gamma_theta10_output <- array(NA, dim = c(nchain , niter, ncov),
                                  dimnames = list(c(), c(), nameVariables))
    p11_output <- array(NA,  dim = c(nchain, niter, S))
    p10_output <- array(NA,  dim = c(nchain, niter, S))
    beta_p11_output <- array(NA, dim = c(nchain , niter, length(indexes_covariates)),
                             dimnames = list(c(), c(), colnames(X)))
    beta_p10_output <- array(NA, dim = c(nchain , niter, length(indexes_covariates)),
                             dimnames = list(c(), c(), colnames(X)))
    gamma_p11_output <- array(NA, dim = c(nchain , niter, ncov),
                              dimnames = list(c(), c(), nameVariables))
    gamma_p10_output <- array(NA, dim = c(nchain , niter, ncov),
                              dimnames = list(c(), c(), nameVariables))
    
    oneminpsix <- array(NA, dim = c(nchain , niter, K + 1))
    qx <- array(NA, dim = c(nchain , niter, K + 1))
  }
  
  for (chain in 1:nchain) {
    
    # initialize parameters
    {
      
      # initialize beta (matrix of coefficients) and gamma (indicator variables of covariates in the model)
      # please note gamma includes also an index for the intercept, which obviously is always 1
      beta <- matrix(0, nrow = 5, ncol = length(indexes_covariates))
      gamma <- matrix(rbinom((ncov + 1) * 5, 1, d_bar / ncov), nrow = 5, ncol = ncov + 1)
      gamma[,1] <- 1 # index of the intercept
      
      if(usingCov[1]){ # if covariates are used
        beta[1,1] <- mu_psi
        X_gamma <- computeXgamma(X, indexes_covariates, gamma[1,])
        beta_gamma <- compute_betagamma(beta[1,,drop = F], indexes_covariates, gamma[1,])
        psi <- as.vector(logit(X_gamma %*% as.matrix(beta_gamma)))
      } else { 
        beta[1,1] <- mu_psi
        psi <- rep(logit(mu_psi), S)
      }
      
      if(usingCov[2]){
        beta[2,1] <- mu_theta11
        X_gamma <- computeXgamma(X, indexes_covariates, gamma[2,])
        beta_gamma <- compute_betagamma(beta[2,,drop = F], indexes_covariates, gamma[2,])
        theta11 <- as.vector(logit(X_gamma %*% as.matrix(beta_gamma)))
      } else {
        beta[2,1] <- mu_theta11
        theta11 <- rep(logit(mu_theta11), S)
      }
      
      if(usingCov[3]){
        beta[3,1] <- mu_theta10
        X_gamma <- computeXgamma(X, indexes_covariates, gamma[3,])
        beta_gamma <- compute_betagamma(beta[3,,drop = F], indexes_covariates, gamma[3,])
        theta10 <- as.vector(logit(X_gamma %*% as.matrix(beta_gamma)))
      } else {
        beta[3,1] <- mu_theta10
        theta10 <- rep(logit(mu_theta10), S)
      }
      
      if(usingCov[4]){
        beta[4,1] <- mu_p11
        X_gamma <- computeXgamma(X, indexes_covariates, gamma[4,])
        beta_gamma <- compute_betagamma(beta[4,,drop = F], indexes_covariates, gamma[4,])
        p11 <- as.vector(logit(X_gamma %*% as.matrix(beta_gamma)))
      } else {
        beta[4,1] <- mu_p11
        p11 <- rep(logit(mu_p11), S)
      }
      
      if(usingCov[5]){
        beta[5,1] <- mu_p10
        X_gamma <- computeXgamma(X, indexes_covariates, gamma[5,])
        beta_gamma <- compute_betagamma(beta[5,,drop = F], indexes_covariates, gamma[5,])
        p10 <- as.vector(logit(X_gamma %*% as.matrix(beta_gamma)))
      } else {
        beta[5,1] <- mu_p10
        p10 <- rep(logit(mu_p10), S)
      }
      
      pi <- 0.5
      
      w_sm <- matrix(NA, nrow = S, ncol = M)
      for (s in 1:S) {
        for (m in 1:M) {
          if(!is.na(y_sm[s, m])){
            if(y_sm[s, m] >= floor(K * p11[s])){
              w_sm[s, m] <-  1
            } else {
              w_sm[s, m] <-  0
            }  
          } else {
            w_sm[s, m] <-  NA
          }
        }
      }
      
      # initialize the occupied site as the confirmed presences
      z <- k_s
      
    }
    
    for (iter in seq_len(nburn + niter*nthin)) {
      
      # sample z
      z <- sample_z(k_s, w_sm, psi, theta11, theta10, pi)
      
      # sample wsm
      w_sm <- sample_wsm(theta11, z, theta10, p11, p10, y_sm, K)
      
      # sample z and w_sm
      # list_z_wsm <- sample_z_wsm(psi, theta11, theta10, k_s, p11, p10, y_sm, K, pi)
      # w_sm <- list_z_wsm$w_sm
      # z <- list_z_wsm$z
      
      # sample p
      pi <- rbeta(1, 1 + sum(k_s * z), 1 + sum(z * (1 - k_s)))
      
      # sample psi
      list_psi <- update_psi(beta, gamma, z, X, indexes_covariates, b_psi, B_psi, usingCov[1], d_bar)
      psi <- list_psi$psi
      beta[1,] <- list_psi$beta
      if(usingCov[1]){
        gamma[1,] <- list_psi$gamma
      }
      
      # sample theta11 (only if there are > 0 occupied site)
      if(sum(z) != 0){
        list_theta11 <- update_theta(beta, gamma, w_sm, z, X, indexes_covariates, T, b_theta11, B_theta11, 
                                     usingCov[2], d_bar)
        theta11 <- list_theta11$theta
        beta[2,] <- list_theta11$beta
        if(usingCov[2]){
          gamma[2,] <- list_theta11$gamma
        }
        
      }
      
      # sample theta10 (only if there are > 0 unoccupied site)
      if(sum(z) != S){
        list_theta10 <- update_theta(beta, gamma, w_sm, z, X, indexes_covariates, F, b_theta10, B_theta10, 
                                     usingCov[3], d_bar)
        theta10 <- list_theta10$theta
        beta[3,] <- list_theta10$beta
        if(usingCov[3]){
          gamma[3,] <- list_theta10$gamma
        }
      }
      
      # sample p11 (only if there are > 0 sites with edna presences)
      if(sum(w_sm, na.rm = T) > 0){
        list_p11 <- update_p(beta, gamma, y_sm, w_sm, z, X, indexes_covariates, T, K, b_p11, B_p11, 
                             usingCov[4], d_bar)
        p11 <- list_p11$p
        beta[4,] <- list_p11$beta
        if(usingCov[4]){
          gamma[4,] <- list_p11$gamma
        }
      }
      
      # sample p10 (only if there are > 0 sites with no edna presences)
      if(sum(w_sm, na.rm = T) != (S * M)){
        list_p10 <- update_p(beta, gamma, y_sm, w_sm, z, X, indexes_covariates, F, K, b_p10, B_p10, 
                             usingCov[5], d_bar)
        p10 <- list_p10$p
        beta[5,] <- list_p10$beta
        if(usingCov[5]){
          gamma[5, ] <- list_p10$gamma
        }
      }
      
      if(iter > nburn & (iter - nburn) %% nthin == 0){
        trueIter <- (iter - nburn)/nthin
        z_output[chain,trueIter,] <- z
        psi_output[chain, trueIter,] <- psi
        beta_psi_output[chain,trueIter,] <- beta[1,]
        gamma_psi_output[chain,trueIter,] <- gamma[1,-1]
        theta11_output[chain, trueIter,] <- theta11
        theta10_output[chain, trueIter,] <- theta10
        beta_theta11_output[chain,trueIter,] <- beta[2,]
        beta_theta10_output[chain,trueIter,] <- beta[3,]
        gamma_theta11_output[chain,trueIter,] <- gamma[2,-1]
        gamma_theta10_output[chain,trueIter,] <- gamma[3,-1]
        p11_output[chain, trueIter,] <- p11
        p10_output[chain, trueIter,] <- p10
        beta_p11_output[chain,trueIter,] <- beta[4,]
        beta_p10_output[chain,trueIter,] <- beta[5,]
        gamma_p11_output[chain,trueIter,] <- gamma[4,-1]
        gamma_p10_output[chain,trueIter,] <- gamma[5,-1]
        
        # compute 1 - psi(x) and q(x)
        psi_0 <- logit(beta[1,1])
        theta11_0 <- logit(beta[2,1])
        theta10_0 <- logit(beta[3,1])
        p11_0 <- logit(beta[4,1])
        p10_0 <- logit(beta[5,1])
        pxgivenzequal1 <- psi_0 * ((theta11_0) * dbinom(0:K, K, prob = p11_0) + (1 - theta11_0) * dbinom(0:K, K, prob = p10_0))
        pxgivenzequal0 <- (1 - psi_0) * ((theta10_0) * dbinom(0:K, K, prob = p11_0) + (1 - theta10_0) * dbinom(0:K, K, prob = p10_0) )
        
        oneminpsix[chain,trueIter,] <- pxgivenzequal0 / (pxgivenzequal1 + pxgivenzequal0)
        qx[chain,trueIter,] <- dbinom(0:K, K, p11_0) * (theta11_0) + dbinom(0:K, K, p10_0) * (1 - theta11_0)
        
      }
      
    }
    
  }
  
  for (chain in 1:nchain) {
    for (i in seq_along(indexes_covariates)) {
      beta_psi_output[chain,gamma_psi_output[chain,,indexes_covariates[i]-1]==0,i] <- 0
      beta_theta11_output[chain,gamma_theta11_output[chain,,indexes_covariates[i]-1]==0,i] <- 0
      beta_theta10_output[chain,gamma_theta10_output[chain,,indexes_covariates[i]-1]==0,i] <- 0
      beta_p11_output[chain,gamma_p11_output[chain,,indexes_covariates[i]-1]==0,i] <- 0
      beta_p10_output[chain,gamma_p10_output[chain,,indexes_covariates[i]-1]==0,i] <- 0
    }  
  }
  
  # create results
  {
    
    # create summaries
    {
      
      list_psi_plot <- createSummaries(psi_output, beta_psi_output, gamma_psi_output, indexes_covariates,
                                       usingCov[1], S, niter, nchain, "\u03C8", "Baseline occupancy probability")
      psi_CI <- list_psi_plot$data_CI
      beta_psi_CI <- list_psi_plot$beta_CI
      gamma_psi_CI <- list_psi_plot$PIP_data
      
      list_theta11_plot <- createSummaries(theta11_output, beta_theta11_output, gamma_theta11_output, indexes_covariates,
                                           usingCov[2], S, niter, nchain, expression(theta[1][1]), 
                                           "Baseline eDNA presence probability given species presence")
      theta11_CI <- list_theta11_plot$data_CI
      beta_theta11_CI <- list_theta11_plot$beta_CI
      gamma_theta11_CI <- list_theta11_plot$PIP_data
      
      list_theta10_plot <- createSummaries(theta10_output, beta_theta10_output, gamma_theta10_output, indexes_covariates,
                                           usingCov[3], S, niter, nchain, expression(theta[1][0]), 
                                           "Baseline eDNA presence probability given species absence")
      theta10_CI <- list_theta10_plot$data_CI
      beta_theta10_CI <- list_theta10_plot$beta_CI
      gamma_theta10_CI <- list_theta10_plot$PIP_data
      
      list_p11_plot <- createSummaries(p11_output, beta_p11_output, gamma_p11_output, indexes_covariates,
                                       usingCov[4], S, niter, nchain, expression(p[11]), 
                                       "Baseline eDNA detection probability given eDNA presence")
      p11_CI <- list_p11_plot$data_CI
      beta_p11_CI <- list_p11_plot$beta_CI
      gamma_p11_CI <- list_p11_plot$PIP_data
      
      list_p10_plot <- createSummaries(p10_output, beta_p10_output, gamma_p10_output, indexes_covariates,
                                       usingCov[5], S, niter, nchain, expression(p[10]), 
                                       "Baseline eDNA detection probability given eDNA absence")
      p10_CI <- list_p10_plot$data_CI
      beta_p10_CI <- list_p10_plot$beta_CI
      gamma_p10_CI <- list_p10_plot$PIP_data
      
    }
    
    # create results
    {
      
      # download psi
      {
        psi_output2 <- matrix(NA, nrow = niter * nchain, ncol = S)
        for (chain in 1:nchain) {
          psi_output2[(chain - 1)*niter + 1:niter,] <- psi_output[chain,,]
        }
        
        psi_means <- apply(psi_output2, 2, mean)
        ci95_psi <- apply(psi_output2, 2, function(x){
          quantile(x, probs = .975)
        })
        ci25_psi <- apply(psi_output2, 2, function(x){
          quantile(x, probs = .025)
        })
        
        df <- data.frame("Site" = 1:S)
        
        if(column_presence != 0){
          k_s <- as.vector(presenceInput())
        } else {
          k_s <- rep(0, S)
        }
        
        df$`2.5Credible Interval` <- ci25_psi
        df$`Posterior Mean` <- psi_means
        df$`97.5Credible Interval` <- ci95_psi
        
        download_psi <- df
      }
      
      # download beta psi
      {
        nchain <- dim(beta_psi_output)[1]
        niter <- dim(beta_psi_output)[2]
        ncov <- dim(gamma_psi_output)[3] 
        ncov_all <- dim(beta_psi_output)[3]
        
        beta_psi_output2 <- matrix(NA, nrow = niter * nchain, ncol = ncov_all)
        for (chain in 1:nchain) {
          beta_psi_output2[(chain - 1)*niter + 1:niter,] <- beta_psi_output[chain,,]
        }
        gamma_psi_output2 <- matrix(NA, nrow = niter * nchain, ncol = ncov)
        for (chain in 1:nchain) {
          gamma_psi_output2[(chain - 1)*niter + 1:niter,] <- gamma_psi_output[chain,,]
        }
        
        beta_psi_output2[,1] <- logit(beta_psi_output2[,1])
        colnames(beta_psi_output2) <- dimnames(beta_psi_output)[[3]]
        
        beta_psi_mean <- sapply(1:ncol(beta_psi_output2), function(i){
          if(i == 1){
            mean(beta_psi_output2[,i])
          } else {
            mean(beta_psi_output2[gamma_psi_output2[,indexes_covariates[i]-1]!=0,i])  
          }
        })
        ci95_psi <- sapply(1:ncol(beta_psi_output2), function(i){
          if(i == 1){
            quantile(beta_psi_output2[,i], probs = .975)  
          } else {
            quantile(beta_psi_output2[gamma_psi_output2[,indexes_covariates[i]-1]!=0,i], probs = .975)  
          }
        })
        ci25_psi <- sapply(1:ncol(beta_psi_output2), function(i){
          if(i == 1){
            quantile(beta_psi_output2[,i], probs = .025)
          } else {
            quantile(beta_psi_output2[gamma_psi_output2[,indexes_covariates[i] - 1]!=0,i], probs = .025)  
          }
        })
        
        df <- data.frame("2.5Credible Interval" = ci25_psi)
        colnames(df)[1] <- "2.5Credible Interval"
        df$`Posterior Mean` <- beta_psi_mean
        df$`97.5Credible Interval` <- ci95_psi
        df$PIP <- c(1,apply(gamma_psi_output2, 2, mean)[(indexes_covariates-1)[-1]])
        
        row.names(df) <- colnames(beta_psi_output2)
        
        download_beta_psi <- df
      }
      
      # download theta11
      {
        theta11_output2 <- matrix(NA, nrow = niter * nchain, ncol = S)
        for (chain in 1:nchain) {
          theta11_output2[(chain - 1)*niter + 1:niter,] <- theta11_output[chain,,]
        }
        
        theta11_means <- apply(theta11_output2, 2, mean)
        ci95_theta11 <- apply(theta11_output2, 2, function(x){
          quantile(x, probs = .975)
        })
        ci25_theta11 <- apply(theta11_output2, 2, function(x){
          quantile(x, probs = .025)
        })
        
        df <- data.frame("Site" = 1:S)
        
        df$`2.5Credible Interval` <- ci25_theta11
        df$`Posterior Mean` <- theta11_means
        df$`97.5Credible Interval` <- ci95_theta11
        
        download_theta11 <- df
      }
      
      # download beta_theta11
      {
        beta_theta11_output2 <- matrix(NA, nrow = niter * nchain, ncol = ncov_all)
        for (chain in 1:nchain) {
          beta_theta11_output2[(chain - 1)*niter + 1:niter,] <- beta_theta11_output[chain,,]
        }
        gamma_theta11_output2 <- matrix(NA, nrow = niter * nchain, ncol = ncov)
        for (chain in 1:nchain) {
          gamma_theta11_output2[(chain - 1)*niter + 1:niter,] <- gamma_theta11_output[chain,,]
        }
        
        beta_theta11_output2[,1] <- logit(beta_theta11_output2[,1])
        colnames(beta_theta11_output2) <- dimnames(beta_theta11_output)[[3]]
        
        beta_theta11_mean <- sapply(1:ncol(beta_theta11_output2), function(i){
          if(i == 1){
            mean(beta_theta11_output2[,i])
          } else {
            mean(beta_theta11_output2[gamma_theta11_output2[,indexes_covariates[i]-1]!=0,i])  
          }
        })
        ci95_theta11 <- sapply(1:ncol(beta_theta11_output2), function(i){
          if(i == 1){
            quantile(beta_theta11_output2[,i], probs = .975)  
          } else {
            quantile(beta_theta11_output2[gamma_theta11_output2[,indexes_covariates[i]-1]!=0,i], probs = .975)  
          }
        })
        ci25_theta11 <- sapply(1:ncol(beta_theta11_output2), function(i){
          if(i == 1){
            quantile(beta_theta11_output2[,i], probs = .025)
          } else {
            quantile(beta_theta11_output2[gamma_theta11_output2[,indexes_covariates[i] - 1]!=0,i], probs = .025)  
          }
        })
        
        df <- data.frame("2.5Credible Interval" = ci25_theta11)
        colnames(df)[1] <- "2.5Credible Interval"
        df$`Posterior Mean` <- beta_theta11_mean
        df$`97.5Credible Interval` <- ci95_theta11
        df$PIP <- c(1,apply(gamma_theta11_output2, 2, mean)[(indexes_covariates-1)[-1]])
        
        row.names(df) <- colnames(beta_theta11_output2)
        
        download_beta_theta11 <- df
      }
      
      # download theta10
      {
        theta10_output2 <- matrix(NA, nrow = niter * nchain, ncol = S)
        for (chain in 1:nchain) {
          theta10_output2[(chain - 1)*niter + 1:niter,] <- theta10_output[chain,,]
        }
        
        theta10_means <- apply(theta10_output2, 2, mean)
        ci95_theta10 <- apply(theta10_output2, 2, function(x){
          quantile(x, probs = .975)
        })
        ci25_theta10 <- apply(theta10_output2, 2, function(x){
          quantile(x, probs = .025)
        })
        
        df <- data.frame("Site" = 1:S)
        
        df$`2.5Credible Interval` <- ci25_theta10
        df$`Posterior Mean` <- theta10_means
        df$`97.5Credible Interval` <- ci95_theta10
        
        download_theta10 <- df
      }
      
      # download beta_theta10
      {
        beta_theta10_output2 <- matrix(NA, nrow = niter * nchain, ncol = ncov_all)
        for (chain in 1:nchain) {
          beta_theta10_output2[(chain - 1)*niter + 1:niter,] <- beta_theta10_output[chain,,]
        }
        gamma_theta10_output2 <- matrix(NA, nrow = niter * nchain, ncol = ncov)
        for (chain in 1:nchain) {
          gamma_theta10_output2[(chain - 1)*niter + 1:niter,] <- gamma_theta10_output[chain,,]
        }
        
        beta_theta10_output2[,1] <- logit(beta_theta10_output2[,1])
        colnames(beta_theta10_output2) <- dimnames(beta_theta10_output)[[3]]
        
        beta_theta10_mean <- sapply(1:ncol(beta_theta10_output2), function(i){
          if(i == 1){
            mean(beta_theta10_output2[,i])
          } else {
            mean(beta_theta10_output2[gamma_theta10_output2[,indexes_covariates[i]-1]!=0,i])  
          }
        })
        ci95_theta10 <- sapply(1:ncol(beta_theta10_output2), function(i){
          if(i == 1){
            quantile(beta_theta10_output2[,i], probs = .975)  
          } else {
            quantile(beta_theta10_output2[gamma_theta10_output2[,indexes_covariates[i]-1]!=0,i], probs = .975)  
          }
        })
        ci25_theta10 <- sapply(1:ncol(beta_theta10_output2), function(i){
          if(i == 1){
            quantile(beta_theta10_output2[,i], probs = .025)
          } else {
            quantile(beta_theta10_output2[gamma_theta10_output2[,indexes_covariates[i] - 1]!=0,i], probs = .025)  
          }
        })
        
        df <- data.frame("2.5Credible Interval" = ci25_theta10)
        colnames(df)[1] <- "2.5Credible Interval"
        df$`Posterior Mean` <- beta_theta10_mean
        df$`97.5Credible Interval` <- ci95_theta10
        df$PIP <- c(1,apply(gamma_theta10_output2, 2, mean)[(indexes_covariates-1)[-1]])
        
        row.names(df) <- colnames(beta_theta10_output2)
        
        download_beta_theta10 <- df
      }
      
      # download p11
      {
        p11_output2 <- matrix(NA, nrow = niter * nchain, ncol = S)
        for (chain in 1:nchain) {
          p11_output2[(chain - 1)*niter + 1:niter,] <- p11_output[chain,,]
        }
        
        p11_means <- apply(p11_output2, 2, mean)
        ci95_p11 <- apply(p11_output2, 2, function(x){
          quantile(x, probs = .975)
        })
        ci25_p11 <- apply(p11_output2, 2, function(x){
          quantile(x, probs = .025)
        })
        
        df <- data.frame("Site" = 1:S)
        
        df$`2.5Credible Interval` <- ci25_p11
        df$`Posterior Mean` <- p11_means
        df$`97.5Credible Interval` <- ci95_p11
        
        download_p11 <- df
      }
      
      # downlaod beta_p11
      {
        beta_p11_output2 <- matrix(NA, nrow = niter * nchain, ncol = ncov_all)
        for (chain in 1:nchain) {
          beta_p11_output2[(chain - 1)*niter + 1:niter,] <- beta_p11_output[chain,,]
        }
        gamma_p11_output2 <- matrix(NA, nrow = niter * nchain, ncol = ncov)
        for (chain in 1:nchain) {
          gamma_p11_output2[(chain - 1)*niter + 1:niter,] <- gamma_p11_output[chain,,]
        }
        
        beta_p11_output2[,1] <- logit(beta_p11_output2[,1])
        colnames(beta_p11_output2) <- dimnames(beta_p11_output)[[3]]
        
        beta_p11_mean <- sapply(1:ncol(beta_p11_output2), function(i){
          if(i == 1){
            mean(beta_p11_output2[,i])
          } else {
            mean(beta_p11_output2[gamma_p11_output2[,indexes_covariates[i]-1]!=0,i])  
          }
        })
        ci95_p11 <- sapply(1:ncol(beta_p11_output2), function(i){
          if(i == 1){
            quantile(beta_p11_output2[,i], probs = .975)  
          } else {
            quantile(beta_p11_output2[gamma_p11_output2[,indexes_covariates[i]-1]!=0,i], probs = .975)  
          }
        })
        ci25_p11 <- sapply(1:ncol(beta_p11_output2), function(i){
          if(i == 1){
            quantile(beta_p11_output2[,i], probs = .025)
          } else {
            quantile(beta_p11_output2[gamma_p11_output2[,indexes_covariates[i] - 1]!=0,i], probs = .025)  
          }
        })
        
        df <- data.frame("2.5Credible Interval" = ci25_p11)
        colnames(df)[1] <- "2.5Credible Interval"
        df$`Posterior Mean` <- beta_p11_mean
        df$`97.5Credible Interval` <- ci95_p11
        df$PIP <- c(1,apply(gamma_p11_output2, 2, mean)[(indexes_covariates-1)[-1]])
        
        row.names(df) <- colnames(beta_p11_output2)
        
        download_beta_p11 <- df
      }
      
      # download p10
      {
        p10_output2 <- matrix(NA, nrow = niter * nchain, ncol = S)
        for (chain in 1:nchain) {
          p10_output2[(chain - 1)*niter + 1:niter,] <- p10_output[chain,,]
        }
        
        p10_means <- apply(p10_output2, 2, mean)
        ci95_p10 <- apply(p10_output2, 2, function(x){
          quantile(x, probs = .975)
        })
        ci25_p10 <- apply(p10_output2, 2, function(x){
          quantile(x, probs = .025)
        })
        
        df <- data.frame("Site" = 1:S)
        
        df$`2.5Credible Interval` <- ci25_p10
        df$`Posterior Mean` <- p10_means
        df$`97.5Credible Interval` <- ci95_p10
        
        download_p10 <- df
      }
      
      # downlaod beta_p10
      {
        beta_p10_output2 <- matrix(NA, nrow = niter * nchain, ncol = ncov_all)
        for (chain in 1:nchain) {
          beta_p10_output2[(chain - 1)*niter + 1:niter,] <- beta_p10_output[chain,,]
        }
        gamma_p10_output2 <- matrix(NA, nrow = niter * nchain, ncol = ncov)
        for (chain in 1:nchain) {
          gamma_p10_output2[(chain - 1)*niter + 1:niter,] <- gamma_p10_output[chain,,]
        }
        
        beta_p10_output2[,1] <- logit(beta_p10_output2[,1])
        colnames(beta_p10_output2) <- dimnames(beta_p10_output)[[3]]
        
        beta_p10_mean <- sapply(1:ncol(beta_p10_output2), function(i){
          if(i == 1){
            mean(beta_p10_output2[,i])
          } else {
            mean(beta_p10_output2[gamma_p10_output2[,indexes_covariates[i]-1]!=0,i])  
          }
        })
        ci95_p10 <- sapply(1:ncol(beta_p10_output2), function(i){
          if(i == 1){
            quantile(beta_p10_output2[,i], probs = .975)  
          } else {
            quantile(beta_p10_output2[gamma_p10_output2[,indexes_covariates[i]-1]!=0,i], probs = .975)  
          }
        })
        ci25_p10 <- sapply(1:ncol(beta_p10_output2), function(i){
          if(i == 1){
            quantile(beta_p10_output2[,i], probs = .025)
          } else {
            quantile(beta_p10_output2[gamma_p10_output2[,indexes_covariates[i] - 1]!=0,i], probs = .025)  
          }
        })
        
        df <- data.frame("2.5Credible Interval" = ci25_p10)
        colnames(df)[1] <- "2.5Credible Interval"
        df$`Posterior Mean` <- beta_p10_mean
        df$`97.5Credible Interval` <- ci95_p10
        df$PIP <- c(1,apply(gamma_p10_output2, 2, mean)[(indexes_covariates-1)[-1]])
        
        row.names(df) <- colnames(beta_p10_output2)
        
        download_beta_p10 <- df
      }
      
      # conditional probability tables
      {
        condProbTable <- matrix(NA, nrow = 2, ncol = K + 1)
        condProbTable[1,] <- apply(oneminpsix[1,,], 2, mean)
        condProbTable[2,] <- apply(qx[1,,], 2, mean)
        
        colnames(condProbTable) <-  0:K
        rownames(condProbTable) <- c("1 - p(x)","q(x)")
      }
      
    }
    
    list_downloads <- list("download_psi" = download_psi,
                           "download_beta_psi" = download_beta_psi,
                           "download_theta11" = download_theta11,
                           "download_beta_theta11" = download_beta_theta11,
                           "download_theta10" = download_theta10,
                           "download_beta_theta10" = download_beta_theta10,
                           "download_p11" = download_p11,
                           "download_beta_p11" = download_beta_p11,
                           "download_p10" = download_p10,
                           "download_beta_p10" = download_beta_p10,
                           "condProbTable" = condProbTable)
    
    list_results <- list("psi_CI" = psi_CI,
                         "beta_psi_CI" = beta_psi_CI,
                         "gamma_psi_CI" = gamma_psi_CI,
                         "theta11_CI" = theta11_CI,
                         "beta_theta11_CI" = beta_theta11_CI,
                         "gamma_theta11_CI" = gamma_theta11_CI,
                         "theta10_CI" = theta10_CI,
                         "beta_theta10_CI" = beta_theta10_CI,
                         "gamma_theta10_CI" = gamma_theta10_CI,
                         "p11_CI" = p11_CI,
                         "beta_p11_CI" = beta_p11_CI,
                         "gamma_p11_CI" = gamma_p11_CI,
                         "p10_CI" = p10_CI,
                         "beta_p10_CI" = beta_p10_CI,
                         "gamma_p10_CI" = gamma_p10_CI,
                         "condProbTable" = condProbTable)
    
  }
  
  list_results
}


#############Simulation



#library(devtools)
#install.packages("remotes")
#remotes::install_github("AlexDiana/shinyAppFunctions")
#remotes::install_github("r-lib/vctrs")

#  simulation for practitioners paper

rm(list=ls())

# logit function
logit_fun <- function(x) log(x/(1-x))

# inverse logit function
inv_logit_fun <- function(x) 1/(1+exp(-x))

source("function.R")

# may need to install these if you get an error message
library(shinyAppFunctions) 
library(reshape2) 
library(ggplot2)

# simulate data with varying S, psi, theta11, theta10, p11 and p10 and save estimates 
# and conditional probabilities

# number of runs for each simulation setting
nruns <- 10 # I would suggest starting with say 10, see how long it takes to do them and consider if you can increase them

# number of samples from each site
M <- 1
# number of PCR runs from each sample
K <- 6


# save estimates (names should be self-explanatory)
all_condprob <- matrix(NA, nrow = nruns, ncol = K+1)
all_psi <- matrix(NA, nrow = nruns, ncol = 3)
all_theta11 <- matrix(NA, nrow = nruns, ncol = 3)
all_theta10 <- matrix(NA, nrow = nruns, ncol = 3)
all_p11 <- matrix(NA, nrow = nruns, ncol = 3)
all_p10 <- matrix(NA, nrow = nruns, ncol = 3)

# ignore these names, I originally set it up for the covid test i.e. K=M=1 hence the use of covid throughout!
covid <- list()
covidpres <- list()
positives <- list()

# I use ind as an indicator for the simulation run so I can save everything and then which simulation each result corresponds to
ind <- 3
# choose setting

# set values for all 
            S <- 20; #"S" is the number of sites
            # simulate covariates
            x1 <- rbinom(S, 1, 0.5) # binary covariate
            x2 <- runif(S, -2, 2)
            psi <- inv_logit_fun(logit_fun(0.1) + x1 - x2); # set psi in simulated data here we select 0.1
            theta11 <- 0.85; theta10 <- 0.01; p11 <- 0.9; p10 <- 0.01 # set theta11, theta10, p11 and p10 within simulated data, here we select 0.85, 0.01, 0.9 and 0.01 
# save settings so you can remember them (named after ind)
            write.table(list("S" = S, "psi" = psi, "theta11" = theta11[1], "theta10" = theta10[1], "p11" = p11[1], "p10" = p10[1]), paste("setting", ind, ".txt", sep = ""))
            
        # run it
            for(run in 1:nruns)
            {
              # infection or not (infection is occupancy!)
              covid[[run]] <- rbinom(S, 1, prob = psi)
              
              # presence of covid in sample or not (presece of dna in sample or not!)
              covidpres[[run]] <- matrix(0, nrow = S, ncol = M)
              for(i in 1:S) for(j in 1:M) covidpres[[run]][i,j] <- rbinom(1, 1, prob = covid[[run]][i]*theta11) + rbinom(1, 1, (1-covid[[run]][i])*theta10)
              
              # positives in the pcr replicates
              # the result is a matrix with S rows and M columns
              positives[[run]] <- matrix(0, nrow = S, ncol = M)
              for(i in 1:S) for(j in 1:M) positives[[run]][i,j] <- rbinom(1, K, prob = covidpres[[run]][i,j]*p11) + rbinom(1, K, prob = (1-covidpres[[run]][i,j])*p10)
              
              data <- data.frame(unlist(positives[[run]]), x1, x2)
              #colnames(data) <- NULL
              
              # these are settings for the app 
              
              #here I don't have verified presences and I have two covariates for psi
              column_presence <- 0
              num_covariates_text <- paste(M+2)
              fac_covariates_text <- paste(M+1)
              usingPsi <- T
              usingTheta11 <- F
              usingTheta10 <- F
              usingP11 <- F
              usingP10 <- F
              
              # settings for priors, currently set at levels which assume we are correct results are correct more than they are false
              prior_psi <- .5
              prior_theta11 <- .8
              prior_theta10 <- .2
              prior_p11 <- .8
              prior_p10 <- .2
              phi_mu <- 4
              phi_beta <- .25
              d_bar <- 2
              
              # settings for MCMC runs,
              nchain <- 1 #Number of Chains
              nburn <- 1000 # Number of Burn Iterations
              niter <- 2000 #Number of Iterations
              nthin <- 10 #Number of thinned Iterations
              
              # this fits the model and saves the results
              list_results <- fitModel(data, K,
                                       column_presence, num_covariates_text, fac_covariates_text, 
                                       usingPsi, usingTheta11, usingTheta10, usingP11, usingP10,
                                       prior_psi, prior_theta11, prior_theta10, prior_p11, prior_p10,
                                       phi_mu, phi_beta, d_bar = 1,
                                       nchain, nburn, niter, nthin)
              
              all_condprob[run,] <- list_results$condProbTable[1,]
              all_psi[run,] <- list_results$beta_psi_CI[,1]
              all_theta11[run,] <- list_results$theta11_CI[,1]
              all_theta10[run,] <- list_results$theta10_CI[,1]
              all_p11[run,] <- list_results$p11_CI[,1]
              all_p10[run,] <- list_results$p10_CI[,1]
              print(run)
              save.image(paste("sim", ind, ".RData", sep = ""))
              
            }
            save.image(paste("sim", ind, ".RData", sep = ""))
            
            # once you finish with one run, you can increase the ind by 1 before moving on to the next setting
            ind <- ind + 1
            print(ind)         
 
write.csv(all_psi,"", row.names = TRUE)
write.csv(all_theta10,"", row.names = TRUE)
write.csv(all_theta11,"", row.names = TRUE)
write.csv(all_p10,"", row.names = TRUE)
write.csv(all_p11,"", row.names = TRUE)
write.csv(all_condprob,"", row.names = TRUE)