###########Function. logit <- function(x){ 1 / (1 + exp(-x)) } invLogit <- function(x){ log(x / (1 - x)) } ExtractCovariatesFromText <- function(covariates_text) { covariates <- sapply(strsplit(covariates_text, split = ",")[[1]], function(x){ if(grepl("-",x)){ as.numeric(seq(strsplit(x, split = "-")[[1]][1], strsplit(x, split = "-")[[1]][2])) } else { as.numeric(x) } }) covariates <- as.vector(unlist(covariates)) if(covariates[1] == 0) covariates <- c() covariates } # true or false if the covaraites is categorical or numerical ExtractClassCovariates <- function(ncov, fac_covariates, column_covariate){ classCovariates <- rep(T, ncov) classCovariates[match(fac_covariates, column_covariate)] <- F classCovariates } Extract_IndexesCovariates <- function(X, column_covariate, ncov, classCovariates) { indexes_covariates <- c() indexes_covariates[1] <- 1 if(any(column_covariate != 0)){ indexes_covariates <- c() indexes_covariates[1] <- 1 k <- 2 for (i in 1:ncov) { if(classCovariates[i]){ indexes_covariates[k] <- i + 1 k <- k + 1 } else { num_levels <- length(unique(X[,i])) indexes_covariates[k + 0:(num_levels-2)] <- i + 1 k <- k + num_levels - 1 } } } indexes_covariates } # compute subset of X consisting only of the columns (covariates) used computeXgamma <- function(X, indexes_covariates, gamma){ index_present <- indexes_covariates %in% which(gamma == 1) X_gamma <- X[,index_present,drop = F] X_gamma } # compute subset of the coefficient beta consisting only of the covariates used compute_betagamma <- function(beta, indexes_covariates, gamma){ index_present <- indexes_covariates %in% which(gamma == 1) beta_gamma <- beta[index_present] beta_gamma } sample_z <- function(k_s, w_sm, psi, theta11, theta10, p){ S <- nrow(w_sm) M <- ncol(w_sm) # number of sampling occasion per site M_site <- apply(w_sm, 1, function(x){ sum(!is.na(x)) }) w_s <- apply(w_sm, 1, function(x) { sum(x, na.rm = T) }) z <- rep(NA, S) for (s in 1:S) { if(k_s[s] == 1){ z[s] = 1 } else { p_zsequal1 <- ((1 - p) * psi[s] * theta11[s]^w_s[s] * (1 - theta11[s])^(M_site[s] - w_s[s])) / ((1 - p) *psi[s] * theta11[s]^w_s[s] * (1 - theta11[s])^(M_site[s] - w_s[s]) + (1 - psi[s]) * theta10[s]^w_s[s] * (1 - theta10[s])^(M_site[s] - w_s[s])) z[s] <- rbinom(1, 1, p_zsequal1) } } z } sample_wsm <- function(theta11, z, theta10, p11, p10, y_sm, K){ S <- nrow(y_sm) M <- ncol(y_sm) w_sm <- matrix(NA, nrow = S, ncol = M) for (s in 1:S) { for (m in 1:M) { if(!is.na(y_sm[s, m])){ p_wsm <- (theta11[s]^z[s] * theta10[s]^(1 - z[s]) * p11[s]^y_sm[s,m] *(1 - p11[s])^(K - y_sm[s,m]) ) / (theta11[s]^z[s] * theta10[s]^(1 - z[s]) * p11[s]^y_sm[s,m] *(1 - p11[s])^(K - y_sm[s,m]) + (1 - theta11[s])^z[s] * (1 - theta10[s])^(1 - z[s]) * p10[s]^y_sm[s,m] *(1 - p10[s])^(K - y_sm[s,m]) ) w_sm[s,m] <- rbinom(1, 1, p_wsm) } } } w_sm } compute_pzwsm_old <- function(zi, w_sm_current, psi, p, theta11, theta10, k_s, p11, p10, y_sm, K, M_site, s){ totalProb <- 1 ws <- sum(w_sm_current) p_zsequal1 <- ((1 - p) * psi[s] * theta11[s]^ws * (1 - theta11[s])^(M_site[s] - ws)) / ((1 - p) *psi[s] * theta11[s]^ws * (1 - theta11[s])^(M_site[s] - ws) + (1 - psi[s]) * theta10[s]^ws * (1 - theta10[s])^(M_site[s] - ws)) if(zi == 1){ # this is actually 0 zs <- 0 totalProb <- totalProb * (1 - p_zsequal1) } else { zs <- 1 totalProb <- totalProb * p_zsequal1 } for (m in 1:M_site[s]) { p_wsm <- (theta11[s]^zs * theta10[s]^(1 - zs) * p11[s]^y_sm[s,m] * (1 - p11[s])^(K - y_sm[s,m])) / (theta11[s]^zs * theta10[s]^(1 - zs) * p11[s]^y_sm[s,m] *(1 - p11[s])^(K - y_sm[s,m]) + (1 - theta11[s])^zs * (1 - theta10[s])^(1 - zs) * p10[s]^y_sm[s,m] *(1 - p10[s])^(K - y_sm[s,m])) if(w_sm_current[m] == 1){ totalProb <- totalProb * p_wsm } else { totalProb <- totalProb * (1 - p_wsm) } } totalProb } compute_pzwsm <- function(zi, w_sm_current, psi, p, theta11, theta10, k_s, p11, p10, y_sm, K, M_site, s){ firstTerm <- (psi[s] * (1 - p))^zi prodFirstTerm <- 1 for (m in 1:M_site[s]) { prodFirstTerm <- prodFirstTerm * (theta11[s] * dbinom(y_sm[s,m],K,p11[s]))^(w_sm_current[m]) * ((1 - theta11[s]) * dbinom(y_sm[s,m],K,p10[s]))^(1 - w_sm_current[m]) } firstTerm <- firstTerm * (prodFirstTerm)^(zi) secondTerm <- (1 - psi[s])^(1 - zi) prodSecondTerm <- 1 for (m in 1:M_site[s]) { prodSecondTerm <- prodSecondTerm * (theta10[s] * dbinom(y_sm[s,m],K,p11[s]))^(w_sm_current[m]) * ((1 - theta10[s]) * dbinom(y_sm[s,m],K,p10[s]))^(1 - w_sm_current[m]) } secondTerm <- secondTerm * prodSecondTerm^(1 - zi) firstTerm * secondTerm } BinToDec <- function(x) { sum(2^(which(rev(unlist(strsplit(as.character(x), "")) == 1))-1)) } DecToBin <- function(x, M){ x <- rev(intToBits(x)) x <- x[length(x) - 0:(M-1)] x <- paste(as.integer(x), collapse = "") as.numeric(strsplit(x,"")[[1]]) } sample_z_wsm <- function(psi, theta11, theta10, k_s, p11, p10, y_sm, K, p){ S <- nrow(y_sm) M <- ncol(y_sm) M_site <- apply(y_sm, 1, function(x){ sum(!is.na(x)) }) z <- rep(NA, S) w_sm <- matrix(NA, nrow = S, ncol = M) for (s in 1:S) { if(k_s[s] == 1){ z[s] = 1 for (m in 1:M_site[s]) { if(!is.na(y_sm[s, m])){ p_wsm <- (theta11[s]^z[s] * theta10[s]^(1 - z[s]) * p11[s]^y_sm[s,m] *(1 - p11[s])^(K - y_sm[s,m]) ) / (theta11[s]^z[s] * theta10[s]^(1 - z[s]) * p11[s]^y_sm[s,m] *(1 - p11[s])^(K - y_sm[s,m]) + (1 - theta11[s])^z[s] * (1 - theta10[s])^(1 - z[s]) * p10[s]^y_sm[s,m] *(1 - p10[s])^(K - y_sm[s,m]) ) w_sm[s,m] <- rbinom(1, 1, p_wsm) } } } else { # p_zwsm_old <- matrix(NA, nrow = 2, ncol = 2^M_site[s]) # # for (zi in 1:2) { # 1 is 0 and 2 is 1 # # for (l in 1:(2^(M_site[s]))) { # # w_sm_current <- DecToBin(l - 1, M_site[s]) # # p_zwsm_old[zi, l] <- compute_pzwsm_old(zi, w_sm_current, psi, p, theta11, theta10, k_s, p11, p10, y_sm, K, M_site, s) # # } # # } p_zwsm <- matrix(NA, nrow = 2, ncol = 2^M_site[s]) for (zi in 1:2) { # 1 is 0 and 2 is 1 for (l in 1:(2^(M_site[s]))) { w_sm_current <- DecToBin(l - 1, M_site[s]) p_zwsm[zi, l] <- compute_pzwsm(zi - 1, w_sm_current, psi, p, theta11, theta10, k_s, p11, p10, y_sm, K, M_site, s) } } p_zwsm <- p_zwsm / sum(p_zwsm) sampledCombination <- sample(nrow(p_zwsm) * ncol(p_zwsm), 1, prob = as.vector(p_zwsm)) z[s] <- (1 - sampledCombination %% 2) index_wsm_combination <- floor((sampledCombination + 1) / 2) w_sm[s,1:M_site[s]] <- DecToBin(index_wsm_combination - 1, M_site[s]) } } list("z" = z, "w_sm" = w_sm) } update_psi <- function(beta, gamma, z, X, indexes_covariates, b_psi, B_psi, usingC, d_bar){ S <- length(z) ncov <- ncol(gamma) - 1 gamma_psi <- NULL if(usingC){ # if covariates are used k <- z - .5 n <- rep(1, S) list_gammabeta <- sample_gamma_beta_cpp(gamma[1,], beta[1,], X, b_psi, B_psi, ncov, n, k, indexes_covariates, 1, d_bar) gamma_psi <- list_gammabeta$gamma beta_psi <- list_gammabeta$beta index_present <- indexes_covariates %in% which(gamma_psi == 1) beta_gamma <- beta_psi[index_present] psi <- as.vector(logit(X[,index_present,drop = F] %*% beta_gamma)) } else { # if no covariates a used n <- rep(1, S) sumn <- sum(n) sumz <- sum(z) k <- sumz - sumn * (.5) X_single <- matrix(1) beta_psi <- sample_beta_nocov_cpp(beta[1,1], X_single, b_psi, B_psi, sumn, k) psi <- logit(beta_psi) psi <- rep(psi, S) } list("beta" = beta_psi, "psi" = psi, "gamma" = gamma_psi) } update_theta <- function(beta, gamma, w_sm, z, X, indexes_covariates, theta_1or0, b_theta, B_theta, usingC, d_bar){ M <- ncol(w_sm) S <- nrow(w_sm) ncov <- ncol(gamma) - 1 M_site <- apply(w_sm, 1, function(x){ sum(!is.na(x)) }) gamma_theta <- NULL if(usingC){ if(theta_1or0){ k <- as.vector(t(w_sm[z==1,])) - .5 k <- k[!is.na(k)] n <- rep(1, length(k)) cov_indexes <- rep(1:S, times = M_site * z) # select only the observation with w == 1 X_cov <- X[cov_indexes,,drop = FALSE] list_gammabeta <- sample_gamma_beta_cpp(gamma[2,], beta[2,], X_cov, b_theta, B_theta, ncov, n, k, indexes_covariates, 1, d_bar) gamma_theta <- list_gammabeta$gamma beta_theta <- list_gammabeta$beta } else { k <- as.vector(t(w_sm[z==0,])) - .5 k <- k[!is.na(k)] n <- rep(1, length(k)) cov_indexes <- rep(1:S, times = M_site * (1-z)) X_cov <- X[cov_indexes,,drop = FALSE] list_gammabeta <- sample_gamma_beta_cpp(gamma[3,], beta[3,], X_cov, b_theta, B_theta, ncov, n, k, indexes_covariates, 1, d_bar) gamma_theta <- list_gammabeta$gamma beta_theta <- list_gammabeta$beta } index_present <- indexes_covariates %in% which(gamma_theta == 1) beta_gamma <- beta_theta[index_present] theta <- as.vector(logit(X[,index_present,drop = F] %*% beta_gamma)) } else { if(theta_1or0){ sumn <- sum(z * M_site) sumz <- sum(w_sm[z==1,], na.rm = T) # sumn <- sum(z) * M # sumz <- sum(w_sm[z==1,]) k <- sumz - sumn * (.5) X_single <- matrix(1) beta_theta <- sample_beta_nocov_cpp(beta[2,1], X_single, b_theta, B_theta, sumn, k) } else { sumn <- sum((1 - z) * M_site) sumz <- sum(w_sm[z==0,], na.rm = T) # sumn <- (S - sum(z)) * M # sumz <- sum(w_sm[z==0,]) k <- sumz - sumn * (.5) X_single <- matrix(1) beta_theta <- sample_beta_nocov_cpp(beta[3,1], X_single, b_theta, B_theta, sumn, k) } theta <- logit(beta_theta) theta <- rep(theta, S) } list("theta" = theta, "beta" = beta_theta, "gamma" = gamma_theta) } update_p <- function(beta, gamma, y_sm, w_sm, z, X, indexes_covariates, p_1or0, K, b_p, B_p, usingC, d_bar){ M <- ncol(w_sm) S <- nrow(w_sm) ncov <- ncol(gamma) - 1 M_site <- apply(w_sm, 1, function(x){ sum(!is.na(x)) }) if(usingC){ if(p_1or0){ # if p11 is updated # take the subset of y_sm corresponding to the occasion where eDNA was present k <- as.vector(t(y_sm))[as.vector(t(w_sm)) == 1] - (K/2) k <- k[!is.na(k)] n <- rep(K, length(k)) # take the row of the covariance matrix (potentially replicating some rows) corresponding # to the cells of y_sm where eDNA was present sum_wsm <- apply(w_sm, 1, function(x){ sum(x, na.rm = T) }) cov_indexes <- rep(1:S, times = sum_wsm) X_cov <- X[cov_indexes,,drop = FALSE] list_gammabeta <- sample_gamma_beta_cpp(gamma[4,], beta[4,], X_cov, b_p, B_p, ncov, n, k, indexes_covariates, 1, d_bar) } else { # if p10 is updated # take the subset of y_sm corresponding to the occasion where eDNA was not present k <- as.vector(t(y_sm))[as.vector(t(w_sm)) == 0] - (K/2) k <- k[!is.na(k)] n <- rep(K, length(k)) # take the row of the covariance matrix (potentially replicating some rows) corresponding # to the cells of y_sm where eDNA was not present sum_wsm <- apply(w_sm, 1, function(x){ sum(x, na.rm = T) }) cov_indexes <- rep(1:S, times = M_site - sum_wsm) X_cov <- X[cov_indexes,,drop = FALSE] list_gammabeta <- sample_gamma_beta_cpp(gamma[5,], beta[5,], X_cov, b_p, B_p, ncov, n, k, indexes_covariates, 1, d_bar) } gamma_p <- list_gammabeta$gamma beta_p <- list_gammabeta$beta X_gamma <- computeXgamma(X, indexes_covariates, gamma_p) beta_gamma <- compute_betagamma(beta_p, indexes_covariates, gamma_p) p <- as.vector(logit(X_gamma %*% as.matrix(beta_gamma))) } else { if(p_1or0){ successes_y_sm_w_smequal1 <- sum(as.vector(y_sm)[as.vector(w_sm)==1], na.rm = T) trials_w_smequal1 <- K * sum(w_sm == 1, na.rm = T) sumn <- trials_w_smequal1 sumz <- successes_y_sm_w_smequal1 k <- sumz - sumn * (.5) X_single <- matrix(1) beta_p <- sample_beta_nocov_cpp(beta[4,1], X_single, b_p, B_p, sumn, k) } else { successes_y_sm_w_smequal0 <- sum(as.vector(y_sm)[as.vector(w_sm)==0], na.rm = T) trials_w_smequal0 <- K * sum(w_sm == 0, na.rm = T) sumn <- trials_w_smequal0 sumz <- successes_y_sm_w_smequal0 k <- sumz - sumn * (.5) X_single <- matrix(1) beta_p <- sample_beta_nocov_cpp(beta[5,1], X_single, b_p, B_p, sumn, k) } p <- logit(beta_p) p <- rep(p, S) gamma_p <- NULL } list("p" = p, "beta" = beta_p, "gamma" = gamma_p) } createSummaries <- function(data_output, beta_data_output, gamma_data_output, indexes_covariates, usingC, S, niter, nchain, nameVariable, VariableText ){ data_output2 <- matrix(NA, nrow = niter * nchain, ncol = S) for (chain in 1:nchain) { data_output2[(chain - 1)*niter + 1:niter,] <- data_output[chain,,] } data_output_long <- melt(data_output2) CI_data <- sapply(1:ncol(data_output2), function(i){ c(quantile(data_output2[,i], probs = c(0.025,0.975)), mean(data_output2[,i])) }) if(usingC){ plot1 <- ggplot() + geom_errorbar(data = NULL, aes(x = 1:S, ymax=CI_data[2,], ymin=CI_data[1,]), width=0.2, size=1, color="black") + geom_point(data = NULL, aes(x = 1:S, y=CI_data[3,]), size=2, shape=21, fill="white") + # theme(panel.background = element_rect(fill = "white")) + theme_bw() + scale_y_continuous(name = nameVariable) + xlab("Sites") } else { plot1 <- ggplot(data = NULL, aes(x = "Site", y = data_output2[,1])) + geom_boxplot() + theme_bw() + scale_y_continuous(name = nameVariable) + xlab("") } data_plot <- plot1 # beta_psi - gamma_psi if(usingC){ { beta_data_output2 <- matrix(NA, nrow = niter * nchain, ncol = dim(beta_data_output)[3]) for (chain in 1:nchain) { beta_data_output2[(chain - 1)*niter + 1:niter,] <- beta_data_output[chain,,] } gamma_data_output2 <- matrix(NA, nrow = niter * nchain, ncol = dim(gamma_data_output)[3]) for (chain in 1:nchain) { gamma_data_output2[(chain - 1)*niter + 1:niter,] <- gamma_data_output[chain,,] } beta_data_output2[,1] <- logit(beta_data_output2[,1]) colnames(beta_data_output2) <- dimnames(beta_data_output)[[3]] beta0_data_plot <- ggplot(data = NULL, aes(x = beta_data_output2[,1], y = ..density..)) + geom_histogram(fill = "cornsilk", color = "black") + ylab("") + xlab("Probability") + theme(plot.title = element_text(hjust = 0.5, margin=margin(0,0,5,0), size = 14, face = "bold"), panel.background = element_rect(fill = "white"), panel.border = element_rect(fill = NA, colour = "grey20"), panel.grid.major = element_line(colour = "grey92"), panel.grid.minor = element_line(colour = "grey92", size = 0.25), strip.background = element_rect(fill = "grey85", colour = "grey20"), legend.key = element_rect(fill = "white", colour = NA)) + ggtitle(VariableText) CICoefficients_data <- sapply(1:dim(beta_data_output2)[2], function(i){ if(i == 1){ c(quantile(beta_data_output2[,1], probs = c(0.025,0.975)), mean(beta_data_output2[,1])) } else { # print(beta_data_output2[gamma_data_output2[,indexes_covariates[i]-1]!= 0,i]) c(quantile(beta_data_output2[gamma_data_output2[,indexes_covariates[i]-1]!= 0,i], probs = c(0.025,0.975)), mean(beta_data_output2[gamma_data_output2[,indexes_covariates[i]-1]!= 0,i])) } }) PIP_data <- data.frame(name = dimnames(gamma_data_output)[[3]], prob = apply(gamma_data_output2, 2, mean)) gamma_data_plot <- ggplot(PIP_data, aes(x=reorder(name, prob), y=prob)) + geom_point(size=3) + # Use a larger dot theme_bw() + ylab("PIP") + xlab("Variable") + theme(plot.title = element_text(hjust = 0.5, margin=margin(0,0,5,0), size = 14, face = "bold"), panel.grid.major.x = element_blank(), panel.grid.minor.x = element_blank(), panel.grid.major.y = element_line(colour="grey60", linetype="dashed"), axis.text.x = element_text(angle = 90)) + ylim(c(0,1)) + geom_hline(aes(yintercept = .5), color = "red") beta_data_plot <- ggplot() + geom_errorbar(data = NULL, aes(x = reorder(factor(colnames(beta_data_output2)[-1], levels = colnames(beta_data_output2)[-1]), rep(PIP_data$prob, table(indexes_covariates[-1]))), ymax=CICoefficients_data[1,-1], ymin=CICoefficients_data[2,-1]), width=0.2, size=1, color="black") + geom_point(data = NULL, aes(x = reorder(factor(colnames(beta_data_output2)[-1], levels = colnames(beta_data_output2)[-1]), rep(PIP_data$prob, table(indexes_covariates[-1]))), y=CICoefficients_data[3,-1]), size=4, shape=21, fill="white") + theme(plot.title = element_text(hjust = 0.5, margin=margin(0,0,5,0), size = 14, face = "bold"), panel.background = element_rect(fill = "white"), panel.border = element_rect(fill = NA, colour = "grey20"), panel.grid.major = element_line(colour = "grey92"), panel.grid.minor = element_line(colour = "grey92", size = 0.25), strip.background = element_rect(fill = "grey85", colour = "grey20"), legend.key = element_rect(fill = "white", colour = NA), axis.text.x = element_text(angle = 90)) + xlab("Variable")+ ylab("Coefficient") #+ coord_flip() } } else { CICoefficients_data <- NULL PIP_data <- NULL } list("data_CI" = CI_data, "beta_CI" = CICoefficients_data, "PIP_data" = PIP_data) } fitModel <- function(data, K, column_presence, num_covariates_text, fac_covariates_text, usingPsi, usingTheta11, usingTheta10, usingP11, usingP10, prior_psi, prior_theta11, prior_theta10, prior_p11, prior_p10, phi_mu, phi_beta, d_bar, nchain, nburn, niter, nthin){ # input data { df <- data num_covariates <- sapply(strsplit(num_covariates_text, split = ",")[[1]], function(x){ if(grepl("-",x)){ as.numeric(seq(strsplit(x, split = "-")[[1]][1], strsplit(x, split = "-")[[1]][2])) } else { as.numeric(x) } }) num_covariates <- as.vector(unlist(num_covariates)) fac_covariates <- sapply(strsplit(fac_covariates_text, split = ",")[[1]], function(x){ if(grepl("-",x)){ as.numeric(seq(strsplit(x, split = "-")[[1]][1], strsplit(x, split = "-")[[1]][2])) } else { as.numeric(x) } }) fac_covariates <- as.vector(unlist(fac_covariates)) column_covariate <- c(num_covariates,fac_covariates) indexesToRemove <- c() if(column_presence != 0){ indexesToRemove <- c(indexesToRemove, column_presence) } if(any(num_covariates != 0) | any(fac_covariates != 0)){ indexesToRemove <- c(indexesToRemove, column_covariate) } if(any(column_covariate != 0) | column_presence != 0){ df <- df[,-indexesToRemove] } y_sm <- as.matrix(df) S <- nrow(y_sm) M <- ncol(y_sm) if(column_presence != 0){ k_s <- as.vector(df[,column_presence]) } else { k_s <- rep(0, S) } usingCov <- rep(F, 5) if(usingPsi){ usingCov[1] <- T } if(usingTheta11){ usingCov[2] <- T } if(usingTheta10){ usingCov[3] <- T } if(usingP11){ usingCov[4] <- T } if(usingP10){ usingCov[5] <- T } } # clean data { num_covariates <- ExtractCovariatesFromText(num_covariates_text) fac_covariates <- ExtractCovariatesFromText(fac_covariates_text) column_covariate <- sort(c(num_covariates,fac_covariates)) # create design matrix if(length(column_covariate) != 0){ # if there are covariates X <- data[,column_covariate,drop = F] } else { X <- as.matrix(rep(1, S)) } nameVariables <- colnames(X) ncov <- ncol(X) if(length(column_covariate) != 0){ # if there are covariates available classCovariates <- ExtractClassCovariates(ncov, fac_covariates, column_covariate) # convert the categorical columns to factors for (i in 1:ncov) { if(!classCovariates[i]){ X[,i] <- as.factor(X[,i]) } } } # vector that assign to each dummy variable of the complete design matrix the # index of its corresponding covariate (including intercept) indexes_covariates <- Extract_IndexesCovariates(X, column_covariate, ncov, classCovariates) originalX <- X # backup the original matrix of covariates if(length(column_covariate) != 0){ X <- model.matrix(~ ., X) } } # rv$num_iter <- niter # rv$num_chain <- nchain # input priors { a_pi <- 1 b_pi <- 1 d_bar <- ifelse(d_bar <= ncov, d_bar, 2) epsilon <- pnorm(1) alpha0_theta <- 1 alpha0_p <- 1 delta <- qnorm(epsilon) mu_psi <- invLogit(prior_psi) mu_theta11 <- invLogit(prior_theta11) mu_theta10 <- invLogit(prior_theta10) mu_p11 <- invLogit(prior_p11) mu_p10 <- invLogit(prior_p10) if(any(num_covariates != 0) | any(fac_covariates != 0)){ C <- matrix(0, nrow = ncol(X) - 1, ncol = ncol(X) - 1) l <- 0 for (i in 1:ncov) { if(classCovariates[i]){ C[l + 1, l + 1] <- 1 l <- l + 1 } else { num_levels <- length(unique(originalX[,i])) C[l + 1:(num_levels-1), l + 1:(num_levels-1)] <- .5 * (diag(1, nrow = (num_levels-1)) + matrix(1, nrow = (num_levels-1), ncol = (num_levels-1))) l <- l + num_levels - 1 } } Sigma <- cov(X[,-1, drop = F]) sumCSigma <- sum(C * Sigma) } # compute C matrix if(length(column_covariate) != 0){ C <- matrix(0, nrow = ncol(X) - 1, ncol = ncol(X) - 1) l <- 0 # l loop through the covariates for (i in 1:ncov) { if(classCovariates[i]){ # if it's a numerical covariates C[l + 1, l + 1] <- 1 l <- l + 1 } else { # if it's a categorical covariates num_levels <- length(unique(originalX[,i])) C[l + 1:(num_levels-1), l + 1:(num_levels-1)] <- .5 * (diag(1, nrow = (num_levels-1)) + matrix(1, nrow = (num_levels-1), ncol = (num_levels-1))) l <- l + num_levels - 1 } } Sigma <- cov(X[,-1, drop = F]) sumCSigma <- sum(C * Sigma) } # prior for psi if(usingCov[1]){ { b_psi <- c(mu_psi, rep(0, ncol(X) - 1)) sigma_beta_psi <- C * phi_beta B_psi <- matrix(0, nrow = ncol(X), ncol = ncol(X)) B_psi[1, 1] <- phi_mu B_psi[2:(ncol(X)), 2:(ncol(X))] <- sigma_beta_psi } } else { { b_psi <- mu_psi B_psi <- matrix(0, nrow = 1, ncol = 1) B_psi[1, 1] <- phi_mu } } # prior for theta11 if(usingCov[2]){ delta_theta <- (mu_theta11 - mu_theta10)^2 / (2 * delta^2 * (alpha0_theta + sumCSigma)) sigma_theta11 <- alpha0_theta * delta_theta sigma_beta_theta11 <- C * delta_theta b_theta11 <- c(mu_theta11, rep(0, ncol(X) - 1)) B_theta11 <- matrix(0, nrow = ncol(X), ncol = ncol(X)) B_theta11[1, 1] <- sigma_theta11 B_theta11[2:(ncol(X)), 2:(ncol(X))] <- sigma_beta_theta11 } else { delta_theta <- (mu_theta11 - mu_theta10)^2 / (2 * delta^2) b_theta11 <- mu_theta11 B_theta11 <- matrix(0, nrow = 1, ncol = 1) B_theta11[1, 1] <- delta_theta } # prior for theta10 if(usingCov[3]){ delta_theta <- (mu_theta11 - mu_theta10)^2 / (2 * delta^2 * (alpha0_theta + sumCSigma)) sigma_theta10 <- alpha0_theta * delta_theta sigma_beta_theta10 <- C * delta_theta b_theta10 <- c(mu_theta10, rep(0, ncol(X) - 1)) B_theta10 <- matrix(0, nrow = ncol(X), ncol = ncol(X)) B_theta10[1, 1] <- sigma_theta10 B_theta10[2:(ncol(X)), 2:(ncol(X))] <- sigma_beta_theta10 } else { delta_theta <- (mu_theta11 - mu_theta10)^2 / (2 * delta^2) b_theta10 <- mu_theta10 B_theta10 <- matrix(0, nrow = 1, ncol = 1) B_theta10[1, 1] <- delta_theta } # prior for p11 if(usingCov[4]){ delta_p <- (mu_p11 - mu_p10)^2 / (2 * delta^2 * (alpha0_p + sumCSigma)) sigma_p11 <- alpha0_p * delta_p sigma_beta_p11 <- C * delta_p b_p11 <- c(mu_p11, rep(0, ncol(X) - 1)) B_p11 <- matrix(0, nrow = ncol(X), ncol = ncol(X)) B_p11[1, 1] <- sigma_p11 B_p11[2:(ncol(X)), 2:(ncol(X))] <- sigma_beta_p11 } else { delta_p <- (mu_p11 - mu_p10)^2 / (2 * delta^2) b_p11 <- mu_p11 B_p11 <- matrix(0, nrow = 1, ncol = 1) B_p11[1, 1] <- delta_p } # prior for p10 if(usingCov[5]){ delta_p <- (mu_p11 - mu_p10)^2 / (2 * delta^2 * (alpha0_p + sumCSigma)) sigma_p10 <- alpha0_p * delta_p sigma_beta_p10 <- C * delta_p b_p10 <- c(mu_p10, rep(0, ncol(X) - 1)) B_p10 <- matrix(0, nrow = ncol(X), ncol = ncol(X)) B_p10[1, 1] <- sigma_p10 B_p10[2:(ncol(X)), 2:(ncol(X))] <- sigma_beta_p10 } else { delta_p <- (mu_p11 - mu_p10)^2 / (2 * delta^2) b_p10 <- mu_p10 B_p10 <- matrix(0, nrow = 1, ncol = 1) B_p10[1, 1] <- delta_p } } # initialize output { z_output <- array(0, dim = c(nchain, niter, S)) psi_output <- array(NA, dim = c(nchain, niter, S)) gamma_psi_output <- array(NA, dim = c(nchain , niter, ncov), dimnames = list(c(), c(), nameVariables)) beta_psi_output <- array(NA, dim = c(nchain , niter, length(indexes_covariates)), dimnames = list(c(), c(), colnames(X))) theta11_output <- array(NA, dim = c(nchain, niter, S)) theta10_output <- array(NA, dim = c(nchain, niter, S)) beta_theta11_output <- array(NA, dim = c(nchain , niter, length(indexes_covariates)), dimnames = list(c(), c(), colnames(X))) beta_theta10_output <- array(NA, dim = c(nchain , niter, length(indexes_covariates)), dimnames = list(c(), c(), colnames(X))) gamma_theta11_output <- array(NA, dim = c(nchain , niter, ncov), dimnames = list(c(), c(), nameVariables)) gamma_theta10_output <- array(NA, dim = c(nchain , niter, ncov), dimnames = list(c(), c(), nameVariables)) p11_output <- array(NA, dim = c(nchain, niter, S)) p10_output <- array(NA, dim = c(nchain, niter, S)) beta_p11_output <- array(NA, dim = c(nchain , niter, length(indexes_covariates)), dimnames = list(c(), c(), colnames(X))) beta_p10_output <- array(NA, dim = c(nchain , niter, length(indexes_covariates)), dimnames = list(c(), c(), colnames(X))) gamma_p11_output <- array(NA, dim = c(nchain , niter, ncov), dimnames = list(c(), c(), nameVariables)) gamma_p10_output <- array(NA, dim = c(nchain , niter, ncov), dimnames = list(c(), c(), nameVariables)) oneminpsix <- array(NA, dim = c(nchain , niter, K + 1)) qx <- array(NA, dim = c(nchain , niter, K + 1)) } for (chain in 1:nchain) { # initialize parameters { # initialize beta (matrix of coefficients) and gamma (indicator variables of covariates in the model) # please note gamma includes also an index for the intercept, which obviously is always 1 beta <- matrix(0, nrow = 5, ncol = length(indexes_covariates)) gamma <- matrix(rbinom((ncov + 1) * 5, 1, d_bar / ncov), nrow = 5, ncol = ncov + 1) gamma[,1] <- 1 # index of the intercept if(usingCov[1]){ # if covariates are used beta[1,1] <- mu_psi X_gamma <- computeXgamma(X, indexes_covariates, gamma[1,]) beta_gamma <- compute_betagamma(beta[1,,drop = F], indexes_covariates, gamma[1,]) psi <- as.vector(logit(X_gamma %*% as.matrix(beta_gamma))) } else { beta[1,1] <- mu_psi psi <- rep(logit(mu_psi), S) } if(usingCov[2]){ beta[2,1] <- mu_theta11 X_gamma <- computeXgamma(X, indexes_covariates, gamma[2,]) beta_gamma <- compute_betagamma(beta[2,,drop = F], indexes_covariates, gamma[2,]) theta11 <- as.vector(logit(X_gamma %*% as.matrix(beta_gamma))) } else { beta[2,1] <- mu_theta11 theta11 <- rep(logit(mu_theta11), S) } if(usingCov[3]){ beta[3,1] <- mu_theta10 X_gamma <- computeXgamma(X, indexes_covariates, gamma[3,]) beta_gamma <- compute_betagamma(beta[3,,drop = F], indexes_covariates, gamma[3,]) theta10 <- as.vector(logit(X_gamma %*% as.matrix(beta_gamma))) } else { beta[3,1] <- mu_theta10 theta10 <- rep(logit(mu_theta10), S) } if(usingCov[4]){ beta[4,1] <- mu_p11 X_gamma <- computeXgamma(X, indexes_covariates, gamma[4,]) beta_gamma <- compute_betagamma(beta[4,,drop = F], indexes_covariates, gamma[4,]) p11 <- as.vector(logit(X_gamma %*% as.matrix(beta_gamma))) } else { beta[4,1] <- mu_p11 p11 <- rep(logit(mu_p11), S) } if(usingCov[5]){ beta[5,1] <- mu_p10 X_gamma <- computeXgamma(X, indexes_covariates, gamma[5,]) beta_gamma <- compute_betagamma(beta[5,,drop = F], indexes_covariates, gamma[5,]) p10 <- as.vector(logit(X_gamma %*% as.matrix(beta_gamma))) } else { beta[5,1] <- mu_p10 p10 <- rep(logit(mu_p10), S) } pi <- 0.5 w_sm <- matrix(NA, nrow = S, ncol = M) for (s in 1:S) { for (m in 1:M) { if(!is.na(y_sm[s, m])){ if(y_sm[s, m] >= floor(K * p11[s])){ w_sm[s, m] <- 1 } else { w_sm[s, m] <- 0 } } else { w_sm[s, m] <- NA } } } # initialize the occupied site as the confirmed presences z <- k_s } for (iter in seq_len(nburn + niter*nthin)) { # sample z z <- sample_z(k_s, w_sm, psi, theta11, theta10, pi) # sample wsm w_sm <- sample_wsm(theta11, z, theta10, p11, p10, y_sm, K) # sample z and w_sm # list_z_wsm <- sample_z_wsm(psi, theta11, theta10, k_s, p11, p10, y_sm, K, pi) # w_sm <- list_z_wsm$w_sm # z <- list_z_wsm$z # sample p pi <- rbeta(1, 1 + sum(k_s * z), 1 + sum(z * (1 - k_s))) # sample psi list_psi <- update_psi(beta, gamma, z, X, indexes_covariates, b_psi, B_psi, usingCov[1], d_bar) psi <- list_psi$psi beta[1,] <- list_psi$beta if(usingCov[1]){ gamma[1,] <- list_psi$gamma } # sample theta11 (only if there are > 0 occupied site) if(sum(z) != 0){ list_theta11 <- update_theta(beta, gamma, w_sm, z, X, indexes_covariates, T, b_theta11, B_theta11, usingCov[2], d_bar) theta11 <- list_theta11$theta beta[2,] <- list_theta11$beta if(usingCov[2]){ gamma[2,] <- list_theta11$gamma } } # sample theta10 (only if there are > 0 unoccupied site) if(sum(z) != S){ list_theta10 <- update_theta(beta, gamma, w_sm, z, X, indexes_covariates, F, b_theta10, B_theta10, usingCov[3], d_bar) theta10 <- list_theta10$theta beta[3,] <- list_theta10$beta if(usingCov[3]){ gamma[3,] <- list_theta10$gamma } } # sample p11 (only if there are > 0 sites with edna presences) if(sum(w_sm, na.rm = T) > 0){ list_p11 <- update_p(beta, gamma, y_sm, w_sm, z, X, indexes_covariates, T, K, b_p11, B_p11, usingCov[4], d_bar) p11 <- list_p11$p beta[4,] <- list_p11$beta if(usingCov[4]){ gamma[4,] <- list_p11$gamma } } # sample p10 (only if there are > 0 sites with no edna presences) if(sum(w_sm, na.rm = T) != (S * M)){ list_p10 <- update_p(beta, gamma, y_sm, w_sm, z, X, indexes_covariates, F, K, b_p10, B_p10, usingCov[5], d_bar) p10 <- list_p10$p beta[5,] <- list_p10$beta if(usingCov[5]){ gamma[5, ] <- list_p10$gamma } } if(iter > nburn & (iter - nburn) %% nthin == 0){ trueIter <- (iter - nburn)/nthin z_output[chain,trueIter,] <- z psi_output[chain, trueIter,] <- psi beta_psi_output[chain,trueIter,] <- beta[1,] gamma_psi_output[chain,trueIter,] <- gamma[1,-1] theta11_output[chain, trueIter,] <- theta11 theta10_output[chain, trueIter,] <- theta10 beta_theta11_output[chain,trueIter,] <- beta[2,] beta_theta10_output[chain,trueIter,] <- beta[3,] gamma_theta11_output[chain,trueIter,] <- gamma[2,-1] gamma_theta10_output[chain,trueIter,] <- gamma[3,-1] p11_output[chain, trueIter,] <- p11 p10_output[chain, trueIter,] <- p10 beta_p11_output[chain,trueIter,] <- beta[4,] beta_p10_output[chain,trueIter,] <- beta[5,] gamma_p11_output[chain,trueIter,] <- gamma[4,-1] gamma_p10_output[chain,trueIter,] <- gamma[5,-1] # compute 1 - psi(x) and q(x) psi_0 <- logit(beta[1,1]) theta11_0 <- logit(beta[2,1]) theta10_0 <- logit(beta[3,1]) p11_0 <- logit(beta[4,1]) p10_0 <- logit(beta[5,1]) pxgivenzequal1 <- psi_0 * ((theta11_0) * dbinom(0:K, K, prob = p11_0) + (1 - theta11_0) * dbinom(0:K, K, prob = p10_0)) pxgivenzequal0 <- (1 - psi_0) * ((theta10_0) * dbinom(0:K, K, prob = p11_0) + (1 - theta10_0) * dbinom(0:K, K, prob = p10_0) ) oneminpsix[chain,trueIter,] <- pxgivenzequal0 / (pxgivenzequal1 + pxgivenzequal0) qx[chain,trueIter,] <- dbinom(0:K, K, p11_0) * (theta11_0) + dbinom(0:K, K, p10_0) * (1 - theta11_0) } } } for (chain in 1:nchain) { for (i in seq_along(indexes_covariates)) { beta_psi_output[chain,gamma_psi_output[chain,,indexes_covariates[i]-1]==0,i] <- 0 beta_theta11_output[chain,gamma_theta11_output[chain,,indexes_covariates[i]-1]==0,i] <- 0 beta_theta10_output[chain,gamma_theta10_output[chain,,indexes_covariates[i]-1]==0,i] <- 0 beta_p11_output[chain,gamma_p11_output[chain,,indexes_covariates[i]-1]==0,i] <- 0 beta_p10_output[chain,gamma_p10_output[chain,,indexes_covariates[i]-1]==0,i] <- 0 } } # create results { # create summaries { list_psi_plot <- createSummaries(psi_output, beta_psi_output, gamma_psi_output, indexes_covariates, usingCov[1], S, niter, nchain, "\u03C8", "Baseline occupancy probability") psi_CI <- list_psi_plot$data_CI beta_psi_CI <- list_psi_plot$beta_CI gamma_psi_CI <- list_psi_plot$PIP_data list_theta11_plot <- createSummaries(theta11_output, beta_theta11_output, gamma_theta11_output, indexes_covariates, usingCov[2], S, niter, nchain, expression(theta[1][1]), "Baseline eDNA presence probability given species presence") theta11_CI <- list_theta11_plot$data_CI beta_theta11_CI <- list_theta11_plot$beta_CI gamma_theta11_CI <- list_theta11_plot$PIP_data list_theta10_plot <- createSummaries(theta10_output, beta_theta10_output, gamma_theta10_output, indexes_covariates, usingCov[3], S, niter, nchain, expression(theta[1][0]), "Baseline eDNA presence probability given species absence") theta10_CI <- list_theta10_plot$data_CI beta_theta10_CI <- list_theta10_plot$beta_CI gamma_theta10_CI <- list_theta10_plot$PIP_data list_p11_plot <- createSummaries(p11_output, beta_p11_output, gamma_p11_output, indexes_covariates, usingCov[4], S, niter, nchain, expression(p[11]), "Baseline eDNA detection probability given eDNA presence") p11_CI <- list_p11_plot$data_CI beta_p11_CI <- list_p11_plot$beta_CI gamma_p11_CI <- list_p11_plot$PIP_data list_p10_plot <- createSummaries(p10_output, beta_p10_output, gamma_p10_output, indexes_covariates, usingCov[5], S, niter, nchain, expression(p[10]), "Baseline eDNA detection probability given eDNA absence") p10_CI <- list_p10_plot$data_CI beta_p10_CI <- list_p10_plot$beta_CI gamma_p10_CI <- list_p10_plot$PIP_data } # create results { # download psi { psi_output2 <- matrix(NA, nrow = niter * nchain, ncol = S) for (chain in 1:nchain) { psi_output2[(chain - 1)*niter + 1:niter,] <- psi_output[chain,,] } psi_means <- apply(psi_output2, 2, mean) ci95_psi <- apply(psi_output2, 2, function(x){ quantile(x, probs = .975) }) ci25_psi <- apply(psi_output2, 2, function(x){ quantile(x, probs = .025) }) df <- data.frame("Site" = 1:S) if(column_presence != 0){ k_s <- as.vector(presenceInput()) } else { k_s <- rep(0, S) } df$`2.5Credible Interval` <- ci25_psi df$`Posterior Mean` <- psi_means df$`97.5Credible Interval` <- ci95_psi download_psi <- df } # download beta psi { nchain <- dim(beta_psi_output)[1] niter <- dim(beta_psi_output)[2] ncov <- dim(gamma_psi_output)[3] ncov_all <- dim(beta_psi_output)[3] beta_psi_output2 <- matrix(NA, nrow = niter * nchain, ncol = ncov_all) for (chain in 1:nchain) { beta_psi_output2[(chain - 1)*niter + 1:niter,] <- beta_psi_output[chain,,] } gamma_psi_output2 <- matrix(NA, nrow = niter * nchain, ncol = ncov) for (chain in 1:nchain) { gamma_psi_output2[(chain - 1)*niter + 1:niter,] <- gamma_psi_output[chain,,] } beta_psi_output2[,1] <- logit(beta_psi_output2[,1]) colnames(beta_psi_output2) <- dimnames(beta_psi_output)[[3]] beta_psi_mean <- sapply(1:ncol(beta_psi_output2), function(i){ if(i == 1){ mean(beta_psi_output2[,i]) } else { mean(beta_psi_output2[gamma_psi_output2[,indexes_covariates[i]-1]!=0,i]) } }) ci95_psi <- sapply(1:ncol(beta_psi_output2), function(i){ if(i == 1){ quantile(beta_psi_output2[,i], probs = .975) } else { quantile(beta_psi_output2[gamma_psi_output2[,indexes_covariates[i]-1]!=0,i], probs = .975) } }) ci25_psi <- sapply(1:ncol(beta_psi_output2), function(i){ if(i == 1){ quantile(beta_psi_output2[,i], probs = .025) } else { quantile(beta_psi_output2[gamma_psi_output2[,indexes_covariates[i] - 1]!=0,i], probs = .025) } }) df <- data.frame("2.5Credible Interval" = ci25_psi) colnames(df)[1] <- "2.5Credible Interval" df$`Posterior Mean` <- beta_psi_mean df$`97.5Credible Interval` <- ci95_psi df$PIP <- c(1,apply(gamma_psi_output2, 2, mean)[(indexes_covariates-1)[-1]]) row.names(df) <- colnames(beta_psi_output2) download_beta_psi <- df } # download theta11 { theta11_output2 <- matrix(NA, nrow = niter * nchain, ncol = S) for (chain in 1:nchain) { theta11_output2[(chain - 1)*niter + 1:niter,] <- theta11_output[chain,,] } theta11_means <- apply(theta11_output2, 2, mean) ci95_theta11 <- apply(theta11_output2, 2, function(x){ quantile(x, probs = .975) }) ci25_theta11 <- apply(theta11_output2, 2, function(x){ quantile(x, probs = .025) }) df <- data.frame("Site" = 1:S) df$`2.5Credible Interval` <- ci25_theta11 df$`Posterior Mean` <- theta11_means df$`97.5Credible Interval` <- ci95_theta11 download_theta11 <- df } # download beta_theta11 { beta_theta11_output2 <- matrix(NA, nrow = niter * nchain, ncol = ncov_all) for (chain in 1:nchain) { beta_theta11_output2[(chain - 1)*niter + 1:niter,] <- beta_theta11_output[chain,,] } gamma_theta11_output2 <- matrix(NA, nrow = niter * nchain, ncol = ncov) for (chain in 1:nchain) { gamma_theta11_output2[(chain - 1)*niter + 1:niter,] <- gamma_theta11_output[chain,,] } beta_theta11_output2[,1] <- logit(beta_theta11_output2[,1]) colnames(beta_theta11_output2) <- dimnames(beta_theta11_output)[[3]] beta_theta11_mean <- sapply(1:ncol(beta_theta11_output2), function(i){ if(i == 1){ mean(beta_theta11_output2[,i]) } else { mean(beta_theta11_output2[gamma_theta11_output2[,indexes_covariates[i]-1]!=0,i]) } }) ci95_theta11 <- sapply(1:ncol(beta_theta11_output2), function(i){ if(i == 1){ quantile(beta_theta11_output2[,i], probs = .975) } else { quantile(beta_theta11_output2[gamma_theta11_output2[,indexes_covariates[i]-1]!=0,i], probs = .975) } }) ci25_theta11 <- sapply(1:ncol(beta_theta11_output2), function(i){ if(i == 1){ quantile(beta_theta11_output2[,i], probs = .025) } else { quantile(beta_theta11_output2[gamma_theta11_output2[,indexes_covariates[i] - 1]!=0,i], probs = .025) } }) df <- data.frame("2.5Credible Interval" = ci25_theta11) colnames(df)[1] <- "2.5Credible Interval" df$`Posterior Mean` <- beta_theta11_mean df$`97.5Credible Interval` <- ci95_theta11 df$PIP <- c(1,apply(gamma_theta11_output2, 2, mean)[(indexes_covariates-1)[-1]]) row.names(df) <- colnames(beta_theta11_output2) download_beta_theta11 <- df } # download theta10 { theta10_output2 <- matrix(NA, nrow = niter * nchain, ncol = S) for (chain in 1:nchain) { theta10_output2[(chain - 1)*niter + 1:niter,] <- theta10_output[chain,,] } theta10_means <- apply(theta10_output2, 2, mean) ci95_theta10 <- apply(theta10_output2, 2, function(x){ quantile(x, probs = .975) }) ci25_theta10 <- apply(theta10_output2, 2, function(x){ quantile(x, probs = .025) }) df <- data.frame("Site" = 1:S) df$`2.5Credible Interval` <- ci25_theta10 df$`Posterior Mean` <- theta10_means df$`97.5Credible Interval` <- ci95_theta10 download_theta10 <- df } # download beta_theta10 { beta_theta10_output2 <- matrix(NA, nrow = niter * nchain, ncol = ncov_all) for (chain in 1:nchain) { beta_theta10_output2[(chain - 1)*niter + 1:niter,] <- beta_theta10_output[chain,,] } gamma_theta10_output2 <- matrix(NA, nrow = niter * nchain, ncol = ncov) for (chain in 1:nchain) { gamma_theta10_output2[(chain - 1)*niter + 1:niter,] <- gamma_theta10_output[chain,,] } beta_theta10_output2[,1] <- logit(beta_theta10_output2[,1]) colnames(beta_theta10_output2) <- dimnames(beta_theta10_output)[[3]] beta_theta10_mean <- sapply(1:ncol(beta_theta10_output2), function(i){ if(i == 1){ mean(beta_theta10_output2[,i]) } else { mean(beta_theta10_output2[gamma_theta10_output2[,indexes_covariates[i]-1]!=0,i]) } }) ci95_theta10 <- sapply(1:ncol(beta_theta10_output2), function(i){ if(i == 1){ quantile(beta_theta10_output2[,i], probs = .975) } else { quantile(beta_theta10_output2[gamma_theta10_output2[,indexes_covariates[i]-1]!=0,i], probs = .975) } }) ci25_theta10 <- sapply(1:ncol(beta_theta10_output2), function(i){ if(i == 1){ quantile(beta_theta10_output2[,i], probs = .025) } else { quantile(beta_theta10_output2[gamma_theta10_output2[,indexes_covariates[i] - 1]!=0,i], probs = .025) } }) df <- data.frame("2.5Credible Interval" = ci25_theta10) colnames(df)[1] <- "2.5Credible Interval" df$`Posterior Mean` <- beta_theta10_mean df$`97.5Credible Interval` <- ci95_theta10 df$PIP <- c(1,apply(gamma_theta10_output2, 2, mean)[(indexes_covariates-1)[-1]]) row.names(df) <- colnames(beta_theta10_output2) download_beta_theta10 <- df } # download p11 { p11_output2 <- matrix(NA, nrow = niter * nchain, ncol = S) for (chain in 1:nchain) { p11_output2[(chain - 1)*niter + 1:niter,] <- p11_output[chain,,] } p11_means <- apply(p11_output2, 2, mean) ci95_p11 <- apply(p11_output2, 2, function(x){ quantile(x, probs = .975) }) ci25_p11 <- apply(p11_output2, 2, function(x){ quantile(x, probs = .025) }) df <- data.frame("Site" = 1:S) df$`2.5Credible Interval` <- ci25_p11 df$`Posterior Mean` <- p11_means df$`97.5Credible Interval` <- ci95_p11 download_p11 <- df } # downlaod beta_p11 { beta_p11_output2 <- matrix(NA, nrow = niter * nchain, ncol = ncov_all) for (chain in 1:nchain) { beta_p11_output2[(chain - 1)*niter + 1:niter,] <- beta_p11_output[chain,,] } gamma_p11_output2 <- matrix(NA, nrow = niter * nchain, ncol = ncov) for (chain in 1:nchain) { gamma_p11_output2[(chain - 1)*niter + 1:niter,] <- gamma_p11_output[chain,,] } beta_p11_output2[,1] <- logit(beta_p11_output2[,1]) colnames(beta_p11_output2) <- dimnames(beta_p11_output)[[3]] beta_p11_mean <- sapply(1:ncol(beta_p11_output2), function(i){ if(i == 1){ mean(beta_p11_output2[,i]) } else { mean(beta_p11_output2[gamma_p11_output2[,indexes_covariates[i]-1]!=0,i]) } }) ci95_p11 <- sapply(1:ncol(beta_p11_output2), function(i){ if(i == 1){ quantile(beta_p11_output2[,i], probs = .975) } else { quantile(beta_p11_output2[gamma_p11_output2[,indexes_covariates[i]-1]!=0,i], probs = .975) } }) ci25_p11 <- sapply(1:ncol(beta_p11_output2), function(i){ if(i == 1){ quantile(beta_p11_output2[,i], probs = .025) } else { quantile(beta_p11_output2[gamma_p11_output2[,indexes_covariates[i] - 1]!=0,i], probs = .025) } }) df <- data.frame("2.5Credible Interval" = ci25_p11) colnames(df)[1] <- "2.5Credible Interval" df$`Posterior Mean` <- beta_p11_mean df$`97.5Credible Interval` <- ci95_p11 df$PIP <- c(1,apply(gamma_p11_output2, 2, mean)[(indexes_covariates-1)[-1]]) row.names(df) <- colnames(beta_p11_output2) download_beta_p11 <- df } # download p10 { p10_output2 <- matrix(NA, nrow = niter * nchain, ncol = S) for (chain in 1:nchain) { p10_output2[(chain - 1)*niter + 1:niter,] <- p10_output[chain,,] } p10_means <- apply(p10_output2, 2, mean) ci95_p10 <- apply(p10_output2, 2, function(x){ quantile(x, probs = .975) }) ci25_p10 <- apply(p10_output2, 2, function(x){ quantile(x, probs = .025) }) df <- data.frame("Site" = 1:S) df$`2.5Credible Interval` <- ci25_p10 df$`Posterior Mean` <- p10_means df$`97.5Credible Interval` <- ci95_p10 download_p10 <- df } # downlaod beta_p10 { beta_p10_output2 <- matrix(NA, nrow = niter * nchain, ncol = ncov_all) for (chain in 1:nchain) { beta_p10_output2[(chain - 1)*niter + 1:niter,] <- beta_p10_output[chain,,] } gamma_p10_output2 <- matrix(NA, nrow = niter * nchain, ncol = ncov) for (chain in 1:nchain) { gamma_p10_output2[(chain - 1)*niter + 1:niter,] <- gamma_p10_output[chain,,] } beta_p10_output2[,1] <- logit(beta_p10_output2[,1]) colnames(beta_p10_output2) <- dimnames(beta_p10_output)[[3]] beta_p10_mean <- sapply(1:ncol(beta_p10_output2), function(i){ if(i == 1){ mean(beta_p10_output2[,i]) } else { mean(beta_p10_output2[gamma_p10_output2[,indexes_covariates[i]-1]!=0,i]) } }) ci95_p10 <- sapply(1:ncol(beta_p10_output2), function(i){ if(i == 1){ quantile(beta_p10_output2[,i], probs = .975) } else { quantile(beta_p10_output2[gamma_p10_output2[,indexes_covariates[i]-1]!=0,i], probs = .975) } }) ci25_p10 <- sapply(1:ncol(beta_p10_output2), function(i){ if(i == 1){ quantile(beta_p10_output2[,i], probs = .025) } else { quantile(beta_p10_output2[gamma_p10_output2[,indexes_covariates[i] - 1]!=0,i], probs = .025) } }) df <- data.frame("2.5Credible Interval" = ci25_p10) colnames(df)[1] <- "2.5Credible Interval" df$`Posterior Mean` <- beta_p10_mean df$`97.5Credible Interval` <- ci95_p10 df$PIP <- c(1,apply(gamma_p10_output2, 2, mean)[(indexes_covariates-1)[-1]]) row.names(df) <- colnames(beta_p10_output2) download_beta_p10 <- df } # conditional probability tables { condProbTable <- matrix(NA, nrow = 2, ncol = K + 1) condProbTable[1,] <- apply(oneminpsix[1,,], 2, mean) condProbTable[2,] <- apply(qx[1,,], 2, mean) colnames(condProbTable) <- 0:K rownames(condProbTable) <- c("1 - p(x)","q(x)") } } list_downloads <- list("download_psi" = download_psi, "download_beta_psi" = download_beta_psi, "download_theta11" = download_theta11, "download_beta_theta11" = download_beta_theta11, "download_theta10" = download_theta10, "download_beta_theta10" = download_beta_theta10, "download_p11" = download_p11, "download_beta_p11" = download_beta_p11, "download_p10" = download_p10, "download_beta_p10" = download_beta_p10, "condProbTable" = condProbTable) list_results <- list("psi_CI" = psi_CI, "beta_psi_CI" = beta_psi_CI, "gamma_psi_CI" = gamma_psi_CI, "theta11_CI" = theta11_CI, "beta_theta11_CI" = beta_theta11_CI, "gamma_theta11_CI" = gamma_theta11_CI, "theta10_CI" = theta10_CI, "beta_theta10_CI" = beta_theta10_CI, "gamma_theta10_CI" = gamma_theta10_CI, "p11_CI" = p11_CI, "beta_p11_CI" = beta_p11_CI, "gamma_p11_CI" = gamma_p11_CI, "p10_CI" = p10_CI, "beta_p10_CI" = beta_p10_CI, "gamma_p10_CI" = gamma_p10_CI, "condProbTable" = condProbTable) } list_results } #############Simulation #library(devtools) #install.packages("remotes") #remotes::install_github("AlexDiana/shinyAppFunctions") #remotes::install_github("r-lib/vctrs") # simulation for practitioners paper rm(list=ls()) # logit function logit_fun <- function(x) log(x/(1-x)) # inverse logit function inv_logit_fun <- function(x) 1/(1+exp(-x)) source("function.R") # may need to install these if you get an error message library(shinyAppFunctions) library(reshape2) library(ggplot2) # simulate data with varying S, psi, theta11, theta10, p11 and p10 and save estimates # and conditional probabilities # number of runs for each simulation setting nruns <- 10 # I would suggest starting with say 10, see how long it takes to do them and consider if you can increase them # number of samples from each site M <- 1 # number of PCR runs from each sample K <- 6 # save estimates (names should be self-explanatory) all_condprob <- matrix(NA, nrow = nruns, ncol = K+1) all_psi <- matrix(NA, nrow = nruns, ncol = 3) all_theta11 <- matrix(NA, nrow = nruns, ncol = 3) all_theta10 <- matrix(NA, nrow = nruns, ncol = 3) all_p11 <- matrix(NA, nrow = nruns, ncol = 3) all_p10 <- matrix(NA, nrow = nruns, ncol = 3) # ignore these names, I originally set it up for the covid test i.e. K=M=1 hence the use of covid throughout! covid <- list() covidpres <- list() positives <- list() # I use ind as an indicator for the simulation run so I can save everything and then which simulation each result corresponds to ind <- 3 # choose setting # set values for all S <- 20; #"S" is the number of sites # simulate covariates x1 <- rbinom(S, 1, 0.5) # binary covariate x2 <- runif(S, -2, 2) psi <- inv_logit_fun(logit_fun(0.1) + x1 - x2); # set psi in simulated data here we select 0.1 theta11 <- 0.85; theta10 <- 0.01; p11 <- 0.9; p10 <- 0.01 # set theta11, theta10, p11 and p10 within simulated data, here we select 0.85, 0.01, 0.9 and 0.01 # save settings so you can remember them (named after ind) write.table(list("S" = S, "psi" = psi, "theta11" = theta11[1], "theta10" = theta10[1], "p11" = p11[1], "p10" = p10[1]), paste("setting", ind, ".txt", sep = "")) # run it for(run in 1:nruns) { # infection or not (infection is occupancy!) covid[[run]] <- rbinom(S, 1, prob = psi) # presence of covid in sample or not (presece of dna in sample or not!) covidpres[[run]] <- matrix(0, nrow = S, ncol = M) for(i in 1:S) for(j in 1:M) covidpres[[run]][i,j] <- rbinom(1, 1, prob = covid[[run]][i]*theta11) + rbinom(1, 1, (1-covid[[run]][i])*theta10) # positives in the pcr replicates # the result is a matrix with S rows and M columns positives[[run]] <- matrix(0, nrow = S, ncol = M) for(i in 1:S) for(j in 1:M) positives[[run]][i,j] <- rbinom(1, K, prob = covidpres[[run]][i,j]*p11) + rbinom(1, K, prob = (1-covidpres[[run]][i,j])*p10) data <- data.frame(unlist(positives[[run]]), x1, x2) #colnames(data) <- NULL # these are settings for the app #here I don't have verified presences and I have two covariates for psi column_presence <- 0 num_covariates_text <- paste(M+2) fac_covariates_text <- paste(M+1) usingPsi <- T usingTheta11 <- F usingTheta10 <- F usingP11 <- F usingP10 <- F # settings for priors, currently set at levels which assume we are correct results are correct more than they are false prior_psi <- .5 prior_theta11 <- .8 prior_theta10 <- .2 prior_p11 <- .8 prior_p10 <- .2 phi_mu <- 4 phi_beta <- .25 d_bar <- 2 # settings for MCMC runs, nchain <- 1 #Number of Chains nburn <- 1000 # Number of Burn Iterations niter <- 2000 #Number of Iterations nthin <- 10 #Number of thinned Iterations # this fits the model and saves the results list_results <- fitModel(data, K, column_presence, num_covariates_text, fac_covariates_text, usingPsi, usingTheta11, usingTheta10, usingP11, usingP10, prior_psi, prior_theta11, prior_theta10, prior_p11, prior_p10, phi_mu, phi_beta, d_bar = 1, nchain, nburn, niter, nthin) all_condprob[run,] <- list_results$condProbTable[1,] all_psi[run,] <- list_results$beta_psi_CI[,1] all_theta11[run,] <- list_results$theta11_CI[,1] all_theta10[run,] <- list_results$theta10_CI[,1] all_p11[run,] <- list_results$p11_CI[,1] all_p10[run,] <- list_results$p10_CI[,1] print(run) save.image(paste("sim", ind, ".RData", sep = "")) } save.image(paste("sim", ind, ".RData", sep = "")) # once you finish with one run, you can increase the ind by 1 before moving on to the next setting ind <- ind + 1 print(ind) write.csv(all_psi,"", row.names = TRUE) write.csv(all_theta10,"", row.names = TRUE) write.csv(all_theta11,"", row.names = TRUE) write.csv(all_p10,"", row.names = TRUE) write.csv(all_p11,"", row.names = TRUE) write.csv(all_condprob,"", row.names = TRUE)