#libraries
library(brms)
library(tidybayes)
library(ggplot2)
library(patchwork)
library(ape)
library(dplyr)
library(RColorBrewer)

#working directory
setwd("C:/Users/san260/OneDrive - CSIRO/Desktop/R_codes/All_virus")

#Load host and virus trees and compute VCV matrices
htree <- read.tree("Bat_tree.nwk")
vcv_host <- ape::vcv(htree)

corona_tree <- read.tree("BatCorona_Cophylo_231124_input.newick")
corona_tree$tip.label <- gsub("_$", "", corona_tree$tip.label)
vcv_corona <- ape::vcv(corona_tree)

para_tree <- read.tree("Paramxyovirus_WGS_tree.newick")
para_tree$tip.label <- gsub("_$", "", para_tree$tip.label)
vcv_para <- ape::vcv(para_tree)

rhabdo_tree <- read.tree("Rhabdovirus_phylogenetic_tree250425.newick")
rhabdo_tree$tip.label <- gsub("_$", "", rhabdo_tree$tip.label)
vcv_rhabdo <- ape::vcv(rhabdo_tree)

#Global fixed y-limits (0–100)
global_ylim <- c(0, 100)

#function to fit model and plot by bat family
fit_and_plot_fam <- function(file_path, vcv_host, vcv_virus, family_name,
                             iter = 4000, thin = 1, y_limits = global_ylim) {
  dat <- read.csv(file_path)
  dat$phylo <- dat$host
  dat$vphy <- dat$virus
  dat$fam <- as.factor(dat$fam)
  
  # color palette
  fam_levels <- levels(dat$fam)
  blue_shades <- colorRampPalette(brewer.pal(9, "Blues"))(length(fam_levels))
  blue_shades[1] <- "#08306B" 
  fam_colors <- setNames(blue_shades, fam_levels)
  
  # Fit Bayesian mixed model
  model <- brm(
    residual ~ fam + cites + (1 | host) + (1 | virus) +
      (1 | gr(phylo, cov = A)) + (1 | gr(vphy, cov = V)),
    data = dat,
    data2 = list(A = vcv_host, V = vcv_virus),
    family = gaussian(),
    chains = 4, iter = iter, warmup = iter * 0.5,
    control = list(adapt_delta = 0.999, max_treedepth = 15),
    silent = TRUE
  )
  
  # Prepare new data for predictions
  newdat <- data.frame(fam = fam_levels, cites = mean(dat$cites, na.rm = TRUE))
  
  # Posterior fitted draws
  pdraw <- add_fitted_draws(newdat, model, seed = 42, n = 100,
                            re_formula = ~0, dpar = TRUE)
  
 
  y_breaks <- seq(0, 100, by = 20)  # Define more uniformly spaced y-axis breaks
  
  # Plot
  ggplot(pdraw, aes(x = fam, y = .value, fill = fam)) +
    geom_jitter(data = dat, aes(x = fam, y = residual, color = fam),
                width = 0.2, alpha = 0.3, show.legend = FALSE) +
    stat_halfeye(.width = 0.95, alpha = 0.6, show.legend = FALSE) +
    scale_fill_manual(values = fam_colors) +
    scale_color_manual(values = fam_colors) +
    scale_y_continuous(limits = y_limits, breaks = y_breaks) +
    theme_minimal(base_size = 14) +
    theme(
      plot.title = element_text(face = "bold", size = 16, hjust = 0.5),
      axis.text.x = element_text(angle = 45, hjust = 1),
      axis.line = element_line(color = "black", size = 0.5),
      panel.grid.major = element_blank(),
      panel.grid.minor = element_blank(),
      axis.ticks = element_line(color = "black")
    ) +
    labs(title = family_name,
         x = "Bat Family", y = "PACo residual")
}

p_corona <- fit_and_plot_fam("merged_corona_residual.csv", vcv_host, vcv_corona,
                             "Coronaviridae", iter = 4000, thin = 1)

p_para <- fit_and_plot_fam("merged_para_residual.csv", vcv_host, vcv_para,
                           "Paramyxoviridae", iter = 4000, thin = 1)

p_rhabdo <- fit_and_plot_fam("residual_data_with_log_cites.csv", vcv_host, vcv_rhabdo,
                             "Rhabdoviridae", iter = 4000, thin = 2)

# Remove y-axis legends except for the last panel
p_para <- p_para + theme(
  axis.title.y = element_blank(),
  axis.text.y  = element_blank(),
  axis.ticks.y = element_blank()
)

p_rhabdo <- p_rhabdo + theme(
  axis.title.y = element_blank(),
  axis.text.y  = element_blank(),
  axis.ticks.y = element_blank()
)

# Combine all into a single panel with shared y label
final_plot <- (p_corona + p_para + p_rhabdo) +
  plot_layout(ncol = 3, guides = 'collect') &
  plot_annotation(theme = theme(plot.margin = margin(5, 5, 5, 5))) &
  theme(plot.title = element_text(face = "bold"))

print(final_plot)

#Italicize virus family names
p_corona <- p_corona + labs(title = expression(italic("Coronaviridae")))
p_para   <- p_para   + labs(title = expression(italic("Paramyxoviridae")))
p_rhabdo <- p_rhabdo + labs(title = expression(italic("Rhabdoviridae")))

#Combine all
final_plot <- (p_corona + p_para + p_rhabdo) +
  plot_layout(ncol = 3, guides = 'collect') &
  plot_annotation(theme = theme(plot.margin = margin(5, 5, 5, 5))) &
  theme(plot.title = element_text(face = "bold"))

# Display final
print(final_plot)


