# Title: 'Code from: Asynchronous seasonal dynamics of Nycteribiid bat flies and Bartonella in Australian flying foxes (Pteropus spp.)'
# Code authors: 'Brent D Jones, Griffith University & Nicholas J Clark, University of Queensland'
# Date: '19/11/2025'

##### Load Packages #####
library(tidyverse)
library(MetBrewer)
library(ecopaR)  # from https://rdrr.io/github/ralphmp/ecopaR/
library(mgcv)
library(marginaleffects)
library(patchwork)
library(vegan)
library(ggbiplot)
library(dagitty)


##### Load Data #####

# Dataset1 details individual host variables and nycteribiid and bacteria detection results
dataset1 <- read_csv("detection_data_20250827.csv")

# Dataset2 details nycteribiid morphology results
dataset2 <- read_csv("nyct_morphology_20250827.csv")

# Dataset3 details abiotic variables for PCA of sample sessions
dataset3 <- read_csv("session_abiotics_20250827.csv")


##### Descriptive summary (Table 1) #####

# Summary of sampling effort
bind_rows(dataset1 %>%
    dplyr::filter(!is.na(ecto_pos)) %>%
    dplyr::group_by(site) %>% 
    dplyr::summarise(no_sessions = n_distinct(accession),
              no_individuals = sum(!is.na(ecto_pos)),
              prevalence = round(sum(ecto_pos, na.rm = TRUE)/sum(!is.na(ecto_pos)), 3),
              lower_CI = binom.test(sum(ecto_pos, na.rm = TRUE),
                                    sum(!is.na(ecto_pos)),
                                    p = 0.5,
                                    conf.level = 0.95)[["conf.int"]][1],
              upper_CI = binom.test(sum(ecto_pos, na.rm = TRUE),
                                    sum(!is.na(ecto_pos)),
                                    p = 0.5,
                                    conf.level = 0.95)[["conf.int"]][2],
              intensity_recorded = sum(!is.na(ecto_burden)),
              bff = sum(bat_species == "bff", na.rm = TRUE),
              ghff = sum(bat_species == "ghff", na.rm = TRUE),
              adult = sum(bat_age == "adult", na.rm = TRUE),
              subadult = sum(bat_age == "sub_adult", na.rm = TRUE),
              juvenile = sum(bat_age == "juve", na.rm = TRUE),
              female = sum(bat_sex == "female", na.rm = TRUE),
              male = sum(bat_sex == "male", na.rm = TRUE)
    ),
  # Now need to create row for study total and bind to above
  dataset1 %>%
    dplyr::filter(!is.na(ecto_pos)) %>% 
    dplyr::summarise(site = "Total",
              no_sessions = n_distinct(accession),
              no_individuals = sum(!is.na(ecto_pos)),
              prevalence = round(sum(ecto_pos, na.rm = TRUE)/sum(!is.na(ecto_pos)), 3),
              lower_CI = binom.test(sum(ecto_pos, na.rm = TRUE),
                                    sum(!is.na(ecto_pos)),
                                    p = 0.5,
                                    conf.level = 0.95)[["conf.int"]][1],
              upper_CI = binom.test(sum(ecto_pos, na.rm = TRUE),
                                    sum(!is.na(ecto_pos)),
                                    p = 0.5,
                                    conf.level = 0.95)[["conf.int"]][2],
              intensity_recorded = sum(!is.na(ecto_burden)),
              bff = sum(bat_species == "bff", na.rm = TRUE),
              ghff = sum(bat_species == "ghff", na.rm = TRUE),
              adult = sum(bat_age == "adult", na.rm = TRUE),
              subadult = sum(bat_age == "sub_adult", na.rm = TRUE),
              juvenile = sum(bat_age == "juve", na.rm = TRUE),
              female = sum(bat_sex == "female", na.rm = TRUE),
              male = sum(bat_sex == "male", na.rm = TRUE)
    )
)


##### Nycteribiid prevalence over time (Fig. 4) #####

# Plotting prevalence and binomial 95% CI for each session over time,
# with pointranges coloured by roost site. Winter and spring denoted by shading on plot.

# Objects for shading season in plot.
winter1 <- data.frame(start = c( "2018-06-01", "2019-06-01", "2020-06-01", "2021-06-01", "2022-06-01"),
                      end = c( "2018-08-31", "2019-08-31", "2020-08-31", "2021-08-31", "2022-08-31")) %>% 
  mutate(start = ymd(start), end = ymd(end))

spring1 <- data.frame(start = c("2018-09-01", "2019-09-01", "2020-09-01", "2021-09-01", "2022-09-01"),
                     end = c("2018-11-30", "2019-11-30", "2020-11-30", "2021-11-30", "2022-11-30")) %>% 
  mutate(start = ymd(start), end = ymd(end))

# Plot
dataset1 %>%
  dplyr::filter(!is.na(ecto_pos)) %>% 
  dplyr::mutate(ecto_pos = ifelse(ecto_pos == "1", 1, 0)) %>% 
  dplyr::group_by(date_start, site) %>% 
  dplyr::summarise(prev = mean(ecto_pos, na.rm = TRUE),
            lowerCI = binom.test(sum(ecto_pos, na.rm = TRUE),
                                 sum(!is.na(ecto_pos)),
                                 p = 0.5,
                                 conf.level = 0.95)[["conf.int"]][1],
            upperCI = binom.test(sum(ecto_pos, na.rm = TRUE),
                                 sum(!is.na(ecto_pos)),
                                 p = 0.5,
                                 conf.level = 0.95)[["conf.int"]][2]
  )  %>% 
  ggplot2::ggplot(aes(x = date_start, y = prev, colour = site))  +
  ggplot2::geom_rect(data = winter1, inherit.aes = FALSE, aes(xmin = start, xmax = end, 
                                                    ymin = -Inf, ymax = Inf), colour = "lightgrey",
            alpha = 0.3, linewidth = NULL) +
  ggplot2::geom_rect(data = spring1, inherit.aes = FALSE, aes(xmin = start, xmax = end, 
                                                    ymin = -Inf, ymax = Inf), colour = "lightgrey",
            alpha = 0.2, linewidth = NULL) +
  ggplot2::geom_pointrange(aes(ymin = lowerCI, ymax = upperCI), size = 0.5, linewidth = 1,
                  alpha = 0.6, position = position_dodge2(preserve = "single", width = 10)) +
  ggplot2::theme_bw() +
  ggplot2::labs(x = NULL,
       y = "Nycteribiid Prevalence",
       colour = "Roost") +
  ggplot2::scale_colour_manual(values = c("#999999", "#E69F00", "#56B4E9", "#009E73", "#F0E442", "#CC79A7", "#D55E00", "#000000", "#0072B2")) +
  ggplot2::scale_x_date(minor_breaks = "3 months", date_breaks = "3 months", date_labels = "%b %Y") +
  ggplot2::theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 12),
                 axis.text.y = element_text(size = 12),
                 axis.title = element_text(size = 14),
                 legend.title = element_text(size = 14),
                 legend.text = element_text(size = 12))
# saved as pdf, 12x4 inches


##### Nycteribiid median intensity over time (Fig. S2) #####

# Objects for shading season in plot.
winter1 <- data.frame(start = c( "2018-06-01", "2019-06-01", "2020-06-01", "2021-06-01", "2022-06-01"),
                      end = c( "2018-08-31", "2019-08-31", "2020-08-31", "2021-08-31", "2022-08-31")) %>% 
  mutate(start = ymd(start), end = ymd(end))

spring1 <- data.frame(start = c("2018-09-01", "2019-09-01", "2020-09-01", "2021-09-01", "2022-09-01"),
                      end = c("2018-11-30", "2019-11-30", "2020-11-30", "2021-11-30", "2022-11-30")) %>% 
  mutate(start = ymd(start), end = ymd(end))

# Plot
dataset1 %>%
  dplyr::filter(!is.na(ecto_burden)) %>% 
  group_by(date_start, site) %>% 
  summarise(median = median(ecto_burden, na.rm = TRUE),
            lowerCI = DescTools::MedianCI(ecto_burden, conf.level = 0.95, method = "boot", na.rm = TRUE)[2],
            upperCI = DescTools::MedianCI(ecto_burden, conf.level = 0.95, method = "boot", na.rm = TRUE)[3]
  ) %>% 
  ggplot(aes(x = date_start, y = median, colour = site))  +
  geom_rect(data = winter1, inherit.aes = FALSE, aes(xmin = start, xmax = end, 
                                                    ymin = -Inf, ymax = Inf), colour = "lightgrey",
            alpha = 0.3, linewidth = NULL) +
  geom_rect(data = spring1, inherit.aes = FALSE, aes(xmin = start, xmax = end, 
                                                    ymin = -Inf, ymax = Inf), colour = "lightgrey",
            alpha = 0.2, linewidth = NULL) +
  geom_pointrange(aes(ymin = lowerCI, ymax = upperCI), size = 0.75, linewidth = 1,
                  alpha = 0.6, position = position_dodge2(preserve = "single", width = 10)) +
  theme_bw() +
  theme(axis.text=element_text(size=12),
        axis.title = element_text(size = 14)) +
  labs(
    x = NULL,
    y = "Median intensity and 95% CI",
    colour = "Roost") +
  scale_colour_manual(values = c("#999999", "#E69F00", "#56B4E9", "#009E73", "#F0E442", "#CC79A7", "#D55E00", "#0072B2")) +
  scale_x_date(minor_breaks = "3 months", date_breaks = "3 months", date_labels = "%b %Y") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
# saved as pdf, 4x10 inches

##### Aggregation plot (Fig. S4) ##### 

dataset1 %>% 
  filter(site != "Sunnybank") %>% # Remove Sunnybank as no burden data recorded.
  group_by(accession) %>% 
  summarise(prev = mean(ecto_pos, na.rm = TRUE)) %>%  
  left_join(.,
            dataset1 %>% 
              dplyr::filter(!is.na(ecto_burden)) %>% 
              group_by(accession) %>% 
              summarise(indexD = ecopaR::discrepancy(ecto_burden),
                        median = median(ecto_burden)),
            by= join_by(accession)) %>% 
  left_join(., dataset1 %>% 
              dplyr::select(accession, date_start, site) %>% 
              dplyr::distinct(),
            by = join_by(accession)) %>% 
  ggplot(aes(x = median, y = prev)) +
  geom_point(aes(colour = indexD), alpha = 0.8, size = 3) +
  theme_bw() +
  theme(axis.text=element_text(size=12),
        axis.title = element_text(size = 14),
        plot.margin = unit(c(0.5, 0.5, 0.5, 0.5), "cm")) +
  labs(x = "Median intensity", y = "Prevalence", colour = "Poulin's D") +
  scale_colour_gradientn(colours = MetBrewer::met.brewer("Homer1", direction = -1)) +
  ylim(0,1)
# saved as pdf, 4x5 inches


##### Nycteribiid sex ratio by session (Fig. S1) #####

# Objects for shading season in plot.
winter2 <- data.frame(start = c( "2018-06-01"),
                      end = c( "2018-09-01")) %>% 
  mutate(start = ymd(start), end = ymd(end))

spring2 <- data.frame(start = c( "2018-09-01"),
                      end = c( "2018-11-30")) %>% 
  mutate(start = ymd(start), end = ymd(end))


left_join(dataset2 %>% 
            dplyr::group_by(accession) %>% 
            dplyr::summarise(total = sum(female, na.rm = TRUE) + sum(male, na.rm = TRUE),
                             female = sum(female, na.rm = TRUE),
                             male = sum(male, na.rm = TRUE),
                             ratio = male/female,
                             fem_prop = female/total,
                             lowerCI = binom.test(female,
                                                  total,
                                                  p = 0.5,
                                                  conf.level = 0.95)[["conf.int"]][1],
                             upperCI = binom.test(female,
                                                  total,
                                                  p = 0.5,
                                                  conf.level = 0.95)[["conf.int"]][2]),
          dataset2 %>% 
            dplyr::select(accession, date_start, site) %>% 
            dplyr::distinct(),
          by = join_by(accession)) %>% 
  ggplot(aes(x = date_start, y = fem_prop)) +
  geom_rect(data = winter2, inherit.aes = FALSE, aes(xmin = start, xmax = end, 
                                                     ymin = -Inf, ymax = Inf), colour = "lightgrey",
            alpha = 0.3, linewidth = NULL) +
  geom_rect(data = spring2, inherit.aes = FALSE, aes(xmin = start, xmax = end, 
                                                     ymin = -Inf, ymax = Inf), colour = "lightgrey",
            alpha = 0.2, linewidth = NULL) +
  geom_pointrange(aes(ymin = lowerCI, ymax = upperCI, colour = site), size = 0.75, linewidth = 1, alpha = 0.8) +
  theme_bw() +
  theme(axis.text=element_text(size=12),
        axis.title = element_text(size = 14)) +
  labs(x = NULL, y = "Proportion of female nycteribiids", colour = "Roost") +
  scale_colour_manual(values = c("#E69F00", "#56B4E9", "#009E73", "#F0E442", "#CC79A7", "#D55E00", "#000000", "#0072B2")) +
  scale_x_date(minor_breaks = "1 month", date_breaks = "2 months", date_labels = "%b %Y") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  ylim(0,1)
# save as pdf, 4x6 inch



##### DAG informed variable selection for GAMs ######

# Create DAG
DAG <- dagitty("dag{
  Host_age -> Host_reproductive_status
  Host_age -> Host_weight
  Host_age -> Parasitism_Infection
  Host_reproductive_status -> Parasitism_Infection
  Host_sex -> Host_reproductive_status
  Host_sex -> Host_weight
  Host_sex -> Parasitism_Infection
  Host_species -> Host_weight
  Host_species -> Parasitism_Infection
  Host_weight -> Parasitism_Infection
  ONI -> Roost
  ONI -> Season
  Roost -> Parasitism_Infection
  Season -> Host_age
  Season -> Host_reproductive_status
  Season -> Host_sex
  Season -> Host_species
  Season -> Host_weight
  Season -> Roost
}")

coordinates(DAG) <- list(
  x = c(Host_age = 0.360, Host_reproductive_status = 0.382, Host_sex = 0.431,
        Host_species = 0.431, Host_weight = 0.431, Parasitism_Infection = 0.583, ONI = 0.278,
        Roost = 0.426, Season = 0.279),
  y = c(Host_age = 0.420, Host_reproductive_status = 0.570, Host_sex = 0.439,
        Host_species = 0.232, Host_weight = 0.309, Parasitism_Infection = 0.308, ONI = 0.157,
        Roost = 0.159, Season = 0.288))

plot(DAG)

adjustmentSets(DAG,
               exposure = "Season",
               outcome = "Parasitism_Infection",
               effect = "total")

variable_list <- c("Season", "ONI", "Roost", "Host_age", "Host_reproductive_status", 
           "Host_sex", "Host_species", "Host_weight")


lapply(variable_list, adjustmentSets, x = DAG, outcome = "Parasitism_Infection", effect = "total")



##### Nycteribiid parasitism GAMs ######

# Convert categorical data to factor and create model dataset
nyct_model_data <- dataset1 %>%
  group_by(bat_species, bat_sex, bat_age) %>% 
  mutate(weight_std = (bat_weight - mean(bat_weight, na.rm = TRUE))/sd(bat_weight, na.rm = TRUE)) %>% 
  ungroup() %>% 
  dplyr::filter(!is.na(ecto_pos)) %>% 
  mutate_at(c("site", "bat_species", "bat_sex", "bat_repro", "bat_age",
              "ecto_pos"), as.factor) %>% 
  mutate(month = month(date_start))

str(nyct_model_data)


# Season as exposure - adjustment set (ONI)
nyct_mod1 <- bam(ecto_pos ~ 
              s(month, bs = "cc", m = 2, k = 6) +
              s(ONI, bs = "ts", k = 6),
            data = nyct_model_data,
            method = "fREML",
            family = "binomial",
            discrete = TRUE,
            select = TRUE,
            knots = list(month = c(0.5, 12.5)))
concurvity(nyct_mod1, full = FALSE)$estimate
summary(nyct_mod1)

# ONI as exposure - adjustment set ()
nyct_mod2 <- bam(ecto_pos ~ 
              s(ONI, bs= "ts", k = 6),
            data = nyct_model_data,
            method = "fREML",
            family = "binomial",
            discrete = TRUE,
            select = TRUE,
            knots = list(month = c(0.5, 12.5)))
concurvity(nyct_mod2, full = FALSE)$estimate
summary(nyct_mod2)

# Roost as exposure - adjustment set (Season)
nyct_mod3 <- bam(ecto_pos ~ 
              s(month, bs = "cc", k = 6) +
              s(site, bs = "re"),
            data = nyct_model_data,
            method = "fREML",
            family = "binomial",
            discrete = TRUE,
            select = TRUE,
            knots = list(month = c(0.5, 12.5)))
concurvity(nyct_mod3, full = FALSE)$estimate
summary(nyct_mod3)

# Host Age as exposure - adjustment set (season)
nyct_mod4 <- bam(ecto_pos ~ 
              s(month, bs = "cc", m =2, k = 6) +
              bat_age,
            data = nyct_model_data,
            method = "fREML",
            family = "binomial",
            discrete = TRUE,
            select = TRUE,
            knots = list(month = c(0.5, 12.5)))
concurvity(nyct_mod4, full = FALSE)$estimate
summary(nyct_mod4)


# Host reproductive status as exposure - adjustment set (Season, Host age,
#  Host sex)
# High concurvity for repro, age, and sex. Split data set by sex and
# include age as fixed effect.

female_nyct <- nyct_model_data %>% 
  filter(bat_sex == "female")

nyct_mod5 <- bam(ecto_pos ~ 
              s(month, bs = "cc", m = 2, k = 6) +
              bat_repro + bat_age,
            data = female_nyct,
            method = "fREML",
            family = "binomial",
            discrete = TRUE,
            select = TRUE,
            knots = list(month = c(0.5, 12.5)))
concurvity(nyct_mod5, full = FALSE)$estimate
summary(nyct_mod5)

male_nyct <- nyct_model_data %>% 
  filter(bat_sex == "male") %>% 
  dplyr::mutate(bat_repro = as.factor(as.character(bat_repro)))

nyct_mod6 <- bam(ecto_pos ~ 
               s(month, bs = "cc", m = 2, k = 6) +
               bat_repro + bat_age,
             data = male_nyct,
             method = "fREML",
             family = "binomial",
             discrete = TRUE,
             select = TRUE,
             knots = list(month = c(0.5, 12.5)))
concurvity(nyct_mod6, full = FALSE)$estimate
summary(nyct_mod6)

# Host Sex as exposure - adjustment set (Season)
nyct_mod7 <- bam(ecto_pos ~ 
              s(month, bs = "cc", m = 2, k = 6) +
              bat_sex,
            data = nyct_model_data,
            method = "fREML",
            family = "binomial",
            discrete = TRUE,
            select = TRUE,
            knots = list(month = c(0.5, 12.5)))
concurvity(nyct_mod7, full = FALSE)$estimate
summary(nyct_mod7)

# Host Species as exposure - adjustment set (Season)
nyct_mod8 <- bam(ecto_pos ~ 
               s(month, bs = "cc", m = 2, k = 6) +
               bat_species,
             data = nyct_model_data,
             method = "fREML",
             family = "binomial",
             discrete = TRUE,
             select = TRUE,
             knots = list(month = c(0.5, 12.5)))
concurvity(nyct_mod8, full = FALSE)$estimate
summary(nyct_mod8)


# Host weight as exposure - adjustment set (Host age, Host sex, Host species, Season)
#  Standardised host weight variable by host species, sex, and age.
nyct_mod9 <- bam(ecto_pos ~ 
               s(month, bs = "cc", m = 2, k = 6) + 
               s(weight_std, bs = "ts"),
             data = nyct_model_data,
             method = "fREML",
             family = "binomial",
             discrete = TRUE,
             select = TRUE,
             knots = list(month = c(0.5, 12.5)))
concurvity(nyct_mod9, full = FALSE)$estimate
summary(nyct_mod9)


##### Bartonella and Borrelia infection GAMs ######

# Convert categorical data to factor and create model dataset
# This dataset can be used for borrelia also
bart_model_data <- dataset1 %>%
  group_by(bat_species, bat_sex, bat_age) %>% 
  mutate(weight_std = (bat_weight - mean(bat_weight, na.rm = TRUE))/sd(bat_weight, na.rm = TRUE)) %>% 
  ungroup() %>% 
  dplyr::filter(!is.na(bartonella)) %>% 
  mutate_at(c("site", "bat_species", "bat_sex", "bat_repro", "bat_age",
              "bartonella", "borrelia"), as.factor) %>% 
  mutate(month = month(date_start))


str(bart_model_data)

###### Bartonella ######
# Season as exposure - adjustment set (ONI)
bart_mod1 <- bam(bartonella ~ 
                   s(month, bs = "cc", m = 2, k = 6) +
                   s(ONI, bs = "ts", k = 6),
                 data = bart_model_data,
                 method = "fREML",
                 family = "binomial",
                 discrete = TRUE,
                 select = TRUE,
                 knots = list(month = c(0.5, 12.5)))
concurvity(bart_mod1, full = FALSE)$estimate
summary(bart_mod1)

# ONI as exposure - adjustment set ()
bart_mod2 <- bam(bartonella ~ 
                   s(ONI, bs= "ts", k = 6),
                 data = bart_model_data,
                 method = "fREML",
                 family = "binomial",
                 discrete = TRUE,
                 select = TRUE,
                 knots = list(month = c(0.5, 12.5)))
concurvity(bart_mod2, full = FALSE)$estimate
summary(bart_mod2)

# Roost as exposure - adjustment set (Season)
bart_mod3 <- bam(bartonella ~ 
                   s(month, bs = "cc", k = 6) +
                   s(site, bs = "re"),
                 data = bart_model_data,
                 method = "fREML",
                 family = "binomial",
                 discrete = TRUE,
                 select = TRUE,
                 knots = list(month = c(0.5, 12.5)))
concurvity(bart_mod3, full = FALSE)$estimate
summary(bart_mod3)

# Host Age as exposure - adjustment set (season)
bart_mod4 <- bam(bartonella ~ 
                   s(month, bs = "cc", m =2, k = 6) +
                   bat_age,
                 data = bart_model_data,
                 method = "fREML",
                 family = "binomial",
                 discrete = TRUE,
                 select = TRUE,
                 knots = list(month = c(0.5, 12.5)))
concurvity(bart_mod4, full = FALSE)$estimate
summary(bart_mod4)


# Host reproductive status as exposure - adjustment set (Season, Host age,
#  Host sex)
# High concurvity for repro, age, and sex. Split data set by sex and
# include age as fixed effect.

female_bart <- bart_model_data %>% 
  filter(bat_sex == "female")

bart_mod5 <- bam(bartonella ~ 
                   s(month, bs = "cc", m = 2, k = 6) +
                   bat_repro + bat_age,
                 data = female_bart,
                 method = "fREML",
                 family = "binomial",
                 discrete = TRUE,
                 select = TRUE,
                 knots = list(month = c(0.5, 12.5)))
concurvity(bart_mod5, full = FALSE)$estimate
summary(bart_mod5)

male_bart <- bart_model_data %>% 
  filter(bat_sex == "male") %>% 
  dplyr::mutate(bat_repro = as.factor(as.character(bat_repro)))

bart_mod6 <- bam(bartonella ~ 
                   s(month, bs = "cc", m = 2, k = 6) +
                   bat_repro + bat_age,
                 data = male_bart,
                 method = "fREML",
                 family = "binomial",
                 discrete = TRUE,
                 select = TRUE,
                 knots = list(month = c(0.5, 12.5)))
concurvity(bart_mod6, full = FALSE)$estimate
summary(bart_mod6)

# Host Sex as exposure - adjustment set (Season)
bart_mod7 <- bam(bartonella ~ 
                   s(month, bs = "cc", m = 2, k = 6) +
                   bat_sex,
                 data = bart_model_data,
                 method = "fREML",
                 family = "binomial",
                 discrete = TRUE,
                 select = TRUE,
                 knots = list(month = c(0.5, 12.5)))
concurvity(bart_mod7, full = FALSE)$estimate
summary(bart_mod7)


# Host Species as exposure - adjustment set (Season)
bart_mod8 <- bam(bartonella ~ 
                   s(month, bs = "cc", m = 2, k = 6) +
                   bat_species,
                 data = bart_model_data,
                 method = "fREML",
                 family = "binomial",
                 discrete = TRUE,
                 select = TRUE,
                 knots = list(month = c(0.5, 12.5)))
concurvity(bart_mod8, full = FALSE)$estimate
summary(bart_mod8)

# Host weight as exposure - adjustment set (Host age, Host sex, Host species, Season)
#  Standardised host weight variable by host species, sex, and age.
bart_mod9 <- bam(bartonella ~ 
                   s(month, bs = "cc", m = 2, k = 6) + 
                   s(weight_std, bs = "ts"),
                 data = bart_model_data,
                 method = "fREML",
                 family = "binomial",
                 discrete = TRUE,
                 select = TRUE,
                 knots = list(month = c(0.5, 12.5)))
concurvity(bart_mod9, full = FALSE)$estimate
summary(bart_mod9)


###### Borrelia ######
# Season as exposure - adjustment set (ONI)
borr_mod1 <- bam(borrelia ~ 
                   s(month, bs = "cc", m = 2, k = 6) +
                   s(ONI, bs = "ts", k = 6),
                 data = bart_model_data,
                 method = "fREML",
                 family = "binomial",
                 discrete = TRUE,
                 select = TRUE,
                 knots = list(month = c(0.5, 12.5)))
concurvity(borr_mod1, full = FALSE)$estimate
summary(borr_mod1)


##### GLMs of blood borne bacteria with nycteribiid as predictor #####

# Convert categorical data to factor and create model dataset
glm_data <- dataset1 %>%
  dplyr::filter(!is.na(bartonella)) %>% 
  mutate_at(c("bartonella", "borrelia", "ecto_pos"), as.factor)


glm1 <- glm(bartonella ~ ecto_pos, glm_data, family = "binomial")
glm2 <- glm(borrelia ~ ecto_pos, glm_data, family = "binomial")

summary(glm1)
exp(cbind(Odds_Ratio = coef(glm1), confint(glm1)))

summary(glm2)
exp(cbind(Odds_Ratio = coef(glm2), confint(glm2)))


##### Model prediction of seasonal probability of parasitism/infection (Fig. 5A) #####

# Need to run GAMs in above section to create model objects for use in plot here.
# Then predict from each model and save output as dataframe.
nyct_season <- plot_predictions(nyct_mod1,
                              condition = c("month"),
                              draw = FALSE) %>% 
              dplyr::mutate(var = "nycteribiid")

bart_season <- plot_predictions(bart_mod1,
                              condition = c("month"),
                              draw=FALSE)%>% 
              dplyr::mutate(var = "bartonella")

borr_season <- plot_predictions(borr_mod1,
                             condition = c("month"),
                             draw=FALSE)%>% 
              dplyr::mutate(var = "borrelia")

# Next create dataframes of session prevalence for each pathogen.
nyct_prev <- dataset1 %>% 
  dplyr::group_by(date_start, site) %>%
  dplyr::summarise(prev = mean(ecto_pos, na.rm = TRUE)) %>%
  dplyr::mutate(mth = month(date_start), var = "nycteribiid")

bart_prev <- dataset1 %>% 
  dplyr::group_by(date_start, site) %>%
  dplyr::summarise(prev = mean(bartonella, na.rm = TRUE)) %>%
  dplyr::mutate(mth = month(date_start), var = "bartonella")

borr_prev <- dataset1 %>% 
  dplyr::group_by(date_start, site) %>%
  dplyr::summarise(prev = mean(borrelia, na.rm = TRUE)) %>%
  dplyr::mutate(mth = month(date_start), var = "borrelia")

# Combine model estimates and observed prevalence into single plot object.
season_plot <- ggplot() +
  geom_ribbon(data = nyct_season, aes(x = month, ymin = conf.low, ymax = conf.high, fill = var), alpha = 0.2) +
  geom_line(data = nyct_season, aes(x = month, y = estimate, colour = var), alpha = 0.6) +
  geom_ribbon(data = bart_season, aes(x = month, ymin = conf.low, ymax = conf.high, fill = var), alpha = 0.6) +
  geom_line(data = bart_season, aes(x = month, y = estimate, colour = var), linewidth = 0.8, alpha = 0.8) +
  geom_ribbon(data = borr_season, aes(x = month, ymin = conf.low, ymax = conf.high, fill = var), alpha = 0.6) +
  geom_line(data = borr_season, aes(x = month, y = estimate, colour = var), linewidth = 0.8, alpha = 0.8) +
  geom_point(data = bart_prev, inherit.aes = FALSE, aes(x = mth, y = prev, colour = var),
             size = 2, alpha = 0.6)+
  geom_point(data = borr_prev, inherit.aes = FALSE, aes(x = mth, y = prev, colour = var),
             size = 2, alpha = 0.6)+
  geom_point(data = nyct_prev, inherit.aes = FALSE, aes(x = mth, y = prev, colour = var),
             size = 1.5, alpha = 0.4)+
  ylim(0, 1) +
  scale_x_continuous(breaks = c(2, 4, 6, 8, 10, 12),
                     labels = c("Feb", "Apr", "Jun",
                                "Aug", "Oct", "Dec")) +
  labs(x = NULL, y = "Pr(parasitism/infection)") +
  scale_colour_manual("Parasite/bacteria", values = c("#01295F", "#437F97", "#849324"),
                      labels = c(expression(italic("Bartonella")), 
                                 expression(italic("Borrelia")),
                                 "Nycteribiid")) +
  scale_fill_manual("Parasite/bacteria", values = c("#01295F", "#437F97", "#849324"),
                    labels = c(expression(italic("Bartonella")), 
                               expression(italic("Borrelia")),
                               "Nycteribiid")) +
  theme_bw()


##### Model prediction of nycteribiid parasitism by site (Fig. 5B) #####

roost_prev <- dataset1 %>% 
  dplyr::group_by(date_start, site) %>%
  dplyr::summarise(prev = mean(ecto_pos, na.rm = TRUE)) %>%
  dplyr::mutate(mth = month(date_start), var = "nycteribiid") %>% 
  dplyr::filter(site %in% c("Redcliffe", "Toowoomba"))
  
nyct_roost <- plot_predictions(nyct_mod3,
                               condition = c("month", "site"),
                              draw = FALSE) %>% 
  dplyr::filter(site %in% c("Redcliffe", "Toowoomba"))

roost_plot <- ggplot(nyct_roost, aes(x = month, y = estimate, fill = site)) +
  geom_ribbon(aes(ymin = conf.low, ymax = conf.high), alpha = 0.4) +
  geom_line(aes(colour = site),linewidth = 0.8) +
  geom_point(data = roost_prev, inherit.aes = FALSE, aes(x = mth, y = prev, colour = site),
             size = 2, alpha = 0.6)+
  labs(x = NULL, y = "Pr(parasitism)", colour = "Roost", fill = "Roost") +
  ylim(0, 1) +
  scale_x_continuous(breaks = c(2, 4, 6, 8, 10, 12),
                     labels = c("Feb", "Apr", "Jun",
                                "Aug", "Oct", "Dec")) +
  scale_colour_manual(values = c("#D55E00", "#0072B2")) +
  scale_fill_manual(values = c("#D55E00", "#0072B2")) +
  theme_bw()

##### Model prediction of nycteribiid parasitism by host sex (Fig. 5C) #####

sex_prev <- dataset1 %>%  
  group_by(date_start, site, bat_sex) %>% 
  summarise(prev = mean(ecto_pos, na.rm = TRUE)) %>%
  dplyr::mutate(mth = month(date_start), var = "nycteribiid") %>%
  dplyr::filter(!is.na(bat_sex))

nyct_sex <- plot_predictions(nyct_mod7,
                             condition = c("month", "bat_sex"),
                             draw = FALSE)

sex_plot <- ggplot(nyct_sex, aes(x = month, y = estimate, fill = bat_sex)) +
  geom_ribbon(aes(ymin = conf.low, ymax = conf.high), alpha = 0.4) +
  geom_line(aes(colour = bat_sex),linewidth = 0.8) +
  geom_point(data = sex_prev, inherit.aes = FALSE, aes(x = mth, y = prev, colour = bat_sex),
             size = 2, alpha = 0.6)+
  labs(x = NULL, y = "Pr(parasitism)", colour = "Sex", fill = "Sex") +
  ylim(0, 1) +
  scale_x_continuous(breaks = c(2, 4, 6, 8, 10, 12),
                     labels = c("Feb", "Apr", "Jun",
                                "Aug", "Oct", "Dec")) +
  scale_colour_manual("Host sex", values = met.brewer("VanGogh3", 2),
                      labels = c("Female", "Male")) +
  scale_fill_manual("Host sex", values = met.brewer("VanGogh3", 2),
                    labels = c("Female", "Male")) +
  theme_bw()


##### Model prediction of nycteribiid parasitism by female host repro status (Fig. 5F) #####

nyct_repro <- plot_predictions(nyct_mod5,
                               condition = c("bat_repro"),
                               draw = FALSE)

repro_plot <- ggplot(nyct_repro, aes(x = bat_repro, y = estimate, colour = bat_repro)) +
  geom_pointrange(aes(ymin = conf.low, ymax = conf.high),
                  show.legend = FALSE, linewidth = 0.8) +
  labs(x = "Female reproduction status", y = "Pr(parasitism)") +
  ylim(0.5, 1) +
  scale_colour_manual(values = met.brewer("Java", 4)) +
  scale_fill_manual(values = met.brewer("Java", 4)) +
  scale_x_discrete(labels = c("Lact.","Non-rep.",  "Preg.", "Rep.")) +
  theme_bw()

##### Model prediction of Bartonella infection by site (Fig. 5D) #####

bart_roost_prev <- dataset1 %>% 
  dplyr::group_by(date_start, site) %>%
  dplyr::summarise(prev = mean(bartonella, na.rm = TRUE)) %>%
  dplyr::mutate(mth = month(date_start), var = "bartonella") %>% 
  dplyr::filter(site %in% c("Redcliffe", "Toowoomba"))

bart_roost <- plot_predictions(bart_mod3,
                               condition = c("month", "site"),
                               draw = FALSE) %>% 
  dplyr::filter(site %in% c("Redcliffe", "Toowoomba"))

bart_roost_plot <- ggplot(bart_roost, aes(x = month, y = estimate, fill = site)) +
  geom_ribbon(aes(ymin = conf.low, ymax = conf.high), alpha = 0.4) +
  geom_line(aes(colour = site),linewidth = 0.8) +
  geom_point(data = bart_roost_prev, inherit.aes = FALSE, aes(x = mth, y = prev, colour = site),
             size = 2, alpha = 0.6)+
  labs(x = NULL, y = "Pr(infection)", colour = "Roost", fill = "Roost") +
  ylim(0, 1) +
  scale_x_continuous(breaks = c(2, 4, 6, 8, 10, 12),
                     labels = c("Feb", "Apr", "Jun",
                                "Aug", "Oct", "Dec")) +
  scale_colour_manual(values = c("#D55E00", "#0072B2")) +
  scale_fill_manual(values = c("#D55E00", "#0072B2")) +
  theme_bw()

##### Model prediction of Bartonella infection by host sex (Fig. 5E) #####

bart_sex_prev <- dataset1 %>%  
  group_by(date_start, site, bat_sex) %>% 
  summarise(prev = mean(bartonella, na.rm = TRUE)) %>%
  dplyr::mutate(mth = month(date_start), var = "bartonella") %>%
  dplyr::filter(!is.na(bat_sex))

bart_sex <- plot_predictions(bart_mod7,
                             condition = c("month", "bat_sex"),
                             draw = FALSE)

bart_sex_plot <- ggplot(bart_sex, aes(x = month, y = estimate, fill = bat_sex)) +
  geom_ribbon(aes(ymin = conf.low, ymax = conf.high), alpha = 0.4) +
  geom_line(aes(colour = bat_sex),linewidth = 0.8) +
  geom_point(data = bart_sex_prev, inherit.aes = FALSE, aes(x = mth, y = prev, colour = bat_sex),
             size = 2, alpha = 0.6)+
  labs(x = NULL, y = "Pr(infection)", colour = "Sex", fill = "Sex") +
  ylim(0, 1) +
  scale_x_continuous(breaks = c(2, 4, 6, 8, 10, 12),
                     labels = c("Feb", "Apr", "Jun",
                                "Aug", "Oct", "Dec")) +
  scale_colour_manual("Host sex", values = met.brewer("VanGogh3", 2),
                      labels = c("Female", "Male")) +
  scale_fill_manual("Host sex", values = met.brewer("VanGogh3", 2),
                    labels = c("Female", "Male")) +
  theme_bw()

##### Model prediction of nycteribiid parasitism by host age (Fig. 5G) #####

nyct_age <- plot_predictions(nyct_mod4,
                             condition = c("bat_age"),
                             draw = FALSE)%>% 
  mutate(bat_age = factor(bat_age, levels = c("adult", "sub_adult", "juve")))


nyct_age_plot <- ggplot(nyct_age, aes(x = bat_age, y = estimate, colour = bat_age)) +
  geom_pointrange(aes(ymin = conf.low, ymax = conf.high),
                  show.legend = FALSE, linewidth = 0.8) +
  labs(x = "Host age", y = "Pr(parasitism)", colour = "Age", fill = "Age") +
  ylim(0.5, 1) +
  scale_colour_manual(values = met.brewer("Greek", 3)) +
  scale_fill_manual(values = met.brewer("Greek", 3)) +
  scale_x_discrete(labels = c("Ad.", "Sub.", "Juv.")) +
  theme_bw()


##### Model prediction of Bartonella infection by host age (Fig. 5H) #####

bart_age <- plot_predictions(bart_mod4,
                             condition = c("bat_age"),
                             draw = FALSE)%>% 
  dplyr::mutate(bat_age = factor(bat_age, levels = c("adult", "sub_adult", "juve")))


bart_age_plot <- ggplot(bart_age, aes(x = bat_age, y = estimate, colour = bat_age)) +
  geom_pointrange(aes(ymin = conf.low, ymax = conf.high),
                  show.legend = FALSE, linewidth = 0.8) +
  labs(x = "Host age", y = "Pr(infection)", colour = "Age", fill = "Age") +
  ylim(0.5, 1) +
  scale_colour_manual(values = met.brewer("Greek", 3)) +
  scale_fill_manual(values = met.brewer("Greek", 3)) +
  scale_x_discrete(labels = c("Ad.", "Sub.", "Juv.")) +
  theme_bw()



##### Combining individual plots for Fig. 5 #####


new_layout <- '
AAAA
AAAA
BBCC
DDEE
FFGG
HH##'

season_plot +
  roost_plot +
  sex_plot +
  nyct_age_plot +
  repro_plot +
  bart_roost_plot +
  bart_sex_plot +
  bart_age_plot +
  #guide_area() +
  plot_annotation(tag_levels = 'A') +
  plot_layout(design = new_layout, guides = 'collect')
# saved as pdf, portrait and 8x11 inches


##### Model prediction of nycteribiid parasitism by ONI anomaly (Fig. S3) #####

ONI_prev <- dataset1 %>% 
  dplyr::group_by(date_start, site, ONI) %>% 
  dplyr::summarise(prev = mean(ecto_pos, na.rm = TRUE)) %>% 
  dplyr::mutate(mth = month(date_start))

nyct_ONI <- plot_predictions(nyct_mod2, 
                             condition = c("ONI"),
                             draw = FALSE)

ggplot(nyct_ONI, aes(x = ONI, y = estimate)) +
  geom_ribbon(aes(ymin = conf.low, ymax = conf.high), alpha = 0.4) +
  geom_line(linewidth = 0.8) +
  geom_point(data = ONI_prev, inherit.aes = FALSE, aes(x = ONI, y = prev),
             size = 2, alpha = 0.6)+
  labs(x = "ONI anomaly (°C)", y = "Pr(parasitism)") +
  ylim(0, 1) +
  theme_bw() +
  theme(axis.text=element_text(size=12),
        axis.title = element_text(size = 14),
        plot.margin = unit(c(0.5, 0.5, 0.5, 0.5), 'cm'))
# saved as pdf, 3x4 inches


##### Model prediction of nycteribiid parasitism by standardised host weight (Fig. S8) #####

nyct_weight <- plot_predictions(nyct_mod9,
                                condition = c("weight_std"),
                                draw = FALSE)
ggplot(nyct_weight, aes(x = weight_std, y = estimate)) +
  geom_ribbon(aes(ymin = conf.low, ymax = conf.high), alpha = 0.4) +
  geom_line(linewidth = 0.8) +
  labs(x = "Standardised weight", y = "Pr(parasitism)") +
  ylim(0, 1) +
  theme_bw() +
  theme(axis.text=element_text(size=12),
        axis.title = element_text(size = 14),
        plot.margin = unit(c(0.5, 0.5, 0.5, 0.5), 'cm'))
# saved as pdf, 3x4 inches

##### Model prediction of nycteribiid parasitism by male host repro status (Fig. S6) #####

nyct_repro_m <- plot_predictions(nyct_mod6,
                                 condition = c("bat_repro"),
                                 draw = FALSE)

ggplot(nyct_repro_m, aes(x = bat_repro, y = estimate, colour = bat_repro)) +
  geom_pointrange(aes(ymin = conf.low, ymax = conf.high),
                  show.legend = FALSE, linewidth = 0.8) +
  labs(x = "Male reproduction status", y = "Pr(parasitism)") +
  #ylim(0.5, 1) +
  scale_colour_manual(values = c("#cf3a36", "#0c7156")) +
  scale_x_discrete(labels = c("Non-rep.",  "Rep.")) +
  theme_bw() +
  theme(plot.margin = unit(c(0.5, 0.5, 0.5, 0.5), 'cm'))
# saved as pdf, 3x4 inches

##### Model prediction of nycteribiid parasitism by host species (Fig. S7) #####

nyct_species <- plot_predictions(nyct_mod8,
                                 condition = c("bat_species"),
                                 draw = FALSE)

ggplot(nyct_species, aes(x = bat_species, y = estimate, colour = bat_species)) +
  geom_pointrange(aes(ymin = conf.low, ymax = conf.high),
                  show.legend = FALSE, linewidth = 0.8) +
  labs(x = "Host species", y = "Pr(parasitism)") +
  ylim(0.5, 1) +
  scale_colour_manual(values = c("black", "grey")) +
  scale_x_discrete(labels = c("BFF",  "GHFF")) +
  theme_bw() +
  theme(plot.margin = unit(c(0.5, 0.5, 0.5, 0.5), 'cm'))
# saved as pdf, 3x4 inches

##### Additional analysis split by host species #####

# Split data by host species
bff_nyct <- nyct_model_data %>% 
  filter(bat_species == "bff")

ghff_nyct <- nyct_model_data %>% 
  filter(bat_species == "ghff")

# Model age, sex, and repro status using individual species datasets
# Host Age as exposure - adjustment set (season)
nyct_mod10 <- bam(ecto_pos ~ 
                   s(month, bs = "cc", m =2, k = 6) +
                   bat_age,
                 data = bff_nyct,
                 method = "fREML",
                 family = "binomial",
                 discrete = TRUE,
                 select = TRUE,
                 knots = list(month = c(0.5, 12.5)))
concurvity(nyct_mod10, full = FALSE)$estimate
summary(nyct_mod10)

nyct_age_bff <- plot_predictions(nyct_mod10,
                             condition = c("bat_age"),
                             draw = FALSE)%>% 
  mutate(bat_age = factor(bat_age, levels = c("adult", "sub_adult", "juve")))


nyct_age_bff_plot <- ggplot(nyct_age_bff, aes(x = bat_age, y = estimate, colour = bat_age)) +
  geom_pointrange(aes(ymin = conf.low, ymax = conf.high),
                  show.legend = FALSE, linewidth = 0.8) +
  labs(x = "Host age", y = "Pr(parasitism)", colour = "Age", fill = "Age") +
  ylim(0.5, 1) +
  scale_colour_manual(values = met.brewer("Greek", 3)) +
  scale_fill_manual(values = met.brewer("Greek", 3)) +
  scale_x_discrete(labels = c("Ad.", "Sub.", "Juv.")) +
  theme_bw()

nyct_mod11 <- bam(ecto_pos ~ 
                    s(month, bs = "cc", m =2, k = 6) +
                    bat_age,
                  data = ghff_nyct,
                  method = "fREML",
                  family = "binomial",
                  discrete = TRUE,
                  select = TRUE,
                  knots = list(month = c(0.5, 12.5)))
concurvity(nyct_mod11, full = FALSE)$estimate
summary(nyct_mod11)

nyct_age_ghff <- plot_predictions(nyct_mod11,
                                 condition = c("bat_age"),
                                 draw = FALSE)%>% 
  mutate(bat_age = factor(bat_age, levels = c("adult", "sub_adult", "juve")))


nyct_age_ghff_plot <- ggplot(nyct_age_ghff, aes(x = bat_age, y = estimate, colour = bat_age)) +
  geom_pointrange(aes(ymin = conf.low, ymax = conf.high),
                  show.legend = FALSE, linewidth = 0.8) +
  labs(x = "Host age", y = "Pr(parasitism)", colour = "Age", fill = "Age") +
  #ylim(0.5, 1) +
  scale_colour_manual(values = met.brewer("Greek", 3)) +
  scale_fill_manual(values = met.brewer("Greek", 3)) +
  scale_x_discrete(labels = c("Ad.", "Sub.", "Juv.")) +
  theme_bw()


# Host reproductive status as exposure - adjustment set (Season, Host age,
#  Host sex)
# High concurvity for repro, age, and sex. Split data set by sex and
# include age as fixed effect.

bff_female_nyct <- bff_nyct %>% 
  filter(bat_sex == "female")

nyct_mod12 <- bam(ecto_pos ~ 
                   s(month, bs = "cc", m = 2, k = 6) +
                   bat_repro + bat_age,
                 data = bff_female_nyct,
                 method = "fREML",
                 family = "binomial",
                 discrete = TRUE,
                 select = TRUE,
                 knots = list(month = c(0.5, 12.5)))
concurvity(nyct_mod12, full = FALSE)$estimate
summary(nyct_mod12)

bff_fem_repro <- plot_predictions(nyct_mod12,
                               condition = c("bat_repro"),
                               draw = FALSE)

bff_fem_repro_plot <- ggplot(bff_fem_repro, aes(x = bat_repro, y = estimate, colour = bat_repro)) +
  geom_pointrange(aes(ymin = conf.low, ymax = conf.high),
                  show.legend = FALSE, linewidth = 0.8) +
  labs(x = "Female reproduction status", y = "Pr(parasitism)") +
  ylim(0.5, 1) +
  scale_colour_manual(values = met.brewer("Java", 4)) +
  scale_fill_manual(values = met.brewer("Java", 4)) +
  scale_x_discrete(labels = c("Lact.","Non-rep.",  "Preg.", "Rep.")) +
  theme_bw()

bff_male_nyct <- bff_nyct %>% 
  filter(bat_sex == "male") %>% 
  dplyr::mutate(bat_repro = as.factor(as.character(bat_repro)))

nyct_mod13 <- bam(ecto_pos ~ 
                   s(month, bs = "cc", m = 2, k = 6) +
                   bat_repro + bat_age,
                 data = bff_male_nyct,
                 method = "fREML",
                 family = "binomial",
                 discrete = TRUE,
                 select = TRUE,
                 knots = list(month = c(0.5, 12.5)))
concurvity(nyct_mod13, full = FALSE)$estimate
summary(nyct_mod13)

bff_m_repro <- plot_predictions(nyct_mod13,
                                 condition = c("bat_repro"),
                                 draw = FALSE)

bff_m_repro_plot <- ggplot(bff_m_repro, aes(x = bat_repro, y = estimate, colour = bat_repro)) +
  geom_pointrange(aes(ymin = conf.low, ymax = conf.high),
                  show.legend = FALSE, linewidth = 0.8) +
  labs(x = "Male reproduction status", y = "Pr(parasitism)") +
  #ylim(0.5, 1) +
  scale_colour_manual(values = c("#cf3a36", "#0c7156")) +
  scale_x_discrete(labels = c("Non-rep.",  "Rep.")) +
  theme_bw()


ghff_female_nyct <- ghff_nyct %>% 
  filter(bat_sex == "female")

nyct_mod14 <- bam(ecto_pos ~ 
                    s(month, bs = "cc", m = 2, k = 6) +
                    bat_repro + bat_age,
                  data = ghff_female_nyct,
                  method = "fREML",
                  family = "binomial",
                  discrete = TRUE,
                  select = TRUE,
                  knots = list(month = c(0.5, 12.5)))
concurvity(nyct_mod14, full = FALSE)$estimate
summary(nyct_mod14)

ghff_fem_repro <- plot_predictions(nyct_mod14,
                                  condition = c("bat_repro"),
                                  draw = FALSE)

ghff_fem_repro_plot <- ggplot(ghff_fem_repro, aes(x = bat_repro, y = estimate, colour = bat_repro)) +
  geom_pointrange(aes(ymin = conf.low, ymax = conf.high),
                  show.legend = FALSE, linewidth = 0.8) +
  labs(x = "Female reproduction status", y = "Pr(parasitism)") +
  #ylim(0.5, 1) +
  scale_colour_manual(values = met.brewer("Java", 4)) +
  scale_fill_manual(values = met.brewer("Java", 4)) +
  scale_x_discrete(labels = c("Lact.","Non-rep.",  "Preg.", "Rep.")) +
  theme_bw()

ghff_male_nyct <- ghff_nyct %>% 
  filter(bat_sex == "male") %>% 
  dplyr::mutate(bat_repro = as.factor(as.character(bat_repro)))

nyct_mod15 <- bam(ecto_pos ~ 
                    s(month, bs = "cc", m = 2, k = 6) +
                    bat_repro + bat_age,
                  data = ghff_male_nyct,
                  method = "fREML",
                  family = "binomial",
                  discrete = TRUE,
                  select = TRUE,
                  knots = list(month = c(0.5, 12.5)))
concurvity(nyct_mod15, full = FALSE)$estimate
summary(nyct_mod15)

ghff_m_repro <- plot_predictions(nyct_mod15,
                                condition = c("bat_repro"),
                                draw = FALSE)

ghff_m_repro_plot <- ggplot(ghff_m_repro, aes(x = bat_repro, y = estimate, colour = bat_repro)) +
  geom_pointrange(aes(ymin = conf.low, ymax = conf.high),
                  show.legend = FALSE, linewidth = 0.8) +
  labs(x = "Male reproduction status", y = "Pr(parasitism)") +
  #ylim(0.5, 1) +
  scale_colour_manual(values = c("#cf3a36", "#0c7156")) +
  scale_x_discrete(labels = c("Non-rep.",  "Rep.")) +
  theme_bw()



##### PCA of roost site abiotic factors #####

# Perform PCA function - scale argument is true.
PCA <- prcomp(dataset3 %>% 
                column_to_rownames(var="accession") %>% 
                dplyr::select(!site) %>% 
                dplyr::select(!prev), scale. = TRUE)

# Express eigenvalue as proportion of the total variation
PCA$sdev^2/sum(PCA$sdev^2)
# 0.55 and 0.31 of total variation described by PC1 and PC2 respectively.

# View eigenvectors (loadings)
PCA$rotation
# PC1 contributed to by temperature and evaporation mostly.
# PC2 contributed to by humidity.

# Perform PERMANOVA on PC1 and PC2.
# First create dataframe of roost location and PCA scores
PCA_scores <- data.frame(PCA$x,
                         roost = dataset3$site)

vegan::adonis2(PCA_scores$PC1 ~ roost,
        data = PCA_scores,
        method = "euc")
# F(8,32) = 1.557, R2 = 0.28, p = 0.138

vegan::adonis2(PCA_scores$PC2 ~ roost,
               data = PCA_scores,
               method = "euc")
# F(8,32) = 4.157, R2 = 0.51, p = 0.004

# Create biplot 

# Function for biplot - adapted from ggbiplot::ggbiplot
ggbiplot2 <- function (pcobj, choices = 1:2, scale = 1, pc.biplot = TRUE, 
                       obs.scale = 1 - scale, var.scale = scale, var.factor = 1, 
                       groups = NULL, point.size = 1.5, ellipse = FALSE, ellipse.prob = 0.68, 
                       ellipse.linewidth = 1.3, ellipse.fill = TRUE, ellipse.alpha = 0.25, 
                       labels = NULL, labels.size = 3, alpha = 1, var.axes = TRUE, 
                       circle = FALSE, circle.prob = 0.68, varname.size = 3, varname.adjust = 1.25, 
                       varname.color = "black", varname.abbrev = FALSE, axis.title = "PC", arrow.size = 0.5,
                       ...) 
{
  if (length(choices) > 2) {
    warning("choices = ", choices, " is not of length 2. Only the first 2 will be used")
    choices <- choices[1:2]
  }
  svd <- get_SVD(pcobj)
  n <- svd$n
  d <- svd$D
  u <- svd$U
  v <- svd$V
  nobs.factor <- ifelse(inherits(pcobj, "prcomp"), sqrt(n - 
                                                          1), sqrt(n))
  angle <- circle_chol <- ed <- hjust <- mu <- sigma <- varname <- xvar <- yvar <- NULL
  choices <- pmin(choices, ncol(u))
  df.u <- as.data.frame(sweep(u[, choices], 2, d[choices]^obs.scale, 
                              FUN = "*"))
  v <- sweep(v, 2, d^var.scale, FUN = "*")
  df.v <- as.data.frame(v[, choices])
  df.v <- var.factor * df.v
  names(df.u) <- c("xvar", "yvar")
  names(df.v) <- names(df.u)
  if (pc.biplot) {
    df.u <- df.u * nobs.factor
  }
  r <- sqrt(qchisq(circle.prob, df = 2)) * prod(colMeans(df.u^2))^(1/4)
  v.scale <- rowSums(v^2)
  df.v <- r * df.v/sqrt(max(v.scale))
  if (obs.scale == 0) {
    u.axis.labs <- paste("standardized", axis.title, choices, 
                         sep = "")
  }
  else {
    u.axis.labs <- paste(axis.title, choices, sep = "")
  }
  u.axis.labs <- paste(u.axis.labs, sprintf("(%0.1f%%)", 100 * 
                                              d[choices]^2/sum(d^2)))
  if (!is.null(labels)) {
    df.u$labels <- labels
  }
  if (!is.null(groups)) {
    df.u$groups <- groups
  }
  if (varname.abbrev) {
    df.v$varname <- abbreviate(rownames(v))
  }
  else {
    df.v$varname <- rownames(v)
  }
  df.v$angle <- with(df.v, (180/pi) * atan(yvar/xvar))
  df.v$hjust = with(df.v, (1 - varname.adjust * sign(xvar))/2)
  g <- ggplot(data = df.u, aes(x = xvar, y = yvar), show.legend = FALSE) + xlab(u.axis.labs[1]) + 
    ylab(u.axis.labs[2]) + coord_equal()
  if (!is.null(df.u$labels)) {
    if (!is.null(df.u$groups)) {
      g <- g + geom_text(aes(label = labels, color = groups), 
                         size = labels.size)
    }
    else {
      g <- g + geom_text(aes(label = labels), size = labels.size)
    }
  }
  else {
    if (!is.null(df.u$groups)) {
      g <- g + geom_point(aes(color = groups), alpha = alpha, 
                          size = point.size, show.legend = FALSE)
    }
    else {
      g <- g + geom_point(alpha = alpha, size = point.size)
    }
  }
  if (var.axes) {
    if (circle) {
      theta <- c(seq(-pi, pi, length = 50), seq(pi, -pi, 
                                                length = 50))
      circle <- data.frame(xvar = r * cos(theta), yvar = r * 
                             sin(theta))
      g <- g + geom_path(data = circle, color = scales::muted("white"), 
                         linewidth = 1/2, alpha = 1/3)
    }
    arrow_style <- arrow(length = unit(1/2, "picas"), type = "closed", 
                         angle = 15)
    g <- g + geom_segment(data = df.v, aes(x = 0, y = 0, 
                                           xend = xvar, yend = yvar), arrow = arrow_style, color = varname.color, 
                          linewidth = arrow.size)
  }
  if (!is.null(df.u$groups) && ellipse) {
    theta <- c(seq(-pi, pi, length = 50), seq(pi, -pi, length = 50))
    circle <- cbind(cos(theta), sin(theta))
    geom <- if (isTRUE(ellipse.fill)) 
      "polygon"
    else "path"
    if (isTRUE(ellipse.fill)) {
      g <- g + stat_ellipse(geom = "polygon", aes(group = groups, 
                                                  color = groups, fill = groups), alpha = ellipse.alpha, 
                            linewidth = ellipse.linewidth, type = "norm", 
                            level = ellipse.prob, show.legend = FALSE)
    }
    else {
      g <- g + stat_ellipse(geom = "path", aes(group = groups, 
                                               color = groups), linewidth = ellipse.linewidth, 
                            type = "norm", level = ellipse.prob)
    }
  }
  if (var.axes) {
    g <- g + geom_text(data = df.v, aes(label = varname, 
                                        x = xvar, y = yvar, angle = angle, hjust = hjust), 
                       color = varname.color, size = varname.size)
  }
  return(g)
}

##### Biplot of PCA (Fig. S5) #####
ggbiplot2(PCA, alpha = 0, groups = dataset3$site, 
          ellipse = TRUE, ellipse.linewidth = 0.5) +
  geom_point(aes(colour = dataset3$site, size = dataset3$prev),
             alpha = 0.6) +
  theme_bw() +
  labs(colour = "Roost",
       groups = "Roost",
       size = "Prevalence",
       y = "PC2 (31.1%)",
       x = "PC1 (55.2%)") +
  xlim(-3, 3)+
  ylim(-3, 3)+
  scale_colour_manual(values = c("#999999", "#E69F00", "#56B4E9", "#009E73", "#F0E442", "#CC79A7", "#D55E00", "#000000", "#0072B2"))+
  scale_fill_manual(values = c("#999999", "#E69F00", "#56B4E9", "#009E73", "#F0E442", "#CC79A7", "#D55E00", "#000000", "#0072B2")) +
  theme(plot.margin = unit(c(0.5, 0.5, 0.5, 0.5), 'cm'))
# export as pdf, portrait and 6x6 inches
